Diffstat (limited to 'arch/x86'): 170 files changed, 2719 insertions, 1940 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a825bf031f49..53bab123a8ee 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,6 +27,7 @@ config X86_64 # Options that are inherently 64-bit kernel only: select ARCH_HAS_GIGANTIC_PAGE select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 + select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY select MODULES_USE_ELF_RELA @@ -125,8 +126,8 @@ config X86 select ARCH_WANTS_NO_INSTR select ARCH_WANT_GENERAL_HUGETLB select ARCH_WANT_HUGE_PMD_SHARE - select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP if X86_64 select ARCH_WANT_LD_ORPHAN_WARN + select ARCH_WANT_OPTIMIZE_VMEMMAP if X86_64 select ARCH_WANTS_THP_SWAP if X86_64 select ARCH_HAS_PARANOID_L1D_FLUSH select BUILDTIME_TABLE_SORT @@ -162,6 +163,7 @@ config X86 select GUP_GET_PXX_LOW_HIGH if X86_PAE select HARDIRQS_SW_RESEND select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 + select HAS_IOPORT select HAVE_ACPI_APEI if ACPI select HAVE_ACPI_APEI_NMI if ACPI select HAVE_ALIGNED_STRUCT_PAGE if SLUB @@ -283,7 +285,6 @@ config X86 select RTC_LIB select RTC_MC146818_LIB select SPARSE_IRQ - select SRCU select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT @@ -434,7 +435,7 @@ config SMP Y to "Enhanced Real Time Clock Support", below. The "Advanced Power Management" code will be disabled if you say Y here. - See also <file:Documentation/x86/i386/IO-APIC.rst>, + See also <file:Documentation/arch/x86/i386/IO-APIC.rst>, <file:Documentation/admin-guide/lockup-watchdogs.rst> and the SMP-HOWTO available at <http://www.tldp.org/docs.html#howto>. @@ -1324,7 +1325,7 @@ config MICROCODE the Linux kernel. The preferred method to load microcode from a detached initrd is described - in Documentation/x86/microcode.rst. For that you need to enable + in Documentation/arch/x86/microcode.rst. For that you need to enable CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the initrd for microcode blobs. @@ -1510,7 +1511,7 @@ config X86_5LEVEL A kernel with the option enabled can be booted on machines that support 4- or 5-level paging. - See Documentation/x86/x86_64/5level-paging.rst for more + See Documentation/arch/x86/x86_64/5level-paging.rst for more information. Say N if unsure. @@ -1774,7 +1775,7 @@ config MTRR You can safely say Y even if your machine doesn't have MTRRs, you'll just add about 9 KB to your kernel. - See <file:Documentation/x86/mtrr.rst> for more information. + See <file:Documentation/arch/x86/mtrr.rst> for more information. config MTRR_SANITIZER def_bool y @@ -1938,7 +1939,6 @@ config X86_SGX depends on X86_64 && CPU_SUP_INTEL && X86_X2APIC depends on CRYPTO=y depends on CRYPTO_SHA256=y - select SRCU select MMU_NOTIFIER select NUMA_KEEP_MEMINFO if NUMA select XARRAY_MULTI @@ -2290,6 +2290,17 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING If unsure, leave at the default value. +config ADDRESS_MASKING + bool "Linear Address Masking support" + depends on X86_64 + help + Linear Address Masking (LAM) modifies the checking that is applied + to 64-bit linear addresses, allowing software to use of the + untranslated address bits for metadata. + + The capability can be used for efficient address sanitizers (ASAN) + implementation and for optimizations in JITs. + config HOTPLUG_CPU def_bool y depends on SMP @@ -2551,7 +2562,7 @@ config PAGE_TABLE_ISOLATION ensuring that the majority of kernel addresses are not mapped into userspace. - See Documentation/x86/pti.rst for more details. 
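[Editor's sketch, not part of the patch: the new ADDRESS_MASKING help text above describes Linear Address Masking, where metadata lives in upper, untranslated address bits that the CPU ignores once LAM is enabled. The snippet below is a minimal user-space illustration of that idea; the bit range shown (62:57, as under LAM_U57) and the LAM_TAG_* / lam_*_ptr names are illustrative assumptions, not kernel APIs.]

#include <stdint.h>

/* Hypothetical tag layout: 6 metadata bits in linear-address bits 62:57. */
#define LAM_TAG_SHIFT	57
#define LAM_TAG_MASK	(0x3FULL << LAM_TAG_SHIFT)

static inline void *lam_tag_ptr(void *p, uint64_t tag)
{
	/* Store metadata in the masked bits; with LAM active the CPU
	 * ignores them, so the tagged pointer can be dereferenced directly. */
	return (void *)(((uint64_t)p & ~LAM_TAG_MASK) |
			((tag << LAM_TAG_SHIFT) & LAM_TAG_MASK));
}

static inline void *lam_untag_ptr(void *p)
{
	/* Software still strips the tag before comparing or storing
	 * pointers canonically. */
	return (void *)((uint64_t)p & ~LAM_TAG_MASK);
}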
+ See Documentation/arch/x86/pti.rst for more details. config RETPOLINE bool "Avoid speculative indirect branches in kernel" diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index bdfe08f1a930..c5d614d28a75 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -97,7 +97,7 @@ config IOMMU_DEBUG code. When you use it make sure you have a big enough IOMMU/AGP aperture. Most of the options enabled by this can be set more finegrained using the iommu= command line - options. See Documentation/x86/x86_64/boot-options.rst for more + options. See Documentation/arch/x86/x86_64/boot-options.rst for more details. config IOMMU_LEAK diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um index b70559b821df..2106a2bd152b 100644 --- a/arch/x86/Makefile.um +++ b/arch/x86/Makefile.um @@ -3,9 +3,14 @@ core-y += arch/x86/crypto/ # # Disable SSE and other FP/SIMD instructions to match normal x86 +# This is required to work around issues in older LLVM versions, but breaks +# GCC versions < 11. See: +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99652 # +ifeq ($(CONFIG_CC_IS_CLANG),y) KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 +endif ifeq ($(CONFIG_X86_32),y) START := 0x8048000 diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index 321a5011042d..bcc956c17872 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -8,14 +8,6 @@ * Copyright (C) 2016 Kees Cook */ -/* - * Since we're dealing with identity mappings, physical and virtual - * addresses are the same, so override these defines which are ultimately - * used by the headers in misc.h. - */ -#define __pa(x) ((unsigned long)(x)) -#define __va(x) ((void *)((unsigned long)(x))) - /* No PAGE_TABLE_ISOLATION support needed either: */ #undef CONFIG_PAGE_TABLE_ISOLATION diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 20118fb7c53b..2f155a0e3041 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -19,6 +19,15 @@ /* cpu_feature_enabled() cannot be used this early */ #define USE_EARLY_PGTABLE_L5 +/* + * Boot stub deals with identity mappings, physical and virtual addresses are + * the same, so override these defines. + * + * <asm/page.h> will not define them if they are already defined. 
+ */ +#define __pa(x) ((unsigned long)(x)) +#define __va(x) ((void *)((unsigned long)(x))) + #include <linux/linkage.h> #include <linux/screen_info.h> #include <linux/elf.h> diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index d63ad8f99f83..014b89c89088 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -104,9 +104,7 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, } #undef __init -#undef __pa #define __init -#define __pa(x) ((unsigned long)(x)) #define __BOOT_COMPRESSED diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c index 918a7606f53c..2d81d3cc72a1 100644 --- a/arch/x86/boot/compressed/tdx.c +++ b/arch/x86/boot/compressed/tdx.c @@ -26,7 +26,7 @@ static inline unsigned int tdx_io_in(int size, u16 port) .r14 = port, }; - if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT)) + if (__tdx_hypercall_ret(&args)) return UINT_MAX; return args.r11; @@ -43,7 +43,7 @@ static inline void tdx_io_out(int size, u16 port, u32 value) .r15 = value, }; - __tdx_hypercall(&args, 0); + __tdx_hypercall(&args); } static inline u8 tdx_inb(u16 port) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 9338c68e7413..b04ca8e2b213 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -321,7 +321,7 @@ start_sys_seg: .word SYSSEG # obsolete and meaningless, but just type_of_loader: .byte 0 # 0 means ancient bootloader, newer # bootloaders know to change this. - # See Documentation/x86/boot.rst for + # See Documentation/arch/x86/boot.rst for # assigned ids # flags, unused bits must be zero (RFU) bit within loadflags diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 49b44f881484..73f83233d25d 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -13,7 +13,7 @@ #include <asm/coco.h> #include <asm/processor.h> -static enum cc_vendor vendor __ro_after_init; +enum cc_vendor cc_vendor __ro_after_init; static u64 cc_mask __ro_after_init; static bool intel_cc_platform_has(enum cc_attr attr) @@ -30,6 +30,22 @@ static bool intel_cc_platform_has(enum cc_attr attr) } /* + * Handle the SEV-SNP vTOM case where sme_me_mask is zero, and + * the other levels of SME/SEV functionality, including C-bit + * based SEV-SNP, are not enabled. + */ +static __maybe_unused bool amd_cc_platform_vtom(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_GUEST_MEM_ENCRYPT: + case CC_ATTR_MEM_ENCRYPT: + return true; + default: + return false; + } +} + +/* * SME and SEV are very similar but they are not the same, so there are * times that the kernel will need to distinguish between SME and SEV. The * cc_platform_has() function is used for this. When a distinction isn't @@ -41,9 +57,14 @@ static bool intel_cc_platform_has(enum cc_attr attr) * up under SME the trampoline area cannot be encrypted, whereas under SEV * the trampoline area must be encrypted. 
*/ + static bool amd_cc_platform_has(enum cc_attr attr) { #ifdef CONFIG_AMD_MEM_ENCRYPT + + if (sev_status & MSR_AMD64_SNP_VTOM) + return amd_cc_platform_vtom(attr); + switch (attr) { case CC_ATTR_MEM_ENCRYPT: return sme_me_mask; @@ -76,20 +97,13 @@ static bool amd_cc_platform_has(enum cc_attr attr) #endif } -static bool hyperv_cc_platform_has(enum cc_attr attr) -{ - return attr == CC_ATTR_GUEST_MEM_ENCRYPT; -} - bool cc_platform_has(enum cc_attr attr) { - switch (vendor) { + switch (cc_vendor) { case CC_VENDOR_AMD: return amd_cc_platform_has(attr); case CC_VENDOR_INTEL: return intel_cc_platform_has(attr); - case CC_VENDOR_HYPERV: - return hyperv_cc_platform_has(attr); default: return false; } @@ -103,11 +117,14 @@ u64 cc_mkenc(u64 val) * encryption status of the page. * * - for AMD, bit *set* means the page is encrypted - * - for Intel *clear* means encrypted. + * - for AMD with vTOM and for Intel, *clear* means encrypted */ - switch (vendor) { + switch (cc_vendor) { case CC_VENDOR_AMD: - return val | cc_mask; + if (sev_status & MSR_AMD64_SNP_VTOM) + return val & ~cc_mask; + else + return val | cc_mask; case CC_VENDOR_INTEL: return val & ~cc_mask; default: @@ -118,9 +135,12 @@ u64 cc_mkenc(u64 val) u64 cc_mkdec(u64 val) { /* See comment in cc_mkenc() */ - switch (vendor) { + switch (cc_vendor) { case CC_VENDOR_AMD: - return val & ~cc_mask; + if (sev_status & MSR_AMD64_SNP_VTOM) + return val | cc_mask; + else + return val & ~cc_mask; case CC_VENDOR_INTEL: return val | cc_mask; default: @@ -129,11 +149,6 @@ u64 cc_mkdec(u64 val) } EXPORT_SYMBOL_GPL(cc_mkdec); -__init void cc_set_vendor(enum cc_vendor v) -{ - vendor = v; -} - __init void cc_set_mask(u64 mask) { cc_mask = mask; diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S index 6a255e6809bc..b193c0a1d8db 100644 --- a/arch/x86/coco/tdx/tdcall.S +++ b/arch/x86/coco/tdx/tdcall.S @@ -85,12 +85,12 @@ SYM_FUNC_START(__tdx_module_call) SYM_FUNC_END(__tdx_module_call) /* - * __tdx_hypercall() - Make hypercalls to a TDX VMM using TDVMCALL leaf - * of TDCALL instruction + * TDX_HYPERCALL - Make hypercalls to a TDX VMM using TDVMCALL leaf of TDCALL + * instruction * * Transforms values in function call argument struct tdx_hypercall_args @args * into the TDCALL register ABI. After TDCALL operation, VMM output is saved - * back in @args. + * back in @args, if \ret is 1. * *------------------------------------------------------------------------- * TD VMCALL ABI: @@ -105,26 +105,18 @@ SYM_FUNC_END(__tdx_module_call) * specification. Non zero value indicates vendor * specific ABI. * R11 - VMCALL sub function number - * RBX, RBP, RDI, RSI - Used to pass VMCALL sub function specific arguments. + * RBX, RDX, RDI, RSI - Used to pass VMCALL sub function specific arguments. * R8-R9, R12-R15 - Same as above. * * Output Registers: * * RAX - TDCALL instruction status (Not related to hypercall * output). - * R10 - Hypercall output error code. - * R11-R15 - Hypercall sub function specific output values. + * RBX, RDX, RDI, RSI - Hypercall sub function specific output values. + * R8-R15 - Same as above. * - *------------------------------------------------------------------------- - * - * __tdx_hypercall() function ABI: - * - * @args (RDI) - struct tdx_hypercall_args for input and output - * @flags (RSI) - TDX_HCALL_* flags - * - * On successful completion, return the hypercall error code. 
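[Editor's sketch, not part of the patch: the coco/core.c hunks above make cc_mkenc()/cc_mkdec() aware of SEV-SNP vTOM, where the bit carried in cc_mask selects the shared alias above the virtual top-of-memory boundary, so its sense is inverted relative to the usual AMD C-bit. The condensed model below restates that behaviour; "vtom" stands in for the real (sev_status & MSR_AMD64_SNP_VTOM) test, and u64/enum cc_vendor are assumed from <linux/types.h>/<asm/coco.h>.]

static u64 model_mkenc(enum cc_vendor vendor, bool vtom, u64 val, u64 cc_mask)
{
	switch (vendor) {
	case CC_VENDOR_AMD:
		/* C-bit set == encrypted, but vTOM inverts the sense:
		 * clearing the bit selects the private (encrypted) view. */
		return vtom ? val & ~cc_mask : val | cc_mask;
	case CC_VENDOR_INTEL:
		/* TDX: shared bit clear == private/encrypted */
		return val & ~cc_mask;
	default:
		return val;
	}
}

cc_mkdec() is the mirror image: it sets the bit for AMD-with-vTOM and Intel, and clears it for plain AMD SME/SEV.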
*/ -SYM_FUNC_START(__tdx_hypercall) +.macro TDX_HYPERCALL ret:req FRAME_BEGIN /* Save callee-saved GPRs as mandated by the x86_64 ABI */ @@ -134,9 +126,8 @@ SYM_FUNC_START(__tdx_hypercall) push %r12 push %rbx - /* Free RDI and RSI to be used as TDVMCALL arguments */ + /* Free RDI to be used as TDVMCALL arguments */ movq %rdi, %rax - push %rsi /* Copy hypercall registers from arg struct: */ movq TDX_HYPERCALL_r8(%rax), %r8 @@ -171,14 +162,11 @@ SYM_FUNC_START(__tdx_hypercall) * and are handled by callers. */ testq %rax, %rax - jne .Lpanic + jne .Lpanic\@ pop %rax - /* Copy hypercall result registers to arg struct if needed */ - testq $TDX_HCALL_HAS_OUTPUT, (%rsp) - jz .Lout - + .if \ret movq %r8, TDX_HYPERCALL_r8(%rax) movq %r9, TDX_HYPERCALL_r9(%rax) movq %r10, TDX_HYPERCALL_r10(%rax) @@ -191,7 +179,8 @@ SYM_FUNC_START(__tdx_hypercall) movq %rsi, TDX_HYPERCALL_rsi(%rax) movq %rbx, TDX_HYPERCALL_rbx(%rax) movq %rdx, TDX_HYPERCALL_rdx(%rax) -.Lout: + .endif + /* TDVMCALL leaf return code is in R10 */ movq %r10, %rax @@ -208,9 +197,6 @@ SYM_FUNC_START(__tdx_hypercall) xor %rdi, %rdi xor %rdx, %rdx - /* Remove TDX_HCALL_* flags from the stack */ - pop %rsi - /* Restore callee-saved GPRs as mandated by the x86_64 ABI */ pop %rbx pop %r12 @@ -221,9 +207,33 @@ SYM_FUNC_START(__tdx_hypercall) FRAME_END RET -.Lpanic: +.Lpanic\@: call __tdx_hypercall_failed /* __tdx_hypercall_failed never returns */ REACHABLE - jmp .Lpanic + jmp .Lpanic\@ +.endm + +/* + * + * __tdx_hypercall() function ABI: + * + * @args (RDI) - struct tdx_hypercall_args for input + * + * On successful completion, return the hypercall error code. + */ +SYM_FUNC_START(__tdx_hypercall) + TDX_HYPERCALL ret=0 SYM_FUNC_END(__tdx_hypercall) + +/* + * + * __tdx_hypercall_ret() function ABI: + * + * @args (RDI) - struct tdx_hypercall_args for input and output + * + * On successful completion, return the hypercall error code. + */ +SYM_FUNC_START(__tdx_hypercall_ret) + TDX_HYPERCALL ret=1 +SYM_FUNC_END(__tdx_hypercall_ret) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 055300e08fb3..e146b599260f 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -66,7 +66,7 @@ static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15) .r15 = r15, }; - return __tdx_hypercall(&args, 0); + return __tdx_hypercall(&args); } /* Called from __tdx_hypercall() for unrecoverable failure */ @@ -99,7 +99,7 @@ long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2, .r14 = p4, }; - return __tdx_hypercall(&args, 0); + return __tdx_hypercall(&args); } EXPORT_SYMBOL_GPL(tdx_kvm_hypercall); #endif @@ -179,7 +179,7 @@ static void __noreturn tdx_panic(const char *msg) * happens to return. */ while (1) - __tdx_hypercall(&args, 0); + __tdx_hypercall(&args); } static void tdx_parse_tdinfo(u64 *cc_mask) @@ -289,7 +289,7 @@ static u64 __cpuidle __halt(const bool irq_disabled) * can keep the vCPU in virtual HLT, even if an IRQ is * pending, without hanging/breaking the guest. */ - return __tdx_hypercall(&args, 0); + return __tdx_hypercall(&args); } static int handle_halt(struct ve_info *ve) @@ -326,7 +326,7 @@ static int read_msr(struct pt_regs *regs, struct ve_info *ve) * can be found in TDX Guest-Host-Communication Interface * (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>". 
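[Editor's sketch, not part of the patch: the tdcall.S and tdx.c hunks around here replace the old two-argument __tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT) with a pair of helpers generated from the TDX_HYPERCALL macro: __tdx_hypercall() for leaves whose outputs are ignored and __tdx_hypercall_ret() when the VMM's output registers must be copied back into @args. The caller-side sketch below is loosely modelled on read_msr(); hcall_func() and EXIT_REASON_MSR_READ are the names used in arch/x86/coco/tdx/tdx.c but are assumptions as far as this excerpt shows.]

static int example_msr_read(u64 msr, u64 *val)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_MSR_READ),
		.r12 = msr,
	};

	/* was: __tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT) */
	if (__tdx_hypercall_ret(&args))
		return -EIO;

	*val = args.r11;	/* MSR value comes back in R11 */
	return 0;
}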
*/ - if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT)) + if (__tdx_hypercall_ret(&args)) return -EIO; regs->ax = lower_32_bits(args.r11); @@ -348,7 +348,7 @@ static int write_msr(struct pt_regs *regs, struct ve_info *ve) * can be found in TDX Guest-Host-Communication Interface * (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>". */ - if (__tdx_hypercall(&args, 0)) + if (__tdx_hypercall(&args)) return -EIO; return ve_instr_len(ve); @@ -380,7 +380,7 @@ static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve) * ABI can be found in TDX Guest-Host-Communication Interface * (GHCI), section titled "VP.VMCALL<Instruction.CPUID>". */ - if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT)) + if (__tdx_hypercall_ret(&args)) return -EIO; /* @@ -407,7 +407,7 @@ static bool mmio_read(int size, unsigned long addr, unsigned long *val) .r15 = *val, }; - if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT)) + if (__tdx_hypercall_ret(&args)) return false; *val = args.r11; return true; @@ -541,7 +541,7 @@ static bool handle_in(struct pt_regs *regs, int size, int port) * in TDX Guest-Host-Communication Interface (GHCI) section titled * "TDG.VP.VMCALL<Instruction.IO>". */ - success = !__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT); + success = !__tdx_hypercall_ret(&args); /* Update part of the register affected by the emulated instruction */ regs->ax &= ~mask; diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index cdf3215ec272..ad7f4c891625 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -201,8 +201,8 @@ SYM_FUNC_START(crypto_aegis128_aesni_init) movdqa KEY, STATE4 /* load the constants: */ - movdqa .Laegis128_const_0, STATE2 - movdqa .Laegis128_const_1, STATE1 + movdqa .Laegis128_const_0(%rip), STATE2 + movdqa .Laegis128_const_1(%rip), STATE1 pxor STATE2, STATE3 pxor STATE1, STATE4 @@ -682,7 +682,7 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail) punpcklbw T0, T0 punpcklbw T0, T0 punpcklbw T0, T0 - movdqa .Laegis128_counter, T1 + movdqa .Laegis128_counter(%rip), T1 pcmpgtb T1, T0 pand T0, MSG diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 837c1e0aa021..3ac7487ecad2 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -288,53 +288,53 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff # Encrypt/Decrypt first few blocks and $(3<<4), %r12 - jz _initial_num_blocks_is_0_\@ + jz .L_initial_num_blocks_is_0_\@ cmp $(2<<4), %r12 - jb _initial_num_blocks_is_1_\@ - je _initial_num_blocks_is_2_\@ -_initial_num_blocks_is_3_\@: + jb .L_initial_num_blocks_is_1_\@ + je .L_initial_num_blocks_is_2_\@ +.L_initial_num_blocks_is_3_\@: INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation sub $48, %r13 - jmp _initial_blocks_\@ -_initial_num_blocks_is_2_\@: + jmp .L_initial_blocks_\@ +.L_initial_num_blocks_is_2_\@: INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation sub $32, %r13 - jmp _initial_blocks_\@ -_initial_num_blocks_is_1_\@: + jmp .L_initial_blocks_\@ +.L_initial_num_blocks_is_1_\@: INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation sub $16, %r13 - jmp _initial_blocks_\@ -_initial_num_blocks_is_0_\@: + jmp .L_initial_blocks_\@ +.L_initial_num_blocks_is_0_\@: INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, 
%xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation -_initial_blocks_\@: +.L_initial_blocks_\@: # Main loop - Encrypt/Decrypt remaining blocks test %r13, %r13 - je _zero_cipher_left_\@ + je .L_zero_cipher_left_\@ sub $64, %r13 - je _four_cipher_left_\@ -_crypt_by_4_\@: + je .L_four_cipher_left_\@ +.L_crypt_by_4_\@: GHASH_4_ENCRYPT_4_PARALLEL_\operation %xmm9, %xmm10, %xmm11, %xmm12, \ %xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \ %xmm7, %xmm8, enc add $64, %r11 sub $64, %r13 - jne _crypt_by_4_\@ -_four_cipher_left_\@: + jne .L_crypt_by_4_\@ +.L_four_cipher_left_\@: GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 -_zero_cipher_left_\@: +.L_zero_cipher_left_\@: movdqu %xmm8, AadHash(%arg2) movdqu %xmm0, CurCount(%arg2) mov %arg5, %r13 and $15, %r13 # %r13 = arg5 (mod 16) - je _multiple_of_16_bytes_\@ + je .L_multiple_of_16_bytes_\@ mov %r13, PBlockLen(%arg2) @@ -348,14 +348,14 @@ _zero_cipher_left_\@: movdqu %xmm0, PBlockEncKey(%arg2) cmp $16, %arg5 - jge _large_enough_update_\@ + jge .L_large_enough_update_\@ lea (%arg4,%r11,1), %r10 mov %r13, %r12 READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 - jmp _data_read_\@ + jmp .L_data_read_\@ -_large_enough_update_\@: +.L_large_enough_update_\@: sub $16, %r11 add %r13, %r11 @@ -374,7 +374,7 @@ _large_enough_update_\@: # shift right 16-r13 bytes pshufb %xmm2, %xmm1 -_data_read_\@: +.L_data_read_\@: lea ALL_F+16(%rip), %r12 sub %r13, %r12 @@ -409,19 +409,19 @@ _data_read_\@: # Output %r13 bytes movq %xmm0, %rax cmp $8, %r13 - jle _less_than_8_bytes_left_\@ + jle .L_less_than_8_bytes_left_\@ mov %rax, (%arg3 , %r11, 1) add $8, %r11 psrldq $8, %xmm0 movq %xmm0, %rax sub $8, %r13 -_less_than_8_bytes_left_\@: +.L_less_than_8_bytes_left_\@: mov %al, (%arg3, %r11, 1) add $1, %r11 shr $8, %rax sub $1, %r13 - jne _less_than_8_bytes_left_\@ -_multiple_of_16_bytes_\@: + jne .L_less_than_8_bytes_left_\@ +.L_multiple_of_16_bytes_\@: .endm # GCM_COMPLETE Finishes update of tag of last partial block @@ -434,11 +434,11 @@ _multiple_of_16_bytes_\@: mov PBlockLen(%arg2), %r12 test %r12, %r12 - je _partial_done\@ + je .L_partial_done\@ GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 -_partial_done\@: +.L_partial_done\@: mov AadLen(%arg2), %r12 # %r13 = aadLen (number of bytes) shl $3, %r12 # convert into number of bits movd %r12d, %xmm15 # len(A) in %xmm15 @@ -457,44 +457,44 @@ _partial_done\@: movdqu OrigIV(%arg2), %xmm0 # %xmm0 = Y0 ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) pxor %xmm8, %xmm0 -_return_T_\@: +.L_return_T_\@: mov \AUTHTAG, %r10 # %r10 = authTag mov \AUTHTAGLEN, %r11 # %r11 = auth_tag_len cmp $16, %r11 - je _T_16_\@ + je .L_T_16_\@ cmp $8, %r11 - jl _T_4_\@ -_T_8_\@: + jl .L_T_4_\@ +.L_T_8_\@: movq %xmm0, %rax mov %rax, (%r10) add $8, %r10 sub $8, %r11 psrldq $8, %xmm0 test %r11, %r11 - je _return_T_done_\@ -_T_4_\@: + je .L_return_T_done_\@ +.L_T_4_\@: movd %xmm0, %eax mov %eax, (%r10) add $4, %r10 sub $4, %r11 psrldq $4, %xmm0 test %r11, %r11 - je _return_T_done_\@ -_T_123_\@: + je .L_return_T_done_\@ +.L_T_123_\@: movd %xmm0, %eax cmp $2, %r11 - jl _T_1_\@ + jl .L_T_1_\@ mov %ax, (%r10) cmp $2, %r11 - je _return_T_done_\@ + je .L_return_T_done_\@ add $2, %r10 sar $16, %eax -_T_1_\@: +.L_T_1_\@: mov %al, (%r10) - jmp _return_T_done_\@ -_T_16_\@: + jmp .L_return_T_done_\@ +.L_T_16_\@: movdqu %xmm0, (%r10) -_return_T_done_\@: +.L_return_T_done_\@: .endm #ifdef __x86_64__ @@ -563,30 +563,30 @@ _return_T_done_\@: # 
Clobbers %rax, DLEN and XMM1 .macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst cmp $8, \DLEN - jl _read_lt8_\@ + jl .L_read_lt8_\@ mov (\DPTR), %rax movq %rax, \XMMDst sub $8, \DLEN - jz _done_read_partial_block_\@ + jz .L_done_read_partial_block_\@ xor %eax, %eax -_read_next_byte_\@: +.L_read_next_byte_\@: shl $8, %rax mov 7(\DPTR, \DLEN, 1), %al dec \DLEN - jnz _read_next_byte_\@ + jnz .L_read_next_byte_\@ movq %rax, \XMM1 pslldq $8, \XMM1 por \XMM1, \XMMDst - jmp _done_read_partial_block_\@ -_read_lt8_\@: + jmp .L_done_read_partial_block_\@ +.L_read_lt8_\@: xor %eax, %eax -_read_next_byte_lt8_\@: +.L_read_next_byte_lt8_\@: shl $8, %rax mov -1(\DPTR, \DLEN, 1), %al dec \DLEN - jnz _read_next_byte_lt8_\@ + jnz .L_read_next_byte_lt8_\@ movq %rax, \XMMDst -_done_read_partial_block_\@: +.L_done_read_partial_block_\@: .endm # CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted. @@ -600,8 +600,8 @@ _done_read_partial_block_\@: pxor \TMP6, \TMP6 cmp $16, %r11 - jl _get_AAD_rest\@ -_get_AAD_blocks\@: + jl .L_get_AAD_rest\@ +.L_get_AAD_blocks\@: movdqu (%r10), \TMP7 pshufb %xmm14, \TMP7 # byte-reflect the AAD data pxor \TMP7, \TMP6 @@ -609,14 +609,14 @@ _get_AAD_blocks\@: add $16, %r10 sub $16, %r11 cmp $16, %r11 - jge _get_AAD_blocks\@ + jge .L_get_AAD_blocks\@ movdqu \TMP6, \TMP7 /* read the last <16B of AAD */ -_get_AAD_rest\@: +.L_get_AAD_rest\@: test %r11, %r11 - je _get_AAD_done\@ + je .L_get_AAD_done\@ READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7 pshufb %xmm14, \TMP7 # byte-reflect the AAD data @@ -624,7 +624,7 @@ _get_AAD_rest\@: GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 movdqu \TMP7, \TMP6 -_get_AAD_done\@: +.L_get_AAD_done\@: movdqu \TMP6, AadHash(%arg2) .endm @@ -637,21 +637,21 @@ _get_AAD_done\@: AAD_HASH operation mov PBlockLen(%arg2), %r13 test %r13, %r13 - je _partial_block_done_\@ # Leave Macro if no partial blocks + je .L_partial_block_done_\@ # Leave Macro if no partial blocks # Read in input data without over reading cmp $16, \PLAIN_CYPH_LEN - jl _fewer_than_16_bytes_\@ + jl .L_fewer_than_16_bytes_\@ movups (\PLAIN_CYPH_IN), %xmm1 # If more than 16 bytes, just fill xmm - jmp _data_read_\@ + jmp .L_data_read_\@ -_fewer_than_16_bytes_\@: +.L_fewer_than_16_bytes_\@: lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10 mov \PLAIN_CYPH_LEN, %r12 READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1 mov PBlockLen(%arg2), %r13 -_data_read_\@: # Finished reading in data +.L_data_read_\@: # Finished reading in data movdqu PBlockEncKey(%arg2), %xmm9 movdqu HashKey(%arg2), %xmm13 @@ -674,9 +674,9 @@ _data_read_\@: # Finished reading in data sub $16, %r10 # Determine if if partial block is not being filled and # shift mask accordingly - jge _no_extra_mask_1_\@ + jge .L_no_extra_mask_1_\@ sub %r10, %r12 -_no_extra_mask_1_\@: +.L_no_extra_mask_1_\@: movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out bottom r13 bytes of xmm9 @@ -689,17 +689,17 @@ _no_extra_mask_1_\@: pxor %xmm3, \AAD_HASH test %r10, %r10 - jl _partial_incomplete_1_\@ + jl .L_partial_incomplete_1_\@ # GHASH computation for the last <16 Byte block GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 xor %eax, %eax mov %rax, PBlockLen(%arg2) - jmp _dec_done_\@ -_partial_incomplete_1_\@: + jmp .L_dec_done_\@ +.L_partial_incomplete_1_\@: add \PLAIN_CYPH_LEN, PBlockLen(%arg2) -_dec_done_\@: +.L_dec_done_\@: movdqu \AAD_HASH, AadHash(%arg2) .else pxor %xmm1, %xmm9 # Plaintext XOR E(K, Yn) @@ -710,9 +710,9 @@ _dec_done_\@: sub $16, %r10 # Determine if if partial block is not being 
filled and # shift mask accordingly - jge _no_extra_mask_2_\@ + jge .L_no_extra_mask_2_\@ sub %r10, %r12 -_no_extra_mask_2_\@: +.L_no_extra_mask_2_\@: movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out bottom r13 bytes of xmm9 @@ -724,17 +724,17 @@ _no_extra_mask_2_\@: pxor %xmm9, \AAD_HASH test %r10, %r10 - jl _partial_incomplete_2_\@ + jl .L_partial_incomplete_2_\@ # GHASH computation for the last <16 Byte block GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 xor %eax, %eax mov %rax, PBlockLen(%arg2) - jmp _encode_done_\@ -_partial_incomplete_2_\@: + jmp .L_encode_done_\@ +.L_partial_incomplete_2_\@: add \PLAIN_CYPH_LEN, PBlockLen(%arg2) -_encode_done_\@: +.L_encode_done_\@: movdqu \AAD_HASH, AadHash(%arg2) movdqa SHUF_MASK(%rip), %xmm10 @@ -744,32 +744,32 @@ _encode_done_\@: .endif # output encrypted Bytes test %r10, %r10 - jl _partial_fill_\@ + jl .L_partial_fill_\@ mov %r13, %r12 mov $16, %r13 # Set r13 to be the number of bytes to write out sub %r12, %r13 - jmp _count_set_\@ -_partial_fill_\@: + jmp .L_count_set_\@ +.L_partial_fill_\@: mov \PLAIN_CYPH_LEN, %r13 -_count_set_\@: +.L_count_set_\@: movdqa %xmm9, %xmm0 movq %xmm0, %rax cmp $8, %r13 - jle _less_than_8_bytes_left_\@ + jle .L_less_than_8_bytes_left_\@ mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) add $8, \DATA_OFFSET psrldq $8, %xmm0 movq %xmm0, %rax sub $8, %r13 -_less_than_8_bytes_left_\@: +.L_less_than_8_bytes_left_\@: movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) add $1, \DATA_OFFSET shr $8, %rax sub $1, %r13 - jne _less_than_8_bytes_left_\@ -_partial_block_done_\@: + jne .L_less_than_8_bytes_left_\@ +.L_partial_block_done_\@: .endm # PARTIAL_BLOCK /* @@ -813,14 +813,14 @@ _partial_block_done_\@: shr $2,%eax # 128->4, 192->6, 256->8 add $5,%eax # 128->9, 192->11, 256->13 -aes_loop_initial_\@: +.Laes_loop_initial_\@: MOVADQ (%r10),\TMP1 .irpc index, \i_seq aesenc \TMP1, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_initial_\@ + jnz .Laes_loop_initial_\@ MOVADQ (%r10), \TMP1 .irpc index, \i_seq @@ -861,7 +861,7 @@ aes_loop_initial_\@: GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 .endif cmp $64, %r13 - jl _initial_blocks_done\@ + jl .L_initial_blocks_done\@ # no need for precomputed values /* * @@ -908,18 +908,18 @@ aes_loop_initial_\@: mov keysize,%eax shr $2,%eax # 128->4, 192->6, 256->8 sub $4,%eax # 128->0, 192->2, 256->4 - jz aes_loop_pre_done\@ + jz .Laes_loop_pre_done\@ -aes_loop_pre_\@: +.Laes_loop_pre_\@: MOVADQ (%r10),\TMP2 .irpc index, 1234 aesenc \TMP2, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_pre_\@ + jnz .Laes_loop_pre_\@ -aes_loop_pre_done\@: +.Laes_loop_pre_done\@: MOVADQ (%r10), \TMP2 aesenclast \TMP2, \XMM1 aesenclast \TMP2, \XMM2 @@ -963,7 +963,7 @@ aes_loop_pre_done\@: pshufb %xmm14, \XMM3 # perform a 16 byte swap pshufb %xmm14, \XMM4 # perform a 16 byte swap -_initial_blocks_done\@: +.L_initial_blocks_done\@: .endm @@ -1095,18 +1095,18 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation mov keysize,%eax shr $2,%eax # 128->4, 192->6, 256->8 sub $4,%eax # 128->0, 192->2, 256->4 - jz aes_loop_par_enc_done\@ + jz .Laes_loop_par_enc_done\@ -aes_loop_par_enc\@: +.Laes_loop_par_enc\@: MOVADQ (%r10),\TMP3 .irpc index, 1234 aesenc \TMP3, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_par_enc\@ + jnz .Laes_loop_par_enc\@ -aes_loop_par_enc_done\@: +.Laes_loop_par_enc_done\@: MOVADQ (%r10), \TMP3 aesenclast \TMP3, \XMM1 # Round 10 aesenclast \TMP3, \XMM2 @@ -1303,18 +1303,18 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 
XMM6 XMM7 XMM8 operation mov keysize,%eax shr $2,%eax # 128->4, 192->6, 256->8 sub $4,%eax # 128->0, 192->2, 256->4 - jz aes_loop_par_dec_done\@ + jz .Laes_loop_par_dec_done\@ -aes_loop_par_dec\@: +.Laes_loop_par_dec\@: MOVADQ (%r10),\TMP3 .irpc index, 1234 aesenc \TMP3, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_par_dec\@ + jnz .Laes_loop_par_dec\@ -aes_loop_par_dec_done\@: +.Laes_loop_par_dec_done\@: MOVADQ (%r10), \TMP3 aesenclast \TMP3, \XMM1 # last round aesenclast \TMP3, \XMM2 @@ -2717,7 +2717,7 @@ SYM_FUNC_END(aesni_cts_cbc_dec) * BSWAP_MASK == endian swapping mask */ SYM_FUNC_START_LOCAL(_aesni_inc_init) - movaps .Lbswap_mask, BSWAP_MASK + movaps .Lbswap_mask(%rip), BSWAP_MASK movaps IV, CTR pshufb BSWAP_MASK, CTR mov $1, TCTR_LOW diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 0852ab573fd3..46cddd78857b 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -154,30 +154,6 @@ SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 ALL_F: .octa 0xffffffffffffffffffffffffffffffff .octa 0x00000000000000000000000000000000 -.section .rodata -.align 16 -.type aad_shift_arr, @object -.size aad_shift_arr, 272 -aad_shift_arr: - .octa 0xffffffffffffffffffffffffffffffff - .octa 0xffffffffffffffffffffffffffffff0C - .octa 0xffffffffffffffffffffffffffff0D0C - .octa 0xffffffffffffffffffffffffff0E0D0C - .octa 0xffffffffffffffffffffffff0F0E0D0C - .octa 0xffffffffffffffffffffff0C0B0A0908 - .octa 0xffffffffffffffffffff0D0C0B0A0908 - .octa 0xffffffffffffffffff0E0D0C0B0A0908 - .octa 0xffffffffffffffff0F0E0D0C0B0A0908 - .octa 0xffffffffffffff0C0B0A090807060504 - .octa 0xffffffffffff0D0C0B0A090807060504 - .octa 0xffffffffff0E0D0C0B0A090807060504 - .octa 0xffffffff0F0E0D0C0B0A090807060504 - .octa 0xffffff0C0B0A09080706050403020100 - .octa 0xffff0D0C0B0A09080706050403020100 - .octa 0xff0E0D0C0B0A09080706050403020100 - .octa 0x0F0E0D0C0B0A09080706050403020100 - - .text @@ -302,68 +278,68 @@ VARIABLE_OFFSET = 16*8 mov %r13, %r12 shr $4, %r12 and $7, %r12 - jz _initial_num_blocks_is_0\@ + jz .L_initial_num_blocks_is_0\@ cmp $7, %r12 - je _initial_num_blocks_is_7\@ + je .L_initial_num_blocks_is_7\@ cmp $6, %r12 - je _initial_num_blocks_is_6\@ + je .L_initial_num_blocks_is_6\@ cmp $5, %r12 - je _initial_num_blocks_is_5\@ + je .L_initial_num_blocks_is_5\@ cmp $4, %r12 - je _initial_num_blocks_is_4\@ + je .L_initial_num_blocks_is_4\@ cmp $3, %r12 - je _initial_num_blocks_is_3\@ + je .L_initial_num_blocks_is_3\@ cmp $2, %r12 - je _initial_num_blocks_is_2\@ + je .L_initial_num_blocks_is_2\@ - jmp _initial_num_blocks_is_1\@ + jmp .L_initial_num_blocks_is_1\@ -_initial_num_blocks_is_7\@: +.L_initial_num_blocks_is_7\@: \INITIAL_BLOCKS \REP, 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC sub $16*7, %r13 - jmp _initial_blocks_encrypted\@ + jmp .L_initial_blocks_encrypted\@ -_initial_num_blocks_is_6\@: +.L_initial_num_blocks_is_6\@: \INITIAL_BLOCKS \REP, 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC sub $16*6, %r13 - jmp _initial_blocks_encrypted\@ + jmp .L_initial_blocks_encrypted\@ -_initial_num_blocks_is_5\@: +.L_initial_num_blocks_is_5\@: \INITIAL_BLOCKS \REP, 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC sub $16*5, %r13 - jmp _initial_blocks_encrypted\@ + 
jmp .L_initial_blocks_encrypted\@ -_initial_num_blocks_is_4\@: +.L_initial_num_blocks_is_4\@: \INITIAL_BLOCKS \REP, 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC sub $16*4, %r13 - jmp _initial_blocks_encrypted\@ + jmp .L_initial_blocks_encrypted\@ -_initial_num_blocks_is_3\@: +.L_initial_num_blocks_is_3\@: \INITIAL_BLOCKS \REP, 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC sub $16*3, %r13 - jmp _initial_blocks_encrypted\@ + jmp .L_initial_blocks_encrypted\@ -_initial_num_blocks_is_2\@: +.L_initial_num_blocks_is_2\@: \INITIAL_BLOCKS \REP, 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC sub $16*2, %r13 - jmp _initial_blocks_encrypted\@ + jmp .L_initial_blocks_encrypted\@ -_initial_num_blocks_is_1\@: +.L_initial_num_blocks_is_1\@: \INITIAL_BLOCKS \REP, 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC sub $16*1, %r13 - jmp _initial_blocks_encrypted\@ + jmp .L_initial_blocks_encrypted\@ -_initial_num_blocks_is_0\@: +.L_initial_num_blocks_is_0\@: \INITIAL_BLOCKS \REP, 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC -_initial_blocks_encrypted\@: +.L_initial_blocks_encrypted\@: test %r13, %r13 - je _zero_cipher_left\@ + je .L_zero_cipher_left\@ sub $128, %r13 - je _eight_cipher_left\@ + je .L_eight_cipher_left\@ @@ -373,9 +349,9 @@ _initial_blocks_encrypted\@: vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 -_encrypt_by_8_new\@: +.L_encrypt_by_8_new\@: cmp $(255-8), %r15d - jg _encrypt_by_8\@ + jg .L_encrypt_by_8\@ @@ -383,30 +359,30 @@ _encrypt_by_8_new\@: \GHASH_8_ENCRYPT_8_PARALLEL \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC add $128, %r11 sub $128, %r13 - jne _encrypt_by_8_new\@ + jne .L_encrypt_by_8_new\@ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 - jmp _eight_cipher_left\@ + jmp .L_eight_cipher_left\@ -_encrypt_by_8\@: +.L_encrypt_by_8\@: vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 add $8, %r15b \GHASH_8_ENCRYPT_8_PARALLEL \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 add $128, %r11 sub $128, %r13 - jne _encrypt_by_8_new\@ + jne .L_encrypt_by_8_new\@ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 -_eight_cipher_left\@: +.L_eight_cipher_left\@: \GHASH_LAST_8 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8 -_zero_cipher_left\@: +.L_zero_cipher_left\@: vmovdqu %xmm14, AadHash(arg2) vmovdqu %xmm9, CurCount(arg2) @@ -414,7 +390,7 @@ _zero_cipher_left\@: mov arg5, %r13 and $15, %r13 # r13 = (arg5 mod 16) - je _multiple_of_16_bytes\@ + je .L_multiple_of_16_bytes\@ # handle the last <16 Byte block separately @@ -428,7 +404,7 @@ _zero_cipher_left\@: vmovdqu %xmm9, PBlockEncKey(arg2) cmp $16, arg5 - jge _large_enough_update\@ + jge .L_large_enough_update\@ lea (arg4,%r11,1), %r10 mov %r13, %r12 @@ -440,9 +416,9 @@ _zero_cipher_left\@: # able to shift 16-r13 bytes (r13 is the # number of bytes in plaintext mod 16) - jmp _final_ghash_mul\@ + jmp .L_final_ghash_mul\@ -_large_enough_update\@: +.L_large_enough_update\@: sub $16, %r11 add 
%r13, %r11 @@ -461,7 +437,7 @@ _large_enough_update\@: # shift right 16-r13 bytes vpshufb %xmm2, %xmm1, %xmm1 -_final_ghash_mul\@: +.L_final_ghash_mul\@: .if \ENC_DEC == DEC vmovdqa %xmm1, %xmm2 vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) @@ -490,7 +466,7 @@ _final_ghash_mul\@: # output r13 Bytes vmovq %xmm9, %rax cmp $8, %r13 - jle _less_than_8_bytes_left\@ + jle .L_less_than_8_bytes_left\@ mov %rax, (arg3 , %r11) add $8, %r11 @@ -498,15 +474,15 @@ _final_ghash_mul\@: vmovq %xmm9, %rax sub $8, %r13 -_less_than_8_bytes_left\@: +.L_less_than_8_bytes_left\@: movb %al, (arg3 , %r11) add $1, %r11 shr $8, %rax sub $1, %r13 - jne _less_than_8_bytes_left\@ + jne .L_less_than_8_bytes_left\@ ############################# -_multiple_of_16_bytes\@: +.L_multiple_of_16_bytes\@: .endm @@ -519,12 +495,12 @@ _multiple_of_16_bytes\@: mov PBlockLen(arg2), %r12 test %r12, %r12 - je _partial_done\@ + je .L_partial_done\@ #GHASH computation for the last <16 Byte block \GHASH_MUL %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 -_partial_done\@: +.L_partial_done\@: mov AadLen(arg2), %r12 # r12 = aadLen (number of bytes) shl $3, %r12 # convert into number of bits vmovd %r12d, %xmm15 # len(A) in xmm15 @@ -547,49 +523,49 @@ _partial_done\@: -_return_T\@: +.L_return_T\@: mov \AUTH_TAG, %r10 # r10 = authTag mov \AUTH_TAG_LEN, %r11 # r11 = auth_tag_len cmp $16, %r11 - je _T_16\@ + je .L_T_16\@ cmp $8, %r11 - jl _T_4\@ + jl .L_T_4\@ -_T_8\@: +.L_T_8\@: vmovq %xmm9, %rax mov %rax, (%r10) add $8, %r10 sub $8, %r11 vpsrldq $8, %xmm9, %xmm9 test %r11, %r11 - je _return_T_done\@ -_T_4\@: + je .L_return_T_done\@ +.L_T_4\@: vmovd %xmm9, %eax mov %eax, (%r10) add $4, %r10 sub $4, %r11 vpsrldq $4, %xmm9, %xmm9 test %r11, %r11 - je _return_T_done\@ -_T_123\@: + je .L_return_T_done\@ +.L_T_123\@: vmovd %xmm9, %eax cmp $2, %r11 - jl _T_1\@ + jl .L_T_1\@ mov %ax, (%r10) cmp $2, %r11 - je _return_T_done\@ + je .L_return_T_done\@ add $2, %r10 sar $16, %eax -_T_1\@: +.L_T_1\@: mov %al, (%r10) - jmp _return_T_done\@ + jmp .L_return_T_done\@ -_T_16\@: +.L_T_16\@: vmovdqu %xmm9, (%r10) -_return_T_done\@: +.L_return_T_done\@: .endm .macro CALC_AAD_HASH GHASH_MUL AAD AADLEN T1 T2 T3 T4 T5 T6 T7 T8 @@ -603,8 +579,8 @@ _return_T_done\@: vpxor \T8, \T8, \T8 vpxor \T7, \T7, \T7 cmp $16, %r11 - jl _get_AAD_rest8\@ -_get_AAD_blocks\@: + jl .L_get_AAD_rest8\@ +.L_get_AAD_blocks\@: vmovdqu (%r10), \T7 vpshufb SHUF_MASK(%rip), \T7, \T7 vpxor \T7, \T8, \T8 @@ -613,29 +589,29 @@ _get_AAD_blocks\@: sub $16, %r12 sub $16, %r11 cmp $16, %r11 - jge _get_AAD_blocks\@ + jge .L_get_AAD_blocks\@ vmovdqu \T8, \T7 test %r11, %r11 - je _get_AAD_done\@ + je .L_get_AAD_done\@ vpxor \T7, \T7, \T7 /* read the last <16B of AAD. since we have at least 4B of data right after the AAD (the ICV, and maybe some CT), we can read 4B/8B blocks safely, and then get rid of the extra stuff */ -_get_AAD_rest8\@: +.L_get_AAD_rest8\@: cmp $4, %r11 - jle _get_AAD_rest4\@ + jle .L_get_AAD_rest4\@ movq (%r10), \T1 add $8, %r10 sub $8, %r11 vpslldq $8, \T1, \T1 vpsrldq $8, \T7, \T7 vpxor \T1, \T7, \T7 - jmp _get_AAD_rest8\@ -_get_AAD_rest4\@: + jmp .L_get_AAD_rest8\@ +.L_get_AAD_rest4\@: test %r11, %r11 - jle _get_AAD_rest0\@ + jle .L_get_AAD_rest0\@ mov (%r10), %eax movq %rax, \T1 add $4, %r10 @@ -643,20 +619,22 @@ _get_AAD_rest4\@: vpslldq $12, \T1, \T1 vpsrldq $4, \T7, \T7 vpxor \T1, \T7, \T7 -_get_AAD_rest0\@: +.L_get_AAD_rest0\@: /* finalize: shift out the extra bytes we read, and align left. 
since pslldq can only shift by an immediate, we use - vpshufb and an array of shuffle masks */ - movq %r12, %r11 - salq $4, %r11 - vmovdqu aad_shift_arr(%r11), \T1 - vpshufb \T1, \T7, \T7 -_get_AAD_rest_final\@: + vpshufb and a pair of shuffle masks */ + leaq ALL_F(%rip), %r11 + subq %r12, %r11 + vmovdqu 16(%r11), \T1 + andq $~3, %r11 + vpshufb (%r11), \T7, \T7 + vpand \T1, \T7, \T7 +.L_get_AAD_rest_final\@: vpshufb SHUF_MASK(%rip), \T7, \T7 vpxor \T8, \T7, \T7 \GHASH_MUL \T7, \T2, \T1, \T3, \T4, \T5, \T6 -_get_AAD_done\@: +.L_get_AAD_done\@: vmovdqu \T7, AadHash(arg2) .endm @@ -707,28 +685,28 @@ _get_AAD_done\@: vpxor \XMMDst, \XMMDst, \XMMDst cmp $8, \DLEN - jl _read_lt8_\@ + jl .L_read_lt8_\@ mov (\DPTR), %rax vpinsrq $0, %rax, \XMMDst, \XMMDst sub $8, \DLEN - jz _done_read_partial_block_\@ + jz .L_done_read_partial_block_\@ xor %eax, %eax -_read_next_byte_\@: +.L_read_next_byte_\@: shl $8, %rax mov 7(\DPTR, \DLEN, 1), %al dec \DLEN - jnz _read_next_byte_\@ + jnz .L_read_next_byte_\@ vpinsrq $1, %rax, \XMMDst, \XMMDst - jmp _done_read_partial_block_\@ -_read_lt8_\@: + jmp .L_done_read_partial_block_\@ +.L_read_lt8_\@: xor %eax, %eax -_read_next_byte_lt8_\@: +.L_read_next_byte_lt8_\@: shl $8, %rax mov -1(\DPTR, \DLEN, 1), %al dec \DLEN - jnz _read_next_byte_lt8_\@ + jnz .L_read_next_byte_lt8_\@ vpinsrq $0, %rax, \XMMDst, \XMMDst -_done_read_partial_block_\@: +.L_done_read_partial_block_\@: .endm # PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks @@ -740,21 +718,21 @@ _done_read_partial_block_\@: AAD_HASH ENC_DEC mov PBlockLen(arg2), %r13 test %r13, %r13 - je _partial_block_done_\@ # Leave Macro if no partial blocks + je .L_partial_block_done_\@ # Leave Macro if no partial blocks # Read in input data without over reading cmp $16, \PLAIN_CYPH_LEN - jl _fewer_than_16_bytes_\@ + jl .L_fewer_than_16_bytes_\@ vmovdqu (\PLAIN_CYPH_IN), %xmm1 # If more than 16 bytes, just fill xmm - jmp _data_read_\@ + jmp .L_data_read_\@ -_fewer_than_16_bytes_\@: +.L_fewer_than_16_bytes_\@: lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10 mov \PLAIN_CYPH_LEN, %r12 READ_PARTIAL_BLOCK %r10 %r12 %xmm1 mov PBlockLen(arg2), %r13 -_data_read_\@: # Finished reading in data +.L_data_read_\@: # Finished reading in data vmovdqu PBlockEncKey(arg2), %xmm9 vmovdqu HashKey(arg2), %xmm13 @@ -777,9 +755,9 @@ _data_read_\@: # Finished reading in data sub $16, %r10 # Determine if if partial block is not being filled and # shift mask accordingly - jge _no_extra_mask_1_\@ + jge .L_no_extra_mask_1_\@ sub %r10, %r12 -_no_extra_mask_1_\@: +.L_no_extra_mask_1_\@: vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out bottom r13 bytes of xmm9 @@ -792,17 +770,17 @@ _no_extra_mask_1_\@: vpxor %xmm3, \AAD_HASH, \AAD_HASH test %r10, %r10 - jl _partial_incomplete_1_\@ + jl .L_partial_incomplete_1_\@ # GHASH computation for the last <16 Byte block \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 xor %eax,%eax mov %rax, PBlockLen(arg2) - jmp _dec_done_\@ -_partial_incomplete_1_\@: + jmp .L_dec_done_\@ +.L_partial_incomplete_1_\@: add \PLAIN_CYPH_LEN, PBlockLen(arg2) -_dec_done_\@: +.L_dec_done_\@: vmovdqu \AAD_HASH, AadHash(arg2) .else vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) @@ -813,9 +791,9 @@ _dec_done_\@: sub $16, %r10 # Determine if if partial block is not being filled and # shift mask accordingly - jge _no_extra_mask_2_\@ + jge .L_no_extra_mask_2_\@ sub %r10, %r12 -_no_extra_mask_2_\@: +.L_no_extra_mask_2_\@: vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate 
mask to mask out bottom r13 bytes of xmm9 @@ -827,17 +805,17 @@ _no_extra_mask_2_\@: vpxor %xmm9, \AAD_HASH, \AAD_HASH test %r10, %r10 - jl _partial_incomplete_2_\@ + jl .L_partial_incomplete_2_\@ # GHASH computation for the last <16 Byte block \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 xor %eax,%eax mov %rax, PBlockLen(arg2) - jmp _encode_done_\@ -_partial_incomplete_2_\@: + jmp .L_encode_done_\@ +.L_partial_incomplete_2_\@: add \PLAIN_CYPH_LEN, PBlockLen(arg2) -_encode_done_\@: +.L_encode_done_\@: vmovdqu \AAD_HASH, AadHash(arg2) vmovdqa SHUF_MASK(%rip), %xmm10 @@ -847,32 +825,32 @@ _encode_done_\@: .endif # output encrypted Bytes test %r10, %r10 - jl _partial_fill_\@ + jl .L_partial_fill_\@ mov %r13, %r12 mov $16, %r13 # Set r13 to be the number of bytes to write out sub %r12, %r13 - jmp _count_set_\@ -_partial_fill_\@: + jmp .L_count_set_\@ +.L_partial_fill_\@: mov \PLAIN_CYPH_LEN, %r13 -_count_set_\@: +.L_count_set_\@: vmovdqa %xmm9, %xmm0 vmovq %xmm0, %rax cmp $8, %r13 - jle _less_than_8_bytes_left_\@ + jle .L_less_than_8_bytes_left_\@ mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) add $8, \DATA_OFFSET psrldq $8, %xmm0 vmovq %xmm0, %rax sub $8, %r13 -_less_than_8_bytes_left_\@: +.L_less_than_8_bytes_left_\@: movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) add $1, \DATA_OFFSET shr $8, %rax sub $1, %r13 - jne _less_than_8_bytes_left_\@ -_partial_block_done_\@: + jne .L_less_than_8_bytes_left_\@ +.L_partial_block_done_\@: .endm # PARTIAL_BLOCK ############################################################################### @@ -1073,7 +1051,7 @@ _partial_block_done_\@: vmovdqa \XMM8, \T3 cmp $128, %r13 - jl _initial_blocks_done\@ # no need for precomputed constants + jl .L_initial_blocks_done\@ # no need for precomputed constants ############################################################################### # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i @@ -1215,7 +1193,7 @@ _partial_block_done_\@: ############################################################################### -_initial_blocks_done\@: +.L_initial_blocks_done\@: .endm @@ -2023,7 +2001,7 @@ SYM_FUNC_END(aesni_gcm_finalize_avx_gen2) vmovdqa \XMM8, \T3 cmp $128, %r13 - jl _initial_blocks_done\@ # no need for precomputed constants + jl .L_initial_blocks_done\@ # no need for precomputed constants ############################################################################### # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i @@ -2167,7 +2145,7 @@ SYM_FUNC_END(aesni_gcm_finalize_avx_gen2) ############################################################################### -_initial_blocks_done\@: +.L_initial_blocks_done\@: .endm diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S index 9243f6289d34..7c1abc513f34 100644 --- a/arch/x86/crypto/aria-aesni-avx-asm_64.S +++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S @@ -80,7 +80,7 @@ transpose_4x4(c0, c1, c2, c3, a0, a1); \ transpose_4x4(d0, d1, d2, d3, a0, a1); \ \ - vmovdqu .Lshufb_16x16b, a0; \ + vmovdqu .Lshufb_16x16b(%rip), a0; \ vmovdqu st1, a1; \ vpshufb a0, a2, a2; \ vpshufb a0, a3, a3; \ @@ -132,7 +132,7 @@ transpose_4x4(c0, c1, c2, c3, a0, a1); \ transpose_4x4(d0, d1, d2, d3, a0, a1); \ \ - vmovdqu .Lshufb_16x16b, a0; \ + vmovdqu .Lshufb_16x16b(%rip), a0; \ vmovdqu st1, a1; \ vpshufb a0, a2, a2; \ vpshufb a0, a3, a3; \ @@ -300,11 +300,11 @@ x4, x5, x6, x7, \ t0, t1, t2, t3, \ t4, t5, t6, t7) \ - vmovdqa .Ltf_s2_bitmatrix, t0; \ - vmovdqa .Ltf_inv_bitmatrix, t1; \ - 
vmovdqa .Ltf_id_bitmatrix, t2; \ - vmovdqa .Ltf_aff_bitmatrix, t3; \ - vmovdqa .Ltf_x2_bitmatrix, t4; \ + vmovdqa .Ltf_s2_bitmatrix(%rip), t0; \ + vmovdqa .Ltf_inv_bitmatrix(%rip), t1; \ + vmovdqa .Ltf_id_bitmatrix(%rip), t2; \ + vmovdqa .Ltf_aff_bitmatrix(%rip), t3; \ + vmovdqa .Ltf_x2_bitmatrix(%rip), t4; \ vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \ vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \ vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \ @@ -324,13 +324,13 @@ x4, x5, x6, x7, \ t0, t1, t2, t3, \ t4, t5, t6, t7) \ - vmovdqa .Linv_shift_row, t0; \ - vmovdqa .Lshift_row, t1; \ - vbroadcastss .L0f0f0f0f, t6; \ - vmovdqa .Ltf_lo__inv_aff__and__s2, t2; \ - vmovdqa .Ltf_hi__inv_aff__and__s2, t3; \ - vmovdqa .Ltf_lo__x2__and__fwd_aff, t4; \ - vmovdqa .Ltf_hi__x2__and__fwd_aff, t5; \ + vmovdqa .Linv_shift_row(%rip), t0; \ + vmovdqa .Lshift_row(%rip), t1; \ + vbroadcastss .L0f0f0f0f(%rip), t6; \ + vmovdqa .Ltf_lo__inv_aff__and__s2(%rip), t2; \ + vmovdqa .Ltf_hi__inv_aff__and__s2(%rip), t3; \ + vmovdqa .Ltf_lo__x2__and__fwd_aff(%rip), t4; \ + vmovdqa .Ltf_hi__x2__and__fwd_aff(%rip), t5; \ \ vaesenclast t7, x0, x0; \ vaesenclast t7, x4, x4; \ diff --git a/arch/x86/crypto/aria-aesni-avx2-asm_64.S b/arch/x86/crypto/aria-aesni-avx2-asm_64.S index 82a14b4ad920..c60fa2980630 100644 --- a/arch/x86/crypto/aria-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/aria-aesni-avx2-asm_64.S @@ -96,7 +96,7 @@ transpose_4x4(c0, c1, c2, c3, a0, a1); \ transpose_4x4(d0, d1, d2, d3, a0, a1); \ \ - vbroadcasti128 .Lshufb_16x16b, a0; \ + vbroadcasti128 .Lshufb_16x16b(%rip), a0; \ vmovdqu st1, a1; \ vpshufb a0, a2, a2; \ vpshufb a0, a3, a3; \ @@ -148,7 +148,7 @@ transpose_4x4(c0, c1, c2, c3, a0, a1); \ transpose_4x4(d0, d1, d2, d3, a0, a1); \ \ - vbroadcasti128 .Lshufb_16x16b, a0; \ + vbroadcasti128 .Lshufb_16x16b(%rip), a0; \ vmovdqu st1, a1; \ vpshufb a0, a2, a2; \ vpshufb a0, a3, a3; \ @@ -307,11 +307,11 @@ x4, x5, x6, x7, \ t0, t1, t2, t3, \ t4, t5, t6, t7) \ - vpbroadcastq .Ltf_s2_bitmatrix, t0; \ - vpbroadcastq .Ltf_inv_bitmatrix, t1; \ - vpbroadcastq .Ltf_id_bitmatrix, t2; \ - vpbroadcastq .Ltf_aff_bitmatrix, t3; \ - vpbroadcastq .Ltf_x2_bitmatrix, t4; \ + vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0; \ + vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1; \ + vpbroadcastq .Ltf_id_bitmatrix(%rip), t2; \ + vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3; \ + vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4; \ vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \ vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \ vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \ @@ -332,12 +332,12 @@ t4, t5, t6, t7) \ vpxor t7, t7, t7; \ vpxor t6, t6, t6; \ - vbroadcasti128 .Linv_shift_row, t0; \ - vbroadcasti128 .Lshift_row, t1; \ - vbroadcasti128 .Ltf_lo__inv_aff__and__s2, t2; \ - vbroadcasti128 .Ltf_hi__inv_aff__and__s2, t3; \ - vbroadcasti128 .Ltf_lo__x2__and__fwd_aff, t4; \ - vbroadcasti128 .Ltf_hi__x2__and__fwd_aff, t5; \ + vbroadcasti128 .Linv_shift_row(%rip), t0; \ + vbroadcasti128 .Lshift_row(%rip), t1; \ + vbroadcasti128 .Ltf_lo__inv_aff__and__s2(%rip), t2; \ + vbroadcasti128 .Ltf_hi__inv_aff__and__s2(%rip), t3; \ + vbroadcasti128 .Ltf_lo__x2__and__fwd_aff(%rip), t4; \ + vbroadcasti128 .Ltf_hi__x2__and__fwd_aff(%rip), t5; \ \ vextracti128 $1, x0, t6##_x; \ vaesenclast t7##_x, x0##_x, x0##_x; \ @@ -369,7 +369,7 @@ vaesdeclast t7##_x, t6##_x, t6##_x; \ vinserti128 $1, t6##_x, x6, x6; \ \ - vpbroadcastd .L0f0f0f0f, t6; \ + vpbroadcastd .L0f0f0f0f(%rip), t6; \ \ /* AES inverse shift rows */ \ vpshufb t0, x0, x0; \ diff --git 
a/arch/x86/crypto/aria-gfni-avx512-asm_64.S b/arch/x86/crypto/aria-gfni-avx512-asm_64.S index 3193f0701450..860887e5d02e 100644 --- a/arch/x86/crypto/aria-gfni-avx512-asm_64.S +++ b/arch/x86/crypto/aria-gfni-avx512-asm_64.S @@ -80,7 +80,7 @@ transpose_4x4(c0, c1, c2, c3, a0, a1); \ transpose_4x4(d0, d1, d2, d3, a0, a1); \ \ - vbroadcasti64x2 .Lshufb_16x16b, a0; \ + vbroadcasti64x2 .Lshufb_16x16b(%rip), a0; \ vmovdqu64 st1, a1; \ vpshufb a0, a2, a2; \ vpshufb a0, a3, a3; \ @@ -132,7 +132,7 @@ transpose_4x4(c0, c1, c2, c3, a0, a1); \ transpose_4x4(d0, d1, d2, d3, a0, a1); \ \ - vbroadcasti64x2 .Lshufb_16x16b, a0; \ + vbroadcasti64x2 .Lshufb_16x16b(%rip), a0; \ vmovdqu64 st1, a1; \ vpshufb a0, a2, a2; \ vpshufb a0, a3, a3; \ @@ -308,11 +308,11 @@ x4, x5, x6, x7, \ t0, t1, t2, t3, \ t4, t5, t6, t7) \ - vpbroadcastq .Ltf_s2_bitmatrix, t0; \ - vpbroadcastq .Ltf_inv_bitmatrix, t1; \ - vpbroadcastq .Ltf_id_bitmatrix, t2; \ - vpbroadcastq .Ltf_aff_bitmatrix, t3; \ - vpbroadcastq .Ltf_x2_bitmatrix, t4; \ + vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0; \ + vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1; \ + vpbroadcastq .Ltf_id_bitmatrix(%rip), t2; \ + vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3; \ + vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4; \ vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \ vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \ vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \ @@ -332,11 +332,11 @@ y4, y5, y6, y7, \ t0, t1, t2, t3, \ t4, t5, t6, t7) \ - vpbroadcastq .Ltf_s2_bitmatrix, t0; \ - vpbroadcastq .Ltf_inv_bitmatrix, t1; \ - vpbroadcastq .Ltf_id_bitmatrix, t2; \ - vpbroadcastq .Ltf_aff_bitmatrix, t3; \ - vpbroadcastq .Ltf_x2_bitmatrix, t4; \ + vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0; \ + vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1; \ + vpbroadcastq .Ltf_id_bitmatrix(%rip), t2; \ + vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3; \ + vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4; \ vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \ vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \ vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \ diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c index aaba21230528..0313f9673f56 100644 --- a/arch/x86/crypto/blake2s-glue.c +++ b/arch/x86/crypto/blake2s-glue.c @@ -8,7 +8,6 @@ #include <linux/types.h> #include <linux/jump_label.h> #include <linux/kernel.h> -#include <linux/module.h> #include <linux/sizes.h> #include <asm/cpufeature.h> @@ -72,6 +71,4 @@ static int __init blake2s_mod_init(void) return 0; } -module_init(blake2s_mod_init); - -MODULE_LICENSE("GPL v2"); +subsys_initcall(blake2s_mod_init); diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index 4a30618281ec..646477a13e11 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -52,10 +52,10 @@ /* \ * S-function with AES subbytes \ */ \ - vmovdqa .Linv_shift_row, t4; \ - vbroadcastss .L0f0f0f0f, t7; \ - vmovdqa .Lpre_tf_lo_s1, t0; \ - vmovdqa .Lpre_tf_hi_s1, t1; \ + vmovdqa .Linv_shift_row(%rip), t4; \ + vbroadcastss .L0f0f0f0f(%rip), t7; \ + vmovdqa .Lpre_tf_lo_s1(%rip), t0; \ + vmovdqa .Lpre_tf_hi_s1(%rip), t1; \ \ /* AES inverse shift rows */ \ vpshufb t4, x0, x0; \ @@ -68,8 +68,8 @@ vpshufb t4, x6, x6; \ \ /* prefilter sboxes 1, 2 and 3 */ \ - vmovdqa .Lpre_tf_lo_s4, t2; \ - vmovdqa .Lpre_tf_hi_s4, t3; \ + vmovdqa .Lpre_tf_lo_s4(%rip), t2; \ + vmovdqa .Lpre_tf_hi_s4(%rip), t3; \ filter_8bit(x0, t0, t1, t7, t6); \ filter_8bit(x7, t0, t1, t7, t6); \ filter_8bit(x1, t0, t1, t7, t6); \ @@ -83,8 +83,8 
@@ filter_8bit(x6, t2, t3, t7, t6); \ \ /* AES subbytes + AES shift rows */ \ - vmovdqa .Lpost_tf_lo_s1, t0; \ - vmovdqa .Lpost_tf_hi_s1, t1; \ + vmovdqa .Lpost_tf_lo_s1(%rip), t0; \ + vmovdqa .Lpost_tf_hi_s1(%rip), t1; \ vaesenclast t4, x0, x0; \ vaesenclast t4, x7, x7; \ vaesenclast t4, x1, x1; \ @@ -95,16 +95,16 @@ vaesenclast t4, x6, x6; \ \ /* postfilter sboxes 1 and 4 */ \ - vmovdqa .Lpost_tf_lo_s3, t2; \ - vmovdqa .Lpost_tf_hi_s3, t3; \ + vmovdqa .Lpost_tf_lo_s3(%rip), t2; \ + vmovdqa .Lpost_tf_hi_s3(%rip), t3; \ filter_8bit(x0, t0, t1, t7, t6); \ filter_8bit(x7, t0, t1, t7, t6); \ filter_8bit(x3, t0, t1, t7, t6); \ filter_8bit(x6, t0, t1, t7, t6); \ \ /* postfilter sbox 3 */ \ - vmovdqa .Lpost_tf_lo_s2, t4; \ - vmovdqa .Lpost_tf_hi_s2, t5; \ + vmovdqa .Lpost_tf_lo_s2(%rip), t4; \ + vmovdqa .Lpost_tf_hi_s2(%rip), t5; \ filter_8bit(x2, t2, t3, t7, t6); \ filter_8bit(x5, t2, t3, t7, t6); \ \ @@ -443,7 +443,7 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) transpose_4x4(c0, c1, c2, c3, a0, a1); \ transpose_4x4(d0, d1, d2, d3, a0, a1); \ \ - vmovdqu .Lshufb_16x16b, a0; \ + vmovdqu .Lshufb_16x16b(%rip), a0; \ vmovdqu st1, a1; \ vpshufb a0, a2, a2; \ vpshufb a0, a3, a3; \ @@ -482,7 +482,7 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) #define inpack16_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ y6, y7, rio, key) \ vmovq key, x0; \ - vpshufb .Lpack_bswap, x0, x0; \ + vpshufb .Lpack_bswap(%rip), x0, x0; \ \ vpxor 0 * 16(rio), x0, y7; \ vpxor 1 * 16(rio), x0, y6; \ @@ -533,7 +533,7 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) vmovdqu x0, stack_tmp0; \ \ vmovq key, x0; \ - vpshufb .Lpack_bswap, x0, x0; \ + vpshufb .Lpack_bswap(%rip), x0, x0; \ \ vpxor x0, y7, y7; \ vpxor x0, y6, y6; \ diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index deaf62aa73a6..a0eb94e53b1b 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -64,12 +64,12 @@ /* \ * S-function with AES subbytes \ */ \ - vbroadcasti128 .Linv_shift_row, t4; \ - vpbroadcastd .L0f0f0f0f, t7; \ - vbroadcasti128 .Lpre_tf_lo_s1, t5; \ - vbroadcasti128 .Lpre_tf_hi_s1, t6; \ - vbroadcasti128 .Lpre_tf_lo_s4, t2; \ - vbroadcasti128 .Lpre_tf_hi_s4, t3; \ + vbroadcasti128 .Linv_shift_row(%rip), t4; \ + vpbroadcastd .L0f0f0f0f(%rip), t7; \ + vbroadcasti128 .Lpre_tf_lo_s1(%rip), t5; \ + vbroadcasti128 .Lpre_tf_hi_s1(%rip), t6; \ + vbroadcasti128 .Lpre_tf_lo_s4(%rip), t2; \ + vbroadcasti128 .Lpre_tf_hi_s4(%rip), t3; \ \ /* AES inverse shift rows */ \ vpshufb t4, x0, x0; \ @@ -115,8 +115,8 @@ vinserti128 $1, t2##_x, x6, x6; \ vextracti128 $1, x1, t3##_x; \ vextracti128 $1, x4, t2##_x; \ - vbroadcasti128 .Lpost_tf_lo_s1, t0; \ - vbroadcasti128 .Lpost_tf_hi_s1, t1; \ + vbroadcasti128 .Lpost_tf_lo_s1(%rip), t0; \ + vbroadcasti128 .Lpost_tf_hi_s1(%rip), t1; \ vaesenclast t4##_x, x2##_x, x2##_x; \ vaesenclast t4##_x, t6##_x, t6##_x; \ vinserti128 $1, t6##_x, x2, x2; \ @@ -131,16 +131,16 @@ vinserti128 $1, t2##_x, x4, x4; \ \ /* postfilter sboxes 1 and 4 */ \ - vbroadcasti128 .Lpost_tf_lo_s3, t2; \ - vbroadcasti128 .Lpost_tf_hi_s3, t3; \ + vbroadcasti128 .Lpost_tf_lo_s3(%rip), t2; \ + vbroadcasti128 .Lpost_tf_hi_s3(%rip), t3; \ filter_8bit(x0, t0, t1, t7, t6); \ filter_8bit(x7, t0, t1, t7, t6); \ filter_8bit(x3, t0, t1, t7, t6); \ filter_8bit(x6, t0, t1, t7, t6); \ \ /* postfilter sbox 3 */ \ - vbroadcasti128 .Lpost_tf_lo_s2, t4; \ - 
vbroadcasti128 .Lpost_tf_hi_s2, t5; \ + vbroadcasti128 .Lpost_tf_lo_s2(%rip), t4; \ + vbroadcasti128 .Lpost_tf_hi_s2(%rip), t5; \ filter_8bit(x2, t2, t3, t7, t6); \ filter_8bit(x5, t2, t3, t7, t6); \ \ @@ -475,7 +475,7 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) transpose_4x4(c0, c1, c2, c3, a0, a1); \ transpose_4x4(d0, d1, d2, d3, a0, a1); \ \ - vbroadcasti128 .Lshufb_16x16b, a0; \ + vbroadcasti128 .Lshufb_16x16b(%rip), a0; \ vmovdqu st1, a1; \ vpshufb a0, a2, a2; \ vpshufb a0, a3, a3; \ @@ -514,7 +514,7 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) #define inpack32_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ y6, y7, rio, key) \ vpbroadcastq key, x0; \ - vpshufb .Lpack_bswap, x0, x0; \ + vpshufb .Lpack_bswap(%rip), x0, x0; \ \ vpxor 0 * 32(rio), x0, y7; \ vpxor 1 * 32(rio), x0, y6; \ @@ -565,7 +565,7 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) vmovdqu x0, stack_tmp0; \ \ vpbroadcastq key, x0; \ - vpshufb .Lpack_bswap, x0, x0; \ + vpshufb .Lpack_bswap(%rip), x0, x0; \ \ vpxor x0, y7, y7; \ vpxor x0, y6, y6; \ diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S index 347c059f5940..816b6bb8bded 100644 --- a/arch/x86/crypto/camellia-x86_64-asm_64.S +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S @@ -77,11 +77,13 @@ #define RXORbl %r9b #define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \ + leaq T0(%rip), tmp1; \ movzbl ab ## bl, tmp2 ## d; \ + xorq (tmp1, tmp2, 8), dst; \ + leaq T1(%rip), tmp2; \ movzbl ab ## bh, tmp1 ## d; \ rorq $16, ab; \ - xorq T0(, tmp2, 8), dst; \ - xorq T1(, tmp1, 8), dst; + xorq (tmp2, tmp1, 8), dst; /********************************************************************** 1-way camellia diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index 0326a01503c3..b4e460a87f18 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -84,15 +84,19 @@ #define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \ movzbl src ## bh, RID1d; \ + leaq s1(%rip), RID2; \ + movl (RID2,RID1,4), dst ## d; \ movzbl src ## bl, RID2d; \ + leaq s2(%rip), RID1; \ + op1 (RID1,RID2,4), dst ## d; \ shrq $16, src; \ - movl s1(, RID1, 4), dst ## d; \ - op1 s2(, RID2, 4), dst ## d; \ movzbl src ## bh, RID1d; \ + leaq s3(%rip), RID2; \ + op2 (RID2,RID1,4), dst ## d; \ movzbl src ## bl, RID2d; \ interleave_op(il_reg); \ - op2 s3(, RID1, 4), dst ## d; \ - op3 s4(, RID2, 4), dst ## d; + leaq s4(%rip), RID1; \ + op3 (RID1,RID2,4), dst ## d; #define dummy(d) /* do nothing */ @@ -151,15 +155,15 @@ subround(l ## 3, r ## 3, l ## 4, r ## 4, f); #define enc_preload_rkr() \ - vbroadcastss .L16_mask, RKR; \ + vbroadcastss .L16_mask(%rip), RKR; \ /* add 16-bit rotation to key rotations (mod 32) */ \ vpxor kr(CTX), RKR, RKR; #define dec_preload_rkr() \ - vbroadcastss .L16_mask, RKR; \ + vbroadcastss .L16_mask(%rip), RKR; \ /* add 16-bit rotation to key rotations (mod 32) */ \ vpxor kr(CTX), RKR, RKR; \ - vpshufb .Lbswap128_mask, RKR, RKR; + vpshufb .Lbswap128_mask(%rip), RKR, RKR; #define transpose_2x4(x0, x1, t0, t1) \ vpunpckldq x1, x0, t0; \ @@ -235,9 +239,9 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16) movq %rdi, CTX; - vmovdqa .Lbswap_mask, RKM; - vmovd .Lfirst_mask, R1ST; - vmovd .L32_mask, R32; + vmovdqa .Lbswap_mask(%rip), RKM; + vmovd .Lfirst_mask(%rip), R1ST; + vmovd .L32_mask(%rip), R32; enc_preload_rkr(); inpack_blocks(RL1, RR1, RTMP, RX, RKM); @@ 
-271,7 +275,7 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16) popq %rbx; popq %r15; - vmovdqa .Lbswap_mask, RKM; + vmovdqa .Lbswap_mask(%rip), RKM; outunpack_blocks(RR1, RL1, RTMP, RX, RKM); outunpack_blocks(RR2, RL2, RTMP, RX, RKM); @@ -308,9 +312,9 @@ SYM_FUNC_START_LOCAL(__cast5_dec_blk16) movq %rdi, CTX; - vmovdqa .Lbswap_mask, RKM; - vmovd .Lfirst_mask, R1ST; - vmovd .L32_mask, R32; + vmovdqa .Lbswap_mask(%rip), RKM; + vmovd .Lfirst_mask(%rip), R1ST; + vmovd .L32_mask(%rip), R32; dec_preload_rkr(); inpack_blocks(RL1, RR1, RTMP, RX, RKM); @@ -341,7 +345,7 @@ SYM_FUNC_START_LOCAL(__cast5_dec_blk16) round(RL, RR, 1, 2); round(RR, RL, 0, 1); - vmovdqa .Lbswap_mask, RKM; + vmovdqa .Lbswap_mask(%rip), RKM; popq %rbx; popq %r15; @@ -504,8 +508,8 @@ SYM_FUNC_START(cast5_ctr_16way) vpcmpeqd RKR, RKR, RKR; vpaddq RKR, RKR, RKR; /* low: -2, high: -2 */ - vmovdqa .Lbswap_iv_mask, R1ST; - vmovdqa .Lbswap128_mask, RKM; + vmovdqa .Lbswap_iv_mask(%rip), R1ST; + vmovdqa .Lbswap128_mask(%rip), RKM; /* load IV and byteswap */ vmovq (%rcx), RX; diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index 82b716fd5dba..9e86d460b409 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -84,15 +84,19 @@ #define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \ movzbl src ## bh, RID1d; \ + leaq s1(%rip), RID2; \ + movl (RID2,RID1,4), dst ## d; \ movzbl src ## bl, RID2d; \ + leaq s2(%rip), RID1; \ + op1 (RID1,RID2,4), dst ## d; \ shrq $16, src; \ - movl s1(, RID1, 4), dst ## d; \ - op1 s2(, RID2, 4), dst ## d; \ movzbl src ## bh, RID1d; \ + leaq s3(%rip), RID2; \ + op2 (RID2,RID1,4), dst ## d; \ movzbl src ## bl, RID2d; \ interleave_op(il_reg); \ - op2 s3(, RID1, 4), dst ## d; \ - op3 s4(, RID2, 4), dst ## d; + leaq s4(%rip), RID1; \ + op3 (RID1,RID2,4), dst ## d; #define dummy(d) /* do nothing */ @@ -175,10 +179,10 @@ qop(RD, RC, 1); #define shuffle(mask) \ - vpshufb mask, RKR, RKR; + vpshufb mask(%rip), RKR, RKR; #define preload_rkr(n, do_mask, mask) \ - vbroadcastss .L16_mask, RKR; \ + vbroadcastss .L16_mask(%rip), RKR; \ /* add 16-bit rotation to key rotations (mod 32) */ \ vpxor (kr+n*16)(CTX), RKR, RKR; \ do_mask(mask); @@ -258,9 +262,9 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8) movq %rdi, CTX; - vmovdqa .Lbswap_mask, RKM; - vmovd .Lfirst_mask, R1ST; - vmovd .L32_mask, R32; + vmovdqa .Lbswap_mask(%rip), RKM; + vmovd .Lfirst_mask(%rip), R1ST; + vmovd .L32_mask(%rip), R32; inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); @@ -284,7 +288,7 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8) popq %rbx; popq %r15; - vmovdqa .Lbswap_mask, RKM; + vmovdqa .Lbswap_mask(%rip), RKM; outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); @@ -306,9 +310,9 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8) movq %rdi, CTX; - vmovdqa .Lbswap_mask, RKM; - vmovd .Lfirst_mask, R1ST; - vmovd .L32_mask, R32; + vmovdqa .Lbswap_mask(%rip), RKM; + vmovd .Lfirst_mask(%rip), R1ST; + vmovd .L32_mask(%rip), R32; inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); @@ -332,7 +336,7 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8) popq %rbx; popq %r15; - vmovdqa .Lbswap_mask, RKM; + vmovdqa .Lbswap_mask(%rip), RKM; outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); diff --git 
a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S index ca53e96996ac..5d31137e2c7d 100644 --- a/arch/x86/crypto/crc32-pclmul_asm.S +++ b/arch/x86/crypto/crc32-pclmul_asm.S @@ -90,7 +90,7 @@ SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligne sub $0x40, LEN add $0x40, BUF cmp $0x40, LEN - jb less_64 + jb .Lless_64 #ifdef __x86_64__ movdqa .Lconstant_R2R1(%rip), CONSTANT @@ -98,7 +98,7 @@ SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligne movdqa .Lconstant_R2R1, CONSTANT #endif -loop_64:/* 64 bytes Full cache line folding */ +.Lloop_64:/* 64 bytes Full cache line folding */ prefetchnta 0x40(BUF) movdqa %xmm1, %xmm5 movdqa %xmm2, %xmm6 @@ -139,8 +139,8 @@ loop_64:/* 64 bytes Full cache line folding */ sub $0x40, LEN add $0x40, BUF cmp $0x40, LEN - jge loop_64 -less_64:/* Folding cache line into 128bit */ + jge .Lloop_64 +.Lless_64:/* Folding cache line into 128bit */ #ifdef __x86_64__ movdqa .Lconstant_R4R3(%rip), CONSTANT #else @@ -167,8 +167,8 @@ less_64:/* Folding cache line into 128bit */ pxor %xmm4, %xmm1 cmp $0x10, LEN - jb fold_64 -loop_16:/* Folding rest buffer into 128bit */ + jb .Lfold_64 +.Lloop_16:/* Folding rest buffer into 128bit */ movdqa %xmm1, %xmm5 pclmulqdq $0x00, CONSTANT, %xmm1 pclmulqdq $0x11, CONSTANT, %xmm5 @@ -177,9 +177,9 @@ loop_16:/* Folding rest buffer into 128bit */ sub $0x10, LEN add $0x10, BUF cmp $0x10, LEN - jge loop_16 + jge .Lloop_16 -fold_64: +.Lfold_64: /* perform the last 64 bit fold, also adds 32 zeroes * to the input stream */ pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */ diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index ec35915f0901..81ce0f4db555 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -49,15 +49,15 @@ ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction .macro LABEL prefix n -\prefix\n\(): +.L\prefix\n\(): .endm .macro JMPTBL_ENTRY i -.quad crc_\i +.quad .Lcrc_\i .endm .macro JNC_LESS_THAN j - jnc less_than_\j + jnc .Lless_than_\j .endm # Define threshold where buffers are considered "small" and routed to more @@ -108,30 +108,30 @@ SYM_FUNC_START(crc_pcl) neg %bufp and $7, %bufp # calculate the unalignment amount of # the address - je proc_block # Skip if aligned + je .Lproc_block # Skip if aligned ## If len is less than 8 and we're unaligned, we need to jump ## to special code to avoid reading beyond the end of the buffer cmp $8, len - jae do_align + jae .Ldo_align # less_than_8 expects length in upper 3 bits of len_dw # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30] shl $32-3+1, len_dw - jmp less_than_8_post_shl1 + jmp .Lless_than_8_post_shl1 -do_align: +.Ldo_align: #### Calculate CRC of unaligned bytes of the buffer (if any) movq (bufptmp), tmp # load a quadward from the buffer add %bufp, bufptmp # align buffer pointer for quadword # processing sub %bufp, len # update buffer length -align_loop: +.Lalign_loop: crc32b %bl, crc_init_dw # compute crc32 of 1-byte shr $8, tmp # get next byte dec %bufp - jne align_loop + jne .Lalign_loop -proc_block: +.Lproc_block: ################################################################ ## 2) PROCESS BLOCKS: @@ -141,11 +141,11 @@ proc_block: movq len, tmp # save num bytes in tmp cmpq $128*24, len - jae full_block + jae .Lfull_block -continue_block: +.Lcontinue_block: cmpq $SMALL_SIZE, len - jb small + jb .Lsmall ## len < 128*24 movq $2731, %rax # 2731 = ceil(2^16 / 24) 
@@ -168,13 +168,14 @@ continue_block: xor crc2, crc2 ## branch into array - mov jump_table(,%rax,8), %bufp + leaq jump_table(%rip), %bufp + mov (%bufp,%rax,8), %bufp JMP_NOSPEC bufp ################################################################ ## 2a) PROCESS FULL BLOCKS: ################################################################ -full_block: +.Lfull_block: movl $128,%eax lea 128*8*2(block_0), block_1 lea 128*8*3(block_0), block_2 @@ -189,7 +190,6 @@ full_block: ## 3) CRC Array: ################################################################ -crc_array: i=128 .rept 128-1 .altmacro @@ -242,28 +242,28 @@ LABEL crc_ 0 ENDBR mov tmp, len cmp $128*24, tmp - jae full_block + jae .Lfull_block cmp $24, tmp - jae continue_block + jae .Lcontinue_block -less_than_24: +.Lless_than_24: shl $32-4, len_dw # less_than_16 expects length # in upper 4 bits of len_dw - jnc less_than_16 + jnc .Lless_than_16 crc32q (bufptmp), crc_init crc32q 8(bufptmp), crc_init - jz do_return + jz .Ldo_return add $16, bufptmp # len is less than 8 if we got here # less_than_8 expects length in upper 3 bits of len_dw # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30] shl $2, len_dw - jmp less_than_8_post_shl1 + jmp .Lless_than_8_post_shl1 ####################################################################### ## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full) ####################################################################### -small: +.Lsmall: shl $32-8, len_dw # Prepare len_dw for less_than_256 j=256 .rept 5 # j = {256, 128, 64, 32, 16} @@ -279,32 +279,32 @@ LABEL less_than_ %j # less_than_j: Length should be in crc32q i(bufptmp), crc_init # Compute crc32 of 8-byte data i=i+8 .endr - jz do_return # Return if remaining length is zero + jz .Ldo_return # Return if remaining length is zero add $j, bufptmp # Advance buf .endr -less_than_8: # Length should be stored in +.Lless_than_8: # Length should be stored in # upper 3 bits of len_dw shl $1, len_dw -less_than_8_post_shl1: - jnc less_than_4 +.Lless_than_8_post_shl1: + jnc .Lless_than_4 crc32l (bufptmp), crc_init_dw # CRC of 4 bytes - jz do_return # return if remaining data is zero + jz .Ldo_return # return if remaining data is zero add $4, bufptmp -less_than_4: # Length should be stored in +.Lless_than_4: # Length should be stored in # upper 2 bits of len_dw shl $1, len_dw - jnc less_than_2 + jnc .Lless_than_2 crc32w (bufptmp), crc_init_dw # CRC of 2 bytes - jz do_return # return if remaining data is zero + jz .Ldo_return # return if remaining data is zero add $2, bufptmp -less_than_2: # Length should be stored in the MSB +.Lless_than_2: # Length should be stored in the MSB # of len_dw shl $1, len_dw - jnc less_than_1 + jnc .Lless_than_1 crc32b (bufptmp), crc_init_dw # CRC of 1 byte -less_than_1: # Length should be zero -do_return: +.Lless_than_1: # Length should be zero +.Ldo_return: movq crc_init, %rax popq %rsi popq %rdi diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S index f4c760f4cade..cf21b998e77c 100644 --- a/arch/x86/crypto/des3_ede-asm_64.S +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -129,21 +129,29 @@ movzbl RW0bl, RT2d; \ movzbl RW0bh, RT3d; \ shrq $16, RW0; \ - movq s8(, RT0, 8), RT0; \ - xorq s6(, RT1, 8), to; \ + leaq s8(%rip), RW1; \ + movq (RW1, RT0, 8), RT0; \ + leaq s6(%rip), RW1; \ + xorq (RW1, RT1, 8), to; \ movzbl RW0bl, RL1d; \ movzbl RW0bh, RT1d; \ shrl $16, RW0d; \ - xorq s4(, RT2, 8), RT0; \ - xorq s2(, RT3, 8), to; \ + leaq s4(%rip), RW1; \ + xorq (RW1, RT2, 8), RT0; \ 
+ leaq s2(%rip), RW1; \ + xorq (RW1, RT3, 8), to; \ movzbl RW0bl, RT2d; \ movzbl RW0bh, RT3d; \ - xorq s7(, RL1, 8), RT0; \ - xorq s5(, RT1, 8), to; \ - xorq s3(, RT2, 8), RT0; \ + leaq s7(%rip), RW1; \ + xorq (RW1, RL1, 8), RT0; \ + leaq s5(%rip), RW1; \ + xorq (RW1, RT1, 8), to; \ + leaq s3(%rip), RW1; \ + xorq (RW1, RT2, 8), RT0; \ load_next_key(n, RW0); \ xorq RT0, to; \ - xorq s1(, RT3, 8), to; \ + leaq s1(%rip), RW1; \ + xorq (RW1, RT3, 8), to; \ #define load_next_key(n, RWx) \ movq (((n) + 1) * 8)(CTX), RWx; @@ -355,65 +363,89 @@ SYM_FUNC_END(des3_ede_x86_64_crypt_blk) movzbl RW0bl, RT3d; \ movzbl RW0bh, RT1d; \ shrq $16, RW0; \ - xorq s8(, RT3, 8), to##0; \ - xorq s6(, RT1, 8), to##0; \ + leaq s8(%rip), RT2; \ + xorq (RT2, RT3, 8), to##0; \ + leaq s6(%rip), RT2; \ + xorq (RT2, RT1, 8), to##0; \ movzbl RW0bl, RT3d; \ movzbl RW0bh, RT1d; \ shrq $16, RW0; \ - xorq s4(, RT3, 8), to##0; \ - xorq s2(, RT1, 8), to##0; \ + leaq s4(%rip), RT2; \ + xorq (RT2, RT3, 8), to##0; \ + leaq s2(%rip), RT2; \ + xorq (RT2, RT1, 8), to##0; \ movzbl RW0bl, RT3d; \ movzbl RW0bh, RT1d; \ shrl $16, RW0d; \ - xorq s7(, RT3, 8), to##0; \ - xorq s5(, RT1, 8), to##0; \ + leaq s7(%rip), RT2; \ + xorq (RT2, RT3, 8), to##0; \ + leaq s5(%rip), RT2; \ + xorq (RT2, RT1, 8), to##0; \ movzbl RW0bl, RT3d; \ movzbl RW0bh, RT1d; \ load_next_key(n, RW0); \ - xorq s3(, RT3, 8), to##0; \ - xorq s1(, RT1, 8), to##0; \ + leaq s3(%rip), RT2; \ + xorq (RT2, RT3, 8), to##0; \ + leaq s1(%rip), RT2; \ + xorq (RT2, RT1, 8), to##0; \ xorq from##1, RW1; \ movzbl RW1bl, RT3d; \ movzbl RW1bh, RT1d; \ shrq $16, RW1; \ - xorq s8(, RT3, 8), to##1; \ - xorq s6(, RT1, 8), to##1; \ + leaq s8(%rip), RT2; \ + xorq (RT2, RT3, 8), to##1; \ + leaq s6(%rip), RT2; \ + xorq (RT2, RT1, 8), to##1; \ movzbl RW1bl, RT3d; \ movzbl RW1bh, RT1d; \ shrq $16, RW1; \ - xorq s4(, RT3, 8), to##1; \ - xorq s2(, RT1, 8), to##1; \ + leaq s4(%rip), RT2; \ + xorq (RT2, RT3, 8), to##1; \ + leaq s2(%rip), RT2; \ + xorq (RT2, RT1, 8), to##1; \ movzbl RW1bl, RT3d; \ movzbl RW1bh, RT1d; \ shrl $16, RW1d; \ - xorq s7(, RT3, 8), to##1; \ - xorq s5(, RT1, 8), to##1; \ + leaq s7(%rip), RT2; \ + xorq (RT2, RT3, 8), to##1; \ + leaq s5(%rip), RT2; \ + xorq (RT2, RT1, 8), to##1; \ movzbl RW1bl, RT3d; \ movzbl RW1bh, RT1d; \ do_movq(RW0, RW1); \ - xorq s3(, RT3, 8), to##1; \ - xorq s1(, RT1, 8), to##1; \ + leaq s3(%rip), RT2; \ + xorq (RT2, RT3, 8), to##1; \ + leaq s1(%rip), RT2; \ + xorq (RT2, RT1, 8), to##1; \ xorq from##2, RW2; \ movzbl RW2bl, RT3d; \ movzbl RW2bh, RT1d; \ shrq $16, RW2; \ - xorq s8(, RT3, 8), to##2; \ - xorq s6(, RT1, 8), to##2; \ + leaq s8(%rip), RT2; \ + xorq (RT2, RT3, 8), to##2; \ + leaq s6(%rip), RT2; \ + xorq (RT2, RT1, 8), to##2; \ movzbl RW2bl, RT3d; \ movzbl RW2bh, RT1d; \ shrq $16, RW2; \ - xorq s4(, RT3, 8), to##2; \ - xorq s2(, RT1, 8), to##2; \ + leaq s4(%rip), RT2; \ + xorq (RT2, RT3, 8), to##2; \ + leaq s2(%rip), RT2; \ + xorq (RT2, RT1, 8), to##2; \ movzbl RW2bl, RT3d; \ movzbl RW2bh, RT1d; \ shrl $16, RW2d; \ - xorq s7(, RT3, 8), to##2; \ - xorq s5(, RT1, 8), to##2; \ + leaq s7(%rip), RT2; \ + xorq (RT2, RT3, 8), to##2; \ + leaq s5(%rip), RT2; \ + xorq (RT2, RT1, 8), to##2; \ movzbl RW2bl, RT3d; \ movzbl RW2bh, RT1d; \ do_movq(RW0, RW2); \ - xorq s3(, RT3, 8), to##2; \ - xorq s1(, RT1, 8), to##2; + leaq s3(%rip), RT2; \ + xorq (RT2, RT3, 8), to##2; \ + leaq s1(%rip), RT2; \ + xorq (RT2, RT1, 8), to##2; #define __movq(src, dst) \ movq src, dst; diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S 
index 257ed9446f3e..99cb983ded9e 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -93,7 +93,7 @@ SYM_FUNC_START(clmul_ghash_mul) FRAME_BEGIN movups (%rdi), DATA movups (%rsi), SHASH - movaps .Lbswap_mask, BSWAP + movaps .Lbswap_mask(%rip), BSWAP pshufb BSWAP, DATA call __clmul_gf128mul_ble pshufb BSWAP, DATA @@ -110,7 +110,7 @@ SYM_FUNC_START(clmul_ghash_update) FRAME_BEGIN cmp $16, %rdx jb .Lupdate_just_ret # check length - movaps .Lbswap_mask, BSWAP + movaps .Lbswap_mask(%rip), BSWAP movups (%rdi), DATA movups (%rcx), SHASH pshufb BSWAP, DATA diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index a96b2fd26dab..4b49bdc95265 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -485,18 +485,18 @@ xchg WK_BUF, PRECALC_BUF .align 32 -_loop: +.L_loop: /* * code loops through more than one block * we use K_BASE value as a signal of a last block, * it is set below by: cmovae BUFFER_PTR, K_BASE */ test BLOCKS_CTR, BLOCKS_CTR - jnz _begin + jnz .L_begin .align 32 - jmp _end + jmp .L_end .align 32 -_begin: +.L_begin: /* * Do first block @@ -508,9 +508,6 @@ _begin: .set j, j+2 .endr - jmp _loop0 -_loop0: - /* * rounds: * 10,12,14,16,18 @@ -545,7 +542,7 @@ _loop0: UPDATE_HASH 16(HASH_PTR), E test BLOCKS_CTR, BLOCKS_CTR - jz _loop + jz .L_loop mov TB, B @@ -562,8 +559,6 @@ _loop0: .set j, j+2 .endr - jmp _loop1 -_loop1: /* * rounds * 20+80,22+80,24+80,26+80,28+80 @@ -574,9 +569,6 @@ _loop1: .set j, j+2 .endr - jmp _loop2 -_loop2: - /* * rounds * 40+80,42+80,44+80,46+80,48+80 @@ -592,9 +584,6 @@ _loop2: /* Move to the next block only if needed*/ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128 - jmp _loop3 -_loop3: - /* * rounds * 60+80,62+80,64+80,66+80,68+80 @@ -623,10 +612,10 @@ _loop3: xchg WK_BUF, PRECALC_BUF - jmp _loop + jmp .L_loop .align 32 - _end: +.L_end: .endm /* diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 5555b5d5215a..53de72bdd851 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -360,7 +360,7 @@ SYM_TYPED_FUNC_START(sha256_transform_avx) and $~15, %rsp # align stack pointer shl $6, NUM_BLKS # convert to bytes - jz done_hash + jz .Ldone_hash add INP, NUM_BLKS # pointer to end of data mov NUM_BLKS, _INP_END(%rsp) @@ -377,7 +377,7 @@ SYM_TYPED_FUNC_START(sha256_transform_avx) vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK vmovdqa _SHUF_00BA(%rip), SHUF_00BA vmovdqa _SHUF_DC00(%rip), SHUF_DC00 -loop0: +.Lloop0: lea K256(%rip), TBL ## byte swap first 16 dwords @@ -391,7 +391,7 @@ loop0: ## schedule 48 input dwords, by doing 3 rounds of 16 each mov $3, SRND .align 16 -loop1: +.Lloop1: vpaddd (TBL), X0, XFER vmovdqa XFER, _XFER(%rsp) FOUR_ROUNDS_AND_SCHED @@ -410,10 +410,10 @@ loop1: FOUR_ROUNDS_AND_SCHED sub $1, SRND - jne loop1 + jne .Lloop1 mov $2, SRND -loop2: +.Lloop2: vpaddd (TBL), X0, XFER vmovdqa XFER, _XFER(%rsp) DO_ROUND 0 @@ -433,7 +433,7 @@ loop2: vmovdqa X3, X1 sub $1, SRND - jne loop2 + jne .Lloop2 addm (4*0)(CTX),a addm (4*1)(CTX),b @@ -447,9 +447,9 @@ loop2: mov _INP(%rsp), INP add $64, INP cmp _INP_END(%rsp), INP - jne loop0 + jne .Lloop0 -done_hash: +.Ldone_hash: mov %rbp, %rsp popq %rbp diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 3eada9416852..9918212faf91 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -538,12 +538,12 @@ SYM_TYPED_FUNC_START(sha256_transform_rorx) 
and $-32, %rsp # align rsp to 32 byte boundary shl $6, NUM_BLKS # convert to bytes - jz done_hash + jz .Ldone_hash lea -64(INP, NUM_BLKS), NUM_BLKS # pointer to last block mov NUM_BLKS, _INP_END(%rsp) cmp NUM_BLKS, INP - je only_one_block + je .Lonly_one_block ## load initial digest mov (CTX), a @@ -561,7 +561,7 @@ SYM_TYPED_FUNC_START(sha256_transform_rorx) mov CTX, _CTX(%rsp) -loop0: +.Lloop0: ## Load first 16 dwords from two blocks VMOVDQ 0*32(INP),XTMP0 VMOVDQ 1*32(INP),XTMP1 @@ -580,7 +580,7 @@ loop0: vperm2i128 $0x20, XTMP3, XTMP1, X2 vperm2i128 $0x31, XTMP3, XTMP1, X3 -last_block_enter: +.Llast_block_enter: add $64, INP mov INP, _INP(%rsp) @@ -588,34 +588,40 @@ last_block_enter: xor SRND, SRND .align 16 -loop1: - vpaddd K256+0*32(SRND), X0, XFER +.Lloop1: + leaq K256+0*32(%rip), INP ## reuse INP as scratch reg + vpaddd (INP, SRND), X0, XFER vmovdqa XFER, 0*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 0*32 - vpaddd K256+1*32(SRND), X0, XFER + leaq K256+1*32(%rip), INP + vpaddd (INP, SRND), X0, XFER vmovdqa XFER, 1*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 1*32 - vpaddd K256+2*32(SRND), X0, XFER + leaq K256+2*32(%rip), INP + vpaddd (INP, SRND), X0, XFER vmovdqa XFER, 2*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 2*32 - vpaddd K256+3*32(SRND), X0, XFER + leaq K256+3*32(%rip), INP + vpaddd (INP, SRND), X0, XFER vmovdqa XFER, 3*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 3*32 add $4*32, SRND cmp $3*4*32, SRND - jb loop1 + jb .Lloop1 -loop2: +.Lloop2: ## Do last 16 rounds with no scheduling - vpaddd K256+0*32(SRND), X0, XFER + leaq K256+0*32(%rip), INP + vpaddd (INP, SRND), X0, XFER vmovdqa XFER, 0*32+_XFER(%rsp, SRND) DO_4ROUNDS _XFER + 0*32 - vpaddd K256+1*32(SRND), X1, XFER + leaq K256+1*32(%rip), INP + vpaddd (INP, SRND), X1, XFER vmovdqa XFER, 1*32+_XFER(%rsp, SRND) DO_4ROUNDS _XFER + 1*32 add $2*32, SRND @@ -624,7 +630,7 @@ loop2: vmovdqa X3, X1 cmp $4*4*32, SRND - jb loop2 + jb .Lloop2 mov _CTX(%rsp), CTX mov _INP(%rsp), INP @@ -639,17 +645,17 @@ loop2: addm (4*7)(CTX),h cmp _INP_END(%rsp), INP - ja done_hash + ja .Ldone_hash #### Do second block using previously scheduled results xor SRND, SRND .align 16 -loop3: +.Lloop3: DO_4ROUNDS _XFER + 0*32 + 16 DO_4ROUNDS _XFER + 1*32 + 16 add $2*32, SRND cmp $4*4*32, SRND - jb loop3 + jb .Lloop3 mov _CTX(%rsp), CTX mov _INP(%rsp), INP @@ -665,10 +671,10 @@ loop3: addm (4*7)(CTX),h cmp _INP_END(%rsp), INP - jb loop0 - ja done_hash + jb .Lloop0 + ja .Ldone_hash -do_last_block: +.Ldo_last_block: VMOVDQ 0*16(INP),XWORD0 VMOVDQ 1*16(INP),XWORD1 VMOVDQ 2*16(INP),XWORD2 @@ -679,9 +685,9 @@ do_last_block: vpshufb X_BYTE_FLIP_MASK, XWORD2, XWORD2 vpshufb X_BYTE_FLIP_MASK, XWORD3, XWORD3 - jmp last_block_enter + jmp .Llast_block_enter -only_one_block: +.Lonly_one_block: ## load initial digest mov (4*0)(CTX),a @@ -698,9 +704,9 @@ only_one_block: vmovdqa _SHUF_DC00(%rip), SHUF_DC00 mov CTX, _CTX(%rsp) - jmp do_last_block + jmp .Ldo_last_block -done_hash: +.Ldone_hash: mov %rbp, %rsp pop %rbp diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index 959288eecc68..93264ee44543 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -369,7 +369,7 @@ SYM_TYPED_FUNC_START(sha256_transform_ssse3) and $~15, %rsp shl $6, NUM_BLKS # convert to bytes - jz done_hash + jz .Ldone_hash add INP, NUM_BLKS mov NUM_BLKS, _INP_END(%rsp) # pointer to end of data @@ -387,7 +387,7 @@ SYM_TYPED_FUNC_START(sha256_transform_ssse3) movdqa _SHUF_00BA(%rip), SHUF_00BA movdqa 
_SHUF_DC00(%rip), SHUF_DC00 -loop0: +.Lloop0: lea K256(%rip), TBL ## byte swap first 16 dwords @@ -401,7 +401,7 @@ loop0: ## schedule 48 input dwords, by doing 3 rounds of 16 each mov $3, SRND .align 16 -loop1: +.Lloop1: movdqa (TBL), XFER paddd X0, XFER movdqa XFER, _XFER(%rsp) @@ -424,10 +424,10 @@ loop1: FOUR_ROUNDS_AND_SCHED sub $1, SRND - jne loop1 + jne .Lloop1 mov $2, SRND -loop2: +.Lloop2: paddd (TBL), X0 movdqa X0, _XFER(%rsp) DO_ROUND 0 @@ -446,7 +446,7 @@ loop2: movdqa X3, X1 sub $1, SRND - jne loop2 + jne .Lloop2 addm (4*0)(CTX),a addm (4*1)(CTX),b @@ -460,9 +460,9 @@ loop2: mov _INP(%rsp), INP add $64, INP cmp _INP_END(%rsp), INP - jne loop0 + jne .Lloop0 -done_hash: +.Ldone_hash: mov %rbp, %rsp popq %rbp diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index b0984f19fdb4..d902b8ea0721 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -276,7 +276,7 @@ frame_size = frame_WK + WK_SIZE ######################################################################## SYM_TYPED_FUNC_START(sha512_transform_avx) test msglen, msglen - je nowork + je .Lnowork # Save GPRs push %rbx @@ -291,7 +291,7 @@ SYM_TYPED_FUNC_START(sha512_transform_avx) sub $frame_size, %rsp and $~(0x20 - 1), %rsp -updateblock: +.Lupdateblock: # Load state variables mov DIGEST(0), a_64 @@ -348,7 +348,7 @@ updateblock: # Advance to next message block add $16*8, msg dec msglen - jnz updateblock + jnz .Lupdateblock # Restore Stack Pointer mov %rbp, %rsp @@ -361,7 +361,7 @@ updateblock: pop %r12 pop %rbx -nowork: +.Lnowork: RET SYM_FUNC_END(sha512_transform_avx) diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index b1ca99055ef9..f08496cd6870 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -581,7 +581,7 @@ SYM_TYPED_FUNC_START(sha512_transform_rorx) and $~(0x20 - 1), %rsp shl $7, NUM_BLKS # convert to bytes - jz done_hash + jz .Ldone_hash add INP, NUM_BLKS # pointer to end of data mov NUM_BLKS, frame_INPEND(%rsp) @@ -600,7 +600,7 @@ SYM_TYPED_FUNC_START(sha512_transform_rorx) vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK -loop0: +.Lloop0: lea K512(%rip), TBL ## byte swap first 16 dwords @@ -615,7 +615,7 @@ loop0: movq $4, frame_SRND(%rsp) .align 16 -loop1: +.Lloop1: vpaddq (TBL), Y_0, XFER vmovdqa XFER, frame_XFER(%rsp) FOUR_ROUNDS_AND_SCHED @@ -634,10 +634,10 @@ loop1: FOUR_ROUNDS_AND_SCHED subq $1, frame_SRND(%rsp) - jne loop1 + jne .Lloop1 movq $2, frame_SRND(%rsp) -loop2: +.Lloop2: vpaddq (TBL), Y_0, XFER vmovdqa XFER, frame_XFER(%rsp) DO_4ROUNDS @@ -650,7 +650,7 @@ loop2: vmovdqa Y_3, Y_1 subq $1, frame_SRND(%rsp) - jne loop2 + jne .Lloop2 mov frame_CTX(%rsp), CTX2 addm 8*0(CTX2), a @@ -665,9 +665,9 @@ loop2: mov frame_INP(%rsp), INP add $128, INP cmp frame_INPEND(%rsp), INP - jne loop0 + jne .Lloop0 -done_hash: +.Ldone_hash: # Restore Stack Pointer mov %rbp, %rsp diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index c06afb5270e5..65be30156816 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -278,7 +278,7 @@ frame_size = frame_WK + WK_SIZE SYM_TYPED_FUNC_START(sha512_transform_ssse3) test msglen, msglen - je nowork + je .Lnowork # Save GPRs push %rbx @@ -293,7 +293,7 @@ SYM_TYPED_FUNC_START(sha512_transform_ssse3) sub $frame_size, %rsp and $~(0x20 - 1), %rsp -updateblock: +.Lupdateblock: # Load state variables mov DIGEST(0), a_64 @@ -350,7 +350,7 @@ updateblock: # Advance to next message 
block add $16*8, msg dec msglen - jnz updateblock + jnz .Lupdateblock # Restore Stack Pointer mov %rbp, %rsp @@ -363,7 +363,7 @@ updateblock: pop %r12 pop %rbx -nowork: +.Lnowork: RET SYM_FUNC_END(sha512_transform_ssse3) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index eccc3431e515..f31e286c2977 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -8,7 +8,7 @@ * * entry.S contains the system-call and fault low-level handling routines. * - * Some of this is documented in Documentation/x86/entry_64.rst + * Some of this is documented in Documentation/arch/x86/entry_64.rst * * A note on terminology: * - iret frame: Architecture defined interrupt frame from SS to RIP @@ -205,7 +205,7 @@ syscall_return_via_sysret: */ movq %rsp, %rdi movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK pushq RSP-RDI(%rdi) /* RSP */ pushq (%rdi) /* RDI */ @@ -286,7 +286,7 @@ SYM_FUNC_END(__switch_to_asm) .pushsection .text, "ax" __FUNC_ALIGN SYM_CODE_START_NOALIGN(ret_from_fork) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR // copy_thread CALL_DEPTH_ACCOUNT movq %rax, %rdi @@ -303,7 +303,7 @@ SYM_CODE_START_NOALIGN(ret_from_fork) 1: /* kernel thread */ - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK movq %r12, %rdi CALL_NOSPEC rbx /* @@ -388,9 +388,9 @@ SYM_CODE_START(\asmsym) .if \vector == X86_TRAP_BP /* #BP advances %rip to the next instruction */ - UNWIND_HINT_IRET_REGS offset=\has_error_code*8 signal=0 + UNWIND_HINT_IRET_ENTRY offset=\has_error_code*8 signal=0 .else - UNWIND_HINT_IRET_REGS offset=\has_error_code*8 + UNWIND_HINT_IRET_ENTRY offset=\has_error_code*8 .endif ENDBR @@ -461,7 +461,7 @@ SYM_CODE_END(\asmsym) */ .macro idtentry_mce_db vector asmsym cfunc SYM_CODE_START(\asmsym) - UNWIND_HINT_IRET_REGS + UNWIND_HINT_IRET_ENTRY ENDBR ASM_CLAC cld @@ -518,7 +518,7 @@ SYM_CODE_END(\asmsym) */ .macro idtentry_vc vector asmsym cfunc SYM_CODE_START(\asmsym) - UNWIND_HINT_IRET_REGS + UNWIND_HINT_IRET_ENTRY ENDBR ASM_CLAC cld @@ -582,7 +582,7 @@ SYM_CODE_END(\asmsym) */ .macro idtentry_df vector asmsym cfunc SYM_CODE_START(\asmsym) - UNWIND_HINT_IRET_REGS offset=8 + UNWIND_HINT_IRET_ENTRY offset=8 ENDBR ASM_CLAC cld @@ -643,7 +643,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) */ movq %rsp, %rdi movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK /* Copy the IRET frame to the trampoline stack. */ pushq 6*8(%rdi) /* SS */ @@ -869,7 +869,7 @@ SYM_CODE_END(exc_xen_hypervisor_callback) */ __FUNC_ALIGN SYM_CODE_START_NOALIGN(xen_failsafe_callback) - UNWIND_HINT_EMPTY + UNWIND_HINT_UNDEFINED ENDBR movl %ds, %ecx cmpw %cx, 0x10(%rsp) @@ -1027,7 +1027,7 @@ SYM_CODE_START_LOCAL(paranoid_exit) * * NB to anyone to try to optimize this code: this code does * not execute at all for exceptions from user mode. Those - * exceptions go through error_exit instead. + * exceptions go through error_return instead. */ RESTORE_CR3 scratch_reg=%rax save_reg=%r14 @@ -1107,7 +1107,7 @@ SYM_CODE_START(error_entry) FENCE_SWAPGS_KERNEL_ENTRY CALL_DEPTH_ACCOUNT leaq 8(%rsp), %rax /* return pt_regs pointer */ - ANNOTATE_UNRET_END + VALIDATE_UNRET_END RET .Lbstep_iret: @@ -1153,7 +1153,7 @@ SYM_CODE_END(error_return) * when PAGE_TABLE_ISOLATION is in use. Do not clobber. */ SYM_CODE_START(asm_exc_nmi) - UNWIND_HINT_IRET_REGS + UNWIND_HINT_IRET_ENTRY ENDBR /* @@ -1520,7 +1520,7 @@ SYM_CODE_END(asm_exc_nmi) * MSRs to fully disable 32-bit SYSCALL. 
*/ SYM_CODE_START(ignore_sysret) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK ENDBR mov $-ENOSYS, %eax sysretl diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 1506a22a4fb6..6a1821bd7d5e 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -3,10 +3,7 @@ # Building vDSO images for x86. # -# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before -# the inclusion of generic Makefile. -ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE| -ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE +# Include the generic Makefile to check the built vdso. include $(srctree)/lib/vdso/Makefile # Sanitizer runtimes are unavailable and cannot be linked here. diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 3b300a773c7e..f3b3cacbcbb0 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -70,18 +70,9 @@ static struct ctl_table abi_table2[] = { {} }; -static struct ctl_table abi_root_table2[] = { - { - .procname = "abi", - .mode = 0555, - .child = abi_table2 - }, - {} -}; - static __init int ia32_binfmt_init(void) { - register_sysctl_table(abi_root_table2); + register_sysctl("abi", abi_table2); return 0; } __initcall(ia32_binfmt_init); diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index d234ca797e4a..e0ca8120aea8 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -317,7 +317,7 @@ static struct vm_area_struct gate_vma __ro_after_init = { struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { #ifdef CONFIG_COMPAT - if (!mm || !(mm->context.flags & MM_CONTEXT_HAS_VSYSCALL)) + if (!mm || !test_bit(MM_CONTEXT_HAS_VSYSCALL, &mm->context.flags)) return NULL; #endif if (vsyscall_mode == NONE) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 8c45b198b62f..bccea57dee81 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -923,6 +923,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) /* Event overflow */ handled++; + status &= ~mask; perf_sample_data_init(&data, 0, hwc->last_period); if (!x86_perf_event_set_period(event)) @@ -933,8 +934,6 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); - - status &= ~mask; } /* diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a3fb996a86a1..070cc4ef2672 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -5470,6 +5470,15 @@ pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i) } static umode_t +mem_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + if (attr == &event_attr_mem_ld_aux.attr.attr) + return x86_pmu.flags & PMU_FL_MEM_LOADS_AUX ? attr->mode : 0; + + return pebs_is_visible(kobj, attr, i); +} + +static umode_t lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { return x86_pmu.lbr_nr ? 
attr->mode : 0; @@ -5496,7 +5505,7 @@ static struct attribute_group group_events_td = { static struct attribute_group group_events_mem = { .name = "events", - .is_visible = pebs_is_visible, + .is_visible = mem_is_visible, }; static struct attribute_group group_events_tsx = { @@ -6486,6 +6495,10 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_EMERALDRAPIDS_X: + x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + fallthrough; + case INTEL_FAM6_GRANITERAPIDS_X: + case INTEL_FAM6_GRANITERAPIDS_D: pmem = true; x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6502,7 +6515,6 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = spr_get_event_constraints; diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 551741e79e03..835862c548cc 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -678,6 +678,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_cstates), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &icx_cstates), X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &icx_cstates), + X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, &icx_cstates), + X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D, &icx_cstates), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates), diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 7d1199554fe3..fa9b209a11fa 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -6068,6 +6068,17 @@ static struct intel_uncore_ops spr_uncore_mmio_ops = { .read_counter = uncore_mmio_read_counter, }; +static struct uncore_event_desc spr_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x01,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x05,umask=0xcf"), + INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x05,umask=0xf0"), + INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"), + { /* end: all zeroes */ }, +}; + static struct intel_uncore_type spr_uncore_imc = { SPR_UNCORE_COMMON_FORMAT(), .name = "imc", @@ -6075,6 +6086,7 @@ static struct intel_uncore_type spr_uncore_imc = { .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, .ops = &spr_uncore_mmio_ops, + .event_descs = spr_uncore_imc_events, }; static void spr_uncore_pci_enable_event(struct intel_uncore_box *box, diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index c65d8906cbcf..0feaaa571303 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -70,6 +70,8 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_EMERALDRAPIDS_X: + case INTEL_FAM6_GRANITERAPIDS_X: + case INTEL_FAM6_GRANITERAPIDS_D: case INTEL_FAM6_ATOM_SILVERMONT: case INTEL_FAM6_ATOM_SILVERMONT_D: diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile index 5d2de10809ae..3a1548054b48 100644 --- a/arch/x86/hyperv/Makefile +++ b/arch/x86/hyperv/Makefile @@ -1,6 +1,7 @@ # 
SPDX-License-Identifier: GPL-2.0-only obj-y := hv_init.o mmu.o nested.o irqdomain.o ivm.o obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o +obj-$(CONFIG_HYPERV_VTL_MODE) += hv_vtl.o ifdef CONFIG_X86_64 obj-$(CONFIG_PARAVIRT_SPINLOCKS) += hv_spinlock.o diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index fb8b2c088681..1fbda2f94184 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -96,6 +96,11 @@ static void hv_apic_eoi_write(u32 reg, u32 val) wrmsr(HV_X64_MSR_EOI, val, 0); } +static bool cpu_is_self(int cpu) +{ + return cpu == smp_processor_id(); +} + /* * IPI implementation on Hyper-V. */ @@ -128,10 +133,9 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, */ if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) { ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; - if (exclude_self) - nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask); - else - nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask); + + nr_bank = cpumask_to_vpset_skip(&(ipi_arg->vp_set), mask, + exclude_self ? cpu_is_self : NULL); /* * 'nr_bank <= 0' means some CPUs in cpumask can't be diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 41ef036ebb7b..a5f9474f08e1 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -29,7 +29,6 @@ #include <linux/syscore_ops.h> #include <clocksource/hyperv_timer.h> #include <linux/highmem.h> -#include <linux/swiotlb.h> int hyperv_init_cpuhp; u64 hv_current_partition_id = ~0ull; @@ -64,7 +63,10 @@ static int hyperv_init_ghcb(void) * memory boundary and map it here. */ rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa); - ghcb_va = memremap(ghcb_gpa, HV_HYP_PAGE_SIZE, MEMREMAP_WB); + + /* Mask out vTOM bit. ioremap_cache() maps decrypted */ + ghcb_gpa &= ~ms_hyperv.shared_gpa_boundary; + ghcb_va = (void *)ioremap_cache(ghcb_gpa, HV_HYP_PAGE_SIZE); if (!ghcb_va) return -ENOMEM; @@ -218,7 +220,7 @@ static int hv_cpu_die(unsigned int cpu) if (hv_ghcb_pg) { ghcb_va = (void **)this_cpu_ptr(hv_ghcb_pg); if (*ghcb_va) - memunmap(*ghcb_va); + iounmap(*ghcb_va); *ghcb_va = NULL; } @@ -504,16 +506,6 @@ void __init hyperv_init(void) /* Query the VMs extended capability once, so that it can be cached. */ hv_query_ext_cap(0); -#ifdef CONFIG_SWIOTLB - /* - * Swiotlb bounce buffer needs to be mapped in extra address - * space. Map function doesn't work in the early place and so - * call swiotlb_update_mem_attributes() here. - */ - if (hv_is_isolation_supported()) - swiotlb_update_mem_attributes(); -#endif - return; clean_guest_os_id: diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c new file mode 100644 index 000000000000..1ba5d3b99b16 --- /dev/null +++ b/arch/x86/hyperv/hv_vtl.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023, Microsoft Corporation. 
+ * + * Author: + * Saurabh Sengar <ssengar@microsoft.com> + */ + +#include <asm/apic.h> +#include <asm/boot.h> +#include <asm/desc.h> +#include <asm/i8259.h> +#include <asm/mshyperv.h> +#include <asm/realmode.h> + +extern struct boot_params boot_params; +static struct real_mode_header hv_vtl_real_mode_header; + +void __init hv_vtl_init_platform(void) +{ + pr_info("Linux runs in Hyper-V Virtual Trust Level\n"); + + x86_init.irqs.pre_vector_init = x86_init_noop; + x86_init.timers.timer_init = x86_init_noop; + + x86_platform.get_wallclock = get_rtc_noop; + x86_platform.set_wallclock = set_rtc_noop; + x86_platform.get_nmi_reason = hv_get_nmi_reason; + + x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT; + x86_platform.legacy.rtc = 0; + x86_platform.legacy.warm_reset = 0; + x86_platform.legacy.reserve_bios_regions = 0; + x86_platform.legacy.devices.pnpbios = 0; +} + +static inline u64 hv_vtl_system_desc_base(struct ldttss_desc *desc) +{ + return ((u64)desc->base3 << 32) | ((u64)desc->base2 << 24) | + (desc->base1 << 16) | desc->base0; +} + +static inline u32 hv_vtl_system_desc_limit(struct ldttss_desc *desc) +{ + return ((u32)desc->limit1 << 16) | (u32)desc->limit0; +} + +typedef void (*secondary_startup_64_fn)(void*, void*); +static void hv_vtl_ap_entry(void) +{ + ((secondary_startup_64_fn)secondary_startup_64)(&boot_params, &boot_params); +} + +static int hv_vtl_bringup_vcpu(u32 target_vp_index, u64 eip_ignored) +{ + u64 status; + int ret = 0; + struct hv_enable_vp_vtl *input; + unsigned long irq_flags; + + struct desc_ptr gdt_ptr; + struct desc_ptr idt_ptr; + + struct ldttss_desc *tss; + struct ldttss_desc *ldt; + struct desc_struct *gdt; + + u64 rsp = current->thread.sp; + u64 rip = (u64)&hv_vtl_ap_entry; + + native_store_gdt(&gdt_ptr); + store_idt(&idt_ptr); + + gdt = (struct desc_struct *)((void *)(gdt_ptr.address)); + tss = (struct ldttss_desc *)(gdt + GDT_ENTRY_TSS); + ldt = (struct ldttss_desc *)(gdt + GDT_ENTRY_LDT); + + local_irq_save(irq_flags); + + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + memset(input, 0, sizeof(*input)); + + input->partition_id = HV_PARTITION_ID_SELF; + input->vp_index = target_vp_index; + input->target_vtl.target_vtl = HV_VTL_MGMT; + + /* + * The x86_64 Linux kernel follows the 16-bit -> 32-bit -> 64-bit + * mode transition sequence after waking up an AP with SIPI whose + * vector points to the 16-bit AP startup trampoline code. Here in + * VTL2, we can't perform that sequence as the AP has to start in + * the 64-bit mode. + * + * To make this happen, we tell the hypervisor to load a valid 64-bit + * context (most of which is just magic numbers from the CPU manual) + * so that AP jumps right to the 64-bit entry of the kernel, and the + * control registers are loaded with values that let the AP fetch the + * code and data and carry on with work it gets assigned. 
+ */ + + input->vp_context.rip = rip; + input->vp_context.rsp = rsp; + input->vp_context.rflags = 0x0000000000000002; + input->vp_context.efer = __rdmsr(MSR_EFER); + input->vp_context.cr0 = native_read_cr0(); + input->vp_context.cr3 = __native_read_cr3(); + input->vp_context.cr4 = native_read_cr4(); + input->vp_context.msr_cr_pat = __rdmsr(MSR_IA32_CR_PAT); + input->vp_context.idtr.limit = idt_ptr.size; + input->vp_context.idtr.base = idt_ptr.address; + input->vp_context.gdtr.limit = gdt_ptr.size; + input->vp_context.gdtr.base = gdt_ptr.address; + + /* Non-system desc (64bit), long, code, present */ + input->vp_context.cs.selector = __KERNEL_CS; + input->vp_context.cs.base = 0; + input->vp_context.cs.limit = 0xffffffff; + input->vp_context.cs.attributes = 0xa09b; + /* Non-system desc (64bit), data, present, granularity, default */ + input->vp_context.ss.selector = __KERNEL_DS; + input->vp_context.ss.base = 0; + input->vp_context.ss.limit = 0xffffffff; + input->vp_context.ss.attributes = 0xc093; + + /* System desc (128bit), present, LDT */ + input->vp_context.ldtr.selector = GDT_ENTRY_LDT * 8; + input->vp_context.ldtr.base = hv_vtl_system_desc_base(ldt); + input->vp_context.ldtr.limit = hv_vtl_system_desc_limit(ldt); + input->vp_context.ldtr.attributes = 0x82; + + /* System desc (128bit), present, TSS, 0x8b - busy, 0x89 -- default */ + input->vp_context.tr.selector = GDT_ENTRY_TSS * 8; + input->vp_context.tr.base = hv_vtl_system_desc_base(tss); + input->vp_context.tr.limit = hv_vtl_system_desc_limit(tss); + input->vp_context.tr.attributes = 0x8b; + + status = hv_do_hypercall(HVCALL_ENABLE_VP_VTL, input, NULL); + + if (!hv_result_success(status) && + hv_result(status) != HV_STATUS_VTL_ALREADY_ENABLED) { + pr_err("HVCALL_ENABLE_VP_VTL failed for VP : %d ! [Err: %#llx\n]", + target_vp_index, status); + ret = -EINVAL; + goto free_lock; + } + + status = hv_do_hypercall(HVCALL_START_VP, input, NULL); + + if (!hv_result_success(status)) { + pr_err("HVCALL_START_VP failed for VP : %d ! [Err: %#llx]\n", + target_vp_index, status); + ret = -EINVAL; + } + +free_lock: + local_irq_restore(irq_flags); + + return ret; +} + +static int hv_vtl_apicid_to_vp_id(u32 apic_id) +{ + u64 control; + u64 status; + unsigned long irq_flags; + struct hv_get_vp_from_apic_id_in *input; + u32 *output, ret; + + local_irq_save(irq_flags); + + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + memset(input, 0, sizeof(*input)); + input->partition_id = HV_PARTITION_ID_SELF; + input->apic_ids[0] = apic_id; + + output = (u32 *)input; + + control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID; + status = hv_do_hypercall(control, input, output); + ret = output[0]; + + local_irq_restore(irq_flags); + + if (!hv_result_success(status)) { + pr_err("failed to get vp id from apic id %d, status %#llx\n", + apic_id, status); + return -EINVAL; + } + + return ret; +} + +static int hv_vtl_wakeup_secondary_cpu(int apicid, unsigned long start_eip) +{ + int vp_id; + + pr_debug("Bringing up CPU with APIC ID %d in VTL2...\n", apicid); + vp_id = hv_vtl_apicid_to_vp_id(apicid); + + if (vp_id < 0) { + pr_err("Couldn't find CPU with APIC ID %d\n", apicid); + return -EINVAL; + } + if (vp_id > ms_hyperv.max_vp_index) { + pr_err("Invalid CPU id %d for APIC ID %d\n", vp_id, apicid); + return -EINVAL; + } + + return hv_vtl_bringup_vcpu(vp_id, start_eip); +} + +static int __init hv_vtl_early_init(void) +{ + /* + * `boot_cpu_has` returns the runtime feature support, + * and here is the earliest it can be used. 
+ */ + if (cpu_feature_enabled(X86_FEATURE_XSAVE)) + panic("XSAVE has to be disabled as it is not supported by this module.\n" + "Please add 'noxsave' to the kernel command line.\n"); + + real_mode_header = &hv_vtl_real_mode_header; + apic->wakeup_secondary_cpu_64 = hv_vtl_wakeup_secondary_cpu; + + return 0; +} +early_initcall(hv_vtl_early_init); diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 1dbcbd9da74d..cc92388b7a99 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -13,6 +13,8 @@ #include <asm/svm.h> #include <asm/sev.h> #include <asm/io.h> +#include <asm/coco.h> +#include <asm/mem_encrypt.h> #include <asm/mshyperv.h> #include <asm/hypervisor.h> @@ -127,7 +129,7 @@ static enum es_result hv_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code, return ES_OK; } -void hv_ghcb_terminate(unsigned int set, unsigned int reason) +void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason) { u64 val = GHCB_MSR_TERM_REQ; @@ -233,41 +235,6 @@ void hv_ghcb_msr_read(u64 msr, u64 *value) local_irq_restore(flags); } EXPORT_SYMBOL_GPL(hv_ghcb_msr_read); -#endif - -enum hv_isolation_type hv_get_isolation_type(void) -{ - if (!(ms_hyperv.priv_high & HV_ISOLATION)) - return HV_ISOLATION_TYPE_NONE; - return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b); -} -EXPORT_SYMBOL_GPL(hv_get_isolation_type); - -/* - * hv_is_isolation_supported - Check system runs in the Hyper-V - * isolation VM. - */ -bool hv_is_isolation_supported(void) -{ - if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) - return false; - - if (!hypervisor_is_type(X86_HYPER_MS_HYPERV)) - return false; - - return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; -} - -DEFINE_STATIC_KEY_FALSE(isolation_type_snp); - -/* - * hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based - * isolation VM. - */ -bool hv_isolation_type_snp(void) -{ - return static_branch_unlikely(&isolation_type_snp); -} /* * hv_mark_gpa_visibility - Set pages visible to host via hvcall. @@ -320,27 +287,25 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], } /* - * hv_set_mem_host_visibility - Set specified memory visible to host. + * hv_vtom_set_host_visibility - Set specified memory visible to host. * * In Isolation VM, all guest memory is encrypted from host and guest * needs to set memory visible to host via hvcall before sharing memory * with host. This function works as wrap of hv_mark_gpa_visibility() * with memory base and size. */ -int hv_set_mem_host_visibility(unsigned long kbuffer, int pagecount, bool visible) +static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc) { - enum hv_mem_host_visibility visibility = visible ? - VMBUS_PAGE_VISIBLE_READ_WRITE : VMBUS_PAGE_NOT_VISIBLE; + enum hv_mem_host_visibility visibility = enc ? 
+ VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE; u64 *pfn_array; int ret = 0; + bool result = true; int i, pfn; - if (!hv_is_isolation_supported() || !hv_hypercall_pg) - return 0; - pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); if (!pfn_array) - return -ENOMEM; + return false; for (i = 0, pfn = 0; i < pagecount; i++) { pfn_array[pfn] = virt_to_hvpfn((void *)kbuffer + i * HV_HYP_PAGE_SIZE); @@ -349,41 +314,98 @@ int hv_set_mem_host_visibility(unsigned long kbuffer, int pagecount, bool visibl if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) { ret = hv_mark_gpa_visibility(pfn, pfn_array, visibility); - if (ret) + if (ret) { + result = false; goto err_free_pfn_array; + } pfn = 0; } } err_free_pfn_array: kfree(pfn_array); - return ret; + return result; } -/* - * hv_map_memory - map memory to extra space in the AMD SEV-SNP Isolation VM. - */ -void *hv_map_memory(void *addr, unsigned long size) +static bool hv_vtom_tlb_flush_required(bool private) { - unsigned long *pfns = kcalloc(size / PAGE_SIZE, - sizeof(unsigned long), GFP_KERNEL); - void *vaddr; - int i; + return true; +} + +static bool hv_vtom_cache_flush_required(void) +{ + return false; +} - if (!pfns) - return NULL; +static bool hv_is_private_mmio(u64 addr) +{ + /* + * Hyper-V always provides a single IO-APIC in a guest VM. + * When a paravisor is used, it is emulated by the paravisor + * in the guest context and must be mapped private. + */ + if (addr >= HV_IOAPIC_BASE_ADDRESS && + addr < (HV_IOAPIC_BASE_ADDRESS + PAGE_SIZE)) + return true; + + /* Same with a vTPM */ + if (addr >= VTPM_BASE_ADDRESS && + addr < (VTPM_BASE_ADDRESS + PAGE_SIZE)) + return true; + + return false; +} + +void __init hv_vtom_init(void) +{ + /* + * By design, a VM using vTOM doesn't see the SEV setting, + * so SEV initialization is bypassed and sev_status isn't set. + * Set it here to indicate a vTOM VM. + */ + sev_status = MSR_AMD64_SNP_VTOM; + cc_set_vendor(CC_VENDOR_AMD); + cc_set_mask(ms_hyperv.shared_gpa_boundary); + physical_mask &= ms_hyperv.shared_gpa_boundary - 1; + + x86_platform.hyper.is_private_mmio = hv_is_private_mmio; + x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required; + x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required; + x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility; +} + +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + +enum hv_isolation_type hv_get_isolation_type(void) +{ + if (!(ms_hyperv.priv_high & HV_ISOLATION)) + return HV_ISOLATION_TYPE_NONE; + return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b); +} +EXPORT_SYMBOL_GPL(hv_get_isolation_type); - for (i = 0; i < size / PAGE_SIZE; i++) - pfns[i] = vmalloc_to_pfn(addr + i * PAGE_SIZE) + - (ms_hyperv.shared_gpa_boundary >> PAGE_SHIFT); +/* + * hv_is_isolation_supported - Check system runs in the Hyper-V + * isolation VM. + */ +bool hv_is_isolation_supported(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return false; - vaddr = vmap_pfn(pfns, size / PAGE_SIZE, PAGE_KERNEL_IO); - kfree(pfns); + if (!hypervisor_is_type(X86_HYPER_MS_HYPERV)) + return false; - return vaddr; + return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; } -void hv_unmap_memory(void *addr) +DEFINE_STATIC_KEY_FALSE(isolation_type_snp); + +/* + * hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based + * isolation VM. 
+ */ +bool hv_isolation_type_snp(void) { - vunmap(addr); + return static_branch_unlikely(&isolation_type_snp); } diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 0ad2378fe6ad..8460bd35e10c 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -52,6 +52,11 @@ static inline int fill_gva_list(u64 gva_list[], int offset, return gva_n - offset; } +static bool cpu_is_lazy(int cpu) +{ + return per_cpu(cpu_tlbstate_shared.is_lazy, cpu); +} + static void hyperv_flush_tlb_multi(const struct cpumask *cpus, const struct flush_tlb_info *info) { @@ -60,6 +65,7 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus, struct hv_tlb_flush *flush; u64 status; unsigned long flags; + bool do_lazy = !info->freed_tables; trace_hyperv_mmu_flush_tlb_multi(cpus, info); @@ -112,6 +118,8 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus, goto do_ex_hypercall; for_each_cpu(cpu, cpus) { + if (do_lazy && cpu_is_lazy(cpu)) + continue; vcpu = hv_cpu_number_to_vp_number(cpu); if (vcpu == VP_INVAL) { local_irq_restore(flags); @@ -198,7 +206,8 @@ static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, flush->hv_vp_set.valid_bank_mask = 0; flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; - nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus); + nr_bank = cpumask_to_vpset_skip(&flush->hv_vp_set, cpus, + info->freed_tables ? NULL : cpu_is_lazy); if (nr_bank < 0) return HV_STATUS_INVALID_PARAMETER; diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index e2975a32d443..d7da28fada87 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -8,7 +8,7 @@ #define ALT_FLAGS_SHIFT 16 -#define ALT_FLAG_NOT BIT(0) +#define ALT_FLAG_NOT (1 << 0) #define ALT_NOT(feature) ((ALT_FLAG_NOT << ALT_FLAGS_SHIFT) | (feature)) #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index 53e9b0620d96..d90ae472fb76 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -38,7 +38,7 @@ static void sanitize_boot_params(struct boot_params *boot_params) * IMPORTANT NOTE TO BOOTLOADER AUTHORS: do not simply clear * this field. The purpose of this field is to guarantee * compliance with the x86 boot spec located in - * Documentation/x86/boot.rst . That spec says that the + * Documentation/arch/x86/boot.rst . That spec says that the * *whole* structure should be cleared, after which only the * portion defined by struct setup_header (boot_params->hdr) * should be copied in. 
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 3d98c3a60d34..eb08796002f3 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -7,17 +7,33 @@ enum cc_vendor { CC_VENDOR_NONE, CC_VENDOR_AMD, - CC_VENDOR_HYPERV, CC_VENDOR_INTEL, }; -void cc_set_vendor(enum cc_vendor v); -void cc_set_mask(u64 mask); - #ifdef CONFIG_ARCH_HAS_CC_PLATFORM +extern enum cc_vendor cc_vendor; + +static inline enum cc_vendor cc_get_vendor(void) +{ + return cc_vendor; +} + +static inline void cc_set_vendor(enum cc_vendor vendor) +{ + cc_vendor = vendor; +} + +void cc_set_mask(u64 mask); u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); #else +static inline enum cc_vendor cc_get_vendor(void) +{ + return CC_VENDOR_NONE; +} + +static inline void cc_set_vendor(enum cc_vendor vendor) { } + static inline u64 cc_mkenc(u64 val) { return val; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 73c9672c123b..353b054812de 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -321,6 +321,7 @@ #define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */ #define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ #define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ +#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 5dfa4fb76f4b..fafe9be7a6f4 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -75,6 +75,12 @@ # define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31)) #endif +#ifdef CONFIG_ADDRESS_MASKING +# define DISABLE_LAM 0 +#else +# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31)) +#endif + #ifdef CONFIG_INTEL_IOMMU_SVM # define DISABLE_ENQCMD 0 #else @@ -115,7 +121,7 @@ #define DISABLED_MASK10 0 #define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \ DISABLE_CALL_DEPTH_TRACKING) -#define DISABLED_MASK12 0 +#define DISABLED_MASK12 (DISABLE_LAM) #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 #define DISABLED_MASK15 0 diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 0b73a809e9e1..cea95dcd27c2 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -122,6 +122,9 @@ /* Recommend using enlightened VMCS */ #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) +/* Use hypercalls for MMIO config space access */ +#define HV_X64_USE_MMIO_HYPERCALLS BIT(21) + /* * CPU management features identification. * These are HYPERV_CPUID_CPU_MANAGEMENT_FEATURES.EAX bits. 
@@ -713,6 +716,81 @@ union hv_msi_entry { } __packed; }; +struct hv_x64_segment_register { + u64 base; + u32 limit; + u16 selector; + union { + struct { + u16 segment_type : 4; + u16 non_system_segment : 1; + u16 descriptor_privilege_level : 2; + u16 present : 1; + u16 reserved : 4; + u16 available : 1; + u16 _long : 1; + u16 _default : 1; + u16 granularity : 1; + } __packed; + u16 attributes; + }; +} __packed; + +struct hv_x64_table_register { + u16 pad[3]; + u16 limit; + u64 base; +} __packed; + +struct hv_init_vp_context { + u64 rip; + u64 rsp; + u64 rflags; + + struct hv_x64_segment_register cs; + struct hv_x64_segment_register ds; + struct hv_x64_segment_register es; + struct hv_x64_segment_register fs; + struct hv_x64_segment_register gs; + struct hv_x64_segment_register ss; + struct hv_x64_segment_register tr; + struct hv_x64_segment_register ldtr; + + struct hv_x64_table_register idtr; + struct hv_x64_table_register gdtr; + + u64 efer; + u64 cr0; + u64 cr3; + u64 cr4; + u64 msr_cr_pat; +} __packed; + +union hv_input_vtl { + u8 as_uint8; + struct { + u8 target_vtl: 4; + u8 use_target_vtl: 1; + u8 reserved_z: 3; + }; +} __packed; + +struct hv_enable_vp_vtl { + u64 partition_id; + u32 vp_index; + union hv_input_vtl target_vtl; + u8 mbz0; + u16 mbz1; + struct hv_init_vp_context vp_context; +} __packed; + +struct hv_get_vp_from_apic_id_in { + u64 partition_id; + union hv_input_vtl target_vtl; + u8 res[7]; + u32 apic_ids[]; +} __packed; + #include <asm-generic/hyperv-tlfs.h> #endif diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index cbaf174d8efd..b3af2d45bbbb 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -125,6 +125,8 @@ #define INTEL_FAM6_LUNARLAKE_M 0xBD +#define INTEL_FAM6_ARROWLAKE 0xC6 + /* "Small Core" Processors (Atom/E-Core) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index c201083b34f6..a3abdcd89a32 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -20,25 +20,4 @@ extern void intel_mid_pwr_power_off(void); extern int intel_mid_pwr_get_lss_id(struct pci_dev *pdev); -#ifdef CONFIG_X86_INTEL_MID - -extern void intel_scu_devices_create(void); -extern void intel_scu_devices_destroy(void); - -#else /* !CONFIG_X86_INTEL_MID */ - -static inline void intel_scu_devices_create(void) { } -static inline void intel_scu_devices_destroy(void) { } - -#endif /* !CONFIG_X86_INTEL_MID */ - -/* Bus Select SoC Fuse value */ -#define BSEL_SOC_FUSE_MASK 0x7 -/* FSB 133MHz */ -#define BSEL_SOC_FUSE_001 0x1 -/* FSB 100MHz */ -#define BSEL_SOC_FUSE_101 0x5 -/* FSB 83MHz */ -#define BSEL_SOC_FUSE_111 0x7 - #endif /* _ASM_X86_INTEL_MID_H */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index a3760ca796aa..5b77bbc28f96 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -200,9 +200,6 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *symtab); #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add -void *arch_kexec_kernel_image_load(struct kimage *image); -#define arch_kexec_kernel_image_load arch_kexec_kernel_image_load - int arch_kimage_file_post_load_cleanup(struct kimage *image); #define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup #endif diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index dd9b8118f784..0953aa32a324 
100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -99,7 +99,7 @@ /* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */ #define SYM_TYPED_FUNC_START(name) \ - SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \ ENDBR /* SYM_FUNC_START -- use for global functions */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 72ca90552b6a..b7126701574c 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -56,6 +56,7 @@ void __init sev_es_init_vc_handling(void); #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define sme_me_mask 0ULL +#define sev_status 0ULL static inline void __init sme_early_encrypt(resource_size_t paddr, unsigned long size) { } diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 5d7494631ea9..0da5c227f490 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -9,9 +9,13 @@ #include <linux/bits.h> /* Uprobes on this MM assume 32-bit code */ -#define MM_CONTEXT_UPROBE_IA32 BIT(0) +#define MM_CONTEXT_UPROBE_IA32 0 /* vsyscall page is accessible on this MM */ -#define MM_CONTEXT_HAS_VSYSCALL BIT(1) +#define MM_CONTEXT_HAS_VSYSCALL 1 +/* Do not allow changing LAM mode */ +#define MM_CONTEXT_LOCK_LAM 2 +/* Allow LAM and SVA coexisting */ +#define MM_CONTEXT_FORCE_TAGGED_SVA 3 /* * x86 has arch-specific MMU state beyond what lives in mm_struct. @@ -39,7 +43,15 @@ typedef struct { #endif #ifdef CONFIG_X86_64 - unsigned short flags; + unsigned long flags; +#endif + +#ifdef CONFIG_ADDRESS_MASKING + /* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */ + unsigned long lam_cr3_mask; + + /* Significant bits of the virtual address. Excludes tag bits. 
*/ + u64 untag_mask; #endif struct mutex lock; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index e01aa74a6de7..1d29dc791f5a 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -16,13 +16,6 @@ extern atomic64_t last_mm_ctx_id; -#ifndef CONFIG_PARAVIRT_XXL -static inline void paravirt_activate_mm(struct mm_struct *prev, - struct mm_struct *next) -{ -} -#endif /* !CONFIG_PARAVIRT_XXL */ - #ifdef CONFIG_PERF_EVENTS DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key); DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); @@ -92,6 +85,51 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) } #endif +#ifdef CONFIG_ADDRESS_MASKING +static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm) +{ + return mm->context.lam_cr3_mask; +} + +static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm) +{ + mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask; + mm->context.untag_mask = oldmm->context.untag_mask; +} + +#define mm_untag_mask mm_untag_mask +static inline unsigned long mm_untag_mask(struct mm_struct *mm) +{ + return mm->context.untag_mask; +} + +static inline void mm_reset_untag_mask(struct mm_struct *mm) +{ + mm->context.untag_mask = -1UL; +} + +#define arch_pgtable_dma_compat arch_pgtable_dma_compat +static inline bool arch_pgtable_dma_compat(struct mm_struct *mm) +{ + return !mm_lam_cr3_mask(mm) || + test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags); +} +#else + +static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm) +{ + return 0; +} + +static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm) +{ +} + +static inline void mm_reset_untag_mask(struct mm_struct *mm) +{ +} +#endif + #define enter_lazy_tlb enter_lazy_tlb extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); @@ -116,6 +154,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.execute_only_pkey = -1; } #endif + mm_reset_untag_mask(mm); init_new_context_ldt(mm); return 0; } @@ -135,7 +174,7 @@ extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, #define activate_mm(prev, next) \ do { \ - paravirt_activate_mm((prev), (next)); \ + paravirt_enter_mmap(next); \ switch_mm((prev), (next), NULL); \ } while (0); @@ -168,7 +207,8 @@ static inline void arch_dup_pkeys(struct mm_struct *oldmm, static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { arch_dup_pkeys(oldmm, mm); - paravirt_arch_dup_mmap(oldmm, mm); + paravirt_enter_mmap(mm); + dup_lam(oldmm, mm); return ldt_dup_context(oldmm, mm); } @@ -182,7 +222,7 @@ static inline void arch_exit_mmap(struct mm_struct *mm) static inline bool is_64bit_mm(struct mm_struct *mm) { return !IS_ENABLED(CONFIG_IA32_EMULATION) || - !(mm->context.flags & MM_CONTEXT_UPROBE_IA32); + !test_bit(MM_CONTEXT_UPROBE_IA32, &mm->context.flags); } #else static inline bool is_64bit_mm(struct mm_struct *mm) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 4c4c0ec3b62e..49bb4f2bd300 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -11,6 +11,18 @@ #include <asm/paravirt.h> #include <asm/mshyperv.h> +/* + * Hyper-V always provides a single IO-APIC at this MMIO address. + * Ideally, the value should be looked up in ACPI tables, but it + * is needed for mapping the IO-APIC early in boot on Confidential + * VMs, before ACPI functions can be used. 
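/*
 * Illustrative values only; the prctl path that actually sets these fields
 * is not in the hunks above.  With LAM_U57 the six bits 62:57 of a user
 * pointer carry the tag, so enabling LAM on an mm would populate the new
 * fields roughly as sketched below (hypothetical helper name).
 */
static void mm_enable_lam_u57_sketch(struct mm_struct *mm)
{
	mm->context.lam_cr3_mask = X86_CR3_LAM_U57;	  /* merged into CR3 on switch */
	mm->context.untag_mask	 = ~GENMASK_ULL(62, 57);  /* strips the tag bits */
}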
+ */ +#define HV_IOAPIC_BASE_ADDRESS 0xfec00000 + +#define HV_VTL_NORMAL 0x0 +#define HV_VTL_SECURE 0x1 +#define HV_VTL_MGMT 0x2 + union hv_ghcb; DECLARE_STATIC_KEY_FALSE(isolation_type_snp); @@ -21,6 +33,11 @@ typedef int (*hyperv_fill_flush_list_func)( void hyperv_vector_handler(struct pt_regs *regs); +static inline unsigned char hv_get_nmi_reason(void) +{ + return 0; +} + #if IS_ENABLED(CONFIG_HYPERV) extern int hyperv_init_cpuhp; @@ -206,18 +223,19 @@ struct irq_domain *hv_create_pci_msi_domain(void); int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, struct hv_interrupt_entry *entry); int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); -int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible); #ifdef CONFIG_AMD_MEM_ENCRYPT void hv_ghcb_msr_write(u64 msr, u64 value); void hv_ghcb_msr_read(u64 msr, u64 *value); bool hv_ghcb_negotiate_protocol(void); -void hv_ghcb_terminate(unsigned int set, unsigned int reason); +void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); +void hv_vtom_init(void); #else static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} static inline bool hv_ghcb_negotiate_protocol(void) { return false; } static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} +static inline void hv_vtom_init(void) {} #endif extern bool hv_isolation_type_snp(void); @@ -259,14 +277,15 @@ static inline void hv_set_register(unsigned int reg, u64 value) { } static inline u64 hv_get_register(unsigned int reg) { return 0; } static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { } static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; } -static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages, - bool visible) -{ - return -1; -} #endif /* CONFIG_HYPERV */ +#ifdef CONFIG_HYPERV_VTL_MODE +void __init hv_vtl_init_platform(void); +#else +static inline void __init hv_vtl_init_platform(void) {} +#endif + #include <asm-generic/mshyperv.h> #endif diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ad35355ee43e..3aedae61af4f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -206,6 +206,8 @@ /* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */ #define MSR_INTEGRITY_CAPS 0x000002d9 +#define MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT 2 +#define MSR_INTEGRITY_CAPS_ARRAY_BIST BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT) #define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4 #define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 3ef70e54a858..edb2b0cb8efe 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -194,9 +194,9 @@ * builds. */ .macro ANNOTATE_RETPOLINE_SAFE - .Lannotate_\@: +.Lhere_\@: .pushsection .discard.retpoline_safe - _ASM_PTR .Lannotate_\@ + .long .Lhere_\@ - . .popsection .endm @@ -210,8 +210,8 @@ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should * eventually turn into it's own annotation. 
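/*
 * Illustrative sketch, not part of the patch: the ANNOTATE_RETPOLINE_SAFE
 * records above now store a 32-bit PC-relative offset (".long .Lhere_\@ - .")
 * instead of an absolute pointer, so each entry is half the size and needs
 * no load-time relocation.  A consumer of such a record recovers the
 * annotated address like this:
 */
static unsigned long annotated_addr(const s32 *record)
{
	return (unsigned long)record + *record;
}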
*/ -.macro ANNOTATE_UNRET_END -#ifdef CONFIG_DEBUG_ENTRY +.macro VALIDATE_UNRET_END +#if defined(CONFIG_NOINSTR_VALIDATION) && defined(CONFIG_CPU_UNRET_ENTRY) ANNOTATE_RETPOLINE_SAFE nop #endif @@ -286,7 +286,7 @@ .macro UNTRAIN_RET #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ defined(CONFIG_CALL_DEPTH_TRACKING) - ANNOTATE_UNRET_END + VALIDATE_UNRET_END ALTERNATIVE_3 "", \ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ @@ -297,7 +297,7 @@ .macro UNTRAIN_RET_FROM_CALL #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ defined(CONFIG_CALL_DEPTH_TRACKING) - ANNOTATE_UNRET_END + VALIDATE_UNRET_END ALTERNATIVE_3 "", \ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ @@ -318,7 +318,7 @@ #define ANNOTATE_RETPOLINE_SAFE \ "999:\n\t" \ ".pushsection .discard.retpoline_safe\n\t" \ - _ASM_PTR " 999b\n\t" \ + ".long 999b - .\n\t" \ ".popsection\n\t" typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h index 1343a62106de..46d7e06763c9 100644 --- a/arch/x86/include/asm/orc_types.h +++ b/arch/x86/include/asm/orc_types.h @@ -39,6 +39,12 @@ #define ORC_REG_SP_INDIRECT 9 #define ORC_REG_MAX 15 +#define ORC_TYPE_UNDEFINED 0 +#define ORC_TYPE_END_OF_STACK 1 +#define ORC_TYPE_CALL 2 +#define ORC_TYPE_REGS 3 +#define ORC_TYPE_REGS_PARTIAL 4 + #ifndef __ASSEMBLY__ #include <asm/byteorder.h> @@ -56,16 +62,14 @@ struct orc_entry { #if defined(__LITTLE_ENDIAN_BITFIELD) unsigned sp_reg:4; unsigned bp_reg:4; - unsigned type:2; + unsigned type:3; unsigned signal:1; - unsigned end:1; #elif defined(__BIG_ENDIAN_BITFIELD) unsigned bp_reg:4; unsigned sp_reg:4; unsigned unused:4; - unsigned end:1; unsigned signal:1; - unsigned type:2; + unsigned type:3; #endif } __packed; diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index e9e2c3ba5923..06ef25411d62 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -49,7 +49,7 @@ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) -/* See Documentation/x86/x86_64/mm.rst for a description of the memory map. */ +/* See Documentation/arch/x86/x86_64/mm.rst for a description of the memory map. 
*/ #define __PHYSICAL_MASK_SHIFT 52 diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index cf40e813b3d7..b49778664d2b 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -334,16 +334,9 @@ static inline void tss_update_io_bitmap(void) } #endif -static inline void paravirt_activate_mm(struct mm_struct *prev, - struct mm_struct *next) +static inline void paravirt_enter_mmap(struct mm_struct *next) { - PVOP_VCALL2(mmu.activate_mm, prev, next); -} - -static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) -{ - PVOP_VCALL2(mmu.dup_mmap, oldmm, mm); + PVOP_VCALL1(mmu.enter_mmap, next); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) @@ -789,8 +782,7 @@ extern void default_banner(void); #ifndef __ASSEMBLY__ #ifndef CONFIG_PARAVIRT_XXL -static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline void paravirt_enter_mmap(struct mm_struct *mm) { } #endif diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 8c1da419260f..4acbcddddc29 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -164,11 +164,8 @@ struct pv_mmu_ops { unsigned long (*read_cr3)(void); void (*write_cr3)(unsigned long); - /* Hooks for intercepting the creation/use of an mm_struct. */ - void (*activate_mm)(struct mm_struct *prev, - struct mm_struct *next); - void (*dup_mmap)(struct mm_struct *oldmm, - struct mm_struct *mm); + /* Hook for intercepting the creation/use of an mm_struct. */ + void (*enter_mmap)(struct mm_struct *mm); /* Hooks for allocating and freeing a pagetable top-level */ int (*pgd_alloc)(struct mm_struct *mm); @@ -562,8 +559,14 @@ void paravirt_flush_lazy_mmu(void); void _paravirt_nop(void); void paravirt_BUG(void); -u64 _paravirt_ident_64(u64); unsigned long paravirt_ret0(void); +#ifdef CONFIG_PARAVIRT_XXL +u64 _paravirt_ident_64(u64); +unsigned long pv_native_save_fl(void); +void pv_native_irq_disable(void); +void pv_native_irq_enable(void); +unsigned long pv_native_read_cr2(void); +#endif #define paravirt_nop ((void *)_paravirt_nop) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 7425f32e5293..15ae4d6ba476 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1097,7 +1097,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte); } -#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0) +#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 38bf837e3554..38b54b992f32 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -104,7 +104,7 @@ extern unsigned int ptrs_per_p4d; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* - * See Documentation/x86/x86_64/mm.rst for a description of the memory map. + * See Documentation/arch/x86/x86_64/mm.rst for a description of the memory map. * * Be very careful vs. KASLR when changing anything here. 
The KASLR address * range must not overlap with anything except the KASAN shadow area, which diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index a7f3d9100adb..d8cccadc83a6 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -28,6 +28,8 @@ * On systems with SME, one bit (in a variable position!) is stolen to indicate * that the top-level paging structure is encrypted. * + * On systemms with LAM, bits 61 and 62 are used to indicate LAM mode. + * * All of the remaining bits indicate the physical address of the top-level * paging structure. * diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8d73004e4cac..a1e4fa58b357 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -647,7 +647,11 @@ static inline void spin_lock_prefetch(const void *x) #define KSTK_ESP(task) (task_pt_regs(task)->sp) #else -#define INIT_THREAD { } +extern unsigned long __end_init_task[]; + +#define INIT_THREAD { \ + .sp = (unsigned long)&__end_init_task - sizeof(struct pt_regs), \ +} extern unsigned long KSTK_ESP(struct task_struct *task); diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index a336feef0af1..f6a1737c77be 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -59,7 +59,6 @@ extern struct real_mode_header *real_mode_header; extern unsigned char real_mode_blob_end[]; extern unsigned long initial_code; -extern unsigned long initial_gs; extern unsigned long initial_stack; #ifdef CONFIG_AMD_MEM_ENCRYPT extern unsigned long initial_vc_handler; diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index bc5b4d788c08..9177b4354c3f 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -28,7 +28,6 @@ void __noreturn machine_real_restart(unsigned int type); void cpu_emergency_disable_virtualization(void); typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); -void nmi_panic_self_stop(struct pt_regs *regs); void nmi_shootdown_cpus(nmi_shootdown_cb callback); void run_crash_ipi_callback(struct pt_regs *regs); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index f37cbff7354c..f3495623ac99 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -125,11 +125,11 @@ void clear_bss(void); #ifdef __i386__ -asmlinkage void __init i386_start_kernel(void); +asmlinkage void __init __noreturn i386_start_kernel(void); #else -asmlinkage void __init x86_64_start_kernel(char *real_mode); -asmlinkage void __init x86_64_start_reservations(char *real_mode_data); +asmlinkage void __init __noreturn x86_64_start_kernel(char *real_mode); +asmlinkage void __init __noreturn x86_64_start_reservations(char *real_mode_data); #endif /* __i386__ */ #endif /* _SETUP */ diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index b63be696b776..0759af9b1acf 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -128,10 +128,6 @@ struct snp_psc_desc { struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY]; } __packed; -/* Guest message request error codes */ -#define SNP_GUEST_REQ_INVALID_LEN BIT_ULL(32) -#define SNP_GUEST_REQ_ERR_BUSY BIT_ULL(33) - #define GHCB_MSR_TERM_REQ 0x100 #define GHCB_MSR_TERM_REASON_SET_POS 12 #define GHCB_MSR_TERM_REASON_SET_MASK 0xf diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index ebc271bb6d8e..13dc2a9d23c1 100644 --- 
a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -9,6 +9,8 @@ #define __ASM_ENCRYPTED_STATE_H #include <linux/types.h> +#include <linux/sev-guest.h> + #include <asm/insn.h> #include <asm/sev-common.h> #include <asm/bootparam.h> @@ -185,6 +187,9 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) return rc; } + +struct snp_guest_request_ioctl; + void setup_ghcb(void); void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages); @@ -196,7 +201,7 @@ void snp_set_memory_private(unsigned long vaddr, unsigned int npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __init __noreturn snp_abort(void); -int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err); +int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -216,8 +221,7 @@ static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npag static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } static inline void snp_abort(void) { } -static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, - unsigned long *fw_err) +static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { return -ENOTTY; } diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index 4a03993e0785..2631e01f6e0f 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -7,8 +7,6 @@ #define TDX_HYPERCALL_STANDARD 0 -#define TDX_HCALL_HAS_OUTPUT BIT(0) - #define TDX_CPUID_LEAF_ID 0x21 #define TDX_IDENT "IntelTDX " @@ -36,7 +34,8 @@ struct tdx_hypercall_args { }; /* Used to request services from the VMM */ -u64 __tdx_hypercall(struct tdx_hypercall_args *args, unsigned long flags); +u64 __tdx_hypercall(struct tdx_hypercall_args *args); +u64 __tdx_hypercall_ret(struct tdx_hypercall_args *args); /* Called from __tdx_hypercall() for unrecoverable failure */ void __tdx_hypercall_failed(void); diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index f9757123d8fa..4e91054c84be 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -93,9 +93,10 @@ static inline void __cpu_die(unsigned int cpu) smp_ops.cpu_die(cpu); } -static inline void play_dead(void) +static inline void __noreturn play_dead(void) { smp_ops.play_dead(); + BUG(); } static inline void arch_smp_send_reschedule(int cpu) @@ -124,7 +125,7 @@ int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_disable(void); int common_cpu_die(unsigned int cpu); void native_cpu_die(unsigned int cpu); -void hlt_play_dead(void); +void __noreturn hlt_play_dead(void); void native_play_dead(void); void play_dead_common(void); void wbinvd_on_cpu(int cpu); @@ -199,5 +200,8 @@ extern void nmi_selftest(void); #define nmi_selftest() do { } while (0) #endif -#endif /* __ASSEMBLY__ */ +extern unsigned int smpboot_control; + +#endif /* !__ASSEMBLY__ */ + #endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index c1e14cee0722..857d364b9888 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -15,24 +15,18 @@ #endif #define 
__HAVE_ARCH_MEMCPY 1 -#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY) -#undef memcpy -#define memcpy __msan_memcpy -#else extern void *memcpy(void *to, const void *from, size_t len); -#endif extern void *__memcpy(void *to, const void *from, size_t len); #define __HAVE_ARCH_MEMSET -#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY) -extern void *__msan_memset(void *s, int c, size_t n); -#undef memset -#define memset __msan_memset -#else void *memset(void *s, int c, size_t n); -#endif void *__memset(void *s, int c, size_t n); +/* + * KMSAN needs to instrument as much code as possible. Use C versions of + * memsetXX() from lib/string.c under KMSAN. + */ +#if !defined(CONFIG_KMSAN) #define __HAVE_ARCH_MEMSET16 static inline void *memset16(uint16_t *s, uint16_t v, size_t n) { @@ -68,15 +62,10 @@ static inline void *memset64(uint64_t *s, uint64_t v, size_t n) : "memory"); return s; } +#endif #define __HAVE_ARCH_MEMMOVE -#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY) -#undef memmove -void *__msan_memmove(void *dest, const void *src, size_t len); -#define memmove __msan_memmove -#else void *memmove(void *dest, const void *src, size_t count); -#endif void *__memmove(void *dest, const void *src, size_t count); int memcmp(const void *cs, const void *ct, size_t count); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index cda3118f3b27..75bfaa421030 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_TLBFLUSH_H #define _ASM_X86_TLBFLUSH_H -#include <linux/mm.h> +#include <linux/mm_types.h> #include <linux/sched.h> #include <asm/processor.h> @@ -12,6 +12,7 @@ #include <asm/invpcid.h> #include <asm/pti.h> #include <asm/processor-flags.h> +#include <asm/pgtable.h> void __flush_tlb_all(void); @@ -53,6 +54,15 @@ static inline void cr4_clear_bits(unsigned long mask) local_irq_restore(flags); } +#ifdef CONFIG_ADDRESS_MASKING +DECLARE_PER_CPU(u64, tlbstate_untag_mask); + +static inline u64 current_untag_mask(void) +{ + return this_cpu_read(tlbstate_untag_mask); +} +#endif + #ifndef MODULE /* * 6 because 6 should be plenty and struct tlb_state will fit in two cache @@ -101,6 +111,16 @@ struct tlb_state { */ bool invalidate_other; +#ifdef CONFIG_ADDRESS_MASKING + /* + * Active LAM mode. + * + * X86_CR3_LAM_U57/U48 shifted right by X86_CR3_LAM_U57_BIT or 0 if LAM + * disabled. 
+ */ + u8 lam; +#endif + /* * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate * the corresponding user PCID needs a flush next time we @@ -357,6 +377,32 @@ static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) } #define huge_pmd_needs_flush huge_pmd_needs_flush +#ifdef CONFIG_ADDRESS_MASKING +static inline u64 tlbstate_lam_cr3_mask(void) +{ + u64 lam = this_cpu_read(cpu_tlbstate.lam); + + return lam << X86_CR3_LAM_U57_BIT; +} + +static inline void set_tlbstate_lam_mode(struct mm_struct *mm) +{ + this_cpu_write(cpu_tlbstate.lam, + mm->context.lam_cr3_mask >> X86_CR3_LAM_U57_BIT); + this_cpu_write(tlbstate_untag_mask, mm->context.untag_mask); +} + +#else + +static inline u64 tlbstate_lam_cr3_mask(void) +{ + return 0; +} + +static inline void set_tlbstate_lam_mode(struct mm_struct *mm) +{ +} +#endif #endif /* !MODULE */ static inline void __native_tlb_flush_global(unsigned long cr4) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 1cc756eafa44..457e814712af 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -7,11 +7,14 @@ #include <linux/compiler.h> #include <linux/instrumented.h> #include <linux/kasan-checks.h> +#include <linux/mm_types.h> #include <linux/string.h> +#include <linux/mmap_lock.h> #include <asm/asm.h> #include <asm/page.h> #include <asm/smap.h> #include <asm/extable.h> +#include <asm/tlbflush.h> #ifdef CONFIG_DEBUG_ATOMIC_SLEEP static inline bool pagefault_disabled(void); @@ -21,6 +24,57 @@ static inline bool pagefault_disabled(void); # define WARN_ON_IN_IRQ() #endif +#ifdef CONFIG_ADDRESS_MASKING +/* + * Mask out tag bits from the address. + * + * Magic with the 'sign' allows to untag userspace pointer without any branches + * while leaving kernel addresses intact. + */ +static inline unsigned long __untagged_addr(unsigned long addr) +{ + long sign; + + /* + * Refer tlbstate_untag_mask directly to avoid RIP-relative relocation + * in alternative instructions. The relocation gets wrong when gets + * copied to the target place. + */ + asm (ALTERNATIVE("", + "sar $63, %[sign]\n\t" /* user_ptr ? 0 : -1UL */ + "or %%gs:tlbstate_untag_mask, %[sign]\n\t" + "and %[sign], %[addr]\n\t", X86_FEATURE_LAM) + : [addr] "+r" (addr), [sign] "=r" (sign) + : "m" (tlbstate_untag_mask), "[sign]" (addr)); + + return addr; +} + +#define untagged_addr(addr) ({ \ + unsigned long __addr = (__force unsigned long)(addr); \ + (__force __typeof__(addr))__untagged_addr(__addr); \ +}) + +static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, + unsigned long addr) +{ + long sign = addr >> 63; + + mmap_assert_locked(mm); + addr &= (mm)->context.untag_mask | sign; + + return addr; +} + +#define untagged_addr_remote(mm, addr) ({ \ + unsigned long __addr = (__force unsigned long)(addr); \ + (__force __typeof__(addr))__untagged_addr_remote(mm, __addr); \ +}) + +#else +#define untagged_addr(addr) (addr) +#endif + /** * access_ok - Checks if a user space pointer is valid * @addr: User space pointer to start of block to check @@ -38,10 +92,10 @@ static inline bool pagefault_disabled(void); * Return: true (nonzero) if the memory block may be valid, false (zero) * if it is definitely invalid. 
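/*
 * Illustrative plain-C rendering, not part of the patch, of the branchless
 * untag done by the ALTERNATIVE asm above: "sar $63" yields 0 for user
 * pointers and -1 for kernel pointers, so OR-ing it into the untag mask
 * leaves kernel addresses untouched while clearing the tag bits of user
 * addresses.
 */
static unsigned long untag_sketch(unsigned long addr, unsigned long untag_mask)
{
	long sign = (long)addr >> 63;		/* 0 (user) or -1 (kernel) */

	return addr & (untag_mask | sign);
}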
*/ -#define access_ok(addr, size) \ +#define access_ok(addr, size) \ ({ \ WARN_ON_IN_IRQ(); \ - likely(__access_ok(addr, size)); \ + likely(__access_ok(untagged_addr(addr), size)); \ }) #include <asm-generic/access_ok.h> diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index d13d71af5cf6..c972bd21aa23 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -18,32 +18,26 @@ /* Handles exceptions in both to and from, but doesn't do access_ok */ __must_check unsigned long -copy_user_enhanced_fast_string(void *to, const void *from, unsigned len); -__must_check unsigned long -copy_user_generic_string(void *to, const void *from, unsigned len); -__must_check unsigned long -copy_user_generic_unrolled(void *to, const void *from, unsigned len); +rep_movs_alternative(void *to, const void *from, unsigned len); static __always_inline __must_check unsigned long -copy_user_generic(void *to, const void *from, unsigned len) +copy_user_generic(void *to, const void *from, unsigned long len) { - unsigned ret; - + stac(); /* - * If CPU has ERMS feature, use copy_user_enhanced_fast_string. - * Otherwise, if CPU has rep_good feature, use copy_user_generic_string. - * Otherwise, use copy_user_generic_unrolled. + * If CPU has FSRM feature, use 'rep movs'. + * Otherwise, use rep_movs_alternative. */ - alternative_call_2(copy_user_generic_unrolled, - copy_user_generic_string, - X86_FEATURE_REP_GOOD, - copy_user_enhanced_fast_string, - X86_FEATURE_ERMS, - ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from), - "=d" (len)), - "1" (to), "2" (from), "3" (len) - : "memory", "rcx", "r8", "r9", "r10", "r11"); - return ret; + asm volatile( + "1:\n\t" + ALTERNATIVE("rep movsb", + "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM)) + "2:\n" + _ASM_EXTABLE_UA(1b, 2b) + :"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT + : : "memory", "rax", "r8", "r9", "r10", "r11"); + clac(); + return len; } static __always_inline __must_check unsigned long @@ -58,19 +52,19 @@ raw_copy_to_user(void __user *dst, const void *src, unsigned long size) return copy_user_generic((__force void *)dst, src, size); } -extern long __copy_user_nocache(void *dst, const void __user *src, - unsigned size, int zerorest); - +extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size); extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size); -extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset, - size_t len); static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size) { + long ret; kasan_check_write(dst, size); - return __copy_user_nocache(dst, src, size, 0); + stac(); + ret = __copy_user_nocache(dst, src, size); + clac(); + return ret; } static inline int @@ -85,11 +79,7 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) */ __must_check unsigned long -clear_user_original(void __user *addr, unsigned long len); -__must_check unsigned long -clear_user_rep_good(void __user *addr, unsigned long len); -__must_check unsigned long -clear_user_erms(void __user *addr, unsigned long len); +rep_stos_alternative(void __user *addr, unsigned long len); static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size) { @@ -102,16 +92,12 @@ static __always_inline __must_check unsigned long __clear_user(void __user *addr */ asm volatile( "1:\n\t" - ALTERNATIVE_3("rep stosb", - "call clear_user_erms", 
ALT_NOT(X86_FEATURE_FSRM), - "call clear_user_rep_good", ALT_NOT(X86_FEATURE_ERMS), - "call clear_user_original", ALT_NOT(X86_FEATURE_REP_GOOD)) + ALTERNATIVE("rep stosb", + "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS)) "2:\n" _ASM_EXTABLE_UA(1b, 2b) : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT - : "a" (0) - /* rep_good clobbers %rdx */ - : "rdx"); + : "a" (0)); clac(); diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index e7c71750b309..01cb9692b160 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -7,12 +7,17 @@ #ifdef __ASSEMBLY__ -.macro UNWIND_HINT_EMPTY - UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1 +.macro UNWIND_HINT_END_OF_STACK + UNWIND_HINT type=UNWIND_HINT_TYPE_END_OF_STACK +.endm + +.macro UNWIND_HINT_UNDEFINED + UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED .endm .macro UNWIND_HINT_ENTRY - UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1 + VALIDATE_UNRET_BEGIN + UNWIND_HINT_END_OF_STACK .endm .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 signal=1 @@ -52,6 +57,11 @@ UNWIND_HINT_REGS base=\base offset=\offset partial=1 signal=\signal .endm +.macro UNWIND_HINT_IRET_ENTRY base=%rsp offset=0 signal=1 + VALIDATE_UNRET_BEGIN + UNWIND_HINT_IRET_REGS base=\base offset=\offset signal=\signal +.endm + .macro UNWIND_HINT_FUNC UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC .endm @@ -67,7 +77,7 @@ #else #define UNWIND_HINT_FUNC \ - UNWIND_HINT(ORC_REG_SP, 8, UNWIND_HINT_TYPE_FUNC, 0, 0) + UNWIND_HINT(UNWIND_HINT_TYPE_FUNC, ORC_REG_SP, 8, 0) #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index c1c8c581759d..88085f369ff6 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -259,11 +259,15 @@ struct x86_legacy_features { * VMMCALL under SEV-ES. Needs to return 'false' * if the checks fail. Called from the #VC * exception handler. + * @is_private_mmio: For CoCo VMs, must map MMIO address as private. + * Used when device is emulated by a paravisor + * layer in the VM context. 
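/*
 * Illustrative sketch, not part of the patch, of what the reworked
 * copy_user_generic()/__clear_user() above reduce to on CPUs with FSRM/FSRS:
 * a bare "rep movsb"/"rep stosb" bracketed by STAC/CLAC, with the byte count
 * left in RCX (0 on full success; the real code fixes up to the remaining
 * count via the exception table on a fault).  Older CPUs get the out-of-line
 * rep_movs_alternative()/rep_stos_alternative() call patched in instead.
 */
static unsigned long fsrm_copy_sketch(void *to, const void *from, unsigned long len)
{
	stac();
	asm volatile("rep movsb"
		     : "+c" (len), "+D" (to), "+S" (from)
		     : : "memory");
	clac();

	return len;	/* 0 when everything was copied */
}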
*/ struct x86_hyper_runtime { void (*pin_vcpu)(int cpu); void (*sev_es_hcall_prepare)(struct ghcb *ghcb, struct pt_regs *regs); bool (*sev_es_hcall_finish)(struct ghcb *ghcb, struct pt_regs *regs); + bool (*is_private_mmio)(u64 addr); }; /** @@ -326,5 +330,7 @@ extern void x86_init_uint_noop(unsigned int unused); extern bool bool_x86_init_noop(void); extern void x86_op_int_noop(int cpu); extern bool x86_pnpbios_disabled(void); +extern int set_rtc_noop(const struct timespec64 *now); +extern void get_rtc_noop(struct timespec64 *now); #endif diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 500b96e71f18..e8d7ebbca1a4 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -16,8 +16,16 @@ #define ARCH_GET_XCOMP_GUEST_PERM 0x1024 #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 +#define ARCH_XCOMP_TILECFG 17 +#define ARCH_XCOMP_TILEDATA 18 + #define ARCH_MAP_VDSO_X32 0x2001 #define ARCH_MAP_VDSO_32 0x2002 #define ARCH_MAP_VDSO_64 0x2003 +#define ARCH_GET_UNTAG_MASK 0x4001 +#define ARCH_ENABLE_TAGGED_ADDR 0x4002 +#define ARCH_GET_MAX_TAG_BITS 0x4003 +#define ARCH_FORCE_TAGGED_SVA 0x4004 + #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index c47cc7f2feeb..d898432947ff 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -82,6 +82,10 @@ #define X86_CR3_PCID_BITS 12 #define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL)) +#define X86_CR3_LAM_U57_BIT 61 /* Activate LAM for userspace, 62:57 bits masked */ +#define X86_CR3_LAM_U57 _BITULL(X86_CR3_LAM_U57_BIT) +#define X86_CR3_LAM_U48_BIT 62 /* Activate LAM for userspace, 62:48 bits masked */ +#define X86_CR3_LAM_U48 _BITULL(X86_CR3_LAM_U48_BIT) #define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */ #define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT) @@ -132,6 +136,8 @@ #define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT) #define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */ #define X86_CR4_CET _BITUL(X86_CR4_CET_BIT) +#define X86_CR4_LAM_SUP_BIT 28 /* LAM for supervisor pointers */ +#define X86_CR4_LAM_SUP _BITUL(X86_CR4_LAM_SUP_BIT) /* * x86-64 Task Priority Register, CR8 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 1c38174b5f01..21b542a6866c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -146,7 +146,11 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) pr_debug("Local APIC address 0x%08x\n", madt->address); } - if (madt->header.revision >= 5) + + /* ACPI 6.3 and newer support the online capable bit. 
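/*
 * Illustrative userspace usage, not part of the patch, of the new arch_prctl
 * sub-commands defined above.  The semantics are assumed from the LAM
 * series: ARCH_ENABLE_TAGGED_ADDR takes the number of tag bits requested
 * (6 selects LAM_U57) and ARCH_GET_UNTAG_MASK writes the resulting mask
 * through the supplied pointer.
 */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int enable_lam_u57(void)
{
	unsigned long mask = 0;

	if (syscall(SYS_arch_prctl, 0x4002 /* ARCH_ENABLE_TAGGED_ADDR */, 6UL))
		return -1;

	if (syscall(SYS_arch_prctl, 0x4001 /* ARCH_GET_UNTAG_MASK */, &mask))
		return -1;

	printf("untag mask: %#lx\n", mask);	/* for LAM_U57: all bits except 62:57 */
	return 0;
}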
*/ + if (acpi_gbl_FADT.header.revision > 6 || + (acpi_gbl_FADT.header.revision == 6 && + acpi_gbl_FADT.minor_revision >= 3)) acpi_support_online_capable = true; default_acpi_madt_oem_check(madt->header.oem_id, @@ -193,7 +197,8 @@ static bool __init acpi_is_processor_usable(u32 lapic_flags) if (lapic_flags & ACPI_MADT_ENABLED) return true; - if (acpi_support_online_capable && (lapic_flags & ACPI_MADT_ONLINE_CAPABLE)) + if (!acpi_support_online_capable || + (lapic_flags & ACPI_MADT_ONLINE_CAPABLE)) return true; return false; @@ -1853,13 +1858,18 @@ early_param("acpi_sci", setup_acpi_sci); int __acpi_acquire_global_lock(unsigned int *lock) { - unsigned int old, new; + unsigned int old, new, val; old = READ_ONCE(*lock); do { - new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); + val = (old >> 1) & 0x1; + new = (old & ~0x3) + 2 + val; } while (!try_cmpxchg(lock, &old, new)); - return ((new & 0x3) < 3) ? -1 : 0; + + if (val) + return 0; + + return -1; } int __acpi_release_global_lock(unsigned int *lock) diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 3b7f4cdbf2e0..1328c221af30 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -111,13 +111,26 @@ int x86_acpi_suspend_lowlevel(void) saved_magic = 0x12345678; #else /* CONFIG_64BIT */ #ifdef CONFIG_SMP - initial_stack = (unsigned long)temp_stack + sizeof(temp_stack); - early_gdt_descr.address = - (unsigned long)get_cpu_gdt_rw(smp_processor_id()); - initial_gs = per_cpu_offset(smp_processor_id()); + /* + * As each CPU starts up, it will find its own stack pointer + * from its current_task->thread.sp. Typically that will be + * the idle thread for a newly-started AP, or even the boot + * CPU which will find it set to &init_task in the static + * per-cpu data. + * + * Make the resuming CPU use the temporary stack at startup + * by setting current->thread.sp to point to that. The true + * %rsp will be restored with the rest of the CPU context, + * by do_suspend_lowlevel(). And unwinders don't care about + * the abuse of ->thread.sp because it's a dead variable + * while the thread is running on the CPU anyway; the true + * value is in the actual %rsp register. 
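/*
 * Illustrative sketch, not part of the patch, of the lock word handled by
 * the reworked __acpi_acquire_global_lock() above.  Per the ACPI spec the
 * global lock has an "owned" bit (bit 1) and a "pending" bit (bit 0): the
 * new value always sets "owned", and additionally sets "pending" when the
 * lock was already owned; the real function then reports "acquired"
 * (nonzero) only when the owned bit was previously clear.  The names below
 * are hypothetical, not kernel defines.
 */
#define GLOCK_PENDING	0x1	/* a waiter wants the lock */
#define GLOCK_OWNED	0x2	/* the lock is held */

static unsigned int glock_acquire_value(unsigned int old)
{
	unsigned int new = (old & ~0x3u) | GLOCK_OWNED;

	if (old & GLOCK_OWNED)
		new |= GLOCK_PENDING;	/* already held: queue and wait */

	return new;	/* matches (old & ~0x3) + 2 + ((old >> 1) & 0x1) */
}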
+ */ + current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack); + smpboot_control = smp_processor_id(); #endif initial_code = (unsigned long)wakeup_long64; - saved_magic = 0x123456789abcdef0L; + saved_magic = 0x123456789abcdef0L; #endif /* CONFIG_64BIT */ /* diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 20d9a604da7c..770557110051 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -422,10 +422,9 @@ static unsigned int reserve_eilvt_offset(int offset, unsigned int new) if (vector && !eilvt_entry_is_changeable(vector, new)) /* may not change if vectors are different */ return rsvd; - rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new); - } while (rsvd != new); + } while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new)); - rsvd &= ~APIC_EILVT_MASKED; + rsvd = new & ~APIC_EILVT_MASKED; if (rsvd && rsvd != vector) pr_info("LVT offset %d assigned for vector 0x%02x\n", offset, rsvd); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1f83b052bb74..4241dc243aa8 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -66,6 +66,7 @@ #include <asm/hw_irq.h> #include <asm/apic.h> #include <asm/pgtable.h> +#include <asm/x86_init.h> #define for_each_ioapic(idx) \ for ((idx) = 0; (idx) < nr_ioapics; (idx)++) @@ -2477,17 +2478,21 @@ static int io_apic_get_redir_entries(int ioapic) unsigned int arch_dynirq_lower_bound(unsigned int from) { + unsigned int ret; + /* * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use * gsi_top if ioapic_dynirq_base hasn't been initialized yet. */ - if (!ioapic_initialized) - return gsi_top; + ret = ioapic_dynirq_base ? : gsi_top; + /* - * For DT enabled machines ioapic_dynirq_base is irrelevant and not - * updated. So simply return @from if ioapic_dynirq_base == 0. + * For DT enabled machines ioapic_dynirq_base is irrelevant and + * always 0. gsi_top can be 0 if there is no IO/APIC registered. + * 0 is an invalid interrupt number for dynamic allocations. Return + * @from instead. */ - return ioapic_dynirq_base ? : from; + return ret ? 
: from; } #ifdef CONFIG_X86_32 @@ -2680,10 +2685,15 @@ static void io_apic_set_fixmap(enum fixed_addresses idx, phys_addr_t phys) pgprot_t flags = FIXMAP_PAGE_NOCACHE; /* - * Ensure fixmaps for IOAPIC MMIO respect memory encryption pgprot + * Ensure fixmaps for IO-APIC MMIO respect memory encryption pgprot * bits, just like normal ioremap(): */ - flags = pgprot_decrypted(flags); + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { + if (x86_platform.hyper.is_private_mmio(phys)) + flags = pgprot_encrypted(flags); + else + flags = pgprot_decrypted(flags); + } __set_fixmap(idx, phys, flags); } diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index e696e22d0531..b2b2b7f3e03f 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -9,11 +9,7 @@ #include "local.h" -struct cluster_mask { - unsigned int clusterid; - int node; - struct cpumask mask; -}; +#define apic_cluster(apicid) ((apicid) >> 4) /* * __x2apic_send_IPI_mask() possibly needs to read @@ -23,8 +19,7 @@ struct cluster_mask { static u32 *x86_cpu_to_logical_apicid __read_mostly; static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); -static DEFINE_PER_CPU_READ_MOSTLY(struct cluster_mask *, cluster_masks); -static struct cluster_mask *cluster_hotplug_mask; +static DEFINE_PER_CPU_READ_MOSTLY(struct cpumask *, cluster_masks); static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { @@ -60,10 +55,10 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) /* Collapse cpus in a cluster so a single IPI per cluster is sent */ for_each_cpu(cpu, tmpmsk) { - struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu); + struct cpumask *cmsk = per_cpu(cluster_masks, cpu); dest = 0; - for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask) + for_each_cpu_and(clustercpu, tmpmsk, cmsk) dest |= x86_cpu_to_logical_apicid[clustercpu]; if (!dest) @@ -71,7 +66,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL); /* Remove cluster CPUs from tmpmask */ - cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask); + cpumask_andnot(tmpmsk, tmpmsk, cmsk); } local_irq_restore(flags); @@ -105,55 +100,98 @@ static u32 x2apic_calc_apicid(unsigned int cpu) static void init_x2apic_ldr(void) { - struct cluster_mask *cmsk = this_cpu_read(cluster_masks); - u32 cluster, apicid = apic_read(APIC_LDR); - unsigned int cpu; + struct cpumask *cmsk = this_cpu_read(cluster_masks); - x86_cpu_to_logical_apicid[smp_processor_id()] = apicid; + BUG_ON(!cmsk); - if (cmsk) - goto update; - - cluster = apicid >> 16; - for_each_online_cpu(cpu) { - cmsk = per_cpu(cluster_masks, cpu); - /* Matching cluster found. Link and update it. */ - if (cmsk && cmsk->clusterid == cluster) - goto update; + cpumask_set_cpu(smp_processor_id(), cmsk); +} + +/* + * As an optimisation during boot, set the cluster_mask for all present + * CPUs at once, to prevent each of them having to iterate over the others + * to find the existing cluster_mask. 
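/*
 * Illustrative note, not part of the patch: in x2APIC mode the logical APIC
 * ID (LDR) is derived architecturally from the physical ID -- the cluster in
 * the upper 16 bits and a one-hot position in the lower 16 -- which is what
 * lets the rework above replace the old cluster_mask bookkeeping with a
 * plain per-cluster cpumask keyed by apic_cluster(apicid).
 */
static u32 x2apic_ldr_sketch(u32 apicid)
{
	return ((u32)apic_cluster(apicid) << 16) | (1u << (apicid & 0xf));
}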
+ */ +static void prefill_clustermask(struct cpumask *cmsk, unsigned int cpu, u32 cluster) +{ + int cpu_i; + + for_each_present_cpu(cpu_i) { + struct cpumask **cpu_cmsk = &per_cpu(cluster_masks, cpu_i); + u32 apicid = apic->cpu_present_to_apicid(cpu_i); + + if (apicid == BAD_APICID || cpu_i == cpu || apic_cluster(apicid) != cluster) + continue; + + if (WARN_ON_ONCE(*cpu_cmsk == cmsk)) + continue; + + BUG_ON(*cpu_cmsk); + *cpu_cmsk = cmsk; } - cmsk = cluster_hotplug_mask; - cmsk->clusterid = cluster; - cluster_hotplug_mask = NULL; -update: - this_cpu_write(cluster_masks, cmsk); - cpumask_set_cpu(smp_processor_id(), &cmsk->mask); } -static int alloc_clustermask(unsigned int cpu, int node) +static int alloc_clustermask(unsigned int cpu, u32 cluster, int node) { + struct cpumask *cmsk = NULL; + unsigned int cpu_i; + + /* + * At boot time, the CPU present mask is stable. The cluster mask is + * allocated for the first CPU in the cluster and propagated to all + * present siblings in the cluster. If the cluster mask is already set + * on entry to this function for a given CPU, there is nothing to do. + */ if (per_cpu(cluster_masks, cpu)) return 0; + + if (system_state < SYSTEM_RUNNING) + goto alloc; + /* - * If a hotplug spare mask exists, check whether it's on the right - * node. If not, free it and allocate a new one. + * On post boot hotplug for a CPU which was not present at boot time, + * iterate over all possible CPUs (even those which are not present + * any more) to find any existing cluster mask. */ - if (cluster_hotplug_mask) { - if (cluster_hotplug_mask->node == node) - return 0; - kfree(cluster_hotplug_mask); + for_each_possible_cpu(cpu_i) { + u32 apicid = apic->cpu_present_to_apicid(cpu_i); + + if (apicid != BAD_APICID && apic_cluster(apicid) == cluster) { + cmsk = per_cpu(cluster_masks, cpu_i); + /* + * If the cluster is already initialized, just store + * the mask and return. There's no need to propagate. + */ + if (cmsk) { + per_cpu(cluster_masks, cpu) = cmsk; + return 0; + } + } } - - cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask), - GFP_KERNEL, node); - if (!cluster_hotplug_mask) + /* + * No CPU in the cluster has ever been initialized, so fall through to + * the boot time code which will also populate the cluster mask for any + * other CPU in the cluster which is (now) present. 
+ */ +alloc: + cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node); + if (!cmsk) return -ENOMEM; - cluster_hotplug_mask->node = node; + per_cpu(cluster_masks, cpu) = cmsk; + prefill_clustermask(cmsk, cpu, cluster); + return 0; } static int x2apic_prepare_cpu(unsigned int cpu) { - if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0) + u32 phys_apicid = apic->cpu_present_to_apicid(cpu); + u32 cluster = apic_cluster(phys_apicid); + u32 logical_apicid = (cluster << 16) | (1 << (phys_apicid & 0xf)); + + x86_cpu_to_logical_apicid[cpu] = logical_apicid; + + if (alloc_clustermask(cpu, cluster, cpu_to_node(cpu)) < 0) return -ENOMEM; if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) return -ENOMEM; @@ -162,10 +200,10 @@ static int x2apic_prepare_cpu(unsigned int cpu) static int x2apic_dead_cpu(unsigned int dead_cpu) { - struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); + struct cpumask *cmsk = per_cpu(cluster_masks, dead_cpu); if (cmsk) - cpumask_clear_cpu(dead_cpu, &cmsk->mask); + cpumask_clear_cpu(dead_cpu, cmsk); free_cpumask_var(per_cpu(ipi_mask, dead_cpu)); return 0; } diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 283dcd2f62c8..dc3576303f1a 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -115,6 +115,7 @@ static void __used common(void) OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack); + OFFSET(X86_current_task, pcpu_hot, current_task); #ifdef CONFIG_CALL_DEPTH_TRACKING OFFSET(X86_call_depth, pcpu_hot, call_depth); #endif diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index ffea98f9064b..22ab13966427 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -330,8 +330,8 @@ void noinline callthunks_patch_module_calls(struct callthunk_sites *cs, struct module *mod) { struct core_text ct = { - .base = (unsigned long)mod->core_layout.base, - .end = (unsigned long)mod->core_layout.base + mod->core_layout.size, + .base = (unsigned long)mod->mem[MOD_TEXT].base, + .end = (unsigned long)mod->mem[MOD_TEXT].base + mod->mem[MOD_TEXT].size, .name = mod->name, }; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 95cdd08c4cbb..571abf808ea3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -929,6 +929,10 @@ static void init_amd(struct cpuinfo_x86 *c) if (c->x86 >= 0x10) set_cpu_cap(c, X86_FEATURE_REP_GOOD); + /* AMD FSRM also implies FSRS */ + if (cpu_has(c, X86_FEATURE_FSRM)) + set_cpu_cap(c, X86_FEATURE_FSRS); + /* get apicid instead of initial apic id from cpuid */ c->apicid = hard_smp_processor_id(); @@ -1005,6 +1009,17 @@ static void init_amd(struct cpuinfo_x86 *c) msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); check_null_seg_clears_base(c); + + /* + * Make sure EFER[AIBRSE - Automatic IBRS Enable] is set. The APs are brought up + * using the trampoline code and as part of it, MSR_EFER gets prepared there in + * order to be replicated onto them. Regardless, set it here again, if not set, + * to protect against any future refactoring/code reorganization which might + * miss setting this important bit. 
+ */ + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + cpu_has(c, X86_FEATURE_AUTOIBRS)) + WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS)); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f9d060e71c3e..182af64387d0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -784,8 +784,7 @@ static int __init nospectre_v1_cmdline(char *str) } early_param("nospectre_v1", nospectre_v1_cmdline); -static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = - SPECTRE_V2_NONE; +enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; #undef pr_fmt #define pr_fmt(fmt) "RETBleed: " fmt @@ -1133,13 +1132,6 @@ spectre_v2_parse_user_cmdline(void) return SPECTRE_V2_USER_CMD_AUTO; } -static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) -{ - return mode == SPECTRE_V2_EIBRS || - mode == SPECTRE_V2_EIBRS_RETPOLINE || - mode == SPECTRE_V2_EIBRS_LFENCE; -} - static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) { return spectre_v2_in_eibrs_mode(mode) || mode == SPECTRE_V2_IBRS; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8cd4126d8253..80710a68ef7d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -121,6 +121,7 @@ static const struct x86_cpu_id ppin_cpuids[] = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &ppin_info[X86_VENDOR_INTEL]), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &ppin_info[X86_VENDOR_INTEL]), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &ppin_info[X86_VENDOR_INTEL]), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &ppin_info[X86_VENDOR_INTEL]), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &ppin_info[X86_VENDOR_INTEL]), diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 57a5349e6954..f97b0fe13da8 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -83,4 +83,12 @@ unsigned int aperfmperf_get_khz(int cpu); extern void x86_spec_ctrl_setup_ap(void); extern void update_srbds_msr(void); +extern enum spectre_v2_mitigation spectre_v2_enabled; + +static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) +{ + return mode == SPECTRE_V2_EIBRS || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_EIBRS_LFENCE; +} #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 291d4167fab8..1c4639588ff9 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1177,7 +1177,7 @@ static const struct { static struct ratelimit_state bld_ratelimit; static unsigned int sysctl_sld_mitigate = 1; -static DEFINE_SEMAPHORE(buslock_sem); +static DEFINE_SEMAPHORE(buslock_sem, 1); #ifdef CONFIG_PROC_SYSCTL static struct ctl_table sld_sysctls[] = { @@ -1451,31 +1451,13 @@ void handle_bus_lock(struct pt_regs *regs) } /* - * Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should - * only be trusted if it is confirmed that a CPU model implements a - * specific feature at a particular bit position. - * - * The possible driver data field values: - * - * - 0: CPU models that are known to have the per-core split-lock detection - * feature even though they do not enumerate IA32_CORE_CAPABILITIES. - * - * - 1: CPU models which may enumerate IA32_CORE_CAPABILITIES and if so use - * bit 5 to enumerate the per-core split-lock detection feature. 
+ * CPU models that are known to have the per-core split-lock detection + * feature even though they do not enumerate IA32_CORE_CAPABILITIES. */ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, 1), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, 1), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1), - X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, 1), - X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1), - X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 1), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, 1), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0), {} }; @@ -1487,24 +1469,27 @@ static void __init split_lock_setup(struct cpuinfo_x86 *c) if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) return; + /* Check for CPUs that have support but do not enumerate it: */ m = x86_match_cpu(split_lock_cpu_ids); - if (!m) - return; + if (m) + goto supported; - switch (m->driver_data) { - case 0: - break; - case 1: - if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) - return; - rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps); - if (!(ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)) - return; - break; - default: + if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) return; - } + /* + * Not all bits in MSR_IA32_CORE_CAPS are architectural, but + * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set + * it have split lock detection. + */ + rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps); + if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT) + goto supported; + + /* CPU is not in the model list and does not have the MSR bit: */ + return; + +supported: cpu_model_supports_sld = true; __split_lock_setup(); } diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 23c5072fbbb7..0b971f974096 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -235,10 +235,10 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); * A list of the banks enabled on each logical CPU. Controls which respective * descriptors to initialize later in mce_threshold_create_device(). */ -static DEFINE_PER_CPU(unsigned int, bank_map); +static DEFINE_PER_CPU(u64, bank_map); /* Map of banks that have more than MCA_MISC0 available. 
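/*
 * Illustrative sketch, not part of the patch: why the per-CPU maps above
 * grow from u32/BIT() to u64/BIT_ULL().  Once a system exposes bank numbers
 * of 32 or higher, a 32-bit map can no longer hold the corresponding bit,
 * so the wider type and the ULL helper are needed.  mark_bank() is a
 * hypothetical helper.
 */
static void mark_bank(u64 *map, unsigned int bank)
{
	*map |= BIT_ULL(bank);	/* valid for banks 0..63 */
}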
*/ -static DEFINE_PER_CPU(u32, smca_misc_banks_map); +static DEFINE_PER_CPU(u64, smca_misc_banks_map); static void amd_threshold_interrupt(void); static void amd_deferred_error_interrupt(void); @@ -267,7 +267,7 @@ static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu) return; if (low & MASK_BLKPTR_LO) - per_cpu(smca_misc_banks_map, cpu) |= BIT(bank); + per_cpu(smca_misc_banks_map, cpu) |= BIT_ULL(bank); } @@ -530,7 +530,7 @@ static u32 smca_get_block_address(unsigned int bank, unsigned int block, if (!block) return MSR_AMD64_SMCA_MCx_MISC(bank); - if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank))) + if (!(per_cpu(smca_misc_banks_map, cpu) & BIT_ULL(bank))) return 0; return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); @@ -574,7 +574,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, int new; if (!block) - per_cpu(bank_map, cpu) |= (1 << bank); + per_cpu(bank_map, cpu) |= BIT_ULL(bank); memset(&b, 0, sizeof(b)); b.cpu = cpu; @@ -878,7 +878,7 @@ static void amd_threshold_interrupt(void) return; for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { - if (!(per_cpu(bank_map, cpu) & (1 << bank))) + if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank))) continue; first_block = bp[bank]->blocks; @@ -1029,7 +1029,7 @@ static const struct sysfs_ops threshold_ops = { static void threshold_block_release(struct kobject *kobj); -static struct kobj_type threshold_ktype = { +static const struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, .default_groups = default_groups, .release = threshold_block_release, @@ -1356,7 +1356,7 @@ int mce_threshold_create_device(unsigned int cpu) return -ENOMEM; for (bank = 0; bank < numbanks; ++bank) { - if (!(this_cpu_read(bank_map) & (1 << bank))) + if (!(this_cpu_read(bank_map) & BIT_ULL(bank))) continue; err = threshold_create_bank(bp, cpu, bank); if (err) { diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 91a415553c27..d2412ce2d312 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -244,11 +244,11 @@ noinstr void pentium_machine_check(struct pt_regs *regs); noinstr void winchip_machine_check(struct pt_regs *regs); static inline void enable_p5_mce(void) { mce_p5_enabled = 1; } #else -static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void enable_p5_mce(void) {} -static inline void pentium_machine_check(struct pt_regs *regs) {} -static inline void winchip_machine_check(struct pt_regs *regs) {} +static __always_inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} +static __always_inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} +static __always_inline void enable_p5_mce(void) {} +static __always_inline void pentium_machine_check(struct pt_regs *regs) {} +static __always_inline void winchip_machine_check(struct pt_regs *regs) {} #endif noinstr u64 mce_rdmsrl(u32 msr); diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 9eb457b10341..f5fdeb1e3606 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -61,7 +61,7 @@ static u8 amd_ucode_patch[MAX_NUMNODES][PATCH_MAX_SIZE]; /* * Microcode patch container file is prepended to the initrd in cpio - * format. See Documentation/x86/microcode.rst + * format. 
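The mce/amd.c hunks above widen bank_map and smca_misc_banks_map to u64 and switch every test from (1 << bank) to BIT_ULL(bank): SMCA systems can report more than 32 banks, and shifting a plain int by 32 or more is undefined. A minimal stand-alone sketch of the difference (BIT_ULL defined as in the kernel; nothing here is taken from the patch itself):

#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(nr)	(1ULL << (nr))

int main(void)
{
	uint64_t bank_map = 0;
	unsigned int bank = 40;		/* SMCA can expose banks >= 32 */

	/* bank_map |= 1 << bank;	   would shift an int by 40: undefined */
	bank_map |= BIT_ULL(bank);	/* 64-bit shift, sets bit 40 */

	printf("bank %u set: %d\n", bank, !!(bank_map & BIT_ULL(bank)));
	return 0;
}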
See Documentation/arch/x86/microcode.rst */ static const char ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 7a329e561354..3afcf3de0dd4 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -632,6 +632,7 @@ static const struct attribute_group cpu_root_microcode_group = { static int __init microcode_init(void) { + struct device *dev_root; struct cpuinfo_x86 *c = &boot_cpu_data; int error; @@ -652,10 +653,14 @@ static int __init microcode_init(void) if (IS_ERR(microcode_pdev)) return PTR_ERR(microcode_pdev); - error = sysfs_create_group(&cpu_subsys.dev_root->kobj, &cpu_root_microcode_group); - if (error) { - pr_err("Error creating microcode group!\n"); - goto out_pdev; + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + error = sysfs_create_group(&dev_root->kobj, &cpu_root_microcode_group); + put_device(dev_root); + if (error) { + pr_err("Error creating microcode group!\n"); + goto out_pdev; + } } /* Do per-CPU setup */ diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index f36dc2f796c5..c7969e806c64 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -18,7 +18,6 @@ #include <linux/kexec.h> #include <linux/i8253.h> #include <linux/random.h> -#include <linux/swiotlb.h> #include <asm/processor.h> #include <asm/hypervisor.h> #include <asm/hyperv-tlfs.h> @@ -33,7 +32,6 @@ #include <asm/nmi.h> #include <clocksource/hyperv_timer.h> #include <asm/numa.h> -#include <asm/coco.h> /* Is Linux running as the root partition? */ bool hv_root_partition; @@ -250,11 +248,6 @@ static uint32_t __init ms_hyperv_platform(void) return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS; } -static unsigned char hv_get_nmi_reason(void) -{ - return 0; -} - #ifdef CONFIG_X86_LOCAL_APIC /* * Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes @@ -358,12 +351,16 @@ static void __init ms_hyperv_init_platform(void) * To mirror what Windows does we should extract CPU management * features and use the ReservedIdentityBit to detect if Linux is the * root partition. But that requires negotiating CPU management - * interface (a process to be finalized). + * interface (a process to be finalized). For now, use the privilege + * flag as the indicator for running as root. * - * For now, use the privilege flag as the indicator for running as - * root. + * Hyper-V should never specify running as root and as a Confidential + * VM. But to protect against a compromised/malicious Hyper-V trying + * to exploit root behavior to expose Confidential VM memory, ignore + * the root partition setting if also a Confidential VM. 
*/ - if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_CPU_MANAGEMENT) { + if ((ms_hyperv.priv_high & HV_CPU_MANAGEMENT) && + !(ms_hyperv.priv_high & HV_ISOLATION)) { hv_root_partition = true; pr_info("Hyper-V: running as root partition\n"); } @@ -397,23 +394,16 @@ static void __init ms_hyperv_init_platform(void) if (ms_hyperv.priv_high & HV_ISOLATION) { ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG); ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG); - ms_hyperv.shared_gpa_boundary = - BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); + + if (ms_hyperv.shared_gpa_boundary_active) + ms_hyperv.shared_gpa_boundary = + BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); - if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) { + if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) static_branch_enable(&isolation_type_snp); -#ifdef CONFIG_SWIOTLB - swiotlb_unencrypted_base = ms_hyperv.shared_gpa_boundary; -#endif - } - /* Isolation VMs are unenlightened SEV-based VMs, thus this check: */ - if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { - if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE) - cc_set_vendor(CC_VENDOR_HYPERV); - } } if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) { @@ -482,6 +472,9 @@ static void __init ms_hyperv_init_platform(void) i8253_clear_counter_on_shutdown = false; #if IS_ENABLED(CONFIG_HYPERV) + if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) || + (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)) + hv_vtom_init(); /* * Setup the hook to get control post apic initialization. */ @@ -521,6 +514,7 @@ static void __init ms_hyperv_init_platform(void) /* Register Hyper-V specific clocksource */ hv_init_clocksource(); + hv_vtl_init_platform(); #endif /* * TSC should be marked as unstable only after Hyper-V diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 7fe51488e136..ded1fc7cb7cb 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -76,7 +76,7 @@ unsigned int resctrl_rmid_realloc_limit; #define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5)) /* - * The correction factor table is documented in Documentation/x86/resctrl.rst. + * The correction factor table is documented in Documentation/arch/x86/resctrl.rst. * If rmid > rmid threshold, MBM total and local values should be multiplied * by the correction factor. 
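The correction-factor comment above pairs with the CF() macro earlier in the same file: factors are stored as 20-bit fixed-point values (1048576 == 2^20), so applying one is a multiply followed by a 20-bit shift. A small illustrative calculation (the factor chosen is made up; the kernel-side application site is not part of this diff):

#include <stdio.h>

/* Same encoding as the CF() macro above: 2^20 fixed point. */
#define CF(cf)	((unsigned long)(1048576 * (cf) + 0.5))

int main(void)
{
	unsigned long cf = CF(1.000522);	/* illustrative factor */
	unsigned long long raw = 1000000;	/* uncorrected MBM count */

	/* multiply by the factor, then drop the 20 fractional bits */
	unsigned long long corrected = (raw * cf) >> 20;

	printf("cf=%lu raw=%llu corrected=%llu\n", cf, raw, corrected);
	return 0;
}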
* @@ -383,41 +383,36 @@ void free_rmid(u32 rmid) list_add_tail(&entry->list, &rmid_free_lru); } +static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 rmid, + enum resctrl_event_id evtid) +{ + switch (evtid) { + case QOS_L3_MBM_TOTAL_EVENT_ID: + return &d->mbm_total[rmid]; + case QOS_L3_MBM_LOCAL_EVENT_ID: + return &d->mbm_local[rmid]; + default: + return NULL; + } +} + static int __mon_event_count(u32 rmid, struct rmid_read *rr) { struct mbm_state *m; u64 tval = 0; - if (rr->first) + if (rr->first) { resctrl_arch_reset_rmid(rr->r, rr->d, rmid, rr->evtid); + m = get_mbm_state(rr->d, rmid, rr->evtid); + if (m) + memset(m, 0, sizeof(struct mbm_state)); + return 0; + } rr->err = resctrl_arch_rmid_read(rr->r, rr->d, rmid, rr->evtid, &tval); if (rr->err) return rr->err; - switch (rr->evtid) { - case QOS_L3_OCCUP_EVENT_ID: - rr->val += tval; - return 0; - case QOS_L3_MBM_TOTAL_EVENT_ID: - m = &rr->d->mbm_total[rmid]; - break; - case QOS_L3_MBM_LOCAL_EVENT_ID: - m = &rr->d->mbm_local[rmid]; - break; - default: - /* - * Code would never reach here because an invalid - * event id would fail in resctrl_arch_rmid_read(). - */ - return -EINVAL; - } - - if (rr->first) { - memset(m, 0, sizeof(struct mbm_state)); - return 0; - } - rr->val += tval; return 0; diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 524f8ff3e69c..458cb7419502 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -1580,7 +1580,7 @@ int rdt_pseudo_lock_init(void) pseudo_lock_major = ret; - pseudo_lock_class = class_create(THIS_MODULE, "pseudo_lock"); + pseudo_lock_class = class_create("pseudo_lock"); if (IS_ERR(pseudo_lock_class)) { ret = PTR_ERR(pseudo_lock_class); unregister_chrdev(pseudo_lock_major, "pseudo_lock"); diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index e5a37b6e9aa5..166692f2d501 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -892,20 +892,19 @@ static struct miscdevice sgx_dev_provision = { int sgx_set_attribute(unsigned long *allowed_attributes, unsigned int attribute_fd) { - struct file *file; + struct fd f = fdget(attribute_fd); - file = fget(attribute_fd); - if (!file) + if (!f.file) return -EINVAL; - if (file->f_op != &sgx_provision_fops) { - fput(file); + if (f.file->f_op != &sgx_provision_fops) { + fdput(f); return -EINVAL; } *allowed_attributes |= SGX_ATTR_PROVISIONKEY; - fput(file); + fdput(f); return 0; } EXPORT_SYMBOL_GPL(sgx_set_attribute); diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h index 0f2020653fba..d2dad21259a8 100644 --- a/arch/x86/kernel/cpu/sgx/sgx.h +++ b/arch/x86/kernel/cpu/sgx/sgx.h @@ -15,7 +15,7 @@ #define EREMOVE_ERROR_MESSAGE \ "EREMOVE returned %d (0x%x) and an EPC page was leaked. SGX may become unusable. " \ - "Refer to Documentation/x86/sgx.rst for more information." + "Refer to Documentation/arch/x86/sgx.rst for more information." #define SGX_MAX_EPC_SECTIONS 8 #define SGX_EEXTEND_BLOCK_SIZE 256 diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c index ec8064c0ae03..2293efd6ffa6 100644 --- a/arch/x86/kernel/cpu/umwait.c +++ b/arch/x86/kernel/cpu/umwait.c @@ -232,7 +232,11 @@ static int __init umwait_init(void) * Add umwait control interface. Ignore failure, so at least the * default values are set up in case the machine manages to boot. 
*/ - dev = cpu_subsys.dev_root; - return sysfs_create_group(&dev->kobj, &umwait_attr_group); + dev = bus_get_dev_root(&cpu_subsys); + if (dev) { + ret = sysfs_create_group(&dev->kobj, &umwait_attr_group); + put_device(dev); + } + return ret; } device_initcall(umwait_init); diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 621ba9c0f17a..bdc0d5539b57 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -154,7 +154,7 @@ static int __init cpuid_init(void) CPUID_MAJOR); return -EBUSY; } - cpuid_class = class_create(THIS_MODULE, "cpuid"); + cpuid_class = class_create("cpuid"); if (IS_ERR(cpuid_class)) { err = PTR_ERR(cpuid_class); goto out_chrdev; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 714166cc25f2..0bab497c9436 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1118,21 +1118,20 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, zerofrom = offsetof(struct xregs_state, extended_state_area); /* - * The ptrace buffer is in non-compacted XSAVE format. In - * non-compacted format disabled features still occupy state space, - * but there is no state to copy from in the compacted - * init_fpstate. The gap tracking will zero these states. - */ - mask = fpstate->user_xfeatures; - - /* - * Dynamic features are not present in init_fpstate. When they are - * in an all zeros init state, remove those from 'mask' to zero - * those features in the user buffer instead of retrieving them - * from init_fpstate. + * This 'mask' indicates which states to copy from fpstate. + * Those extended states that are not present in fpstate are + * either disabled or initialized: + * + * In non-compacted format, disabled features still occupy + * state space but there is no state to copy from in the + * compacted init_fpstate. The gap tracking will zero these + * states. + * + * The extended features have an all zeroes init state. Thus, + * remove them from 'mask' to zero those features in the user + * buffer instead of retrieving them from init_fpstate. */ - if (fpu_state_size_dynamic()) - mask &= (header.xfeatures | xinit->header.xcomp_bv); + mask = header.xfeatures; for_each_extended_xfeature(i, mask) { /* @@ -1151,9 +1150,8 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, pkru.pkru = pkru_val; membuf_write(&to, &pkru, sizeof(pkru)); } else { - copy_feature(header.xfeatures & BIT_ULL(i), &to, + membuf_write(&to, __raw_xsave_addr(xsave, i), - __raw_xsave_addr(xinit, i), xstate_sizes[i]); } /* diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index a0ed0e4a2c0c..0d9a14528176 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -163,6 +163,11 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) jmp .Lftrace_ret SYM_CODE_END(ftrace_regs_caller) +SYM_FUNC_START(ftrace_stub_direct_tramp) + CALL_DEPTH_ACCOUNT + RET +SYM_FUNC_END(ftrace_stub_direct_tramp) + #ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_CODE_START(ftrace_graph_caller) pushl %eax diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index fb4f1e01b64a..b8c720b5dab2 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -309,6 +309,10 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) SYM_FUNC_END(ftrace_regs_caller) STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) +SYM_FUNC_START(ftrace_stub_direct_tramp) + CALL_DEPTH_ACCOUNT + RET +SYM_FUNC_END(ftrace_stub_direct_tramp) #else /* ! 
CONFIG_DYNAMIC_FTRACE */ @@ -342,7 +346,7 @@ STACK_FRAME_NON_STANDARD_FP(__fentry__) #ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_CODE_START(return_to_handler) - UNWIND_HINT_EMPTY + UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR subq $16, %rsp diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index ec6fefbfd3c0..10c27b4261eb 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -29,7 +29,7 @@ static void __init i386_default_early_setup(void) x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; } -asmlinkage __visible void __init i386_start_kernel(void) +asmlinkage __visible void __init __noreturn i386_start_kernel(void) { /* Make sure IDT is set up before any exception happens */ idt_setup_early_handler(); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 387e4b12e823..49f7629b17f7 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -471,7 +471,7 @@ static void __init copy_bootdata(char *real_mode_data) sme_unmap_bootdata(real_mode_data); } -asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) +asmlinkage __visible void __init __noreturn x86_64_start_kernel(char * real_mode_data) { /* * Build-time sanity checks on the kernel image and module @@ -537,7 +537,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) x86_64_start_reservations(real_mode_data); } -void __init x86_64_start_reservations(char *real_mode_data) +void __init __noreturn x86_64_start_reservations(char *real_mode_data) { /* version is always not zero if it is copied */ if (!boot_params.hdr.version) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 222efd4a09bc..a5df3e994f04 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -42,7 +42,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) __HEAD .code64 SYM_CODE_START_NOALIGN(startup_64) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded an identity mapped page table @@ -61,23 +61,15 @@ SYM_CODE_START_NOALIGN(startup_64) * tables and then reload them. */ - /* Set up the stack for verify_cpu(), similar to initial_stack below */ - leaq (__end_init_task - FRAME_SIZE)(%rip), %rsp + /* Set up the stack for verify_cpu() */ + leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp leaq _text(%rip), %rdi - /* - * initial_gs points to initial fixed_percpu_data struct with storage for - * the stack protector canary. Global pointer fixups are needed at this - * stage, so apply them as is done in fixup_pointer(), and initialize %gs - * such that the canary can be accessed at %gs:40 for subsequent C calls. - */ + /* Setup GSBASE to allow stack canary access for C code */ movl $MSR_GS_BASE, %ecx - movq initial_gs(%rip), %rax - movq $_text, %rdx - subq %rdx, %rax - addq %rdi, %rax - movq %rax, %rdx + leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx + movl %edx, %eax shrq $32, %rdx wrmsr @@ -105,7 +97,7 @@ SYM_CODE_START_NOALIGN(startup_64) lretq .Lon_kernel_cs: - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK /* Sanitize CPU configuration */ call verify_cpu @@ -127,7 +119,7 @@ SYM_CODE_START_NOALIGN(startup_64) SYM_CODE_END(startup_64) SYM_CODE_START(secondary_startup_64) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, @@ -156,7 +148,7 @@ SYM_CODE_START(secondary_startup_64) * verify_cpu() above to make sure NX is enabled. 
*/ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR /* @@ -238,16 +230,39 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) ANNOTATE_RETPOLINE_SAFE jmp *%rax 1: - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR // above +#ifdef CONFIG_SMP + movl smpboot_control(%rip), %ecx + + /* Get the per cpu offset for the given CPU# which is in ECX */ + movq __per_cpu_offset(,%rcx,8), %rdx +#else + xorl %edx, %edx /* zero-extended to clear all of RDX */ +#endif /* CONFIG_SMP */ + + /* + * Setup a boot time stack - Any secondary CPU will have lost its stack + * by now because the cr3-switch above unmaps the real-mode stack. + * + * RDX contains the per-cpu offset + */ + movq pcpu_hot + X86_current_task(%rdx), %rax + movq TASK_threadsp(%rax), %rsp + /* * We must switch to a new descriptor in kernel space for the GDT * because soon the kernel won't have access anymore to the userspace * addresses where we're currently running on. We have to do that here * because in 32bit we couldn't load a 64bit linear address. */ - lgdt early_gdt_descr(%rip) + subq $16, %rsp + movw $(GDT_SIZE-1), (%rsp) + leaq gdt_page(%rdx), %rax + movq %rax, 2(%rsp) + lgdt (%rsp) + addq $16, %rsp /* set up data segments */ xorl %eax,%eax @@ -271,16 +286,13 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) * the per cpu areas are set up. */ movl $MSR_GS_BASE,%ecx - movl initial_gs(%rip),%eax - movl initial_gs+4(%rip),%edx +#ifndef CONFIG_SMP + leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx +#endif + movl %edx, %eax + shrq $32, %rdx wrmsr - /* - * Setup a boot time stack - Any secondary CPU will have lost its stack - * by now because the cr3-switch above unmaps the real-mode stack - */ - movq initial_stack(%rip), %rsp - /* Setup and Load IDT */ pushq %rsi call early_setup_idt @@ -371,8 +383,12 @@ SYM_CODE_END(secondary_startup_64) */ SYM_CODE_START(start_cpu0) ANNOTATE_NOENDBR - UNWIND_HINT_EMPTY - movq initial_stack(%rip), %rsp + UNWIND_HINT_END_OF_STACK + + /* Find the idle task stack */ + movq PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx + movq TASK_threadsp(%rcx), %rsp + jmp .Ljump_to_C_code SYM_CODE_END(start_cpu0) #endif @@ -390,8 +406,6 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb) UNWIND_HINT_IRET_REGS offset=8 ENDBR - ANNOTATE_UNRET_END - /* Build pt_regs */ PUSH_AND_CLEAR_REGS @@ -416,16 +430,9 @@ SYM_CODE_END(vc_boot_ghcb) __REFDATA .balign 8 SYM_DATA(initial_code, .quad x86_64_start_kernel) -SYM_DATA(initial_gs, .quad INIT_PER_CPU_VAR(fixed_percpu_data)) #ifdef CONFIG_AMD_MEM_ENCRYPT SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb) #endif - -/* - * The FRAME_SIZE gap is a convention which helps the in-kernel unwinder - * reliably detect the end of the stack. - */ -SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - FRAME_SIZE) __FINITDATA __INIT @@ -451,7 +458,6 @@ SYM_CODE_END(early_idt_handler_array) SYM_CODE_START_LOCAL(early_idt_handler_common) UNWIND_HINT_IRET_REGS offset=16 - ANNOTATE_UNRET_END /* * The stack is the hardware frame, an error code or zero, and the * vector number. 
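The rewritten secondary_startup_64 path above no longer reads a shared initial_stack; it derives the boot stack from smpboot_control. In rough C terms it computes the following (a sketch only, in kernel context; pcpu_hot.current_task and thread.sp are the fields behind the X86_current_task and TASK_threadsp asm-offsets used above):

/*
 * Sketch: what the new boot path computes when smpboot_control holds the
 * CPU number.  Not a literal replacement for the assembly above.
 */
static unsigned long secondary_boot_rsp(unsigned int cpu)
{
	/* "movq __per_cpu_offset(,%rcx,8), %rdx" plus the pcpu_hot load */
	struct task_struct *idle = per_cpu(pcpu_hot.current_task, cpu);

	/* TASK_threadsp: the idle task's saved kernel stack pointer */
	return idle->thread.sp;
}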
@@ -501,8 +507,6 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb) UNWIND_HINT_IRET_REGS offset=8 ENDBR - ANNOTATE_UNRET_END - /* Build pt_regs */ PUSH_AND_CLEAR_REGS @@ -657,8 +661,7 @@ SYM_DATA_END(level1_fixmap_pgt) .data .align 16 -SYM_DATA(early_gdt_descr, .word GDT_ENTRIES*8-1) -SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page)) +SYM_DATA(smpboot_control, .long 0) .align 16 /* This must match the first entry in level2_kernel_pgt */ diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index 9ff480e94511..670eb08b972a 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -77,15 +77,6 @@ static struct ctl_table itmt_kern_table[] = { {} }; -static struct ctl_table itmt_root_table[] = { - { - .procname = "kernel", - .mode = 0555, - .child = itmt_kern_table, - }, - {} -}; - static struct ctl_table_header *itmt_sysctl_header; /** @@ -114,7 +105,7 @@ int sched_set_itmt_support(void) return 0; } - itmt_sysctl_header = register_sysctl_table(itmt_root_table); + itmt_sysctl_header = register_sysctl("kernel", itmt_kern_table); if (!itmt_sysctl_header) { mutex_unlock(&itmt_update_mutex); return -ENOMEM; diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 6b58610a1552..a61c12c01270 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -476,7 +476,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, efi_map_offset = params_cmdline_sz; efi_setup_data_offset = efi_map_offset + ALIGN(efi_map_sz, 16); - /* Copy setup header onto bootparams. Documentation/x86/boot.rst */ + /* Copy setup header onto bootparams. Documentation/arch/x86/boot.rst */ setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset; /* Is there a limit on setup header size? */ diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 0611fd83858e..1a3e2c05a8a5 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -374,17 +374,6 @@ void machine_kexec(struct kimage *image) /* arch-dependent functionality related to kexec file-based syscall */ #ifdef CONFIG_KEXEC_FILE -void *arch_kexec_kernel_image_load(struct kimage *image) -{ - if (!image->fops || !image->fops->load) - return ERR_PTR(-ENOEXEC); - - return image->fops->load(image, image->kernel_buf, - image->kernel_buf_len, image->initrd_buf, - image->initrd_buf_len, image->cmdline_buf, - image->cmdline_buf_len); -} - /* * Apply purgatory relocations. 
* diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 84ad0e61ba6e..b05f62ee2344 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -362,8 +362,8 @@ int module_finalize(const Elf_Ehdr *hdr, } if (locks) { void *lseg = (void *)locks->sh_addr; - void *text = me->core_layout.base; - void *text_end = text + me->core_layout.text_size; + void *text = me->mem[MOD_TEXT].base; + void *text_end = text + me->mem[MOD_TEXT].size; alternatives_smp_module_add(me, me->name, lseg, lseg + locks->sh_size, text, text_end); diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 708751311786..7bb17d37db01 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -263,7 +263,7 @@ static int __init msr_init(void) pr_err("unable to get major %d for msr\n", MSR_MAJOR); return -EBUSY; } - msr_class = class_create(THIS_MODULE, "msr"); + msr_class = class_create("msr"); if (IS_ERR(msr_class)) { err = PTR_ERR(msr_class); goto out_chrdev; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 42e182868873..ac10b46c5832 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -64,11 +64,11 @@ static unsigned paravirt_patch_call(void *insn_buff, const void *target, } #ifdef CONFIG_PARAVIRT_XXL -/* identity function, which can be inlined */ -u64 notrace _paravirt_ident_64(u64 x) -{ - return x; -} +DEFINE_PARAVIRT_ASM(_paravirt_ident_64, "mov %rdi, %rax", .text); +DEFINE_PARAVIRT_ASM(pv_native_save_fl, "pushf; pop %rax", .noinstr.text); +DEFINE_PARAVIRT_ASM(pv_native_irq_disable, "cli", .noinstr.text); +DEFINE_PARAVIRT_ASM(pv_native_irq_enable, "sti", .noinstr.text); +DEFINE_PARAVIRT_ASM(pv_native_read_cr2, "mov %cr2, %rax", .noinstr.text); #endif DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); @@ -197,11 +197,6 @@ void paravirt_end_context_switch(struct task_struct *next) arch_enter_lazy_mmu_mode(); } -static noinstr unsigned long pv_native_read_cr2(void) -{ - return native_read_cr2(); -} - static noinstr void pv_native_write_cr2(unsigned long val) { native_write_cr2(val); @@ -222,16 +217,6 @@ noinstr void pv_native_wbinvd(void) native_wbinvd(); } -static noinstr void pv_native_irq_enable(void) -{ - native_irq_enable(); -} - -static noinstr void pv_native_irq_disable(void) -{ - native_irq_disable(); -} - static noinstr void pv_native_safe_halt(void) { native_safe_halt(); @@ -298,7 +283,7 @@ struct paravirt_patch_template pv_ops = { .cpu.end_context_switch = paravirt_nop, /* Irq ops. */ - .irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), + .irq.save_fl = __PV_IS_CALLEE_SAVE(pv_native_save_fl), .irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable), .irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable), .irq.safe_halt = pv_native_safe_halt, @@ -363,8 +348,7 @@ struct paravirt_patch_template pv_ops = { .mmu.make_pte = PTE_IDENT, .mmu.make_pgd = PTE_IDENT, - .mmu.dup_mmap = paravirt_nop, - .mmu.activate_mm = paravirt_nop, + .mmu.enter_mmap = paravirt_nop, .mmu.lazy_mode = { .enter = paravirt_nop, diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 30bbe4abb5d6..de6be0a3965e 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -124,7 +124,7 @@ void __init pci_iommu_alloc(void) } /* - * See <Documentation/x86/x86_64/boot-options.rst> for the iommu kernel + * See <Documentation/arch/x86/x86_64/boot-options.rst> for the iommu kernel * parameter documentation. 
*/ static __init int iommu_setup(char *p) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b650cde3f64d..dac41a0072ea 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -5,6 +5,7 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/smp.h> +#include <linux/cpu.h> #include <linux/prctl.h> #include <linux/slab.h> #include <linux/sched.h> @@ -48,6 +49,7 @@ #include <asm/frame.h> #include <asm/unwind.h> #include <asm/tdx.h> +#include <asm/mmu_context.h> #include "process.h" @@ -162,6 +164,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + + if (p->mm && (clone_flags & (CLONE_VM | CLONE_VFORK)) == CLONE_VM) + set_bit(MM_CONTEXT_LOCK_LAM, &p->mm->context.flags); #else p->thread.sp0 = (unsigned long) (childregs + 1); savesegment(gs, p->thread.gs); @@ -368,6 +373,8 @@ void arch_setup_new_exec(void) task_clear_spec_ssb_noexec(current); speculation_ctrl_update(read_thread_flags()); } + + mm_reset_untag_mask(current->mm); } #ifdef CONFIG_X86_IOPL_IOPERM @@ -715,7 +722,7 @@ static bool x86_idle_set(void) } #ifndef CONFIG_SMP -static inline void play_dead(void) +static inline void __noreturn play_dead(void) { BUG(); } @@ -727,7 +734,7 @@ void arch_cpu_idle_enter(void) local_touch_nmi(); } -void arch_cpu_idle_dead(void) +void __noreturn arch_cpu_idle_dead(void) { play_dead(); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index bb65a68b4b49..223b223f713f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -671,7 +671,7 @@ void set_personality_64bit(void) task_pt_regs(current)->orig_ax = __NR_execve; current_thread_info()->status &= ~TS_COMPAT; if (current->mm) - current->mm->context.flags = MM_CONTEXT_HAS_VSYSCALL; + __set_bit(MM_CONTEXT_HAS_VSYSCALL, &current->mm->context.flags); /* TBD: overwrites user setup. Should have two bits. But 64bit processes have always behaved this way, @@ -708,7 +708,7 @@ static void __set_personality_ia32(void) * uprobes applied to this MM need to know this and * cannot use user_64bit_mode() at that time.
*/ - current->mm->context.flags = MM_CONTEXT_UPROBE_IA32; + __set_bit(MM_CONTEXT_UPROBE_IA32, &current->mm->context.flags); } current->personality |= force_personality32; @@ -743,6 +743,52 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr) } #endif +#ifdef CONFIG_ADDRESS_MASKING + +#define LAM_U57_BITS 6 + +static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits) +{ + if (!cpu_feature_enabled(X86_FEATURE_LAM)) + return -ENODEV; + + /* PTRACE_ARCH_PRCTL */ + if (current->mm != mm) + return -EINVAL; + + if (mm_valid_pasid(mm) && + !test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags)) + return -EINVAL; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + if (test_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags)) { + mmap_write_unlock(mm); + return -EBUSY; + } + + if (!nr_bits) { + mmap_write_unlock(mm); + return -EINVAL; + } else if (nr_bits <= LAM_U57_BITS) { + mm->context.lam_cr3_mask = X86_CR3_LAM_U57; + mm->context.untag_mask = ~GENMASK(62, 57); + } else { + mmap_write_unlock(mm); + return -EINVAL; + } + + write_cr3(__read_cr3() | mm->context.lam_cr3_mask); + set_tlbstate_lam_mode(mm); + set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags); + + mmap_write_unlock(mm); + + return 0; +} +#endif + long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) { int ret = 0; @@ -830,7 +876,23 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) case ARCH_MAP_VDSO_64: return prctl_map_vdso(&vdso_image_64, arg2); #endif - +#ifdef CONFIG_ADDRESS_MASKING + case ARCH_GET_UNTAG_MASK: + return put_user(task->mm->context.untag_mask, + (unsigned long __user *)arg2); + case ARCH_ENABLE_TAGGED_ADDR: + return prctl_enable_tagged_addr(task->mm, arg2); + case ARCH_FORCE_TAGGED_SVA: + if (current != task) + return -EINVAL; + set_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &task->mm->context.flags); + return 0; + case ARCH_GET_MAX_TAG_BITS: + if (!cpu_feature_enabled(X86_FEATURE_LAM)) + return put_user(0, (unsigned long __user *)arg2); + else + return put_user(LAM_U57_BITS, (unsigned long __user *)arg2); +#endif default: ret = -EINVAL; break; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d03c551defcc..3adbe97015c1 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -920,7 +920,7 @@ void run_crash_ipi_callback(struct pt_regs *regs) } /* Override the weak function in kernel/panic.c */ -void nmi_panic_self_stop(struct pt_regs *regs) +void __noreturn nmi_panic_self_stop(struct pt_regs *regs) { while (1) { /* If no CPU is preparing crash dump, we simply loop here.
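A minimal userspace view of the ARCH_* arch_prctl() interface added above; the numeric values are assumed to match the uapi additions that accompany this series (asm/prctl.h), and the tag layout assumes LAM_U57 (bits 62:57):

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Assumed to match the asm/prctl.h additions accompanying this series. */
#ifndef ARCH_GET_UNTAG_MASK
#define ARCH_GET_UNTAG_MASK	0x4001
#define ARCH_ENABLE_TAGGED_ADDR	0x4002
#define ARCH_GET_MAX_TAG_BITS	0x4003
#endif

int main(void)
{
	unsigned long bits = 0, mask = 0;
	uint64_t *p, *tagged;

	if (syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits) || !bits) {
		puts("LAM not supported");
		return 1;
	}

	/* Request LAM_U57: up to 6 metadata bits in 62:57. */
	if (syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, 6UL))
		return 1;

	syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &mask);
	printf("max tag bits: %lu, untag mask: 0x%lx\n", bits, mask);

	/* With LAM enabled, a tagged pointer dereferences like the plain one. */
	p = malloc(sizeof(*p));
	*p = 42;
	tagged = (uint64_t *)((uint64_t)p | (0x2fULL << 57));
	printf("*tagged = %llu\n", (unsigned long long)*tagged);
	free(p);
	return 0;
}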
*/ diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 4a73351f87f8..56cab1bb25f5 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -43,7 +43,7 @@ .code64 SYM_CODE_START_NOALIGN(relocate_range) SYM_CODE_START_NOALIGN(relocate_kernel) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR /* * %rdi indirection_page @@ -113,7 +113,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK /* set return address to 0 if not preserving context */ pushq $0 /* store the start address on the stack */ @@ -231,7 +231,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR // RET target, above movq RSP(%r8), %rsp movq CR4(%r8), %rax @@ -256,8 +256,8 @@ SYM_CODE_END(virtual_mapped) /* Do the copies */ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) - UNWIND_HINT_EMPTY - movq %rdi, %rcx /* Put the page_list in %rcx */ + UNWIND_HINT_END_OF_STACK + movq %rdi, %rcx /* Put the page_list in %rcx */ xorl %edi, %edi xorl %esi, %esi jmp 1f diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 3f664ab277c4..b031244d6d2d 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -22,6 +22,8 @@ #include <linux/efi.h> #include <linux/platform_device.h> #include <linux/io.h> +#include <linux/psp-sev.h> +#include <uapi/linux/sev-guest.h> #include <asm/cpu_entry_area.h> #include <asm/stacktrace.h> @@ -2175,7 +2177,7 @@ static int __init init_sev_config(char *str) } __setup("sev=", init_sev_config); -int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err) +int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { struct ghcb_state state; struct es_em_ctxt ctxt; @@ -2183,8 +2185,7 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned struct ghcb *ghcb; int ret; - if (!fw_err) - return -EINVAL; + rio->exitinfo2 = SEV_RET_NO_FW_CALL; /* * __sev_get_ghcb() needs to run with IRQs disabled because it is using @@ -2209,16 +2210,16 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned if (ret) goto e_put; - *fw_err = ghcb->save.sw_exit_info_2; - switch (*fw_err) { + rio->exitinfo2 = ghcb->save.sw_exit_info_2; + switch (rio->exitinfo2) { case 0: break; - case SNP_GUEST_REQ_ERR_BUSY: + case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY): ret = -EAGAIN; break; - case SNP_GUEST_REQ_INVALID_LEN: + case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN): /* Number of expected pages are returned in RBX */ if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { input->data_npages = ghcb_get_rbx(ghcb); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9013bb28255a..352f0ce1ece4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -121,17 +121,20 @@ int arch_update_cpu_topology(void) return retval; } + +static unsigned int smpboot_warm_reset_vector_count; + static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { unsigned long flags; spin_lock_irqsave(&rtc_lock, flags); - CMOS_WRITE(0xa, 0xf); + if (!smpboot_warm_reset_vector_count++) { + CMOS_WRITE(0xa, 0xf); + *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = start_eip >> 4; + *((volatile unsigned short 
*)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = start_eip & 0xf; + } spin_unlock_irqrestore(&rtc_lock, flags); - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = - start_eip >> 4; - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = - start_eip & 0xf; } static inline void smpboot_restore_warm_reset_vector(void) @@ -143,10 +146,12 @@ static inline void smpboot_restore_warm_reset_vector(void) * to default values. */ spin_lock_irqsave(&rtc_lock, flags); - CMOS_WRITE(0, 0xf); + if (!--smpboot_warm_reset_vector_count) { + CMOS_WRITE(0, 0xf); + *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; + } spin_unlock_irqrestore(&rtc_lock, flags); - *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; } /* @@ -1059,8 +1064,6 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle); -#else - initial_gs = per_cpu_offset(cpu); #endif return 0; } @@ -1086,9 +1089,14 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, start_ip = real_mode_header->trampoline_start64; #endif idle->thread.sp = (unsigned long)task_pt_regs(idle); - early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu); initial_code = (unsigned long)start_secondary; - initial_stack = idle->thread.sp; + + if (IS_ENABLED(CONFIG_X86_32)) { + early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu); + initial_stack = idle->thread.sp; + } else { + smpboot_control = cpu; + } /* Enable the espfix hack for this CPU */ init_espfix_ap(cpu); @@ -1816,7 +1824,7 @@ static inline void mwait_play_dead(void) } } -void hlt_play_dead(void) +void __noreturn hlt_play_dead(void) { if (__this_cpu_read(cpu_info.x86) >= 4) wbinvd(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d317dc3d06a3..8b83d8fbce71 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -671,15 +671,15 @@ static bool try_fixup_enqcmd_gp(void) if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) return false; - pasid = current->mm->pasid; - /* * If the mm has not been allocated a * PASID, the #GP can not be fixed up. */ - if (!pasid_valid(pasid)) + if (!mm_valid_pasid(current->mm)) return false; + pasid = current->mm->pasid; + /* * Did this thread already have its PASID activated? * If so, the #GP must be from something else. diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 37307b40f8da..3ac50b7298d1 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -133,7 +133,7 @@ static struct orc_entry null_orc_entry = { .sp_offset = sizeof(long), .sp_reg = ORC_REG_SP, .bp_reg = ORC_REG_UNDEFINED, - .type = UNWIND_HINT_TYPE_CALL + .type = ORC_TYPE_CALL }; #ifdef CONFIG_CALL_THUNKS @@ -153,12 +153,11 @@ static struct orc_entry *orc_callthunk_find(unsigned long ip) /* Fake frame pointer entry -- used as a fallback for generated code */ static struct orc_entry orc_fp_entry = { - .type = UNWIND_HINT_TYPE_CALL, + .type = ORC_TYPE_CALL, .sp_reg = ORC_REG_BP, .sp_offset = 16, .bp_reg = ORC_REG_PREV_SP, .bp_offset = -16, - .end = 0, }; static struct orc_entry *orc_find(unsigned long ip) @@ -250,13 +249,13 @@ static int orc_sort_cmp(const void *_a, const void *_b) return -1; /* - * The "weak" section terminator entries need to always be on the left + * The "weak" section terminator entries need to always be first * to ensure the lookup code skips them in favor of real entries. 
* These terminator entries exist to handle any gaps created by * whitelisted .o files which didn't get objtool generation. */ orc_a = cur_orc_table + (a - cur_orc_ip_table); - return orc_a->sp_reg == ORC_REG_UNDEFINED && !orc_a->end ? -1 : 1; + return orc_a->type == ORC_TYPE_UNDEFINED ? -1 : 1; } void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size, @@ -474,14 +473,12 @@ bool unwind_next_frame(struct unwind_state *state) */ orc = &orc_fp_entry; state->error = true; - } - - /* End-of-stack check for kernel threads: */ - if (orc->sp_reg == ORC_REG_UNDEFINED) { - if (!orc->end) + } else { + if (orc->type == ORC_TYPE_UNDEFINED) goto err; - goto the_end; + if (orc->type == ORC_TYPE_END_OF_STACK) + goto the_end; } state->signal = orc->signal; @@ -554,7 +551,7 @@ bool unwind_next_frame(struct unwind_state *state) /* Find IP, SP and possibly regs: */ switch (orc->type) { - case UNWIND_HINT_TYPE_CALL: + case ORC_TYPE_CALL: ip_p = sp - sizeof(long); if (!deref_stack_reg(state, ip_p, &state->ip)) @@ -567,7 +564,7 @@ bool unwind_next_frame(struct unwind_state *state) state->prev_regs = NULL; break; - case UNWIND_HINT_TYPE_REGS: + case ORC_TYPE_REGS: if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) { orc_warn_current("can't access registers at %pB\n", (void *)orig_ip); @@ -590,13 +587,13 @@ bool unwind_next_frame(struct unwind_state *state) state->full_regs = true; break; - case UNWIND_HINT_TYPE_REGS_PARTIAL: + case ORC_TYPE_REGS_PARTIAL: if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) { orc_warn_current("can't access iret registers at %pB\n", (void *)orig_ip); goto err; } - /* See UNWIND_HINT_TYPE_REGS case comment. */ + /* See ORC_TYPE_REGS case comment. */ state->ip = unwind_recover_rethook(state, state->ip, (unsigned long *)(state->sp - sizeof(long))); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ef80d361b463..d82f4fa2f1bf 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -33,8 +33,8 @@ static int __init iommu_init_noop(void) { return 0; } static void iommu_shutdown_noop(void) { } bool __init bool_x86_init_noop(void) { return false; } void x86_op_int_noop(int cpu) { } -static __init int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; } -static __init void get_rtc_noop(struct timespec64 *now) { } +int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; } +void get_rtc_noop(struct timespec64 *now) { } static __initconst const struct of_device_id of_cmos_match[] = { { .compatible = "motorola,mc146818" }, @@ -134,6 +134,7 @@ static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; } static bool enc_tlb_flush_required_noop(bool enc) { return false; } static bool enc_cache_flush_required_noop(void) { return false; } +static bool is_private_mmio_noop(u64 addr) {return false; } struct x86_platform_ops x86_platform __ro_after_init = { .calibrate_cpu = native_calibrate_cpu_early, @@ -149,6 +150,7 @@ struct x86_platform_ops x86_platform __ro_after_init = { .realmode_reserve = reserve_real_mode, .realmode_init = init_real_mode, .hyper.pin_vcpu = x86_op_int_noop, + .hyper.is_private_mmio = is_private_mmio_noop, .guest = { .enc_status_change_prepare = enc_status_change_prepare_noop, diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 8e578311ca9d..89ca7f4c1464 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -46,7 +46,6 @@ config KVM 
select KVM_XFER_TO_GUEST_WORK select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_VFIO - select SRCU select INTERVAL_TREE select HAVE_KVM_PM_NOTIFIER if PM select KVM_GENERIC_HARDWARE_ENABLING diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 042dee556125..995eb5054360 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -368,9 +368,39 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) mask_after = e->fields.mask; if (mask_before != mask_after) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); - if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG - && ioapic->irr & (1 << index)) - ioapic_service(ioapic, index, false); + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG && + ioapic->irr & (1 << index) && !e->fields.mask && !e->fields.remote_irr) { + /* + * Pending status in irr may be outdated: the IRQ line may have + * already been deasserted by a device while the IRQ was masked. + * This occurs, for instance, if the interrupt is handled in a + * Linux guest as a oneshot interrupt (IRQF_ONESHOT). In this + * case the guest acknowledges the interrupt to the device in + * its threaded irq handler, i.e. after the EOI but before + * unmasking, so at the time of unmasking the IRQ line is + * already down but our pending irr bit is still set. In such + * cases, injecting this pending interrupt to the guest is + * buggy: the guest will receive an extra unwanted interrupt. + * + * So we need to check here if the IRQ is actually still pending. + * As we are generally not able to probe the IRQ line status + * directly, we do it through irqfd resampler. Namely, we clear + * the pending status and notify the resampler that this interrupt + * is done, without actually injecting it into the guest. If the + * IRQ line is actually already deasserted, we are done. If it is + * still asserted, a new interrupt will be shortly triggered + * through irqfd and injected into the guest. + * + * If, however, it's not possible to resample (no irqfd resampler + * registered for this irq), then unconditionally inject this + * pending interrupt into the guest, so the guest will not miss + * an interrupt, although may get an extra unwanted interrupt. 
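The unmask path described above only clears the stale irr bit when an irqfd resampler is registered for the GSI. For context, this is roughly how a VMM attaches such a resample eventfd with the standard KVM_IRQFD ioctl (error handling trimmed; the fd numbers and GSI are illustrative):

#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Attach an irqfd plus resamplefd for a level-triggered GSI on an existing
 * VM fd.  KVM signals the resamplefd when the guest EOIs/unmasks, which is
 * what kvm_notify_irqfd_resampler() above relies on.
 */
static int attach_resampling_irqfd(int vm_fd, unsigned int gsi)
{
	struct kvm_irqfd irqfd;

	memset(&irqfd, 0, sizeof(irqfd));
	irqfd.fd = eventfd(0, EFD_CLOEXEC);		/* VMM asserts the line */
	irqfd.resamplefd = eventfd(0, EFD_CLOEXEC);	/* KVM notifies here */
	irqfd.gsi = gsi;
	irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;

	return ioctl(vm_fd, KVM_IRQFD, &irqfd);
}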
+ */ + if (kvm_notify_irqfd_resampler(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index)) + ioapic->irr &= ~(1 << index); + else + ioapic_service(ioapic, index, false); + } if (e->fields.delivery_mode == APIC_DM_FIXED) { struct kvm_lapic_irq irq; diff --git a/arch/x86/kvm/kvm_onhyperv.h b/arch/x86/kvm/kvm_onhyperv.h index 287e98ef9df3..6272dabec02d 100644 --- a/arch/x86/kvm/kvm_onhyperv.h +++ b/arch/x86/kvm/kvm_onhyperv.h @@ -12,6 +12,11 @@ int hv_remote_flush_tlb_with_range(struct kvm *kvm, int hv_remote_flush_tlb(struct kvm *kvm); void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp); #else /* !CONFIG_HYPERV */ +static inline int hv_remote_flush_tlb(struct kvm *kvm) +{ + return -EOPNOTSUPP; +} + static inline void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp) { } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c25aeb550cd9..69ae5e1b3120 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -12,6 +12,7 @@ #include <linux/kvm_host.h> #include <linux/kernel.h> #include <linux/highmem.h> +#include <linux/psp.h> #include <linux/psp-sev.h> #include <linux/pagemap.h> #include <linux/swap.h> @@ -1767,18 +1768,20 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) { struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info; struct kvm_sev_info *src_sev, *cg_cleanup_sev; - struct file *source_kvm_file; + struct fd f = fdget(source_fd); struct kvm *source_kvm; bool charged = false; int ret; - source_kvm_file = fget(source_fd); - if (!file_is_kvm(source_kvm_file)) { + if (!f.file) + return -EBADF; + + if (!file_is_kvm(f.file)) { ret = -EBADF; goto out_fput; } - source_kvm = source_kvm_file->private_data; + source_kvm = f.file->private_data; ret = sev_lock_two_vms(kvm, source_kvm); if (ret) goto out_fput; @@ -1828,8 +1831,7 @@ out_dst_cgroup: out_unlock: sev_unlock_two_vms(kvm, source_kvm); out_fput: - if (source_kvm_file) - fput(source_kvm_file); + fdput(f); return ret; } @@ -2046,18 +2048,20 @@ failed: int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd) { - struct file *source_kvm_file; + struct fd f = fdget(source_fd); struct kvm *source_kvm; struct kvm_sev_info *source_sev, *mirror_sev; int ret; - source_kvm_file = fget(source_fd); - if (!file_is_kvm(source_kvm_file)) { + if (!f.file) + return -EBADF; + + if (!file_is_kvm(f.file)) { ret = -EBADF; goto e_source_fput; } - source_kvm = source_kvm_file->private_data; + source_kvm = f.file->private_data; ret = sev_lock_two_vms(kvm, source_kvm); if (ret) goto e_source_fput; @@ -2103,8 +2107,7 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd) e_unlock: sev_unlock_two_vms(kvm, source_kvm); e_source_fput: - if (source_kvm_file) - fput(source_kvm_file); + fdput(f); return ret; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 424fcdba4c78..a1b08359769b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3733,7 +3733,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu) svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); } -static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) +static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3757,6 +3757,37 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) svm->current_vmcb->asid_generation--; } +static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) +{ + hpa_t root_tdp = vcpu->arch.mmu->root.hpa; + + /* + * When running on Hyper-V with EnlightenedNptTlb enabled, explicitly + * flush the NPT 
mappings via hypercall as flushing the ASID only + * affects virtual to physical mappings, it does not invalidate guest + * physical to host physical mappings. + */ + if (svm_hv_is_enlightened_tlb_enabled(vcpu) && VALID_PAGE(root_tdp)) + hyperv_flush_guest_mapping(root_tdp); + + svm_flush_tlb_asid(vcpu); +} + +static void svm_flush_tlb_all(struct kvm_vcpu *vcpu) +{ + /* + * When running on Hyper-V with EnlightenedNptTlb enabled, remote TLB + * flushes should be routed to hv_remote_flush_tlb() without requesting + * a "regular" remote flush. Reaching this point means either there's + * a KVM bug or a prior hv_remote_flush_tlb() call failed, both of + * which might be fatal to the guest. Yell, but try to recover. + */ + if (WARN_ON_ONCE(svm_hv_is_enlightened_tlb_enabled(vcpu))) + hv_remote_flush_tlb(vcpu->kvm); + + svm_flush_tlb_asid(vcpu); +} + static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4749,10 +4780,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_rflags = svm_set_rflags, .get_if_flag = svm_get_if_flag, - .flush_tlb_all = svm_flush_tlb_current, + .flush_tlb_all = svm_flush_tlb_all, .flush_tlb_current = svm_flush_tlb_current, .flush_tlb_gva = svm_flush_tlb_gva, - .flush_tlb_guest = svm_flush_tlb_current, + .flush_tlb_guest = svm_flush_tlb_asid, .vcpu_pre_run = svm_vcpu_pre_run, .vcpu_run = svm_vcpu_run, diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index cff838f15db5..786d46d73a8e 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -6,6 +6,8 @@ #ifndef __ARCH_X86_KVM_SVM_ONHYPERV_H__ #define __ARCH_X86_KVM_SVM_ONHYPERV_H__ +#include <asm/mshyperv.h> + #if IS_ENABLED(CONFIG_HYPERV) #include "kvm_onhyperv.h" @@ -15,6 +17,14 @@ static struct kvm_x86_ops svm_x86_ops; int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu); +static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu) +{ + struct hv_vmcb_enlightenments *hve = &to_svm(vcpu)->vmcb->control.hv_enlightenments; + + return ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB && + !!hve->hv_enlightenments_control.enlightened_npt_tlb; +} + static inline void svm_hv_init_vmcb(struct vmcb *vmcb) { struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments; @@ -80,6 +90,11 @@ static inline void svm_hv_update_vp_id(struct vmcb *vmcb, struct kvm_vcpu *vcpu) } #else +static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu) +{ + return false; +} + static inline void svm_hv_init_vmcb(struct vmcb *vmcb) { } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1bc2b80273c9..768487611db7 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3868,7 +3868,12 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu) exit_qual = 0; } - if (ex->has_error_code) { + /* + * Unlike AMD's Paged Real Mode, which reports an error code on #PF + * VM-Exits even if the CPU is in Real Mode, Intel VMX never sets the + * "has error code" flags on VM-Exit if the CPU is in Real Mode. 
+ */ + if (ex->has_error_code && is_protmode(vcpu)) { /* * Intel CPUs do not generate error codes with bits 31:16 set, * and more importantly VMX disallows setting bits 31:16 in the diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 07ba937bdb6f..2b1d82647195 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4434,6 +4434,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_VAPIC: case KVM_CAP_ENABLE_CAP: case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES: + case KVM_CAP_IRQFD_RESAMPLE: r = 1; break; case KVM_CAP_EXIT_HYPERCALL: @@ -8905,6 +8906,8 @@ restart: } if (ctxt->have_exception) { + WARN_ON_ONCE(vcpu->mmio_needed && !vcpu->mmio_is_write); + vcpu->mmio_needed = false; r = 1; inject_emulated_exception(vcpu); } else if (vcpu->arch.pio.count) { @@ -9908,13 +9911,20 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu) static void kvm_inject_exception(struct kvm_vcpu *vcpu) { + /* + * Suppress the error code if the vCPU is in Real Mode, as Real Mode + * exceptions don't report error codes. The presence of an error code + * is carried with the exception and only stripped when the exception + * is injected as intercepted #PF VM-Exits for AMD's Paged Real Mode do + * report an error code despite the CPU being in Real Mode. + */ + vcpu->arch.exception.has_error_code &= is_protmode(vcpu); + trace_kvm_inj_exception(vcpu->arch.exception.vector, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code, vcpu->arch.exception.injected); - if (vcpu->arch.exception.error_code && !is_protmode(vcpu)) - vcpu->arch.exception.error_code = false; static_call(kvm_x86_inject_exception)(vcpu); } diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 4f1a40a86534..01932af64193 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -71,6 +71,6 @@ ifneq ($(CONFIG_GENERIC_CSUM),y) endif lib-y += clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o - lib-y += copy_user_64.o + lib-y += copy_user_64.o copy_user_uncached_64.o lib-y += cmpxchg16b_emu.o endif diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index ecbfb4dd3b01..f74a3e704a1c 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -57,134 +57,85 @@ EXPORT_SYMBOL_GPL(clear_page_erms) * Input: * rdi destination * rcx count + * rax is zero * * Output: * rcx: uncleared bytes or 0 if successful. */ -SYM_FUNC_START(clear_user_original) - /* - * Copy only the lower 32 bits of size as that is enough to handle the rest bytes, - * i.e., no need for a 'q' suffix and thus a REX prefix. - */ - mov %ecx,%eax - shr $3,%rcx - jz .Lrest_bytes +SYM_FUNC_START(rep_stos_alternative) + cmpq $64,%rcx + jae .Lunrolled - # do the qwords first - .p2align 4 -.Lqwords: - movq $0,(%rdi) - lea 8(%rdi),%rdi - dec %rcx - jnz .Lqwords + cmp $8,%ecx + jae .Lword -.Lrest_bytes: - and $7, %eax - jz .Lexit + testl %ecx,%ecx + je .Lexit - # now do the rest bytes -.Lbytes: - movb $0,(%rdi) +.Lclear_user_tail: +0: movb %al,(%rdi) inc %rdi - dec %eax - jnz .Lbytes - + dec %rcx + jnz .Lclear_user_tail .Lexit: - /* - * %rax still needs to be cleared in the exception case because this function is called - * from inline asm and the compiler expects %rax to be zero when exiting the inline asm, - * in case it might reuse it somewhere. 
- */ - xor %eax,%eax - RET - -.Lqwords_exception: - # convert remaining qwords back into bytes to return to caller - shl $3, %rcx - and $7, %eax - add %rax,%rcx - jmp .Lexit - -.Lbytes_exception: - mov %eax,%ecx - jmp .Lexit - - _ASM_EXTABLE_UA(.Lqwords, .Lqwords_exception) - _ASM_EXTABLE_UA(.Lbytes, .Lbytes_exception) -SYM_FUNC_END(clear_user_original) -EXPORT_SYMBOL(clear_user_original) - -/* - * Alternative clear user-space when CPU feature X86_FEATURE_REP_GOOD is - * present. - * Input: - * rdi destination - * rcx count - * - * Output: - * rcx: uncleared bytes or 0 if successful. - */ -SYM_FUNC_START(clear_user_rep_good) - # call the original thing for less than a cacheline - cmp $64, %rcx - jb clear_user_original - -.Lprep: - # copy lower 32-bits for rest bytes - mov %ecx, %edx - shr $3, %rcx - jz .Lrep_good_rest_bytes - -.Lrep_good_qwords: - rep stosq - -.Lrep_good_rest_bytes: - and $7, %edx - jz .Lrep_good_exit - -.Lrep_good_bytes: - mov %edx, %ecx - rep stosb - -.Lrep_good_exit: - # see .Lexit comment above - xor %eax, %eax RET -.Lrep_good_qwords_exception: - # convert remaining qwords back into bytes to return to caller - shl $3, %rcx - and $7, %edx - add %rdx, %rcx - jmp .Lrep_good_exit + _ASM_EXTABLE_UA( 0b, .Lexit) - _ASM_EXTABLE_UA(.Lrep_good_qwords, .Lrep_good_qwords_exception) - _ASM_EXTABLE_UA(.Lrep_good_bytes, .Lrep_good_exit) -SYM_FUNC_END(clear_user_rep_good) -EXPORT_SYMBOL(clear_user_rep_good) +.Lword: +1: movq %rax,(%rdi) + addq $8,%rdi + sub $8,%ecx + je .Lexit + cmp $8,%ecx + jae .Lword + jmp .Lclear_user_tail -/* - * Alternative clear user-space when CPU feature X86_FEATURE_ERMS is present. - * Input: - * rdi destination - * rcx count - * - * Output: - * rcx: uncleared bytes or 0 if successful. - * - */ -SYM_FUNC_START(clear_user_erms) - # call the original thing for less than a cacheline - cmp $64, %rcx - jb clear_user_original - -.Lerms_bytes: - rep stosb - -.Lerms_exit: - xorl %eax,%eax + .p2align 4 +.Lunrolled: +10: movq %rax,(%rdi) +11: movq %rax,8(%rdi) +12: movq %rax,16(%rdi) +13: movq %rax,24(%rdi) +14: movq %rax,32(%rdi) +15: movq %rax,40(%rdi) +16: movq %rax,48(%rdi) +17: movq %rax,56(%rdi) + addq $64,%rdi + subq $64,%rcx + cmpq $64,%rcx + jae .Lunrolled + cmpl $8,%ecx + jae .Lword + testl %ecx,%ecx + jne .Lclear_user_tail RET - _ASM_EXTABLE_UA(.Lerms_bytes, .Lerms_exit) -SYM_FUNC_END(clear_user_erms) -EXPORT_SYMBOL(clear_user_erms) + /* + * If we take an exception on any of the + * word stores, we know that %rcx isn't zero, + * so we can just go to the tail clearing to + * get the exact count. + * + * The unrolled case might end up clearing + * some bytes twice. Don't care. + * + * We could use the value in %rdi to avoid + * a second fault on the exact count case, + * but do we really care? No. + * + * Finally, we could try to align %rdi at the + * top of the unrolling. But unaligned stores + * just aren't that common or expensive. 
+ */ + _ASM_EXTABLE_UA( 1b, .Lclear_user_tail) + _ASM_EXTABLE_UA(10b, .Lclear_user_tail) + _ASM_EXTABLE_UA(11b, .Lclear_user_tail) + _ASM_EXTABLE_UA(12b, .Lclear_user_tail) + _ASM_EXTABLE_UA(13b, .Lclear_user_tail) + _ASM_EXTABLE_UA(14b, .Lclear_user_tail) + _ASM_EXTABLE_UA(15b, .Lclear_user_tail) + _ASM_EXTABLE_UA(16b, .Lclear_user_tail) + _ASM_EXTABLE_UA(17b, .Lclear_user_tail) +SYM_FUNC_END(rep_stos_alternative) +EXPORT_SYMBOL(rep_stos_alternative) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 9dec1b38a98f..4fc5c2de2de4 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -7,404 +7,108 @@ */ #include <linux/linkage.h> -#include <asm/current.h> -#include <asm/asm-offsets.h> -#include <asm/thread_info.h> -#include <asm/cpufeatures.h> -#include <asm/alternative.h> #include <asm/asm.h> -#include <asm/smap.h> #include <asm/export.h> -#include <asm/trapnr.h> - -.macro ALIGN_DESTINATION - /* check for bad alignment of destination */ - movl %edi,%ecx - andl $7,%ecx - jz 102f /* already aligned */ - subl $8,%ecx - negl %ecx - subl %ecx,%edx -100: movb (%rsi),%al -101: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz 100b -102: - - _ASM_EXTABLE_CPY(100b, .Lcopy_user_handle_align) - _ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align) -.endm /* - * copy_user_generic_unrolled - memory copy with exception handling. - * This version is for CPUs like P4 that don't have efficient micro - * code for rep movsq - * - * Input: - * rdi destination - * rsi source - * rdx count - * - * Output: - * eax uncopied bytes or 0 if successful. - */ -SYM_FUNC_START(copy_user_generic_unrolled) - ASM_STAC - cmpl $8,%edx - jb .Lcopy_user_short_string_bytes - ALIGN_DESTINATION - movl %edx,%ecx - andl $63,%edx - shrl $6,%ecx - jz copy_user_short_string -1: movq (%rsi),%r8 -2: movq 1*8(%rsi),%r9 -3: movq 2*8(%rsi),%r10 -4: movq 3*8(%rsi),%r11 -5: movq %r8,(%rdi) -6: movq %r9,1*8(%rdi) -7: movq %r10,2*8(%rdi) -8: movq %r11,3*8(%rdi) -9: movq 4*8(%rsi),%r8 -10: movq 5*8(%rsi),%r9 -11: movq 6*8(%rsi),%r10 -12: movq 7*8(%rsi),%r11 -13: movq %r8,4*8(%rdi) -14: movq %r9,5*8(%rdi) -15: movq %r10,6*8(%rdi) -16: movq %r11,7*8(%rdi) - leaq 64(%rsi),%rsi - leaq 64(%rdi),%rdi - decl %ecx - jnz 1b - jmp copy_user_short_string - -30: shll $6,%ecx - addl %ecx,%edx - jmp .Lcopy_user_handle_tail - - _ASM_EXTABLE_CPY(1b, 30b) - _ASM_EXTABLE_CPY(2b, 30b) - _ASM_EXTABLE_CPY(3b, 30b) - _ASM_EXTABLE_CPY(4b, 30b) - _ASM_EXTABLE_CPY(5b, 30b) - _ASM_EXTABLE_CPY(6b, 30b) - _ASM_EXTABLE_CPY(7b, 30b) - _ASM_EXTABLE_CPY(8b, 30b) - _ASM_EXTABLE_CPY(9b, 30b) - _ASM_EXTABLE_CPY(10b, 30b) - _ASM_EXTABLE_CPY(11b, 30b) - _ASM_EXTABLE_CPY(12b, 30b) - _ASM_EXTABLE_CPY(13b, 30b) - _ASM_EXTABLE_CPY(14b, 30b) - _ASM_EXTABLE_CPY(15b, 30b) - _ASM_EXTABLE_CPY(16b, 30b) -SYM_FUNC_END(copy_user_generic_unrolled) -EXPORT_SYMBOL(copy_user_generic_unrolled) - -/* Some CPUs run faster using the string copy instructions. - * This is also a lot simpler. Use them when possible. - * - * Only 4GB of copy is supported. This shouldn't be a problem - * because the kernel normally only writes from/to page sized chunks - * even if user space passed a longer buffer. - * And more would be dangerous because both Intel and AMD have - * errata with rep movsq > 4GB. If someone feels the need to fix - * this please consider this. + * rep_movs_alternative - memory copy with exception handling. 
+ * This version is for CPUs that don't have FSRM (Fast Short Rep Movs) * * Input: * rdi destination * rsi source - * rdx count + * rcx count * * Output: - * eax uncopied bytes or 0 if successful. - */ -SYM_FUNC_START(copy_user_generic_string) - ASM_STAC - cmpl $8,%edx - jb 2f /* less than 8 bytes, go to byte copy loop */ - ALIGN_DESTINATION - movl %edx,%ecx - shrl $3,%ecx - andl $7,%edx -1: rep movsq -2: movl %edx,%ecx -3: rep movsb - xorl %eax,%eax - ASM_CLAC - RET - -11: leal (%rdx,%rcx,8),%ecx -12: movl %ecx,%edx /* ecx is zerorest also */ - jmp .Lcopy_user_handle_tail - - _ASM_EXTABLE_CPY(1b, 11b) - _ASM_EXTABLE_CPY(3b, 12b) -SYM_FUNC_END(copy_user_generic_string) -EXPORT_SYMBOL(copy_user_generic_string) - -/* - * Some CPUs are adding enhanced REP MOVSB/STOSB instructions. - * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled. - * - * Input: - * rdi destination - * rsi source - * rdx count + * rcx uncopied bytes or 0 if successful. * - * Output: - * eax uncopied bytes or 0 if successful. + * NOTE! The calling convention is very intentionally the same as + * for 'rep movs', so that we can rewrite the function call with + * just a plain 'rep movs' on machines that have FSRM. But to make + * it simpler for us, we can clobber rsi/rdi and rax/r8-r11 freely. */ -SYM_FUNC_START(copy_user_enhanced_fast_string) - ASM_STAC - /* CPUs without FSRM should avoid rep movsb for short copies */ - ALTERNATIVE "cmpl $64, %edx; jb copy_user_short_string", "", X86_FEATURE_FSRM - movl %edx,%ecx -1: rep movsb - xorl %eax,%eax - ASM_CLAC +SYM_FUNC_START(rep_movs_alternative) + cmpq $64,%rcx + jae .Lunrolled + + cmp $8,%ecx + jae .Lword + + testl %ecx,%ecx + je .Lexit + +.Lcopy_user_tail: +0: movb (%rsi),%al +1: movb %al,(%rdi) + inc %rdi + inc %rsi + dec %rcx + jne .Lcopy_user_tail +.Lexit: RET -12: movl %ecx,%edx /* ecx is zerorest also */ - jmp .Lcopy_user_handle_tail - - _ASM_EXTABLE_CPY(1b, 12b) -SYM_FUNC_END(copy_user_enhanced_fast_string) -EXPORT_SYMBOL(copy_user_enhanced_fast_string) - -/* - * Try to copy last bytes and clear the rest if needed. - * Since protection fault in copy_from/to_user is not a normal situation, - * it is not necessary to optimize tail handling. - * Don't try to copy the tail if machine check happened - * - * Input: - * eax trap number written by ex_handler_copy() - * rdi destination - * rsi source - * rdx count - * - * Output: - * eax uncopied bytes or 0 if successful. 
- */ -SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail) - cmp $X86_TRAP_MC,%eax - je 3f - - movl %edx,%ecx -1: rep movsb -2: mov %ecx,%eax - ASM_CLAC + _ASM_EXTABLE_UA( 0b, .Lexit) + _ASM_EXTABLE_UA( 1b, .Lexit) + + .p2align 4 +.Lword: +2: movq (%rsi),%rax +3: movq %rax,(%rdi) + addq $8,%rsi + addq $8,%rdi + sub $8,%ecx + je .Lexit + cmp $8,%ecx + jae .Lword + jmp .Lcopy_user_tail + + _ASM_EXTABLE_UA( 2b, .Lcopy_user_tail) + _ASM_EXTABLE_UA( 3b, .Lcopy_user_tail) + + .p2align 4 +.Lunrolled: +10: movq (%rsi),%r8 +11: movq 8(%rsi),%r9 +12: movq 16(%rsi),%r10 +13: movq 24(%rsi),%r11 +14: movq %r8,(%rdi) +15: movq %r9,8(%rdi) +16: movq %r10,16(%rdi) +17: movq %r11,24(%rdi) +20: movq 32(%rsi),%r8 +21: movq 40(%rsi),%r9 +22: movq 48(%rsi),%r10 +23: movq 56(%rsi),%r11 +24: movq %r8,32(%rdi) +25: movq %r9,40(%rdi) +26: movq %r10,48(%rdi) +27: movq %r11,56(%rdi) + addq $64,%rsi + addq $64,%rdi + subq $64,%rcx + cmpq $64,%rcx + jae .Lunrolled + cmpl $8,%ecx + jae .Lword + testl %ecx,%ecx + jne .Lcopy_user_tail RET -3: - movl %edx,%eax - ASM_CLAC - RET - - _ASM_EXTABLE_CPY(1b, 2b) - -.Lcopy_user_handle_align: - addl %ecx,%edx /* ecx is zerorest also */ - jmp .Lcopy_user_handle_tail - -SYM_CODE_END(.Lcopy_user_handle_tail) - -/* - * Finish memcpy of less than 64 bytes. #AC should already be set. - * - * Input: - * rdi destination - * rsi source - * rdx count (< 64) - * - * Output: - * eax uncopied bytes or 0 if successful. - */ -SYM_CODE_START_LOCAL(copy_user_short_string) - movl %edx,%ecx - andl $7,%edx - shrl $3,%ecx - jz .Lcopy_user_short_string_bytes -18: movq (%rsi),%r8 -19: movq %r8,(%rdi) - leaq 8(%rsi),%rsi - leaq 8(%rdi),%rdi - decl %ecx - jnz 18b -.Lcopy_user_short_string_bytes: - andl %edx,%edx - jz 23f - movl %edx,%ecx -21: movb (%rsi),%al -22: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz 21b -23: xor %eax,%eax - ASM_CLAC - RET - -40: leal (%rdx,%rcx,8),%edx - jmp 60f -50: movl %ecx,%edx /* ecx is zerorest also */ -60: jmp .Lcopy_user_handle_tail - - _ASM_EXTABLE_CPY(18b, 40b) - _ASM_EXTABLE_CPY(19b, 40b) - _ASM_EXTABLE_CPY(21b, 50b) - _ASM_EXTABLE_CPY(22b, 50b) -SYM_CODE_END(copy_user_short_string) - -/* - * copy_user_nocache - Uncached memory copy with exception handling - * This will force destination out of cache for more performance. - * - * Note: Cached memory copy is used when destination or size is not - * naturally aligned. That is: - * - Require 8-byte alignment when size is 8 bytes or larger. - * - Require 4-byte alignment when size is 4 bytes. 
- */ -SYM_FUNC_START(__copy_user_nocache) - ASM_STAC - - /* If size is less than 8 bytes, go to 4-byte copy */ - cmpl $8,%edx - jb .L_4b_nocache_copy_entry - - /* If destination is not 8-byte aligned, "cache" copy to align it */ - ALIGN_DESTINATION - - /* Set 4x8-byte copy count and remainder */ - movl %edx,%ecx - andl $63,%edx - shrl $6,%ecx - jz .L_8b_nocache_copy_entry /* jump if count is 0 */ - - /* Perform 4x8-byte nocache loop-copy */ -.L_4x8b_nocache_copy_loop: -1: movq (%rsi),%r8 -2: movq 1*8(%rsi),%r9 -3: movq 2*8(%rsi),%r10 -4: movq 3*8(%rsi),%r11 -5: movnti %r8,(%rdi) -6: movnti %r9,1*8(%rdi) -7: movnti %r10,2*8(%rdi) -8: movnti %r11,3*8(%rdi) -9: movq 4*8(%rsi),%r8 -10: movq 5*8(%rsi),%r9 -11: movq 6*8(%rsi),%r10 -12: movq 7*8(%rsi),%r11 -13: movnti %r8,4*8(%rdi) -14: movnti %r9,5*8(%rdi) -15: movnti %r10,6*8(%rdi) -16: movnti %r11,7*8(%rdi) - leaq 64(%rsi),%rsi - leaq 64(%rdi),%rdi - decl %ecx - jnz .L_4x8b_nocache_copy_loop - - /* Set 8-byte copy count and remainder */ -.L_8b_nocache_copy_entry: - movl %edx,%ecx - andl $7,%edx - shrl $3,%ecx - jz .L_4b_nocache_copy_entry /* jump if count is 0 */ - - /* Perform 8-byte nocache loop-copy */ -.L_8b_nocache_copy_loop: -20: movq (%rsi),%r8 -21: movnti %r8,(%rdi) - leaq 8(%rsi),%rsi - leaq 8(%rdi),%rdi - decl %ecx - jnz .L_8b_nocache_copy_loop - - /* If no byte left, we're done */ -.L_4b_nocache_copy_entry: - andl %edx,%edx - jz .L_finish_copy - - /* If destination is not 4-byte aligned, go to byte copy: */ - movl %edi,%ecx - andl $3,%ecx - jnz .L_1b_cache_copy_entry - - /* Set 4-byte copy count (1 or 0) and remainder */ - movl %edx,%ecx - andl $3,%edx - shrl $2,%ecx - jz .L_1b_cache_copy_entry /* jump if count is 0 */ - - /* Perform 4-byte nocache copy: */ -30: movl (%rsi),%r8d -31: movnti %r8d,(%rdi) - leaq 4(%rsi),%rsi - leaq 4(%rdi),%rdi - - /* If no bytes left, we're done: */ - andl %edx,%edx - jz .L_finish_copy - - /* Perform byte "cache" loop-copy for the remainder */ -.L_1b_cache_copy_entry: - movl %edx,%ecx -.L_1b_cache_copy_loop: -40: movb (%rsi),%al -41: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz .L_1b_cache_copy_loop - - /* Finished copying; fence the prior stores */ -.L_finish_copy: - xorl %eax,%eax - ASM_CLAC - sfence - RET - -.L_fixup_4x8b_copy: - shll $6,%ecx - addl %ecx,%edx - jmp .L_fixup_handle_tail -.L_fixup_8b_copy: - lea (%rdx,%rcx,8),%rdx - jmp .L_fixup_handle_tail -.L_fixup_4b_copy: - lea (%rdx,%rcx,4),%rdx - jmp .L_fixup_handle_tail -.L_fixup_1b_copy: - movl %ecx,%edx -.L_fixup_handle_tail: - sfence - jmp .Lcopy_user_handle_tail - - _ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy) - _ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy) - _ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy) - _ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy) - _ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy) - _ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy) - _ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy) -SYM_FUNC_END(__copy_user_nocache) 
-EXPORT_SYMBOL(__copy_user_nocache) + _ASM_EXTABLE_UA(10b, .Lcopy_user_tail) + _ASM_EXTABLE_UA(11b, .Lcopy_user_tail) + _ASM_EXTABLE_UA(12b, .Lcopy_user_tail) + _ASM_EXTABLE_UA(13b, .Lcopy_user_tail) + _ASM_EXTABLE_UA(14b, .Lcopy_user_tail) + _ASM_EXTABLE_UA(15b, .Lcopy_user_tail) + _ASM_EXTABLE_UA(16b, .Lcopy_user_tail) + _ASM_EXTABLE_UA(17b, .Lcopy_user_tail) + _ASM_EXTABLE_UA(20b, .Lcopy_user_tail) + _ASM_EXTABLE_UA(21b, .Lcopy_user_tail) + _ASM_EXTABLE_UA(22b, .Lcopy_user_tail) + _ASM_EXTABLE_UA(23b, .Lcopy_user_tail) + _ASM_EXTABLE_UA(24b, .Lcopy_user_tail) + _ASM_EXTABLE_UA(25b, .Lcopy_user_tail) + _ASM_EXTABLE_UA(26b, .Lcopy_user_tail) + _ASM_EXTABLE_UA(27b, .Lcopy_user_tail) +SYM_FUNC_END(rep_movs_alternative) +EXPORT_SYMBOL(rep_movs_alternative) diff --git a/arch/x86/lib/copy_user_uncached_64.S b/arch/x86/lib/copy_user_uncached_64.S new file mode 100644 index 000000000000..5c5f38d32672 --- /dev/null +++ b/arch/x86/lib/copy_user_uncached_64.S @@ -0,0 +1,242 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2023 Linus Torvalds <torvalds@linux-foundation.org> + */ + +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm/export.h> + +/* + * copy_user_nocache - Uncached memory copy with exception handling + * + * This copies from user space into kernel space, but the kernel + * space accesses can take a machine check exception, so they too + * need exception handling. + * + * Note: only 32-bit and 64-bit stores have non-temporal versions, + * and we only use aligned versions. Any unaligned parts at the + * start or end of the copy will be done using normal cached stores. + * + * Input: + * rdi destination + * rsi source + * edx count + * + * Output: + * rax uncopied bytes or 0 if successful. + */ +SYM_FUNC_START(__copy_user_nocache) + /* If destination is not 7-byte aligned, we'll have to align it */ + testb $7,%dil + jne .Lalign + +.Lis_aligned: + cmp $64,%edx + jb .Lquadwords + + .p2align 4,0x90 +.Lunrolled: +10: movq (%rsi),%r8 +11: movq 8(%rsi),%r9 +12: movq 16(%rsi),%r10 +13: movq 24(%rsi),%r11 +20: movnti %r8,(%rdi) +21: movnti %r9,8(%rdi) +22: movnti %r10,16(%rdi) +23: movnti %r11,24(%rdi) +30: movq 32(%rsi),%r8 +31: movq 40(%rsi),%r9 +32: movq 48(%rsi),%r10 +33: movq 56(%rsi),%r11 +40: movnti %r8,32(%rdi) +41: movnti %r9,40(%rdi) +42: movnti %r10,48(%rdi) +43: movnti %r11,56(%rdi) + + addq $64,%rsi + addq $64,%rdi + sub $64,%edx + cmp $64,%edx + jae .Lunrolled + +/* + * First set of user mode loads have been done + * without any stores, so if they fail, we can + * just try the non-unrolled loop. + */ +_ASM_EXTABLE_UA(10b, .Lquadwords) +_ASM_EXTABLE_UA(11b, .Lquadwords) +_ASM_EXTABLE_UA(12b, .Lquadwords) +_ASM_EXTABLE_UA(13b, .Lquadwords) + +/* + * The second set of user mode loads have been + * done with 32 bytes stored to the destination, + * so we need to take that into account before + * falling back to the unrolled loop. + */ +_ASM_EXTABLE_UA(30b, .Lfixup32) +_ASM_EXTABLE_UA(31b, .Lfixup32) +_ASM_EXTABLE_UA(32b, .Lfixup32) +_ASM_EXTABLE_UA(33b, .Lfixup32) + +/* + * An exception on a write means that we're + * done, but we need to update the count + * depending on where in the unrolled loop + * we were. 
+ */ +_ASM_EXTABLE_UA(20b, .Ldone0) +_ASM_EXTABLE_UA(21b, .Ldone8) +_ASM_EXTABLE_UA(22b, .Ldone16) +_ASM_EXTABLE_UA(23b, .Ldone24) +_ASM_EXTABLE_UA(40b, .Ldone32) +_ASM_EXTABLE_UA(41b, .Ldone40) +_ASM_EXTABLE_UA(42b, .Ldone48) +_ASM_EXTABLE_UA(43b, .Ldone56) + +.Lquadwords: + cmp $8,%edx + jb .Llong +50: movq (%rsi),%rax +51: movnti %rax,(%rdi) + addq $8,%rsi + addq $8,%rdi + sub $8,%edx + jmp .Lquadwords + +/* + * If we fail on the last full quadword, we will + * not try to do any byte-wise cached accesses. + * We will try to do one more 4-byte uncached + * one, though. + */ +_ASM_EXTABLE_UA(50b, .Llast4) +_ASM_EXTABLE_UA(51b, .Ldone0) + +.Llong: + test $4,%dl + je .Lword +60: movl (%rsi),%eax +61: movnti %eax,(%rdi) + addq $4,%rsi + addq $4,%rdi + sub $4,%edx +.Lword: + sfence + test $2,%dl + je .Lbyte +70: movw (%rsi),%ax +71: movw %ax,(%rdi) + addq $2,%rsi + addq $2,%rdi + sub $2,%edx +.Lbyte: + test $1,%dl + je .Ldone +80: movb (%rsi),%al +81: movb %al,(%rdi) + dec %edx +.Ldone: + mov %edx,%eax + RET + +/* + * If we fail on the last four bytes, we won't + * bother with any fixups. It's dead, Jim. Note + * that there's no need for 'sfence' for any + * of this, since the exception will have been + * serializing. + */ +_ASM_EXTABLE_UA(60b, .Ldone) +_ASM_EXTABLE_UA(61b, .Ldone) +_ASM_EXTABLE_UA(70b, .Ldone) +_ASM_EXTABLE_UA(71b, .Ldone) +_ASM_EXTABLE_UA(80b, .Ldone) +_ASM_EXTABLE_UA(81b, .Ldone) + +/* + * This is the "head needs aliging" case when + * the destination isn't 8-byte aligned. The + * 4-byte case can be done uncached, but any + * smaller alignment is done with regular stores. + */ +.Lalign: + test $1,%dil + je .Lalign_word + test %edx,%edx + je .Ldone +90: movb (%rsi),%al +91: movb %al,(%rdi) + inc %rsi + inc %rdi + dec %edx +.Lalign_word: + test $2,%dil + je .Lalign_long + cmp $2,%edx + jb .Lbyte +92: movw (%rsi),%ax +93: movw %ax,(%rdi) + addq $2,%rsi + addq $2,%rdi + sub $2,%edx +.Lalign_long: + test $4,%dil + je .Lis_aligned + cmp $4,%edx + jb .Lword +94: movl (%rsi),%eax +95: movnti %eax,(%rdi) + addq $4,%rsi + addq $4,%rdi + sub $4,%edx + jmp .Lis_aligned + +/* + * If we fail on the initial alignment accesses, + * we're all done. Again, no point in trying to + * do byte-by-byte probing if the 4-byte load + * fails - we're not doing any uncached accesses + * any more. 
+ */ +_ASM_EXTABLE_UA(90b, .Ldone) +_ASM_EXTABLE_UA(91b, .Ldone) +_ASM_EXTABLE_UA(92b, .Ldone) +_ASM_EXTABLE_UA(93b, .Ldone) +_ASM_EXTABLE_UA(94b, .Ldone) +_ASM_EXTABLE_UA(95b, .Ldone) + +/* + * Exception table fixups for faults in the middle + */ +.Ldone56: sub $8,%edx +.Ldone48: sub $8,%edx +.Ldone40: sub $8,%edx +.Ldone32: sub $8,%edx +.Ldone24: sub $8,%edx +.Ldone16: sub $8,%edx +.Ldone8: sub $8,%edx +.Ldone0: + mov %edx,%eax + RET + +.Lfixup32: + addq $32,%rsi + addq $32,%rdi + sub $32,%edx + jmp .Lquadwords + +.Llast4: +52: movl (%rsi),%eax +53: movnti %eax,(%rdi) + sfence + sub $4,%edx + mov %edx,%eax + RET +_ASM_EXTABLE_UA(52b, .Ldone0) +_ASM_EXTABLE_UA(53b, .Ldone0) + +SYM_FUNC_END(__copy_user_nocache) +EXPORT_SYMBOL(__copy_user_nocache) diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index b70d98d79a9d..b64a2bd1a1ef 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -37,22 +37,22 @@ #define ASM_BARRIER_NOSPEC ALTERNATIVE "", "lfence", X86_FEATURE_LFENCE_RDTSC -#ifdef CONFIG_X86_5LEVEL -#define LOAD_TASK_SIZE_MINUS_N(n) \ - ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rdx), \ - __stringify(mov $((1 << 56) - 4096 - (n)),%rdx), X86_FEATURE_LA57 -#else -#define LOAD_TASK_SIZE_MINUS_N(n) \ - mov $(TASK_SIZE_MAX - (n)),%_ASM_DX -#endif +.macro check_range size:req +.if IS_ENABLED(CONFIG_X86_64) + mov %rax, %rdx + sar $63, %rdx + or %rdx, %rax +.else + cmp $TASK_SIZE_MAX-\size+1, %eax + jae .Lbad_get_user + sbb %edx, %edx /* array_index_mask_nospec() */ + and %edx, %eax +.endif +.endm .text SYM_FUNC_START(__get_user_1) - LOAD_TASK_SIZE_MINUS_N(0) - cmp %_ASM_DX,%_ASM_AX - jae bad_get_user - sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ - and %_ASM_DX, %_ASM_AX + check_range size=1 ASM_STAC 1: movzbl (%_ASM_AX),%edx xor %eax,%eax @@ -62,11 +62,7 @@ SYM_FUNC_END(__get_user_1) EXPORT_SYMBOL(__get_user_1) SYM_FUNC_START(__get_user_2) - LOAD_TASK_SIZE_MINUS_N(1) - cmp %_ASM_DX,%_ASM_AX - jae bad_get_user - sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ - and %_ASM_DX, %_ASM_AX + check_range size=2 ASM_STAC 2: movzwl (%_ASM_AX),%edx xor %eax,%eax @@ -76,11 +72,7 @@ SYM_FUNC_END(__get_user_2) EXPORT_SYMBOL(__get_user_2) SYM_FUNC_START(__get_user_4) - LOAD_TASK_SIZE_MINUS_N(3) - cmp %_ASM_DX,%_ASM_AX - jae bad_get_user - sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ - and %_ASM_DX, %_ASM_AX + check_range size=4 ASM_STAC 3: movl (%_ASM_AX),%edx xor %eax,%eax @@ -90,30 +82,17 @@ SYM_FUNC_END(__get_user_4) EXPORT_SYMBOL(__get_user_4) SYM_FUNC_START(__get_user_8) -#ifdef CONFIG_X86_64 - LOAD_TASK_SIZE_MINUS_N(7) - cmp %_ASM_DX,%_ASM_AX - jae bad_get_user - sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ - and %_ASM_DX, %_ASM_AX + check_range size=8 ASM_STAC +#ifdef CONFIG_X86_64 4: movq (%_ASM_AX),%rdx - xor %eax,%eax - ASM_CLAC - RET #else - LOAD_TASK_SIZE_MINUS_N(7) - cmp %_ASM_DX,%_ASM_AX - jae bad_get_user_8 - sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ - and %_ASM_DX, %_ASM_AX - ASM_STAC 4: movl (%_ASM_AX),%edx 5: movl 4(%_ASM_AX),%ecx +#endif xor %eax,%eax ASM_CLAC RET -#endif SYM_FUNC_END(__get_user_8) EXPORT_SYMBOL(__get_user_8) @@ -166,7 +145,7 @@ EXPORT_SYMBOL(__get_user_nocheck_8) SYM_CODE_START_LOCAL(.Lbad_get_user_clac) ASM_CLAC -bad_get_user: +.Lbad_get_user: xor %edx,%edx mov $(-EFAULT),%_ASM_AX RET @@ -184,23 +163,23 @@ SYM_CODE_END(.Lbad_get_user_8_clac) #endif /* get_user */ - _ASM_EXTABLE_UA(1b, .Lbad_get_user_clac) - _ASM_EXTABLE_UA(2b, .Lbad_get_user_clac) - _ASM_EXTABLE_UA(3b, 
.Lbad_get_user_clac) + _ASM_EXTABLE(1b, .Lbad_get_user_clac) + _ASM_EXTABLE(2b, .Lbad_get_user_clac) + _ASM_EXTABLE(3b, .Lbad_get_user_clac) #ifdef CONFIG_X86_64 - _ASM_EXTABLE_UA(4b, .Lbad_get_user_clac) + _ASM_EXTABLE(4b, .Lbad_get_user_clac) #else - _ASM_EXTABLE_UA(4b, .Lbad_get_user_8_clac) - _ASM_EXTABLE_UA(5b, .Lbad_get_user_8_clac) + _ASM_EXTABLE(4b, .Lbad_get_user_8_clac) + _ASM_EXTABLE(5b, .Lbad_get_user_8_clac) #endif /* __get_user */ - _ASM_EXTABLE_UA(6b, .Lbad_get_user_clac) - _ASM_EXTABLE_UA(7b, .Lbad_get_user_clac) - _ASM_EXTABLE_UA(8b, .Lbad_get_user_clac) + _ASM_EXTABLE(6b, .Lbad_get_user_clac) + _ASM_EXTABLE(7b, .Lbad_get_user_clac) + _ASM_EXTABLE(8b, .Lbad_get_user_clac) #ifdef CONFIG_X86_64 - _ASM_EXTABLE_UA(9b, .Lbad_get_user_clac) + _ASM_EXTABLE(9b, .Lbad_get_user_clac) #else - _ASM_EXTABLE_UA(9b, .Lbad_get_user_8_clac) - _ASM_EXTABLE_UA(10b, .Lbad_get_user_8_clac) + _ASM_EXTABLE(9b, .Lbad_get_user_8_clac) + _ASM_EXTABLE(10b, .Lbad_get_user_8_clac) #endif diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index a64017602010..8f95fb267caa 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -11,13 +11,6 @@ .section .noinstr.text, "ax" /* - * We build a jump to memcpy_orig by default which gets NOPped out on - * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which - * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs - * to a jmp to memcpy_erms which does the REP; MOVSB mem copy. - */ - -/* * memcpy - Copy a memory block. * * Input: @@ -27,17 +20,21 @@ * * Output: * rax original destination + * + * The FSRM alternative should be done inline (avoiding the call and + * the disgusting return handling), but that would require some help + * from the compiler for better calling conventions. + * + * The 'rep movsb' itself is small enough to replace the call, but the + * two register moves blow up the code. And one of them is "needed" + * only for the return value that is the same as the source input, + * which the compiler could/should do much better anyway. */ SYM_TYPED_FUNC_START(__memcpy) - ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ - "jmp memcpy_erms", X86_FEATURE_ERMS + ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM movq %rdi, %rax movq %rdx, %rcx - shrq $3, %rcx - andl $7, %edx - rep movsq - movl %edx, %ecx rep movsb RET SYM_FUNC_END(__memcpy) @@ -46,17 +43,6 @@ EXPORT_SYMBOL(__memcpy) SYM_FUNC_ALIAS(memcpy, __memcpy) EXPORT_SYMBOL(memcpy) -/* - * memcpy_erms() - enhanced fast string memcpy. This is faster and - * simpler than memcpy. Use memcpy_erms when possible. - */ -SYM_FUNC_START_LOCAL(memcpy_erms) - movq %rdi, %rax - movq %rdx, %rcx - rep movsb - RET -SYM_FUNC_END(memcpy_erms) - SYM_FUNC_START_LOCAL(memcpy_orig) movq %rdi, %rax diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 6143b1a6fa2c..7c59a704c458 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -18,27 +18,22 @@ * rdx count (bytes) * * rax original destination + * + * The FSRS alternative should be done inline (avoiding the call and + * the disgusting return handling), but that would require some help + * from the compiler for better calling conventions. + * + * The 'rep stosb' itself is small enough to replace the call, but all + * the register moves blow up the code. And two of them are "needed" + * only for the return value that is the same as the source input, + * which the compiler could/should do much better anyway. 
*/ SYM_FUNC_START(__memset) - /* - * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended - * to use it when possible. If not available, use fast string instructions. - * - * Otherwise, use original memset function. - */ - ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \ - "jmp memset_erms", X86_FEATURE_ERMS + ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS movq %rdi,%r9 + movb %sil,%al movq %rdx,%rcx - andl $7,%edx - shrq $3,%rcx - /* expand byte value */ - movzbl %sil,%esi - movabs $0x0101010101010101,%rax - imulq %rsi,%rax - rep stosq - movl %edx,%ecx rep stosb movq %r9,%rax RET @@ -48,26 +43,6 @@ EXPORT_SYMBOL(__memset) SYM_FUNC_ALIAS(memset, __memset) EXPORT_SYMBOL(memset) -/* - * ISO C memset - set a memory block to a byte value. This function uses - * enhanced rep stosb to override the fast string function. - * The code is simpler and shorter than the fast string function as well. - * - * rdi destination - * rsi value (char) - * rdx count (bytes) - * - * rax original destination - */ -SYM_FUNC_START_LOCAL(memset_erms) - movq %rdi,%r9 - movb %sil,%al - movq %rdx,%rcx - rep stosb - movq %r9,%rax - RET -SYM_FUNC_END(memset_erms) - SYM_FUNC_START_LOCAL(memset_orig) movq %rdi,%r10 diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 32125224fcca..3062d09a776d 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -33,20 +33,20 @@ * as they get called from within inline assembly. */ -#ifdef CONFIG_X86_5LEVEL -#define LOAD_TASK_SIZE_MINUS_N(n) \ - ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rbx), \ - __stringify(mov $((1 << 56) - 4096 - (n)),%rbx), X86_FEATURE_LA57 -#else -#define LOAD_TASK_SIZE_MINUS_N(n) \ - mov $(TASK_SIZE_MAX - (n)),%_ASM_BX -#endif +.macro check_range size:req +.if IS_ENABLED(CONFIG_X86_64) + mov %rcx, %rbx + sar $63, %rbx + or %rbx, %rcx +.else + cmp $TASK_SIZE_MAX-\size+1, %ecx + jae .Lbad_put_user +.endif +.endm .text SYM_FUNC_START(__put_user_1) - LOAD_TASK_SIZE_MINUS_N(0) - cmp %_ASM_BX,%_ASM_CX - jae .Lbad_put_user + check_range size=1 ASM_STAC 1: movb %al,(%_ASM_CX) xor %ecx,%ecx @@ -66,9 +66,7 @@ SYM_FUNC_END(__put_user_nocheck_1) EXPORT_SYMBOL(__put_user_nocheck_1) SYM_FUNC_START(__put_user_2) - LOAD_TASK_SIZE_MINUS_N(1) - cmp %_ASM_BX,%_ASM_CX - jae .Lbad_put_user + check_range size=2 ASM_STAC 3: movw %ax,(%_ASM_CX) xor %ecx,%ecx @@ -88,9 +86,7 @@ SYM_FUNC_END(__put_user_nocheck_2) EXPORT_SYMBOL(__put_user_nocheck_2) SYM_FUNC_START(__put_user_4) - LOAD_TASK_SIZE_MINUS_N(3) - cmp %_ASM_BX,%_ASM_CX - jae .Lbad_put_user + check_range size=4 ASM_STAC 5: movl %eax,(%_ASM_CX) xor %ecx,%ecx @@ -110,9 +106,7 @@ SYM_FUNC_END(__put_user_nocheck_4) EXPORT_SYMBOL(__put_user_nocheck_4) SYM_FUNC_START(__put_user_8) - LOAD_TASK_SIZE_MINUS_N(7) - cmp %_ASM_BX,%_ASM_CX - jae .Lbad_put_user + check_range size=8 ASM_STAC 7: mov %_ASM_AX,(%_ASM_CX) #ifdef CONFIG_X86_32 @@ -144,15 +138,15 @@ SYM_CODE_START_LOCAL(.Lbad_put_user_clac) RET SYM_CODE_END(.Lbad_put_user_clac) - _ASM_EXTABLE_UA(1b, .Lbad_put_user_clac) - _ASM_EXTABLE_UA(2b, .Lbad_put_user_clac) - _ASM_EXTABLE_UA(3b, .Lbad_put_user_clac) - _ASM_EXTABLE_UA(4b, .Lbad_put_user_clac) - _ASM_EXTABLE_UA(5b, .Lbad_put_user_clac) - _ASM_EXTABLE_UA(6b, .Lbad_put_user_clac) - _ASM_EXTABLE_UA(7b, .Lbad_put_user_clac) - _ASM_EXTABLE_UA(9b, .Lbad_put_user_clac) + _ASM_EXTABLE(1b, .Lbad_put_user_clac) + _ASM_EXTABLE(2b, .Lbad_put_user_clac) + _ASM_EXTABLE(3b, .Lbad_put_user_clac) + _ASM_EXTABLE(4b, .Lbad_put_user_clac) + _ASM_EXTABLE(5b, .Lbad_put_user_clac) + 
_ASM_EXTABLE(6b, .Lbad_put_user_clac) + _ASM_EXTABLE(7b, .Lbad_put_user_clac) + _ASM_EXTABLE(9b, .Lbad_put_user_clac) #ifdef CONFIG_X86_32 - _ASM_EXTABLE_UA(8b, .Lbad_put_user_clac) - _ASM_EXTABLE_UA(10b, .Lbad_put_user_clac) + _ASM_EXTABLE(8b, .Lbad_put_user_clac) + _ASM_EXTABLE(10b, .Lbad_put_user_clac) #endif diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 5f61c65322be..27ef53fab6bd 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -33,7 +33,7 @@ .align RETPOLINE_THUNK_SIZE SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) - UNWIND_HINT_EMPTY + UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR ALTERNATIVE_2 __stringify(RETPOLINE \reg), \ @@ -75,7 +75,7 @@ SYM_CODE_END(__x86_indirect_thunk_array) .align RETPOLINE_THUNK_SIZE SYM_INNER_LABEL(__x86_indirect_call_thunk_\reg, SYM_L_GLOBAL) - UNWIND_HINT_EMPTY + UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR CALL_DEPTH_ACCOUNT @@ -103,7 +103,7 @@ SYM_CODE_END(__x86_indirect_call_thunk_array) .align RETPOLINE_THUNK_SIZE SYM_INNER_LABEL(__x86_indirect_jump_thunk_\reg, SYM_L_GLOBAL) - UNWIND_HINT_EMPTY + UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR POLINE \reg ANNOTATE_UNRET_SAFE diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 6c1f8ac5e721..003d90138e20 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -45,7 +45,11 @@ EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); long __copy_user_flushcache(void *dst, const void __user *src, unsigned size) { unsigned long flushed, dest = (unsigned long) dst; - long rc = __copy_user_nocache(dst, src, size, 0); + long rc; + + stac(); + rc = __copy_user_nocache(dst, src, size); + clac(); /* * __copy_user_nocache() uses non-temporal stores for the bulk @@ -136,13 +140,4 @@ void __memcpy_flushcache(void *_dst, const void *_src, size_t size) } } EXPORT_SYMBOL_GPL(__memcpy_flushcache); - -void memcpy_page_flushcache(char *to, struct page *page, size_t offset, - size_t len) -{ - char *from = kmap_atomic(page); - - memcpy_flushcache(to, from + offset, len); - kunmap_atomic(from); -} #endif diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 7316a8224259..e91500a80963 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -10,6 +10,7 @@ #include <asm/fixmap.h> #include <asm/desc.h> #include <asm/kasan.h> +#include <asm/setup.h> static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); @@ -29,6 +30,12 @@ static __init void init_cea_offsets(void) unsigned int max_cea; unsigned int i, j; + if (!kaslr_enabled()) { + for_each_possible_cpu(i) + per_cpu(_cea_offset, i) = i; + return; + } + max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE; /* O(sodding terrible) */ diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c index 092ea436c7e6..b43301cb2a80 100644 --- a/arch/x86/mm/debug_pagetables.c +++ b/arch/x86/mm/debug_pagetables.c @@ -71,6 +71,5 @@ static void __exit pt_dump_debug_exit(void) module_init(pt_dump_debug_init); module_exit(pt_dump_debug_exit); -MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables"); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a498ae1fbe66..e4399983c50c 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -19,6 +19,7 @@ #include <linux/uaccess.h> /* faulthandler_disabled() */ #include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/ #include <linux/mm_types.h> +#include <linux/mm.h> /* 
find_and_lock_vma() */ #include <asm/cpufeature.h> /* boot_cpu_has, ... */ #include <asm/traps.h> /* dotraplinkage, ... */ @@ -1333,6 +1334,38 @@ void do_user_addr_fault(struct pt_regs *regs, } #endif +#ifdef CONFIG_PER_VMA_LOCK + if (!(flags & FAULT_FLAG_USER)) + goto lock_mmap; + + vma = lock_vma_under_rcu(mm, address); + if (!vma) + goto lock_mmap; + + if (unlikely(access_error(error_code, vma))) { + vma_end_read(vma); + goto lock_mmap; + } + fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); + vma_end_read(vma); + + if (!(fault & VM_FAULT_RETRY)) { + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto done; + } + count_vm_vma_lock_event(VMA_LOCK_RETRY); + + /* Quick path to respond to signals */ + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + kernelmode_fixup_or_oops(regs, error_code, address, + SIGBUS, BUS_ADRERR, + ARCH_DEFAULT_PKEY); + return; + } +lock_mmap: +#endif /* CONFIG_PER_VMA_LOCK */ + /* * Kernel-mode access to the user address space should only occur * on well-defined single instructions listed in the exception @@ -1433,6 +1466,9 @@ good_area: } mmap_read_unlock(mm); +#ifdef CONFIG_PER_VMA_LOCK +done: +#endif if (likely(!(fault & VM_FAULT_ERROR))) return; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index cb258f58fdc8..3cdac0f0055d 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -806,7 +806,7 @@ void __init poking_init(void) BUG_ON(!poking_mm); /* Xen PV guests need the PGD to be pinned. */ - paravirt_arch_dup_mmap(NULL, poking_mm); + paravirt_enter_mmap(poking_mm); /* * Randomize the poking address, but make sure that the following page @@ -1048,6 +1048,11 @@ __visible DEFINE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate) = { .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ }; +#ifdef CONFIG_ADDRESS_MASKING +DEFINE_PER_CPU(u64, tlbstate_untag_mask); +EXPORT_PER_CPU_SYMBOL(tlbstate_untag_mask); +#endif + void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) { /* entry 0 MUST be WB (hardwired to speed up translations) */ diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 6453fbaedb08..aa7d279321ea 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -116,6 +116,11 @@ static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *des if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) return; + if (x86_platform.hyper.is_private_mmio(addr)) { + desc->flags |= IORES_MAP_ENCRYPTED; + return; + } + if (!IS_ENABLED(CONFIG_EFI)) return; diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 9c4d8dbcb129..e0b51c09109f 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -513,10 +513,14 @@ void __init mem_encrypt_free_decrypted_mem(void) npages = (vaddr_end - vaddr) >> PAGE_SHIFT; /* - * The unused memory range was mapped decrypted, change the encryption - * attribute from decrypted to encrypted before freeing it. + * If the unused memory range was mapped decrypted, change the encryption + * attribute from decrypted to encrypted before freeing it. Base the + * re-encryption on the same condition used for the decryption in + * sme_postprocess_startup(). Higher level abstractions, such as + * CC_ATTR_MEM_ENCRYPT, aren't necessarily equivalent in a Hyper-V VM + * using vTOM, where sme_me_mask is always zero. 
*/ - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { + if (sme_me_mask) { r = set_memory_encrypted(vaddr, npages); if (r) { pr_warn("failed to free unused decrypted pages\n"); diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 46a00aa858b6..de10800cd4dd 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -1073,11 +1073,15 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, } /* - * untrack_pfn_moved is called, while mremapping a pfnmap for a new region, - * with the old vma after its pfnmap page table has been removed. The new - * vma has a new pfnmap to the same pfn & cache type with VM_PAT set. + * untrack_pfn_clear is called if the following situation fits: + * + * 1) while mremapping a pfnmap for a new region, with the old vma after + * its pfnmap page table has been removed. The new vma has a new pfnmap + * to the same pfn & cache type with VM_PAT set. + * 2) while duplicating vm area, the new vma fails to copy the pgtable from + * old vma. */ -void untrack_pfn_moved(struct vm_area_struct *vma) +void untrack_pfn_clear(struct vm_area_struct *vma) { vm_flags_clear(vma, VM_PAT); } diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 356758b7d4b4..7159cf787613 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -234,7 +234,7 @@ within_inclusive(unsigned long addr, unsigned long start, unsigned long end) * take full advantage of the the limited (s32) immediate addressing range (2G) * of x86_64. * - * See Documentation/x86/x86_64/mm.rst for more detail. + * See Documentation/arch/x86/x86_64/mm.rst for more detail. */ static inline unsigned long highmap_start_pfn(void) @@ -2175,9 +2175,6 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) { - if (hv_is_isolation_supported()) - return hv_set_mem_host_visibility(addr, numpages, !enc); - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) return __set_memory_enc_pgtable(addr, numpages, enc); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 92d73ccede70..267acf27480a 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -154,26 +154,30 @@ static inline u16 user_pcid(u16 asid) return ret; } -static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) +static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam) { + unsigned long cr3 = __sme_pa(pgd) | lam; + if (static_cpu_has(X86_FEATURE_PCID)) { - return __sme_pa(pgd) | kern_pcid(asid); + VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); + cr3 |= kern_pcid(asid); } else { VM_WARN_ON_ONCE(asid != 0); - return __sme_pa(pgd); } + + return cr3; } -static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) +static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid, + unsigned long lam) { - VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); /* * Use boot_cpu_has() instead of this_cpu_has() as this function * might be called during early boot. 
This should work even after * boot because all CPU's the have same capabilities: */ VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID)); - return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; + return build_cr3(pgd, asid, lam) | CR3_NOFLUSH; } /* @@ -274,15 +278,16 @@ static inline void invalidate_user_asid(u16 asid) (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask)); } -static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush) +static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, unsigned long lam, + bool need_flush) { unsigned long new_mm_cr3; if (need_flush) { invalidate_user_asid(new_asid); - new_mm_cr3 = build_cr3(pgdir, new_asid); + new_mm_cr3 = build_cr3(pgdir, new_asid, lam); } else { - new_mm_cr3 = build_cr3_noflush(pgdir, new_asid); + new_mm_cr3 = build_cr3_noflush(pgdir, new_asid, lam); } /* @@ -491,6 +496,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, { struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm); u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); + unsigned long new_lam = mm_lam_cr3_mask(next); bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy); unsigned cpu = smp_processor_id(); u64 next_tlb_gen; @@ -520,7 +526,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * isn't free. */ #ifdef CONFIG_DEBUG_VM - if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) { + if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid, + tlbstate_lam_cr3_mask()))) { /* * If we were to BUG here, we'd be very likely to kill * the system so hard that we don't see the call trace. @@ -552,10 +559,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * instruction. */ if (real_prev == next) { + /* Not actually switching mm's */ VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != next->context.ctx_id); /* + * If this races with another thread that enables lam, 'new_lam' + * might not match tlbstate_lam_cr3_mask(). + */ + + /* * Even in lazy TLB mode, the CPU should stay set in the * mm_cpumask. The TLB shootdown code can figure out from * cpu_tlbstate_shared.is_lazy whether or not to send an IPI. @@ -622,15 +635,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, barrier(); } + set_tlbstate_lam_mode(next); if (need_flush) { this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); - load_new_mm_cr3(next->pgd, new_asid, true); + load_new_mm_cr3(next->pgd, new_asid, new_lam, true); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ - load_new_mm_cr3(next->pgd, new_asid, false); + load_new_mm_cr3(next->pgd, new_asid, new_lam, false); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0); } @@ -691,6 +705,10 @@ void initialize_tlbstate_and_flush(void) /* Assert that CR3 already references the right mm. */ WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd)); + /* LAM expected to be disabled */ + WARN_ON(cr3 & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57)); + WARN_ON(mm_lam_cr3_mask(mm)); + /* * Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization * doesn't work like other CR4 bits because it can only be set from @@ -699,8 +717,8 @@ void initialize_tlbstate_and_flush(void) WARN_ON(boot_cpu_has(X86_FEATURE_PCID) && !(cr4_read_shadow() & X86_CR4_PCIDE)); - /* Force ASID 0 and force a TLB flush. 
*/ - write_cr3(build_cr3(mm->pgd, 0)); + /* Disable LAM, force ASID 0 and force a TLB flush. */ + write_cr3(build_cr3(mm->pgd, 0, 0)); /* Reinitialize tlbstate. */ this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT); @@ -708,6 +726,7 @@ void initialize_tlbstate_and_flush(void) this_cpu_write(cpu_tlbstate.next_asid, 1); this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen); + set_tlbstate_lam_mode(mm); for (i = 1; i < TLB_NR_DYN_ASIDS; i++) this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0); @@ -925,7 +944,7 @@ void flush_tlb_multi(const struct cpumask *cpumask, } /* - * See Documentation/x86/tlb.rst for details. We choose 33 + * See Documentation/arch/x86/tlb.rst for details. We choose 33 * because it is large enough to cover the vast majority (at * least 95%) of allocations, and is small enough that we are * confident it will not cause too much overhead. Each single @@ -1071,8 +1090,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) */ unsigned long __get_current_cr3_fast(void) { - unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, - this_cpu_read(cpu_tlbstate.loaded_mm_asid)); + unsigned long cr3 = + build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, + this_cpu_read(cpu_tlbstate.loaded_mm_asid), + tlbstate_lam_cr3_mask()); /* For now, be very restrictive about when this can be called. */ VM_WARN_ON(in_nmi() || preemptible()); diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 615a76d70019..e3ec02e6ac9f 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -7,6 +7,7 @@ #include <linux/dmi.h> #include <linux/pci.h> #include <linux/vgaarb.h> +#include <asm/amd_nb.h> #include <asm/hpet.h> #include <asm/pci_x86.h> @@ -824,3 +825,82 @@ static void rs690_fix_64bit_dma(struct pci_dev *pdev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma); #endif + +#ifdef CONFIG_AMD_NB + +#define AMD_15B8_RCC_DEV2_EPF0_STRAP2 0x10136008 +#define AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK 0x00000080L + +static void quirk_clear_strap_no_soft_reset_dev2_f0(struct pci_dev *dev) +{ + u32 data; + + if (!amd_smn_read(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, &data)) { + data &= ~AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK; + if (amd_smn_write(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, data)) + pci_err(dev, "Failed to write data 0x%x\n", data); + } else { + pci_err(dev, "Failed to read data\n"); + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b8, quirk_clear_strap_no_soft_reset_dev2_f0); +#endif + +/* + * When returning from D3cold to D0, firmware on some Google Coral and Reef + * family Chromebooks with Intel Apollo Lake SoC clobbers the headers of + * both the L1 PM Substates capability and the previous capability for the + * "Celeron N3350/Pentium N4200/Atom E3900 Series PCI Express Port B #1". + * + * Save those values at enumeration-time and restore them at resume. 
+ */ + +static u16 prev_cap, l1ss_cap; +static u32 prev_header, l1ss_header; + +static void chromeos_save_apl_pci_l1ss_capability(struct pci_dev *dev) +{ + int pos = PCI_CFG_SPACE_SIZE, prev = 0; + u32 header, pheader = 0; + + while (pos) { + pci_read_config_dword(dev, pos, &header); + if (PCI_EXT_CAP_ID(header) == PCI_EXT_CAP_ID_L1SS) { + prev_cap = prev; + prev_header = pheader; + l1ss_cap = pos; + l1ss_header = header; + return; + } + + prev = pos; + pheader = header; + pos = PCI_EXT_CAP_NEXT(header); + } +} + +static void chromeos_fixup_apl_pci_l1ss_capability(struct pci_dev *dev) +{ + u32 header; + + if (!prev_cap || !prev_header || !l1ss_cap || !l1ss_header) + return; + + /* Fixup the header of L1SS Capability if missing */ + pci_read_config_dword(dev, l1ss_cap, &header); + if (header != l1ss_header) { + pci_write_config_dword(dev, l1ss_cap, l1ss_header); + pci_info(dev, "restore L1SS Capability header (was %#010x now %#010x)\n", + header, l1ss_header); + } + + /* Fixup the link to L1SS Capability if missing */ + pci_read_config_dword(dev, prev_cap, &header); + if (header != prev_header) { + pci_write_config_dword(dev, prev_cap, prev_header); + pci_info(dev, "restore previous Capability header (was %#010x now %#010x)\n", + header, prev_header); + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x5ad6, chromeos_save_apl_pci_l1ss_capability); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, 0x5ad6, chromeos_fixup_apl_pci_l1ss_capability); diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c index ed0442e35434..00a92cb2c814 100644 --- a/arch/x86/platform/pvh/enlighten.c +++ b/arch/x86/platform/pvh/enlighten.c @@ -86,7 +86,7 @@ static void __init init_pvh_bootparams(bool xen_guest) } /* - * See Documentation/x86/boot.rst. + * See Documentation/arch/x86/boot.rst. * * Version 2.12 supports Xen entry point but we will use default x86/PC * environment (i.e. hardware_subarch 0). 
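
The ChromeOS Apollo Lake quirk above walks the PCI Express extended capability list by hand in order to find, and later restore, the L1 PM Substates capability and its predecessor. As a minimal illustration of that walk (not part of this patch), the 32-bit extended capability header decodes as sketched below; the kernel's own PCI_EXT_CAP_ID() and PCI_EXT_CAP_NEXT() macros perform the same extraction, and the helper names here are purely illustrative:

    /* Illustrative only: extended capability header field decode. */
    #include <stdint.h>

    static inline uint16_t ext_cap_id(uint32_t header)
    {
            return header & 0xffff;            /* bits 15:0 - capability ID */
    }

    static inline uint16_t ext_cap_next(uint32_t header)
    {
            /* bits 31:20 - next capability offset (dword aligned); 0 ends the list */
            return (header >> 20) & 0xffc;
    }

A next-capability offset of zero terminates the list, which is why the save-side loop in the quirk stops once pos reaches 0.
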
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index 7fe564eaf228..c4365a05ab83 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -50,7 +50,7 @@ #define PVH_DS_SEL (PVH_GDT_ENTRY_DS * 8) SYM_CODE_START_LOCAL(pvh_start_xen) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK cld lgdt (_pa(gdt)) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 236447ee9beb..7a4d5e911415 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -288,7 +288,7 @@ EXPORT_SYMBOL(restore_processor_state); #endif #if defined(CONFIG_HIBERNATION) && defined(CONFIG_HOTPLUG_CPU) -static void resume_play_dead(void) +static void __noreturn resume_play_dead(void) { play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 17f09dc26381..82fec66d46d2 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -69,8 +69,7 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS) CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE) CFLAGS_string.o += $(PURGATORY_CFLAGS) -AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2 -AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2 +asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x)) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 2925074b9a58..d30949e25ebd 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -406,7 +406,7 @@ static void read_ehdr(FILE *fp) if (ehdr.e_version != EV_CURRENT) die("Unknown ELF version\n"); if (ehdr.e_ehsize != sizeof(Elf_Ehdr)) - die("Bad Elf header size\n"); + die("Bad ELF header size\n"); if (ehdr.e_phentsize != sizeof(Elf_Phdr)) die("Bad program header entry\n"); if (ehdr.e_shentsize != sizeof(Elf_Shdr)) diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 1da44aca896c..ada3868c02c2 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -48,7 +48,7 @@ void __init xen_pvh_init(struct boot_params *boot_params) struct xen_platform_op op = { .cmd = XENPF_get_dom0_console, }; - long ret = HYPERVISOR_platform_op(&op); + int ret = HYPERVISOR_platform_op(&op); if (ret > 0) xen_init_vga(&op.u.dom0_console, diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index ee29fb558f2e..b3b8d289b9ab 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -885,14 +885,7 @@ void xen_mm_unpin_all(void) spin_unlock(&pgd_lock); } -static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) -{ - spin_lock(&next->page_table_lock); - xen_pgd_pin(next); - spin_unlock(&next->page_table_lock); -} - -static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) +static void xen_enter_mmap(struct mm_struct *mm) { spin_lock(&mm->page_table_lock); xen_pgd_pin(mm); @@ -2153,8 +2146,7 @@ static const typeof(pv_ops) xen_mmu_ops __initconst = { .make_p4d = PV_CALLEE_SAVE(xen_make_p4d), #endif - .activate_mm = xen_activate_mm, - .dup_mmap = xen_dup_mmap, + .enter_mmap = xen_enter_mmap, .exit_mmap = xen_exit_mmap, .lazy_mode = { diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 4a184f6e4e4d..08f1ceb9eb81 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -165,7 +165,7 @@ xen_pv_trap asm_exc_xen_hypervisor_callback SYM_CODE_START(xen_early_idt_handler_array) i = 0 .rept NUM_EXCEPTION_VECTORS - UNWIND_HINT_EMPTY + UNWIND_HINT_UNDEFINED ENDBR pop %rcx pop %r11 @@ -193,7 +193,7 @@ hypercall_iret = hypercall_page + 
__HYPERVISOR_iret * 32 * rsp->rax } */ SYM_CODE_START(xen_iret) - UNWIND_HINT_EMPTY + UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR pushq $0 jmp hypercall_iret diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index e36ea4268bd2..643d02900fbb 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -45,11 +45,11 @@ SYM_CODE_END(hypercall_page) #ifdef CONFIG_XEN_PV __INIT SYM_CODE_START(startup_xen) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR cld - mov initial_stack(%rip), %rsp + leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp /* Set up %gs. * @@ -71,7 +71,7 @@ SYM_CODE_END(startup_xen) #ifdef CONFIG_XEN_PV_SMP .pushsection .text SYM_CODE_START(asm_cpu_bringup_and_idle) - UNWIND_HINT_EMPTY + UNWIND_HINT_END_OF_STACK ENDBR call cpu_bringup_and_idle |
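
With clear_user and copy_user reduced to rep_stos_alternative and rep_movs_alternative earlier in this series, both routines deliberately take their arguments in the 'rep stos'/'rep movs' registers (rdi/rsi/rcx) and report the remaining byte count in rcx, so that callers can patch the call out entirely on FSRS/FSRM hardware. A minimal sketch of what such a caller can look like, assuming the usual ALTERNATIVE/ALT_NOT machinery and the SMAP stac()/clac() helpers (the wrapper name, header set and clobber list here are illustrative, not lifted from this patch):

    #include <asm/alternative.h>
    #include <asm/cpufeatures.h>
    #include <asm/asm.h>
    #include <asm/smap.h>

    static __always_inline unsigned long
    copy_user_sketch(void *to, const void *from, unsigned long len)
    {
            stac();
            /* FSRM CPUs get a bare 'rep movsb'; others call the asm fallback. */
            asm volatile(
                    "1:\n\t"
                    ALTERNATIVE("rep movsb",
                                "call rep_movs_alternative",
                                ALT_NOT(X86_FEATURE_FSRM))
                    "2:\n"
                    _ASM_EXTABLE_UA(1b, 2b)
                    : "+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
                    : : "memory", "rax", "r8", "r9", "r10", "r11");
            clac();
            return len;     /* bytes left uncopied; 0 on success */
    }

Because the asm fallback is allowed to clobber rax and r8-r11 (as the NOTE in copy_user_64.S spells out), the clobber list must cover them even though the patched-in 'rep movsb' form never touches them.
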