diff options
Diffstat (limited to 'arch')
197 files changed, 2824 insertions, 2889 deletions
diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c index 6d0c9f7268ba..06b0e5fd54a6 100644 --- a/arch/arm/common/locomo.c +++ b/arch/arm/common/locomo.c @@ -816,10 +816,10 @@ EXPORT_SYMBOL(locomo_frontlight_set); * We model this as a regular bus type, and hang devices directly * off this. */ -static int locomo_match(struct device *_dev, struct device_driver *_drv) +static int locomo_match(struct device *_dev, const struct device_driver *_drv) { struct locomo_dev *dev = LOCOMO_DEV(_dev); - struct locomo_driver *drv = LOCOMO_DRV(_drv); + const struct locomo_driver *drv = LOCOMO_DRV(_drv); return dev->devid == drv->devid; } diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 1fbd7363cf11..550978dc3c50 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -1339,10 +1339,10 @@ EXPORT_SYMBOL_GPL(sa1111_get_irq); * We model this as a regular bus type, and hang devices directly * off this. */ -static int sa1111_match(struct device *_dev, struct device_driver *_drv) +static int sa1111_match(struct device *_dev, const struct device_driver *_drv) { struct sa1111_dev *dev = to_sa1111_device(_dev); - struct sa1111_driver *drv = SA1111_DRV(_drv); + const struct sa1111_driver *drv = SA1111_DRV(_drv); return !!(dev->devid & drv->devid); } diff --git a/arch/arm/include/asm/hardware/locomo.h b/arch/arm/include/asm/hardware/locomo.h index 9fd9ad5d9202..3190e1e5067a 100644 --- a/arch/arm/include/asm/hardware/locomo.h +++ b/arch/arm/include/asm/hardware/locomo.h @@ -189,7 +189,7 @@ struct locomo_driver { void (*remove)(struct locomo_dev *); }; -#define LOCOMO_DRV(_d) container_of((_d), struct locomo_driver, drv) +#define LOCOMO_DRV(_d) container_of_const((_d), struct locomo_driver, drv) #define LOCOMO_DRIVER_NAME(_ldev) ((_ldev)->dev.driver->name) diff --git a/arch/arm/include/asm/hardware/sa1111.h b/arch/arm/include/asm/hardware/sa1111.h index d8c6f8a99dfa..a815f39b4243 100644 --- a/arch/arm/include/asm/hardware/sa1111.h +++ b/arch/arm/include/asm/hardware/sa1111.h @@ -404,7 +404,7 @@ struct sa1111_driver { void (*remove)(struct sa1111_dev *); }; -#define SA1111_DRV(_d) container_of((_d), struct sa1111_driver, drv) +#define SA1111_DRV(_d) container_of_const((_d), struct sa1111_driver, drv) #define SA1111_DRIVER_NAME(_sadev) ((_sadev)->dev.driver->name) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7fd70be0463f..b3fc891f1544 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -168,9 +168,9 @@ config ARM64 select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN - select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN - select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN - select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE) + select HAVE_ARCH_KASAN_VMALLOC + select HAVE_ARCH_KASAN_SW_TAGS + select HAVE_ARCH_KASAN_HW_TAGS if ARM64_MTE # Some instrumentation may be unsound, hence EXPERT select HAVE_ARCH_KCSAN if EXPERT select HAVE_ARCH_KFENCE @@ -211,8 +211,8 @@ config ARM64 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_ERROR_INJECTION - select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_GRAPH_RETVAL select HAVE_GCC_PLUGINS select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \ HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI @@ -1471,7 +1471,6 @@ config HOTPLUG_CPU config NUMA bool "NUMA Memory Allocation and Scheduler Support" select GENERIC_ARCH_NUMA - select ACPI_NUMA if ACPI select OF_NUMA select HAVE_SETUP_PER_CPU_AREA select NEED_PER_CPU_EMBED_FIRST_CHUNK @@ -2337,6 +2336,17 @@ config EFI allow the kernel to be booted as an EFI application. This is only useful on systems that have UEFI firmware. +config COMPRESSED_INSTALL + bool "Install compressed image by default" + help + This makes the regular "make install" install the compressed + image we built, not the legacy uncompressed one. + + You can check that a compressed image works for you by doing + "make zinstall" first, and verifying that everything is fine + in your environment before making "make install" do this for + you. + config DMI bool "Enable support for SMBIOS (DMI) tables" depends on EFI diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 3f0f35fd5bb7..f6bc3da1ef11 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -182,7 +182,13 @@ $(BOOT_TARGETS): vmlinux Image.%: Image $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -install: KBUILD_IMAGE := $(boot)/Image +ifeq ($(CONFIG_COMPRESSED_INSTALL),y) + DEFAULT_KBUILD_IMAGE = $(KBUILD_IMAGE) +else + DEFAULT_KBUILD_IMAGE = $(boot)/Image +endif + +install: KBUILD_IMAGE := $(DEFAULT_KBUILD_IMAGE) install zinstall: $(call cmd,install) @@ -229,7 +235,7 @@ define archhelp echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' echo ' image.fit - Flat Image Tree (arch/$(ARCH)/boot/image.fit)' - echo ' install - Install uncompressed kernel' + echo ' install - Install kernel (compressed if COMPRESSED_INSTALL set)' echo ' zinstall - Install compressed kernel' echo ' Install using (your) ~/bin/installkernel or' echo ' (distribution) /sbin/installkernel or' diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index f8efbc128446..7a4f5604be3f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1065,6 +1065,28 @@ static inline bool pgtable_l5_enabled(void) { return false; } #define p4d_offset_kimg(dir,addr) ((p4d_t *)dir) +static inline +p4d_t *p4d_offset_lockless_folded(pgd_t *pgdp, pgd_t pgd, unsigned long addr) +{ + /* + * With runtime folding of the pud, pud_offset_lockless() passes + * the 'pgd_t *' we return here to p4d_to_folded_pud(), which + * will offset the pointer assuming that it points into + * a page-table page. However, the fast GUP path passes us a + * pgd_t allocated on the stack and so we must use the original + * pointer in 'pgdp' to construct the p4d pointer instead of + * using the generic p4d_offset_lockless() implementation. + * + * Note: reusing the original pointer means that we may + * dereference the same (live) page-table entry multiple times. + * This is safe because it is still only loaded once in the + * context of each level and the CPU guarantees same-address + * read-after-read ordering. + */ + return p4d_offset(pgdp, addr); +} +#define p4d_offset_lockless p4d_offset_lockless_folded + #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #define pgd_ERROR(e) \ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index b776e7424fe9..e737c6295ec7 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -507,7 +507,7 @@ static int update_insn_emulation_mode(struct insn_emulation *insn, return ret; } -static int emulation_proc_handler(struct ctl_table *table, int write, +static int emulation_proc_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 82e8a6017382..77006df20a75 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -535,7 +535,7 @@ static unsigned int find_supported_vector_length(enum vec_type type, #if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL) -static int vec_proc_do_default_vl(struct ctl_table *table, int write, +static int vec_proc_do_default_vl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct vl_info *info = table->extra1; diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index d63930c82839..d11da6461278 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -21,7 +21,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # potential future proofing if we end up with internal calls to the exported # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). -ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ +ldflags-y := -shared -soname=linux-vdso.so.1 \ -Bsymbolic --build-id=sha1 -n $(btildflags-y) ifdef CONFIG_LD_ORPHAN_WARN diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index cc4508c604b2..25a2cb6317f3 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -98,7 +98,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__ # From arm vDSO Makefile VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 -VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1 +VDSO_LDFLAGS += -shared --build-id=sha1 VDSO_LDFLAGS += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index a4c1dd4741a4..7ceaa1e0b4bc 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -149,7 +149,7 @@ Res0 63:32 UnsignedEnum 31:28 GIC 0b0000 NI 0b0001 GICv3 - 0b0010 GICv4p1 + 0b0011 GICv4p1 EndEnum UnsignedEnum 27:24 Virt_frac 0b0000 NI @@ -903,7 +903,7 @@ EndEnum UnsignedEnum 27:24 GIC 0b0000 NI 0b0001 IMP - 0b0010 V4P1 + 0b0011 V4P1 EndEnum SignedEnum 23:20 AdvSIMD 0b0000 IMP diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index ebdb7156560c..70f169210b52 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -476,7 +476,6 @@ config NR_CPUS config NUMA bool "NUMA Support" select SMP - select ACPI_NUMA if ACPI help Say Y to compile the kernel with NUMA (Non-Uniform Memory Access) support. This option improves performance on systems with more diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f9d252b6ede1..60077e576935 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -478,6 +478,7 @@ config MACH_LOONGSON64 select BOARD_SCACHE select CSRC_R4K select CEVT_R4K + select SYNC_R4K select FORCE_PCI select ISA select I8259 diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index c2930a75b7e4..1e782275850a 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -240,6 +240,10 @@ GCR_ACCESSOR_RO(32, 0x0d0, gic_status) GCR_ACCESSOR_RO(32, 0x0f0, cpc_status) #define CM_GCR_CPC_STATUS_EX BIT(0) +/* GCR_ACCESS - Controls core/IOCU access to GCRs */ +GCR_ACCESSOR_RW(32, 0x120, access_cm3) +#define CM_GCR_ACCESS_ACCESSEN GENMASK(7, 0) + /* GCR_L2_CONFIG - Indicates L2 cache configuration when Config5.L2C=1 */ GCR_ACCESSOR_RW(32, 0x130, l2_config) #define CM_GCR_L2_CONFIG_BYPASS BIT(20) diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index bc2c240f414b..2427d76f953f 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -50,7 +50,6 @@ extern int __cpu_logical_map[NR_CPUS]; #define SMP_CALL_FUNCTION 0x2 /* Octeon - Tell another core to flush its icache */ #define SMP_ICACHE_FLUSH 0x4 -#define SMP_ASK_C0COUNT 0x8 /* Mask of CPUs which are currently definitely operating coherently */ extern cpumask_t cpu_coherent_mask; diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 9cc087dd1c19..395622c37325 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -317,7 +317,10 @@ static void boot_core(unsigned int core, unsigned int vpe_id) write_gcr_co_reset_ext_base(CM_GCR_Cx_RESET_EXT_BASE_UEB); /* Ensure the core can access the GCRs */ - set_gcr_access(1 << core); + if (mips_cm_revision() < CM_REV_CM3) + set_gcr_access(1 << core); + else + set_gcr_access_cm3(1 << core); if (mips_cpc_present()) { /* Reset the core */ diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c index 66d049cdcf14..147acd972a07 100644 --- a/arch/mips/loongson64/smp.c +++ b/arch/mips/loongson64/smp.c @@ -33,7 +33,6 @@ static void __iomem *ipi_clear0_regs[16]; static void __iomem *ipi_status0_regs[16]; static void __iomem *ipi_en0_regs[16]; static void __iomem *ipi_mailbox_buf[16]; -static uint32_t core0_c0count[NR_CPUS]; static u32 (*ipi_read_clear)(int cpu); static void (*ipi_write_action)(int cpu, u32 action); @@ -382,11 +381,10 @@ loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) ipi_write_action(cpu_logical_map(i), (u32)action); } - static irqreturn_t loongson3_ipi_interrupt(int irq, void *dev_id) { - int i, cpu = smp_processor_id(); - unsigned int action, c0count; + int cpu = smp_processor_id(); + unsigned int action; action = ipi_read_clear(cpu); @@ -399,26 +397,14 @@ static irqreturn_t loongson3_ipi_interrupt(int irq, void *dev_id) irq_exit(); } - if (action & SMP_ASK_C0COUNT) { - BUG_ON(cpu != 0); - c0count = read_c0_count(); - c0count = c0count ? c0count : 1; - for (i = 1; i < nr_cpu_ids; i++) - core0_c0count[i] = c0count; - nudge_writes(); /* Let others see the result ASAP */ - } - return IRQ_HANDLED; } -#define MAX_LOOPS 800 /* * SMP init and finish on secondary CPUs */ static void loongson3_init_secondary(void) { - int i; - uint32_t initcount; unsigned int cpu = smp_processor_id(); unsigned int imask = STATUSF_IP7 | STATUSF_IP6 | STATUSF_IP3 | STATUSF_IP2; @@ -432,23 +418,6 @@ static void loongson3_init_secondary(void) cpu_logical_map(cpu) % loongson_sysconf.cores_per_package); cpu_data[cpu].package = cpu_logical_map(cpu) / loongson_sysconf.cores_per_package; - - i = 0; - core0_c0count[cpu] = 0; - loongson3_send_ipi_single(0, SMP_ASK_C0COUNT); - while (!core0_c0count[cpu]) { - i++; - cpu_relax(); - } - - if (i > MAX_LOOPS) - i = MAX_LOOPS; - if (cpu_data[cpu].package) - initcount = core0_c0count[cpu] + i; - else /* Local access is faster for loops */ - initcount = core0_c0count[cpu] + i/2; - - write_c0_count(initcount); } static void loongson3_smp_finish(void) diff --git a/arch/mips/sibyte/common/sb_tbprof.c b/arch/mips/sibyte/common/sb_tbprof.c index af5333986900..149a9151bc0b 100644 --- a/arch/mips/sibyte/common/sb_tbprof.c +++ b/arch/mips/sibyte/common/sb_tbprof.c @@ -589,4 +589,5 @@ module_exit(sbprof_tb_cleanup); MODULE_ALIAS_CHARDEV_MAJOR(SBPROF_TB_MAJOR); MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); +MODULE_DESCRIPTION("Support for ZBbus profiling"); MODULE_LICENSE("GPL"); diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index dc9b902de8ea..5d650e02cbf4 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -46,6 +46,7 @@ config PARISC select GENERIC_CPU_DEVICES if !SMP select GENERIC_LIB_DEVMEM_IS_ALLOWED select SYSCTL_ARCH_UNALIGN_ALLOW + select SYSCTL_ARCH_UNALIGN_NO_WARN select SYSCTL_EXCEPTION_TRACE select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA @@ -86,6 +87,7 @@ config PARISC select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS select TRACE_IRQFLAGS_SUPPORT select HAVE_FUNCTION_DESCRIPTORS if 64BIT + select PCI_MSI_ARCH_FALLBACKS if PCI_MSI help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/parisc/include/asm/parisc-device.h b/arch/parisc/include/asm/parisc-device.h index 7ddd7f433367..9e74cef4d774 100644 --- a/arch/parisc/include/asm/parisc-device.h +++ b/arch/parisc/include/asm/parisc-device.h @@ -41,7 +41,7 @@ struct parisc_driver { #define to_parisc_device(d) container_of(d, struct parisc_device, dev) -#define to_parisc_driver(d) container_of(d, struct parisc_driver, drv) +#define to_parisc_driver(d) container_of_const(d, struct parisc_driver, drv) #define parisc_parent(d) to_parisc_device(d->dev.parent) static inline const char *parisc_pathname(struct parisc_device *d) diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index 98851ff7699a..a97c0fd55f91 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -20,7 +20,7 @@ * sysdeps/unix/sysv/linux/hppa/sysdep.h */ -#ifdef PIC +#ifndef DONT_USE_PIC /* WARNING: CANNOT BE USED IN A NOP! */ # define K_STW_ASM_PIC " copy %%r19, %%r4\n" # define K_LDW_ASM_PIC " copy %%r4, %%r19\n" @@ -43,7 +43,7 @@ across the syscall. */ #define K_CALL_CLOB_REGS "%r1", "%r2", K_USING_GR4 \ - "%r20", "%r29", "%r31" + "%r20", "%r29", "%r31" #undef K_INLINE_SYSCALL #define K_INLINE_SYSCALL(name, nr, args...) ({ \ @@ -58,7 +58,7 @@ " ldi %1, %%r20\n" \ K_LDW_ASM_PIC \ : "=r" (__res) \ - : "i" (SYS_ify(name)) K_ASM_ARGS_##nr \ + : "i" (name) K_ASM_ARGS_##nr \ : "memory", K_CALL_CLOB_REGS K_CLOB_ARGS_##nr \ ); \ __sys_res = (long)__res; \ @@ -104,42 +104,18 @@ #define K_CLOB_ARGS_1 K_CLOB_ARGS_2, "%r25" #define K_CLOB_ARGS_0 K_CLOB_ARGS_1, "%r26" -#define _syscall0(type,name) \ -type name(void) \ -{ \ - return K_INLINE_SYSCALL(name, 0); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ - return K_INLINE_SYSCALL(name, 1, arg1); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1, type2 arg2) \ -{ \ - return K_INLINE_SYSCALL(name, 2, arg1, arg2); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1, type2 arg2, type3 arg3) \ -{ \ - return K_INLINE_SYSCALL(name, 3, arg1, arg2, arg3); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ - return K_INLINE_SYSCALL(name, 4, arg1, arg2, arg3, arg4); \ -} - -/* select takes 5 arguments */ -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ -{ \ - return K_INLINE_SYSCALL(name, 5, arg1, arg2, arg3, arg4, arg5); \ -} +#define syscall0(name) \ + K_INLINE_SYSCALL(name, 0) +#define syscall1(name, arg1) \ + K_INLINE_SYSCALL(name, 1, arg1) +#define syscall2(name, arg1, arg2) \ + K_INLINE_SYSCALL(name, 2, arg1, arg2) +#define syscall3(name, arg1, arg2, arg3) \ + K_INLINE_SYSCALL(name, 3, arg1, arg2, arg3) +#define syscall4(name, arg1, arg2, arg3, arg4) \ + K_INLINE_SYSCALL(name, 4, arg1, arg2, arg3, arg4) +#define syscall5(name, arg1, arg2, arg3, arg4, arg5) \ + K_INLINE_SYSCALL(name, 5, arg1, arg2, arg3, arg4, arg5) #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_STAT64 diff --git a/arch/parisc/include/asm/vdso.h b/arch/parisc/include/asm/vdso.h index ef8206193f82..2a2dc11b5545 100644 --- a/arch/parisc/include/asm/vdso.h +++ b/arch/parisc/include/asm/vdso.h @@ -19,6 +19,6 @@ extern struct vdso_data *vdso_data; /* Default link addresses for the vDSOs */ #define VDSO_LBASE 0 -#define VDSO_VERSION_STRING LINUX_5.18 +#define VDSO_VERSION_STRING LINUX_6.11 #endif /* __PARISC_VDSO_H__ */ diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 483bfafd930c..db531e58d70e 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -611,11 +611,7 @@ void __init parisc_setup_cache_timing(void) threshold/1024); set_tlb_threshold: - if (threshold > FLUSH_TLB_THRESHOLD) - parisc_tlb_flush_threshold = threshold; - else - parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD; - + parisc_tlb_flush_threshold = max(threshold, FLUSH_TLB_THRESHOLD); printk(KERN_INFO "TLB flush threshold set to %lu KiB\n", parisc_tlb_flush_threshold/1024); } diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index ac19d685e4a5..1e793f770f71 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -97,7 +97,7 @@ static int for_each_padev(int (*fn)(struct device *, void *), void * data) * @driver: the PA-RISC driver to try * @dev: the PA-RISC device to try */ -static int match_device(struct parisc_driver *driver, struct parisc_device *dev) +static int match_device(const struct parisc_driver *driver, struct parisc_device *dev) { const struct parisc_device_id *ids; @@ -548,7 +548,7 @@ alloc_pa_dev(unsigned long hpa, struct hardware_path *mod_path) return dev; } -static int parisc_generic_match(struct device *dev, struct device_driver *drv) +static int parisc_generic_match(struct device *dev, const struct device_driver *drv) { return match_device(to_parisc_driver(drv), to_parisc_device(dev)); } diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 71e596ca5a86..3e79e40e361d 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -104,6 +104,7 @@ #define ERR_NOTHANDLED -1 int unaligned_enabled __read_mostly = 1; +int no_unaligned_warning __read_mostly; static int emulate_ldh(struct pt_regs *regs, int toreg) { @@ -399,6 +400,7 @@ void handle_unaligned(struct pt_regs *regs) } else { static DEFINE_RATELIMIT_STATE(kernel_ratelimit, 5 * HZ, 5); if (!(current->thread.flags & PARISC_UAC_NOPRINT) && + !no_unaligned_warning && __ratelimit(&kernel_ratelimit)) pr_warn("Kernel: unaligned access to " RFMT " in %pS " "(iir " RFMT ")\n", diff --git a/arch/parisc/kernel/vdso32/Makefile b/arch/parisc/kernel/vdso32/Makefile index 1350d50c6306..2b36d25ada6e 100644 --- a/arch/parisc/kernel/vdso32/Makefile +++ b/arch/parisc/kernel/vdso32/Makefile @@ -1,11 +1,25 @@ -# List of files in the vdso, has to be asm only for now +# Include the generic Makefile to check the built vdso. +include $(srctree)/lib/vdso/Makefile + +KCOV_INSTRUMENT := n + +# Disable gcov profiling, ubsan and kasan for VDSO code +GCOV_PROFILE := n +UBSAN_SANITIZE := n +KASAN_SANITIZE := n +KCSAN_SANITIZE := n obj-vdso32 = note.o sigtramp.o restart_syscall.o +obj-cvdso32 = vdso32_generic.o # Build rules -targets := $(obj-vdso32) vdso32.so +targets := $(obj-vdso32) $(obj-cvdso32) vdso32.so obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) +obj-cvdso32 := $(addprefix $(obj)/, $(obj-cvdso32)) + +VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_vdso32_generic.o = $(VDSO_CFLAGS_REMOVE) ccflags-y := -shared -fno-common -fbuiltin -mno-fast-indirect-calls -O2 -mno-long-calls # -march=1.1 -mschedule=7100LC @@ -26,18 +40,22 @@ $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so FORCE # Force dependency (incbin is bad) # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so: $(obj)/vdso32.lds $(obj-vdso32) $(VDSO_LIBGCC) FORCE +$(obj)/vdso32.so: $(obj)/vdso32.lds $(obj-vdso32) $(obj-cvdso32) $(VDSO_LIBGCC) FORCE $(call if_changed,vdso32ld) # assembly rules for the .S files $(obj-vdso32): %.o: %.S FORCE $(call if_changed_dep,vdso32as) +$(obj-cvdso32): %.o: %.c FORCE + $(call if_changed_dep,vdso32cc) # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $(filter-out FORCE, $^) -o $@ quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $< +quiet_cmd_vdso32cc = VDSO32C $@ + cmd_vdso32cc = $(CROSS32CC) $(c_flags) -c -o $@ $< # Generate VDSO offsets using helper script gen-vdsosym := $(src)/gen_vdso_offsets.sh diff --git a/arch/parisc/kernel/vdso32/vdso32.lds.S b/arch/parisc/kernel/vdso32/vdso32.lds.S index d4aff3af5262..4273baa26b65 100644 --- a/arch/parisc/kernel/vdso32/vdso32.lds.S +++ b/arch/parisc/kernel/vdso32/vdso32.lds.S @@ -106,6 +106,9 @@ VERSION global: __kernel_sigtramp_rt32; __kernel_restart_syscall32; + __vdso_gettimeofday; + __vdso_clock_gettime; + __vdso_clock_gettime64; local: *; }; } diff --git a/arch/parisc/kernel/vdso32/vdso32_generic.c b/arch/parisc/kernel/vdso32/vdso32_generic.c new file mode 100644 index 000000000000..8d5bd59e8646 --- /dev/null +++ b/arch/parisc/kernel/vdso32/vdso32_generic.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "asm/unistd.h" +#include <linux/types.h> +#include <uapi/asm/unistd_32.h> + +struct timezone; +struct old_timespec32; +struct __kernel_timespec; +struct __kernel_old_timeval; + +/* forward declarations */ +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); +int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts); +int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts); + + +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) +{ + return syscall2(__NR_gettimeofday, (long)tv, (long)tz); +} + +int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts) +{ + return syscall2(__NR_clock_gettime, (long)clock, (long)ts); +} + +int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts) +{ + return syscall2(__NR_clock_gettime64, (long)clock, (long)ts); +} diff --git a/arch/parisc/kernel/vdso64/Makefile b/arch/parisc/kernel/vdso64/Makefile index 0b1c1cc4c2c7..bd87bd6a6659 100644 --- a/arch/parisc/kernel/vdso64/Makefile +++ b/arch/parisc/kernel/vdso64/Makefile @@ -1,12 +1,25 @@ -# List of files in the vdso, has to be asm only for now +# Include the generic Makefile to check the built vdso. +include $(srctree)/lib/vdso/Makefile + +KCOV_INSTRUMENT := n + +# Disable gcov profiling, ubsan and kasan for VDSO code +GCOV_PROFILE := n +UBSAN_SANITIZE := n +KASAN_SANITIZE := n +KCSAN_SANITIZE := n obj-vdso64 = note.o sigtramp.o restart_syscall.o +obj-cvdso64 = vdso64_generic.o # Build rules -targets := $(obj-vdso64) vdso64.so -obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) +targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) +obj-cvdso64 := $(addprefix $(obj)/, $(obj-cvdso64)) +VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE) ccflags-y := -shared -fno-common -fno-builtin ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ @@ -26,18 +39,22 @@ $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so FORCE # Force dependency (incbin is bad) # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so: $(obj)/vdso64.lds $(obj-vdso64) $(VDSO_LIBGCC) FORCE +$(obj)/vdso64.so: $(obj)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) $(VDSO_LIBGCC) FORCE $(call if_changed,vdso64ld) # assembly rules for the .S files $(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) +$(obj-cvdso64): %.o: %.c FORCE + $(call if_changed_dep,vdso64cc) # actual build commands quiet_cmd_vdso64ld = VDSO64L $@ cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter-out FORCE, $^) -o $@ quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< +quiet_cmd_vdso64cc = VDSO64C $@ + cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $< # Generate VDSO offsets using helper script gen-vdsosym := $(src)/gen_vdso_offsets.sh diff --git a/arch/parisc/kernel/vdso64/vdso64.lds.S b/arch/parisc/kernel/vdso64/vdso64.lds.S index de1fb4b19286..10f25e4e1554 100644 --- a/arch/parisc/kernel/vdso64/vdso64.lds.S +++ b/arch/parisc/kernel/vdso64/vdso64.lds.S @@ -104,6 +104,8 @@ VERSION global: __kernel_sigtramp_rt64; __kernel_restart_syscall64; + __vdso_gettimeofday; + __vdso_clock_gettime; local: *; }; } diff --git a/arch/parisc/kernel/vdso64/vdso64_generic.c b/arch/parisc/kernel/vdso64/vdso64_generic.c new file mode 100644 index 000000000000..fc6836a0075b --- /dev/null +++ b/arch/parisc/kernel/vdso64/vdso64_generic.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "asm/unistd.h" +#include <linux/types.h> + +struct timezone; +struct __kernel_timespec; +struct __kernel_old_timeval; + +/* forward declarations */ +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); + + +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) +{ + return syscall2(__NR_gettimeofday, (long)tv, (long)tz); +} + +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) +{ + return syscall2(__NR_clock_gettime, (long)clock, (long)ts); +} diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index d13d8fdc3411..987e23a2bd28 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -390,11 +390,7 @@ int ps3_system_bus_device_register(struct ps3_system_bus_device *dev); int ps3_system_bus_driver_register(struct ps3_system_bus_driver *drv); void ps3_system_bus_driver_unregister(struct ps3_system_bus_driver *drv); -static inline struct ps3_system_bus_driver *ps3_drv_to_system_bus_drv( - struct device_driver *_drv) -{ - return container_of(_drv, struct ps3_system_bus_driver, core); -} +#define ps3_drv_to_system_bus_drv(_drv) container_of_const(_drv, struct ps3_system_bus_driver, core) static inline struct ps3_system_bus_device *ps3_dev_to_system_bus_dev( const struct device *_dev) { diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h index 6faf2a931755..7c444150c5ad 100644 --- a/arch/powerpc/include/asm/vio.h +++ b/arch/powerpc/include/asm/vio.h @@ -156,11 +156,7 @@ static inline int vio_enable_interrupts(struct vio_dev *dev) } #endif -static inline struct vio_driver *to_vio_driver(struct device_driver *drv) -{ - return container_of(drv, struct vio_driver, driver); -} - +#define to_vio_driver(__drv) container_of_const(__drv, struct vio_driver, driver) #define to_vio_dev(__dev) container_of_const(__dev, struct vio_dev, dev) #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 961aadc71de2..5e6c7b527677 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1984,8 +1984,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; r = -ENXIO; - if (!xive_enabled()) + if (!xive_enabled()) { + fdput(f); break; + } r = -EPERM; dev = kvm_device_from_filp(f.file); diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 56dc6b29a3e7..b9a7d9bae687 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -333,10 +333,10 @@ int ps3_mmio_region_init(struct ps3_system_bus_device *dev, EXPORT_SYMBOL_GPL(ps3_mmio_region_init); static int ps3_system_bus_match(struct device *_dev, - struct device_driver *_drv) + const struct device_driver *_drv) { int result; - struct ps3_system_bus_driver *drv = ps3_drv_to_system_bus_drv(_drv); + const struct ps3_system_bus_driver *drv = ps3_drv_to_system_bus_drv(_drv); struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); if (!dev->match_sub_id) diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index b401282727a4..3436b0af795e 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -339,7 +339,7 @@ static struct attribute *ibmbus_bus_attrs[] = { }; ATTRIBUTE_GROUPS(ibmbus_bus); -static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv) +static int ibmebus_bus_bus_match(struct device *dev, const struct device_driver *drv) { const struct of_device_id *matches = drv->of_match_table; diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 36d1c7d4156b..ac1d2d2c9a88 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1576,10 +1576,10 @@ void vio_unregister_device(struct vio_dev *viodev) } EXPORT_SYMBOL(vio_unregister_device); -static int vio_bus_match(struct device *dev, struct device_driver *drv) +static int vio_bus_match(struct device *dev, const struct device_driver *drv) { const struct vio_dev *vio_dev = to_vio_dev(dev); - struct vio_driver *vio_drv = to_vio_driver(drv); + const struct vio_driver *vio_drv = to_vio_driver(drv); const struct vio_device_id *ids = vio_drv->id_table; return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL); @@ -1689,7 +1689,7 @@ struct vio_dev *vio_find_node(struct device_node *vnode) /* construct the kobject name from the device node */ if (of_node_is_type(vnode_parent, "vdevice")) { const __be32 *prop; - + prop = of_get_property(vnode, "reg", NULL); if (!prop) goto out; diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 3ceec2ca84fa..0f3cd7c3a436 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -13,7 +13,9 @@ config 32BIT config RISCV def_bool y select ACPI_GENERIC_GSI if ACPI + select ACPI_PPTT if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI + select ACPI_SPCR_TABLE if ACPI select ARCH_DMA_DEFAULT_COHERENT select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM_VMEMMAP @@ -123,6 +125,7 @@ config RISCV select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_STACKLEAK select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU @@ -154,7 +157,6 @@ config RISCV select HAVE_KERNEL_UNCOMPRESSED if !XIP_KERNEL && !EFI_ZBOOT select HAVE_KERNEL_ZSTD if !XIP_KERNEL && !EFI_ZBOOT select HAVE_KPROBES if !XIP_KERNEL - select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL select HAVE_KRETPROBES if !XIP_KERNEL # https://github.com/ClangBuiltLinux/linux/issues/1881 select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD @@ -820,6 +822,8 @@ config RISCV_EFFICIENT_UNALIGNED_ACCESS endchoice +source "arch/riscv/Kconfig.vendor" + endmenu # "Platform type" menu "Kernel features" diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor new file mode 100644 index 000000000000..6f1cdd32ed29 --- /dev/null +++ b/arch/riscv/Kconfig.vendor @@ -0,0 +1,19 @@ +menu "Vendor extensions" + +config RISCV_ISA_VENDOR_EXT + bool + +menu "Andes" +config RISCV_ISA_VENDOR_EXT_ANDES + bool "Andes vendor extension support" + select RISCV_ISA_VENDOR_EXT + default y + help + Say N here if you want to disable all Andes vendor extension + support. This will cause any Andes vendor extensions that are + requested by hardware probing to be ignored. + + If you don't know what to do here, say Y. +endmenu + +endmenu diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 869c0345b908..4e9e7a28bf9b 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -18,7 +18,6 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S OBJCOPYFLAGS_loader.bin :=-O binary OBJCOPYFLAGS_xipImage :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image Image.* loader loader.o loader.lds loader.bin targets := Image Image.* loader loader.o loader.lds loader.bin xipImage ifeq ($(CONFIG_XIP_KERNEL),y) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 3f1f055866af..0d678325444f 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -7,6 +7,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y CONFIG_MEMCG=y +CONFIG_BLK_CGROUP=y CONFIG_CGROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y @@ -35,9 +36,6 @@ CONFIG_ARCH_THEAD=y CONFIG_ARCH_VIRT=y CONFIG_ARCH_CANAAN=y CONFIG_SMP=y -CONFIG_HOTPLUG_CPU=y -CONFIG_PM=y -CONFIG_CPU_IDLE=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_FREQ_GOV_POWERSAVE=m @@ -52,13 +50,11 @@ CONFIG_ACPI=y CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -CONFIG_SPARSEMEM_MANUAL=y CONFIG_BLK_DEV_THROTTLING=y +CONFIG_SPARSEMEM_MANUAL=y CONFIG_NET=y CONFIG_PACKET=y -CONFIG_UNIX=y CONFIG_XFRM_USER=m -CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_PNP=y @@ -102,9 +98,9 @@ CONFIG_NET_SCHED=y CONFIG_NET_CLS_CGROUP=m CONFIG_NETLINK_DIAG=y CONFIG_CGROUP_NET_PRIO=y +CONFIG_CAN=m CONFIG_NET_9P=y CONFIG_NET_9P_VIRTIO=y -CONFIG_CAN=m CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y @@ -153,8 +149,8 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y +CONFIG_SERIAL_SH_SCI=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y @@ -179,7 +175,6 @@ CONFIG_DEVFREQ_THERMAL=y CONFIG_RZG2L_THERMAL=y CONFIG_WATCHDOG=y CONFIG_SUNXI_WATCHDOG=y -CONFIG_RENESAS_RZG2LWDT=y CONFIG_MFD_AXP20X_I2C=y CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y @@ -193,11 +188,9 @@ CONFIG_DRM_NOUVEAU=m CONFIG_DRM_SUN4I=m CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB=y -CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SOC=y -CONFIG_SND_SOC_RZ=m CONFIG_SND_DESIGNWARE_I2S=m CONFIG_SND_SOC_STARFIVE=m CONFIG_SND_SOC_JH7110_PWMDAC=m @@ -239,34 +232,31 @@ CONFIG_USB_CONFIGFS_F_FS=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y -CONFIG_MMC_SDHCI_CADENCE=y CONFIG_MMC_SDHCI_OF_DWCMSHC=y +CONFIG_MMC_SDHCI_CADENCE=y CONFIG_MMC_SPI=y +CONFIG_MMC_SDHI=y CONFIG_MMC_DW=y CONFIG_MMC_DW_STARFIVE=y -CONFIG_MMC_SDHI=y CONFIG_MMC_SUNXI=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SUN6I=y CONFIG_DMADEVICES=y CONFIG_DMA_SUN6I=m CONFIG_DW_AXI_DMAC=y -CONFIG_RZ_DMAC=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y -CONFIG_RENESAS_OSTM=y CONFIG_CLK_SOPHGO_CV1800=y CONFIG_SUN8I_DE2_CCU=m +CONFIG_RENESAS_OSTM=y CONFIG_SUN50I_IOMMU=y CONFIG_RPMSG_CHAR=y CONFIG_RPMSG_CTRL=y CONFIG_RPMSG_VIRTIO=y -CONFIG_ARCH_R9A07G043=y +CONFIG_PM_DEVFREQ=y CONFIG_IIO=y -CONFIG_RZG2L_ADC=m -CONFIG_RESET_RZG2L_USBPHY_CTRL=y CONFIG_PHY_SUN4I_USB=m CONFIG_PHY_RCAR_GEN3_USB2=y CONFIG_PHY_STARFIVE_JH7110_DPHY_RX=m diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c index f2708a9494a1..fc1a34faa5f3 100644 --- a/arch/riscv/errata/andes/errata.c +++ b/arch/riscv/errata/andes/errata.c @@ -17,6 +17,7 @@ #include <asm/processor.h> #include <asm/sbi.h> #include <asm/vendorid_list.h> +#include <asm/vendor_extensions.h> #define ANDES_AX45MP_MARCHID 0x8000000000008a45UL #define ANDES_AX45MP_MIMPID 0x500UL @@ -65,6 +66,8 @@ void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct al unsigned long archid, unsigned long impid, unsigned int stage) { + BUILD_BUG_ON(ERRATA_ANDES_NUMBER >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE); + if (stage == RISCV_ALTERNATIVES_BOOT) errata_probe_iocp(stage, archid, impid); diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index 716cfedad3a2..cea3b96ade11 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -12,6 +12,7 @@ #include <asm/alternative.h> #include <asm/vendorid_list.h> #include <asm/errata_list.h> +#include <asm/vendor_extensions.h> struct errata_info_t { char name[32]; @@ -96,6 +97,8 @@ void sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, u32 cpu_apply_errata = 0; u32 tmp; + BUILD_BUG_ON(ERRATA_SIFIVE_NUMBER >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE); + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) return; diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index bf6a0a6318ee..f5120e07c318 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -18,6 +18,7 @@ #include <asm/io.h> #include <asm/patch.h> #include <asm/vendorid_list.h> +#include <asm/vendor_extensions.h> #define CSR_TH_SXSTATUS 0x5c0 #define SXSTATUS_MAEE _AC(0x200000, UL) @@ -166,6 +167,8 @@ void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, u32 tmp; void *oldptr, *altptr; + BUILD_BUG_ON(ERRATA_THEAD_NUMBER >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE); + for (alt = begin; alt < end; alt++) { if (alt->vendor_id != THEAD_VENDOR_ID) continue; diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h index 7dad0cf9d701..e0a1f84404f3 100644 --- a/arch/riscv/include/asm/acpi.h +++ b/arch/riscv/include/asm/acpi.h @@ -61,11 +61,14 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) { } void acpi_init_rintc_map(void); struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu); -u32 get_acpi_id_for_cpu(int cpu); +static inline u32 get_acpi_id_for_cpu(int cpu) +{ + return acpi_cpu_get_madt_rintc(cpu)->uid; +} + int acpi_get_riscv_isa(struct acpi_table_header *table, unsigned int cpu, const char **isa); -static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; } void acpi_get_cbo_block_size(struct acpi_table_header *table, u32 *cbom_size, u32 *cboz_size, u32 *cbop_size); #else @@ -87,4 +90,12 @@ static inline void acpi_get_cbo_block_size(struct acpi_table_header *table, #endif /* CONFIG_ACPI */ +#ifdef CONFIG_ACPI_NUMA +int acpi_numa_get_nid(unsigned int cpu); +void acpi_map_cpus_to_nodes(void); +#else +static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; } +static inline void acpi_map_cpus_to_nodes(void) { } +#endif /* CONFIG_ACPI_NUMA */ + #endif /*_ASM_ACPI_H*/ diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 880606b0469a..71af9ecfcfcb 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -170,7 +170,7 @@ legacy: ({ \ typeof(x) x_ = (x); \ __builtin_constant_p(x_) ? \ - (int)((x_ != 0) ? (32 - __builtin_clz(x_)) : 0) \ + ((x_ != 0) ? (32 - __builtin_clz(x_)) : 0) \ : \ variable_fls(x_); \ }) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 000796c2d0b1..45f9c1171a48 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -33,6 +33,31 @@ extern struct riscv_isainfo hart_isa[NR_CPUS]; void riscv_user_isa_enable(void); +#define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) { \ + .name = #_name, \ + .property = #_name, \ + .id = _id, \ + .subset_ext_ids = _subset_exts, \ + .subset_ext_size = _subset_exts_size, \ + .validate = _validate \ +} + +#define __RISCV_ISA_EXT_DATA(_name, _id) _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, NULL) + +#define __RISCV_ISA_EXT_DATA_VALIDATE(_name, _id, _validate) \ + _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, _validate) + +/* Used to declare pure "lasso" extension (Zk for instance) */ +#define __RISCV_ISA_EXT_BUNDLE(_name, _bundled_exts) \ + _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, \ + ARRAY_SIZE(_bundled_exts), NULL) + +/* Used to declare extensions that are a superset of other extensions (Zvbb for instance) */ +#define __RISCV_ISA_EXT_SUPERSET(_name, _id, _sub_exts) \ + _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), NULL) +#define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \ + _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate) + #if defined(CONFIG_RISCV_MISALIGNED) bool check_unaligned_access_emulated_all_cpus(void); void unaligned_emulation_finish(void); @@ -79,59 +104,66 @@ extern bool riscv_isa_fallback; unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); +#define STANDARD_EXT 0 + bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit); #define riscv_isa_extension_available(isa_bitmap, ext) \ __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) -static __always_inline bool -riscv_has_extension_likely(const unsigned long ext) +static __always_inline bool __riscv_has_extension_likely(const unsigned long vendor, + const unsigned long ext) { - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm goto( - ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_no); - } else { - if (!__riscv_isa_extension_available(NULL, ext)) - goto l_no; - } + asm goto(ALTERNATIVE("j %l[l_no]", "nop", %[vendor], %[ext], 1) + : + : [vendor] "i" (vendor), [ext] "i" (ext) + : + : l_no); return true; l_no: return false; } -static __always_inline bool -riscv_has_extension_unlikely(const unsigned long ext) +static __always_inline bool __riscv_has_extension_unlikely(const unsigned long vendor, + const unsigned long ext) { - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm goto( - ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_yes); - } else { - if (__riscv_isa_extension_available(NULL, ext)) - goto l_yes; - } + asm goto(ALTERNATIVE("nop", "j %l[l_yes]", %[vendor], %[ext], 1) + : + : [vendor] "i" (vendor), [ext] "i" (ext) + : + : l_yes); return false; l_yes: return true; } +static __always_inline bool riscv_has_extension_unlikely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_unlikely(STANDARD_EXT, ext); + + return __riscv_isa_extension_available(NULL, ext); +} + +static __always_inline bool riscv_has_extension_likely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_likely(STANDARD_EXT, ext); + + return __riscv_isa_extension_available(NULL, ext); +} + static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) { - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && + __riscv_has_extension_likely(STANDARD_EXT, ext)) return true; return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); @@ -139,7 +171,10 @@ static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsign static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext) { - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && + __riscv_has_extension_unlikely(STANDARD_EXT, ext)) return true; return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index b18b202ca141..5a0bd27fd11a 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -80,19 +80,18 @@ #define RISCV_ISA_EXT_ZFA 71 #define RISCV_ISA_EXT_ZTSO 72 #define RISCV_ISA_EXT_ZACAS 73 -#define RISCV_ISA_EXT_XANDESPMU 74 -#define RISCV_ISA_EXT_ZVE32X 75 -#define RISCV_ISA_EXT_ZVE32F 76 -#define RISCV_ISA_EXT_ZVE64X 77 -#define RISCV_ISA_EXT_ZVE64F 78 -#define RISCV_ISA_EXT_ZVE64D 79 -#define RISCV_ISA_EXT_ZIMOP 80 -#define RISCV_ISA_EXT_ZCA 81 -#define RISCV_ISA_EXT_ZCB 82 -#define RISCV_ISA_EXT_ZCD 83 -#define RISCV_ISA_EXT_ZCF 84 -#define RISCV_ISA_EXT_ZCMOP 85 -#define RISCV_ISA_EXT_ZAWRS 86 +#define RISCV_ISA_EXT_ZVE32X 74 +#define RISCV_ISA_EXT_ZVE32F 75 +#define RISCV_ISA_EXT_ZVE64X 76 +#define RISCV_ISA_EXT_ZVE64F 77 +#define RISCV_ISA_EXT_ZVE64D 78 +#define RISCV_ISA_EXT_ZIMOP 79 +#define RISCV_ISA_EXT_ZCA 80 +#define RISCV_ISA_EXT_ZCB 81 +#define RISCV_ISA_EXT_ZCD 82 +#define RISCV_ISA_EXT_ZCF 83 +#define RISCV_ISA_EXT_ZCMOP 84 +#define RISCV_ISA_EXT_ZAWRS 85 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 150a9877b0af..ef01c182af2b 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,7 +8,7 @@ #include <uapi/asm/hwprobe.h> -#define RISCV_HWPROBE_MAX_KEY 7 +#define RISCV_HWPROBE_MAX_KEY 8 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 235fd45d998d..7ede2111c591 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -37,7 +37,7 @@ * define the PAGE_OFFSET value for SV48 and SV39. */ #define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL) -#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL) +#define PAGE_OFFSET_L3 _AC(0xffffffd600000000, UL) #else #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) #endif /* CONFIG_64BIT */ diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 5d473343634b..fca5c6be2b81 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -10,6 +10,7 @@ #include <asm/page.h> #include <linux/const.h> +#include <linux/sizes.h> /* thread information allocation */ #define THREAD_SIZE_ORDER CONFIG_THREAD_SIZE_ORDER diff --git a/arch/riscv/include/asm/vendor_extensions.h b/arch/riscv/include/asm/vendor_extensions.h new file mode 100644 index 000000000000..7437304a71b9 --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2024 Rivos, Inc + */ + +#ifndef _ASM_VENDOR_EXTENSIONS_H +#define _ASM_VENDOR_EXTENSIONS_H + +#include <asm/cpufeature.h> + +#include <linux/array_size.h> +#include <linux/types.h> + +/* + * The extension keys of each vendor must be strictly less than this value. + */ +#define RISCV_ISA_VENDOR_EXT_MAX 32 + +struct riscv_isavendorinfo { + DECLARE_BITMAP(isa, RISCV_ISA_VENDOR_EXT_MAX); +}; + +struct riscv_isa_vendor_ext_data_list { + bool is_initialized; + const size_t ext_data_count; + const struct riscv_isa_ext_data *ext_data; + struct riscv_isavendorinfo per_hart_isa_bitmap[NR_CPUS]; + struct riscv_isavendorinfo all_harts_isa_bitmap; +}; + +extern struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[]; + +extern const size_t riscv_isa_vendor_ext_list_size; + +/* + * The alternatives need some way of distinguishing between vendor extensions + * and errata. Incrementing all of the vendor extension keys so they are at + * least 0x8000 accomplishes that. + */ +#define RISCV_VENDOR_EXT_ALTERNATIVES_BASE 0x8000 + +#define VENDOR_EXT_ALL_CPUS -1 + +bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsigned int bit); +#define riscv_cpu_isa_vendor_extension_available(cpu, vendor, ext) \ + __riscv_isa_vendor_extension_available(cpu, vendor, RISCV_ISA_VENDOR_EXT_##ext) +#define riscv_isa_vendor_extension_available(vendor, ext) \ + __riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, \ + RISCV_ISA_VENDOR_EXT_##ext) + +static __always_inline bool riscv_has_vendor_extension_likely(const unsigned long vendor, + const unsigned long ext) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return false; + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_likely(vendor, + ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE); + + return __riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, ext); +} + +static __always_inline bool riscv_has_vendor_extension_unlikely(const unsigned long vendor, + const unsigned long ext) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return false; + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_unlikely(vendor, + ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE); + + return __riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, ext); +} + +static __always_inline bool riscv_cpu_has_vendor_extension_likely(const unsigned long vendor, + int cpu, const unsigned long ext) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return false; + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && + __riscv_has_extension_likely(vendor, ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE)) + return true; + + return __riscv_isa_vendor_extension_available(cpu, vendor, ext); +} + +static __always_inline bool riscv_cpu_has_vendor_extension_unlikely(const unsigned long vendor, + int cpu, + const unsigned long ext) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return false; + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && + __riscv_has_extension_unlikely(vendor, ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE)) + return true; + + return __riscv_isa_vendor_extension_available(cpu, vendor, ext); +} + +#endif /* _ASM_VENDOR_EXTENSIONS_H */ diff --git a/arch/riscv/include/asm/vendor_extensions/andes.h b/arch/riscv/include/asm/vendor_extensions/andes.h new file mode 100644 index 000000000000..7bb2fc43438f --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/andes.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_ANDES_H +#define _ASM_RISCV_VENDOR_EXTENSIONS_ANDES_H + +#include <asm/vendor_extensions.h> + +#include <linux/types.h> + +#define RISCV_ISA_VENDOR_EXT_XANDESPMU 0 + +/* + * Extension keys should be strictly less than max. + * It is safe to increment this when necessary. + */ +#define RISCV_ISA_VENDOR_EXT_MAX_ANDES 32 + +extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_andes; + +#endif diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 8b8f6ac0eae2..b706c8e47b02 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -81,6 +81,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0) #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6 #define RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS 7 +#define RISCV_HWPROBE_KEY_TIME_CSR_FREQ 8 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ /* Flags */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 5b243d46f4b1..06d407f1b30b 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -58,6 +58,8 @@ obj-y += riscv_ksyms.o obj-y += stacktrace.o obj-y += cacheinfo.o obj-y += patch.o +obj-y += vendor_extensions.o +obj-y += vendor_extensions/ obj-y += probes/ obj-y += tests/ obj-$(CONFIG_MMU) += vdso.o vdso/ @@ -110,3 +112,4 @@ obj-$(CONFIG_COMPAT) += compat_vdso/ obj-$(CONFIG_64BIT) += pi/ obj-$(CONFIG_ACPI) += acpi.o +obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index e619edc8b0cc..ba957aaca5cb 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -17,7 +17,9 @@ #include <linux/efi.h> #include <linux/io.h> #include <linux/memblock.h> +#include <linux/of_fdt.h> #include <linux/pci.h> +#include <linux/serial_core.h> int acpi_noirq = 1; /* skip ACPI IRQ initialization */ int acpi_disabled = 1; @@ -131,7 +133,7 @@ void __init acpi_boot_table_init(void) if (param_acpi_off || (!param_acpi_on && !param_acpi_force && efi.acpi20 == EFI_INVALID_TABLE_ADDR)) - return; + goto done; /* * ACPI is disabled at this point. Enable it in order to parse @@ -151,6 +153,14 @@ void __init acpi_boot_table_init(void) if (!param_acpi_force) disable_acpi(); } + +done: + if (acpi_disabled) { + if (earlycon_acpi_spcr_enable) + early_init_dt_scan_chosen_stdout(); + } else { + acpi_parse_spcr(earlycon_acpi_spcr_enable, true); + } } static int acpi_parse_madt_rintc(union acpi_subtable_headers *header, const unsigned long end) @@ -191,11 +201,6 @@ struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu) return &cpu_madt_rintc[cpu]; } -u32 get_acpi_id_for_cpu(int cpu) -{ - return acpi_cpu_get_madt_rintc(cpu)->uid; -} - /* * __acpi_map_table() will be called before paging_init(), so early_ioremap() * or early_memremap() should be called here to for ACPI table mapping. diff --git a/arch/riscv/kernel/acpi_numa.c b/arch/riscv/kernel/acpi_numa.c new file mode 100644 index 000000000000..0231482d6946 --- /dev/null +++ b/arch/riscv/kernel/acpi_numa.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ACPI 6.6 based NUMA setup for RISCV + * Lots of code was borrowed from arch/arm64/kernel/acpi_numa.c + * + * Copyright 2004 Andi Kleen, SuSE Labs. + * Copyright (C) 2013-2016, Linaro Ltd. + * Author: Hanjun Guo <hanjun.guo@linaro.org> + * Copyright (C) 2024 Intel Corporation. + * + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. + * + * Called from acpi_numa_init while reading the SRAT and SLIT tables. + * Assumes all memory regions belonging to a single proximity domain + * are in one chunk. Holes between them will be included in the node. + */ + +#define pr_fmt(fmt) "ACPI: NUMA: " fmt + +#include <linux/acpi.h> +#include <linux/bitmap.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/memblock.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/topology.h> + +#include <asm/numa.h> + +static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE }; + +int __init acpi_numa_get_nid(unsigned int cpu) +{ + return acpi_early_node_map[cpu]; +} + +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (uid == get_acpi_id_for_cpu(cpu)) + return cpu; + + return -EINVAL; +} + +static int __init acpi_parse_rintc_pxm(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_srat_rintc_affinity *pa; + int cpu, pxm, node; + + if (srat_disabled()) + return -EINVAL; + + pa = (struct acpi_srat_rintc_affinity *)header; + if (!pa) + return -EINVAL; + + if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED)) + return 0; + + pxm = pa->proximity_domain; + node = pxm_to_node(pxm); + + /* + * If we can't map the UID to a logical cpu this + * means that the UID is not part of possible cpus + * so we do not need a NUMA mapping for it, skip + * the SRAT entry and keep parsing. + */ + cpu = get_cpu_for_acpi_id(pa->acpi_processor_uid); + if (cpu < 0) + return 0; + + acpi_early_node_map[cpu] = node; + pr_info("SRAT: PXM %d -> HARTID 0x%lx -> Node %d\n", pxm, + cpuid_to_hartid_map(cpu), node); + + return 0; +} + +void __init acpi_map_cpus_to_nodes(void) +{ + int i; + + /* + * In ACPI, SMP and CPU NUMA information is provided in separate + * static tables, namely the MADT and the SRAT. + * + * Thus, it is simpler to first create the cpu logical map through + * an MADT walk and then map the logical cpus to their node ids + * as separate steps. + */ + acpi_table_parse_entries(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_RINTC_AFFINITY, acpi_parse_rintc_pxm, 0); + + for (i = 0; i < nr_cpu_ids; i++) + early_map_cpu_to_node(i, acpi_numa_get_nid(i)); +} + +/* Callback for Proximity Domain -> logical node ID mapping */ +void __init acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa) +{ + int pxm, node; + + if (srat_disabled()) + return; + + if (pa->header.length < sizeof(struct acpi_srat_rintc_affinity)) { + pr_err("SRAT: Invalid SRAT header length: %d\n", pa->header.length); + bad_srat(); + return; + } + + if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED)) + return; + + pxm = pa->proximity_domain; + node = acpi_map_pxm_to_node(pxm); + + if (node == NUMA_NO_NODE) { + pr_err("SRAT: Too many proximity domains %d\n", pxm); + bad_srat(); + return; + } + + node_set(node, numa_nodes_parsed); +} diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 09e9b88110d1..d6c108c50cba 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -3,6 +3,7 @@ * Copyright (C) 2017 SiFive */ +#include <linux/acpi.h> #include <linux/cpu.h> #include <linux/of.h> #include <asm/cacheinfo.h> @@ -64,7 +65,6 @@ uintptr_t get_cache_geometry(u32 level, enum cache_type type) } static void ci_leaf_init(struct cacheinfo *this_leaf, - struct device_node *node, enum cache_type type, unsigned int level) { this_leaf->level = level; @@ -79,12 +79,33 @@ int populate_cache_leaves(unsigned int cpu) struct device_node *prev = NULL; int levels = 1, level = 1; + if (!acpi_disabled) { + int ret, fw_levels, split_levels; + + ret = acpi_get_cache_info(cpu, &fw_levels, &split_levels); + if (ret) + return ret; + + BUG_ON((split_levels > fw_levels) || + (split_levels + fw_levels > this_cpu_ci->num_leaves)); + + for (; level <= this_cpu_ci->num_levels; level++) { + if (level <= split_levels) { + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + } else { + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); + } + } + return 0; + } + if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); prev = np; while ((np = of_find_next_cache_node(np))) { @@ -97,11 +118,11 @@ int populate_cache_leaves(unsigned int cpu) if (level <= levels) break; if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); levels = level; } of_node_put(np); diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index c1f3655238fd..f6b13e9f5e6c 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -16,6 +16,7 @@ #include <asm/sbi.h> #include <asm/smp.h> #include <asm/pgtable.h> +#include <asm/vendor_extensions.h> bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { @@ -235,7 +236,33 @@ arch_initcall(riscv_cpuinfo_init); #ifdef CONFIG_PROC_FS -static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) +#define ALL_CPUS -1 + +static void print_vendor_isa(struct seq_file *f, int cpu) +{ + struct riscv_isavendorinfo *vendor_bitmap; + struct riscv_isa_vendor_ext_data_list *ext_list; + const struct riscv_isa_ext_data *ext_data; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + ext_list = riscv_isa_vendor_ext_list[i]; + ext_data = riscv_isa_vendor_ext_list[i]->ext_data; + + if (cpu == ALL_CPUS) + vendor_bitmap = &ext_list->all_harts_isa_bitmap; + else + vendor_bitmap = &ext_list->per_hart_isa_bitmap[cpu]; + + for (int j = 0; j < ext_list->ext_data_count; j++) { + if (!__riscv_isa_extension_available(vendor_bitmap->isa, ext_data[j].id)) + continue; + + seq_printf(f, "_%s", ext_data[j].name); + } + } +} + +static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap, int cpu) { if (IS_ENABLED(CONFIG_32BIT)) @@ -254,6 +281,8 @@ static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) seq_printf(f, "%s", riscv_isa_ext[i].name); } + print_vendor_isa(f, cpu); + seq_puts(f, "\n"); } @@ -316,7 +345,7 @@ static int c_show(struct seq_file *m, void *v) * line. */ seq_puts(m, "isa\t\t: "); - print_isa(m, NULL); + print_isa(m, NULL, ALL_CPUS); print_mmu(m); if (acpi_disabled) { @@ -338,7 +367,7 @@ static int c_show(struct seq_file *m, void *v) * additional extensions not present across all harts. */ seq_puts(m, "hart isa\t: "); - print_isa(m, hart_isa[cpu_id].isa); + print_isa(m, hart_isa[cpu_id].isa, cpu_id); seq_puts(m, "\n"); return 0; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 0366dc3baf33..8f20607adb40 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -24,6 +24,7 @@ #include <asm/processor.h> #include <asm/sbi.h> #include <asm/vector.h> +#include <asm/vendor_extensions.h> #define NUM_ALPHA_EXTS ('z' - 'a' + 1) @@ -100,31 +101,6 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data, return 0; } -#define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) { \ - .name = #_name, \ - .property = #_name, \ - .id = _id, \ - .subset_ext_ids = _subset_exts, \ - .subset_ext_size = _subset_exts_size, \ - .validate = _validate \ -} - -#define __RISCV_ISA_EXT_DATA(_name, _id) _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, NULL) - -#define __RISCV_ISA_EXT_DATA_VALIDATE(_name, _id, _validate) \ - _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, _validate) - -/* Used to declare pure "lasso" extension (Zk for instance) */ -#define __RISCV_ISA_EXT_BUNDLE(_name, _bundled_exts) \ - _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, \ - ARRAY_SIZE(_bundled_exts), NULL) - -/* Used to declare extensions that are a superset of other extensions (Zvbb for instance) */ -#define __RISCV_ISA_EXT_SUPERSET(_name, _id, _sub_exts) \ - _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), NULL) -#define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \ - _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate) - static int riscv_ext_zca_depends(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap) { @@ -405,7 +381,6 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), - __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_EXT_XANDESPMU), }; const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext); @@ -512,6 +487,21 @@ static void __init riscv_parse_isa_string(const char *isa, unsigned long *bitmap bool ext_err = false; switch (*ext) { + case 'x': + case 'X': + if (acpi_disabled) + pr_warn_once("Vendor extensions are ignored in riscv,isa. Use riscv,isa-extensions instead."); + /* + * To skip an extension, we find its end. + * As multi-letter extensions must be split from other multi-letter + * extensions with an "_", the end of a multi-letter extension will + * either be the null character or the "_" at the start of the next + * multi-letter extension. + */ + for (; *isa && *isa != '_'; ++isa) + ; + ext_err = true; + break; case 's': /* * Workaround for invalid single-letter 's' & 'u' (QEMU). @@ -527,8 +517,6 @@ static void __init riscv_parse_isa_string(const char *isa, unsigned long *bitmap } fallthrough; case 'S': - case 'x': - case 'X': case 'z': case 'Z': /* @@ -728,6 +716,61 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) acpi_put_table((struct acpi_table_header *)rhct); } +static void __init riscv_fill_cpu_vendor_ext(struct device_node *cpu_node, int cpu) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i]; + + for (int j = 0; j < ext_list->ext_data_count; j++) { + const struct riscv_isa_ext_data ext = ext_list->ext_data[j]; + struct riscv_isavendorinfo *isavendorinfo = &ext_list->per_hart_isa_bitmap[cpu]; + + if (of_property_match_string(cpu_node, "riscv,isa-extensions", + ext.property) < 0) + continue; + + /* + * Assume that subset extensions are all members of the + * same vendor. + */ + if (ext.subset_ext_size) + for (int k = 0; k < ext.subset_ext_size; k++) + set_bit(ext.subset_ext_ids[k], isavendorinfo->isa); + + set_bit(ext.id, isavendorinfo->isa); + } + } +} + +/* + * Populate all_harts_isa_bitmap for each vendor with all of the extensions that + * are shared across CPUs for that vendor. + */ +static void __init riscv_fill_vendor_ext_list(int cpu) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i]; + + if (!ext_list->is_initialized) { + bitmap_copy(ext_list->all_harts_isa_bitmap.isa, + ext_list->per_hart_isa_bitmap[cpu].isa, + RISCV_ISA_VENDOR_EXT_MAX); + ext_list->is_initialized = true; + } else { + bitmap_and(ext_list->all_harts_isa_bitmap.isa, + ext_list->all_harts_isa_bitmap.isa, + ext_list->per_hart_isa_bitmap[cpu].isa, + RISCV_ISA_VENDOR_EXT_MAX); + } + } +} + static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) { unsigned int cpu; @@ -760,6 +803,7 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) } riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap); + riscv_fill_cpu_vendor_ext(cpu_node, cpu); of_node_put(cpu_node); @@ -776,6 +820,8 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) bitmap_copy(riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX); else bitmap_and(riscv_isa, riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX); + + riscv_fill_vendor_ext_list(cpu); } if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX)) @@ -918,28 +964,45 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, { struct alt_entry *alt; void *oldptr, *altptr; - u16 id, value; + u16 id, value, vendor; if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) return; for (alt = begin; alt < end; alt++) { - if (alt->vendor_id != 0) - continue; - id = PATCH_ID_CPUFEATURE_ID(alt->patch_id); + vendor = PATCH_ID_CPUFEATURE_ID(alt->vendor_id); - if (id >= RISCV_ISA_EXT_MAX) { - WARN(1, "This extension id:%d is not in ISA extension list", id); - continue; - } + /* + * Any alternative with a patch_id that is less than + * RISCV_ISA_EXT_MAX is interpreted as a standard extension. + * + * Any alternative with patch_id that is greater than or equal + * to RISCV_VENDOR_EXT_ALTERNATIVES_BASE is interpreted as a + * vendor extension. + */ + if (id < RISCV_ISA_EXT_MAX) { + /* + * This patch should be treated as errata so skip + * processing here. + */ + if (alt->vendor_id != 0) + continue; - if (!__riscv_isa_extension_available(NULL, id)) - continue; + if (!__riscv_isa_extension_available(NULL, id)) + continue; - value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id); - if (!riscv_cpufeature_patch_check(id, value)) + value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id); + if (!riscv_cpufeature_patch_check(id, value)) + continue; + } else if (id >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE) { + if (!__riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, + id - RISCV_VENDOR_EXT_ALTERNATIVES_BASE)) + continue; + } else { + WARN(1, "This extension id:%d is not in ISA extension list", id); continue; + } oldptr = ALT_OLD_PTR(alt); altptr = ALT_ALT_PTR(alt); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 68a24cf9481a..ac2e908d4418 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -88,7 +88,6 @@ SYM_CODE_START(handle_exception) call riscv_v_context_nesting_start #endif move a0, sp /* pt_regs */ - la ra, ret_from_exception /* * MSB of cause differentiates between @@ -97,7 +96,8 @@ SYM_CODE_START(handle_exception) bge s4, zero, 1f /* Handle interrupts */ - tail do_irq + call do_irq + j ret_from_exception 1: /* Handle other exceptions */ slli t0, s4, RISCV_LGPTR @@ -105,11 +105,14 @@ SYM_CODE_START(handle_exception) la t2, excp_vect_table_end add t0, t1, t0 /* Check if exception code lies within bounds */ - bgeu t0, t2, 1f - REG_L t0, 0(t0) - jr t0 -1: - tail do_trap_unknown + bgeu t0, t2, 3f + REG_L t1, 0(t0) +2: jalr t1 + j ret_from_exception +3: + + la t1, do_trap_unknown + j 2b SYM_CODE_END(handle_exception) ASM_NOKPROBE(handle_exception) @@ -130,6 +133,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception) #endif bnez s0, 1f +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + call stackleak_erase_on_task_stack +#endif + /* Save unwound kernel stack pointer in thread_info */ addi s0, sp, PT_SIZE_ON_STACK REG_S s0, TASK_TI_KERNEL_SP(tp) diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile index 8265ff497977..d2129f2c61b8 100644 --- a/arch/riscv/kernel/probes/Makefile +++ b/arch/riscv/kernel/probes/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o simulate-insn.o obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o -obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE) diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c deleted file mode 100644 index a69dfa610aa8..000000000000 --- a/arch/riscv/kernel/probes/ftrace.c +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <linux/kprobes.h> - -/* Ftrace callback handler for kprobes -- called under preepmt disabled */ -void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct ftrace_regs *fregs) -{ - struct kprobe *p; - struct pt_regs *regs; - struct kprobe_ctlblk *kcb; - int bit; - - if (unlikely(kprobe_ftrace_disabled)) - return; - - bit = ftrace_test_recursion_trylock(ip, parent_ip); - if (bit < 0) - return; - - p = get_kprobe((kprobe_opcode_t *)ip); - if (unlikely(!p) || kprobe_disabled(p)) - goto out; - - regs = ftrace_get_regs(fregs); - kcb = get_kprobe_ctlblk(); - if (kprobe_running()) { - kprobes_inc_nmissed_count(p); - } else { - unsigned long orig_ip = instruction_pointer(regs); - - instruction_pointer_set(regs, ip); - - __this_cpu_write(current_kprobe, p); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, regs)) { - /* - * Emulate singlestep (and also recover regs->pc) - * as if there is a nop - */ - instruction_pointer_set(regs, - (unsigned long)p->addr + MCOUNT_INSN_SIZE); - if (unlikely(p->post_handler)) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, regs, 0); - } - instruction_pointer_set(regs, orig_ip); - } - - /* - * If pre_handler returns !0, it changes regs->pc. We have to - * skip emulating post_handler. - */ - __this_cpu_write(current_kprobe, NULL); - } -out: - ftrace_test_recursion_unlock(bit); -} -NOKPROBE_SYMBOL(kprobe_ftrace_handler); - -int arch_prepare_kprobe_ftrace(struct kprobe *p) -{ - p->ainsn.api.insn = NULL; - return 0; -} diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 4f73c0ae44b2..a2cde65b69e9 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -281,8 +281,10 @@ void __init setup_arch(char **cmdline_p) setup_smp(); #endif - if (!acpi_disabled) + if (!acpi_disabled) { acpi_init_rintc_map(); + acpi_map_cpus_to_nodes(); + } riscv_init_cbo_blocksizes(); riscv_fill_hwcap(); diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 5a2edd7f027e..dcd282419456 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -84,7 +84,7 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec) datap = state + 1; /* datap is designed to be 16 byte aligned for better performance */ - WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16))); + WARN_ON(!IS_ALIGNED((unsigned long)datap, 16)); get_cpu_vector_context(); riscv_v_vstate_save(¤t->thread.vstate, regs); diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 19baf0d574d3..0f8f1c95ac38 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -96,7 +96,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un if (hart == cpuid_to_hartid_map(0)) { BUG_ON(found_boot_cpu); found_boot_cpu = true; - early_map_cpu_to_node(0, acpi_numa_get_nid(cpu_count)); return 0; } @@ -106,7 +105,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un } cpuid_to_hartid_map(cpu_count) = hart; - early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count)); cpu_count++; return 0; diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 10e311b2759d..c6d5de22463f 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -16,7 +16,7 @@ #ifdef CONFIG_FRAME_POINTER -extern asmlinkage void ret_from_exception(void); +extern asmlinkage void handle_exception(void); static inline int fp_is_valid(unsigned long fp, unsigned long sp) { @@ -71,7 +71,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame->fp; pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, &frame->ra); - if (pc == (unsigned long)ret_from_exception) { + if (pc == (unsigned long)handle_exception) { if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) break; diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 685594769535..8d1b5c35d2a7 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -9,6 +9,7 @@ #include <asm/cpufeature.h> #include <asm/hwprobe.h> #include <asm/processor.h> +#include <asm/delay.h> #include <asm/sbi.h> #include <asm/switch_to.h> #include <asm/uaccess.h> @@ -93,44 +94,45 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, * regardless of the kernel's configuration, as no other checks, besides * presence in the hart_isa bitmap, are made. */ + EXT_KEY(ZACAS); + EXT_KEY(ZAWRS); EXT_KEY(ZBA); EXT_KEY(ZBB); - EXT_KEY(ZBS); - EXT_KEY(ZICBOZ); EXT_KEY(ZBC); - EXT_KEY(ZBKB); EXT_KEY(ZBKC); EXT_KEY(ZBKX); + EXT_KEY(ZBS); + EXT_KEY(ZCA); + EXT_KEY(ZCB); + EXT_KEY(ZCMOP); + EXT_KEY(ZICBOZ); + EXT_KEY(ZICOND); + EXT_KEY(ZIHINTNTL); + EXT_KEY(ZIHINTPAUSE); + EXT_KEY(ZIMOP); EXT_KEY(ZKND); EXT_KEY(ZKNE); EXT_KEY(ZKNH); EXT_KEY(ZKSED); EXT_KEY(ZKSH); EXT_KEY(ZKT); - EXT_KEY(ZIHINTNTL); EXT_KEY(ZTSO); - EXT_KEY(ZACAS); - EXT_KEY(ZICOND); - EXT_KEY(ZIHINTPAUSE); - EXT_KEY(ZIMOP); - EXT_KEY(ZCA); - EXT_KEY(ZCB); - EXT_KEY(ZCMOP); - EXT_KEY(ZAWRS); /* * All the following extensions must depend on the kernel * support of V. */ if (has_vector()) { - EXT_KEY(ZVE32X); - EXT_KEY(ZVE32F); - EXT_KEY(ZVE64X); - EXT_KEY(ZVE64F); - EXT_KEY(ZVE64D); EXT_KEY(ZVBB); EXT_KEY(ZVBC); + EXT_KEY(ZVE32F); + EXT_KEY(ZVE32X); + EXT_KEY(ZVE64D); + EXT_KEY(ZVE64F); + EXT_KEY(ZVE64X); + EXT_KEY(ZVFH); + EXT_KEY(ZVFHMIN); EXT_KEY(ZVKB); EXT_KEY(ZVKG); EXT_KEY(ZVKNED); @@ -139,16 +141,14 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZVKSED); EXT_KEY(ZVKSH); EXT_KEY(ZVKT); - EXT_KEY(ZVFH); - EXT_KEY(ZVFHMIN); } if (has_fpu()) { - EXT_KEY(ZFH); - EXT_KEY(ZFHMIN); - EXT_KEY(ZFA); EXT_KEY(ZCD); EXT_KEY(ZCF); + EXT_KEY(ZFA); + EXT_KEY(ZFH); + EXT_KEY(ZFHMIN); } #undef EXT_KEY } @@ -237,6 +237,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, pair->value = user_max_virt_addr(); break; + case RISCV_HWPROBE_KEY_TIME_CSR_FREQ: + pair->value = riscv_timebase; + break; + /* * For forward compatibility, unknown keys don't fail the whole * call, but get their element key set to -1 and value set to 0 diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c new file mode 100644 index 000000000000..b6c1e7b5d34b --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Rivos, Inc + */ + +#include <asm/vendorid_list.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/andes.h> + +#include <linux/array_size.h> +#include <linux/types.h> + +struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = { +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES + &riscv_isa_vendor_ext_list_andes, +#endif +}; + +const size_t riscv_isa_vendor_ext_list_size = ARRAY_SIZE(riscv_isa_vendor_ext_list); + +/** + * __riscv_isa_vendor_extension_available() - Check whether given vendor + * extension is available or not. + * + * @cpu: check if extension is available on this cpu + * @vendor: vendor that the extension is a member of + * @bit: bit position of the desired extension + * Return: true or false + * + * NOTE: When cpu is -1, will check if extension is available on all cpus + */ +bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsigned int bit) +{ + struct riscv_isavendorinfo *bmap; + struct riscv_isavendorinfo *cpu_bmap; + + switch (vendor) { + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES + case ANDES_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_andes.all_harts_isa_bitmap; + cpu_bmap = &riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap[cpu]; + break; + #endif + default: + return false; + } + + if (cpu != -1) + bmap = &cpu_bmap[cpu]; + + if (bit >= RISCV_ISA_VENDOR_EXT_MAX) + return false; + + return test_bit(bit, bmap->isa) ? true : false; +} +EXPORT_SYMBOL_GPL(__riscv_isa_vendor_extension_available); diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile new file mode 100644 index 000000000000..6a61aed944f1 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o diff --git a/arch/riscv/kernel/vendor_extensions/andes.c b/arch/riscv/kernel/vendor_extensions/andes.c new file mode 100644 index 000000000000..ec688c88456a --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/andes.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/cpufeature.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/andes.h> + +#include <linux/array_size.h> +#include <linux/types.h> + +/* All Andes vendor extensions supported in Linux */ +const struct riscv_isa_ext_data riscv_isa_vendor_ext_andes[] = { + __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_VENDOR_EXT_XANDESPMU), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_andes = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_andes), + .ext_data = riscv_isa_vendor_ext_andes, +}; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 59e0d861e26f..a822f952f64a 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -799,17 +799,6 @@ config HAVE_PNETID menu "Virtualization" -config PROTECTED_VIRTUALIZATION_GUEST - def_bool n - prompt "Protected virtualization guest support" - help - Select this option, if you want to be able to run this - kernel as a protected virtualization KVM guest. - Protected virtualization capable machines have a mini hypervisor - located at machine level (an ultravisor). With help of the - Ultravisor, KVM will be able to run "protected" VMs, special - VMs whose memory and management data are unavailable to KVM. - config PFAULT def_bool y prompt "Pseudo page fault support" diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index c2978cb03b36..91a30e017d65 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -46,9 +46,9 @@ * /proc entries (sysctl) */ static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata"; -static int appldata_timer_handler(struct ctl_table *ctl, int write, +static int appldata_timer_handler(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); -static int appldata_interval_handler(struct ctl_table *ctl, int write, +static int appldata_interval_handler(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table_header *appldata_sysctl_header; @@ -199,7 +199,7 @@ static void __appldata_vtimer_setup(int cmd) * Start/Stop timer, show status of timer (0 = not active, 1 = active) */ static int -appldata_timer_handler(struct ctl_table *ctl, int write, +appldata_timer_handler(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int timer_active = appldata_timer_active; @@ -232,7 +232,7 @@ appldata_timer_handler(struct ctl_table *ctl, int write, * current timer interval. */ static int -appldata_interval_handler(struct ctl_table *ctl, int write, +appldata_interval_handler(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int interval = appldata_interval; @@ -262,7 +262,7 @@ appldata_interval_handler(struct ctl_table *ctl, int write, * monitoring (0 = not in process, 1 = in process) */ static int -appldata_generic_handler(struct ctl_table *ctl, int write, +appldata_generic_handler(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct appldata_ops *ops = NULL, *tmp_ops; diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index e7658997452b..4f476884d340 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -39,8 +39,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o -obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o +obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o alternative.o uv.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o diff --git a/arch/s390/boot/alternative.c b/arch/s390/boot/alternative.c new file mode 100644 index 000000000000..abc08d2c873d --- /dev/null +++ b/arch/s390/boot/alternative.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../kernel/alternative.c" diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 18027fdc92b0..83e2ce050b6c 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -30,6 +30,8 @@ struct vmlinux_info { unsigned long init_mm_off; unsigned long swapper_pg_dir_off; unsigned long invalid_pg_dir_off; + unsigned long alt_instructions; + unsigned long alt_instructions_end; #ifdef CONFIG_KASAN unsigned long kasan_early_shadow_page_off; unsigned long kasan_early_shadow_pte_off; @@ -89,8 +91,10 @@ extern char _end[], _decompressor_end[]; extern unsigned char _compressed_start[]; extern unsigned char _compressed_end[]; extern struct vmlinux_info _vmlinux_info; + #define vmlinux _vmlinux_info +#define __lowcore_pa(x) ((unsigned long)(x) % sizeof(struct lowcore)) #define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore)) #define __kernel_va(x) ((void *)((unsigned long)(x) - __kaslr_offset_phys + __kaslr_offset)) #define __kernel_pa(x) ((unsigned long)(x) - __kaslr_offset + __kaslr_offset_phys) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index a21f301acd29..1773b72a6a7b 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -3,6 +3,7 @@ #include <linux/init.h> #include <linux/ctype.h> #include <linux/pgtable.h> +#include <asm/abs_lowcore.h> #include <asm/page-states.h> #include <asm/ebcdic.h> #include <asm/sclp.h> @@ -310,5 +311,7 @@ void parse_boot_command_line(void) prot_virt_host = 1; } #endif + if (!strcmp(param, "relocate_lowcore") && test_facility(193)) + relocate_lowcore = 1; } } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index c59014945af0..ce232552bc1c 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -30,6 +30,7 @@ unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata_preserved(max_mappable); +int __bootdata_preserved(relocate_lowcore); u64 __bootdata_preserved(stfle_fac_list[16]); struct oldmem_data __bootdata_preserved(oldmem_data); @@ -376,6 +377,8 @@ static void kaslr_adjust_vmlinux_info(long offset) vmlinux.init_mm_off += offset; vmlinux.swapper_pg_dir_off += offset; vmlinux.invalid_pg_dir_off += offset; + vmlinux.alt_instructions += offset; + vmlinux.alt_instructions_end += offset; #ifdef CONFIG_KASAN vmlinux.kasan_early_shadow_page_off += offset; vmlinux.kasan_early_shadow_pte_off += offset; @@ -478,8 +481,12 @@ void startup_kernel(void) * before the kernel started. Therefore, in case the two sections * overlap there is no risk of corrupting any data. */ - if (kaslr_enabled()) - amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G); + if (kaslr_enabled()) { + unsigned long amode31_min; + + amode31_min = (unsigned long)_decompressor_end; + amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G); + } if (!amode31_lma) amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size; physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size); @@ -503,6 +510,9 @@ void startup_kernel(void) kaslr_adjust_got(__kaslr_offset); setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit); copy_bootdata(); + __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions, + (struct alt_instr *)_vmlinux_info.alt_instructions_end, + ALT_CTX_EARLY); /* * Save KASLR offset for early dumps, before vmcore_info is set. diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index 1e66d2cbb096..318e6ba95bfd 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -8,12 +8,8 @@ #include "uv.h" /* will be used in arch/s390/kernel/uv.c */ -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST int __bootdata_preserved(prot_virt_guest); -#endif -#if IS_ENABLED(CONFIG_KVM) int __bootdata_preserved(prot_virt_host); -#endif struct uv_info __bootdata_preserved(uv_info); void uv_query_info(void) @@ -53,14 +49,11 @@ void uv_query_info(void) uv_info.max_secrets = uvcb.max_secrets; } -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) && test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list)) prot_virt_guest = 1; -#endif } -#if IS_ENABLED(CONFIG_KVM) unsigned long adjust_to_uv_max(unsigned long limit) { if (is_prot_virt_host() && uv_info.max_sec_stor_addr) @@ -92,4 +85,3 @@ void sanitize_prot_virt_host(void) { prot_virt_host = is_prot_virt_host_capable(); } -#endif diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h index 0f3070856f8d..da4a4a8d48e0 100644 --- a/arch/s390/boot/uv.h +++ b/arch/s390/boot/uv.h @@ -2,21 +2,8 @@ #ifndef BOOT_UV_H #define BOOT_UV_H -#if IS_ENABLED(CONFIG_KVM) unsigned long adjust_to_uv_max(unsigned long limit); void sanitize_prot_virt_host(void); -#else -static inline unsigned long adjust_to_uv_max(unsigned long limit) -{ - return limit; -} -static inline void sanitize_prot_virt_host(void) {} -#endif - -#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) void uv_query_info(void); -#else -static inline void uv_query_info(void) {} -#endif #endif /* BOOT_UV_H */ diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index a255ca189aaa..2847cc059ab7 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -26,6 +26,7 @@ atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]); enum populate_mode { POPULATE_NONE, POPULATE_DIRECT, + POPULATE_LOWCORE, POPULATE_ABS_LOWCORE, POPULATE_IDENTITY, POPULATE_KERNEL, @@ -242,6 +243,8 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m return -1; case POPULATE_DIRECT: return addr; + case POPULATE_LOWCORE: + return __lowcore_pa(addr); case POPULATE_ABS_LOWCORE: return __abs_lowcore_pa(addr); case POPULATE_KERNEL: @@ -418,6 +421,7 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit) { + unsigned long lowcore_address = 0; unsigned long start, end; unsigned long asce_type; unsigned long asce_bits; @@ -455,12 +459,17 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER); __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER); + if (relocate_lowcore) + lowcore_address = LOWCORE_ALT_ADDRESS; + /* * To allow prefixing the lowcore must be mapped with 4KB pages. * To prevent creation of a large page at address 0 first map * the lowcore and create the identity mapping only afterwards. */ - pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT); + pgtable_populate(lowcore_address, + lowcore_address + sizeof(struct lowcore), + POPULATE_LOWCORE); for_each_physmem_usable_range(i, &start, &end) { pgtable_populate((unsigned long)__identity_va(start), (unsigned long)__identity_va(end), diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index f3602414a961..ea63a7342f5f 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -55,7 +55,6 @@ CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m -CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_S390_HYPFS_FS=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index d0d8925fdf09..d8b28ff8ff45 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -53,7 +53,6 @@ CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m -CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_S390_HYPFS_FS=y diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h index 6f264b79e377..d20df8c923fc 100644 --- a/arch/s390/include/asm/abs_lowcore.h +++ b/arch/s390/include/asm/abs_lowcore.h @@ -2,6 +2,7 @@ #ifndef _ASM_S390_ABS_LOWCORE_H #define _ASM_S390_ABS_LOWCORE_H +#include <asm/sections.h> #include <asm/lowcore.h> #define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore)) @@ -24,4 +25,11 @@ static inline void put_abs_lowcore(struct lowcore *lc) put_cpu(); } +extern int __bootdata_preserved(relocate_lowcore); + +static inline int have_relocated_lowcore(void) +{ + return relocate_lowcore; +} + #endif /* _ASM_S390_ABS_LOWCORE_H */ diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h deleted file mode 100644 index 608f6287ca9c..000000000000 --- a/arch/s390/include/asm/alternative-asm.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_S390_ALTERNATIVE_ASM_H -#define _ASM_S390_ALTERNATIVE_ASM_H - -#ifdef __ASSEMBLY__ - -/* - * Issue one struct alt_instr descriptor entry (need to put it into - * the section .altinstructions, see below). This entry contains - * enough information for the alternatives patching code to patch an - * instruction. See apply_alternatives(). - */ -.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature - .long \orig_start - . - .long \alt_start - . - .word \feature - .byte \orig_end - \orig_start - .org . - ( \orig_end - \orig_start ) & 1 - .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start ) - .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start ) -.endm - -/* - * Define an alternative between two instructions. If @feature is - * present, early code in apply_alternatives() replaces @oldinstr with - * @newinstr. - */ -.macro ALTERNATIVE oldinstr, newinstr, feature - .pushsection .altinstr_replacement,"ax" -770: \newinstr -771: .popsection -772: \oldinstr -773: .pushsection .altinstructions,"a" - alt_entry 772b, 773b, 770b, 771b, \feature - .popsection -.endm - -/* - * Define an alternative between two instructions. If @feature is - * present, early code in apply_alternatives() replaces @oldinstr with - * @newinstr. - */ -.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 - .pushsection .altinstr_replacement,"ax" -770: \newinstr1 -771: \newinstr2 -772: .popsection -773: \oldinstr -774: .pushsection .altinstructions,"a" - alt_entry 773b, 774b, 770b, 771b,\feature1 - alt_entry 773b, 774b, 771b, 772b,\feature2 - .popsection -.endm - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_S390_ALTERNATIVE_ASM_H */ diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index dd93b92c3ab6..de980c938a3e 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -2,6 +2,58 @@ #ifndef _ASM_S390_ALTERNATIVE_H #define _ASM_S390_ALTERNATIVE_H +/* + * Each alternative comes with a 32 bit feature field: + * union { + * u32 feature; + * struct { + * u32 ctx : 4; + * u32 type : 8; + * u32 data : 20; + * }; + * } + * + * @ctx is a bitfield, where only one bit must be set. Each bit defines + * in which context an alternative is supposed to be applied to the + * kernel image: + * + * - from the decompressor before the kernel itself is executed + * - from early kernel code from within the kernel + * + * @type is a number which defines the type and with that the type + * specific alternative patching. + * + * @data is additional type specific information which defines if an + * alternative should be applied. + */ + +#define ALT_CTX_EARLY 1 +#define ALT_CTX_LATE 2 +#define ALT_CTX_ALL (ALT_CTX_EARLY | ALT_CTX_LATE) + +#define ALT_TYPE_FACILITY 0 +#define ALT_TYPE_SPEC 1 +#define ALT_TYPE_LOWCORE 2 + +#define ALT_DATA_SHIFT 0 +#define ALT_TYPE_SHIFT 20 +#define ALT_CTX_SHIFT 28 + +#define ALT_FACILITY_EARLY(facility) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ + ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \ + (facility) << ALT_DATA_SHIFT) + +#define ALT_FACILITY(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \ + ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \ + (facility) << ALT_DATA_SHIFT) + +#define ALT_SPEC(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \ + ALT_TYPE_SPEC << ALT_TYPE_SHIFT | \ + (facility) << ALT_DATA_SHIFT) + +#define ALT_LOWCORE (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ + ALT_TYPE_LOWCORE << ALT_TYPE_SHIFT) + #ifndef __ASSEMBLY__ #include <linux/types.h> @@ -11,12 +63,30 @@ struct alt_instr { s32 instr_offset; /* original instruction */ s32 repl_offset; /* offset to replacement instruction */ - u16 facility; /* facility bit set for replacement */ + union { + u32 feature; /* feature required for replacement */ + struct { + u32 ctx : 4; /* context */ + u32 type : 8; /* type of alternative */ + u32 data : 20; /* patching information */ + }; + }; u8 instrlen; /* length of original instruction */ } __packed; -void apply_alternative_instructions(void); -void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + +void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx); + +static inline void apply_alternative_instructions(void) +{ + __apply_alternatives(__alt_instructions, __alt_instructions_end, ALT_CTX_LATE); +} + +static inline void apply_alternatives(struct alt_instr *start, struct alt_instr *end) +{ + __apply_alternatives(start, end, ALT_CTX_ALL); +} /* * +---------------------------------+ @@ -48,10 +118,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); #define OLDINSTR(oldinstr) \ "661:\n\t" oldinstr "\n662:\n" -#define ALTINSTR_ENTRY(facility, num) \ +#define ALTINSTR_ENTRY(feature, num) \ "\t.long 661b - .\n" /* old instruction */ \ "\t.long " b_altinstr(num)"b - .\n" /* alt instruction */ \ - "\t.word " __stringify(facility) "\n" /* facility bit */ \ + "\t.long " __stringify(feature) "\n" /* feature */ \ "\t.byte " oldinstr_len "\n" /* instruction len */ \ "\t.org . - (" oldinstr_len ") & 1\n" \ "\t.org . - (" oldinstr_len ") + (" altinstr_len(num) ")\n" \ @@ -61,24 +131,24 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n" /* alternative assembly primitive: */ -#define ALTERNATIVE(oldinstr, altinstr, facility) \ +#define ALTERNATIVE(oldinstr, altinstr, feature) \ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(altinstr, 1) \ ".popsection\n" \ OLDINSTR(oldinstr) \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(facility, 1) \ + ALTINSTR_ENTRY(feature, 1) \ ".popsection\n" -#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2)\ +#define ALTERNATIVE_2(oldinstr, altinstr1, feature1, altinstr2, feature2)\ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(altinstr1, 1) \ ALTINSTR_REPLACEMENT(altinstr2, 2) \ ".popsection\n" \ OLDINSTR(oldinstr) \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(facility1, 1) \ - ALTINSTR_ENTRY(facility2, 2) \ + ALTINSTR_ENTRY(feature1, 1) \ + ALTINSTR_ENTRY(feature2, 2) \ ".popsection\n" /* @@ -93,12 +163,12 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); * For non barrier like inlines please define new variants * without volatile and memory clobber. */ -#define alternative(oldinstr, altinstr, facility) \ - asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory") +#define alternative(oldinstr, altinstr, feature) \ + asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) : : : "memory") -#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \ - asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \ - altinstr2, facility2) ::: "memory") +#define alternative_2(oldinstr, altinstr1, feature1, altinstr2, feature2) \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, feature1, \ + altinstr2, feature2) ::: "memory") /* Alternative inline assembly with input. */ #define alternative_input(oldinstr, newinstr, feature, input...) \ @@ -106,8 +176,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); : : input) /* Like alternative_input, but with a single output argument */ -#define alternative_io(oldinstr, altinstr, facility, output, input...) \ - asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) \ +#define alternative_io(oldinstr, altinstr, feature, output, input...) \ + asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) \ : output : input) /* Use this macro if more than one output parameter is needed. */ @@ -116,6 +186,56 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); /* Use this macro if clobbers are needed without inputs. */ #define ASM_NO_INPUT_CLOBBER(clobber...) : clobber +#else /* __ASSEMBLY__ */ + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature + .long \orig_start - . + .long \alt_start - . + .long \feature + .byte \orig_end - \orig_start + .org . - ( \orig_end - \orig_start ) & 1 + .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start ) + .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start ) +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. + */ +.macro ALTERNATIVE oldinstr, newinstr, feature + .pushsection .altinstr_replacement,"ax" +770: \newinstr +771: .popsection +772: \oldinstr +773: .pushsection .altinstructions,"a" + alt_entry 772b, 773b, 770b, 771b, \feature + .popsection +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. + */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 + .pushsection .altinstr_replacement,"ax" +770: \newinstr1 +771: \newinstr2 +772: .popsection +773: \oldinstr +774: .pushsection .altinstructions,"a" + alt_entry 773b, 774b, 770b, 771b,\feature1 + alt_entry 773b, 774b, 771b, 772b,\feature2 + .popsection +.endm + #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_ALTERNATIVE_H */ diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 2b379d1d9046..742c7919cbcd 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -188,7 +188,8 @@ static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new) return old; } -#ifdef __GCC_ASM_FLAG_OUTPUTS__ +/* GCC versions before 14.2.0 may die with an ICE in some configurations. */ +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_IS_GCC) && (GCC_VERSION < 140200)) static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) { diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index 436365ff6c19..e3afcece375e 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -210,7 +210,7 @@ extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *); #define get_ccwdev_lock(x) (x)->ccwlock #define to_ccwdev(n) container_of(n, struct ccw_device, dev) -#define to_ccwdrv(n) container_of(n, struct ccw_driver, driver) +#define to_ccwdrv(n) container_of_const(n, struct ccw_driver, driver) extern struct ccw_device *ccw_device_create_console(struct ccw_driver *); extern void ccw_device_destroy_console(struct ccw_device *); diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index d46cc725f024..b7d234838a36 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -20,7 +20,6 @@ #define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8) extern u64 stfle_fac_list[16]; -extern u64 alt_stfle_fac_list[16]; static inline void __set_facility(unsigned long nr, void *facilities) { diff --git a/arch/s390/include/asm/kmsan.h b/arch/s390/include/asm/kmsan.h index 27db65fbf3f6..f73e181d09ae 100644 --- a/arch/s390/include/asm/kmsan.h +++ b/arch/s390/include/asm/kmsan.h @@ -12,8 +12,8 @@ static inline bool is_lowcore_addr(void *addr) { - return addr >= (void *)&S390_lowcore && - addr < (void *)(&S390_lowcore + 1); + return addr >= (void *)get_lowcore() && + addr < (void *)(get_lowcore() + 1); } static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin) @@ -25,7 +25,7 @@ static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin) * order to get a distinct struct page. */ addr += (void *)lowcore_ptr[raw_smp_processor_id()] - - (void *)&S390_lowcore; + (void *)get_lowcore(); if (KMSAN_WARN_ON(is_lowcore_addr(addr))) return NULL; return kmsan_get_metadata(addr, is_origin); diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index c724e71e1785..183ac29afaf8 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -14,10 +14,15 @@ #include <asm/ctlreg.h> #include <asm/cpu.h> #include <asm/types.h> +#include <asm/alternative.h> #define LC_ORDER 1 #define LC_PAGES 2 +#define LOWCORE_ALT_ADDRESS _AC(0x70000, UL) + +#ifndef __ASSEMBLY__ + struct pgm_tdb { u64 data[32]; }; @@ -97,8 +102,7 @@ struct lowcore { __u64 save_area_async[8]; /* 0x0240 */ __u64 save_area_restart[1]; /* 0x0280 */ - /* CPU flags. */ - __u64 cpu_flags; /* 0x0288 */ + __u64 pcpu; /* 0x0288 */ /* Return psws. */ psw_t return_psw; /* 0x0290 */ @@ -215,7 +219,14 @@ struct lowcore { static __always_inline struct lowcore *get_lowcore(void) { - return NULL; + struct lowcore *lc; + + if (__is_defined(__DECOMPRESSOR)) + return NULL; + asm(ALTERNATIVE("llilh %[lc],0", "llilh %[lc],%[alt]", ALT_LOWCORE) + : [lc] "=d" (lc) + : [alt] "i" (LOWCORE_ALT_ADDRESS >> 16)); + return lc; } extern struct lowcore *lowcore_ptr[]; @@ -225,4 +236,19 @@ static inline void set_prefix(__u32 address) asm volatile("spx %0" : : "Q" (address) : "memory"); } +#else /* __ASSEMBLY__ */ + +.macro GET_LC reg + ALTERNATIVE "llilh \reg,0", \ + __stringify(llilh \reg, LOWCORE_ALT_ADDRESS >> 16), \ + ALT_LOWCORE +.endm + +.macro STMG_LC start, end, savearea + ALTERNATIVE "stmg \start, \end, \savearea", \ + __stringify(stmg \start, \end, LOWCORE_ALT_ADDRESS + \savearea), \ + ALT_LOWCORE +.endm + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_LOWCORE_H */ diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h index b9c1f3cae842..192835a3e24d 100644 --- a/arch/s390/include/asm/nospec-branch.h +++ b/arch/s390/include/asm/nospec-branch.h @@ -5,8 +5,17 @@ #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <asm/facility.h> extern int nospec_disable; +extern int nobp; + +static inline bool nobp_enabled(void) +{ + if (__is_defined(__DECOMPRESSOR)) + return false; + return nobp && test_facility(82); +} void nospec_init_branches(void); void nospec_auto_detect(void); diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 5ec41ec3d761..06416b3f94f5 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -174,12 +174,10 @@ static inline int devmem_is_allowed(unsigned long pfn) #define HAVE_ARCH_FREE_PAGE #define HAVE_ARCH_ALLOC_PAGE -#if IS_ENABLED(CONFIG_PGSTE) int arch_make_folio_accessible(struct folio *folio); #define HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE int arch_make_page_accessible(struct page *page); #define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE -#endif struct vm_layout { unsigned long kaslr_offset; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index c87cf2b8e81a..5ecd442535b9 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,13 +14,11 @@ #include <linux/bits.h> -#define CIF_SIE 0 /* CPU needs SIE exit cleanup */ #define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ #define CIF_ENABLED_WAIT 5 /* in enabled wait state */ #define CIF_MCCK_GUEST 6 /* machine check happening in guest */ #define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */ -#define _CIF_SIE BIT(CIF_SIE) #define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY) #define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT) #define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST) @@ -42,21 +40,37 @@ #include <asm/irqflags.h> #include <asm/alternative.h> +struct pcpu { + unsigned long ec_mask; /* bit mask for ec_xxx functions */ + unsigned long ec_clk; /* sigp timestamp for ec_xxx */ + unsigned long flags; /* per CPU flags */ + signed char state; /* physical cpu state */ + signed char polarization; /* physical polarization */ + u16 address; /* physical cpu address */ +}; + +DECLARE_PER_CPU(struct pcpu, pcpu_devices); + typedef long (*sys_call_ptr_t)(struct pt_regs *regs); +static __always_inline struct pcpu *this_pcpu(void) +{ + return (struct pcpu *)(get_lowcore()->pcpu); +} + static __always_inline void set_cpu_flag(int flag) { - get_lowcore()->cpu_flags |= (1UL << flag); + this_pcpu()->flags |= (1UL << flag); } static __always_inline void clear_cpu_flag(int flag) { - get_lowcore()->cpu_flags &= ~(1UL << flag); + this_pcpu()->flags &= ~(1UL << flag); } static __always_inline bool test_cpu_flag(int flag) { - return get_lowcore()->cpu_flags & (1UL << flag); + return this_pcpu()->flags & (1UL << flag); } static __always_inline bool test_and_set_cpu_flag(int flag) @@ -81,9 +95,7 @@ static __always_inline bool test_and_clear_cpu_flag(int flag) */ static __always_inline bool test_cpu_flag_of(int flag, int cpu) { - struct lowcore *lc = lowcore_ptr[cpu]; - - return lc->cpu_flags & (1UL << flag); + return per_cpu(pcpu_devices, cpu).flags & (1UL << flag); } #define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY) @@ -405,7 +417,7 @@ static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) static __always_inline void bpon(void) { - asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", 82)); + asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82))); } #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/runtime-const.h b/arch/s390/include/asm/runtime-const.h new file mode 100644 index 000000000000..17878b1d048c --- /dev/null +++ b/arch/s390/include/asm/runtime-const.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_RUNTIME_CONST_H +#define _ASM_S390_RUNTIME_CONST_H + +#include <linux/uaccess.h> + +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret; \ + \ + asm_inline( \ + "0: iihf %[__ret],%[c1]\n" \ + " iilf %[__ret],%[c2]\n" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n" \ + ".long 0b - .\n" \ + ".popsection" \ + : [__ret] "=d" (__ret) \ + : [c1] "i" (0x01234567UL), \ + [c2] "i" (0x89abcdefUL)); \ + __ret; \ +}) + +#define runtime_const_shift_right_32(val, sym) \ +({ \ + unsigned int __ret = (val); \ + \ + asm_inline( \ + "0: srl %[__ret],12\n" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n" \ + ".long 0b - .\n" \ + ".popsection" \ + : [__ret] "+d" (__ret)); \ + __ret; \ +}) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* 32-bit immediate for iihf and iilf in bits in I2 field */ +static inline void __runtime_fixup_32(u32 *p, unsigned int val) +{ + s390_kernel_write(p, &val, sizeof(val)); +} + +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + __runtime_fixup_32(where + 2, val >> 32); + __runtime_fixup_32(where + 8, val); +} + +/* Immediate value is lower 12 bits of D2 field of srl */ +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + u32 insn = *(u32 *)where; + + insn &= 0xfffff000; + insn |= (val & 63); + s390_kernel_write(where, &insn, sizeof(insn)); +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif /* _ASM_S390_RUNTIME_CONST_H */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index c13c79025348..cd835f4fb11a 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -24,7 +24,6 @@ extern int __cpu_up(unsigned int cpu, struct task_struct *tidle); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -extern void smp_call_online_cpu(void (*func)(void *), void *); extern void smp_call_ipl_cpu(void (*func)(void *), void *); extern void smp_emergency_stop(void); diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 3e43c90ff135..77d5e804af93 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -79,7 +79,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) typecheck(int, lp->lock); kcsan_release(); asm_inline volatile( - ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */ + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", ALT_FACILITY(49)) /* NIAI 7 */ " sth %1,%0\n" : "=R" (((unsigned short *) &lp->lock)[1]) : "d" (0) : "cc", "memory"); diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index d02a709717b8..00ac01874a12 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -40,6 +40,7 @@ struct thread_info { unsigned long flags; /* low level flags */ unsigned long syscall_work; /* SYSCALL_WORK_ flags */ unsigned int cpu; /* current CPU */ + unsigned char sie; /* running in SIE context */ }; /* diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 9213be0529ee..a81f897a81ce 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -332,7 +332,14 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return __clear_user(to, n); } -void *s390_kernel_write(void *dst, const void *src, size_t size); +void *__s390_kernel_write(void *dst, const void *src, size_t size); + +static inline void *s390_kernel_write(void *dst, const void *src, size_t size) +{ + if (__is_defined(__DECOMPRESSOR)) + return memcpy(dst, src, size); + return __s390_kernel_write(dst, src, size); +} int __noreturn __put_kernel_bad(void); diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 0679445cac0b..0b5f8f3e84f1 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -414,7 +414,6 @@ static inline bool uv_has_feature(u8 feature_bit) return test_bit_inv(feature_bit, &uv_info.uv_feature_indications); } -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST extern int prot_virt_guest; static inline int is_prot_virt_guest(void) @@ -466,13 +465,6 @@ static inline int uv_remove_shared(unsigned long addr) return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS); } -#else -#define is_prot_virt_guest() 0 -static inline int uv_set_shared(unsigned long addr) { return 0; } -static inline int uv_remove_shared(unsigned long addr) { return 0; } -#endif - -#if IS_ENABLED(CONFIG_KVM) extern int prot_virt_host; static inline int is_prot_virt_host(void) @@ -489,29 +481,5 @@ int uv_convert_from_secure_pte(pte_t pte); int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); void setup_uv(void); -#else -#define is_prot_virt_host() 0 -static inline void setup_uv(void) {} - -static inline int uv_pin_shared(unsigned long paddr) -{ - return 0; -} - -static inline int uv_destroy_folio(struct folio *folio) -{ - return 0; -} - -static inline int uv_destroy_pte(pte_t pte) -{ - return 0; -} - -static inline int uv_convert_from_secure_pte(pte_t pte) -{ - return 0; -} -#endif #endif /* _ASM_S390_UV_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 7241fa194709..e47a4be54ff8 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -43,7 +43,7 @@ obj-y += sysinfo.o lgr.o os_info.o ctlreg.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o -obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o +obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o extra-y += vmlinux.lds @@ -80,7 +80,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o obj-$(CONFIG_TRACEPOINTS) += trace.o -obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o # vdso obj-y += vdso64/ diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c index f9efc54ec4b7..09cd24cbe74e 100644 --- a/arch/s390/kernel/abs_lowcore.c +++ b/arch/s390/kernel/abs_lowcore.c @@ -4,6 +4,7 @@ #include <asm/abs_lowcore.h> unsigned long __bootdata_preserved(__abs_lowcore); +int __bootdata_preserved(relocate_lowcore); int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc) { diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 1ac5f707dd70..8d5d0de35de0 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -1,68 +1,41 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/module.h> -#include <linux/cpu.h> -#include <linux/smp.h> -#include <asm/text-patching.h> + +#include <linux/uaccess.h> +#include <asm/nospec-branch.h> +#include <asm/abs_lowcore.h> #include <asm/alternative.h> #include <asm/facility.h> -#include <asm/nospec-branch.h> - -static int __initdata_or_module alt_instr_disabled; - -static int __init disable_alternative_instructions(char *str) -{ - alt_instr_disabled = 1; - return 0; -} - -early_param("noaltinstr", disable_alternative_instructions); -static void __init_or_module __apply_alternatives(struct alt_instr *start, - struct alt_instr *end) +void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx) { - struct alt_instr *a; u8 *instr, *replacement; + struct alt_instr *a; + bool replace; /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. */ for (a = start; a < end; a++) { + if (!(a->ctx & ctx)) + continue; + switch (a->type) { + case ALT_TYPE_FACILITY: + replace = test_facility(a->data); + break; + case ALT_TYPE_SPEC: + replace = nobp_enabled(); + break; + case ALT_TYPE_LOWCORE: + replace = have_relocated_lowcore(); + break; + default: + replace = false; + } + if (!replace) + continue; instr = (u8 *)&a->instr_offset + a->instr_offset; replacement = (u8 *)&a->repl_offset + a->repl_offset; - - if (!__test_facility(a->facility, alt_stfle_fac_list)) - continue; s390_kernel_write(instr, replacement, a->instrlen); } } - -void __init_or_module apply_alternatives(struct alt_instr *start, - struct alt_instr *end) -{ - if (!alt_instr_disabled) - __apply_alternatives(start, end); -} - -extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; -void __init apply_alternative_instructions(void) -{ - apply_alternatives(__alt_instructions, __alt_instructions_end); -} - -static void do_sync_core(void *info) -{ - sync_core(); -} - -void text_poke_sync(void) -{ - on_each_cpu(do_sync_core, NULL, 1); -} - -void text_poke_sync_lock(void) -{ - cpus_read_lock(); - text_poke_sync(); - cpus_read_unlock(); -} diff --git a/arch/s390/kernel/alternative.h b/arch/s390/kernel/alternative.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/arch/s390/kernel/alternative.h diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 26bb45d0e6f1..ffa0dd2dbaac 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -28,6 +28,7 @@ int main(void) BLANK(); /* thread info offsets */ OFFSET(__TI_flags, task_struct, thread_info.flags); + OFFSET(__TI_sie, task_struct, thread_info.sie); BLANK(); /* pt_regs offsets */ OFFSET(__PT_PSW, pt_regs, psw); @@ -114,7 +115,7 @@ int main(void) OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync); OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async); OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart); - OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags); + OFFSET(__LC_PCPU, lowcore, pcpu); OFFSET(__LC_RETURN_PSW, lowcore, return_psw); OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw); OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer); @@ -186,5 +187,7 @@ int main(void) #endif OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs); DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs)); + + OFFSET(__PCPU_FLAGS, pcpu, flags); return 0; } diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 85328a0ef3b6..bce50ca75ea7 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -954,7 +954,7 @@ static int debug_active = 1; * always allow read, allow write only if debug_stoppable is set or * if debug_active is already off */ -static int s390dbf_procactive(struct ctl_table *table, int write, +static int s390dbf_procactive(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { if (!write || debug_stoppable || !debug_active) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 467ed4dba817..14d324865e33 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -48,6 +48,7 @@ decompressor_handled_param(dfltcc); decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); decompressor_handled_param(cmma); +decompressor_handled_param(relocate_lowcore); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif @@ -190,13 +191,6 @@ static noinline __init void setup_lowcore_early(void) get_lowcore()->preempt_count = INIT_PREEMPT_COUNT; } -static noinline __init void setup_facility_list(void) -{ - memcpy(alt_stfle_fac_list, stfle_fac_list, sizeof(alt_stfle_fac_list)); - if (!IS_ENABLED(CONFIG_KERNEL_NOBP)) - __clear_facility(82, alt_stfle_fac_list); -} - static __init void detect_diag9c(void) { unsigned int cpu_address; @@ -291,7 +285,6 @@ void __init startup_init(void) lockdep_off(); sort_amode31_extable(); setup_lowcore_early(); - setup_facility_list(); detect_machine_type(); setup_arch_string(); setup_boot_command_line(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 454b6b92c7f8..749410cfdbc0 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -12,7 +12,7 @@ #include <linux/init.h> #include <linux/linkage.h> #include <asm/asm-extable.h> -#include <asm/alternative-asm.h> +#include <asm/alternative.h> #include <asm/processor.h> #include <asm/cache.h> #include <asm/dwarf.h> @@ -28,49 +28,54 @@ #include <asm/setup.h> #include <asm/nmi.h> #include <asm/nospec-insn.h> +#include <asm/lowcore.h> _LPP_OFFSET = __LC_LPP .macro STBEAR address - ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193 + ALTERNATIVE "nop", ".insn s,0xb2010000,\address", ALT_FACILITY(193) .endm .macro LBEAR address - ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193 + ALTERNATIVE "nop", ".insn s,0xb2000000,\address", ALT_FACILITY(193) .endm - .macro LPSWEY address,lpswe - ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193 + .macro LPSWEY address, lpswe + ALTERNATIVE_2 "b \lpswe;nopr", \ + ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY_EARLY(193), \ + __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \ + ALT_LOWCORE .endm - .macro MBEAR reg - ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193 + .macro MBEAR reg, lowcore + ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK(\lowcore)),\ + ALT_FACILITY(193) .endm - .macro CHECK_STACK savearea + .macro CHECK_STACK savearea, lowcore #ifdef CONFIG_CHECK_STACK tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD - lghi %r14,\savearea + la %r14,\savearea(\lowcore) jz stack_overflow #endif .endm - .macro CHECK_VMAP_STACK savearea,oklabel + .macro CHECK_VMAP_STACK savearea, lowcore, oklabel #ifdef CONFIG_VMAP_STACK lgr %r14,%r15 nill %r14,0x10000 - THREAD_SIZE oill %r14,STACK_INIT_OFFSET - clg %r14,__LC_KERNEL_STACK + clg %r14,__LC_KERNEL_STACK(\lowcore) je \oklabel - clg %r14,__LC_ASYNC_STACK + clg %r14,__LC_ASYNC_STACK(\lowcore) je \oklabel - clg %r14,__LC_MCCK_STACK + clg %r14,__LC_MCCK_STACK(\lowcore) je \oklabel - clg %r14,__LC_NODAT_STACK + clg %r14,__LC_NODAT_STACK(\lowcore) je \oklabel - clg %r14,__LC_RESTART_STACK + clg %r14,__LC_RESTART_STACK(\lowcore) je \oklabel - lghi %r14,\savearea + la %r14,\savearea(\lowcore) j stack_overflow #else j \oklabel @@ -100,30 +105,31 @@ _LPP_OFFSET = __LC_LPP .endm .macro BPOFF - ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82 + ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", ALT_SPEC(82) .endm .macro BPON - ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82 + ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82) .endm .macro BPENTER tif_ptr,tif_mask ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \ - "j .+12; nop; nop", 82 + "j .+12; nop; nop", ALT_SPEC(82) .endm .macro BPEXIT tif_ptr,tif_mask TSTMSK \tif_ptr,\tif_mask ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \ - "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82 + "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82) .endm #if IS_ENABLED(CONFIG_KVM) - .macro SIEEXIT sie_control - lg %r9,\sie_control # get control block pointer - ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce - ni __LC_CPU_FLAGS+7,255-_CIF_SIE + .macro SIEEXIT sie_control,lowcore + lg %r9,\sie_control # get control block pointer + ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce + lg %r9,__LC_CURRENT(\lowcore) + mvi __TI_sie(%r9),0 larl %r9,sie_exit # skip forward to sie_exit .endm #endif @@ -163,13 +169,14 @@ SYM_FUNC_START(__switch_to_asm) stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev lg %r15,0(%r4,%r3) # start of kernel stack of next agr %r15,%r5 # end of kernel stack of next - stg %r3,__LC_CURRENT # store task struct of next - stg %r15,__LC_KERNEL_STACK # store end of kernel stack + GET_LC %r13 + stg %r3,__LC_CURRENT(%r13) # store task struct of next + stg %r15,__LC_KERNEL_STACK(%r13) # store end of kernel stack lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next aghi %r3,__TASK_pid - mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next + mvc __LC_CURRENT_PID(4,%r13),0(%r3) # store pid of next + ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40) lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task - ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40 BR_EX %r14 SYM_FUNC_END(__switch_to_asm) @@ -183,15 +190,16 @@ SYM_FUNC_END(__switch_to_asm) */ SYM_FUNC_START(__sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers - lg %r12,__LC_CURRENT + GET_LC %r13 + lg %r14,__LC_CURRENT(%r13) stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical.. stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 - mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags + mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags lmg %r0,%r13,0(%r4) # load guest gprs 0-13 - oi __LC_CPU_FLAGS+7,_CIF_SIE + mvi __TI_sie(%r14),1 lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now @@ -210,8 +218,10 @@ SYM_FUNC_START(__sie64a) .Lsie_skip: lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce - ni __LC_CPU_FLAGS+7,255-_CIF_SIE + GET_LC %r14 + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce + lg %r14,__LC_CURRENT(%r14) + mvi __TI_sie(%r14),0 # some program checks are suppressing. C code (e.g. do_protection_exception) # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There # are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable. @@ -254,14 +264,15 @@ EXPORT_SYMBOL(sie_exit) */ SYM_CODE_START(system_call) - stpt __LC_SYS_ENTER_TIMER - stmg %r8,%r15,__LC_SAVE_AREA_SYNC + STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC + GET_LC %r13 + stpt __LC_SYS_ENTER_TIMER(%r13) BPOFF lghi %r14,0 .Lsysc_per: - STBEAR __LC_LAST_BREAK - lctlg %c1,%c1,__LC_KERNEL_ASCE - lg %r15,__LC_KERNEL_STACK + STBEAR __LC_LAST_BREAK(%r13) + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) + lg %r15,__LC_KERNEL_STACK(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15) # clear user controlled register to prevent speculative use @@ -276,17 +287,17 @@ SYM_CODE_START(system_call) xgr %r10,%r10 xgr %r11,%r11 la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs - mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC - MBEAR %r2 + mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC(%r13) + MBEAR %r2,%r13 lgr %r3,%r14 brasl %r14,__do_syscall STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE - mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lctlg %c1,%c1,__LC_USER_ASCE(%r13) + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) + stpt __LC_EXIT_TIMER(%r13) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) - stpt __LC_EXIT_TIMER LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE SYM_CODE_END(system_call) @@ -297,12 +308,13 @@ SYM_CODE_START(ret_from_fork) lgr %r3,%r11 brasl %r14,__ret_from_fork STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE - mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + GET_LC %r13 + lctlg %c1,%c1,__LC_USER_ASCE(%r13) + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) + stpt __LC_EXIT_TIMER(%r13) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) - stpt __LC_EXIT_TIMER LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE SYM_CODE_END(ret_from_fork) @@ -311,39 +323,40 @@ SYM_CODE_END(ret_from_fork) */ SYM_CODE_START(pgm_check_handler) - stpt __LC_SYS_ENTER_TIMER + STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC + GET_LC %r13 + stpt __LC_SYS_ENTER_TIMER(%r13) BPOFF - stmg %r8,%r15,__LC_SAVE_AREA_SYNC lgr %r10,%r15 - lmg %r8,%r9,__LC_PGM_OLD_PSW + lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13) tmhh %r8,0x0001 # coming from user space? jno .Lpgm_skip_asce - lctlg %c1,%c1,__LC_KERNEL_ASCE + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) j 3f # -> fault in user space .Lpgm_skip_asce: 1: tmhh %r8,0x4000 # PER bit set in old PSW ? jnz 2f # -> enabled, can't be a double fault - tm __LC_PGM_ILC+3,0x80 # check for per exception + tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -2: CHECK_STACK __LC_SAVE_AREA_SYNC +2: CHECK_STACK __LC_SAVE_AREA_SYNC,%r13 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) # CHECK_VMAP_STACK branches to stack_overflow or 4f - CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f -3: lg %r15,__LC_KERNEL_STACK + CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,%r13,4f +3: lg %r15,__LC_KERNEL_STACK(%r13) 4: la %r11,STACK_FRAME_OVERHEAD(%r15) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC - mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC(%r13) + mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13) stctg %c1,%c1,__PT_CR1(%r11) #if IS_ENABLED(CONFIG_KVM) - ltg %r12,__LC_GMAP + ltg %r12,__LC_GMAP(%r13) jz 5f clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11) jne 5f BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r10) + SIEEXIT __SF_SIE_CONTROL(%r10),%r13 #endif 5: stmg %r8,%r9,__PT_PSW(%r11) # clear user controlled registers to prevent speculative use @@ -359,11 +372,11 @@ SYM_CODE_START(pgm_check_handler) tmhh %r8,0x0001 # returning to user space? jno .Lpgm_exit_kernel STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE + lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON - stpt __LC_EXIT_TIMER + stpt __LC_EXIT_TIMER(%r13) .Lpgm_exit_kernel: - mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE @@ -372,11 +385,11 @@ SYM_CODE_START(pgm_check_handler) # single stepped system call # .Lpgm_svcper: - mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW + mvc __LC_RETURN_PSW(8,%r13),__LC_SVC_NEW_PSW(%r13) larl %r14,.Lsysc_per - stg %r14,__LC_RETURN_PSW+8 + stg %r14,__LC_RETURN_PSW+8(%r13) lghi %r14,1 - LBEAR __LC_PGM_LAST_BREAK + LBEAR __LC_PGM_LAST_BREAK(%r13) LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per SYM_CODE_END(pgm_check_handler) @@ -385,25 +398,27 @@ SYM_CODE_END(pgm_check_handler) */ .macro INT_HANDLER name,lc_old_psw,handler SYM_CODE_START(\name) - stckf __LC_INT_CLOCK - stpt __LC_SYS_ENTER_TIMER - STBEAR __LC_LAST_BREAK + STMG_LC %r8,%r15,__LC_SAVE_AREA_ASYNC + GET_LC %r13 + stckf __LC_INT_CLOCK(%r13) + stpt __LC_SYS_ENTER_TIMER(%r13) + STBEAR __LC_LAST_BREAK(%r13) BPOFF - stmg %r8,%r15,__LC_SAVE_AREA_ASYNC - lmg %r8,%r9,\lc_old_psw + lmg %r8,%r9,\lc_old_psw(%r13) tmhh %r8,0x0001 # interrupting from user ? jnz 1f #if IS_ENABLED(CONFIG_KVM) - TSTMSK __LC_CPU_FLAGS,_CIF_SIE + lg %r10,__LC_CURRENT(%r13) + tm __TI_sie(%r10),0xff jz 0f BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r15) + SIEEXIT __SF_SIE_CONTROL(%r15),%r13 #endif -0: CHECK_STACK __LC_SAVE_AREA_ASYNC +0: CHECK_STACK __LC_SAVE_AREA_ASYNC,%r13 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f -1: lctlg %c1,%c1,__LC_KERNEL_ASCE - lg %r15,__LC_KERNEL_STACK +1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) + lg %r15,__LC_KERNEL_STACK(%r13) 2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) @@ -417,18 +432,18 @@ SYM_CODE_START(\name) xgr %r7,%r7 xgr %r10,%r10 xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC - MBEAR %r11 + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC(%r13) + MBEAR %r11,%r13 stmg %r8,%r9,__PT_PSW(%r11) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,\handler - mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + mvc __LC_RETURN_PSW(16,%r13),__PT_PSW(%r11) tmhh %r8,0x0001 # returning to user ? jno 2f STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE + lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON - stpt __LC_EXIT_TIMER + stpt __LC_EXIT_TIMER(%r13) 2: LBEAR __PT_LAST_BREAK(%r11) lmg %r0,%r15,__PT_R0(%r11) LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE @@ -443,35 +458,37 @@ INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq */ SYM_CODE_START(mcck_int_handler) BPOFF - lmg %r8,%r9,__LC_MCK_OLD_PSW - TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE + GET_LC %r13 + lmg %r8,%r9,__LC_MCK_OLD_PSW(%r13) + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_SYSTEM_DAMAGE jo .Lmcck_panic # yes -> rest of mcck code invalid - TSTMSK __LC_MCCK_CODE,MCCK_CODE_CR_VALID + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CR_VALID jno .Lmcck_panic # control registers invalid -> panic ptlb - lghi %r14,__LC_CPU_TIMER_SAVE_AREA - mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) - TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID + lay %r14,__LC_CPU_TIMER_SAVE_AREA(%r13) + mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14) + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CPU_TIMER_VALID jo 3f - la %r14,__LC_SYS_ENTER_TIMER - clc 0(8,%r14),__LC_EXIT_TIMER + la %r14,__LC_SYS_ENTER_TIMER(%r13) + clc 0(8,%r14),__LC_EXIT_TIMER(%r13) jl 1f - la %r14,__LC_EXIT_TIMER -1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + la %r14,__LC_EXIT_TIMER(%r13) +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER(%r13) jl 2f - la %r14,__LC_LAST_UPDATE_TIMER + la %r14,__LC_LAST_UPDATE_TIMER(%r13) 2: spt 0(%r14) - mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) -3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID + mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14) +3: TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_MWP_VALID jno .Lmcck_panic tmhh %r8,0x0001 # interrupting from user ? jnz .Lmcck_user - TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic #if IS_ENABLED(CONFIG_KVM) - TSTMSK __LC_CPU_FLAGS,_CIF_SIE + lg %r10,__LC_CURRENT(%r13) + tm __TI_sie(%r10),0xff jz .Lmcck_user - # Need to compare the address instead of a CIF_SIE* flag. + # Need to compare the address instead of __TI_SIE flag. # Otherwise there would be a race between setting the flag # and entering SIE (or leaving and clearing the flag). This # would cause machine checks targeted at the guest to be @@ -480,18 +497,19 @@ SYM_CODE_START(mcck_int_handler) clgrjl %r9,%r14, 4f larl %r14,.Lsie_leave clgrjhe %r9,%r14, 4f - oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST + lg %r10,__LC_PCPU + oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST 4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r15) + SIEEXIT __SF_SIE_CONTROL(%r15),%r13 #endif .Lmcck_user: - lg %r15,__LC_MCCK_STACK + lg %r15,__LC_MCCK_STACK(%r13) la %r11,STACK_FRAME_OVERHEAD(%r15) stctg %c1,%c1,__PT_CR1(%r11) - lctlg %c1,%c1,__LC_KERNEL_ASCE + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - lghi %r14,__LC_GPREGS_SAVE_AREA+64 - stmg %r0,%r7,__PT_R0(%r11) + lay %r14,__LC_GPREGS_SAVE_AREA(%r13) + mvc __PT_R0(128,%r11),0(%r14) # clear user controlled registers to prevent speculative use xgr %r0,%r0 xgr %r1,%r1 @@ -501,7 +519,6 @@ SYM_CODE_START(mcck_int_handler) xgr %r6,%r6 xgr %r7,%r7 xgr %r10,%r10 - mvc __PT_R8(64,%r11),0(%r14) stmg %r8,%r9,__PT_PSW(%r11) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) @@ -509,12 +526,13 @@ SYM_CODE_START(mcck_int_handler) brasl %r14,s390_do_machine_check lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) - mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW - tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? + mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW + tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ? jno 0f BPON - stpt __LC_EXIT_TIMER -0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193 + stpt __LC_EXIT_TIMER(%r13) +0: ALTERNATIVE "brcl 0,0", __stringify(lay %r12,__LC_LAST_BREAK_SAVE_AREA(%r13)),\ + ALT_FACILITY(193) LBEAR 0(%r12) lmg %r11,%r15,__PT_R11(%r11) LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE @@ -550,7 +568,7 @@ SYM_CODE_START(mcck_int_handler) SYM_CODE_END(mcck_int_handler) SYM_CODE_START(restart_int_handler) - ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40 + ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40) stg %r15,__LC_SAVE_AREA_RESTART TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4 jz 0f @@ -558,15 +576,17 @@ SYM_CODE_START(restart_int_handler) 0: larl %r15,daton_psw lpswe 0(%r15) # turn dat on, keep irqs off .Ldaton: - lg %r15,__LC_RESTART_STACK + GET_LC %r15 + lg %r15,__LC_RESTART_STACK(%r15) xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15) - mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART - mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW + GET_LC %r13 + mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART(%r13) + mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW(%r13) xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) - lg %r1,__LC_RESTART_FN # load fn, parm & source cpu - lg %r2,__LC_RESTART_DATA - lgf %r3,__LC_RESTART_SOURCE + lg %r1,__LC_RESTART_FN(%r13) # load fn, parm & source cpu + lg %r2,__LC_RESTART_DATA(%r13) + lgf %r3,__LC_RESTART_SOURCE(%r13) ltgr %r3,%r3 # test source cpu address jm 1f # negative -> skip source stop 0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu @@ -588,7 +608,8 @@ SYM_CODE_END(restart_int_handler) * Setup a pt_regs so that show_trace can provide a good call trace. */ SYM_CODE_START(stack_overflow) - lg %r15,__LC_NODAT_STACK # change to panic stack + GET_LC %r15 + lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 45413b04efc5..396034b2fe67 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -10,6 +10,7 @@ #include <linux/init.h> #include <linux/linkage.h> +#include <asm/lowcore.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/page.h> @@ -18,14 +19,15 @@ __HEAD SYM_CODE_START(startup_continue) larl %r1,tod_clock_base - mvc 0(16,%r1),__LC_BOOT_CLOCK + GET_LC %r2 + mvc 0(16,%r1),__LC_BOOT_CLOCK(%r2) # # Setup stack # larl %r14,init_task - stg %r14,__LC_CURRENT + stg %r14,__LC_CURRENT(%r2) larl %r15,init_thread_union+STACK_INIT_OFFSET - stg %r15,__LC_KERNEL_STACK + stg %r15,__LC_KERNEL_STACK(%r2) brasl %r14,sclp_early_adjust_va # allow sclp_early_printk brasl %r14,startup_init # s390 specific early init brasl %r14,start_kernel # common init code diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 3a7d6e172211..f17bb7bf9392 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2112,7 +2112,7 @@ void do_restart(void *arg) tracing_off(); debug_locks_off(); lgr_info_log(); - smp_call_online_cpu(__do_restart, arg); + smp_call_ipl_cpu(__do_restart, arg); } /* on halt */ diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index f4cf65da6d49..8f681ccfb83a 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -62,7 +62,7 @@ static void __do_machine_kdump(void *data) * This need to be done *after* s390_reset_system set the * prefix register of this CPU to zero */ - memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA), + memcpy(absolute_pointer(get_lowcore()->floating_pt_save_area), phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512); call_nodat(1, int, purgatory, int, 1); diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 9b8c24ebb008..e11ec15960a1 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -4,6 +4,8 @@ #include <linux/cpu.h> #include <asm/nospec-branch.h> +int nobp = IS_ENABLED(CONFIG_KERNEL_NOBP); + static int __init nobp_setup_early(char *str) { bool enabled; @@ -17,11 +19,11 @@ static int __init nobp_setup_early(char *str) * The user explicitly requested nobp=1, enable it and * disable the expoline support. */ - __set_facility(82, alt_stfle_fac_list); + nobp = 1; if (IS_ENABLED(CONFIG_EXPOLINE)) nospec_disable = 1; } else { - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } return 0; } @@ -29,7 +31,7 @@ early_param("nobp", nobp_setup_early); static int __init nospec_setup_early(char *str) { - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; return 0; } early_param("nospec", nospec_setup_early); @@ -40,7 +42,7 @@ static int __init nospec_report(void) pr_info("Spectre V2 mitigation: etokens\n"); if (nospec_uses_trampoline()) pr_info("Spectre V2 mitigation: execute trampolines\n"); - if (__test_facility(82, alt_stfle_fac_list)) + if (nobp_enabled()) pr_info("Spectre V2 mitigation: limited branch prediction\n"); return 0; } @@ -66,14 +68,14 @@ void __init nospec_auto_detect(void) */ if (__is_defined(CC_USING_EXPOLINE)) nospec_disable = 1; - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } else if (__is_defined(CC_USING_EXPOLINE)) { /* * The kernel has been compiled with expolines. * Keep expolines enabled and disable nobp. */ nospec_disable = 0; - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } /* * If the kernel has not been compiled with expolines the @@ -86,7 +88,7 @@ static int __init spectre_v2_setup_early(char *str) { if (str && !strncmp(str, "on", 2)) { nospec_disable = 0; - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } if (str && !strncmp(str, "off", 3)) nospec_disable = 1; diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c index 52d4353188ad..a95188818637 100644 --- a/arch/s390/kernel/nospec-sysfs.c +++ b/arch/s390/kernel/nospec-sysfs.c @@ -17,7 +17,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Mitigation: etokens\n"); if (nospec_uses_trampoline()) return sprintf(buf, "Mitigation: execute trampolines\n"); - if (__test_facility(82, alt_stfle_fac_list)) + if (nobp_enabled()) return sprintf(buf, "Mitigation: limited branch prediction\n"); return sprintf(buf, "Vulnerable\n"); } diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 1434642e9cba..6968be98af11 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -556,25 +556,31 @@ static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth) struct cf_trailer_entry *trailer_start, *trailer_stop; struct cf_ctrset_entry *ctrstart, *ctrstop; size_t offset = 0; + int i; - auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1; - do { + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset); ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset); + /* Counter set not authorized */ + if (!(auth & cpumf_ctr_ctl[i])) + continue; + /* Counter set size zero was not saved */ + if (!cpum_cf_read_setsize(i)) + continue; + if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { pr_err_once("cpum_cf_diag counter set compare error " "in set %i\n", ctrstart->set); return 0; } - auth &= ~cpumf_ctr_ctl[ctrstart->set]; if (ctrstart->def == CF_DIAG_CTRSET_DEF) { cfdiag_diffctrset((u64 *)(ctrstart + 1), (u64 *)(ctrstop + 1), ctrstart->ctr); offset += ctrstart->ctr * sizeof(u64) + sizeof(*ctrstart); } - } while (ctrstart->def && auth); + } /* Save time_stamp from start of event in stop's trailer */ trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset); diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 65c1464eea4f..5ce9a795a0fe 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -17,7 +17,8 @@ #include <linux/mm_types.h> #include <linux/delay.h> #include <linux/cpu.h> - +#include <linux/smp.h> +#include <asm/text-patching.h> #include <asm/diag.h> #include <asm/facility.h> #include <asm/elf.h> @@ -79,6 +80,23 @@ void notrace stop_machine_yield(const struct cpumask *cpumask) } } +static void do_sync_core(void *info) +{ + sync_core(); +} + +void text_poke_sync(void) +{ + on_each_cpu(do_sync_core, NULL, 1); +} + +void text_poke_sync_lock(void) +{ + cpus_read_lock(); + text_poke_sync(); + cpus_read_unlock(); +} + /* * cpu_init - initializes state that is per-CPU. */ diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 88087a32ebc6..69fcaf54d5ca 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -9,6 +9,7 @@ #include <asm/asm-offsets.h> #include <asm/nospec-insn.h> #include <asm/sigp.h> +#include <asm/lowcore.h> GEN_BR_THUNK %r9 @@ -20,20 +21,15 @@ # r3 = Parameter for function # SYM_CODE_START(store_status) - /* Save register one and load save area base */ - stg %r1,__LC_SAVE_AREA_RESTART + STMG_LC %r0,%r15,__LC_GPREGS_SAVE_AREA /* General purpose registers */ - lghi %r1,__LC_GPREGS_SAVE_AREA - stmg %r0,%r15,0(%r1) - mvc 8(8,%r1),__LC_SAVE_AREA_RESTART + GET_LC %r13 /* Control registers */ - lghi %r1,__LC_CREGS_SAVE_AREA - stctg %c0,%c15,0(%r1) + stctg %c0,%c15,__LC_CREGS_SAVE_AREA(%r13) /* Access registers */ - lghi %r1,__LC_AREGS_SAVE_AREA - stam %a0,%a15,0(%r1) + stamy %a0,%a15,__LC_AREGS_SAVE_AREA(%r13) /* Floating point registers */ - lghi %r1,__LC_FPREGS_SAVE_AREA + lay %r1,__LC_FPREGS_SAVE_AREA(%r13) std %f0, 0x00(%r1) std %f1, 0x08(%r1) std %f2, 0x10(%r1) @@ -51,21 +47,21 @@ SYM_CODE_START(store_status) std %f14,0x70(%r1) std %f15,0x78(%r1) /* Floating point control register */ - lghi %r1,__LC_FP_CREG_SAVE_AREA + lay %r1,__LC_FP_CREG_SAVE_AREA(%r13) stfpc 0(%r1) /* CPU timer */ - lghi %r1,__LC_CPU_TIMER_SAVE_AREA + lay %r1,__LC_CPU_TIMER_SAVE_AREA(%r13) stpt 0(%r1) /* Store prefix register */ - lghi %r1,__LC_PREFIX_SAVE_AREA + lay %r1,__LC_PREFIX_SAVE_AREA(%r13) stpx 0(%r1) /* Clock comparator - seven bytes */ - lghi %r1,__LC_CLOCK_COMP_SAVE_AREA larl %r4,clkcmp stckc 0(%r4) + lay %r1,__LC_CLOCK_COMP_SAVE_AREA(%r13) mvc 1(7,%r1),1(%r4) /* Program status word */ - lghi %r1,__LC_PSW_SAVE_AREA + lay %r1,__LC_PSW_SAVE_AREA(%r13) epsw %r4,%r5 st %r4,0(%r1) st %r5,4(%r1) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 3993f4caf224..4ec99f73fa27 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -149,13 +149,12 @@ unsigned long __bootdata_preserved(max_mappable); struct physmem_info __bootdata(physmem_info); struct vm_layout __bootdata_preserved(vm_layout); -EXPORT_SYMBOL_GPL(vm_layout); +EXPORT_SYMBOL(vm_layout); int __bootdata_preserved(__kaslr_enabled); unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); u64 __bootdata_preserved(stfle_fac_list[16]); EXPORT_SYMBOL(stfle_fac_list); -u64 alt_stfle_fac_list[16]; struct oldmem_data __bootdata_preserved(oldmem_data); unsigned long VMALLOC_START; @@ -406,6 +405,7 @@ static void __init setup_lowcore(void) panic("%s: Failed to allocate %zu bytes align=%zx\n", __func__, sizeof(*lc), sizeof(*lc)); + lc->pcpu = (unsigned long)per_cpu_ptr(&pcpu_devices, 0); lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT; lc->restart_psw.addr = __pa(restart_int_handler); lc->external_new_psw.mask = PSW_KERNEL_BITS; @@ -889,6 +889,9 @@ void __init setup_arch(char **cmdline_p) else pr_info("Linux is running as a guest in 64-bit mode\n"); + if (have_relocated_lowcore()) + pr_info("Lowcore relocated to 0x%px\n", get_lowcore()); + log_component_list(); /* Have one command line that is parsed and saved in /proc/cmdline */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index c3c54adf67bc..fbba37ec53cf 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -74,16 +74,15 @@ enum { CPU_STATE_CONFIGURED, }; -struct pcpu { - unsigned long ec_mask; /* bit mask for ec_xxx functions */ - unsigned long ec_clk; /* sigp timestamp for ec_xxx */ - signed char state; /* physical cpu state */ - signed char polarization; /* physical polarization */ - u16 address; /* physical cpu address */ -}; - static u8 boot_core_type; -static struct pcpu pcpu_devices[NR_CPUS]; +DEFINE_PER_CPU(struct pcpu, pcpu_devices); +/* + * Pointer to the pcpu area of the boot CPU. This is required when a restart + * interrupt is triggered on an offline CPU. For that case accessing percpu + * data with the common primitives does not work, since the percpu offset is + * stored in a non existent lowcore. + */ +static struct pcpu *ipl_pcpu; unsigned int smp_cpu_mt_shift; EXPORT_SYMBOL(smp_cpu_mt_shift); @@ -174,8 +173,8 @@ static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address) int cpu; for_each_cpu(cpu, mask) - if (pcpu_devices[cpu].address == address) - return pcpu_devices + cpu; + if (per_cpu(pcpu_devices, cpu).address == address) + return &per_cpu(pcpu_devices, cpu); return NULL; } @@ -230,13 +229,11 @@ out: return -ENOMEM; } -static void pcpu_free_lowcore(struct pcpu *pcpu) +static void pcpu_free_lowcore(struct pcpu *pcpu, int cpu) { unsigned long async_stack, nodat_stack, mcck_stack; struct lowcore *lc; - int cpu; - cpu = pcpu - pcpu_devices; lc = lowcore_ptr[cpu]; nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET; async_stack = lc->async_stack - STACK_INIT_OFFSET; @@ -259,6 +256,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); lc->cpu_nr = cpu; + lc->pcpu = (unsigned long)pcpu; lc->restart_flags = RESTART_FLAG_CTLREGS; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; @@ -277,12 +275,10 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) arch_spin_lock_setup(cpu); } -static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) +static void pcpu_attach_task(int cpu, struct task_struct *tsk) { struct lowcore *lc; - int cpu; - cpu = pcpu - pcpu_devices; lc = lowcore_ptr[cpu]; lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET; lc->current_task = (unsigned long)tsk; @@ -296,18 +292,16 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) lc->steal_timer = 0; } -static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) +static void pcpu_start_fn(int cpu, void (*func)(void *), void *data) { struct lowcore *lc; - int cpu; - cpu = pcpu - pcpu_devices; lc = lowcore_ptr[cpu]; lc->restart_stack = lc->kernel_stack; lc->restart_fn = (unsigned long) func; lc->restart_data = (unsigned long) data; lc->restart_source = -1U; - pcpu_sigp_retry(pcpu, SIGP_RESTART, 0); + pcpu_sigp_retry(per_cpu_ptr(&pcpu_devices, cpu), SIGP_RESTART, 0); } typedef void (pcpu_delegate_fn)(void *); @@ -320,14 +314,14 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data) func(data); /* should not return */ } -static void pcpu_delegate(struct pcpu *pcpu, +static void pcpu_delegate(struct pcpu *pcpu, int cpu, pcpu_delegate_fn *func, void *data, unsigned long stack) { struct lowcore *lc, *abs_lc; unsigned int source_cpu; - lc = lowcore_ptr[pcpu - pcpu_devices]; + lc = lowcore_ptr[cpu]; source_cpu = stap(); if (pcpu->address == source_cpu) { @@ -377,38 +371,22 @@ static int pcpu_set_smt(unsigned int mtid) smp_cpu_mt_shift = 0; while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift)) smp_cpu_mt_shift++; - pcpu_devices[0].address = stap(); + per_cpu(pcpu_devices, 0).address = stap(); } return cc; } /* - * Call function on an online CPU. - */ -void smp_call_online_cpu(void (*func)(void *), void *data) -{ - struct pcpu *pcpu; - - /* Use the current cpu if it is online. */ - pcpu = pcpu_find_address(cpu_online_mask, stap()); - if (!pcpu) - /* Use the first online cpu. */ - pcpu = pcpu_devices + cpumask_first(cpu_online_mask); - pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack); -} - -/* * Call function on the ipl CPU. */ void smp_call_ipl_cpu(void (*func)(void *), void *data) { struct lowcore *lc = lowcore_ptr[0]; - if (pcpu_devices[0].address == stap()) + if (ipl_pcpu->address == stap()) lc = get_lowcore(); - pcpu_delegate(&pcpu_devices[0], func, data, - lc->nodat_stack); + pcpu_delegate(ipl_pcpu, 0, func, data, lc->nodat_stack); } int smp_find_processor_id(u16 address) @@ -416,21 +394,21 @@ int smp_find_processor_id(u16 address) int cpu; for_each_present_cpu(cpu) - if (pcpu_devices[cpu].address == address) + if (per_cpu(pcpu_devices, cpu).address == address) return cpu; return -1; } void schedule_mcck_handler(void) { - pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending); + pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_mcck_pending); } bool notrace arch_vcpu_is_preempted(int cpu) { if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) return false; - if (pcpu_running(pcpu_devices + cpu)) + if (pcpu_running(per_cpu_ptr(&pcpu_devices, cpu))) return false; return true; } @@ -442,7 +420,7 @@ void notrace smp_yield_cpu(int cpu) return; diag_stat_inc_norecursion(DIAG_STAT_X09C); asm volatile("diag %0,0,0x9c" - : : "d" (pcpu_devices[cpu].address)); + : : "d" (per_cpu(pcpu_devices, cpu).address)); } EXPORT_SYMBOL_GPL(smp_yield_cpu); @@ -463,7 +441,7 @@ void notrace smp_emergency_stop(void) end = get_tod_clock() + (1000000UL << 12); for_each_cpu(cpu, &cpumask) { - struct pcpu *pcpu = pcpu_devices + cpu; + struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu); set_bit(ec_stop_cpu, &pcpu->ec_mask); while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL, 0, NULL) == SIGP_CC_BUSY && @@ -472,7 +450,7 @@ void notrace smp_emergency_stop(void) } while (get_tod_clock() < end) { for_each_cpu(cpu, &cpumask) - if (pcpu_stopped(pcpu_devices + cpu)) + if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu))) cpumask_clear_cpu(cpu, &cpumask); if (cpumask_empty(&cpumask)) break; @@ -487,6 +465,7 @@ NOKPROBE_SYMBOL(smp_emergency_stop); */ void smp_send_stop(void) { + struct pcpu *pcpu; int cpu; /* Disable all interrupts/machine checks */ @@ -502,8 +481,9 @@ void smp_send_stop(void) for_each_online_cpu(cpu) { if (cpu == smp_processor_id()) continue; - pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0); - while (!pcpu_stopped(pcpu_devices + cpu)) + pcpu = per_cpu_ptr(&pcpu_devices, cpu); + pcpu_sigp_retry(pcpu, SIGP_STOP, 0); + while (!pcpu_stopped(pcpu)) cpu_relax(); } } @@ -517,7 +497,7 @@ static void smp_handle_ext_call(void) unsigned long bits; /* handle bit signal external calls */ - bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0); + bits = this_cpu_xchg(pcpu_devices.ec_mask, 0); if (test_bit(ec_stop_cpu, &bits)) smp_stop_cpu(); if (test_bit(ec_schedule, &bits)) @@ -542,12 +522,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) int cpu; for_each_cpu(cpu, mask) - pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); + pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single); } void arch_send_call_function_single_ipi(int cpu) { - pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); + pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single); } /* @@ -557,13 +537,13 @@ void arch_send_call_function_single_ipi(int cpu) */ void arch_smp_send_reschedule(int cpu) { - pcpu_ec_call(pcpu_devices + cpu, ec_schedule); + pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_schedule); } #ifdef CONFIG_IRQ_WORK void arch_irq_work_raise(void) { - pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work); + pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_irq_work); } #endif @@ -575,7 +555,7 @@ int smp_store_status(int cpu) struct pcpu *pcpu; unsigned long pa; - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); lc = lowcore_ptr[cpu]; pa = __pa(&lc->floating_pt_save_area); if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, @@ -683,17 +663,17 @@ void __init smp_save_dump_secondary_cpus(void) void smp_cpu_set_polarization(int cpu, int val) { - pcpu_devices[cpu].polarization = val; + per_cpu(pcpu_devices, cpu).polarization = val; } int smp_cpu_get_polarization(int cpu) { - return pcpu_devices[cpu].polarization; + return per_cpu(pcpu_devices, cpu).polarization; } int smp_cpu_get_cpu_address(int cpu) { - return pcpu_devices[cpu].address; + return per_cpu(pcpu_devices, cpu).address; } static void __ref smp_get_core_info(struct sclp_core_info *info, int early) @@ -732,7 +712,7 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) { if (pcpu_find_address(cpu_present_mask, address + i)) continue; - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); pcpu->address = address + i; if (configured) pcpu->state = CPU_STATE_CONFIGURED; @@ -767,7 +747,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) * that all SMT threads get subsequent logical CPU numbers. */ if (early) { - core_id = pcpu_devices[0].address >> smp_cpu_mt_shift; + core_id = per_cpu(pcpu_devices, 0).address >> smp_cpu_mt_shift; for (i = 0; i < info->configured; i++) { core = &info->core[i]; if (core->core_id == core_id) { @@ -867,7 +847,7 @@ static void smp_start_secondary(void *cpuvoid) /* Upping and downing of CPUs */ int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - struct pcpu *pcpu = pcpu_devices + cpu; + struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu); int rc; if (pcpu->state != CPU_STATE_CONFIGURED) @@ -885,8 +865,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) */ system_ctlreg_lock(); pcpu_prepare_secondary(pcpu, cpu); - pcpu_attach_task(pcpu, tidle); - pcpu_start_fn(pcpu, smp_start_secondary, NULL); + pcpu_attach_task(cpu, tidle); + pcpu_start_fn(cpu, smp_start_secondary, NULL); /* Wait until cpu puts itself in the online & active maps */ while (!cpu_online(cpu)) cpu_relax(); @@ -931,18 +911,19 @@ void __cpu_die(unsigned int cpu) struct pcpu *pcpu; /* Wait until target cpu is down */ - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); while (!pcpu_stopped(pcpu)) cpu_relax(); - pcpu_free_lowcore(pcpu); + pcpu_free_lowcore(pcpu, cpu); cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); + pcpu->flags = 0; } void __noreturn cpu_die(void) { idle_task_exit(); - pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0); + pcpu_sigp_retry(this_cpu_ptr(&pcpu_devices), SIGP_STOP, 0); for (;;) ; } @@ -972,11 +953,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { - struct pcpu *pcpu = pcpu_devices; + struct lowcore *lc = get_lowcore(); WARN_ON(!cpu_present(0) || !cpu_online(0)); - pcpu->state = CPU_STATE_CONFIGURED; - get_lowcore()->percpu_offset = __per_cpu_offset[0]; + lc->percpu_offset = __per_cpu_offset[0]; + ipl_pcpu = per_cpu_ptr(&pcpu_devices, 0); + ipl_pcpu->state = CPU_STATE_CONFIGURED; + lc->pcpu = (unsigned long)ipl_pcpu; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); } @@ -984,8 +967,8 @@ void __init smp_setup_processor_id(void) { struct lowcore *lc = get_lowcore(); - pcpu_devices[0].address = stap(); lc->cpu_nr = 0; + per_cpu(pcpu_devices, 0).address = stap(); lc->spinlock_lockval = arch_spin_lockval(0); lc->spinlock_index = 0; } @@ -1007,7 +990,7 @@ static ssize_t cpu_configure_show(struct device *dev, ssize_t count; mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state); + count = sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state); mutex_unlock(&smp_cpu_state_mutex); return count; } @@ -1033,7 +1016,7 @@ static ssize_t cpu_configure_store(struct device *dev, for (i = 0; i <= smp_cpu_mtid; i++) if (cpu_online(cpu + i)) goto out; - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); rc = 0; switch (val) { case 0: @@ -1045,7 +1028,7 @@ static ssize_t cpu_configure_store(struct device *dev, for (i = 0; i <= smp_cpu_mtid; i++) { if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) continue; - pcpu[i].state = CPU_STATE_STANDBY; + per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_STANDBY; smp_cpu_set_polarization(cpu + i, POLARIZATION_UNKNOWN); } @@ -1060,7 +1043,7 @@ static ssize_t cpu_configure_store(struct device *dev, for (i = 0; i <= smp_cpu_mtid; i++) { if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) continue; - pcpu[i].state = CPU_STATE_CONFIGURED; + per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_CONFIGURED; smp_cpu_set_polarization(cpu + i, POLARIZATION_UNKNOWN); } @@ -1079,7 +1062,7 @@ static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); static ssize_t show_cpu_address(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", pcpu_devices[dev->id].address); + return sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address); } static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); @@ -1105,14 +1088,14 @@ static struct attribute_group cpu_online_attr_group = { static int smp_cpu_online(unsigned int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group); } static int smp_cpu_pre_down(unsigned int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group); return 0; @@ -1125,7 +1108,7 @@ bool arch_cpu_is_hotpluggable(int cpu) int arch_register_cpu(int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); int rc; c->hotpluggable = arch_cpu_is_hotpluggable(cpu); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 98ef6dc7916b..22029ecae1c5 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -594,7 +594,7 @@ static int __init topology_setup(char *str) } early_param("topology", topology_setup); -static int topology_ctl_handler(struct ctl_table *ctl, int write, +static int topology_ctl_handler(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int enabled = topology_is_enabled(); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index fa62fa0e369f..36db065c7cf7 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -18,11 +18,22 @@ #include <asm/sections.h> #include <asm/uv.h> +#if !IS_ENABLED(CONFIG_KVM) +unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) +{ + return 0; +} + +int gmap_fault(struct gmap *gmap, unsigned long gaddr, + unsigned int fault_flags) +{ + return 0; +} +#endif + /* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST int __bootdata_preserved(prot_virt_guest); EXPORT_SYMBOL(prot_virt_guest); -#endif /* * uv_info contains both host and guest information but it's currently only @@ -35,7 +46,6 @@ EXPORT_SYMBOL(prot_virt_guest); struct uv_info __bootdata_preserved(uv_info); EXPORT_SYMBOL(uv_info); -#if IS_ENABLED(CONFIG_KVM) int __bootdata_preserved(prot_virt_host); EXPORT_SYMBOL(prot_virt_host); @@ -543,9 +553,6 @@ int arch_make_page_accessible(struct page *page) return arch_make_folio_accessible(page_folio(page)); } EXPORT_SYMBOL_GPL(arch_make_page_accessible); -#endif - -#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) static ssize_t uv_query_facilities(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -721,24 +728,13 @@ static struct attribute_group uv_query_attr_group = { static ssize_t uv_is_prot_virt_guest(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - int val = 0; - -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST - val = prot_virt_guest; -#endif - return sysfs_emit(buf, "%d\n", val); + return sysfs_emit(buf, "%d\n", prot_virt_guest); } static ssize_t uv_is_prot_virt_host(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - int val = 0; - -#if IS_ENABLED(CONFIG_KVM) - val = prot_virt_host; -#endif - - return sysfs_emit(buf, "%d\n", val); + return sysfs_emit(buf, "%d\n", prot_virt_host); } static struct kobj_attribute uv_prot_virt_guest = @@ -790,4 +786,3 @@ out_kobj: return rc; } device_initcall(uv_info_init); -#endif diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index a1ce3925ec71..975c654cf5a5 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -190,6 +190,9 @@ SECTIONS . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) + RUNTIME_CONST(shift, d_hash_shift) + RUNTIME_CONST(ptr, dentry_hashtable) + PERCPU_SECTION(0x100) . = ALIGN(PAGE_SIZE); @@ -219,6 +222,8 @@ SECTIONS QUAD(init_mm) QUAD(swapper_pg_dir) QUAD(invalid_pg_dir) + QUAD(__alt_instructions) + QUAD(__alt_instructions_end) #ifdef CONFIG_KASAN QUAD(kasan_early_shadow_page) QUAD(kasan_early_shadow_pte) diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 0c9a73a18826..9f86ad8fa8b4 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -75,7 +75,7 @@ static inline int arch_load_niai4(int *lock) int owner; asm_inline volatile( - ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */ + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */ " l %0,%1\n" : "=d" (owner) : "Q" (*lock) : "memory"); return owner; @@ -86,7 +86,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new) int expected = old; asm_inline volatile( - ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */ + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */ " cs %0,%3,%1\n" : "=d" (old), "=Q" (*lock) : "0" (old), "d" (new), "Q" (*lock) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 5cb5e724cde3..75d15bf41d97 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -243,7 +243,7 @@ static int cmm_skip_blanks(char *cp, char **endp) return str != cp; } -static int cmm_pages_handler(struct ctl_table *ctl, int write, +static int cmm_pages_handler(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { long nr = cmm_get_pages(); @@ -262,7 +262,7 @@ static int cmm_pages_handler(struct ctl_table *ctl, int write, return 0; } -static int cmm_timed_pages_handler(struct ctl_table *ctl, int write, +static int cmm_timed_pages_handler(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -282,7 +282,7 @@ static int cmm_timed_pages_handler(struct ctl_table *ctl, int write, return 0; } -static int cmm_timeout_handler(struct ctl_table *ctl, int write, +static int cmm_timeout_handler(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { char buf[64], *p; diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 45db5f47b22d..98dab3e049de 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -36,6 +36,16 @@ enum address_markers_idx { VMEMMAP_END_NR, VMALLOC_NR, VMALLOC_END_NR, +#ifdef CONFIG_KMSAN + KMSAN_VMALLOC_SHADOW_START_NR, + KMSAN_VMALLOC_SHADOW_END_NR, + KMSAN_VMALLOC_ORIGIN_START_NR, + KMSAN_VMALLOC_ORIGIN_END_NR, + KMSAN_MODULES_SHADOW_START_NR, + KMSAN_MODULES_SHADOW_END_NR, + KMSAN_MODULES_ORIGIN_START_NR, + KMSAN_MODULES_ORIGIN_END_NR, +#endif MODULES_NR, MODULES_END_NR, ABS_LOWCORE_NR, @@ -65,6 +75,16 @@ static struct addr_marker address_markers[] = { [VMEMMAP_END_NR] = {0, "vmemmap Area End"}, [VMALLOC_NR] = {0, "vmalloc Area Start"}, [VMALLOC_END_NR] = {0, "vmalloc Area End"}, +#ifdef CONFIG_KMSAN + [KMSAN_VMALLOC_SHADOW_START_NR] = {0, "Kmsan vmalloc Shadow Start"}, + [KMSAN_VMALLOC_SHADOW_END_NR] = {0, "Kmsan vmalloc Shadow End"}, + [KMSAN_VMALLOC_ORIGIN_START_NR] = {0, "Kmsan vmalloc Origins Start"}, + [KMSAN_VMALLOC_ORIGIN_END_NR] = {0, "Kmsan vmalloc Origins End"}, + [KMSAN_MODULES_SHADOW_START_NR] = {0, "Kmsan Modules Shadow Start"}, + [KMSAN_MODULES_SHADOW_END_NR] = {0, "Kmsan Modules Shadow End"}, + [KMSAN_MODULES_ORIGIN_START_NR] = {0, "Kmsan Modules Origins Start"}, + [KMSAN_MODULES_ORIGIN_END_NR] = {0, "Kmsan Modules Origins End"}, +#endif [MODULES_NR] = {0, "Modules Area Start"}, [MODULES_END_NR] = {0, "Modules Area End"}, [ABS_LOWCORE_NR] = {0, "Lowcore Area Start"}, @@ -307,6 +327,16 @@ static int pt_dump_init(void) address_markers[KFENCE_START_NR].start_address = kfence_start; address_markers[KFENCE_END_NR].start_address = kfence_start + KFENCE_POOL_SIZE; #endif +#ifdef CONFIG_KMSAN + address_markers[KMSAN_VMALLOC_SHADOW_START_NR].start_address = KMSAN_VMALLOC_SHADOW_START; + address_markers[KMSAN_VMALLOC_SHADOW_END_NR].start_address = KMSAN_VMALLOC_SHADOW_END; + address_markers[KMSAN_VMALLOC_ORIGIN_START_NR].start_address = KMSAN_VMALLOC_ORIGIN_START; + address_markers[KMSAN_VMALLOC_ORIGIN_END_NR].start_address = KMSAN_VMALLOC_ORIGIN_END; + address_markers[KMSAN_MODULES_SHADOW_START_NR].start_address = KMSAN_MODULES_SHADOW_START; + address_markers[KMSAN_MODULES_SHADOW_END_NR].start_address = KMSAN_MODULES_SHADOW_END; + address_markers[KMSAN_MODULES_ORIGIN_START_NR].start_address = KMSAN_MODULES_ORIGIN_START; + address_markers[KMSAN_MODULES_ORIGIN_END_NR].start_address = KMSAN_MODULES_ORIGIN_END; +#endif sort_address_markers(); #ifdef CONFIG_PTDUMP_DEBUGFS debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 632c3a55feed..28a18c42ba99 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -48,7 +48,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz } /* - * s390_kernel_write - write to kernel memory bypassing DAT + * __s390_kernel_write - write to kernel memory bypassing DAT * @dst: destination address * @src: source address * @size: number of bytes to copy @@ -61,7 +61,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz */ static DEFINE_SPINLOCK(s390_kernel_write_lock); -notrace void *s390_kernel_write(void *dst, const void *src, size_t size) +notrace void *__s390_kernel_write(void *dst, const void *src, size_t size) { void *tmp = dst; unsigned long flags; diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 0ef83b6ac0db..84482a921332 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -268,33 +268,20 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, } } -int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs, + unsigned long *bit) { - struct zpci_dev *zdev = to_zpci(pdev); - unsigned int hwirq, msi_vecs, cpu; - unsigned long bit; - struct msi_desc *msi; - struct msi_msg msg; - int cpu_addr; - int rc, irq; - - zdev->aisb = -1UL; - zdev->msi_first_bit = -1U; - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; - msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); - if (irq_delivery == DIRECTED) { /* Allocate cpu vector bits */ - bit = airq_iv_alloc(zpci_ibv[0], msi_vecs); - if (bit == -1UL) + *bit = airq_iv_alloc(zpci_ibv[0], msi_vecs); + if (*bit == -1UL) return -EIO; } else { /* Allocate adapter summary indicator bit */ - bit = airq_iv_alloc_bit(zpci_sbv); - if (bit == -1UL) + *bit = airq_iv_alloc_bit(zpci_sbv); + if (*bit == -1UL) return -EIO; - zdev->aisb = bit; + zdev->aisb = *bit; /* Create adapter interrupt vector */ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL); @@ -302,27 +289,66 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return -ENOMEM; /* Wire up shortcut pointer */ - zpci_ibv[bit] = zdev->aibv; + zpci_ibv[*bit] = zdev->aibv; /* Each function has its own interrupt vector */ - bit = 0; + *bit = 0; } + return 0; +} - /* Request MSI interrupts */ +int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + unsigned int hwirq, msi_vecs, irqs_per_msi, i, cpu; + struct zpci_dev *zdev = to_zpci(pdev); + struct msi_desc *msi; + struct msi_msg msg; + unsigned long bit; + int cpu_addr; + int rc, irq; + + zdev->aisb = -1UL; + zdev->msi_first_bit = -1U; + + msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); + if (msi_vecs < nvec) { + pr_info("%s requested %d irqs, allocate system limit of %d", + pci_name(pdev), nvec, zdev->max_msi); + } + + rc = __alloc_airq(zdev, msi_vecs, &bit); + if (rc < 0) + return rc; + + /* + * Request MSI interrupts: + * When using MSI, nvec_used interrupt sources and their irq + * descriptors are controlled through one msi descriptor. + * Thus the outer loop over msi descriptors shall run only once, + * while two inner loops iterate over the interrupt vectors. + * When using MSI-X, each interrupt vector/irq descriptor + * is bound to exactly one msi descriptor (nvec_used is one). + * So the inner loops are executed once, while the outer iterates + * over the MSI-X descriptors. + */ hwirq = bit; msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) { - rc = -EIO; if (hwirq - bit >= msi_vecs) break; - irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE, - (irq_delivery == DIRECTED) ? - msi->affinity : NULL); + irqs_per_msi = min_t(unsigned int, msi_vecs, msi->nvec_used); + irq = __irq_alloc_descs(-1, 0, irqs_per_msi, 0, THIS_MODULE, + (irq_delivery == DIRECTED) ? + msi->affinity : NULL); if (irq < 0) return -ENOMEM; - rc = irq_set_msi_desc(irq, msi); - if (rc) - return rc; - irq_set_chip_and_handler(irq, &zpci_irq_chip, - handle_percpu_irq); + + for (i = 0; i < irqs_per_msi; i++) { + rc = irq_set_msi_desc_off(irq, i, msi); + if (rc) + return rc; + irq_set_chip_and_handler(irq + i, &zpci_irq_chip, + handle_percpu_irq); + } + msg.data = hwirq - bit; if (irq_delivery == DIRECTED) { if (msi->affinity) @@ -335,31 +361,35 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) msg.address_lo |= (cpu_addr << 8); for_each_possible_cpu(cpu) { - airq_iv_set_data(zpci_ibv[cpu], hwirq, irq); + for (i = 0; i < irqs_per_msi; i++) + airq_iv_set_data(zpci_ibv[cpu], + hwirq + i, irq + i); } } else { msg.address_lo = zdev->msi_addr & 0xffffffff; - airq_iv_set_data(zdev->aibv, hwirq, irq); + for (i = 0; i < irqs_per_msi; i++) + airq_iv_set_data(zdev->aibv, hwirq + i, irq + i); } msg.address_hi = zdev->msi_addr >> 32; pci_write_msi_msg(irq, &msg); - hwirq++; + hwirq += irqs_per_msi; } zdev->msi_first_bit = bit; - zdev->msi_nr_irqs = msi_vecs; + zdev->msi_nr_irqs = hwirq - bit; rc = zpci_set_irq(zdev); if (rc) return rc; - return (msi_vecs == nvec) ? 0 : msi_vecs; + return (zdev->msi_nr_irqs == nvec) ? 0 : zdev->msi_nr_irqs; } void arch_teardown_msi_irqs(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); struct msi_desc *msi; + unsigned int i; int rc; /* Disable interrupts */ @@ -369,8 +399,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) /* Release MSI interrupts */ msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) { - irq_set_msi_desc(msi->irq, NULL); - irq_free_desc(msi->irq); + for (i = 0; i < msi->nvec_used; i++) { + irq_set_msi_desc(msi->irq + i, NULL); + irq_free_desc(msi->irq + i); + } msi->msg.address_lo = 0; msi->msg.address_hi = 0; msi->msg.data = 0; diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h index 587fb7841096..0ca8c3463166 100644 --- a/arch/sparc/include/asm/vio.h +++ b/arch/sparc/include/asm/vio.h @@ -483,11 +483,7 @@ int __vio_register_driver(struct vio_driver *drv, struct module *owner, __vio_register_driver(driver, THIS_MODULE, KBUILD_MODNAME) void vio_unregister_driver(struct vio_driver *drv); -static inline struct vio_driver *to_vio_driver(struct device_driver *drv) -{ - return container_of(drv, struct vio_driver, driver); -} - +#define to_vio_driver(__drv) container_of_const(__drv, struct vio_driver, driver) #define to_vio_dev(__dev) container_of_const(__dev, struct vio_dev, dev) int vio_ldc_send(struct vio_driver_state *vio, void *data, int len); diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c index 846a55f942d4..07933d75ac81 100644 --- a/arch/sparc/kernel/vio.c +++ b/arch/sparc/kernel/vio.c @@ -54,10 +54,10 @@ static int vio_hotplug(const struct device *dev, struct kobj_uevent_env *env) return 0; } -static int vio_bus_match(struct device *dev, struct device_driver *drv) +static int vio_bus_match(struct device *dev, const struct device_driver *drv) { struct vio_dev *vio_dev = to_vio_dev(dev); - struct vio_driver *vio_drv = to_vio_driver(drv); + const struct vio_driver *vio_drv = to_vio_driver(drv); const struct vio_device_id *matches = vio_drv->id_table; if (!matches) diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 93a5a8999b07..dca84fd6d00a 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -11,7 +11,7 @@ config UML select ARCH_HAS_KCOV select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNLEN_USER - select ARCH_NO_PREEMPT + select ARCH_NO_PREEMPT_DYNAMIC select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_KASAN if X86_64 select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN @@ -31,7 +31,8 @@ config UML select TRACE_IRQFLAGS_SUPPORT select TTY # Needed for line.c select HAVE_ARCH_VMAP_STACK - select HAVE_RUST if X86_64 + select HAVE_RUST + select ARCH_HAS_UBSAN config MMU bool @@ -48,12 +49,13 @@ config NO_IOMEM config UML_IOMEM_EMULATION bool select INDIRECT_IOMEM + select HAS_IOPORT select GENERIC_PCI_IOMAP select GENERIC_IOMAP select NO_GENERIC_PCI_IOPORT_MAP config NO_IOPORT_MAP - def_bool y + def_bool !UML_IOMEM_EMULATION config ISA bool diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index b94b2618e7d8..ede40a160c5e 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -297,26 +297,6 @@ config UML_NET_MCAST If unsure, say N. -config UML_NET_PCAP - bool "pcap transport (obsolete)" - depends on UML_NET - depends on !MODVERSIONS - select MAY_HAVE_RUNTIME_DEPS - help - The pcap transport makes a pcap packet stream on the host look - like an ethernet device inside UML. This is useful for making - UML act as a network monitor for the host. You must have libcap - installed in order to build the pcap transport into UML. - - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html> That site - has examples of the UML command line to use to enable this option. - - NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please - migrate to UML_NET_VECTOR. - - If unsure, say N. - config UML_NET_SLIRP bool "SLiRP transport (obsolete)" depends on UML_NET diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 0e6af81096fd..57882e6bc215 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -20,14 +20,9 @@ harddog-objs := harddog_kern.o harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o rtc-objs := rtc_kern.o rtc_user.o -LDFLAGS_pcap.o = $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a) - LDFLAGS_vde.o = $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a) -targets := pcap_kern.o pcap_user.o vde_kern.o vde_user.o - -$(obj)/pcap.o: $(obj)/pcap_kern.o $(obj)/pcap_user.o - $(LD) -r -dp -o $@ $^ $(ld_flags) +targets := vde_kern.o vde_user.o $(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o $(LD) -r -dp -o $@ $^ $(ld_flags) @@ -49,7 +44,6 @@ obj-$(CONFIG_UML_NET_DAEMON) += daemon.o obj-$(CONFIG_UML_NET_VECTOR) += vector.o obj-$(CONFIG_UML_NET_VDE) += vde.o obj-$(CONFIG_UML_NET_MCAST) += umcast.o -obj-$(CONFIG_UML_NET_PCAP) += pcap.o obj-$(CONFIG_UML_NET) += net.o obj-$(CONFIG_MCONSOLE) += mconsole.o obj-$(CONFIG_MMAPPER) += mmapper_kern.o @@ -69,7 +63,7 @@ obj-$(CONFIG_UML_RTC) += rtc.o obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virt-pci.o # pcap_user.o must be added explicitly. -USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o +USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o vde_user.o vector_user.o CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH) CFLAGS_xterm.o += '-DCONFIG_XTERM_CHAN_DEFAULT_EMULATOR="$(CONFIG_XTERM_CHAN_DEFAULT_EMULATOR)"' diff --git a/arch/um/drivers/chan.h b/arch/um/drivers/chan.h index e14b9cdf7a33..5a61db512ffb 100644 --- a/arch/um/drivers/chan.h +++ b/arch/um/drivers/chan.h @@ -22,7 +22,8 @@ struct chan { unsigned int output:1; unsigned int opened:1; unsigned int enabled:1; - int fd; + int fd_in; + int fd_out; /* only different to fd_in if blocking output is needed */ const struct chan_ops *ops; void *data; }; diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index 37538b4168da..e78a99816c86 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -81,6 +81,12 @@ static const struct chan_ops not_configged_ops = { }; #endif /* CONFIG_NOCONFIG_CHAN */ +static inline bool need_output_blocking(void) +{ + return time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL; +} + static int open_one_chan(struct chan *chan) { int fd, err; @@ -96,15 +102,43 @@ static int open_one_chan(struct chan *chan) return fd; err = os_set_fd_block(fd, 0); - if (err) { - (*chan->ops->close)(fd, chan->data); - return err; - } + if (err) + goto out_close; + + chan->fd_in = fd; + chan->fd_out = fd; + + /* + * In time-travel modes infinite-CPU and external we need to guarantee + * that any writes to the output succeed immdiately from the point of + * the VM. The best way to do this is to put the FD in blocking mode + * and simply wait/retry until everything is written. + * As every write is guaranteed to complete, we also do not need to + * request an IRQ for the output. + * + * Note that input cannot happen in a time synchronized way. We permit + * it, but time passes very quickly if anything waits for a read. + */ + if (chan->output && need_output_blocking()) { + err = os_dup_file(chan->fd_out); + if (err < 0) + goto out_close; - chan->fd = fd; + chan->fd_out = err; + + err = os_set_fd_block(chan->fd_out, 1); + if (err) { + os_close_file(chan->fd_out); + goto out_close; + } + } chan->opened = 1; return 0; + +out_close: + (*chan->ops->close)(fd, chan->data); + return err; } static int open_chan(struct list_head *chans) @@ -125,7 +159,7 @@ static int open_chan(struct list_head *chans) void chan_enable_winch(struct chan *chan, struct tty_port *port) { if (chan && chan->primary && chan->ops->winch) - register_winch(chan->fd, port); + register_winch(chan->fd_in, port); } static void line_timer_cb(struct work_struct *work) @@ -156,8 +190,9 @@ int enable_chan(struct line *line) if (chan->enabled) continue; - err = line_setup_irq(chan->fd, chan->input, chan->output, line, - chan); + err = line_setup_irq(chan->fd_in, chan->input, + chan->output && !need_output_blocking(), + line, chan); if (err) goto out_close; @@ -196,7 +231,8 @@ void free_irqs(void) if (chan->input && chan->enabled) um_free_irq(chan->line->read_irq, chan); - if (chan->output && chan->enabled) + if (chan->output && chan->enabled && + !need_output_blocking()) um_free_irq(chan->line->write_irq, chan); chan->enabled = 0; } @@ -216,15 +252,19 @@ static void close_one_chan(struct chan *chan, int delay_free_irq) } else { if (chan->input && chan->enabled) um_free_irq(chan->line->read_irq, chan); - if (chan->output && chan->enabled) + if (chan->output && chan->enabled && + !need_output_blocking()) um_free_irq(chan->line->write_irq, chan); chan->enabled = 0; } + if (chan->fd_out != chan->fd_in) + os_close_file(chan->fd_out); if (chan->ops->close != NULL) - (*chan->ops->close)(chan->fd, chan->data); + (*chan->ops->close)(chan->fd_in, chan->data); chan->opened = 0; - chan->fd = -1; + chan->fd_in = -1; + chan->fd_out = -1; } void close_chan(struct line *line) @@ -244,7 +284,7 @@ void close_chan(struct line *line) void deactivate_chan(struct chan *chan, int irq) { if (chan && chan->enabled) - deactivate_fd(chan->fd, irq); + deactivate_fd(chan->fd_in, irq); } int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq) @@ -254,7 +294,7 @@ int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq) if (len == 0 || !chan || !chan->ops->write) return 0; - n = chan->ops->write(chan->fd, buf, len, chan->data); + n = chan->ops->write(chan->fd_out, buf, len, chan->data); if (chan->primary) { ret = n; } @@ -268,7 +308,7 @@ int console_write_chan(struct chan *chan, const char *buf, int len) if (!chan || !chan->ops->console_write) return 0; - n = chan->ops->console_write(chan->fd, buf, len); + n = chan->ops->console_write(chan->fd_out, buf, len); if (chan->primary) ret = n; return ret; @@ -296,14 +336,14 @@ int chan_window_size(struct line *line, unsigned short *rows_out, if (chan && chan->primary) { if (chan->ops->window_size == NULL) return 0; - return chan->ops->window_size(chan->fd, chan->data, + return chan->ops->window_size(chan->fd_in, chan->data, rows_out, cols_out); } chan = line->chan_out; if (chan && chan->primary) { if (chan->ops->window_size == NULL) return 0; - return chan->ops->window_size(chan->fd, chan->data, + return chan->ops->window_size(chan->fd_in, chan->data, rows_out, cols_out); } return 0; @@ -319,7 +359,7 @@ static void free_one_chan(struct chan *chan) (*chan->ops->free)(chan->data); if (chan->primary && chan->output) - ignore_sigio_fd(chan->fd); + ignore_sigio_fd(chan->fd_in); kfree(chan); } @@ -478,7 +518,8 @@ static struct chan *parse_chan(struct line *line, char *str, int device, .output = 0, .opened = 0, .enabled = 0, - .fd = -1, + .fd_in = -1, + .fd_out = -1, .ops = ops, .data = data }); return chan; @@ -549,7 +590,7 @@ void chan_interrupt(struct line *line, int irq) schedule_delayed_work(&line->task, 1); goto out; } - err = chan->ops->read(chan->fd, &c, chan->data); + err = chan->ops->read(chan->fd_in, &c, chan->data); if (err > 0) tty_insert_flip_char(port, c, TTY_NORMAL); } while (err > 0); diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index ec04e47b9d79..a66e556012c4 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -23,7 +23,7 @@ int generic_read(int fd, __u8 *c_out, void *unused) { int n; - n = read(fd, c_out, sizeof(*c_out)); + CATCH_EINTR(n = read(fd, c_out, sizeof(*c_out))); if (n > 0) return n; else if (n == 0) @@ -37,11 +37,23 @@ int generic_read(int fd, __u8 *c_out, void *unused) int generic_write(int fd, const __u8 *buf, size_t n, void *unused) { + int written = 0; int err; - err = write(fd, buf, n); - if (err > 0) - return err; + /* The FD may be in blocking mode, as such, need to retry short writes, + * they may have been interrupted by a signal. + */ + do { + errno = 0; + err = write(fd, buf + written, n - written); + if (err > 0) { + written += err; + continue; + } + } while (err < 0 && errno == EINTR); + + if (written > 0) + return written; else if (errno == EAGAIN) return 0; else if (err == 0) diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c index 60d1c6cab8a9..99a7144b229f 100644 --- a/arch/um/drivers/harddog_kern.c +++ b/arch/um/drivers/harddog_kern.c @@ -49,6 +49,7 @@ #include "mconsole.h" #include "harddog.h" +MODULE_DESCRIPTION("UML hardware watchdog"); MODULE_LICENSE("GPL"); static DEFINE_MUTEX(harddog_mutex); diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index d82bc3fdb86e..43d8959cc746 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -383,6 +383,7 @@ int setup_one_line(struct line *lines, int n, char *init, parse_chan_pair(NULL, line, n, opts, error_out); err = 0; } + *error_out = "configured as 'none'"; } else { char *new = kstrdup(init, GFP_KERNEL); if (!new) { @@ -406,6 +407,7 @@ int setup_one_line(struct line *lines, int n, char *init, } } if (err) { + *error_out = "failed to parse channel pair"; line->init_str = NULL; line->valid = 0; kfree(new); diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c deleted file mode 100644 index d9bf95d7867b..000000000000 --- a/arch/um/drivers/pcap_kern.c +++ /dev/null @@ -1,113 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <linux/init.h> -#include <linux/netdevice.h> -#include <net_kern.h> -#include "pcap_user.h" - -struct pcap_init { - char *host_if; - int promisc; - int optimize; - char *filter; -}; - -static void pcap_init_kern(struct net_device *dev, void *data) -{ - struct uml_net_private *pri; - struct pcap_data *ppri; - struct pcap_init *init = data; - - pri = netdev_priv(dev); - ppri = (struct pcap_data *) pri->user; - ppri->host_if = init->host_if; - ppri->promisc = init->promisc; - ppri->optimize = init->optimize; - ppri->filter = init->filter; - - printk("pcap backend, host interface %s\n", ppri->host_if); -} - -static int pcap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return pcap_user_read(fd, skb_mac_header(skb), - skb->dev->mtu + ETH_HEADER_OTHER, - (struct pcap_data *) &lp->user); -} - -static int pcap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return -EPERM; -} - -static const struct net_kern_info pcap_kern_info = { - .init = pcap_init_kern, - .protocol = eth_protocol, - .read = pcap_read, - .write = pcap_write, -}; - -static int pcap_setup(char *str, char **mac_out, void *data) -{ - struct pcap_init *init = data; - char *remain, *host_if = NULL, *options[2] = { NULL, NULL }; - int i; - - *init = ((struct pcap_init) - { .host_if = "eth0", - .promisc = 1, - .optimize = 0, - .filter = NULL }); - - remain = split_if_spec(str, &host_if, &init->filter, - &options[0], &options[1], mac_out, NULL); - if (remain != NULL) { - printk(KERN_ERR "pcap_setup - Extra garbage on " - "specification : '%s'\n", remain); - return 0; - } - - if (host_if != NULL) - init->host_if = host_if; - - for (i = 0; i < ARRAY_SIZE(options); i++) { - if (options[i] == NULL) - continue; - if (!strcmp(options[i], "promisc")) - init->promisc = 1; - else if (!strcmp(options[i], "nopromisc")) - init->promisc = 0; - else if (!strcmp(options[i], "optimize")) - init->optimize = 1; - else if (!strcmp(options[i], "nooptimize")) - init->optimize = 0; - else { - printk(KERN_ERR "pcap_setup : bad option - '%s'\n", - options[i]); - return 0; - } - } - - return 1; -} - -static struct transport pcap_transport = { - .list = LIST_HEAD_INIT(pcap_transport.list), - .name = "pcap", - .setup = pcap_setup, - .user = &pcap_user_info, - .kern = &pcap_kern_info, - .private_size = sizeof(struct pcap_data), - .setup_size = sizeof(struct pcap_init), -}; - -static int register_pcap(void) -{ - register_transport(&pcap_transport); - return 0; -} - -late_initcall(register_pcap); diff --git a/arch/um/drivers/pcap_user.c b/arch/um/drivers/pcap_user.c deleted file mode 100644 index 52ddda3e3b10..000000000000 --- a/arch/um/drivers/pcap_user.c +++ /dev/null @@ -1,137 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <errno.h> -#include <pcap.h> -#include <string.h> -#include <asm/types.h> -#include <net_user.h> -#include "pcap_user.h" -#include <um_malloc.h> - -#define PCAP_FD(p) (*(int *)(p)) - -static int pcap_user_init(void *data, void *dev) -{ - struct pcap_data *pri = data; - pcap_t *p; - char errors[PCAP_ERRBUF_SIZE]; - - p = pcap_open_live(pri->host_if, ETH_MAX_PACKET + ETH_HEADER_OTHER, - pri->promisc, 0, errors); - if (p == NULL) { - printk(UM_KERN_ERR "pcap_user_init : pcap_open_live failed - " - "'%s'\n", errors); - return -EINVAL; - } - - pri->dev = dev; - pri->pcap = p; - return 0; -} - -static int pcap_user_open(void *data) -{ - struct pcap_data *pri = data; - __u32 netmask; - int err; - - if (pri->pcap == NULL) - return -ENODEV; - - if (pri->filter != NULL) { - err = dev_netmask(pri->dev, &netmask); - if (err < 0) { - printk(UM_KERN_ERR "pcap_user_open : dev_netmask failed\n"); - return -EIO; - } - - pri->compiled = uml_kmalloc(sizeof(struct bpf_program), - UM_GFP_KERNEL); - if (pri->compiled == NULL) { - printk(UM_KERN_ERR "pcap_user_open : kmalloc failed\n"); - return -ENOMEM; - } - - err = pcap_compile(pri->pcap, - (struct bpf_program *) pri->compiled, - pri->filter, pri->optimize, netmask); - if (err < 0) { - printk(UM_KERN_ERR "pcap_user_open : pcap_compile failed - " - "'%s'\n", pcap_geterr(pri->pcap)); - goto out; - } - - err = pcap_setfilter(pri->pcap, pri->compiled); - if (err < 0) { - printk(UM_KERN_ERR "pcap_user_open : pcap_setfilter " - "failed - '%s'\n", pcap_geterr(pri->pcap)); - goto out; - } - } - - return PCAP_FD(pri->pcap); - - out: - kfree(pri->compiled); - return -EIO; -} - -static void pcap_remove(void *data) -{ - struct pcap_data *pri = data; - - if (pri->compiled != NULL) - pcap_freecode(pri->compiled); - - if (pri->pcap != NULL) - pcap_close(pri->pcap); -} - -struct pcap_handler_data { - char *buffer; - int len; -}; - -static void handler(u_char *data, const struct pcap_pkthdr *header, - const u_char *packet) -{ - int len; - - struct pcap_handler_data *hdata = (struct pcap_handler_data *) data; - - len = hdata->len < header->caplen ? hdata->len : header->caplen; - memcpy(hdata->buffer, packet, len); - hdata->len = len; -} - -int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri) -{ - struct pcap_handler_data hdata = ((struct pcap_handler_data) - { .buffer = buffer, - .len = len }); - int n; - - n = pcap_dispatch(pri->pcap, 1, handler, (u_char *) &hdata); - if (n < 0) { - printk(UM_KERN_ERR "pcap_dispatch failed - %s\n", - pcap_geterr(pri->pcap)); - return -EIO; - } - else if (n == 0) - return 0; - return hdata.len; -} - -const struct net_user_info pcap_user_info = { - .init = pcap_user_init, - .open = pcap_user_open, - .close = NULL, - .remove = pcap_remove, - .add_address = NULL, - .delete_address = NULL, - .mtu = ETH_MAX_PACKET, - .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, -}; diff --git a/arch/um/drivers/pcap_user.h b/arch/um/drivers/pcap_user.h deleted file mode 100644 index 216246f5f09b..000000000000 --- a/arch/um/drivers/pcap_user.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - */ - -#include <net_user.h> - -struct pcap_data { - char *host_if; - int promisc; - int optimize; - char *filter; - void *compiled; - void *pcap; - void *dev; -}; - -extern const struct net_user_info pcap_user_info; - -extern int pcap_user_read(int fd, void *buf, int len, struct pcap_data *pri); - diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c index c52b3ff3c092..a4508470df78 100644 --- a/arch/um/drivers/port_kern.c +++ b/arch/um/drivers/port_kern.c @@ -45,15 +45,17 @@ struct connection { static irqreturn_t pipe_interrupt(int irq, void *data) { struct connection *conn = data; - int fd; + int n_fds = 1, fd = -1; + ssize_t ret; - fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); - if (fd < 0) { - if (fd == -EAGAIN) + ret = os_rcv_fd_msg(conn->socket[0], &fd, n_fds, &conn->helper_pid, + sizeof(conn->helper_pid)); + if (ret != sizeof(conn->helper_pid)) { + if (ret == -EAGAIN) return IRQ_NONE; - printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n", - -fd); + printk(KERN_ERR "pipe_interrupt : os_rcv_fd_msg returned %zd\n", + ret); os_close_file(conn->fd); } diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 9f1e76ddda5a..7f28ec1929dc 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -36,7 +36,6 @@ #include <linux/vmalloc.h> #include <linux/platform_device.h> #include <linux/scatterlist.h> -#include <asm/tlbflush.h> #include <kern_util.h> #include "mconsole_kern.h" #include <init.h> @@ -106,7 +105,6 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data) #define DRIVER_NAME "uml-blkdev" static DEFINE_MUTEX(ubd_lock); -static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */ static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg); @@ -759,7 +757,6 @@ static int ubd_open_dev(struct ubd *ubd_dev) printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); goto error; } - flush_tlb_kernel_vm(); err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap, ubd_dev->cow.bitmap_offset, diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 4279793b11b7..2d473282ab51 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -1115,11 +1115,12 @@ static int irq_rr; static int vector_net_close(struct net_device *dev) { struct vector_private *vp = netdev_priv(dev); - unsigned long flags; netif_stop_queue(dev); del_timer(&vp->tl); + vp->opened = false; + if (vp->fds == NULL) return 0; @@ -1158,10 +1159,7 @@ static int vector_net_close(struct net_device *dev) destroy_queue(vp->tx_queue); kfree(vp->fds); vp->fds = NULL; - spin_lock_irqsave(&vp->lock, flags); - vp->opened = false; vp->in_error = false; - spin_unlock_irqrestore(&vp->lock, flags); return 0; } @@ -1203,17 +1201,12 @@ static void vector_reset_tx(struct work_struct *work) static int vector_net_open(struct net_device *dev) { struct vector_private *vp = netdev_priv(dev); - unsigned long flags; int err = -EINVAL; struct vector_device *vdevice; - spin_lock_irqsave(&vp->lock, flags); - if (vp->opened) { - spin_unlock_irqrestore(&vp->lock, flags); + if (vp->opened) return -ENXIO; - } vp->opened = true; - spin_unlock_irqrestore(&vp->lock, flags); vp->bpf = uml_vector_user_bpf(get_bpf_file(vp->parsed)); @@ -1387,8 +1380,6 @@ static int vector_net_load_bpf_flash(struct net_device *dev, return -1; } - spin_lock(&vp->lock); - if (vp->bpf != NULL) { if (vp->opened) uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf); @@ -1417,15 +1408,12 @@ static int vector_net_load_bpf_flash(struct net_device *dev, if (vp->opened) result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf); - spin_unlock(&vp->lock); - return result; free_buffer: release_firmware(fw); flash_fail: - spin_unlock(&vp->lock); if (vp->bpf != NULL) kfree(vp->bpf->filter); kfree(vp->bpf); @@ -1631,7 +1619,6 @@ static void vector_eth_configure( INIT_WORK(&vp->reset_tx, vector_reset_tx); timer_setup(&vp->tl, vector_timer_expire, 0); - spin_lock_init(&vp->lock); /* FIXME */ dev->netdev_ops = &vector_netdev_ops; diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h index 2a1fa8e0f3e1..806df551be0b 100644 --- a/arch/um/drivers/vector_kern.h +++ b/arch/um/drivers/vector_kern.h @@ -71,7 +71,6 @@ struct vector_estats { struct vector_private { struct list_head list; - spinlock_t lock; struct net_device *dev; struct napi_struct napi ____cacheline_aligned; diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c index 6918de5e2956..e4316c7981e8 100644 --- a/arch/um/drivers/xterm.c +++ b/arch/um/drivers/xterm.c @@ -156,7 +156,7 @@ static int xterm_open(int input, int output, int primary, void *d, new = xterm_fd(fd, &data->helper_pid); if (new < 0) { err = new; - printk(UM_KERN_ERR "xterm_open : os_rcv_fd failed, err = %d\n", + printk(UM_KERN_ERR "xterm_open : xterm_fd failed, err = %d\n", -err); goto out_kill; } diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c index 8011e51993d5..3971252cb1a6 100644 --- a/arch/um/drivers/xterm_kern.c +++ b/arch/um/drivers/xterm_kern.c @@ -21,12 +21,19 @@ struct xterm_wait { static irqreturn_t xterm_interrupt(int irq, void *data) { struct xterm_wait *xterm = data; - int fd; + int fd = -1, n_fds = 1; + ssize_t ret; - fd = os_rcv_fd(xterm->fd, &xterm->pid); - if (fd == -EAGAIN) + ret = os_rcv_fd_msg(xterm->fd, &fd, n_fds, + &xterm->pid, sizeof(xterm->pid)); + if (ret == -EAGAIN) return IRQ_NONE; + if (ret < 0) + fd = ret; + else if (ret != sizeof(xterm->pid)) + fd = -EMSGSIZE; + xterm->new_fd = fd; complete(&xterm->ready); diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h index f2923c767bb9..a3eaca41ff61 100644 --- a/arch/um/include/asm/mmu.h +++ b/arch/um/include/asm/mmu.h @@ -7,15 +7,13 @@ #define __ARCH_UM_MMU_H #include <mm_id.h> -#include <asm/mm_context.h> typedef struct mm_context { struct mm_id id; - struct uml_arch_mm_context arch; -} mm_context_t; -/* Avoid tangled inclusion with asm/ldt.h */ -extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm); -extern void free_ldt(struct mm_context *mm); + /* Address range in need of a TLB sync */ + unsigned long sync_tlb_range_from; + unsigned long sync_tlb_range_to; +} mm_context_t; #endif diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index 68e2eb9cfb47..23dcc914d44e 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -13,8 +13,6 @@ #include <asm/mm_hooks.h> #include <asm/mmu.h> -extern void force_flush_all(void); - #define activate_mm activate_mm static inline void activate_mm(struct mm_struct *old, struct mm_struct *new) { diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index e1ece21dbe3f..5bb397b65efb 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -244,6 +244,38 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval) #define PFN_PTE_SHIFT PAGE_SHIFT +static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + if (!mm->context.sync_tlb_range_to) { + mm->context.sync_tlb_range_from = start; + mm->context.sync_tlb_range_to = end; + } else { + if (start < mm->context.sync_tlb_range_from) + mm->context.sync_tlb_range_from = start; + if (end > mm->context.sync_tlb_range_to) + mm->context.sync_tlb_range_to = end; + } +} + +#define set_ptes set_ptes +static inline void set_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, int nr) +{ + /* Basically the default implementation */ + size_t length = nr * PAGE_SIZE; + + for (;;) { + set_pte(ptep, pte); + if (--nr == 0) + break; + ptep++; + pte = __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT)); + } + + um_tlb_mark_sync(mm, addr, addr + length); +} + #define __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t pte_a, pte_t pte_b) { diff --git a/arch/um/include/asm/tlbflush.h b/arch/um/include/asm/tlbflush.h index a5bda890390d..db997976b6ea 100644 --- a/arch/um/include/asm/tlbflush.h +++ b/arch/um/include/asm/tlbflush.h @@ -9,23 +9,51 @@ #include <linux/mm.h> /* - * TLB flushing: + * In UML, we need to sync the TLB over by using mmap/munmap/mprotect syscalls + * from the process handling the MM (which can be the kernel itself). + * + * To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes + * we catch all PTE transitions where memory that was unusable becomes usable. + * While with flush_tlb_* we can track any memory that becomes unusable and + * even if a higher layer of the page table was modified. + * + * So, we simply track updates using both methods and mark the memory area to + * be synced later on. The only special case is that flush_tlb_kern_* needs to + * be executed immediately as there is no good synchronization point in that + * case. In contrast, in the set_ptes case we can wait for the next kernel + * segfault before we do the synchornization. * - * - flush_tlb() flushes the current mm struct TLBs * - flush_tlb_all() flushes all processes TLBs * - flush_tlb_mm(mm) flushes the specified mm context TLB's * - flush_tlb_page(vma, vmaddr) flushes one page - * - flush_tlb_kernel_vm() flushes the kernel vm area * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages */ +extern int um_tlb_sync(struct mm_struct *mm); + extern void flush_tlb_all(void); extern void flush_tlb_mm(struct mm_struct *mm); -extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end); -extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long address); -extern void flush_tlb_kernel_vm(void); -extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); -extern void __flush_tlb_one(unsigned long addr); + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long address) +{ + um_tlb_mark_sync(vma->vm_mm, address, address + PAGE_SIZE); +} + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + um_tlb_mark_sync(vma->vm_mm, start, end); +} + +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ + um_tlb_mark_sync(&init_mm, start, end); + + /* Kernel needs to be synced immediately */ + um_tlb_sync(&init_mm); +} #endif diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h index c22f46a757dc..06292fca5a4d 100644 --- a/arch/um/include/shared/as-layout.h +++ b/arch/um/include/shared/as-layout.h @@ -23,7 +23,7 @@ #define STUB_START stub_start #define STUB_CODE STUB_START #define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE) -#define STUB_DATA_PAGES 1 /* must be a power of two */ +#define STUB_DATA_PAGES 2 /* must be a power of two */ #define STUB_END (STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE) #ifndef __ASSEMBLY__ diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 96195483fbd0..579ed946a3a9 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* for use by sys-$SUBARCH/kernel-offsets.c */ -#include <stub-data.h> DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); @@ -30,7 +29,3 @@ DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT); DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT); #endif -/* for stub */ -DEFINE(UML_STUB_FIELD_OFFSET, offsetof(struct stub_data, offset)); -DEFINE(UML_STUB_FIELD_CHILD_ERR, offsetof(struct stub_data, child_err)); -DEFINE(UML_STUB_FIELD_FD, offsetof(struct stub_data, fd)); diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index 95521b1f5b20..d8ffd2db168e 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -13,7 +13,6 @@ struct siginfo; extern int uml_exitcode; -extern int ncpus; extern int kmalloc_ok; #define UML_ROUND_UP(addr) \ diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index aff8906304ea..9a039d6f1f74 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -163,8 +163,10 @@ extern int os_set_fd_block(int fd, int blocking); extern int os_accept_connection(int fd); extern int os_create_unix_socket(const char *file, int len, int close_on_exec); extern int os_shutdown_socket(int fd, int r, int w); +extern int os_dup_file(int fd); extern void os_close_file(int fd); -extern int os_rcv_fd(int fd, int *helper_pid_out); +ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds, + void *data, size_t data_len); extern int os_connect_socket(const char *name); extern int os_file_type(char *file); extern int os_file_mode(const char *file, struct openflags *mode_out); @@ -179,6 +181,8 @@ extern int os_eventfd(unsigned int initval, int flags); extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds, unsigned int fds_num); int os_poll(unsigned int n, const int *fds); +void *os_mmap_rw_shared(int fd, size_t size); +void *os_mremap_rw_shared(void *old_addr, size_t old_size, size_t new_size); /* start_up.c */ extern void os_early_checks(void); @@ -191,6 +195,9 @@ extern void get_host_cpu_features( /* mem.c */ extern int create_mem_file(unsigned long long len); +/* tlb.c */ +extern void report_enomem(void); + /* process.c */ extern unsigned long os_process_pc(int pid); extern int os_process_parent(int pid); @@ -268,24 +275,20 @@ extern long long os_persistent_clock_emulation(void); extern long long os_nsecs(void); /* skas/mem.c */ -extern long run_syscall_stub(struct mm_id * mm_idp, - int syscall, unsigned long *args, long expected, - void **addr, int done); -extern long syscall_stub_data(struct mm_id * mm_idp, - unsigned long *data, int data_count, - void **addr, void **stub_addr); -extern int map(struct mm_id * mm_idp, unsigned long virt, - unsigned long len, int prot, int phys_fd, - unsigned long long offset, int done, void **data); -extern int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len, - int done, void **data); -extern int protect(struct mm_id * mm_idp, unsigned long addr, - unsigned long len, unsigned int prot, int done, void **data); +int syscall_stub_flush(struct mm_id *mm_idp); +struct stub_syscall *syscall_stub_alloc(struct mm_id *mm_idp); +void syscall_stub_dump_error(struct mm_id *mm_idp); + +int map(struct mm_id *mm_idp, unsigned long virt, + unsigned long len, int prot, int phys_fd, + unsigned long long offset); +int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len); +int protect(struct mm_id *mm_idp, unsigned long addr, + unsigned long len, unsigned int prot); /* skas/process.c */ extern int is_skas_winch(int pid, int fd, void *data); extern int start_userspace(unsigned long stub_stack); -extern int copy_context_skas0(unsigned long stack, int pid); extern void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs); extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)); extern void switch_threads(jmp_buf *me, jmp_buf *you); diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h index 92dbf727e384..1e76ba40feba 100644 --- a/arch/um/include/shared/skas/mm_id.h +++ b/arch/um/include/shared/skas/mm_id.h @@ -12,7 +12,7 @@ struct mm_id { int pid; } u; unsigned long stack; - int kill; + int syscall_data_len; }; void __switch_mm(struct mm_id *mm_idp); diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h index c93d2cbc8f32..ebaa116de30b 100644 --- a/arch/um/include/shared/skas/skas.h +++ b/arch/um/include/shared/skas/skas.h @@ -15,5 +15,7 @@ extern void new_thread_handler(void); extern void handle_syscall(struct uml_pt_regs *regs); extern long execute_syscall_skas(void *r); extern unsigned long current_stub_stack(void); +extern struct mm_id *current_mm_id(void); +extern void current_mm_sync(void); #endif diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h index 5e3ade3fb38b..2b6b44759dfa 100644 --- a/arch/um/include/shared/skas/stub-data.h +++ b/arch/um/include/shared/skas/stub-data.h @@ -8,10 +8,42 @@ #ifndef __STUB_DATA_H #define __STUB_DATA_H +#include <linux/compiler_types.h> +#include <as-layout.h> +#include <sysdep/tls.h> + +#define STUB_NEXT_SYSCALL(s) \ + ((struct stub_syscall *) (((unsigned long) s) + (s)->cmd_len)) + +enum stub_syscall_type { + STUB_SYSCALL_UNSET = 0, + STUB_SYSCALL_MMAP, + STUB_SYSCALL_MUNMAP, + STUB_SYSCALL_MPROTECT, +}; + +struct stub_syscall { + struct { + unsigned long addr; + unsigned long length; + unsigned long offset; + int fd; + int prot; + } mem; + + enum stub_syscall_type syscall; +}; + struct stub_data { unsigned long offset; - int fd; - long parent_err, child_err; + long err, child_err; + + int syscall_data_len; + /* 128 leaves enough room for additional fields in the struct */ + struct stub_syscall syscall_data[(UM_KERN_PAGE_SIZE - 128) / sizeof(struct stub_syscall)] __aligned(16); + + /* Stack for our signal handlers and for calling into . */ + unsigned char sigstack[UM_KERN_PAGE_SIZE] __aligned(UM_KERN_PAGE_SIZE); }; #endif diff --git a/arch/um/include/shared/timetravel.h b/arch/um/include/shared/timetravel.h index e5c3d69f1b69..c8db2f213dba 100644 --- a/arch/um/include/shared/timetravel.h +++ b/arch/um/include/shared/timetravel.h @@ -15,8 +15,17 @@ enum time_travel_mode { #if defined(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT) || \ defined(CONFIG_UML_TIME_TRAVEL_SUPPORT) extern enum time_travel_mode time_travel_mode; +extern int time_travel_should_print_bc_msg; #else #define time_travel_mode TT_MODE_OFF +#define time_travel_should_print_bc_msg 0 #endif /* (UML_)CONFIG_UML_TIME_TRAVEL_SUPPORT */ +void _time_travel_print_bc_msg(void); +static inline void time_travel_print_bc_msg(void) +{ + if (time_travel_should_print_bc_msg) + _time_travel_print_bc_msg(); +} + #endif /* _UM_TIME_TRAVEL_H_ */ diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h index 326e52450e41..bbab79c0c074 100644 --- a/arch/um/include/shared/user.h +++ b/arch/um/include/shared/user.h @@ -42,11 +42,19 @@ extern void panic(const char *fmt, ...) #define printk(...) _printk(__VA_ARGS__) extern int _printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); +extern void print_hex_dump(const char *level, const char *prefix_str, + int prefix_type, int rowsize, int groupsize, + const void *buf, size_t len, _Bool ascii); #else static inline int printk(const char *fmt, ...) { return 0; } +static inline void print_hex_dump(const char *level, const char *prefix_str, + int prefix_type, int rowsize, int groupsize, + const void *buf, size_t len, _Bool ascii) +{ +} #endif extern int in_aton(char *str); diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 827a0d3fa589..2c15bb2c104c 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -22,17 +22,8 @@ void flush_thread(void) { - void *data = NULL; - int ret; - arch_flush_thread(¤t->thread.arch); - ret = unmap(¤t->mm->context.id, 0, TASK_SIZE, 1, &data); - if (ret) { - printk(KERN_ERR "%s - clearing address space failed, err = %d\n", - __func__, ret); - force_sig(SIGKILL); - } get_safe_registers(current_pt_regs()->regs.gp, current_pt_regs()->regs.fp); diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 635d44606bfe..534e91797f89 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -37,7 +37,7 @@ struct irq_reg { bool pending; bool wakeup; #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT - bool pending_on_resume; + bool pending_event; void (*timetravel_handler)(int, int, void *, struct time_travel_event *); struct time_travel_event event; @@ -56,6 +56,9 @@ static DEFINE_SPINLOCK(irq_lock); static LIST_HEAD(active_fds); static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ); static bool irqs_suspended; +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +static bool irqs_pending; +#endif static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs) { @@ -84,9 +87,12 @@ static void irq_event_handler(struct time_travel_event *ev) { struct irq_reg *reg = container_of(ev, struct irq_reg, event); - /* do nothing if suspended - just to cause a wakeup */ - if (irqs_suspended) + /* do nothing if suspended; just cause a wakeup and mark as pending */ + if (irqs_suspended) { + irqs_pending = true; + reg->pending_event = true; return; + } generic_handle_irq(reg->irq); } @@ -110,16 +116,47 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry, if (!reg->event.pending) return false; - if (irqs_suspended) - reg->pending_on_resume = true; return true; } + +static void irq_do_pending_events(bool timetravel_handlers_only) +{ + struct irq_entry *entry; + + if (!irqs_pending || timetravel_handlers_only) + return; + + irqs_pending = false; + + list_for_each_entry(entry, &active_fds, list) { + enum um_irq_type t; + + for (t = 0; t < NUM_IRQ_TYPES; t++) { + struct irq_reg *reg = &entry->reg[t]; + + /* + * Any timetravel_handler was invoked already, just + * directly run the IRQ. + */ + if (reg->pending_event) { + irq_enter(); + generic_handle_irq(reg->irq); + irq_exit(); + reg->pending_event = false; + } + } + } +} #else static bool irq_do_timetravel_handler(struct irq_entry *entry, enum um_irq_type t) { return false; } + +static void irq_do_pending_events(bool timetravel_handlers_only) +{ +} #endif static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t, @@ -145,6 +182,8 @@ static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type */ if (timetravel_handlers_only) { #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + reg->pending_event = true; + irqs_pending = true; mark_sigio_pending(); #endif return; @@ -162,6 +201,10 @@ static void _sigio_handler(struct uml_pt_regs *regs, if (timetravel_handlers_only && !um_irq_timetravel_handler_used()) return; + /* Flush out pending events that were ignored due to time-travel. */ + if (!irqs_suspended) + irq_do_pending_events(timetravel_handlers_only); + while (1) { /* This is now lockless - epoll keeps back-referencesto the irqs * which have trigger it so there is no need to walk the irq @@ -195,7 +238,9 @@ static void _sigio_handler(struct uml_pt_regs *regs, void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { + preempt_disable(); _sigio_handler(regs, irqs_suspended); + preempt_enable(); } static struct irq_entry *get_irq_entry_by_fd(int fd) @@ -543,30 +588,7 @@ void um_irqs_resume(void) unsigned long flags; - local_irq_save(flags); -#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT - /* - * We don't need to lock anything here since we're in resume - * and nothing else is running, but have disabled IRQs so we - * don't try anything else with the interrupt list from there. - */ - list_for_each_entry(entry, &active_fds, list) { - enum um_irq_type t; - - for (t = 0; t < NUM_IRQ_TYPES; t++) { - struct irq_reg *reg = &entry->reg[t]; - - if (reg->pending_on_resume) { - irq_enter(); - generic_handle_irq(reg->irq); - irq_exit(); - reg->pending_on_resume = false; - } - } - } -#endif - - spin_lock(&irq_lock); + spin_lock_irqsave(&irq_lock, flags); list_for_each_entry(entry, &active_fds, list) { if (entry->suspended) { int err = os_set_fd_async(entry->fd); diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index 3a85bde3e173..f2fb77da08cf 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(os_shutdown_socket); EXPORT_SYMBOL(os_create_unix_socket); EXPORT_SYMBOL(os_connect_socket); EXPORT_SYMBOL(os_accept_connection); -EXPORT_SYMBOL(os_rcv_fd); +EXPORT_SYMBOL(os_rcv_fd_msg); EXPORT_SYMBOL(run_helper); EXPORT_SYMBOL(os_major); EXPORT_SYMBOL(os_minor); diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index ca91accd64fc..a5b4fe2ad931 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -73,7 +73,6 @@ void __init mem_init(void) /* this will put all low memory onto the freelists */ memblock_free_all(); - max_low_pfn = totalram_pages(); max_pfn = max_low_pfn; kmalloc_ok = 1; } diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index d2134802f6a8..f36b63f53bab 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -122,8 +122,6 @@ void new_thread_handler(void) /* Called magically, see new_thread_handler above */ static void fork_handler(void) { - force_flush_all(); - schedule_tail(current->thread.prev_sched); /* @@ -237,73 +235,6 @@ int copy_from_user_proc(void *to, void __user *from, int size) return copy_from_user(to, from, size); } -static atomic_t using_sysemu = ATOMIC_INIT(0); -int sysemu_supported; - -static void set_using_sysemu(int value) -{ - if (value > sysemu_supported) - return; - atomic_set(&using_sysemu, value); -} - -static int get_using_sysemu(void) -{ - return atomic_read(&using_sysemu); -} - -static int sysemu_proc_show(struct seq_file *m, void *v) -{ - seq_printf(m, "%d\n", get_using_sysemu()); - return 0; -} - -static int sysemu_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, sysemu_proc_show, NULL); -} - -static ssize_t sysemu_proc_write(struct file *file, const char __user *buf, - size_t count, loff_t *pos) -{ - char tmp[2]; - - if (copy_from_user(tmp, buf, 1)) - return -EFAULT; - - if (tmp[0] >= '0' && tmp[0] <= '2') - set_using_sysemu(tmp[0] - '0'); - /* We use the first char, but pretend to write everything */ - return count; -} - -static const struct proc_ops sysemu_proc_ops = { - .proc_open = sysemu_proc_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = single_release, - .proc_write = sysemu_proc_write, -}; - -static int __init make_proc_sysemu(void) -{ - struct proc_dir_entry *ent; - if (!sysemu_supported) - return 0; - - ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_ops); - - if (ent == NULL) - { - printk(KERN_WARNING "Failed to register /proc/sysemu\n"); - return 0; - } - - return 0; -} - -late_initcall(make_proc_sysemu); - int singlestepping(void) { return test_thread_flag(TIF_SINGLESTEP); diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 25840eee1068..3736bca626ba 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -59,3 +59,18 @@ void machine_halt(void) { machine_power_off(); } + +static int sys_power_off_handler(struct sys_off_data *data) +{ + machine_power_off(); + return 0; +} + +static int register_power_off(void) +{ + register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_DEFAULT, + sys_power_off_handler, NULL); + return 0; +} +__initcall(register_power_off); diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index f93972a25765..6f86d53e3d69 100644 --- a/arch/um/kernel/skas/Makefile +++ b/arch/um/kernel/skas/Makefile @@ -3,15 +3,14 @@ # Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) # -obj-y := clone.o mmu.o process.o syscall.o uaccess.o +obj-y := stub.o mmu.o process.o syscall.o uaccess.o -# clone.o is in the stub, so it can't be built with profiling +# stub.o is in the stub, so it can't be built with profiling # GCC hardened also auto-enables -fpic, but we need %ebx so it can't work -> # disable it -CFLAGS_clone.o := $(CFLAGS_NO_HARDENING) -UNPROFILE_OBJS := clone.o - +CFLAGS_stub.o := $(CFLAGS_NO_HARDENING) +UNPROFILE_OBJS := stub.o KCOV_INSTRUMENT := n include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c deleted file mode 100644 index 62435187dda4..000000000000 --- a/arch/um/kernel/skas/clone.c +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) - * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <signal.h> -#include <sched.h> -#include <asm/unistd.h> -#include <sys/time.h> -#include <as-layout.h> -#include <ptrace_user.h> -#include <stub-data.h> -#include <sysdep/stub.h> - -/* - * This is in a separate file because it needs to be compiled with any - * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled - * - * Use UM_KERN_PAGE_SIZE instead of PAGE_SIZE because that calls getpagesize - * on some systems. - */ - -void __attribute__ ((__section__ (".__syscall_stub"))) -stub_clone_handler(void) -{ - struct stub_data *data = get_stub_data(); - long err; - - err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, - (unsigned long)data + - STUB_DATA_PAGES * UM_KERN_PAGE_SIZE / 2); - if (err) { - data->parent_err = err; - goto done; - } - - err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); - if (err) { - data->child_err = err; - goto done; - } - - remap_stack_and_trap(); - - done: - trap_myself(); -} diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index aeed1c2aaf3c..47f98d87ea3c 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -14,11 +14,14 @@ #include <as-layout.h> #include <os.h> #include <skas.h> +#include <stub-data.h> + +/* Ensure the stub_data struct covers the allocated area */ +static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE); int init_new_context(struct task_struct *task, struct mm_struct *mm) { - struct mm_context *from_mm = NULL; - struct mm_context *to_mm = &mm->context; + struct mm_id *new_id = &mm->context.id; unsigned long stack = 0; int ret = -ENOMEM; @@ -26,34 +29,46 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) if (stack == 0) goto out; - to_mm->id.stack = stack; - if (current->mm != NULL && current->mm != &init_mm) - from_mm = ¤t->mm->context; + new_id->stack = stack; block_signals_trace(); - if (from_mm) - to_mm->id.u.pid = copy_context_skas0(stack, - from_mm->id.u.pid); - else to_mm->id.u.pid = start_userspace(stack); + new_id->u.pid = start_userspace(stack); unblock_signals_trace(); - if (to_mm->id.u.pid < 0) { - ret = to_mm->id.u.pid; + if (new_id->u.pid < 0) { + ret = new_id->u.pid; goto out_free; } - ret = init_new_ldt(to_mm, from_mm); - if (ret < 0) { - printk(KERN_ERR "init_new_context_skas - init_ldt" - " failed, errno = %d\n", ret); - goto out_free; - } + /* + * Ensure the new MM is clean and nothing unwanted is mapped. + * + * TODO: We should clear the memory up to STUB_START to ensure there is + * nothing mapped there, i.e. we (currently) have: + * + * |- user memory -|- unused -|- stub -|- unused -| + * ^ TASK_SIZE ^ STUB_START + * + * Meaning we have two unused areas where we may still have valid + * mappings from our internal clone(). That isn't really a problem as + * userspace is not going to access them, but it is definitely not + * correct. + * + * However, we are "lucky" and if rseq is configured, then on 32 bit + * it will fall into the first empty range while on 64 bit it is going + * to use an anonymous mapping in the second range. As such, things + * continue to work for now as long as we don't start unmapping these + * areas. + * + * Change this to STUB_START once we have a clean userspace. + */ + unmap(new_id, 0, TASK_SIZE); return 0; out_free: - if (to_mm->id.stack != 0) - free_pages(to_mm->id.stack, ilog2(STUB_DATA_PAGES)); + if (new_id->stack != 0) + free_pages(new_id->stack, ilog2(STUB_DATA_PAGES)); out: return ret; } @@ -76,5 +91,4 @@ void destroy_context(struct mm_struct *mm) os_kill_ptraced_process(mmu->id.u.pid, 1); free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES)); - free_ldt(mmu); } diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 99a5cbb36083..5f9c1c5f36e2 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -8,6 +8,8 @@ #include <linux/sched/task_stack.h> #include <linux/sched/task.h> +#include <asm/tlbflush.h> + #include <as-layout.h> #include <kern.h> #include <os.h> @@ -50,3 +52,19 @@ unsigned long current_stub_stack(void) return current->mm->context.id.stack; } + +struct mm_id *current_mm_id(void) +{ + if (current->mm == NULL) + return NULL; + + return ¤t->mm->context.id; +} + +void current_mm_sync(void) +{ + if (current->mm == NULL) + return; + + um_tlb_sync(current->mm); +} diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c new file mode 100644 index 000000000000..5d52ffa682dc --- /dev/null +++ b/arch/um/kernel/skas/stub.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net> + */ + +#include <sysdep/stub.h> + +static __always_inline int syscall_handler(struct stub_data *d) +{ + int i; + unsigned long res; + + for (i = 0; i < d->syscall_data_len; i++) { + struct stub_syscall *sc = &d->syscall_data[i]; + + switch (sc->syscall) { + case STUB_SYSCALL_MMAP: + res = stub_syscall6(STUB_MMAP_NR, + sc->mem.addr, sc->mem.length, + sc->mem.prot, + MAP_SHARED | MAP_FIXED, + sc->mem.fd, sc->mem.offset); + if (res != sc->mem.addr) { + d->err = res; + d->syscall_data_len = i; + return -1; + } + break; + case STUB_SYSCALL_MUNMAP: + res = stub_syscall2(__NR_munmap, + sc->mem.addr, sc->mem.length); + if (res) { + d->err = res; + d->syscall_data_len = i; + return -1; + } + break; + case STUB_SYSCALL_MPROTECT: + res = stub_syscall3(__NR_mprotect, + sc->mem.addr, sc->mem.length, + sc->mem.prot); + if (res) { + d->err = res; + d->syscall_data_len = i; + return -1; + } + break; + default: + d->err = -95; /* EOPNOTSUPP */ + d->syscall_data_len = i; + return -1; + } + } + + d->err = 0; + d->syscall_data_len = 0; + + return 0; +} + +void __section(".__syscall_stub") +stub_syscall_handler(void) +{ + struct stub_data *d = get_stub_data(); + + syscall_handler(d); + + trap_myself(); +} diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index a8bfe8be1526..47b9f5e63566 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -31,6 +31,7 @@ EXPORT_SYMBOL_GPL(time_travel_mode); static bool time_travel_start_set; static unsigned long long time_travel_start; static unsigned long long time_travel_time; +static unsigned long long time_travel_shm_offset; static LIST_HEAD(time_travel_events); static LIST_HEAD(time_travel_irqs); static unsigned long long time_travel_timer_interval; @@ -40,8 +41,11 @@ static int time_travel_ext_fd = -1; static unsigned int time_travel_ext_waiting; static bool time_travel_ext_prev_request_valid; static unsigned long long time_travel_ext_prev_request; -static bool time_travel_ext_free_until_valid; -static unsigned long long time_travel_ext_free_until; +static unsigned long long *time_travel_ext_free_until; +static unsigned long long _time_travel_ext_free_until; +static u16 time_travel_shm_id; +static struct um_timetravel_schedshm *time_travel_shm; +static union um_timetravel_schedshm_client *time_travel_shm_client; static void time_travel_set_time(unsigned long long ns) { @@ -58,8 +62,52 @@ enum time_travel_message_handling { TTMH_IDLE, TTMH_POLL, TTMH_READ, + TTMH_READ_START_ACK, }; +static u64 bc_message; +int time_travel_should_print_bc_msg; + +void _time_travel_print_bc_msg(void) +{ + time_travel_should_print_bc_msg = 0; + printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message); +} + +static void time_travel_setup_shm(int fd, u16 id) +{ + u32 len; + + time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm)); + + if (!time_travel_shm) + goto out; + + len = time_travel_shm->len; + + if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION || + len < struct_size(time_travel_shm, clients, id + 1)) { + os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm)); + time_travel_shm = NULL; + goto out; + } + + time_travel_shm = os_mremap_rw_shared(time_travel_shm, + sizeof(*time_travel_shm), + len); + if (!time_travel_shm) + goto out; + + time_travel_shm_offset = time_travel_shm->current_time; + time_travel_shm_client = &time_travel_shm->clients[id]; + time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE; + time_travel_shm_id = id; + /* always look at that free_until from now on */ + time_travel_ext_free_until = &time_travel_shm->free_until; +out: + os_close_file(fd); +} + static void time_travel_handle_message(struct um_timetravel_msg *msg, enum time_travel_message_handling mode) { @@ -80,7 +128,20 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg, } } - ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg)); + if (unlikely(mode == TTMH_READ_START_ACK)) { + int fd[UM_TIMETRAVEL_SHARED_MAX_FDS]; + + ret = os_rcv_fd_msg(time_travel_ext_fd, fd, + ARRAY_SIZE(fd), msg, sizeof(*msg)); + if (ret == sizeof(*msg)) { + time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD], + msg->time & UM_TIMETRAVEL_START_ACK_ID); + /* we don't use the logging for now */ + os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]); + } + } else { + ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg)); + } if (ret == 0) panic("time-travel external link is broken\n"); @@ -96,10 +157,24 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg, return; case UM_TIMETRAVEL_RUN: time_travel_set_time(msg->time); + if (time_travel_shm) { + /* no request right now since we're running */ + time_travel_shm_client->flags &= + ~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN; + /* no ack for shared memory RUN */ + return; + } break; case UM_TIMETRAVEL_FREE_UNTIL: - time_travel_ext_free_until_valid = true; - time_travel_ext_free_until = msg->time; + /* not supposed to get this with shm, but ignore it */ + if (time_travel_shm) + break; + time_travel_ext_free_until = &_time_travel_ext_free_until; + _time_travel_ext_free_until = msg->time; + break; + case UM_TIMETRAVEL_BROADCAST: + bc_message = msg->time; + time_travel_should_print_bc_msg = 1; break; } @@ -136,8 +211,15 @@ static u64 time_travel_ext_req(u32 op, u64 time) block_signals_hard(); os_write_file(time_travel_ext_fd, &msg, sizeof(msg)); + /* no ACK expected for WAIT in shared memory mode */ + if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm) + goto done; + while (msg.op != UM_TIMETRAVEL_ACK) - time_travel_handle_message(&msg, TTMH_READ); + time_travel_handle_message(&msg, + op == UM_TIMETRAVEL_START ? + TTMH_READ_START_ACK : + TTMH_READ); if (msg.seq != mseq) panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n", @@ -145,6 +227,7 @@ static u64 time_travel_ext_req(u32 op, u64 time) if (op == UM_TIMETRAVEL_GET) time_travel_set_time(msg.time); +done: unblock_signals_hard(); return msg.time; @@ -180,13 +263,33 @@ static void time_travel_ext_update_request(unsigned long long time) /* * if we're running and are allowed to run past the request * then we don't need to update it either + * + * Note for shm we ignore FREE_UNTIL messages and leave the pointer + * to shared memory, and for non-shm the offset is 0. */ - if (!time_travel_ext_waiting && time_travel_ext_free_until_valid && - time < time_travel_ext_free_until) + if (!time_travel_ext_waiting && time_travel_ext_free_until && + time < (*time_travel_ext_free_until - time_travel_shm_offset)) return; time_travel_ext_prev_request = time; time_travel_ext_prev_request_valid = true; + + if (time_travel_shm) { + union um_timetravel_schedshm_client *running; + + running = &time_travel_shm->clients[time_travel_shm->running_id]; + + if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) { + time_travel_shm_client->flags |= + UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN; + time += time_travel_shm_offset; + time_travel_shm_client->req_time = time; + if (time < time_travel_shm->free_until) + time_travel_shm->free_until = time; + return; + } + } + time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time); } @@ -194,6 +297,14 @@ void __time_travel_propagate_time(void) { static unsigned long long last_propagated; + if (time_travel_shm) { + if (time_travel_shm->running_id != time_travel_shm_id) + panic("time-travel: setting time while not running\n"); + time_travel_shm->current_time = time_travel_time + + time_travel_shm_offset; + return; + } + if (last_propagated == time_travel_time) return; @@ -209,9 +320,12 @@ static bool time_travel_ext_request(unsigned long long time) * If we received an external sync point ("free until") then we * don't have to request/wait for anything until then, unless * we're already waiting. + * + * Note for shm we ignore FREE_UNTIL messages and leave the pointer + * to shared memory, and for non-shm the offset is 0. */ - if (!time_travel_ext_waiting && time_travel_ext_free_until_valid && - time < time_travel_ext_free_until) + if (!time_travel_ext_waiting && time_travel_ext_free_until && + time < (*time_travel_ext_free_until - time_travel_shm_offset)) return false; time_travel_ext_update_request(time); @@ -225,7 +339,8 @@ static void time_travel_ext_wait(bool idle) }; time_travel_ext_prev_request_valid = false; - time_travel_ext_free_until_valid = false; + if (!time_travel_shm) + time_travel_ext_free_until = NULL; time_travel_ext_waiting++; time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1); @@ -248,7 +363,11 @@ static void time_travel_ext_wait(bool idle) static void time_travel_ext_get_time(void) { - time_travel_ext_req(UM_TIMETRAVEL_GET, -1); + if (time_travel_shm) + time_travel_set_time(time_travel_shm->current_time - + time_travel_shm_offset); + else + time_travel_ext_req(UM_TIMETRAVEL_GET, -1); } static void __time_travel_update_time(unsigned long long ns, bool idle) @@ -875,9 +994,49 @@ static int setup_time_travel_start(char *str) return 1; } -__setup("time-travel-start", setup_time_travel_start); +__setup("time-travel-start=", setup_time_travel_start); __uml_help(setup_time_travel_start, -"time-travel-start=<seconds>\n" +"time-travel-start=<nanoseconds>\n" "Configure the UML instance's wall clock to start at this value rather than\n" "the host's wall clock at the time of UML boot.\n"); +static struct kobject *bc_time_kobject; + +static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "0x%llx", bc_message); +} + +static ssize_t bc_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret; + u64 user_bc_message; + + ret = kstrtou64(buf, 0, &user_bc_message); + if (ret) + return ret; + + bc_message = user_bc_message; + + time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, bc_message); + pr_info("um: time: sent broadcast message: 0x%llx\n", bc_message); + return count; +} + +static struct kobj_attribute bc_attribute = __ATTR(bc-message, 0660, bc_show, bc_store); + +static int __init um_bc_start(void) +{ + if (time_travel_mode != TT_MODE_EXTERNAL) + return 0; + + bc_time_kobject = kobject_create_and_add("um-ext-time", kernel_kobj); + if (!bc_time_kobject) + return 0; + + if (sysfs_create_file(bc_time_kobject, &bc_attribute.attr)) + pr_debug("failed to create the bc file in /sys/kernel/um_time"); + + return 0; +} +late_initcall(um_bc_start); #endif diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 8784f03fa4a6..44c6fc697f3a 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -15,209 +15,54 @@ #include <skas.h> #include <kern_util.h> -struct host_vm_change { - struct host_vm_op { - enum { NONE, MMAP, MUNMAP, MPROTECT } type; - union { - struct { - unsigned long addr; - unsigned long len; - unsigned int prot; - int fd; - __u64 offset; - } mmap; - struct { - unsigned long addr; - unsigned long len; - } munmap; - struct { - unsigned long addr; - unsigned long len; - unsigned int prot; - } mprotect; - } u; - } ops[1]; - int userspace; - int index; - struct mm_struct *mm; - void *data; - int force; +struct vm_ops { + struct mm_id *mm_idp; + + int (*mmap)(struct mm_id *mm_idp, + unsigned long virt, unsigned long len, int prot, + int phys_fd, unsigned long long offset); + int (*unmap)(struct mm_id *mm_idp, + unsigned long virt, unsigned long len); + int (*mprotect)(struct mm_id *mm_idp, + unsigned long virt, unsigned long len, + unsigned int prot); }; -#define INIT_HVC(mm, force, userspace) \ - ((struct host_vm_change) \ - { .ops = { { .type = NONE } }, \ - .mm = mm, \ - .data = NULL, \ - .userspace = userspace, \ - .index = 0, \ - .force = force }) - -static void report_enomem(void) +static int kern_map(struct mm_id *mm_idp, + unsigned long virt, unsigned long len, int prot, + int phys_fd, unsigned long long offset) { - printk(KERN_ERR "UML ran out of memory on the host side! " - "This can happen due to a memory limitation or " - "vm.max_map_count has been reached.\n"); -} - -static int do_ops(struct host_vm_change *hvc, int end, - int finished) -{ - struct host_vm_op *op; - int i, ret = 0; - - for (i = 0; i < end && !ret; i++) { - op = &hvc->ops[i]; - switch (op->type) { - case MMAP: - if (hvc->userspace) - ret = map(&hvc->mm->context.id, op->u.mmap.addr, - op->u.mmap.len, op->u.mmap.prot, - op->u.mmap.fd, - op->u.mmap.offset, finished, - &hvc->data); - else - map_memory(op->u.mmap.addr, op->u.mmap.offset, - op->u.mmap.len, 1, 1, 1); - break; - case MUNMAP: - if (hvc->userspace) - ret = unmap(&hvc->mm->context.id, - op->u.munmap.addr, - op->u.munmap.len, finished, - &hvc->data); - else - ret = os_unmap_memory( - (void *) op->u.munmap.addr, - op->u.munmap.len); - - break; - case MPROTECT: - if (hvc->userspace) - ret = protect(&hvc->mm->context.id, - op->u.mprotect.addr, - op->u.mprotect.len, - op->u.mprotect.prot, - finished, &hvc->data); - else - ret = os_protect_memory( - (void *) op->u.mprotect.addr, - op->u.mprotect.len, - 1, 1, 1); - break; - default: - printk(KERN_ERR "Unknown op type %d in do_ops\n", - op->type); - BUG(); - break; - } - } - - if (ret == -ENOMEM) - report_enomem(); - - return ret; + /* TODO: Why is executable needed to be always set in the kernel? */ + return os_map_memory((void *)virt, phys_fd, offset, len, + prot & UM_PROT_READ, prot & UM_PROT_WRITE, + 1); } -static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, - unsigned int prot, struct host_vm_change *hvc) +static int kern_unmap(struct mm_id *mm_idp, + unsigned long virt, unsigned long len) { - __u64 offset; - struct host_vm_op *last; - int fd = -1, ret = 0; - - if (hvc->userspace) - fd = phys_mapping(phys, &offset); - else - offset = phys; - if (hvc->index != 0) { - last = &hvc->ops[hvc->index - 1]; - if ((last->type == MMAP) && - (last->u.mmap.addr + last->u.mmap.len == virt) && - (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) && - (last->u.mmap.offset + last->u.mmap.len == offset)) { - last->u.mmap.len += len; - return 0; - } - } - - if (hvc->index == ARRAY_SIZE(hvc->ops)) { - ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); - hvc->index = 0; - } - - hvc->ops[hvc->index++] = ((struct host_vm_op) - { .type = MMAP, - .u = { .mmap = { .addr = virt, - .len = len, - .prot = prot, - .fd = fd, - .offset = offset } - } }); - return ret; + return os_unmap_memory((void *)virt, len); } -static int add_munmap(unsigned long addr, unsigned long len, - struct host_vm_change *hvc) +static int kern_mprotect(struct mm_id *mm_idp, + unsigned long virt, unsigned long len, + unsigned int prot) { - struct host_vm_op *last; - int ret = 0; - - if (hvc->index != 0) { - last = &hvc->ops[hvc->index - 1]; - if ((last->type == MUNMAP) && - (last->u.munmap.addr + last->u.mmap.len == addr)) { - last->u.munmap.len += len; - return 0; - } - } - - if (hvc->index == ARRAY_SIZE(hvc->ops)) { - ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); - hvc->index = 0; - } - - hvc->ops[hvc->index++] = ((struct host_vm_op) - { .type = MUNMAP, - .u = { .munmap = { .addr = addr, - .len = len } } }); - return ret; + return os_protect_memory((void *)virt, len, + prot & UM_PROT_READ, prot & UM_PROT_WRITE, + 1); } -static int add_mprotect(unsigned long addr, unsigned long len, - unsigned int prot, struct host_vm_change *hvc) +void report_enomem(void) { - struct host_vm_op *last; - int ret = 0; - - if (hvc->index != 0) { - last = &hvc->ops[hvc->index - 1]; - if ((last->type == MPROTECT) && - (last->u.mprotect.addr + last->u.mprotect.len == addr) && - (last->u.mprotect.prot == prot)) { - last->u.mprotect.len += len; - return 0; - } - } - - if (hvc->index == ARRAY_SIZE(hvc->ops)) { - ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); - hvc->index = 0; - } - - hvc->ops[hvc->index++] = ((struct host_vm_op) - { .type = MPROTECT, - .u = { .mprotect = { .addr = addr, - .len = len, - .prot = prot } } }); - return ret; + printk(KERN_ERR "UML ran out of memory on the host side! " + "This can happen due to a memory limitation or " + "vm.max_map_count has been reached.\n"); } -#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) - static inline int update_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, - struct host_vm_change *hvc) + struct vm_ops *ops) { pte_t *pte; int r, w, x, prot, ret = 0; @@ -235,15 +80,22 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr, prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | (x ? UM_PROT_EXEC : 0)); - if (hvc->force || pte_newpage(*pte)) { + if (pte_newpage(*pte)) { if (pte_present(*pte)) { - if (pte_newpage(*pte)) - ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK, - PAGE_SIZE, prot, hvc); + if (pte_newpage(*pte)) { + __u64 offset; + unsigned long phys = + pte_val(*pte) & PAGE_MASK; + int fd = phys_mapping(phys, &offset); + + ret = ops->mmap(ops->mm_idp, addr, + PAGE_SIZE, prot, fd, + offset); + } } else - ret = add_munmap(addr, PAGE_SIZE, hvc); + ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE); } else if (pte_newprot(*pte)) - ret = add_mprotect(addr, PAGE_SIZE, prot, hvc); + ret = ops->mprotect(ops->mm_idp, addr, PAGE_SIZE, prot); *pte = pte_mkuptodate(*pte); } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret)); return ret; @@ -251,7 +103,7 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr, static inline int update_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, - struct host_vm_change *hvc) + struct vm_ops *ops) { pmd_t *pmd; unsigned long next; @@ -261,19 +113,20 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr, do { next = pmd_addr_end(addr, end); if (!pmd_present(*pmd)) { - if (hvc->force || pmd_newpage(*pmd)) { - ret = add_munmap(addr, next - addr, hvc); + if (pmd_newpage(*pmd)) { + ret = ops->unmap(ops->mm_idp, addr, + next - addr); pmd_mkuptodate(*pmd); } } - else ret = update_pte_range(pmd, addr, next, hvc); + else ret = update_pte_range(pmd, addr, next, ops); } while (pmd++, addr = next, ((addr < end) && !ret)); return ret; } static inline int update_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, - struct host_vm_change *hvc) + struct vm_ops *ops) { pud_t *pud; unsigned long next; @@ -283,19 +136,20 @@ static inline int update_pud_range(p4d_t *p4d, unsigned long addr, do { next = pud_addr_end(addr, end); if (!pud_present(*pud)) { - if (hvc->force || pud_newpage(*pud)) { - ret = add_munmap(addr, next - addr, hvc); + if (pud_newpage(*pud)) { + ret = ops->unmap(ops->mm_idp, addr, + next - addr); pud_mkuptodate(*pud); } } - else ret = update_pmd_range(pud, addr, next, hvc); + else ret = update_pmd_range(pud, addr, next, ops); } while (pud++, addr = next, ((addr < end) && !ret)); return ret; } static inline int update_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, - struct host_vm_change *hvc) + struct vm_ops *ops) { p4d_t *p4d; unsigned long next; @@ -305,227 +159,59 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr, do { next = p4d_addr_end(addr, end); if (!p4d_present(*p4d)) { - if (hvc->force || p4d_newpage(*p4d)) { - ret = add_munmap(addr, next - addr, hvc); + if (p4d_newpage(*p4d)) { + ret = ops->unmap(ops->mm_idp, addr, + next - addr); p4d_mkuptodate(*p4d); } } else - ret = update_pud_range(p4d, addr, next, hvc); + ret = update_pud_range(p4d, addr, next, ops); } while (p4d++, addr = next, ((addr < end) && !ret)); return ret; } -static void fix_range_common(struct mm_struct *mm, unsigned long start_addr, - unsigned long end_addr, int force) +int um_tlb_sync(struct mm_struct *mm) { pgd_t *pgd; - struct host_vm_change hvc; - unsigned long addr = start_addr, next; - int ret = 0, userspace = 1; + struct vm_ops ops; + unsigned long addr = mm->context.sync_tlb_range_from, next; + int ret = 0; + + if (mm->context.sync_tlb_range_to == 0) + return 0; + + ops.mm_idp = &mm->context.id; + if (mm == &init_mm) { + ops.mmap = kern_map; + ops.unmap = kern_unmap; + ops.mprotect = kern_mprotect; + } else { + ops.mmap = map; + ops.unmap = unmap; + ops.mprotect = protect; + } - hvc = INIT_HVC(mm, force, userspace); pgd = pgd_offset(mm, addr); do { - next = pgd_addr_end(addr, end_addr); + next = pgd_addr_end(addr, mm->context.sync_tlb_range_to); if (!pgd_present(*pgd)) { - if (force || pgd_newpage(*pgd)) { - ret = add_munmap(addr, next - addr, &hvc); + if (pgd_newpage(*pgd)) { + ret = ops.unmap(ops.mm_idp, addr, + next - addr); pgd_mkuptodate(*pgd); } } else - ret = update_p4d_range(pgd, addr, next, &hvc); - } while (pgd++, addr = next, ((addr < end_addr) && !ret)); + ret = update_p4d_range(pgd, addr, next, &ops); + } while (pgd++, addr = next, + ((addr < mm->context.sync_tlb_range_to) && !ret)); - if (!ret) - ret = do_ops(&hvc, hvc.index, 1); - - /* This is not an else because ret is modified above */ - if (ret) { - struct mm_id *mm_idp = ¤t->mm->context.id; - - printk(KERN_ERR "fix_range_common: failed, killing current " - "process: %d\n", task_tgid_vnr(current)); - mm_idp->kill = 1; - } -} - -static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) -{ - struct mm_struct *mm; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - unsigned long addr, last; - int updated = 0, err = 0, force = 0, userspace = 0; - struct host_vm_change hvc; - - mm = &init_mm; - hvc = INIT_HVC(mm, force, userspace); - for (addr = start; addr < end;) { - pgd = pgd_offset(mm, addr); - if (!pgd_present(*pgd)) { - last = ADD_ROUND(addr, PGDIR_SIZE); - if (last > end) - last = end; - if (pgd_newpage(*pgd)) { - updated = 1; - err = add_munmap(addr, last - addr, &hvc); - if (err < 0) - panic("munmap failed, errno = %d\n", - -err); - } - addr = last; - continue; - } - - p4d = p4d_offset(pgd, addr); - if (!p4d_present(*p4d)) { - last = ADD_ROUND(addr, P4D_SIZE); - if (last > end) - last = end; - if (p4d_newpage(*p4d)) { - updated = 1; - err = add_munmap(addr, last - addr, &hvc); - if (err < 0) - panic("munmap failed, errno = %d\n", - -err); - } - addr = last; - continue; - } - - pud = pud_offset(p4d, addr); - if (!pud_present(*pud)) { - last = ADD_ROUND(addr, PUD_SIZE); - if (last > end) - last = end; - if (pud_newpage(*pud)) { - updated = 1; - err = add_munmap(addr, last - addr, &hvc); - if (err < 0) - panic("munmap failed, errno = %d\n", - -err); - } - addr = last; - continue; - } - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { - last = ADD_ROUND(addr, PMD_SIZE); - if (last > end) - last = end; - if (pmd_newpage(*pmd)) { - updated = 1; - err = add_munmap(addr, last - addr, &hvc); - if (err < 0) - panic("munmap failed, errno = %d\n", - -err); - } - addr = last; - continue; - } - - pte = pte_offset_kernel(pmd, addr); - if (!pte_present(*pte) || pte_newpage(*pte)) { - updated = 1; - err = add_munmap(addr, PAGE_SIZE, &hvc); - if (err < 0) - panic("munmap failed, errno = %d\n", - -err); - if (pte_present(*pte)) - err = add_mmap(addr, pte_val(*pte) & PAGE_MASK, - PAGE_SIZE, 0, &hvc); - } - else if (pte_newprot(*pte)) { - updated = 1; - err = add_mprotect(addr, PAGE_SIZE, 0, &hvc); - } - addr += PAGE_SIZE; - } - if (!err) - err = do_ops(&hvc, hvc.index, 1); - - if (err < 0) - panic("flush_tlb_kernel failed, errno = %d\n", err); - return updated; -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) -{ - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - struct mm_struct *mm = vma->vm_mm; - void *flush = NULL; - int r, w, x, prot, err = 0; - struct mm_id *mm_id; - - address &= PAGE_MASK; - - pgd = pgd_offset(mm, address); - if (!pgd_present(*pgd)) - goto kill; - - p4d = p4d_offset(pgd, address); - if (!p4d_present(*p4d)) - goto kill; - - pud = pud_offset(p4d, address); - if (!pud_present(*pud)) - goto kill; - - pmd = pmd_offset(pud, address); - if (!pmd_present(*pmd)) - goto kill; - - pte = pte_offset_kernel(pmd, address); - - r = pte_read(*pte); - w = pte_write(*pte); - x = pte_exec(*pte); - if (!pte_young(*pte)) { - r = 0; - w = 0; - } else if (!pte_dirty(*pte)) { - w = 0; - } - - mm_id = &mm->context.id; - prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | - (x ? UM_PROT_EXEC : 0)); - if (pte_newpage(*pte)) { - if (pte_present(*pte)) { - unsigned long long offset; - int fd; - - fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset); - err = map(mm_id, address, PAGE_SIZE, prot, fd, offset, - 1, &flush); - } - else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush); - } - else if (pte_newprot(*pte)) - err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush); - - if (err) { - if (err == -ENOMEM) - report_enomem(); - - goto kill; - } - - *pte = pte_mkuptodate(*pte); + if (ret == -ENOMEM) + report_enomem(); - return; + mm->context.sync_tlb_range_from = 0; + mm->context.sync_tlb_range_to = 0; -kill: - printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address); - force_sig(SIGKILL); + return ret; } void flush_tlb_all(void) @@ -540,60 +226,11 @@ void flush_tlb_all(void) flush_tlb_mm(current->mm); } -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - flush_tlb_kernel_range_common(start, end); -} - -void flush_tlb_kernel_vm(void) -{ - flush_tlb_kernel_range_common(start_vm, end_vm); -} - -void __flush_tlb_one(unsigned long addr) -{ - flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE); -} - -static void fix_range(struct mm_struct *mm, unsigned long start_addr, - unsigned long end_addr, int force) -{ - /* - * Don't bother flushing if this address space is about to be - * destroyed. - */ - if (atomic_read(&mm->mm_users) == 0) - return; - - fix_range_common(mm, start_addr, end_addr, force); -} - -void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - if (vma->vm_mm == NULL) - flush_tlb_kernel_range_common(start, end); - else fix_range(vma->vm_mm, start, end, 0); -} -EXPORT_SYMBOL(flush_tlb_range); - void flush_tlb_mm(struct mm_struct *mm) { struct vm_area_struct *vma; VMA_ITERATOR(vmi, mm, 0); for_each_vma(vmi, vma) - fix_range(mm, vma->vm_start, vma->vm_end, 0); -} - -void force_flush_all(void) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, 0); - - mmap_read_lock(mm); - for_each_vma(vmi, vma) - fix_range(mm, vma->vm_start, vma->vm_end, 1); - mmap_read_unlock(mm); + um_tlb_mark_sync(mm, vma->vm_start, vma->vm_end); } diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 6d8ae86ae978..97c8df9c4401 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -113,7 +113,7 @@ good_area: #if 0 WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte))); #endif - flush_tlb_page(vma, address); + out: mmap_read_unlock(mm); out_nosemaphore: @@ -210,8 +210,17 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, if (!is_user && regs) current->thread.segv_regs = container_of(regs, struct pt_regs, regs); - if (!is_user && (address >= start_vm) && (address < end_vm)) { - flush_tlb_kernel_vm(); + if (!is_user && init_mm.context.sync_tlb_range_to) { + /* + * Kernel has pending updates from set_ptes that were not + * flushed yet. Syncing them should fix the pagefault (if not + * we'll get here again and panic). + */ + err = um_tlb_sync(&init_mm); + if (err == -ENOMEM) + report_enomem(); + if (err) + panic("Failed to sync kernel TLBs: %d", err); goto out; } else if (current->mm == NULL) { diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index e95f805e5004..8e594cda6d77 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -126,9 +126,6 @@ unsigned long uml_reserved; /* Also modified in mem_init */ unsigned long start_vm; unsigned long end_vm; -/* Set in uml_ncpus_setup */ -int ncpus = 1; - /* Set in early boot */ static int have_root __initdata; static int have_console __initdata; diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index fc4450db59bd..5adf8f630049 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -17,6 +17,7 @@ #include <sys/stat.h> #include <sys/sysmacros.h> #include <sys/un.h> +#include <sys/mman.h> #include <sys/types.h> #include <sys/eventfd.h> #include <poll.h> @@ -240,6 +241,16 @@ out: return err; } +int os_dup_file(int fd) +{ + int new_fd = dup(fd); + + if (new_fd < 0) + return -errno; + + return new_fd; +} + void os_close_file(int fd) { close(fd); @@ -502,44 +513,47 @@ int os_shutdown_socket(int fd, int r, int w) return 0; } -int os_rcv_fd(int fd, int *helper_pid_out) +/** + * os_rcv_fd_msg - receive message with (optional) FDs + * @fd: the FD to receive from + * @fds: the array for FDs to write to + * @n_fds: number of FDs to receive (@fds array size) + * @data: the message buffer + * @data_len: the size of the message to receive + * + * Receive a message with FDs. + * + * Returns: the size of the received message, or an error code + */ +ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds, + void *data, size_t data_len) { - int new, n; - char buf[CMSG_SPACE(sizeof(new))]; - struct msghdr msg; + char buf[CMSG_SPACE(sizeof(*fds) * n_fds)]; struct cmsghdr *cmsg; - struct iovec iov; - - msg.msg_name = NULL; - msg.msg_namelen = 0; - iov = ((struct iovec) { .iov_base = helper_pid_out, - .iov_len = sizeof(*helper_pid_out) }); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = buf; - msg.msg_controllen = sizeof(buf); - msg.msg_flags = 0; + struct iovec iov = { + .iov_base = data, + .iov_len = data_len, + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = buf, + .msg_controllen = sizeof(buf), + }; + int n; n = recvmsg(fd, &msg, 0); if (n < 0) return -errno; - else if (n != iov.iov_len) - *helper_pid_out = -1; cmsg = CMSG_FIRSTHDR(&msg); - if (cmsg == NULL) { - printk(UM_KERN_ERR "rcv_fd didn't receive anything, " - "error = %d\n", errno); - return -1; - } - if ((cmsg->cmsg_level != SOL_SOCKET) || - (cmsg->cmsg_type != SCM_RIGHTS)) { - printk(UM_KERN_ERR "rcv_fd didn't receive a descriptor\n"); - return -1; - } + if (!cmsg || + cmsg->cmsg_level != SOL_SOCKET || + cmsg->cmsg_type != SCM_RIGHTS) + return n; - new = ((int *) CMSG_DATA(cmsg))[0]; - return new; + memcpy(fds, CMSG_DATA(cmsg), cmsg->cmsg_len); + return n; } int os_create_unix_socket(const char *file, int len, int close_on_exec) @@ -705,3 +719,25 @@ int os_poll(unsigned int n, const int *fds) return -EIO; } + +void *os_mmap_rw_shared(int fd, size_t size) +{ + void *res = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (res == MAP_FAILED) + return NULL; + + return res; +} + +void *os_mremap_rw_shared(void *old_addr, size_t old_size, size_t new_size) +{ + void *res; + + res = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE, NULL); + + if (res == MAP_FAILED) + return NULL; + + return res; +} diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 787cfb9a0308..b11ed66c8bb0 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -8,6 +8,7 @@ #include <stdlib.h> #include <stdarg.h> +#include <stdbool.h> #include <errno.h> #include <signal.h> #include <string.h> @@ -65,9 +66,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) int signals_enabled; #ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT -static int signals_blocked; -#else -#define signals_blocked 0 +static int signals_blocked, signals_blocked_pending; #endif static unsigned int signals_pending; static unsigned int signals_active = 0; @@ -76,14 +75,27 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) { int enabled = signals_enabled; - if ((signals_blocked || !enabled) && (sig == SIGIO)) { +#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT + if ((signals_blocked || + __atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) && + (sig == SIGIO)) { + /* increment so unblock will do another round */ + __atomic_add_fetch(&signals_blocked_pending, 1, + __ATOMIC_SEQ_CST); + return; + } +#endif + + if (!enabled && (sig == SIGIO)) { /* * In TT_MODE_EXTERNAL, need to still call time-travel - * handlers unless signals are also blocked for the - * external time message processing. This will mark - * signals_pending by itself (only if necessary.) + * handlers. This will mark signals_pending by itself + * (only if necessary.) + * Note we won't get here if signals are hard-blocked + * (which is handled above), in that case the hard- + * unblock will handle things. */ - if (!signals_blocked && time_travel_mode == TT_MODE_EXTERNAL) + if (time_travel_mode == TT_MODE_EXTERNAL) sigio_run_timetravel_handlers(); else signals_pending |= SIGIO_MASK; @@ -380,33 +392,99 @@ int um_set_signals_trace(int enable) #ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT void mark_sigio_pending(void) { + /* + * It would seem that this should be atomic so + * it isn't a read-modify-write with a signal + * that could happen in the middle, losing the + * value set by the signal. + * + * However, this function is only called when in + * time-travel=ext simulation mode, in which case + * the only signal ever pending is SIGIO, which + * is blocked while this can be called, and the + * timer signal (SIGALRM) cannot happen. + */ signals_pending |= SIGIO_MASK; } void block_signals_hard(void) { - if (signals_blocked) - return; - signals_blocked = 1; + signals_blocked++; barrier(); } void unblock_signals_hard(void) { + static bool unblocking; + if (!signals_blocked) + panic("unblocking signals while not blocked"); + + if (--signals_blocked) return; - /* Must be set to 0 before we check the pending bits etc. */ - signals_blocked = 0; + /* + * Must be set to 0 before we check pending so the + * SIGIO handler will run as normal unless we're still + * going to process signals_blocked_pending. + */ barrier(); - if (signals_pending && signals_enabled) { - /* this is a bit inefficient, but that's not really important */ - block_signals(); - unblock_signals(); - } else if (signals_pending & SIGIO_MASK) { - /* we need to run time-travel handlers even if not enabled */ - sigio_run_timetravel_handlers(); + /* + * Note that block_signals_hard()/unblock_signals_hard() can be called + * within the unblock_signals()/sigio_run_timetravel_handlers() below. + * This would still be prone to race conditions since it's actually a + * call _within_ e.g. vu_req_read_message(), where we observed this + * issue, which loops. Thus, if the inner call handles the recorded + * pending signals, we can get out of the inner call with the real + * signal hander no longer blocked, and still have a race. Thus don't + * handle unblocking in the inner call, if it happens, but only in + * the outermost call - 'unblocking' serves as an ownership for the + * signals_blocked_pending decrement. + */ + if (unblocking) + return; + unblocking = true; + + while (__atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) { + if (signals_enabled) { + /* signals are enabled so we can touch this */ + signals_pending |= SIGIO_MASK; + /* + * this is a bit inefficient, but that's + * not really important + */ + block_signals(); + unblock_signals(); + } else { + /* + * we need to run time-travel handlers even + * if not enabled + */ + sigio_run_timetravel_handlers(); + } + + /* + * The decrement of signals_blocked_pending must be atomic so + * that the signal handler will either happen before or after + * the decrement, not during a read-modify-write: + * - If it happens before, it can increment it and we'll + * decrement it and do another round in the loop. + * - If it happens after it'll see 0 for both signals_blocked + * and signals_blocked_pending and thus run the handler as + * usual (subject to signals_enabled, but that's unrelated.) + * + * Note that a call to unblock_signals_hard() within the calls + * to unblock_signals() or sigio_run_timetravel_handlers() above + * will do nothing due to the 'unblocking' state, so this cannot + * underflow as the only one decrementing will be the outermost + * one. + */ + if (__atomic_sub_fetch(&signals_blocked_pending, 1, + __ATOMIC_SEQ_CST) < 0) + panic("signals_blocked_pending underflow"); } + + unblocking = false; } #endif diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c index 1f9c1bffc3a6..c55430775efd 100644 --- a/arch/um/os-Linux/skas/mem.c +++ b/arch/um/os-Linux/skas/mem.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* + * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net> * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) */ @@ -19,7 +20,30 @@ #include <sysdep/stub.h> #include "../internal.h" -extern char batch_syscall_stub[], __syscall_stub_start[]; +extern char __syscall_stub_start[]; + +void syscall_stub_dump_error(struct mm_id *mm_idp) +{ + struct stub_data *proc_data = (void *)mm_idp->stack; + struct stub_syscall *sc; + + if (proc_data->syscall_data_len < 0 || + proc_data->syscall_data_len >= ARRAY_SIZE(proc_data->syscall_data)) + panic("Syscall data was corrupted by stub (len is: %d, expected maximum: %d)!", + proc_data->syscall_data_len, + mm_idp->syscall_data_len); + + sc = &proc_data->syscall_data[proc_data->syscall_data_len]; + + printk(UM_KERN_ERR "%s : length = %d, last offset = %d", + __func__, mm_idp->syscall_data_len, + proc_data->syscall_data_len); + printk(UM_KERN_ERR "%s : stub syscall type %d failed, return value = 0x%lx\n", + __func__, sc->syscall, proc_data->err); + + print_hex_dump(UM_KERN_ERR, " syscall data: ", 0, + 16, 4, sc, sizeof(*sc), 0); +} static inline unsigned long *check_init_stack(struct mm_id * mm_idp, unsigned long *stack) @@ -36,22 +60,24 @@ static unsigned long syscall_regs[MAX_REG_NR]; static int __init init_syscall_regs(void) { get_safe_registers(syscall_regs, NULL); + syscall_regs[REGS_IP_INDEX] = STUB_CODE + - ((unsigned long) batch_syscall_stub - + ((unsigned long) stub_syscall_handler - (unsigned long) __syscall_stub_start); - syscall_regs[REGS_SP_INDEX] = STUB_DATA; + syscall_regs[REGS_SP_INDEX] = STUB_DATA + + offsetof(struct stub_data, sigstack) + + sizeof(((struct stub_data *) 0)->sigstack) - + sizeof(void *); return 0; } __initcall(init_syscall_regs); -static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr) +static inline long do_syscall_stub(struct mm_id *mm_idp) { + struct stub_data *proc_data = (void *)mm_idp->stack; int n, i; - long ret, offset; - unsigned long * data; - unsigned long * syscall; int err, pid = mm_idp->u.pid; n = ptrace_setregs(pid, syscall_regs); @@ -63,6 +89,9 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr) __func__, -n); } + /* Inform process how much we have filled in. */ + proc_data->syscall_data_len = mm_idp->syscall_data_len; + err = ptrace(PTRACE_CONT, pid, 0, 0); if (err) panic("Failed to continue stub, pid = %d, errno = %d\n", pid, @@ -71,135 +100,141 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr) wait_stub_done(pid); /* - * When the stub stops, we find the following values on the - * beginning of the stack: - * (long )return_value - * (long )offset to failed sycall-data (0, if no error) + * proc_data->err will be non-zero if there was an (unexpected) error. + * In that case, syscall_data_len points to the last executed syscall, + * otherwise it will be zero (but we do not need to rely on that). */ - ret = *((unsigned long *) mm_idp->stack); - offset = *((unsigned long *) mm_idp->stack + 1); - if (offset) { - data = (unsigned long *)(mm_idp->stack + offset - STUB_DATA); - printk(UM_KERN_ERR "%s : ret = %ld, offset = %ld, data = %p\n", - __func__, ret, offset, data); - syscall = (unsigned long *)((unsigned long)data + data[0]); - printk(UM_KERN_ERR "%s: syscall %ld failed, return value = 0x%lx, expected return value = 0x%lx\n", - __func__, syscall[0], ret, syscall[7]); - printk(UM_KERN_ERR " syscall parameters: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", - syscall[1], syscall[2], syscall[3], - syscall[4], syscall[5], syscall[6]); - for (n = 1; n < data[0]/sizeof(long); n++) { - if (n == 1) - printk(UM_KERN_ERR " additional syscall data:"); - if (n % 4 == 1) - printk("\n" UM_KERN_ERR " "); - printk(" 0x%lx", data[n]); - } - if (n > 1) - printk("\n"); - } - else ret = 0; + if (proc_data->err < 0) { + syscall_stub_dump_error(mm_idp); - *addr = check_init_stack(mm_idp, NULL); + /* Store error code in case someone tries to add more syscalls */ + mm_idp->syscall_data_len = proc_data->err; + } else { + mm_idp->syscall_data_len = 0; + } - return ret; + return mm_idp->syscall_data_len; } -long run_syscall_stub(struct mm_id * mm_idp, int syscall, - unsigned long *args, long expected, void **addr, - int done) +int syscall_stub_flush(struct mm_id *mm_idp) { - unsigned long *stack = check_init_stack(mm_idp, *addr); - - *stack += sizeof(long); - stack += *stack / sizeof(long); - - *stack++ = syscall; - *stack++ = args[0]; - *stack++ = args[1]; - *stack++ = args[2]; - *stack++ = args[3]; - *stack++ = args[4]; - *stack++ = args[5]; - *stack++ = expected; - *stack = 0; - - if (!done && ((((unsigned long) stack) & ~UM_KERN_PAGE_MASK) < - UM_KERN_PAGE_SIZE - 10 * sizeof(long))) { - *addr = stack; + int res; + + if (mm_idp->syscall_data_len == 0) return 0; + + /* If an error happened already, report it and reset the state. */ + if (mm_idp->syscall_data_len < 0) { + res = mm_idp->syscall_data_len; + mm_idp->syscall_data_len = 0; + return res; } - return do_syscall_stub(mm_idp, addr); + res = do_syscall_stub(mm_idp); + mm_idp->syscall_data_len = 0; + + return res; } -long syscall_stub_data(struct mm_id * mm_idp, - unsigned long *data, int data_count, - void **addr, void **stub_addr) +struct stub_syscall *syscall_stub_alloc(struct mm_id *mm_idp) { - unsigned long *stack; - int ret = 0; - - /* - * If *addr still is uninitialized, it *must* contain NULL. - * Thus in this case do_syscall_stub correctly won't be called. - */ - if ((((unsigned long) *addr) & ~UM_KERN_PAGE_MASK) >= - UM_KERN_PAGE_SIZE - (10 + data_count) * sizeof(long)) { - ret = do_syscall_stub(mm_idp, addr); - /* in case of error, don't overwrite data on stack */ - if (ret) - return ret; + struct stub_syscall *sc; + struct stub_data *proc_data = (struct stub_data *) mm_idp->stack; + + if (mm_idp->syscall_data_len > 0 && + mm_idp->syscall_data_len == ARRAY_SIZE(proc_data->syscall_data)) + do_syscall_stub(mm_idp); + + if (mm_idp->syscall_data_len < 0) { + /* Return dummy to retain error state. */ + sc = &proc_data->syscall_data[0]; + } else { + sc = &proc_data->syscall_data[mm_idp->syscall_data_len]; + mm_idp->syscall_data_len += 1; } + memset(sc, 0, sizeof(*sc)); - stack = check_init_stack(mm_idp, *addr); - *addr = stack; + return sc; +} - *stack = data_count * sizeof(long); +static struct stub_syscall *syscall_stub_get_previous(struct mm_id *mm_idp, + int syscall_type, + unsigned long virt) +{ + if (mm_idp->syscall_data_len > 0) { + struct stub_data *proc_data = (void *) mm_idp->stack; + struct stub_syscall *sc; - memcpy(stack + 1, data, data_count * sizeof(long)); + sc = &proc_data->syscall_data[mm_idp->syscall_data_len - 1]; - *stub_addr = (void *)(((unsigned long)(stack + 1) & - ~UM_KERN_PAGE_MASK) + STUB_DATA); + if (sc->syscall == syscall_type && + sc->mem.addr + sc->mem.length == virt) + return sc; + } - return 0; + return NULL; } -int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, int prot, - int phys_fd, unsigned long long offset, int done, void **data) +int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot, + int phys_fd, unsigned long long offset) { - int ret; - unsigned long args[] = { virt, len, prot, - MAP_SHARED | MAP_FIXED, phys_fd, - MMAP_OFFSET(offset) }; + struct stub_syscall *sc; - ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt, - data, done); + /* Compress with previous syscall if that is possible */ + sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MMAP, virt); + if (sc && sc->mem.prot == prot && sc->mem.fd == phys_fd && + sc->mem.offset == MMAP_OFFSET(offset - sc->mem.length)) { + sc->mem.length += len; + return 0; + } - return ret; + sc = syscall_stub_alloc(mm_idp); + sc->syscall = STUB_SYSCALL_MMAP; + sc->mem.addr = virt; + sc->mem.length = len; + sc->mem.prot = prot; + sc->mem.fd = phys_fd; + sc->mem.offset = MMAP_OFFSET(offset); + + return 0; } -int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len, - int done, void **data) +int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len) { - int ret; - unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0, - 0 }; + struct stub_syscall *sc; - ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0, - data, done); + /* Compress with previous syscall if that is possible */ + sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MUNMAP, addr); + if (sc) { + sc->mem.length += len; + return 0; + } - return ret; + sc = syscall_stub_alloc(mm_idp); + sc->syscall = STUB_SYSCALL_MUNMAP; + sc->mem.addr = addr; + sc->mem.length = len; + + return 0; } -int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len, - unsigned int prot, int done, void **data) +int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, + unsigned int prot) { - int ret; - unsigned long args[] = { addr, len, prot, 0, 0, 0 }; + struct stub_syscall *sc; - ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0, - data, done); + /* Compress with previous syscall if that is possible */ + sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MPROTECT, addr); + if (sc && sc->mem.prot == prot) { + sc->mem.length += len; + return 0; + } - return ret; + sc = syscall_stub_alloc(mm_idp); + sc->syscall = STUB_SYSCALL_MPROTECT; + sc->mem.addr = addr; + sc->mem.length = len; + sc->mem.prot = prot; + + return 0; } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 41a288dcfc34..f7088345b3fc 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -23,6 +23,7 @@ #include <skas.h> #include <sysdep/stub.h> #include <linux/threads.h> +#include <timetravel.h> #include "../internal.h" int is_skas_winch(int pid, int fd, void *data) @@ -253,7 +254,6 @@ static int userspace_tramp(void *stack) } int userspace_pid[NR_CPUS]; -int kill_userspace_mm[NR_CPUS]; /** * start_userspace() - prepare a new userspace process @@ -345,8 +345,20 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) interrupt_end(); while (1) { - if (kill_userspace_mm[0]) + time_travel_print_bc_msg(); + + current_mm_sync(); + + /* Flush out any pending syscalls */ + err = syscall_stub_flush(current_mm_id()); + if (err) { + if (err == -ENOMEM) + report_enomem(); + + printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d", + __func__, -err); fatal_sigsegv(); + } /* * This can legitimately fail if the process loads a @@ -461,113 +473,6 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) } } -static unsigned long thread_regs[MAX_REG_NR]; -static unsigned long thread_fp_regs[FP_SIZE]; - -static int __init init_thread_regs(void) -{ - get_safe_registers(thread_regs, thread_fp_regs); - /* Set parent's instruction pointer to start of clone-stub */ - thread_regs[REGS_IP_INDEX] = STUB_CODE + - (unsigned long) stub_clone_handler - - (unsigned long) __syscall_stub_start; - thread_regs[REGS_SP_INDEX] = STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - - sizeof(void *); -#ifdef __SIGNAL_FRAMESIZE - thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE; -#endif - return 0; -} - -__initcall(init_thread_regs); - -int copy_context_skas0(unsigned long new_stack, int pid) -{ - int err; - unsigned long current_stack = current_stub_stack(); - struct stub_data *data = (struct stub_data *) current_stack; - struct stub_data *child_data = (struct stub_data *) new_stack; - unsigned long long new_offset; - int new_fd = phys_mapping(uml_to_phys((void *)new_stack), &new_offset); - - /* - * prepare offset and fd of child's stack as argument for parent's - * and child's mmap2 calls - */ - *data = ((struct stub_data) { - .offset = MMAP_OFFSET(new_offset), - .fd = new_fd, - .parent_err = -ESRCH, - .child_err = 0, - }); - - *child_data = ((struct stub_data) { - .child_err = -ESRCH, - }); - - err = ptrace_setregs(pid, thread_regs); - if (err < 0) { - err = -errno; - printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno = %d\n", - __func__, pid, -err); - return err; - } - - err = put_fp_registers(pid, thread_fp_regs); - if (err < 0) { - printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err = %d\n", - __func__, pid, err); - return err; - } - - /* - * Wait, until parent has finished its work: read child's pid from - * parent's stack, and check, if bad result. - */ - err = ptrace(PTRACE_CONT, pid, 0, 0); - if (err) { - err = -errno; - printk(UM_KERN_ERR "Failed to continue new process, pid = %d, errno = %d\n", - pid, errno); - return err; - } - - wait_stub_done(pid); - - pid = data->parent_err; - if (pid < 0) { - printk(UM_KERN_ERR "%s - stub-parent reports error %d\n", - __func__, -pid); - return pid; - } - - /* - * Wait, until child has finished too: read child's result from - * child's stack and check it. - */ - wait_stub_done(pid); - if (child_data->child_err != STUB_DATA) { - printk(UM_KERN_ERR "%s - stub-child %d reports error %ld\n", - __func__, pid, data->child_err); - err = data->child_err; - goto out_kill; - } - - if (ptrace(PTRACE_SETOPTIONS, pid, NULL, - (void *)PTRACE_O_TRACESYSGOOD) < 0) { - err = -errno; - printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n", - __func__, errno); - goto out_kill; - } - - return pid; - - out_kill: - os_kill_ptraced_process(pid, 1); - return err; -} - void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)) { (*buf)[0].JB_IP = (unsigned long) handler; @@ -684,5 +589,4 @@ void reboot_skas(void) void __switch_mm(struct mm_id *mm_idp) { userspace_pid[0] = mm_idp->u.pid; - kill_userspace_mm[0] = mm_idp->kill; } diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 89ad9f4f865c..93fc82c01aba 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -17,6 +17,7 @@ #include <sys/wait.h> #include <sys/time.h> #include <sys/resource.h> +#include <asm/ldt.h> #include <asm/unistd.h> #include <init.h> #include <os.h> diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um index 2106a2bd152b..a46b1397ad01 100644 --- a/arch/x86/Makefile.um +++ b/arch/x86/Makefile.um @@ -9,6 +9,7 @@ core-y += arch/x86/crypto/ # ifeq ($(CONFIG_CC_IS_CLANG),y) KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx +KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 endif diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 64fbd2dbc5b7..a9088250770f 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -62,11 +62,6 @@ void xen_arch_unregister_cpu(int num); #ifdef CONFIG_PVH void __init xen_pvh_init(struct boot_params *boot_params); void __init mem_map_via_hcall(struct boot_params *boot_params_p); -#ifdef CONFIG_XEN_PVH -void __init xen_reserve_extra_memory(struct boot_params *bootp); -#else -static inline void xen_reserve_extra_memory(struct boot_params *bootp) { } -#endif #endif /* Lazy mode for batching updates / context switch */ diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index 9a7c03d47861..51b805c727fc 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -38,7 +38,7 @@ static bool __read_mostly sched_itmt_capable; */ unsigned int __read_mostly sysctl_sched_itmt_enabled; -static int sched_itmt_update_handler(struct ctl_table *table, int write, +static int sched_itmt_update_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { unsigned int old_sysctl; diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c index 8c2d4b8de25d..944e0290f2c0 100644 --- a/arch/x86/platform/pvh/enlighten.c +++ b/arch/x86/platform/pvh/enlighten.c @@ -75,9 +75,6 @@ static void __init init_pvh_bootparams(bool xen_guest) } else xen_raw_printk("Warning: Can fit ISA range into e820\n"); - if (xen_guest) - xen_reserve_extra_memory(&pvh_bootparams); - pvh_bootparams.hdr.cmd_line_ptr = pvh_start_info.cmdline_paddr; diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index 8bc72a51b257..36e67fc97c22 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -9,9 +9,9 @@ else BITS := 64 endif -obj-y = bugs_$(BITS).o delay.o fault.o ldt.o \ +obj-y = bugs_$(BITS).o delay.o fault.o \ ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \ - stub_$(BITS).o stub_segv.o \ + stub_segv.o \ sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \ mem_$(BITS).o subarch.o os-Linux/ @@ -31,7 +31,6 @@ obj-y += syscalls_64.o vdso/ subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o \ ../lib/memmove_64.o ../lib/memset_64.o -subarch-$(CONFIG_PREEMPTION) += ../entry/thunk_64.o endif diff --git a/arch/x86/um/asm/mm_context.h b/arch/x86/um/asm/mm_context.h deleted file mode 100644 index dc32dc023c2f..000000000000 --- a/arch/x86/um/asm/mm_context.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (C) 2004 Fujitsu Siemens Computers GmbH - * Licensed under the GPL - * - * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com> - */ - -#ifndef __ASM_LDT_H -#define __ASM_LDT_H - -#include <linux/mutex.h> -#include <asm/ldt.h> - -#define LDT_PAGES_MAX \ - ((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE) -#define LDT_ENTRIES_PER_PAGE \ - (PAGE_SIZE/LDT_ENTRY_SIZE) -#define LDT_DIRECT_ENTRIES \ - ((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE) - -struct ldt_entry { - __u32 a; - __u32 b; -}; - -typedef struct uml_ldt { - int entry_count; - struct mutex lock; - union { - struct ldt_entry * pages[LDT_PAGES_MAX]; - struct ldt_entry entries[LDT_DIRECT_ENTRIES]; - } u; -} uml_ldt_t; - -#define LDT_entry_a(info) \ - ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) - -#define LDT_entry_b(info) \ - (((info)->base_addr & 0xff000000) | \ - (((info)->base_addr & 0x00ff0000) >> 16) | \ - ((info)->limit & 0xf0000) | \ - (((info)->read_exec_only ^ 1) << 9) | \ - ((info)->contents << 10) | \ - (((info)->seg_not_present ^ 1) << 15) | \ - ((info)->seg_32bit << 22) | \ - ((info)->limit_in_pages << 23) | \ - ((info)->useable << 20) | \ - 0x7000) - -#define _LDT_empty(info) (\ - (info)->base_addr == 0 && \ - (info)->limit == 0 && \ - (info)->contents == 0 && \ - (info)->read_exec_only == 1 && \ - (info)->seg_32bit == 0 && \ - (info)->limit_in_pages == 0 && \ - (info)->seg_not_present == 1 && \ - (info)->useable == 0 ) - -#ifdef CONFIG_X86_64 -#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0)) -#else -#define LDT_empty(info) (_LDT_empty(info)) -#endif - -struct uml_arch_mm_context { - uml_ldt_t ldt; -}; - -#endif diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c deleted file mode 100644 index 255a44dd415a..000000000000 --- a/arch/x86/um/ldt.c +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL - */ - -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/syscalls.h> -#include <linux/uaccess.h> -#include <asm/unistd.h> -#include <os.h> -#include <skas.h> -#include <sysdep/tls.h> - -static inline int modify_ldt (int func, void *ptr, unsigned long bytecount) -{ - return syscall(__NR_modify_ldt, func, ptr, bytecount); -} - -static long write_ldt_entry(struct mm_id *mm_idp, int func, - struct user_desc *desc, void **addr, int done) -{ - long res; - void *stub_addr; - - BUILD_BUG_ON(sizeof(*desc) % sizeof(long)); - - res = syscall_stub_data(mm_idp, (unsigned long *)desc, - sizeof(*desc) / sizeof(long), - addr, &stub_addr); - if (!res) { - unsigned long args[] = { func, - (unsigned long)stub_addr, - sizeof(*desc), - 0, 0, 0 }; - res = run_syscall_stub(mm_idp, __NR_modify_ldt, args, - 0, addr, done); - } - - return res; -} - -/* - * In skas mode, we hold our own ldt data in UML. - * Thus, the code implementing sys_modify_ldt_skas - * is very similar to (and mostly stolen from) sys_modify_ldt - * for arch/i386/kernel/ldt.c - * The routines copied and modified in part are: - * - read_ldt - * - read_default_ldt - * - write_ldt - * - sys_modify_ldt_skas - */ - -static int read_ldt(void __user * ptr, unsigned long bytecount) -{ - int i, err = 0; - unsigned long size; - uml_ldt_t *ldt = ¤t->mm->context.arch.ldt; - - if (!ldt->entry_count) - goto out; - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - err = bytecount; - - mutex_lock(&ldt->lock); - if (ldt->entry_count <= LDT_DIRECT_ENTRIES) { - size = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES; - if (size > bytecount) - size = bytecount; - if (copy_to_user(ptr, ldt->u.entries, size)) - err = -EFAULT; - bytecount -= size; - ptr += size; - } - else { - for (i=0; i<ldt->entry_count/LDT_ENTRIES_PER_PAGE && bytecount; - i++) { - size = PAGE_SIZE; - if (size > bytecount) - size = bytecount; - if (copy_to_user(ptr, ldt->u.pages[i], size)) { - err = -EFAULT; - break; - } - bytecount -= size; - ptr += size; - } - } - mutex_unlock(&ldt->lock); - - if (bytecount == 0 || err == -EFAULT) - goto out; - - if (clear_user(ptr, bytecount)) - err = -EFAULT; - -out: - return err; -} - -static int read_default_ldt(void __user * ptr, unsigned long bytecount) -{ - int err; - - if (bytecount > 5*LDT_ENTRY_SIZE) - bytecount = 5*LDT_ENTRY_SIZE; - - err = bytecount; - /* - * UML doesn't support lcall7 and lcall27. - * So, we don't really have a default ldt, but emulate - * an empty ldt of common host default ldt size. - */ - if (clear_user(ptr, bytecount)) - err = -EFAULT; - - return err; -} - -static int write_ldt(void __user * ptr, unsigned long bytecount, int func) -{ - uml_ldt_t *ldt = ¤t->mm->context.arch.ldt; - struct mm_id * mm_idp = ¤t->mm->context.id; - int i, err; - struct user_desc ldt_info; - struct ldt_entry entry0, *ldt_p; - void *addr = NULL; - - err = -EINVAL; - if (bytecount != sizeof(ldt_info)) - goto out; - err = -EFAULT; - if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) - goto out; - - err = -EINVAL; - if (ldt_info.entry_number >= LDT_ENTRIES) - goto out; - if (ldt_info.contents == 3) { - if (func == 1) - goto out; - if (ldt_info.seg_not_present == 0) - goto out; - } - - mutex_lock(&ldt->lock); - - err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1); - if (err) - goto out_unlock; - - if (ldt_info.entry_number >= ldt->entry_count && - ldt_info.entry_number >= LDT_DIRECT_ENTRIES) { - for (i=ldt->entry_count/LDT_ENTRIES_PER_PAGE; - i*LDT_ENTRIES_PER_PAGE <= ldt_info.entry_number; - i++) { - if (i == 0) - memcpy(&entry0, ldt->u.entries, - sizeof(entry0)); - ldt->u.pages[i] = (struct ldt_entry *) - __get_free_page(GFP_KERNEL|__GFP_ZERO); - if (!ldt->u.pages[i]) { - err = -ENOMEM; - /* Undo the change in host */ - memset(&ldt_info, 0, sizeof(ldt_info)); - write_ldt_entry(mm_idp, 1, &ldt_info, &addr, 1); - goto out_unlock; - } - if (i == 0) { - memcpy(ldt->u.pages[0], &entry0, - sizeof(entry0)); - memcpy(ldt->u.pages[0]+1, ldt->u.entries+1, - sizeof(entry0)*(LDT_DIRECT_ENTRIES-1)); - } - ldt->entry_count = (i + 1) * LDT_ENTRIES_PER_PAGE; - } - } - if (ldt->entry_count <= ldt_info.entry_number) - ldt->entry_count = ldt_info.entry_number + 1; - - if (ldt->entry_count <= LDT_DIRECT_ENTRIES) - ldt_p = ldt->u.entries + ldt_info.entry_number; - else - ldt_p = ldt->u.pages[ldt_info.entry_number/LDT_ENTRIES_PER_PAGE] + - ldt_info.entry_number%LDT_ENTRIES_PER_PAGE; - - if (ldt_info.base_addr == 0 && ldt_info.limit == 0 && - (func == 1 || LDT_empty(&ldt_info))) { - ldt_p->a = 0; - ldt_p->b = 0; - } - else{ - if (func == 1) - ldt_info.useable = 0; - ldt_p->a = LDT_entry_a(&ldt_info); - ldt_p->b = LDT_entry_b(&ldt_info); - } - err = 0; - -out_unlock: - mutex_unlock(&ldt->lock); -out: - return err; -} - -static long do_modify_ldt_skas(int func, void __user *ptr, - unsigned long bytecount) -{ - int ret = -ENOSYS; - - switch (func) { - case 0: - ret = read_ldt(ptr, bytecount); - break; - case 1: - case 0x11: - ret = write_ldt(ptr, bytecount, func); - break; - case 2: - ret = read_default_ldt(ptr, bytecount); - break; - } - return ret; -} - -static DEFINE_SPINLOCK(host_ldt_lock); -static short dummy_list[9] = {0, -1}; -static short * host_ldt_entries = NULL; - -static void ldt_get_host_info(void) -{ - long ret; - struct ldt_entry * ldt; - short *tmp; - int i, size, k, order; - - spin_lock(&host_ldt_lock); - - if (host_ldt_entries != NULL) { - spin_unlock(&host_ldt_lock); - return; - } - host_ldt_entries = dummy_list+1; - - spin_unlock(&host_ldt_lock); - - for (i = LDT_PAGES_MAX-1, order=0; i; i>>=1, order++) - ; - - ldt = (struct ldt_entry *) - __get_free_pages(GFP_KERNEL|__GFP_ZERO, order); - if (ldt == NULL) { - printk(KERN_ERR "ldt_get_host_info: couldn't allocate buffer " - "for host ldt\n"); - return; - } - - ret = modify_ldt(0, ldt, (1<<order)*PAGE_SIZE); - if (ret < 0) { - printk(KERN_ERR "ldt_get_host_info: couldn't read host ldt\n"); - goto out_free; - } - if (ret == 0) { - /* default_ldt is active, simply write an empty entry 0 */ - host_ldt_entries = dummy_list; - goto out_free; - } - - for (i=0, size=0; i<ret/LDT_ENTRY_SIZE; i++) { - if (ldt[i].a != 0 || ldt[i].b != 0) - size++; - } - - if (size < ARRAY_SIZE(dummy_list)) - host_ldt_entries = dummy_list; - else { - size = (size + 1) * sizeof(dummy_list[0]); - tmp = kmalloc(size, GFP_KERNEL); - if (tmp == NULL) { - printk(KERN_ERR "ldt_get_host_info: couldn't allocate " - "host ldt list\n"); - goto out_free; - } - host_ldt_entries = tmp; - } - - for (i=0, k=0; i<ret/LDT_ENTRY_SIZE; i++) { - if (ldt[i].a != 0 || ldt[i].b != 0) - host_ldt_entries[k++] = i; - } - host_ldt_entries[k] = -1; - -out_free: - free_pages((unsigned long)ldt, order); -} - -long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm) -{ - struct user_desc desc; - short * num_p; - int i; - long page, err=0; - void *addr = NULL; - - - mutex_init(&new_mm->arch.ldt.lock); - - if (!from_mm) { - memset(&desc, 0, sizeof(desc)); - /* - * Now we try to retrieve info about the ldt, we - * inherited from the host. All ldt-entries found - * will be reset in the following loop - */ - ldt_get_host_info(); - for (num_p=host_ldt_entries; *num_p != -1; num_p++) { - desc.entry_number = *num_p; - err = write_ldt_entry(&new_mm->id, 1, &desc, - &addr, *(num_p + 1) == -1); - if (err) - break; - } - new_mm->arch.ldt.entry_count = 0; - - goto out; - } - - /* - * Our local LDT is used to supply the data for - * modify_ldt(READLDT), if PTRACE_LDT isn't available, - * i.e., we have to use the stub for modify_ldt, which - * can't handle the big read buffer of up to 64kB. - */ - mutex_lock(&from_mm->arch.ldt.lock); - if (from_mm->arch.ldt.entry_count <= LDT_DIRECT_ENTRIES) - memcpy(new_mm->arch.ldt.u.entries, from_mm->arch.ldt.u.entries, - sizeof(new_mm->arch.ldt.u.entries)); - else { - i = from_mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE; - while (i-->0) { - page = __get_free_page(GFP_KERNEL|__GFP_ZERO); - if (!page) { - err = -ENOMEM; - break; - } - new_mm->arch.ldt.u.pages[i] = - (struct ldt_entry *) page; - memcpy(new_mm->arch.ldt.u.pages[i], - from_mm->arch.ldt.u.pages[i], PAGE_SIZE); - } - } - new_mm->arch.ldt.entry_count = from_mm->arch.ldt.entry_count; - mutex_unlock(&from_mm->arch.ldt.lock); - - out: - return err; -} - - -void free_ldt(struct mm_context *mm) -{ - int i; - - if (mm->arch.ldt.entry_count > LDT_DIRECT_ENTRIES) { - i = mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE; - while (i-- > 0) - free_page((long) mm->arch.ldt.u.pages[i]); - } - mm->arch.ldt.entry_count = 0; -} - -SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr , - unsigned long , bytecount) -{ - /* See non-um modify_ldt() for why we do this cast */ - return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount); -} diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h index ce0ca46ad383..dc89f4423454 100644 --- a/arch/x86/um/shared/sysdep/stub.h +++ b/arch/x86/um/shared/sysdep/stub.h @@ -12,4 +12,4 @@ #endif extern void stub_segv_handler(int, siginfo_t *, void *); -extern void stub_clone_handler(void); +extern void stub_syscall_handler(void); diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h index ea8b5a2d67af..0b44a86dd346 100644 --- a/arch/x86/um/shared/sysdep/stub_32.h +++ b/arch/x86/um/shared/sysdep/stub_32.h @@ -6,6 +6,7 @@ #ifndef __SYSDEP_STUB_H #define __SYSDEP_STUB_H +#include <stddef.h> #include <asm/ptrace.h> #include <generated/asm-offsets.h> @@ -79,33 +80,31 @@ static __always_inline long stub_syscall5(long syscall, long arg1, long arg2, return ret; } -static __always_inline void trap_myself(void) +static __always_inline long stub_syscall6(long syscall, long arg1, long arg2, + long arg3, long arg4, long arg5, + long arg6) { - __asm("int3"); + struct syscall_args { + int ebx, ebp; + } args = { arg1, arg6 }; + long ret; + + __asm__ volatile ("pushl %%ebp;" + "movl 0x4(%%ebx),%%ebp;" + "movl (%%ebx),%%ebx;" + "int $0x80;" + "popl %%ebp" + : "=a" (ret) + : "0" (syscall), "b" (&args), + "c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5) + : "memory"); + + return ret; } -static __always_inline void remap_stack_and_trap(void) +static __always_inline void trap_myself(void) { - __asm__ volatile ( - "movl %%esp,%%ebx ;" - "andl %0,%%ebx ;" - "movl %1,%%eax ;" - "movl %%ebx,%%edi ; addl %2,%%edi ; movl (%%edi),%%edi ;" - "movl %%ebx,%%ebp ; addl %3,%%ebp ; movl (%%ebp),%%ebp ;" - "int $0x80 ;" - "addl %4,%%ebx ; movl %%eax, (%%ebx) ;" - "int $3" - : : - "g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1)), - "g" (STUB_MMAP_NR), - "g" (UML_STUB_FIELD_FD), - "g" (UML_STUB_FIELD_OFFSET), - "g" (UML_STUB_FIELD_CHILD_ERR), - "c" (STUB_DATA_PAGES * UM_KERN_PAGE_SIZE), - "d" (PROT_READ | PROT_WRITE), - "S" (MAP_FIXED | MAP_SHARED) - : - "memory"); + __asm("int3"); } static __always_inline void *get_stub_data(void) diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h index b24168ef0ac4..67f44284f1aa 100644 --- a/arch/x86/um/shared/sysdep/stub_64.h +++ b/arch/x86/um/shared/sysdep/stub_64.h @@ -6,6 +6,7 @@ #ifndef __SYSDEP_STUB_H #define __SYSDEP_STUB_H +#include <stddef.h> #include <sysdep/ptrace_user.h> #include <generated/asm-offsets.h> #include <linux/stddef.h> @@ -79,35 +80,25 @@ static __always_inline long stub_syscall5(long syscall, long arg1, long arg2, return ret; } -static __always_inline void trap_myself(void) +static __always_inline long stub_syscall6(long syscall, long arg1, long arg2, + long arg3, long arg4, long arg5, + long arg6) { - __asm("int3"); + long ret; + + __asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; movq %7,%%r9 ; " + __syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3), + "g" (arg4), "g" (arg5), "g" (arg6) + : __syscall_clobber, "r10", "r8", "r9"); + + return ret; } -static __always_inline void remap_stack_and_trap(void) +static __always_inline void trap_myself(void) { - __asm__ volatile ( - "movq %0,%%rax ;" - "movq %%rsp,%%rdi ;" - "andq %1,%%rdi ;" - "movq %2,%%r10 ;" - "movq %%rdi,%%r8 ; addq %3,%%r8 ; movq (%%r8),%%r8 ;" - "movq %%rdi,%%r9 ; addq %4,%%r9 ; movq (%%r9),%%r9 ;" - __syscall ";" - "movq %%rsp,%%rdi ; andq %1,%%rdi ;" - "addq %5,%%rdi ; movq %%rax, (%%rdi) ;" - "int3" - : : - "g" (STUB_MMAP_NR), - "g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1)), - "g" (MAP_FIXED | MAP_SHARED), - "g" (UML_STUB_FIELD_FD), - "g" (UML_STUB_FIELD_OFFSET), - "g" (UML_STUB_FIELD_CHILD_ERR), - "S" (STUB_DATA_PAGES * UM_KERN_PAGE_SIZE), - "d" (PROT_READ | PROT_WRITE) - : - __syscall_clobber, "r10", "r8", "r9"); + __asm("int3"); } static __always_inline void *get_stub_data(void) diff --git a/arch/x86/um/stub_32.S b/arch/x86/um/stub_32.S deleted file mode 100644 index 8291899e6aaf..000000000000 --- a/arch/x86/um/stub_32.S +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <as-layout.h> - -.section .__syscall_stub, "ax" - - .globl batch_syscall_stub -batch_syscall_stub: - /* %esp comes in as "top of page" */ - mov %esp, %ecx - /* %esp has pointer to first operation */ - add $8, %esp -again: - /* load length of additional data */ - mov 0x0(%esp), %eax - - /* if(length == 0) : end of list */ - /* write possible 0 to header */ - mov %eax, 0x4(%ecx) - cmpl $0, %eax - jz done - - /* save current pointer */ - mov %esp, 0x4(%ecx) - - /* skip additional data */ - add %eax, %esp - - /* load syscall-# */ - pop %eax - - /* load syscall params */ - pop %ebx - pop %ecx - pop %edx - pop %esi - pop %edi - pop %ebp - - /* execute syscall */ - int $0x80 - - /* restore top of page pointer in %ecx */ - mov %esp, %ecx - andl $(~UM_KERN_PAGE_SIZE) + 1, %ecx - - /* check return value */ - pop %ebx - cmp %ebx, %eax - je again - -done: - /* save return value */ - mov %eax, (%ecx) - - /* stop */ - int3 diff --git a/arch/x86/um/stub_64.S b/arch/x86/um/stub_64.S deleted file mode 100644 index f3404640197a..000000000000 --- a/arch/x86/um/stub_64.S +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <as-layout.h> - -.section .__syscall_stub, "ax" - .globl batch_syscall_stub -batch_syscall_stub: - /* %rsp has the pointer to first operation */ - mov %rsp, %rbx - add $0x10, %rsp -again: - /* load length of additional data */ - mov 0x0(%rsp), %rax - - /* if(length == 0) : end of list */ - /* write possible 0 to header */ - mov %rax, 8(%rbx) - cmp $0, %rax - jz done - - /* save current pointer */ - mov %rsp, 8(%rbx) - - /* skip additional data */ - add %rax, %rsp - - /* load syscall-# */ - pop %rax - - /* load syscall params */ - pop %rdi - pop %rsi - pop %rdx - pop %r10 - pop %r8 - pop %r9 - - /* execute syscall */ - syscall - - /* check return value */ - pop %rcx - cmp %rcx, %rax - je again - -done: - /* save return value */ - mov %rax, (%rbx) - - /* stop */ - int3 diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c index d301deee041f..fbb129023080 100644 --- a/arch/x86/um/tls_32.c +++ b/arch/x86/um/tls_32.c @@ -11,6 +11,7 @@ #include <os.h> #include <skas.h> #include <sysdep/tls.h> +#include <asm/desc.h> /* * If needed we can detect when it's uninitialized. diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 27a2a02ef8fb..728a4366ca85 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -9,6 +9,7 @@ #include <asm/io_apic.h> #include <asm/hypervisor.h> #include <asm/e820/api.h> +#include <asm/setup.h> #include <xen/xen.h> #include <asm/xen/interface.h> @@ -27,54 +28,6 @@ bool __ro_after_init xen_pvh; EXPORT_SYMBOL_GPL(xen_pvh); -void __init xen_pvh_init(struct boot_params *boot_params) -{ - u32 msr; - u64 pfn; - - xen_pvh = 1; - xen_domain_type = XEN_HVM_DOMAIN; - xen_start_flags = pvh_start_info.flags; - - msr = cpuid_ebx(xen_cpuid_base() + 2); - pfn = __pa(hypercall_page); - wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); - - if (xen_initial_domain()) - x86_init.oem.arch_setup = xen_add_preferred_consoles; - x86_init.oem.banner = xen_banner; - - xen_efi_init(boot_params); - - if (xen_initial_domain()) { - struct xen_platform_op op = { - .cmd = XENPF_get_dom0_console, - }; - int ret = HYPERVISOR_platform_op(&op); - - if (ret > 0) - xen_init_vga(&op.u.dom0_console, - min(ret * sizeof(char), - sizeof(op.u.dom0_console)), - &boot_params->screen_info); - } -} - -void __init mem_map_via_hcall(struct boot_params *boot_params_p) -{ - struct xen_memory_map memmap; - int rc; - - memmap.nr_entries = ARRAY_SIZE(boot_params_p->e820_table); - set_xen_guest_handle(memmap.buffer, boot_params_p->e820_table); - rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); - if (rc) { - xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc); - BUG(); - } - boot_params_p->e820_entries = memmap.nr_entries; -} - /* * Reserve e820 UNUSABLE regions to inflate the memory balloon. * @@ -89,8 +42,9 @@ void __init mem_map_via_hcall(struct boot_params *boot_params_p) * hypervisor should notify us which memory ranges are suitable for creating * foreign mappings, but that's not yet implemented. */ -void __init xen_reserve_extra_memory(struct boot_params *bootp) +static void __init pvh_reserve_extra_memory(void) { + struct boot_params *bootp = &boot_params; unsigned int i, ram_pages = 0, extra_pages; for (i = 0; i < bootp->e820_entries; i++) { @@ -141,3 +95,58 @@ void __init xen_reserve_extra_memory(struct boot_params *bootp) xen_add_extra_mem(PFN_UP(e->addr), pages); } } + +static void __init pvh_arch_setup(void) +{ + pvh_reserve_extra_memory(); + + if (xen_initial_domain()) + xen_add_preferred_consoles(); +} + +void __init xen_pvh_init(struct boot_params *boot_params) +{ + u32 msr; + u64 pfn; + + xen_pvh = 1; + xen_domain_type = XEN_HVM_DOMAIN; + xen_start_flags = pvh_start_info.flags; + + msr = cpuid_ebx(xen_cpuid_base() + 2); + pfn = __pa(hypercall_page); + wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); + + x86_init.oem.arch_setup = pvh_arch_setup; + x86_init.oem.banner = xen_banner; + + xen_efi_init(boot_params); + + if (xen_initial_domain()) { + struct xen_platform_op op = { + .cmd = XENPF_get_dom0_console, + }; + int ret = HYPERVISOR_platform_op(&op); + + if (ret > 0) + xen_init_vga(&op.u.dom0_console, + min(ret * sizeof(char), + sizeof(op.u.dom0_console)), + &boot_params->screen_info); + } +} + +void __init mem_map_via_hcall(struct boot_params *boot_params_p) +{ + struct xen_memory_map memmap; + int rc; + + memmap.nr_entries = ARRAY_SIZE(boot_params_p->e820_table); + set_xen_guest_handle(memmap.buffer, boot_params_p->e820_table); + rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); + if (rc) { + xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc); + BUG(); + } + boot_params_p->e820_entries = memmap.nr_entries; +} diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index d4cefd8a9af4..10c660fae8b3 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -54,8 +54,9 @@ struct mc_debug_data { static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); static struct mc_debug_data mc_debug_data_early __initdata; -static struct mc_debug_data __percpu *mc_debug_data __refdata = +static DEFINE_PER_CPU(struct mc_debug_data *, mc_debug_data) = &mc_debug_data_early; +static struct mc_debug_data __percpu *mc_debug_data_ptr; DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); static struct static_key mc_debug __ro_after_init; @@ -70,16 +71,20 @@ static int __init xen_parse_mc_debug(char *arg) } early_param("xen_mc_debug", xen_parse_mc_debug); +void mc_percpu_init(unsigned int cpu) +{ + per_cpu(mc_debug_data, cpu) = per_cpu_ptr(mc_debug_data_ptr, cpu); +} + static int __init mc_debug_enable(void) { - struct mc_debug_data __percpu *mcdb; unsigned long flags; if (!mc_debug_enabled) return 0; - mcdb = alloc_percpu(struct mc_debug_data); - if (!mcdb) { + mc_debug_data_ptr = alloc_percpu(struct mc_debug_data); + if (!mc_debug_data_ptr) { pr_err("xen_mc_debug inactive\n"); static_key_slow_dec(&mc_debug); return -ENOMEM; @@ -88,7 +93,7 @@ static int __init mc_debug_enable(void) /* Be careful when switching to percpu debug data. */ local_irq_save(flags); xen_mc_flush(); - mc_debug_data = mcdb; + mc_percpu_init(0); local_irq_restore(flags); pr_info("xen_mc_debug active\n"); @@ -150,7 +155,7 @@ void xen_mc_flush(void) trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx); if (static_key_false(&mc_debug)) { - mcdb = this_cpu_ptr(mc_debug_data); + mcdb = __this_cpu_read(mc_debug_data); memcpy(mcdb->entries, b->entries, b->mcidx * sizeof(struct multicall_entry)); } @@ -230,7 +235,7 @@ struct multicall_space __xen_mc_entry(size_t args) ret.mc = &b->entries[b->mcidx]; if (static_key_false(&mc_debug)) { - struct mc_debug_data *mcdb = this_cpu_ptr(mc_debug_data); + struct mc_debug_data *mcdb = __this_cpu_read(mc_debug_data); mcdb->caller[b->mcidx] = __builtin_return_address(0); mcdb->argsz[b->mcidx] = args; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index a0c3e77e3d5b..806ddb2391d9 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -690,6 +690,7 @@ char * __init xen_memory_setup(void) struct xen_memory_map memmap; unsigned long max_pages; unsigned long extra_pages = 0; + unsigned long maxmem_pages; int i; int op; @@ -761,8 +762,8 @@ char * __init xen_memory_setup(void) * Make sure we have no memory above max_pages, as this area * isn't handled by the p2m management. */ - extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), - extra_pages, max_pages - max_pfn); + maxmem_pages = EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)); + extra_pages = min3(maxmem_pages, extra_pages, max_pages - max_pfn); i = 0; addr = xen_e820_table.entries[0].addr; size = xen_e820_table.entries[0].size; diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 7ea57f728b89..6863d3da7dec 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -305,6 +305,7 @@ static int xen_pv_kick_ap(unsigned int cpu, struct task_struct *idle) return rc; xen_pmu_init(cpu); + mc_percpu_init(cpu); /* * Why is this a BUG? If the hypercall fails then everything can be diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index e7775dff9452..0cf16fc79e0b 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -257,6 +257,9 @@ void xen_mc_callback(void (*fn)(void *), void *data); */ struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size); +/* Do percpu data initialization for multicalls. */ +void mc_percpu_init(unsigned int cpu); + extern bool is_xen_pmu; irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id); |