aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/common/locomo.c4
-rw-r--r--arch/arm/common/sa1111.c4
-rw-r--r--arch/arm/include/asm/hardware/locomo.h2
-rw-r--r--arch/arm/include/asm/hardware/sa1111.h2
-rw-r--r--arch/arm64/Kconfig20
-rw-r--r--arch/arm64/Makefile10
-rw-r--r--arch/arm64/include/asm/pgtable.h22
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c2
-rw-r--r--arch/arm64/kernel/fpsimd.c2
-rw-r--r--arch/arm64/kernel/vdso/Makefile2
-rw-r--r--arch/arm64/kernel/vdso32/Makefile2
-rw-r--r--arch/arm64/tools/sysreg4
-rw-r--r--arch/loongarch/Kconfig1
-rw-r--r--arch/mips/Kconfig1
-rw-r--r--arch/mips/include/asm/mips-cm.h4
-rw-r--r--arch/mips/include/asm/smp.h1
-rw-r--r--arch/mips/kernel/smp-cps.c5
-rw-r--r--arch/mips/loongson64/smp.c35
-rw-r--r--arch/mips/sibyte/common/sb_tbprof.c1
-rw-r--r--arch/parisc/Kconfig2
-rw-r--r--arch/parisc/include/asm/parisc-device.h2
-rw-r--r--arch/parisc/include/asm/unistd.h54
-rw-r--r--arch/parisc/include/asm/vdso.h2
-rw-r--r--arch/parisc/kernel/cache.c6
-rw-r--r--arch/parisc/kernel/drivers.c4
-rw-r--r--arch/parisc/kernel/unaligned.c2
-rw-r--r--arch/parisc/kernel/vdso32/Makefile24
-rw-r--r--arch/parisc/kernel/vdso32/vdso32.lds.S3
-rw-r--r--arch/parisc/kernel/vdso32/vdso32_generic.c32
-rw-r--r--arch/parisc/kernel/vdso64/Makefile25
-rw-r--r--arch/parisc/kernel/vdso64/vdso64.lds.S2
-rw-r--r--arch/parisc/kernel/vdso64/vdso64_generic.c24
-rw-r--r--arch/powerpc/include/asm/ps3.h6
-rw-r--r--arch/powerpc/include/asm/vio.h6
-rw-r--r--arch/powerpc/kvm/powerpc.c4
-rw-r--r--arch/powerpc/platforms/ps3/system-bus.c4
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c2
-rw-r--r--arch/powerpc/platforms/pseries/vio.c6
-rw-r--r--arch/riscv/Kconfig6
-rw-r--r--arch/riscv/Kconfig.vendor19
-rw-r--r--arch/riscv/boot/Makefile1
-rw-r--r--arch/riscv/configs/defconfig26
-rw-r--r--arch/riscv/errata/andes/errata.c3
-rw-r--r--arch/riscv/errata/sifive/errata.c3
-rw-r--r--arch/riscv/errata/thead/errata.c3
-rw-r--r--arch/riscv/include/asm/acpi.h15
-rw-r--r--arch/riscv/include/asm/bitops.h2
-rw-r--r--arch/riscv/include/asm/cpufeature.h103
-rw-r--r--arch/riscv/include/asm/hwcap.h25
-rw-r--r--arch/riscv/include/asm/hwprobe.h2
-rw-r--r--arch/riscv/include/asm/page.h2
-rw-r--r--arch/riscv/include/asm/thread_info.h1
-rw-r--r--arch/riscv/include/asm/vendor_extensions.h104
-rw-r--r--arch/riscv/include/asm/vendor_extensions/andes.h19
-rw-r--r--arch/riscv/include/uapi/asm/hwprobe.h1
-rw-r--r--arch/riscv/kernel/Makefile3
-rw-r--r--arch/riscv/kernel/acpi.c17
-rw-r--r--arch/riscv/kernel/acpi_numa.c131
-rw-r--r--arch/riscv/kernel/cacheinfo.c35
-rw-r--r--arch/riscv/kernel/cpu.c35
-rw-r--r--arch/riscv/kernel/cpufeature.c143
-rw-r--r--arch/riscv/kernel/entry.S21
-rw-r--r--arch/riscv/kernel/probes/Makefile1
-rw-r--r--arch/riscv/kernel/probes/ftrace.c65
-rw-r--r--arch/riscv/kernel/setup.c4
-rw-r--r--arch/riscv/kernel/signal.c2
-rw-r--r--arch/riscv/kernel/smpboot.c2
-rw-r--r--arch/riscv/kernel/stacktrace.c4
-rw-r--r--arch/riscv/kernel/sys_hwprobe.c48
-rw-r--r--arch/riscv/kernel/vendor_extensions.c56
-rw-r--r--arch/riscv/kernel/vendor_extensions/Makefile3
-rw-r--r--arch/riscv/kernel/vendor_extensions/andes.c18
-rw-r--r--arch/s390/Kconfig11
-rw-r--r--arch/s390/appldata/appldata_base.c10
-rw-r--r--arch/s390/boot/Makefile3
-rw-r--r--arch/s390/boot/alternative.c3
-rw-r--r--arch/s390/boot/boot.h4
-rw-r--r--arch/s390/boot/ipl_parm.c3
-rw-r--r--arch/s390/boot/startup.c14
-rw-r--r--arch/s390/boot/uv.c8
-rw-r--r--arch/s390/boot/uv.h13
-rw-r--r--arch/s390/boot/vmem.c11
-rw-r--r--arch/s390/configs/debug_defconfig1
-rw-r--r--arch/s390/configs/defconfig1
-rw-r--r--arch/s390/include/asm/abs_lowcore.h8
-rw-r--r--arch/s390/include/asm/alternative-asm.h57
-rw-r--r--arch/s390/include/asm/alternative.h154
-rw-r--r--arch/s390/include/asm/atomic_ops.h3
-rw-r--r--arch/s390/include/asm/ccwdev.h2
-rw-r--r--arch/s390/include/asm/facility.h1
-rw-r--r--arch/s390/include/asm/kmsan.h6
-rw-r--r--arch/s390/include/asm/lowcore.h32
-rw-r--r--arch/s390/include/asm/nospec-branch.h9
-rw-r--r--arch/s390/include/asm/page.h2
-rw-r--r--arch/s390/include/asm/processor.h30
-rw-r--r--arch/s390/include/asm/runtime-const.h77
-rw-r--r--arch/s390/include/asm/smp.h1
-rw-r--r--arch/s390/include/asm/spinlock.h2
-rw-r--r--arch/s390/include/asm/thread_info.h1
-rw-r--r--arch/s390/include/asm/uaccess.h9
-rw-r--r--arch/s390/include/asm/uv.h32
-rw-r--r--arch/s390/kernel/Makefile3
-rw-r--r--arch/s390/kernel/abs_lowcore.c1
-rw-r--r--arch/s390/kernel/alternative.c75
-rw-r--r--arch/s390/kernel/alternative.h0
-rw-r--r--arch/s390/kernel/asm-offsets.c5
-rw-r--r--arch/s390/kernel/debug.c2
-rw-r--r--arch/s390/kernel/early.c9
-rw-r--r--arch/s390/kernel/entry.S251
-rw-r--r--arch/s390/kernel/head64.S8
-rw-r--r--arch/s390/kernel/ipl.c2
-rw-r--r--arch/s390/kernel/machine_kexec.c2
-rw-r--r--arch/s390/kernel/nospec-branch.c16
-rw-r--r--arch/s390/kernel/nospec-sysfs.c2
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c14
-rw-r--r--arch/s390/kernel/processor.c20
-rw-r--r--arch/s390/kernel/reipl.S26
-rw-r--r--arch/s390/kernel/setup.c7
-rw-r--r--arch/s390/kernel/smp.c141
-rw-r--r--arch/s390/kernel/topology.c2
-rw-r--r--arch/s390/kernel/uv.c35
-rw-r--r--arch/s390/kernel/vmlinux.lds.S5
-rw-r--r--arch/s390/lib/spinlock.c4
-rw-r--r--arch/s390/mm/cmm.c6
-rw-r--r--arch/s390/mm/dump_pagetables.c30
-rw-r--r--arch/s390/mm/maccess.c4
-rw-r--r--arch/s390/pci/pci_irq.c110
-rw-r--r--arch/sparc/include/asm/vio.h6
-rw-r--r--arch/sparc/kernel/vio.c4
-rw-r--r--arch/um/Kconfig8
-rw-r--r--arch/um/drivers/Kconfig20
-rw-r--r--arch/um/drivers/Makefile10
-rw-r--r--arch/um/drivers/chan.h3
-rw-r--r--arch/um/drivers/chan_kern.c81
-rw-r--r--arch/um/drivers/chan_user.c20
-rw-r--r--arch/um/drivers/harddog_kern.c1
-rw-r--r--arch/um/drivers/line.c2
-rw-r--r--arch/um/drivers/pcap_kern.c113
-rw-r--r--arch/um/drivers/pcap_user.c137
-rw-r--r--arch/um/drivers/pcap_user.h21
-rw-r--r--arch/um/drivers/port_kern.c14
-rw-r--r--arch/um/drivers/ubd_kern.c3
-rw-r--r--arch/um/drivers/vector_kern.c19
-rw-r--r--arch/um/drivers/vector_kern.h1
-rw-r--r--arch/um/drivers/xterm.c2
-rw-r--r--arch/um/drivers/xterm_kern.c13
-rw-r--r--arch/um/include/asm/mmu.h10
-rw-r--r--arch/um/include/asm/mmu_context.h2
-rw-r--r--arch/um/include/asm/pgtable.h32
-rw-r--r--arch/um/include/asm/tlbflush.h46
-rw-r--r--arch/um/include/shared/as-layout.h2
-rw-r--r--arch/um/include/shared/common-offsets.h5
-rw-r--r--arch/um/include/shared/kern_util.h1
-rw-r--r--arch/um/include/shared/os.h33
-rw-r--r--arch/um/include/shared/skas/mm_id.h2
-rw-r--r--arch/um/include/shared/skas/skas.h2
-rw-r--r--arch/um/include/shared/skas/stub-data.h36
-rw-r--r--arch/um/include/shared/timetravel.h9
-rw-r--r--arch/um/include/shared/user.h8
-rw-r--r--arch/um/kernel/exec.c9
-rw-r--r--arch/um/kernel/irq.c80
-rw-r--r--arch/um/kernel/ksyms.c2
-rw-r--r--arch/um/kernel/mem.c1
-rw-r--r--arch/um/kernel/process.c69
-rw-r--r--arch/um/kernel/reboot.c15
-rw-r--r--arch/um/kernel/skas/Makefile9
-rw-r--r--arch/um/kernel/skas/clone.c48
-rw-r--r--arch/um/kernel/skas/mmu.c54
-rw-r--r--arch/um/kernel/skas/process.c18
-rw-r--r--arch/um/kernel/skas/stub.c69
-rw-r--r--arch/um/kernel/time.c187
-rw-r--r--arch/um/kernel/tlb.c545
-rw-r--r--arch/um/kernel/trap.c15
-rw-r--r--arch/um/kernel/um_arch.c3
-rw-r--r--arch/um/os-Linux/file.c94
-rw-r--r--arch/um/os-Linux/signal.c118
-rw-r--r--arch/um/os-Linux/skas/mem.c245
-rw-r--r--arch/um/os-Linux/skas/process.c124
-rw-r--r--arch/um/os-Linux/start_up.c1
-rw-r--r--arch/x86/Makefile.um1
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h5
-rw-r--r--arch/x86/kernel/itmt.c2
-rw-r--r--arch/x86/platform/pvh/enlighten.c3
-rw-r--r--arch/x86/um/Makefile5
-rw-r--r--arch/x86/um/asm/mm_context.h70
-rw-r--r--arch/x86/um/ldt.c380
-rw-r--r--arch/x86/um/shared/sysdep/stub.h2
-rw-r--r--arch/x86/um/shared/sysdep/stub_32.h45
-rw-r--r--arch/x86/um/shared/sysdep/stub_64.h41
-rw-r--r--arch/x86/um/stub_32.S56
-rw-r--r--arch/x86/um/stub_64.S50
-rw-r--r--arch/x86/um/tls_32.c1
-rw-r--r--arch/x86/xen/enlighten_pvh.c107
-rw-r--r--arch/x86/xen/multicalls.c19
-rw-r--r--arch/x86/xen/setup.c5
-rw-r--r--arch/x86/xen/smp_pv.c1
-rw-r--r--arch/x86/xen/xen-ops.h3
197 files changed, 2824 insertions, 2889 deletions
diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c
index 6d0c9f7268ba..06b0e5fd54a6 100644
--- a/arch/arm/common/locomo.c
+++ b/arch/arm/common/locomo.c
@@ -816,10 +816,10 @@ EXPORT_SYMBOL(locomo_frontlight_set);
* We model this as a regular bus type, and hang devices directly
* off this.
*/
-static int locomo_match(struct device *_dev, struct device_driver *_drv)
+static int locomo_match(struct device *_dev, const struct device_driver *_drv)
{
struct locomo_dev *dev = LOCOMO_DEV(_dev);
- struct locomo_driver *drv = LOCOMO_DRV(_drv);
+ const struct locomo_driver *drv = LOCOMO_DRV(_drv);
return dev->devid == drv->devid;
}
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 1fbd7363cf11..550978dc3c50 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -1339,10 +1339,10 @@ EXPORT_SYMBOL_GPL(sa1111_get_irq);
* We model this as a regular bus type, and hang devices directly
* off this.
*/
-static int sa1111_match(struct device *_dev, struct device_driver *_drv)
+static int sa1111_match(struct device *_dev, const struct device_driver *_drv)
{
struct sa1111_dev *dev = to_sa1111_device(_dev);
- struct sa1111_driver *drv = SA1111_DRV(_drv);
+ const struct sa1111_driver *drv = SA1111_DRV(_drv);
return !!(dev->devid & drv->devid);
}
diff --git a/arch/arm/include/asm/hardware/locomo.h b/arch/arm/include/asm/hardware/locomo.h
index 9fd9ad5d9202..3190e1e5067a 100644
--- a/arch/arm/include/asm/hardware/locomo.h
+++ b/arch/arm/include/asm/hardware/locomo.h
@@ -189,7 +189,7 @@ struct locomo_driver {
void (*remove)(struct locomo_dev *);
};
-#define LOCOMO_DRV(_d) container_of((_d), struct locomo_driver, drv)
+#define LOCOMO_DRV(_d) container_of_const((_d), struct locomo_driver, drv)
#define LOCOMO_DRIVER_NAME(_ldev) ((_ldev)->dev.driver->name)
diff --git a/arch/arm/include/asm/hardware/sa1111.h b/arch/arm/include/asm/hardware/sa1111.h
index d8c6f8a99dfa..a815f39b4243 100644
--- a/arch/arm/include/asm/hardware/sa1111.h
+++ b/arch/arm/include/asm/hardware/sa1111.h
@@ -404,7 +404,7 @@ struct sa1111_driver {
void (*remove)(struct sa1111_dev *);
};
-#define SA1111_DRV(_d) container_of((_d), struct sa1111_driver, drv)
+#define SA1111_DRV(_d) container_of_const((_d), struct sa1111_driver, drv)
#define SA1111_DRIVER_NAME(_sadev) ((_sadev)->dev.driver->name)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7fd70be0463f..b3fc891f1544 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -168,9 +168,9 @@ config ARM64
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN
- select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
- select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN
- select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE)
+ select HAVE_ARCH_KASAN_VMALLOC
+ select HAVE_ARCH_KASAN_SW_TAGS
+ select HAVE_ARCH_KASAN_HW_TAGS if ARM64_MTE
# Some instrumentation may be unsound, hence EXPERT
select HAVE_ARCH_KCSAN if EXPERT
select HAVE_ARCH_KFENCE
@@ -211,8 +211,8 @@ config ARM64
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
- select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_GRAPH_RETVAL
select HAVE_GCC_PLUGINS
select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \
HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI
@@ -1471,7 +1471,6 @@ config HOTPLUG_CPU
config NUMA
bool "NUMA Memory Allocation and Scheduler Support"
select GENERIC_ARCH_NUMA
- select ACPI_NUMA if ACPI
select OF_NUMA
select HAVE_SETUP_PER_CPU_AREA
select NEED_PER_CPU_EMBED_FIRST_CHUNK
@@ -2337,6 +2336,17 @@ config EFI
allow the kernel to be booted as an EFI application. This
is only useful on systems that have UEFI firmware.
+config COMPRESSED_INSTALL
+ bool "Install compressed image by default"
+ help
+ This makes the regular "make install" install the compressed
+ image we built, not the legacy uncompressed one.
+
+ You can check that a compressed image works for you by doing
+ "make zinstall" first, and verifying that everything is fine
+ in your environment before making "make install" do this for
+ you.
+
config DMI
bool "Enable support for SMBIOS (DMI) tables"
depends on EFI
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 3f0f35fd5bb7..f6bc3da1ef11 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -182,7 +182,13 @@ $(BOOT_TARGETS): vmlinux
Image.%: Image
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-install: KBUILD_IMAGE := $(boot)/Image
+ifeq ($(CONFIG_COMPRESSED_INSTALL),y)
+ DEFAULT_KBUILD_IMAGE = $(KBUILD_IMAGE)
+else
+ DEFAULT_KBUILD_IMAGE = $(boot)/Image
+endif
+
+install: KBUILD_IMAGE := $(DEFAULT_KBUILD_IMAGE)
install zinstall:
$(call cmd,install)
@@ -229,7 +235,7 @@ define archhelp
echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)'
echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)'
echo ' image.fit - Flat Image Tree (arch/$(ARCH)/boot/image.fit)'
- echo ' install - Install uncompressed kernel'
+ echo ' install - Install kernel (compressed if COMPRESSED_INSTALL set)'
echo ' zinstall - Install compressed kernel'
echo ' Install using (your) ~/bin/installkernel or'
echo ' (distribution) /sbin/installkernel or'
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index f8efbc128446..7a4f5604be3f 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1065,6 +1065,28 @@ static inline bool pgtable_l5_enabled(void) { return false; }
#define p4d_offset_kimg(dir,addr) ((p4d_t *)dir)
+static inline
+p4d_t *p4d_offset_lockless_folded(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
+{
+ /*
+ * With runtime folding of the pud, pud_offset_lockless() passes
+ * the 'pgd_t *' we return here to p4d_to_folded_pud(), which
+ * will offset the pointer assuming that it points into
+ * a page-table page. However, the fast GUP path passes us a
+ * pgd_t allocated on the stack and so we must use the original
+ * pointer in 'pgdp' to construct the p4d pointer instead of
+ * using the generic p4d_offset_lockless() implementation.
+ *
+ * Note: reusing the original pointer means that we may
+ * dereference the same (live) page-table entry multiple times.
+ * This is safe because it is still only loaded once in the
+ * context of each level and the CPU guarantees same-address
+ * read-after-read ordering.
+ */
+ return p4d_offset(pgdp, addr);
+}
+#define p4d_offset_lockless p4d_offset_lockless_folded
+
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
#define pgd_ERROR(e) \
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index b776e7424fe9..e737c6295ec7 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -507,7 +507,7 @@ static int update_insn_emulation_mode(struct insn_emulation *insn,
return ret;
}
-static int emulation_proc_handler(struct ctl_table *table, int write,
+static int emulation_proc_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 82e8a6017382..77006df20a75 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -535,7 +535,7 @@ static unsigned int find_supported_vector_length(enum vec_type type,
#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL)
-static int vec_proc_do_default_vl(struct ctl_table *table, int write,
+static int vec_proc_do_default_vl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct vl_info *info = table->extra1;
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index d63930c82839..d11da6461278 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -21,7 +21,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti
# potential future proofing if we end up with internal calls to the exported
# routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so
# preparation in build-time C")).
-ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \
+ldflags-y := -shared -soname=linux-vdso.so.1 \
-Bsymbolic --build-id=sha1 -n $(btildflags-y)
ifdef CONFIG_LD_ORPHAN_WARN
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index cc4508c604b2..25a2cb6317f3 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -98,7 +98,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__
# From arm vDSO Makefile
VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1
VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096
-VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1
+VDSO_LDFLAGS += -shared --build-id=sha1
VDSO_LDFLAGS += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index a4c1dd4741a4..7ceaa1e0b4bc 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -149,7 +149,7 @@ Res0 63:32
UnsignedEnum 31:28 GIC
0b0000 NI
0b0001 GICv3
- 0b0010 GICv4p1
+ 0b0011 GICv4p1
EndEnum
UnsignedEnum 27:24 Virt_frac
0b0000 NI
@@ -903,7 +903,7 @@ EndEnum
UnsignedEnum 27:24 GIC
0b0000 NI
0b0001 IMP
- 0b0010 V4P1
+ 0b0011 V4P1
EndEnum
SignedEnum 23:20 AdvSIMD
0b0000 IMP
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index ebdb7156560c..70f169210b52 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -476,7 +476,6 @@ config NR_CPUS
config NUMA
bool "NUMA Support"
select SMP
- select ACPI_NUMA if ACPI
help
Say Y to compile the kernel with NUMA (Non-Uniform Memory Access)
support. This option improves performance on systems with more
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f9d252b6ede1..60077e576935 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -478,6 +478,7 @@ config MACH_LOONGSON64
select BOARD_SCACHE
select CSRC_R4K
select CEVT_R4K
+ select SYNC_R4K
select FORCE_PCI
select ISA
select I8259
diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h
index c2930a75b7e4..1e782275850a 100644
--- a/arch/mips/include/asm/mips-cm.h
+++ b/arch/mips/include/asm/mips-cm.h
@@ -240,6 +240,10 @@ GCR_ACCESSOR_RO(32, 0x0d0, gic_status)
GCR_ACCESSOR_RO(32, 0x0f0, cpc_status)
#define CM_GCR_CPC_STATUS_EX BIT(0)
+/* GCR_ACCESS - Controls core/IOCU access to GCRs */
+GCR_ACCESSOR_RW(32, 0x120, access_cm3)
+#define CM_GCR_ACCESS_ACCESSEN GENMASK(7, 0)
+
/* GCR_L2_CONFIG - Indicates L2 cache configuration when Config5.L2C=1 */
GCR_ACCESSOR_RW(32, 0x130, l2_config)
#define CM_GCR_L2_CONFIG_BYPASS BIT(20)
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index bc2c240f414b..2427d76f953f 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -50,7 +50,6 @@ extern int __cpu_logical_map[NR_CPUS];
#define SMP_CALL_FUNCTION 0x2
/* Octeon - Tell another core to flush its icache */
#define SMP_ICACHE_FLUSH 0x4
-#define SMP_ASK_C0COUNT 0x8
/* Mask of CPUs which are currently definitely operating coherently */
extern cpumask_t cpu_coherent_mask;
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 9cc087dd1c19..395622c37325 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -317,7 +317,10 @@ static void boot_core(unsigned int core, unsigned int vpe_id)
write_gcr_co_reset_ext_base(CM_GCR_Cx_RESET_EXT_BASE_UEB);
/* Ensure the core can access the GCRs */
- set_gcr_access(1 << core);
+ if (mips_cm_revision() < CM_REV_CM3)
+ set_gcr_access(1 << core);
+ else
+ set_gcr_access_cm3(1 << core);
if (mips_cpc_present()) {
/* Reset the core */
diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c
index 66d049cdcf14..147acd972a07 100644
--- a/arch/mips/loongson64/smp.c
+++ b/arch/mips/loongson64/smp.c
@@ -33,7 +33,6 @@ static void __iomem *ipi_clear0_regs[16];
static void __iomem *ipi_status0_regs[16];
static void __iomem *ipi_en0_regs[16];
static void __iomem *ipi_mailbox_buf[16];
-static uint32_t core0_c0count[NR_CPUS];
static u32 (*ipi_read_clear)(int cpu);
static void (*ipi_write_action)(int cpu, u32 action);
@@ -382,11 +381,10 @@ loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action)
ipi_write_action(cpu_logical_map(i), (u32)action);
}
-
static irqreturn_t loongson3_ipi_interrupt(int irq, void *dev_id)
{
- int i, cpu = smp_processor_id();
- unsigned int action, c0count;
+ int cpu = smp_processor_id();
+ unsigned int action;
action = ipi_read_clear(cpu);
@@ -399,26 +397,14 @@ static irqreturn_t loongson3_ipi_interrupt(int irq, void *dev_id)
irq_exit();
}
- if (action & SMP_ASK_C0COUNT) {
- BUG_ON(cpu != 0);
- c0count = read_c0_count();
- c0count = c0count ? c0count : 1;
- for (i = 1; i < nr_cpu_ids; i++)
- core0_c0count[i] = c0count;
- nudge_writes(); /* Let others see the result ASAP */
- }
-
return IRQ_HANDLED;
}
-#define MAX_LOOPS 800
/*
* SMP init and finish on secondary CPUs
*/
static void loongson3_init_secondary(void)
{
- int i;
- uint32_t initcount;
unsigned int cpu = smp_processor_id();
unsigned int imask = STATUSF_IP7 | STATUSF_IP6 |
STATUSF_IP3 | STATUSF_IP2;
@@ -432,23 +418,6 @@ static void loongson3_init_secondary(void)
cpu_logical_map(cpu) % loongson_sysconf.cores_per_package);
cpu_data[cpu].package =
cpu_logical_map(cpu) / loongson_sysconf.cores_per_package;
-
- i = 0;
- core0_c0count[cpu] = 0;
- loongson3_send_ipi_single(0, SMP_ASK_C0COUNT);
- while (!core0_c0count[cpu]) {
- i++;
- cpu_relax();
- }
-
- if (i > MAX_LOOPS)
- i = MAX_LOOPS;
- if (cpu_data[cpu].package)
- initcount = core0_c0count[cpu] + i;
- else /* Local access is faster for loops */
- initcount = core0_c0count[cpu] + i/2;
-
- write_c0_count(initcount);
}
static void loongson3_smp_finish(void)
diff --git a/arch/mips/sibyte/common/sb_tbprof.c b/arch/mips/sibyte/common/sb_tbprof.c
index af5333986900..149a9151bc0b 100644
--- a/arch/mips/sibyte/common/sb_tbprof.c
+++ b/arch/mips/sibyte/common/sb_tbprof.c
@@ -589,4 +589,5 @@ module_exit(sbprof_tb_cleanup);
MODULE_ALIAS_CHARDEV_MAJOR(SBPROF_TB_MAJOR);
MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");
+MODULE_DESCRIPTION("Support for ZBbus profiling");
MODULE_LICENSE("GPL");
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index dc9b902de8ea..5d650e02cbf4 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -46,6 +46,7 @@ config PARISC
select GENERIC_CPU_DEVICES if !SMP
select GENERIC_LIB_DEVMEM_IS_ALLOWED
select SYSCTL_ARCH_UNALIGN_ALLOW
+ select SYSCTL_ARCH_UNALIGN_NO_WARN
select SYSCTL_EXCEPTION_TRACE
select HAVE_MOD_ARCH_SPECIFIC
select MODULES_USE_ELF_RELA
@@ -86,6 +87,7 @@ config PARISC
select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS
select TRACE_IRQFLAGS_SUPPORT
select HAVE_FUNCTION_DESCRIPTORS if 64BIT
+ select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
help
The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/parisc/include/asm/parisc-device.h b/arch/parisc/include/asm/parisc-device.h
index 7ddd7f433367..9e74cef4d774 100644
--- a/arch/parisc/include/asm/parisc-device.h
+++ b/arch/parisc/include/asm/parisc-device.h
@@ -41,7 +41,7 @@ struct parisc_driver {
#define to_parisc_device(d) container_of(d, struct parisc_device, dev)
-#define to_parisc_driver(d) container_of(d, struct parisc_driver, drv)
+#define to_parisc_driver(d) container_of_const(d, struct parisc_driver, drv)
#define parisc_parent(d) to_parisc_device(d->dev.parent)
static inline const char *parisc_pathname(struct parisc_device *d)
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 98851ff7699a..a97c0fd55f91 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -20,7 +20,7 @@
* sysdeps/unix/sysv/linux/hppa/sysdep.h
*/
-#ifdef PIC
+#ifndef DONT_USE_PIC
/* WARNING: CANNOT BE USED IN A NOP! */
# define K_STW_ASM_PIC " copy %%r19, %%r4\n"
# define K_LDW_ASM_PIC " copy %%r4, %%r19\n"
@@ -43,7 +43,7 @@
across the syscall. */
#define K_CALL_CLOB_REGS "%r1", "%r2", K_USING_GR4 \
- "%r20", "%r29", "%r31"
+ "%r20", "%r29", "%r31"
#undef K_INLINE_SYSCALL
#define K_INLINE_SYSCALL(name, nr, args...) ({ \
@@ -58,7 +58,7 @@
" ldi %1, %%r20\n" \
K_LDW_ASM_PIC \
: "=r" (__res) \
- : "i" (SYS_ify(name)) K_ASM_ARGS_##nr \
+ : "i" (name) K_ASM_ARGS_##nr \
: "memory", K_CALL_CLOB_REGS K_CLOB_ARGS_##nr \
); \
__sys_res = (long)__res; \
@@ -104,42 +104,18 @@
#define K_CLOB_ARGS_1 K_CLOB_ARGS_2, "%r25"
#define K_CLOB_ARGS_0 K_CLOB_ARGS_1, "%r26"
-#define _syscall0(type,name) \
-type name(void) \
-{ \
- return K_INLINE_SYSCALL(name, 0); \
-}
-
-#define _syscall1(type,name,type1,arg1) \
-type name(type1 arg1) \
-{ \
- return K_INLINE_SYSCALL(name, 1, arg1); \
-}
-
-#define _syscall2(type,name,type1,arg1,type2,arg2) \
-type name(type1 arg1, type2 arg2) \
-{ \
- return K_INLINE_SYSCALL(name, 2, arg1, arg2); \
-}
-
-#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
-type name(type1 arg1, type2 arg2, type3 arg3) \
-{ \
- return K_INLINE_SYSCALL(name, 3, arg1, arg2, arg3); \
-}
-
-#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
-type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) \
-{ \
- return K_INLINE_SYSCALL(name, 4, arg1, arg2, arg3, arg4); \
-}
-
-/* select takes 5 arguments */
-#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \
-type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \
-{ \
- return K_INLINE_SYSCALL(name, 5, arg1, arg2, arg3, arg4, arg5); \
-}
+#define syscall0(name) \
+ K_INLINE_SYSCALL(name, 0)
+#define syscall1(name, arg1) \
+ K_INLINE_SYSCALL(name, 1, arg1)
+#define syscall2(name, arg1, arg2) \
+ K_INLINE_SYSCALL(name, 2, arg1, arg2)
+#define syscall3(name, arg1, arg2, arg3) \
+ K_INLINE_SYSCALL(name, 3, arg1, arg2, arg3)
+#define syscall4(name, arg1, arg2, arg3, arg4) \
+ K_INLINE_SYSCALL(name, 4, arg1, arg2, arg3, arg4)
+#define syscall5(name, arg1, arg2, arg3, arg4, arg5) \
+ K_INLINE_SYSCALL(name, 5, arg1, arg2, arg3, arg4, arg5)
#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_STAT64
diff --git a/arch/parisc/include/asm/vdso.h b/arch/parisc/include/asm/vdso.h
index ef8206193f82..2a2dc11b5545 100644
--- a/arch/parisc/include/asm/vdso.h
+++ b/arch/parisc/include/asm/vdso.h
@@ -19,6 +19,6 @@ extern struct vdso_data *vdso_data;
/* Default link addresses for the vDSOs */
#define VDSO_LBASE 0
-#define VDSO_VERSION_STRING LINUX_5.18
+#define VDSO_VERSION_STRING LINUX_6.11
#endif /* __PARISC_VDSO_H__ */
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 483bfafd930c..db531e58d70e 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -611,11 +611,7 @@ void __init parisc_setup_cache_timing(void)
threshold/1024);
set_tlb_threshold:
- if (threshold > FLUSH_TLB_THRESHOLD)
- parisc_tlb_flush_threshold = threshold;
- else
- parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD;
-
+ parisc_tlb_flush_threshold = max(threshold, FLUSH_TLB_THRESHOLD);
printk(KERN_INFO "TLB flush threshold set to %lu KiB\n",
parisc_tlb_flush_threshold/1024);
}
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index ac19d685e4a5..1e793f770f71 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -97,7 +97,7 @@ static int for_each_padev(int (*fn)(struct device *, void *), void * data)
* @driver: the PA-RISC driver to try
* @dev: the PA-RISC device to try
*/
-static int match_device(struct parisc_driver *driver, struct parisc_device *dev)
+static int match_device(const struct parisc_driver *driver, struct parisc_device *dev)
{
const struct parisc_device_id *ids;
@@ -548,7 +548,7 @@ alloc_pa_dev(unsigned long hpa, struct hardware_path *mod_path)
return dev;
}
-static int parisc_generic_match(struct device *dev, struct device_driver *drv)
+static int parisc_generic_match(struct device *dev, const struct device_driver *drv)
{
return match_device(to_parisc_driver(drv), to_parisc_device(dev));
}
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 71e596ca5a86..3e79e40e361d 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -104,6 +104,7 @@
#define ERR_NOTHANDLED -1
int unaligned_enabled __read_mostly = 1;
+int no_unaligned_warning __read_mostly;
static int emulate_ldh(struct pt_regs *regs, int toreg)
{
@@ -399,6 +400,7 @@ void handle_unaligned(struct pt_regs *regs)
} else {
static DEFINE_RATELIMIT_STATE(kernel_ratelimit, 5 * HZ, 5);
if (!(current->thread.flags & PARISC_UAC_NOPRINT) &&
+ !no_unaligned_warning &&
__ratelimit(&kernel_ratelimit))
pr_warn("Kernel: unaligned access to " RFMT " in %pS "
"(iir " RFMT ")\n",
diff --git a/arch/parisc/kernel/vdso32/Makefile b/arch/parisc/kernel/vdso32/Makefile
index 1350d50c6306..2b36d25ada6e 100644
--- a/arch/parisc/kernel/vdso32/Makefile
+++ b/arch/parisc/kernel/vdso32/Makefile
@@ -1,11 +1,25 @@
-# List of files in the vdso, has to be asm only for now
+# Include the generic Makefile to check the built vdso.
+include $(srctree)/lib/vdso/Makefile
+
+KCOV_INSTRUMENT := n
+
+# Disable gcov profiling, ubsan and kasan for VDSO code
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
obj-vdso32 = note.o sigtramp.o restart_syscall.o
+obj-cvdso32 = vdso32_generic.o
# Build rules
-targets := $(obj-vdso32) vdso32.so
+targets := $(obj-vdso32) $(obj-cvdso32) vdso32.so
obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+obj-cvdso32 := $(addprefix $(obj)/, $(obj-cvdso32))
+
+VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_vdso32_generic.o = $(VDSO_CFLAGS_REMOVE)
ccflags-y := -shared -fno-common -fbuiltin -mno-fast-indirect-calls -O2 -mno-long-calls
# -march=1.1 -mschedule=7100LC
@@ -26,18 +40,22 @@ $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so FORCE
# Force dependency (incbin is bad)
# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so: $(obj)/vdso32.lds $(obj-vdso32) $(VDSO_LIBGCC) FORCE
+$(obj)/vdso32.so: $(obj)/vdso32.lds $(obj-vdso32) $(obj-cvdso32) $(VDSO_LIBGCC) FORCE
$(call if_changed,vdso32ld)
# assembly rules for the .S files
$(obj-vdso32): %.o: %.S FORCE
$(call if_changed_dep,vdso32as)
+$(obj-cvdso32): %.o: %.c FORCE
+ $(call if_changed_dep,vdso32cc)
# actual build commands
quiet_cmd_vdso32ld = VDSO32L $@
cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $(filter-out FORCE, $^) -o $@
quiet_cmd_vdso32as = VDSO32A $@
cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdso32cc = VDSO32C $@
+ cmd_vdso32cc = $(CROSS32CC) $(c_flags) -c -o $@ $<
# Generate VDSO offsets using helper script
gen-vdsosym := $(src)/gen_vdso_offsets.sh
diff --git a/arch/parisc/kernel/vdso32/vdso32.lds.S b/arch/parisc/kernel/vdso32/vdso32.lds.S
index d4aff3af5262..4273baa26b65 100644
--- a/arch/parisc/kernel/vdso32/vdso32.lds.S
+++ b/arch/parisc/kernel/vdso32/vdso32.lds.S
@@ -106,6 +106,9 @@ VERSION
global:
__kernel_sigtramp_rt32;
__kernel_restart_syscall32;
+ __vdso_gettimeofday;
+ __vdso_clock_gettime;
+ __vdso_clock_gettime64;
local: *;
};
}
diff --git a/arch/parisc/kernel/vdso32/vdso32_generic.c b/arch/parisc/kernel/vdso32/vdso32_generic.c
new file mode 100644
index 000000000000..8d5bd59e8646
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/vdso32_generic.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "asm/unistd.h"
+#include <linux/types.h>
+#include <uapi/asm/unistd_32.h>
+
+struct timezone;
+struct old_timespec32;
+struct __kernel_timespec;
+struct __kernel_old_timeval;
+
+/* forward declarations */
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
+int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts);
+int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts);
+
+
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
+ struct timezone *tz)
+{
+ return syscall2(__NR_gettimeofday, (long)tv, (long)tz);
+}
+
+int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts)
+{
+ return syscall2(__NR_clock_gettime, (long)clock, (long)ts);
+}
+
+int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts)
+{
+ return syscall2(__NR_clock_gettime64, (long)clock, (long)ts);
+}
diff --git a/arch/parisc/kernel/vdso64/Makefile b/arch/parisc/kernel/vdso64/Makefile
index 0b1c1cc4c2c7..bd87bd6a6659 100644
--- a/arch/parisc/kernel/vdso64/Makefile
+++ b/arch/parisc/kernel/vdso64/Makefile
@@ -1,12 +1,25 @@
-# List of files in the vdso, has to be asm only for now
+# Include the generic Makefile to check the built vdso.
+include $(srctree)/lib/vdso/Makefile
+
+KCOV_INSTRUMENT := n
+
+# Disable gcov profiling, ubsan and kasan for VDSO code
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
obj-vdso64 = note.o sigtramp.o restart_syscall.o
+obj-cvdso64 = vdso64_generic.o
# Build rules
-targets := $(obj-vdso64) vdso64.so
-obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+obj-cvdso64 := $(addprefix $(obj)/, $(obj-cvdso64))
+VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
ccflags-y := -shared -fno-common -fno-builtin
ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
@@ -26,18 +39,22 @@ $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so FORCE
# Force dependency (incbin is bad)
# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so: $(obj)/vdso64.lds $(obj-vdso64) $(VDSO_LIBGCC) FORCE
+$(obj)/vdso64.so: $(obj)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) $(VDSO_LIBGCC) FORCE
$(call if_changed,vdso64ld)
# assembly rules for the .S files
$(obj-vdso64): %.o: %.S FORCE
$(call if_changed_dep,vdso64as)
+$(obj-cvdso64): %.o: %.c FORCE
+ $(call if_changed_dep,vdso64cc)
# actual build commands
quiet_cmd_vdso64ld = VDSO64L $@
cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter-out FORCE, $^) -o $@
quiet_cmd_vdso64as = VDSO64A $@
cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdso64cc = VDSO64C $@
+ cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $<
# Generate VDSO offsets using helper script
gen-vdsosym := $(src)/gen_vdso_offsets.sh
diff --git a/arch/parisc/kernel/vdso64/vdso64.lds.S b/arch/parisc/kernel/vdso64/vdso64.lds.S
index de1fb4b19286..10f25e4e1554 100644
--- a/arch/parisc/kernel/vdso64/vdso64.lds.S
+++ b/arch/parisc/kernel/vdso64/vdso64.lds.S
@@ -104,6 +104,8 @@ VERSION
global:
__kernel_sigtramp_rt64;
__kernel_restart_syscall64;
+ __vdso_gettimeofday;
+ __vdso_clock_gettime;
local: *;
};
}
diff --git a/arch/parisc/kernel/vdso64/vdso64_generic.c b/arch/parisc/kernel/vdso64/vdso64_generic.c
new file mode 100644
index 000000000000..fc6836a0075b
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/vdso64_generic.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "asm/unistd.h"
+#include <linux/types.h>
+
+struct timezone;
+struct __kernel_timespec;
+struct __kernel_old_timeval;
+
+/* forward declarations */
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
+int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
+
+
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
+ struct timezone *tz)
+{
+ return syscall2(__NR_gettimeofday, (long)tv, (long)tz);
+}
+
+int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
+{
+ return syscall2(__NR_clock_gettime, (long)clock, (long)ts);
+}
diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h
index d13d8fdc3411..987e23a2bd28 100644
--- a/arch/powerpc/include/asm/ps3.h
+++ b/arch/powerpc/include/asm/ps3.h
@@ -390,11 +390,7 @@ int ps3_system_bus_device_register(struct ps3_system_bus_device *dev);
int ps3_system_bus_driver_register(struct ps3_system_bus_driver *drv);
void ps3_system_bus_driver_unregister(struct ps3_system_bus_driver *drv);
-static inline struct ps3_system_bus_driver *ps3_drv_to_system_bus_drv(
- struct device_driver *_drv)
-{
- return container_of(_drv, struct ps3_system_bus_driver, core);
-}
+#define ps3_drv_to_system_bus_drv(_drv) container_of_const(_drv, struct ps3_system_bus_driver, core)
static inline struct ps3_system_bus_device *ps3_dev_to_system_bus_dev(
const struct device *_dev)
{
diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h
index 6faf2a931755..7c444150c5ad 100644
--- a/arch/powerpc/include/asm/vio.h
+++ b/arch/powerpc/include/asm/vio.h
@@ -156,11 +156,7 @@ static inline int vio_enable_interrupts(struct vio_dev *dev)
}
#endif
-static inline struct vio_driver *to_vio_driver(struct device_driver *drv)
-{
- return container_of(drv, struct vio_driver, driver);
-}
-
+#define to_vio_driver(__drv) container_of_const(__drv, struct vio_driver, driver)
#define to_vio_dev(__dev) container_of_const(__dev, struct vio_dev, dev)
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 961aadc71de2..5e6c7b527677 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1984,8 +1984,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
break;
r = -ENXIO;
- if (!xive_enabled())
+ if (!xive_enabled()) {
+ fdput(f);
break;
+ }
r = -EPERM;
dev = kvm_device_from_filp(f.file);
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index 56dc6b29a3e7..b9a7d9bae687 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -333,10 +333,10 @@ int ps3_mmio_region_init(struct ps3_system_bus_device *dev,
EXPORT_SYMBOL_GPL(ps3_mmio_region_init);
static int ps3_system_bus_match(struct device *_dev,
- struct device_driver *_drv)
+ const struct device_driver *_drv)
{
int result;
- struct ps3_system_bus_driver *drv = ps3_drv_to_system_bus_drv(_drv);
+ const struct ps3_system_bus_driver *drv = ps3_drv_to_system_bus_drv(_drv);
struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
if (!dev->match_sub_id)
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index b401282727a4..3436b0af795e 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -339,7 +339,7 @@ static struct attribute *ibmbus_bus_attrs[] = {
};
ATTRIBUTE_GROUPS(ibmbus_bus);
-static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv)
+static int ibmebus_bus_bus_match(struct device *dev, const struct device_driver *drv)
{
const struct of_device_id *matches = drv->of_match_table;
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 36d1c7d4156b..ac1d2d2c9a88 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1576,10 +1576,10 @@ void vio_unregister_device(struct vio_dev *viodev)
}
EXPORT_SYMBOL(vio_unregister_device);
-static int vio_bus_match(struct device *dev, struct device_driver *drv)
+static int vio_bus_match(struct device *dev, const struct device_driver *drv)
{
const struct vio_dev *vio_dev = to_vio_dev(dev);
- struct vio_driver *vio_drv = to_vio_driver(drv);
+ const struct vio_driver *vio_drv = to_vio_driver(drv);
const struct vio_device_id *ids = vio_drv->id_table;
return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL);
@@ -1689,7 +1689,7 @@ struct vio_dev *vio_find_node(struct device_node *vnode)
/* construct the kobject name from the device node */
if (of_node_is_type(vnode_parent, "vdevice")) {
const __be32 *prop;
-
+
prop = of_get_property(vnode, "reg", NULL);
if (!prop)
goto out;
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 3ceec2ca84fa..0f3cd7c3a436 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -13,7 +13,9 @@ config 32BIT
config RISCV
def_bool y
select ACPI_GENERIC_GSI if ACPI
+ select ACPI_PPTT if ACPI
select ACPI_REDUCED_HARDWARE_ONLY if ACPI
+ select ACPI_SPCR_TABLE if ACPI
select ARCH_DMA_DEFAULT_COHERENT
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM_VMEMMAP
@@ -123,6 +125,7 @@ config RISCV
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_STACKLEAK
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU
@@ -154,7 +157,6 @@ config RISCV
select HAVE_KERNEL_UNCOMPRESSED if !XIP_KERNEL && !EFI_ZBOOT
select HAVE_KERNEL_ZSTD if !XIP_KERNEL && !EFI_ZBOOT
select HAVE_KPROBES if !XIP_KERNEL
- select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL
select HAVE_KRETPROBES if !XIP_KERNEL
# https://github.com/ClangBuiltLinux/linux/issues/1881
select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD
@@ -820,6 +822,8 @@ config RISCV_EFFICIENT_UNALIGNED_ACCESS
endchoice
+source "arch/riscv/Kconfig.vendor"
+
endmenu # "Platform type"
menu "Kernel features"
diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor
new file mode 100644
index 000000000000..6f1cdd32ed29
--- /dev/null
+++ b/arch/riscv/Kconfig.vendor
@@ -0,0 +1,19 @@
+menu "Vendor extensions"
+
+config RISCV_ISA_VENDOR_EXT
+ bool
+
+menu "Andes"
+config RISCV_ISA_VENDOR_EXT_ANDES
+ bool "Andes vendor extension support"
+ select RISCV_ISA_VENDOR_EXT
+ default y
+ help
+ Say N here if you want to disable all Andes vendor extension
+ support. This will cause any Andes vendor extensions that are
+ requested by hardware probing to be ignored.
+
+ If you don't know what to do here, say Y.
+endmenu
+
+endmenu
diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile
index 869c0345b908..4e9e7a28bf9b 100644
--- a/arch/riscv/boot/Makefile
+++ b/arch/riscv/boot/Makefile
@@ -18,7 +18,6 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
OBJCOPYFLAGS_loader.bin :=-O binary
OBJCOPYFLAGS_xipImage :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
-targets := Image Image.* loader loader.o loader.lds loader.bin
targets := Image Image.* loader loader.o loader.lds loader.bin xipImage
ifeq ($(CONFIG_XIP_KERNEL),y)
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 3f1f055866af..0d678325444f 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -7,6 +7,7 @@ CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CGROUPS=y
CONFIG_MEMCG=y
+CONFIG_BLK_CGROUP=y
CONFIG_CGROUP_SCHED=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
@@ -35,9 +36,6 @@ CONFIG_ARCH_THEAD=y
CONFIG_ARCH_VIRT=y
CONFIG_ARCH_CANAAN=y
CONFIG_SMP=y
-CONFIG_HOTPLUG_CPU=y
-CONFIG_PM=y
-CONFIG_CPU_IDLE=y
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_STAT=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=m
@@ -52,13 +50,11 @@ CONFIG_ACPI=y
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-CONFIG_SPARSEMEM_MANUAL=y
CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_SPARSEMEM_MANUAL=y
CONFIG_NET=y
CONFIG_PACKET=y
-CONFIG_UNIX=y
CONFIG_XFRM_USER=m
-CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_PNP=y
@@ -102,9 +98,9 @@ CONFIG_NET_SCHED=y
CONFIG_NET_CLS_CGROUP=m
CONFIG_NETLINK_DIAG=y
CONFIG_CGROUP_NET_PRIO=y
+CONFIG_CAN=m
CONFIG_NET_9P=y
CONFIG_NET_9P_VIRTIO=y
-CONFIG_CAN=m
CONFIG_PCI=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCI_HOST_GENERIC=y
@@ -153,8 +149,8 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_DW=y
CONFIG_SERIAL_OF_PLATFORM=y
-CONFIG_SERIAL_SH_SCI=y
CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
+CONFIG_SERIAL_SH_SCI=y
CONFIG_VIRTIO_CONSOLE=y
CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_VIRTIO=y
@@ -179,7 +175,6 @@ CONFIG_DEVFREQ_THERMAL=y
CONFIG_RZG2L_THERMAL=y
CONFIG_WATCHDOG=y
CONFIG_SUNXI_WATCHDOG=y
-CONFIG_RENESAS_RZG2LWDT=y
CONFIG_MFD_AXP20X_I2C=y
CONFIG_REGULATOR=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
@@ -193,11 +188,9 @@ CONFIG_DRM_NOUVEAU=m
CONFIG_DRM_SUN4I=m
CONFIG_DRM_VIRTIO_GPU=m
CONFIG_FB=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_SOUND=y
CONFIG_SND=y
CONFIG_SND_SOC=y
-CONFIG_SND_SOC_RZ=m
CONFIG_SND_DESIGNWARE_I2S=m
CONFIG_SND_SOC_STARFIVE=m
CONFIG_SND_SOC_JH7110_PWMDAC=m
@@ -239,34 +232,31 @@ CONFIG_USB_CONFIGFS_F_FS=y
CONFIG_MMC=y
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
-CONFIG_MMC_SDHCI_CADENCE=y
CONFIG_MMC_SDHCI_OF_DWCMSHC=y
+CONFIG_MMC_SDHCI_CADENCE=y
CONFIG_MMC_SPI=y
+CONFIG_MMC_SDHI=y
CONFIG_MMC_DW=y
CONFIG_MMC_DW_STARFIVE=y
-CONFIG_MMC_SDHI=y
CONFIG_MMC_SUNXI=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_SUN6I=y
CONFIG_DMADEVICES=y
CONFIG_DMA_SUN6I=m
CONFIG_DW_AXI_DMAC=y
-CONFIG_RZ_DMAC=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_INPUT=y
CONFIG_VIRTIO_MMIO=y
-CONFIG_RENESAS_OSTM=y
CONFIG_CLK_SOPHGO_CV1800=y
CONFIG_SUN8I_DE2_CCU=m
+CONFIG_RENESAS_OSTM=y
CONFIG_SUN50I_IOMMU=y
CONFIG_RPMSG_CHAR=y
CONFIG_RPMSG_CTRL=y
CONFIG_RPMSG_VIRTIO=y
-CONFIG_ARCH_R9A07G043=y
+CONFIG_PM_DEVFREQ=y
CONFIG_IIO=y
-CONFIG_RZG2L_ADC=m
-CONFIG_RESET_RZG2L_USBPHY_CTRL=y
CONFIG_PHY_SUN4I_USB=m
CONFIG_PHY_RCAR_GEN3_USB2=y
CONFIG_PHY_STARFIVE_JH7110_DPHY_RX=m
diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c
index f2708a9494a1..fc1a34faa5f3 100644
--- a/arch/riscv/errata/andes/errata.c
+++ b/arch/riscv/errata/andes/errata.c
@@ -17,6 +17,7 @@
#include <asm/processor.h>
#include <asm/sbi.h>
#include <asm/vendorid_list.h>
+#include <asm/vendor_extensions.h>
#define ANDES_AX45MP_MARCHID 0x8000000000008a45UL
#define ANDES_AX45MP_MIMPID 0x500UL
@@ -65,6 +66,8 @@ void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct al
unsigned long archid, unsigned long impid,
unsigned int stage)
{
+ BUILD_BUG_ON(ERRATA_ANDES_NUMBER >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE);
+
if (stage == RISCV_ALTERNATIVES_BOOT)
errata_probe_iocp(stage, archid, impid);
diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c
index 716cfedad3a2..cea3b96ade11 100644
--- a/arch/riscv/errata/sifive/errata.c
+++ b/arch/riscv/errata/sifive/errata.c
@@ -12,6 +12,7 @@
#include <asm/alternative.h>
#include <asm/vendorid_list.h>
#include <asm/errata_list.h>
+#include <asm/vendor_extensions.h>
struct errata_info_t {
char name[32];
@@ -96,6 +97,8 @@ void sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
u32 cpu_apply_errata = 0;
u32 tmp;
+ BUILD_BUG_ON(ERRATA_SIFIVE_NUMBER >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE);
+
if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
return;
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index bf6a0a6318ee..f5120e07c318 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -18,6 +18,7 @@
#include <asm/io.h>
#include <asm/patch.h>
#include <asm/vendorid_list.h>
+#include <asm/vendor_extensions.h>
#define CSR_TH_SXSTATUS 0x5c0
#define SXSTATUS_MAEE _AC(0x200000, UL)
@@ -166,6 +167,8 @@ void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
u32 tmp;
void *oldptr, *altptr;
+ BUILD_BUG_ON(ERRATA_THEAD_NUMBER >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE);
+
for (alt = begin; alt < end; alt++) {
if (alt->vendor_id != THEAD_VENDOR_ID)
continue;
diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h
index 7dad0cf9d701..e0a1f84404f3 100644
--- a/arch/riscv/include/asm/acpi.h
+++ b/arch/riscv/include/asm/acpi.h
@@ -61,11 +61,14 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) { }
void acpi_init_rintc_map(void);
struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu);
-u32 get_acpi_id_for_cpu(int cpu);
+static inline u32 get_acpi_id_for_cpu(int cpu)
+{
+ return acpi_cpu_get_madt_rintc(cpu)->uid;
+}
+
int acpi_get_riscv_isa(struct acpi_table_header *table,
unsigned int cpu, const char **isa);
-static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; }
void acpi_get_cbo_block_size(struct acpi_table_header *table, u32 *cbom_size,
u32 *cboz_size, u32 *cbop_size);
#else
@@ -87,4 +90,12 @@ static inline void acpi_get_cbo_block_size(struct acpi_table_header *table,
#endif /* CONFIG_ACPI */
+#ifdef CONFIG_ACPI_NUMA
+int acpi_numa_get_nid(unsigned int cpu);
+void acpi_map_cpus_to_nodes(void);
+#else
+static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; }
+static inline void acpi_map_cpus_to_nodes(void) { }
+#endif /* CONFIG_ACPI_NUMA */
+
#endif /*_ASM_ACPI_H*/
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 880606b0469a..71af9ecfcfcb 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -170,7 +170,7 @@ legacy:
({ \
typeof(x) x_ = (x); \
__builtin_constant_p(x_) ? \
- (int)((x_ != 0) ? (32 - __builtin_clz(x_)) : 0) \
+ ((x_ != 0) ? (32 - __builtin_clz(x_)) : 0) \
: \
variable_fls(x_); \
})
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index 000796c2d0b1..45f9c1171a48 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -33,6 +33,31 @@ extern struct riscv_isainfo hart_isa[NR_CPUS];
void riscv_user_isa_enable(void);
+#define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) { \
+ .name = #_name, \
+ .property = #_name, \
+ .id = _id, \
+ .subset_ext_ids = _subset_exts, \
+ .subset_ext_size = _subset_exts_size, \
+ .validate = _validate \
+}
+
+#define __RISCV_ISA_EXT_DATA(_name, _id) _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, NULL)
+
+#define __RISCV_ISA_EXT_DATA_VALIDATE(_name, _id, _validate) \
+ _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, _validate)
+
+/* Used to declare pure "lasso" extension (Zk for instance) */
+#define __RISCV_ISA_EXT_BUNDLE(_name, _bundled_exts) \
+ _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, \
+ ARRAY_SIZE(_bundled_exts), NULL)
+
+/* Used to declare extensions that are a superset of other extensions (Zvbb for instance) */
+#define __RISCV_ISA_EXT_SUPERSET(_name, _id, _sub_exts) \
+ _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), NULL)
+#define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \
+ _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate)
+
#if defined(CONFIG_RISCV_MISALIGNED)
bool check_unaligned_access_emulated_all_cpus(void);
void unaligned_emulation_finish(void);
@@ -79,59 +104,66 @@ extern bool riscv_isa_fallback;
unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
+#define STANDARD_EXT 0
+
bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit);
#define riscv_isa_extension_available(isa_bitmap, ext) \
__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
-static __always_inline bool
-riscv_has_extension_likely(const unsigned long ext)
+static __always_inline bool __riscv_has_extension_likely(const unsigned long vendor,
+ const unsigned long ext)
{
- compiletime_assert(ext < RISCV_ISA_EXT_MAX,
- "ext must be < RISCV_ISA_EXT_MAX");
-
- if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
- asm goto(
- ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1)
- :
- : [ext] "i" (ext)
- :
- : l_no);
- } else {
- if (!__riscv_isa_extension_available(NULL, ext))
- goto l_no;
- }
+ asm goto(ALTERNATIVE("j %l[l_no]", "nop", %[vendor], %[ext], 1)
+ :
+ : [vendor] "i" (vendor), [ext] "i" (ext)
+ :
+ : l_no);
return true;
l_no:
return false;
}
-static __always_inline bool
-riscv_has_extension_unlikely(const unsigned long ext)
+static __always_inline bool __riscv_has_extension_unlikely(const unsigned long vendor,
+ const unsigned long ext)
{
- compiletime_assert(ext < RISCV_ISA_EXT_MAX,
- "ext must be < RISCV_ISA_EXT_MAX");
-
- if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
- asm goto(
- ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1)
- :
- : [ext] "i" (ext)
- :
- : l_yes);
- } else {
- if (__riscv_isa_extension_available(NULL, ext))
- goto l_yes;
- }
+ asm goto(ALTERNATIVE("nop", "j %l[l_yes]", %[vendor], %[ext], 1)
+ :
+ : [vendor] "i" (vendor), [ext] "i" (ext)
+ :
+ : l_yes);
return false;
l_yes:
return true;
}
+static __always_inline bool riscv_has_extension_unlikely(const unsigned long ext)
+{
+ compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
+
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
+ return __riscv_has_extension_unlikely(STANDARD_EXT, ext);
+
+ return __riscv_isa_extension_available(NULL, ext);
+}
+
+static __always_inline bool riscv_has_extension_likely(const unsigned long ext)
+{
+ compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
+
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
+ return __riscv_has_extension_likely(STANDARD_EXT, ext);
+
+ return __riscv_isa_extension_available(NULL, ext);
+}
+
static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext)
{
- if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
+ compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
+
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) &&
+ __riscv_has_extension_likely(STANDARD_EXT, ext))
return true;
return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
@@ -139,7 +171,10 @@ static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsign
static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext)
{
- if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
+ compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
+
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) &&
+ __riscv_has_extension_unlikely(STANDARD_EXT, ext))
return true;
return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index b18b202ca141..5a0bd27fd11a 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -80,19 +80,18 @@
#define RISCV_ISA_EXT_ZFA 71
#define RISCV_ISA_EXT_ZTSO 72
#define RISCV_ISA_EXT_ZACAS 73
-#define RISCV_ISA_EXT_XANDESPMU 74
-#define RISCV_ISA_EXT_ZVE32X 75
-#define RISCV_ISA_EXT_ZVE32F 76
-#define RISCV_ISA_EXT_ZVE64X 77
-#define RISCV_ISA_EXT_ZVE64F 78
-#define RISCV_ISA_EXT_ZVE64D 79
-#define RISCV_ISA_EXT_ZIMOP 80
-#define RISCV_ISA_EXT_ZCA 81
-#define RISCV_ISA_EXT_ZCB 82
-#define RISCV_ISA_EXT_ZCD 83
-#define RISCV_ISA_EXT_ZCF 84
-#define RISCV_ISA_EXT_ZCMOP 85
-#define RISCV_ISA_EXT_ZAWRS 86
+#define RISCV_ISA_EXT_ZVE32X 74
+#define RISCV_ISA_EXT_ZVE32F 75
+#define RISCV_ISA_EXT_ZVE64X 76
+#define RISCV_ISA_EXT_ZVE64F 77
+#define RISCV_ISA_EXT_ZVE64D 78
+#define RISCV_ISA_EXT_ZIMOP 79
+#define RISCV_ISA_EXT_ZCA 80
+#define RISCV_ISA_EXT_ZCB 81
+#define RISCV_ISA_EXT_ZCD 82
+#define RISCV_ISA_EXT_ZCF 83
+#define RISCV_ISA_EXT_ZCMOP 84
+#define RISCV_ISA_EXT_ZAWRS 85
#define RISCV_ISA_EXT_XLINUXENVCFG 127
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 150a9877b0af..ef01c182af2b 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -8,7 +8,7 @@
#include <uapi/asm/hwprobe.h>
-#define RISCV_HWPROBE_MAX_KEY 7
+#define RISCV_HWPROBE_MAX_KEY 8
static inline bool riscv_hwprobe_key_is_valid(__s64 key)
{
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 235fd45d998d..7ede2111c591 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -37,7 +37,7 @@
* define the PAGE_OFFSET value for SV48 and SV39.
*/
#define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL)
-#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL)
+#define PAGE_OFFSET_L3 _AC(0xffffffd600000000, UL)
#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
#endif /* CONFIG_64BIT */
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 5d473343634b..fca5c6be2b81 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -10,6 +10,7 @@
#include <asm/page.h>
#include <linux/const.h>
+#include <linux/sizes.h>
/* thread information allocation */
#define THREAD_SIZE_ORDER CONFIG_THREAD_SIZE_ORDER
diff --git a/arch/riscv/include/asm/vendor_extensions.h b/arch/riscv/include/asm/vendor_extensions.h
new file mode 100644
index 000000000000..7437304a71b9
--- /dev/null
+++ b/arch/riscv/include/asm/vendor_extensions.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2024 Rivos, Inc
+ */
+
+#ifndef _ASM_VENDOR_EXTENSIONS_H
+#define _ASM_VENDOR_EXTENSIONS_H
+
+#include <asm/cpufeature.h>
+
+#include <linux/array_size.h>
+#include <linux/types.h>
+
+/*
+ * The extension keys of each vendor must be strictly less than this value.
+ */
+#define RISCV_ISA_VENDOR_EXT_MAX 32
+
+struct riscv_isavendorinfo {
+ DECLARE_BITMAP(isa, RISCV_ISA_VENDOR_EXT_MAX);
+};
+
+struct riscv_isa_vendor_ext_data_list {
+ bool is_initialized;
+ const size_t ext_data_count;
+ const struct riscv_isa_ext_data *ext_data;
+ struct riscv_isavendorinfo per_hart_isa_bitmap[NR_CPUS];
+ struct riscv_isavendorinfo all_harts_isa_bitmap;
+};
+
+extern struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[];
+
+extern const size_t riscv_isa_vendor_ext_list_size;
+
+/*
+ * The alternatives need some way of distinguishing between vendor extensions
+ * and errata. Incrementing all of the vendor extension keys so they are at
+ * least 0x8000 accomplishes that.
+ */
+#define RISCV_VENDOR_EXT_ALTERNATIVES_BASE 0x8000
+
+#define VENDOR_EXT_ALL_CPUS -1
+
+bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsigned int bit);
+#define riscv_cpu_isa_vendor_extension_available(cpu, vendor, ext) \
+ __riscv_isa_vendor_extension_available(cpu, vendor, RISCV_ISA_VENDOR_EXT_##ext)
+#define riscv_isa_vendor_extension_available(vendor, ext) \
+ __riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, \
+ RISCV_ISA_VENDOR_EXT_##ext)
+
+static __always_inline bool riscv_has_vendor_extension_likely(const unsigned long vendor,
+ const unsigned long ext)
+{
+ if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT))
+ return false;
+
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
+ return __riscv_has_extension_likely(vendor,
+ ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE);
+
+ return __riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, ext);
+}
+
+static __always_inline bool riscv_has_vendor_extension_unlikely(const unsigned long vendor,
+ const unsigned long ext)
+{
+ if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT))
+ return false;
+
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
+ return __riscv_has_extension_unlikely(vendor,
+ ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE);
+
+ return __riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, ext);
+}
+
+static __always_inline bool riscv_cpu_has_vendor_extension_likely(const unsigned long vendor,
+ int cpu, const unsigned long ext)
+{
+ if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT))
+ return false;
+
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) &&
+ __riscv_has_extension_likely(vendor, ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE))
+ return true;
+
+ return __riscv_isa_vendor_extension_available(cpu, vendor, ext);
+}
+
+static __always_inline bool riscv_cpu_has_vendor_extension_unlikely(const unsigned long vendor,
+ int cpu,
+ const unsigned long ext)
+{
+ if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT))
+ return false;
+
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) &&
+ __riscv_has_extension_unlikely(vendor, ext + RISCV_VENDOR_EXT_ALTERNATIVES_BASE))
+ return true;
+
+ return __riscv_isa_vendor_extension_available(cpu, vendor, ext);
+}
+
+#endif /* _ASM_VENDOR_EXTENSIONS_H */
diff --git a/arch/riscv/include/asm/vendor_extensions/andes.h b/arch/riscv/include/asm/vendor_extensions/andes.h
new file mode 100644
index 000000000000..7bb2fc43438f
--- /dev/null
+++ b/arch/riscv/include/asm/vendor_extensions/andes.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_ANDES_H
+#define _ASM_RISCV_VENDOR_EXTENSIONS_ANDES_H
+
+#include <asm/vendor_extensions.h>
+
+#include <linux/types.h>
+
+#define RISCV_ISA_VENDOR_EXT_XANDESPMU 0
+
+/*
+ * Extension keys should be strictly less than max.
+ * It is safe to increment this when necessary.
+ */
+#define RISCV_ISA_VENDOR_EXT_MAX_ANDES 32
+
+extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_andes;
+
+#endif
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index 8b8f6ac0eae2..b706c8e47b02 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -81,6 +81,7 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
#define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
#define RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS 7
+#define RISCV_HWPROBE_KEY_TIME_CSR_FREQ 8
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
/* Flags */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 5b243d46f4b1..06d407f1b30b 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -58,6 +58,8 @@ obj-y += riscv_ksyms.o
obj-y += stacktrace.o
obj-y += cacheinfo.o
obj-y += patch.o
+obj-y += vendor_extensions.o
+obj-y += vendor_extensions/
obj-y += probes/
obj-y += tests/
obj-$(CONFIG_MMU) += vdso.o vdso/
@@ -110,3 +112,4 @@ obj-$(CONFIG_COMPAT) += compat_vdso/
obj-$(CONFIG_64BIT) += pi/
obj-$(CONFIG_ACPI) += acpi.o
+obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o
diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c
index e619edc8b0cc..ba957aaca5cb 100644
--- a/arch/riscv/kernel/acpi.c
+++ b/arch/riscv/kernel/acpi.c
@@ -17,7 +17,9 @@
#include <linux/efi.h>
#include <linux/io.h>
#include <linux/memblock.h>
+#include <linux/of_fdt.h>
#include <linux/pci.h>
+#include <linux/serial_core.h>
int acpi_noirq = 1; /* skip ACPI IRQ initialization */
int acpi_disabled = 1;
@@ -131,7 +133,7 @@ void __init acpi_boot_table_init(void)
if (param_acpi_off ||
(!param_acpi_on && !param_acpi_force &&
efi.acpi20 == EFI_INVALID_TABLE_ADDR))
- return;
+ goto done;
/*
* ACPI is disabled at this point. Enable it in order to parse
@@ -151,6 +153,14 @@ void __init acpi_boot_table_init(void)
if (!param_acpi_force)
disable_acpi();
}
+
+done:
+ if (acpi_disabled) {
+ if (earlycon_acpi_spcr_enable)
+ early_init_dt_scan_chosen_stdout();
+ } else {
+ acpi_parse_spcr(earlycon_acpi_spcr_enable, true);
+ }
}
static int acpi_parse_madt_rintc(union acpi_subtable_headers *header, const unsigned long end)
@@ -191,11 +201,6 @@ struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu)
return &cpu_madt_rintc[cpu];
}
-u32 get_acpi_id_for_cpu(int cpu)
-{
- return acpi_cpu_get_madt_rintc(cpu)->uid;
-}
-
/*
* __acpi_map_table() will be called before paging_init(), so early_ioremap()
* or early_memremap() should be called here to for ACPI table mapping.
diff --git a/arch/riscv/kernel/acpi_numa.c b/arch/riscv/kernel/acpi_numa.c
new file mode 100644
index 000000000000..0231482d6946
--- /dev/null
+++ b/arch/riscv/kernel/acpi_numa.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACPI 6.6 based NUMA setup for RISCV
+ * Lots of code was borrowed from arch/arm64/kernel/acpi_numa.c
+ *
+ * Copyright 2004 Andi Kleen, SuSE Labs.
+ * Copyright (C) 2013-2016, Linaro Ltd.
+ * Author: Hanjun Guo <hanjun.guo@linaro.org>
+ * Copyright (C) 2024 Intel Corporation.
+ *
+ * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
+ *
+ * Called from acpi_numa_init while reading the SRAT and SLIT tables.
+ * Assumes all memory regions belonging to a single proximity domain
+ * are in one chunk. Holes between them will be included in the node.
+ */
+
+#define pr_fmt(fmt) "ACPI: NUMA: " fmt
+
+#include <linux/acpi.h>
+#include <linux/bitmap.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/memblock.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/topology.h>
+
+#include <asm/numa.h>
+
+static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE };
+
+int __init acpi_numa_get_nid(unsigned int cpu)
+{
+ return acpi_early_node_map[cpu];
+}
+
+static inline int get_cpu_for_acpi_id(u32 uid)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+ if (uid == get_acpi_id_for_cpu(cpu))
+ return cpu;
+
+ return -EINVAL;
+}
+
+static int __init acpi_parse_rintc_pxm(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ struct acpi_srat_rintc_affinity *pa;
+ int cpu, pxm, node;
+
+ if (srat_disabled())
+ return -EINVAL;
+
+ pa = (struct acpi_srat_rintc_affinity *)header;
+ if (!pa)
+ return -EINVAL;
+
+ if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED))
+ return 0;
+
+ pxm = pa->proximity_domain;
+ node = pxm_to_node(pxm);
+
+ /*
+ * If we can't map the UID to a logical cpu this
+ * means that the UID is not part of possible cpus
+ * so we do not need a NUMA mapping for it, skip
+ * the SRAT entry and keep parsing.
+ */
+ cpu = get_cpu_for_acpi_id(pa->acpi_processor_uid);
+ if (cpu < 0)
+ return 0;
+
+ acpi_early_node_map[cpu] = node;
+ pr_info("SRAT: PXM %d -> HARTID 0x%lx -> Node %d\n", pxm,
+ cpuid_to_hartid_map(cpu), node);
+
+ return 0;
+}
+
+void __init acpi_map_cpus_to_nodes(void)
+{
+ int i;
+
+ /*
+ * In ACPI, SMP and CPU NUMA information is provided in separate
+ * static tables, namely the MADT and the SRAT.
+ *
+ * Thus, it is simpler to first create the cpu logical map through
+ * an MADT walk and then map the logical cpus to their node ids
+ * as separate steps.
+ */
+ acpi_table_parse_entries(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat),
+ ACPI_SRAT_TYPE_RINTC_AFFINITY, acpi_parse_rintc_pxm, 0);
+
+ for (i = 0; i < nr_cpu_ids; i++)
+ early_map_cpu_to_node(i, acpi_numa_get_nid(i));
+}
+
+/* Callback for Proximity Domain -> logical node ID mapping */
+void __init acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa)
+{
+ int pxm, node;
+
+ if (srat_disabled())
+ return;
+
+ if (pa->header.length < sizeof(struct acpi_srat_rintc_affinity)) {
+ pr_err("SRAT: Invalid SRAT header length: %d\n", pa->header.length);
+ bad_srat();
+ return;
+ }
+
+ if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED))
+ return;
+
+ pxm = pa->proximity_domain;
+ node = acpi_map_pxm_to_node(pxm);
+
+ if (node == NUMA_NO_NODE) {
+ pr_err("SRAT: Too many proximity domains %d\n", pxm);
+ bad_srat();
+ return;
+ }
+
+ node_set(node, numa_nodes_parsed);
+}
diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index 09e9b88110d1..d6c108c50cba 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -3,6 +3,7 @@
* Copyright (C) 2017 SiFive
*/
+#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/of.h>
#include <asm/cacheinfo.h>
@@ -64,7 +65,6 @@ uintptr_t get_cache_geometry(u32 level, enum cache_type type)
}
static void ci_leaf_init(struct cacheinfo *this_leaf,
- struct device_node *node,
enum cache_type type, unsigned int level)
{
this_leaf->level = level;
@@ -79,12 +79,33 @@ int populate_cache_leaves(unsigned int cpu)
struct device_node *prev = NULL;
int levels = 1, level = 1;
+ if (!acpi_disabled) {
+ int ret, fw_levels, split_levels;
+
+ ret = acpi_get_cache_info(cpu, &fw_levels, &split_levels);
+ if (ret)
+ return ret;
+
+ BUG_ON((split_levels > fw_levels) ||
+ (split_levels + fw_levels > this_cpu_ci->num_leaves));
+
+ for (; level <= this_cpu_ci->num_levels; level++) {
+ if (level <= split_levels) {
+ ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
+ ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
+ } else {
+ ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level);
+ }
+ }
+ return 0;
+ }
+
if (of_property_read_bool(np, "cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
+ ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level);
if (of_property_read_bool(np, "i-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level);
+ ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
if (of_property_read_bool(np, "d-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level);
+ ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
prev = np;
while ((np = of_find_next_cache_node(np))) {
@@ -97,11 +118,11 @@ int populate_cache_leaves(unsigned int cpu)
if (level <= levels)
break;
if (of_property_read_bool(np, "cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
+ ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level);
if (of_property_read_bool(np, "i-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level);
+ ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
if (of_property_read_bool(np, "d-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level);
+ ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
levels = level;
}
of_node_put(np);
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index c1f3655238fd..f6b13e9f5e6c 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -16,6 +16,7 @@
#include <asm/sbi.h>
#include <asm/smp.h>
#include <asm/pgtable.h>
+#include <asm/vendor_extensions.h>
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
@@ -235,7 +236,33 @@ arch_initcall(riscv_cpuinfo_init);
#ifdef CONFIG_PROC_FS
-static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap)
+#define ALL_CPUS -1
+
+static void print_vendor_isa(struct seq_file *f, int cpu)
+{
+ struct riscv_isavendorinfo *vendor_bitmap;
+ struct riscv_isa_vendor_ext_data_list *ext_list;
+ const struct riscv_isa_ext_data *ext_data;
+
+ for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) {
+ ext_list = riscv_isa_vendor_ext_list[i];
+ ext_data = riscv_isa_vendor_ext_list[i]->ext_data;
+
+ if (cpu == ALL_CPUS)
+ vendor_bitmap = &ext_list->all_harts_isa_bitmap;
+ else
+ vendor_bitmap = &ext_list->per_hart_isa_bitmap[cpu];
+
+ for (int j = 0; j < ext_list->ext_data_count; j++) {
+ if (!__riscv_isa_extension_available(vendor_bitmap->isa, ext_data[j].id))
+ continue;
+
+ seq_printf(f, "_%s", ext_data[j].name);
+ }
+ }
+}
+
+static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap, int cpu)
{
if (IS_ENABLED(CONFIG_32BIT))
@@ -254,6 +281,8 @@ static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap)
seq_printf(f, "%s", riscv_isa_ext[i].name);
}
+ print_vendor_isa(f, cpu);
+
seq_puts(f, "\n");
}
@@ -316,7 +345,7 @@ static int c_show(struct seq_file *m, void *v)
* line.
*/
seq_puts(m, "isa\t\t: ");
- print_isa(m, NULL);
+ print_isa(m, NULL, ALL_CPUS);
print_mmu(m);
if (acpi_disabled) {
@@ -338,7 +367,7 @@ static int c_show(struct seq_file *m, void *v)
* additional extensions not present across all harts.
*/
seq_puts(m, "hart isa\t: ");
- print_isa(m, hart_isa[cpu_id].isa);
+ print_isa(m, hart_isa[cpu_id].isa, cpu_id);
seq_puts(m, "\n");
return 0;
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 0366dc3baf33..8f20607adb40 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -24,6 +24,7 @@
#include <asm/processor.h>
#include <asm/sbi.h>
#include <asm/vector.h>
+#include <asm/vendor_extensions.h>
#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
@@ -100,31 +101,6 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data,
return 0;
}
-#define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) { \
- .name = #_name, \
- .property = #_name, \
- .id = _id, \
- .subset_ext_ids = _subset_exts, \
- .subset_ext_size = _subset_exts_size, \
- .validate = _validate \
-}
-
-#define __RISCV_ISA_EXT_DATA(_name, _id) _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, NULL)
-
-#define __RISCV_ISA_EXT_DATA_VALIDATE(_name, _id, _validate) \
- _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, _validate)
-
-/* Used to declare pure "lasso" extension (Zk for instance) */
-#define __RISCV_ISA_EXT_BUNDLE(_name, _bundled_exts) \
- _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, \
- ARRAY_SIZE(_bundled_exts), NULL)
-
-/* Used to declare extensions that are a superset of other extensions (Zvbb for instance) */
-#define __RISCV_ISA_EXT_SUPERSET(_name, _id, _sub_exts) \
- _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), NULL)
-#define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \
- _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate)
-
static int riscv_ext_zca_depends(const struct riscv_isa_ext_data *data,
const unsigned long *isa_bitmap)
{
@@ -405,7 +381,6 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
__RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
- __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_EXT_XANDESPMU),
};
const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext);
@@ -512,6 +487,21 @@ static void __init riscv_parse_isa_string(const char *isa, unsigned long *bitmap
bool ext_err = false;
switch (*ext) {
+ case 'x':
+ case 'X':
+ if (acpi_disabled)
+ pr_warn_once("Vendor extensions are ignored in riscv,isa. Use riscv,isa-extensions instead.");
+ /*
+ * To skip an extension, we find its end.
+ * As multi-letter extensions must be split from other multi-letter
+ * extensions with an "_", the end of a multi-letter extension will
+ * either be the null character or the "_" at the start of the next
+ * multi-letter extension.
+ */
+ for (; *isa && *isa != '_'; ++isa)
+ ;
+ ext_err = true;
+ break;
case 's':
/*
* Workaround for invalid single-letter 's' & 'u' (QEMU).
@@ -527,8 +517,6 @@ static void __init riscv_parse_isa_string(const char *isa, unsigned long *bitmap
}
fallthrough;
case 'S':
- case 'x':
- case 'X':
case 'z':
case 'Z':
/*
@@ -728,6 +716,61 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap)
acpi_put_table((struct acpi_table_header *)rhct);
}
+static void __init riscv_fill_cpu_vendor_ext(struct device_node *cpu_node, int cpu)
+{
+ if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT))
+ return;
+
+ for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) {
+ struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i];
+
+ for (int j = 0; j < ext_list->ext_data_count; j++) {
+ const struct riscv_isa_ext_data ext = ext_list->ext_data[j];
+ struct riscv_isavendorinfo *isavendorinfo = &ext_list->per_hart_isa_bitmap[cpu];
+
+ if (of_property_match_string(cpu_node, "riscv,isa-extensions",
+ ext.property) < 0)
+ continue;
+
+ /*
+ * Assume that subset extensions are all members of the
+ * same vendor.
+ */
+ if (ext.subset_ext_size)
+ for (int k = 0; k < ext.subset_ext_size; k++)
+ set_bit(ext.subset_ext_ids[k], isavendorinfo->isa);
+
+ set_bit(ext.id, isavendorinfo->isa);
+ }
+ }
+}
+
+/*
+ * Populate all_harts_isa_bitmap for each vendor with all of the extensions that
+ * are shared across CPUs for that vendor.
+ */
+static void __init riscv_fill_vendor_ext_list(int cpu)
+{
+ if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT))
+ return;
+
+ for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) {
+ struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i];
+
+ if (!ext_list->is_initialized) {
+ bitmap_copy(ext_list->all_harts_isa_bitmap.isa,
+ ext_list->per_hart_isa_bitmap[cpu].isa,
+ RISCV_ISA_VENDOR_EXT_MAX);
+ ext_list->is_initialized = true;
+ } else {
+ bitmap_and(ext_list->all_harts_isa_bitmap.isa,
+ ext_list->all_harts_isa_bitmap.isa,
+ ext_list->per_hart_isa_bitmap[cpu].isa,
+ RISCV_ISA_VENDOR_EXT_MAX);
+ }
+ }
+}
+
static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
{
unsigned int cpu;
@@ -760,6 +803,7 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
}
riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap);
+ riscv_fill_cpu_vendor_ext(cpu_node, cpu);
of_node_put(cpu_node);
@@ -776,6 +820,8 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
bitmap_copy(riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX);
else
bitmap_and(riscv_isa, riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX);
+
+ riscv_fill_vendor_ext_list(cpu);
}
if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX))
@@ -918,28 +964,45 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
{
struct alt_entry *alt;
void *oldptr, *altptr;
- u16 id, value;
+ u16 id, value, vendor;
if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
return;
for (alt = begin; alt < end; alt++) {
- if (alt->vendor_id != 0)
- continue;
-
id = PATCH_ID_CPUFEATURE_ID(alt->patch_id);
+ vendor = PATCH_ID_CPUFEATURE_ID(alt->vendor_id);
- if (id >= RISCV_ISA_EXT_MAX) {
- WARN(1, "This extension id:%d is not in ISA extension list", id);
- continue;
- }
+ /*
+ * Any alternative with a patch_id that is less than
+ * RISCV_ISA_EXT_MAX is interpreted as a standard extension.
+ *
+ * Any alternative with patch_id that is greater than or equal
+ * to RISCV_VENDOR_EXT_ALTERNATIVES_BASE is interpreted as a
+ * vendor extension.
+ */
+ if (id < RISCV_ISA_EXT_MAX) {
+ /*
+ * This patch should be treated as errata so skip
+ * processing here.
+ */
+ if (alt->vendor_id != 0)
+ continue;
- if (!__riscv_isa_extension_available(NULL, id))
- continue;
+ if (!__riscv_isa_extension_available(NULL, id))
+ continue;
- value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id);
- if (!riscv_cpufeature_patch_check(id, value))
+ value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id);
+ if (!riscv_cpufeature_patch_check(id, value))
+ continue;
+ } else if (id >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE) {
+ if (!__riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor,
+ id - RISCV_VENDOR_EXT_ALTERNATIVES_BASE))
+ continue;
+ } else {
+ WARN(1, "This extension id:%d is not in ISA extension list", id);
continue;
+ }
oldptr = ALT_OLD_PTR(alt);
altptr = ALT_ALT_PTR(alt);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 68a24cf9481a..ac2e908d4418 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -88,7 +88,6 @@ SYM_CODE_START(handle_exception)
call riscv_v_context_nesting_start
#endif
move a0, sp /* pt_regs */
- la ra, ret_from_exception
/*
* MSB of cause differentiates between
@@ -97,7 +96,8 @@ SYM_CODE_START(handle_exception)
bge s4, zero, 1f
/* Handle interrupts */
- tail do_irq
+ call do_irq
+ j ret_from_exception
1:
/* Handle other exceptions */
slli t0, s4, RISCV_LGPTR
@@ -105,11 +105,14 @@ SYM_CODE_START(handle_exception)
la t2, excp_vect_table_end
add t0, t1, t0
/* Check if exception code lies within bounds */
- bgeu t0, t2, 1f
- REG_L t0, 0(t0)
- jr t0
-1:
- tail do_trap_unknown
+ bgeu t0, t2, 3f
+ REG_L t1, 0(t0)
+2: jalr t1
+ j ret_from_exception
+3:
+
+ la t1, do_trap_unknown
+ j 2b
SYM_CODE_END(handle_exception)
ASM_NOKPROBE(handle_exception)
@@ -130,6 +133,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
#endif
bnez s0, 1f
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ call stackleak_erase_on_task_stack
+#endif
+
/* Save unwound kernel stack pointer in thread_info */
addi s0, sp, PT_SIZE_ON_STACK
REG_S s0, TASK_TI_KERNEL_SP(tp)
diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile
index 8265ff497977..d2129f2c61b8 100644
--- a/arch/riscv/kernel/probes/Makefile
+++ b/arch/riscv/kernel/probes/Makefile
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o simulate-insn.o
obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o
-obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o
obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o
CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c
deleted file mode 100644
index a69dfa610aa8..000000000000
--- a/arch/riscv/kernel/probes/ftrace.c
+++ /dev/null
@@ -1,65 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/kprobes.h>
-
-/* Ftrace callback handler for kprobes -- called under preepmt disabled */
-void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct ftrace_regs *fregs)
-{
- struct kprobe *p;
- struct pt_regs *regs;
- struct kprobe_ctlblk *kcb;
- int bit;
-
- if (unlikely(kprobe_ftrace_disabled))
- return;
-
- bit = ftrace_test_recursion_trylock(ip, parent_ip);
- if (bit < 0)
- return;
-
- p = get_kprobe((kprobe_opcode_t *)ip);
- if (unlikely(!p) || kprobe_disabled(p))
- goto out;
-
- regs = ftrace_get_regs(fregs);
- kcb = get_kprobe_ctlblk();
- if (kprobe_running()) {
- kprobes_inc_nmissed_count(p);
- } else {
- unsigned long orig_ip = instruction_pointer(regs);
-
- instruction_pointer_set(regs, ip);
-
- __this_cpu_write(current_kprobe, p);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- if (!p->pre_handler || !p->pre_handler(p, regs)) {
- /*
- * Emulate singlestep (and also recover regs->pc)
- * as if there is a nop
- */
- instruction_pointer_set(regs,
- (unsigned long)p->addr + MCOUNT_INSN_SIZE);
- if (unlikely(p->post_handler)) {
- kcb->kprobe_status = KPROBE_HIT_SSDONE;
- p->post_handler(p, regs, 0);
- }
- instruction_pointer_set(regs, orig_ip);
- }
-
- /*
- * If pre_handler returns !0, it changes regs->pc. We have to
- * skip emulating post_handler.
- */
- __this_cpu_write(current_kprobe, NULL);
- }
-out:
- ftrace_test_recursion_unlock(bit);
-}
-NOKPROBE_SYMBOL(kprobe_ftrace_handler);
-
-int arch_prepare_kprobe_ftrace(struct kprobe *p)
-{
- p->ainsn.api.insn = NULL;
- return 0;
-}
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 4f73c0ae44b2..a2cde65b69e9 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -281,8 +281,10 @@ void __init setup_arch(char **cmdline_p)
setup_smp();
#endif
- if (!acpi_disabled)
+ if (!acpi_disabled) {
acpi_init_rintc_map();
+ acpi_map_cpus_to_nodes();
+ }
riscv_init_cbo_blocksizes();
riscv_fill_hwcap();
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 5a2edd7f027e..dcd282419456 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -84,7 +84,7 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec)
datap = state + 1;
/* datap is designed to be 16 byte aligned for better performance */
- WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16)));
+ WARN_ON(!IS_ALIGNED((unsigned long)datap, 16));
get_cpu_vector_context();
riscv_v_vstate_save(&current->thread.vstate, regs);
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 19baf0d574d3..0f8f1c95ac38 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -96,7 +96,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un
if (hart == cpuid_to_hartid_map(0)) {
BUG_ON(found_boot_cpu);
found_boot_cpu = true;
- early_map_cpu_to_node(0, acpi_numa_get_nid(cpu_count));
return 0;
}
@@ -106,7 +105,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un
}
cpuid_to_hartid_map(cpu_count) = hart;
- early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count));
cpu_count++;
return 0;
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 10e311b2759d..c6d5de22463f 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -16,7 +16,7 @@
#ifdef CONFIG_FRAME_POINTER
-extern asmlinkage void ret_from_exception(void);
+extern asmlinkage void handle_exception(void);
static inline int fp_is_valid(unsigned long fp, unsigned long sp)
{
@@ -71,7 +71,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
fp = frame->fp;
pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra,
&frame->ra);
- if (pc == (unsigned long)ret_from_exception) {
+ if (pc == (unsigned long)handle_exception) {
if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
break;
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index 685594769535..8d1b5c35d2a7 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -9,6 +9,7 @@
#include <asm/cpufeature.h>
#include <asm/hwprobe.h>
#include <asm/processor.h>
+#include <asm/delay.h>
#include <asm/sbi.h>
#include <asm/switch_to.h>
#include <asm/uaccess.h>
@@ -93,44 +94,45 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
* regardless of the kernel's configuration, as no other checks, besides
* presence in the hart_isa bitmap, are made.
*/
+ EXT_KEY(ZACAS);
+ EXT_KEY(ZAWRS);
EXT_KEY(ZBA);
EXT_KEY(ZBB);
- EXT_KEY(ZBS);
- EXT_KEY(ZICBOZ);
EXT_KEY(ZBC);
-
EXT_KEY(ZBKB);
EXT_KEY(ZBKC);
EXT_KEY(ZBKX);
+ EXT_KEY(ZBS);
+ EXT_KEY(ZCA);
+ EXT_KEY(ZCB);
+ EXT_KEY(ZCMOP);
+ EXT_KEY(ZICBOZ);
+ EXT_KEY(ZICOND);
+ EXT_KEY(ZIHINTNTL);
+ EXT_KEY(ZIHINTPAUSE);
+ EXT_KEY(ZIMOP);
EXT_KEY(ZKND);
EXT_KEY(ZKNE);
EXT_KEY(ZKNH);
EXT_KEY(ZKSED);
EXT_KEY(ZKSH);
EXT_KEY(ZKT);
- EXT_KEY(ZIHINTNTL);
EXT_KEY(ZTSO);
- EXT_KEY(ZACAS);
- EXT_KEY(ZICOND);
- EXT_KEY(ZIHINTPAUSE);
- EXT_KEY(ZIMOP);
- EXT_KEY(ZCA);
- EXT_KEY(ZCB);
- EXT_KEY(ZCMOP);
- EXT_KEY(ZAWRS);
/*
* All the following extensions must depend on the kernel
* support of V.
*/
if (has_vector()) {
- EXT_KEY(ZVE32X);
- EXT_KEY(ZVE32F);
- EXT_KEY(ZVE64X);
- EXT_KEY(ZVE64F);
- EXT_KEY(ZVE64D);
EXT_KEY(ZVBB);
EXT_KEY(ZVBC);
+ EXT_KEY(ZVE32F);
+ EXT_KEY(ZVE32X);
+ EXT_KEY(ZVE64D);
+ EXT_KEY(ZVE64F);
+ EXT_KEY(ZVE64X);
+ EXT_KEY(ZVFH);
+ EXT_KEY(ZVFHMIN);
EXT_KEY(ZVKB);
EXT_KEY(ZVKG);
EXT_KEY(ZVKNED);
@@ -139,16 +141,14 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
EXT_KEY(ZVKSED);
EXT_KEY(ZVKSH);
EXT_KEY(ZVKT);
- EXT_KEY(ZVFH);
- EXT_KEY(ZVFHMIN);
}
if (has_fpu()) {
- EXT_KEY(ZFH);
- EXT_KEY(ZFHMIN);
- EXT_KEY(ZFA);
EXT_KEY(ZCD);
EXT_KEY(ZCF);
+ EXT_KEY(ZFA);
+ EXT_KEY(ZFH);
+ EXT_KEY(ZFHMIN);
}
#undef EXT_KEY
}
@@ -237,6 +237,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
pair->value = user_max_virt_addr();
break;
+ case RISCV_HWPROBE_KEY_TIME_CSR_FREQ:
+ pair->value = riscv_timebase;
+ break;
+
/*
* For forward compatibility, unknown keys don't fail the whole
* call, but get their element key set to -1 and value set to 0
diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c
new file mode 100644
index 000000000000..b6c1e7b5d34b
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2024 Rivos, Inc
+ */
+
+#include <asm/vendorid_list.h>
+#include <asm/vendor_extensions.h>
+#include <asm/vendor_extensions/andes.h>
+
+#include <linux/array_size.h>
+#include <linux/types.h>
+
+struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = {
+#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES
+ &riscv_isa_vendor_ext_list_andes,
+#endif
+};
+
+const size_t riscv_isa_vendor_ext_list_size = ARRAY_SIZE(riscv_isa_vendor_ext_list);
+
+/**
+ * __riscv_isa_vendor_extension_available() - Check whether given vendor
+ * extension is available or not.
+ *
+ * @cpu: check if extension is available on this cpu
+ * @vendor: vendor that the extension is a member of
+ * @bit: bit position of the desired extension
+ * Return: true or false
+ *
+ * NOTE: When cpu is -1, will check if extension is available on all cpus
+ */
+bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsigned int bit)
+{
+ struct riscv_isavendorinfo *bmap;
+ struct riscv_isavendorinfo *cpu_bmap;
+
+ switch (vendor) {
+ #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES
+ case ANDES_VENDOR_ID:
+ bmap = &riscv_isa_vendor_ext_list_andes.all_harts_isa_bitmap;
+ cpu_bmap = &riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap[cpu];
+ break;
+ #endif
+ default:
+ return false;
+ }
+
+ if (cpu != -1)
+ bmap = &cpu_bmap[cpu];
+
+ if (bit >= RISCV_ISA_VENDOR_EXT_MAX)
+ return false;
+
+ return test_bit(bit, bmap->isa) ? true : false;
+}
+EXPORT_SYMBOL_GPL(__riscv_isa_vendor_extension_available);
diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile
new file mode 100644
index 000000000000..6a61aed944f1
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o
diff --git a/arch/riscv/kernel/vendor_extensions/andes.c b/arch/riscv/kernel/vendor_extensions/andes.c
new file mode 100644
index 000000000000..ec688c88456a
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions/andes.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/cpufeature.h>
+#include <asm/vendor_extensions.h>
+#include <asm/vendor_extensions/andes.h>
+
+#include <linux/array_size.h>
+#include <linux/types.h>
+
+/* All Andes vendor extensions supported in Linux */
+const struct riscv_isa_ext_data riscv_isa_vendor_ext_andes[] = {
+ __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_VENDOR_EXT_XANDESPMU),
+};
+
+struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_andes = {
+ .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_andes),
+ .ext_data = riscv_isa_vendor_ext_andes,
+};
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 59e0d861e26f..a822f952f64a 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -799,17 +799,6 @@ config HAVE_PNETID
menu "Virtualization"
-config PROTECTED_VIRTUALIZATION_GUEST
- def_bool n
- prompt "Protected virtualization guest support"
- help
- Select this option, if you want to be able to run this
- kernel as a protected virtualization KVM guest.
- Protected virtualization capable machines have a mini hypervisor
- located at machine level (an ultravisor). With help of the
- Ultravisor, KVM will be able to run "protected" VMs, special
- VMs whose memory and management data are unavailable to KVM.
-
config PFAULT
def_bool y
prompt "Pseudo page fault support"
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index c2978cb03b36..91a30e017d65 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -46,9 +46,9 @@
* /proc entries (sysctl)
*/
static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
-static int appldata_timer_handler(struct ctl_table *ctl, int write,
+static int appldata_timer_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
-static int appldata_interval_handler(struct ctl_table *ctl, int write,
+static int appldata_interval_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
static struct ctl_table_header *appldata_sysctl_header;
@@ -199,7 +199,7 @@ static void __appldata_vtimer_setup(int cmd)
* Start/Stop timer, show status of timer (0 = not active, 1 = active)
*/
static int
-appldata_timer_handler(struct ctl_table *ctl, int write,
+appldata_timer_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int timer_active = appldata_timer_active;
@@ -232,7 +232,7 @@ appldata_timer_handler(struct ctl_table *ctl, int write,
* current timer interval.
*/
static int
-appldata_interval_handler(struct ctl_table *ctl, int write,
+appldata_interval_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int interval = appldata_interval;
@@ -262,7 +262,7 @@ appldata_interval_handler(struct ctl_table *ctl, int write,
* monitoring (0 = not in process, 1 = in process)
*/
static int
-appldata_generic_handler(struct ctl_table *ctl, int write,
+appldata_generic_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct appldata_ops *ops = NULL, *tmp_ops;
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index e7658997452b..4f476884d340 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -39,8 +39,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
-obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o
-obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
+obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o alternative.o uv.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
diff --git a/arch/s390/boot/alternative.c b/arch/s390/boot/alternative.c
new file mode 100644
index 000000000000..abc08d2c873d
--- /dev/null
+++ b/arch/s390/boot/alternative.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "../kernel/alternative.c"
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 18027fdc92b0..83e2ce050b6c 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -30,6 +30,8 @@ struct vmlinux_info {
unsigned long init_mm_off;
unsigned long swapper_pg_dir_off;
unsigned long invalid_pg_dir_off;
+ unsigned long alt_instructions;
+ unsigned long alt_instructions_end;
#ifdef CONFIG_KASAN
unsigned long kasan_early_shadow_page_off;
unsigned long kasan_early_shadow_pte_off;
@@ -89,8 +91,10 @@ extern char _end[], _decompressor_end[];
extern unsigned char _compressed_start[];
extern unsigned char _compressed_end[];
extern struct vmlinux_info _vmlinux_info;
+
#define vmlinux _vmlinux_info
+#define __lowcore_pa(x) ((unsigned long)(x) % sizeof(struct lowcore))
#define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore))
#define __kernel_va(x) ((void *)((unsigned long)(x) - __kaslr_offset_phys + __kaslr_offset))
#define __kernel_pa(x) ((unsigned long)(x) - __kaslr_offset + __kaslr_offset_phys)
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index a21f301acd29..1773b72a6a7b 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/ctype.h>
#include <linux/pgtable.h>
+#include <asm/abs_lowcore.h>
#include <asm/page-states.h>
#include <asm/ebcdic.h>
#include <asm/sclp.h>
@@ -310,5 +311,7 @@ void parse_boot_command_line(void)
prot_virt_host = 1;
}
#endif
+ if (!strcmp(param, "relocate_lowcore") && test_facility(193))
+ relocate_lowcore = 1;
}
}
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index c59014945af0..ce232552bc1c 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -30,6 +30,7 @@ unsigned long __bootdata_preserved(vmemmap_size);
unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);
unsigned long __bootdata_preserved(max_mappable);
+int __bootdata_preserved(relocate_lowcore);
u64 __bootdata_preserved(stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);
@@ -376,6 +377,8 @@ static void kaslr_adjust_vmlinux_info(long offset)
vmlinux.init_mm_off += offset;
vmlinux.swapper_pg_dir_off += offset;
vmlinux.invalid_pg_dir_off += offset;
+ vmlinux.alt_instructions += offset;
+ vmlinux.alt_instructions_end += offset;
#ifdef CONFIG_KASAN
vmlinux.kasan_early_shadow_page_off += offset;
vmlinux.kasan_early_shadow_pte_off += offset;
@@ -478,8 +481,12 @@ void startup_kernel(void)
* before the kernel started. Therefore, in case the two sections
* overlap there is no risk of corrupting any data.
*/
- if (kaslr_enabled())
- amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G);
+ if (kaslr_enabled()) {
+ unsigned long amode31_min;
+
+ amode31_min = (unsigned long)_decompressor_end;
+ amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G);
+ }
if (!amode31_lma)
amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size;
physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size);
@@ -503,6 +510,9 @@ void startup_kernel(void)
kaslr_adjust_got(__kaslr_offset);
setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit);
copy_bootdata();
+ __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions,
+ (struct alt_instr *)_vmlinux_info.alt_instructions_end,
+ ALT_CTX_EARLY);
/*
* Save KASLR offset for early dumps, before vmcore_info is set.
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index 1e66d2cbb096..318e6ba95bfd 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -8,12 +8,8 @@
#include "uv.h"
/* will be used in arch/s390/kernel/uv.c */
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
-#endif
-#if IS_ENABLED(CONFIG_KVM)
int __bootdata_preserved(prot_virt_host);
-#endif
struct uv_info __bootdata_preserved(uv_info);
void uv_query_info(void)
@@ -53,14 +49,11 @@ void uv_query_info(void)
uv_info.max_secrets = uvcb.max_secrets;
}
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list))
prot_virt_guest = 1;
-#endif
}
-#if IS_ENABLED(CONFIG_KVM)
unsigned long adjust_to_uv_max(unsigned long limit)
{
if (is_prot_virt_host() && uv_info.max_sec_stor_addr)
@@ -92,4 +85,3 @@ void sanitize_prot_virt_host(void)
{
prot_virt_host = is_prot_virt_host_capable();
}
-#endif
diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h
index 0f3070856f8d..da4a4a8d48e0 100644
--- a/arch/s390/boot/uv.h
+++ b/arch/s390/boot/uv.h
@@ -2,21 +2,8 @@
#ifndef BOOT_UV_H
#define BOOT_UV_H
-#if IS_ENABLED(CONFIG_KVM)
unsigned long adjust_to_uv_max(unsigned long limit);
void sanitize_prot_virt_host(void);
-#else
-static inline unsigned long adjust_to_uv_max(unsigned long limit)
-{
- return limit;
-}
-static inline void sanitize_prot_virt_host(void) {}
-#endif
-
-#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
void uv_query_info(void);
-#else
-static inline void uv_query_info(void) {}
-#endif
#endif /* BOOT_UV_H */
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index a255ca189aaa..2847cc059ab7 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -26,6 +26,7 @@ atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
enum populate_mode {
POPULATE_NONE,
POPULATE_DIRECT,
+ POPULATE_LOWCORE,
POPULATE_ABS_LOWCORE,
POPULATE_IDENTITY,
POPULATE_KERNEL,
@@ -242,6 +243,8 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m
return -1;
case POPULATE_DIRECT:
return addr;
+ case POPULATE_LOWCORE:
+ return __lowcore_pa(addr);
case POPULATE_ABS_LOWCORE:
return __abs_lowcore_pa(addr);
case POPULATE_KERNEL:
@@ -418,6 +421,7 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat
void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit)
{
+ unsigned long lowcore_address = 0;
unsigned long start, end;
unsigned long asce_type;
unsigned long asce_bits;
@@ -455,12 +459,17 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l
__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);
+ if (relocate_lowcore)
+ lowcore_address = LOWCORE_ALT_ADDRESS;
+
/*
* To allow prefixing the lowcore must be mapped with 4KB pages.
* To prevent creation of a large page at address 0 first map
* the lowcore and create the identity mapping only afterwards.
*/
- pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT);
+ pgtable_populate(lowcore_address,
+ lowcore_address + sizeof(struct lowcore),
+ POPULATE_LOWCORE);
for_each_physmem_usable_range(i, &start, &end) {
pgtable_populate((unsigned long)__identity_va(start),
(unsigned long)__identity_va(end),
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index f3602414a961..ea63a7342f5f 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -55,7 +55,6 @@ CONFIG_EXPOLINE_AUTO=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
CONFIG_VFIO_AP=m
-CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_S390_HYPFS_FS=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index d0d8925fdf09..d8b28ff8ff45 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -53,7 +53,6 @@ CONFIG_EXPOLINE_AUTO=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
CONFIG_VFIO_AP=m
-CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_S390_HYPFS_FS=y
diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h
index 6f264b79e377..d20df8c923fc 100644
--- a/arch/s390/include/asm/abs_lowcore.h
+++ b/arch/s390/include/asm/abs_lowcore.h
@@ -2,6 +2,7 @@
#ifndef _ASM_S390_ABS_LOWCORE_H
#define _ASM_S390_ABS_LOWCORE_H
+#include <asm/sections.h>
#include <asm/lowcore.h>
#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore))
@@ -24,4 +25,11 @@ static inline void put_abs_lowcore(struct lowcore *lc)
put_cpu();
}
+extern int __bootdata_preserved(relocate_lowcore);
+
+static inline int have_relocated_lowcore(void)
+{
+ return relocate_lowcore;
+}
+
#endif /* _ASM_S390_ABS_LOWCORE_H */
diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h
deleted file mode 100644
index 608f6287ca9c..000000000000
--- a/arch/s390/include/asm/alternative-asm.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_S390_ALTERNATIVE_ASM_H
-#define _ASM_S390_ALTERNATIVE_ASM_H
-
-#ifdef __ASSEMBLY__
-
-/*
- * Issue one struct alt_instr descriptor entry (need to put it into
- * the section .altinstructions, see below). This entry contains
- * enough information for the alternatives patching code to patch an
- * instruction. See apply_alternatives().
- */
-.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
- .long \orig_start - .
- .long \alt_start - .
- .word \feature
- .byte \orig_end - \orig_start
- .org . - ( \orig_end - \orig_start ) & 1
- .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start )
- .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start )
-.endm
-
-/*
- * Define an alternative between two instructions. If @feature is
- * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr.
- */
-.macro ALTERNATIVE oldinstr, newinstr, feature
- .pushsection .altinstr_replacement,"ax"
-770: \newinstr
-771: .popsection
-772: \oldinstr
-773: .pushsection .altinstructions,"a"
- alt_entry 772b, 773b, 770b, 771b, \feature
- .popsection
-.endm
-
-/*
- * Define an alternative between two instructions. If @feature is
- * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr.
- */
-.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
- .pushsection .altinstr_replacement,"ax"
-770: \newinstr1
-771: \newinstr2
-772: .popsection
-773: \oldinstr
-774: .pushsection .altinstructions,"a"
- alt_entry 773b, 774b, 770b, 771b,\feature1
- alt_entry 773b, 774b, 771b, 772b,\feature2
- .popsection
-.endm
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_S390_ALTERNATIVE_ASM_H */
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index dd93b92c3ab6..de980c938a3e 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -2,6 +2,58 @@
#ifndef _ASM_S390_ALTERNATIVE_H
#define _ASM_S390_ALTERNATIVE_H
+/*
+ * Each alternative comes with a 32 bit feature field:
+ * union {
+ * u32 feature;
+ * struct {
+ * u32 ctx : 4;
+ * u32 type : 8;
+ * u32 data : 20;
+ * };
+ * }
+ *
+ * @ctx is a bitfield, where only one bit must be set. Each bit defines
+ * in which context an alternative is supposed to be applied to the
+ * kernel image:
+ *
+ * - from the decompressor before the kernel itself is executed
+ * - from early kernel code from within the kernel
+ *
+ * @type is a number which defines the type and with that the type
+ * specific alternative patching.
+ *
+ * @data is additional type specific information which defines if an
+ * alternative should be applied.
+ */
+
+#define ALT_CTX_EARLY 1
+#define ALT_CTX_LATE 2
+#define ALT_CTX_ALL (ALT_CTX_EARLY | ALT_CTX_LATE)
+
+#define ALT_TYPE_FACILITY 0
+#define ALT_TYPE_SPEC 1
+#define ALT_TYPE_LOWCORE 2
+
+#define ALT_DATA_SHIFT 0
+#define ALT_TYPE_SHIFT 20
+#define ALT_CTX_SHIFT 28
+
+#define ALT_FACILITY_EARLY(facility) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \
+ ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \
+ (facility) << ALT_DATA_SHIFT)
+
+#define ALT_FACILITY(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \
+ ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \
+ (facility) << ALT_DATA_SHIFT)
+
+#define ALT_SPEC(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \
+ ALT_TYPE_SPEC << ALT_TYPE_SHIFT | \
+ (facility) << ALT_DATA_SHIFT)
+
+#define ALT_LOWCORE (ALT_CTX_EARLY << ALT_CTX_SHIFT | \
+ ALT_TYPE_LOWCORE << ALT_TYPE_SHIFT)
+
#ifndef __ASSEMBLY__
#include <linux/types.h>
@@ -11,12 +63,30 @@
struct alt_instr {
s32 instr_offset; /* original instruction */
s32 repl_offset; /* offset to replacement instruction */
- u16 facility; /* facility bit set for replacement */
+ union {
+ u32 feature; /* feature required for replacement */
+ struct {
+ u32 ctx : 4; /* context */
+ u32 type : 8; /* type of alternative */
+ u32 data : 20; /* patching information */
+ };
+ };
u8 instrlen; /* length of original instruction */
} __packed;
-void apply_alternative_instructions(void);
-void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+
+void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx);
+
+static inline void apply_alternative_instructions(void)
+{
+ __apply_alternatives(__alt_instructions, __alt_instructions_end, ALT_CTX_LATE);
+}
+
+static inline void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
+{
+ __apply_alternatives(start, end, ALT_CTX_ALL);
+}
/*
* +---------------------------------+
@@ -48,10 +118,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
#define OLDINSTR(oldinstr) \
"661:\n\t" oldinstr "\n662:\n"
-#define ALTINSTR_ENTRY(facility, num) \
+#define ALTINSTR_ENTRY(feature, num) \
"\t.long 661b - .\n" /* old instruction */ \
"\t.long " b_altinstr(num)"b - .\n" /* alt instruction */ \
- "\t.word " __stringify(facility) "\n" /* facility bit */ \
+ "\t.long " __stringify(feature) "\n" /* feature */ \
"\t.byte " oldinstr_len "\n" /* instruction len */ \
"\t.org . - (" oldinstr_len ") & 1\n" \
"\t.org . - (" oldinstr_len ") + (" altinstr_len(num) ")\n" \
@@ -61,24 +131,24 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n"
/* alternative assembly primitive: */
-#define ALTERNATIVE(oldinstr, altinstr, facility) \
+#define ALTERNATIVE(oldinstr, altinstr, feature) \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(altinstr, 1) \
".popsection\n" \
OLDINSTR(oldinstr) \
".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(facility, 1) \
+ ALTINSTR_ENTRY(feature, 1) \
".popsection\n"
-#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2)\
+#define ALTERNATIVE_2(oldinstr, altinstr1, feature1, altinstr2, feature2)\
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(altinstr1, 1) \
ALTINSTR_REPLACEMENT(altinstr2, 2) \
".popsection\n" \
OLDINSTR(oldinstr) \
".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(facility1, 1) \
- ALTINSTR_ENTRY(facility2, 2) \
+ ALTINSTR_ENTRY(feature1, 1) \
+ ALTINSTR_ENTRY(feature2, 2) \
".popsection\n"
/*
@@ -93,12 +163,12 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
* For non barrier like inlines please define new variants
* without volatile and memory clobber.
*/
-#define alternative(oldinstr, altinstr, facility) \
- asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
+#define alternative(oldinstr, altinstr, feature) \
+ asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) : : : "memory")
-#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \
- asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \
- altinstr2, facility2) ::: "memory")
+#define alternative_2(oldinstr, altinstr1, feature1, altinstr2, feature2) \
+ asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, feature1, \
+ altinstr2, feature2) ::: "memory")
/* Alternative inline assembly with input. */
#define alternative_input(oldinstr, newinstr, feature, input...) \
@@ -106,8 +176,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
: : input)
/* Like alternative_input, but with a single output argument */
-#define alternative_io(oldinstr, altinstr, facility, output, input...) \
- asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) \
+#define alternative_io(oldinstr, altinstr, feature, output, input...) \
+ asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) \
: output : input)
/* Use this macro if more than one output parameter is needed. */
@@ -116,6 +186,56 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
/* Use this macro if clobbers are needed without inputs. */
#define ASM_NO_INPUT_CLOBBER(clobber...) : clobber
+#else /* __ASSEMBLY__ */
+
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
+ .long \orig_start - .
+ .long \alt_start - .
+ .long \feature
+ .byte \orig_end - \orig_start
+ .org . - ( \orig_end - \orig_start ) & 1
+ .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start )
+ .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start )
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr.
+ */
+.macro ALTERNATIVE oldinstr, newinstr, feature
+ .pushsection .altinstr_replacement,"ax"
+770: \newinstr
+771: .popsection
+772: \oldinstr
+773: .pushsection .altinstructions,"a"
+ alt_entry 772b, 773b, 770b, 771b, \feature
+ .popsection
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+ .pushsection .altinstr_replacement,"ax"
+770: \newinstr1
+771: \newinstr2
+772: .popsection
+773: \oldinstr
+774: .pushsection .altinstructions,"a"
+ alt_entry 773b, 774b, 770b, 771b,\feature1
+ alt_entry 773b, 774b, 771b, 772b,\feature2
+ .popsection
+.endm
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_ALTERNATIVE_H */
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
index 2b379d1d9046..742c7919cbcd 100644
--- a/arch/s390/include/asm/atomic_ops.h
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -188,7 +188,8 @@ static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new)
return old;
}
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
+/* GCC versions before 14.2.0 may die with an ICE in some configurations. */
+#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_IS_GCC) && (GCC_VERSION < 140200))
static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
{
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 436365ff6c19..e3afcece375e 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -210,7 +210,7 @@ extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *);
#define get_ccwdev_lock(x) (x)->ccwlock
#define to_ccwdev(n) container_of(n, struct ccw_device, dev)
-#define to_ccwdrv(n) container_of(n, struct ccw_driver, driver)
+#define to_ccwdrv(n) container_of_const(n, struct ccw_driver, driver)
extern struct ccw_device *ccw_device_create_console(struct ccw_driver *);
extern void ccw_device_destroy_console(struct ccw_device *);
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index d46cc725f024..b7d234838a36 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -20,7 +20,6 @@
#define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8)
extern u64 stfle_fac_list[16];
-extern u64 alt_stfle_fac_list[16];
static inline void __set_facility(unsigned long nr, void *facilities)
{
diff --git a/arch/s390/include/asm/kmsan.h b/arch/s390/include/asm/kmsan.h
index 27db65fbf3f6..f73e181d09ae 100644
--- a/arch/s390/include/asm/kmsan.h
+++ b/arch/s390/include/asm/kmsan.h
@@ -12,8 +12,8 @@
static inline bool is_lowcore_addr(void *addr)
{
- return addr >= (void *)&S390_lowcore &&
- addr < (void *)(&S390_lowcore + 1);
+ return addr >= (void *)get_lowcore() &&
+ addr < (void *)(get_lowcore() + 1);
}
static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin)
@@ -25,7 +25,7 @@ static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin)
* order to get a distinct struct page.
*/
addr += (void *)lowcore_ptr[raw_smp_processor_id()] -
- (void *)&S390_lowcore;
+ (void *)get_lowcore();
if (KMSAN_WARN_ON(is_lowcore_addr(addr)))
return NULL;
return kmsan_get_metadata(addr, is_origin);
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index c724e71e1785..183ac29afaf8 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -14,10 +14,15 @@
#include <asm/ctlreg.h>
#include <asm/cpu.h>
#include <asm/types.h>
+#include <asm/alternative.h>
#define LC_ORDER 1
#define LC_PAGES 2
+#define LOWCORE_ALT_ADDRESS _AC(0x70000, UL)
+
+#ifndef __ASSEMBLY__
+
struct pgm_tdb {
u64 data[32];
};
@@ -97,8 +102,7 @@ struct lowcore {
__u64 save_area_async[8]; /* 0x0240 */
__u64 save_area_restart[1]; /* 0x0280 */
- /* CPU flags. */
- __u64 cpu_flags; /* 0x0288 */
+ __u64 pcpu; /* 0x0288 */
/* Return psws. */
psw_t return_psw; /* 0x0290 */
@@ -215,7 +219,14 @@ struct lowcore {
static __always_inline struct lowcore *get_lowcore(void)
{
- return NULL;
+ struct lowcore *lc;
+
+ if (__is_defined(__DECOMPRESSOR))
+ return NULL;
+ asm(ALTERNATIVE("llilh %[lc],0", "llilh %[lc],%[alt]", ALT_LOWCORE)
+ : [lc] "=d" (lc)
+ : [alt] "i" (LOWCORE_ALT_ADDRESS >> 16));
+ return lc;
}
extern struct lowcore *lowcore_ptr[];
@@ -225,4 +236,19 @@ static inline void set_prefix(__u32 address)
asm volatile("spx %0" : : "Q" (address) : "memory");
}
+#else /* __ASSEMBLY__ */
+
+.macro GET_LC reg
+ ALTERNATIVE "llilh \reg,0", \
+ __stringify(llilh \reg, LOWCORE_ALT_ADDRESS >> 16), \
+ ALT_LOWCORE
+.endm
+
+.macro STMG_LC start, end, savearea
+ ALTERNATIVE "stmg \start, \end, \savearea", \
+ __stringify(stmg \start, \end, LOWCORE_ALT_ADDRESS + \savearea), \
+ ALT_LOWCORE
+.endm
+
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_LOWCORE_H */
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
index b9c1f3cae842..192835a3e24d 100644
--- a/arch/s390/include/asm/nospec-branch.h
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -5,8 +5,17 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
+#include <asm/facility.h>
extern int nospec_disable;
+extern int nobp;
+
+static inline bool nobp_enabled(void)
+{
+ if (__is_defined(__DECOMPRESSOR))
+ return false;
+ return nobp && test_facility(82);
+}
void nospec_init_branches(void);
void nospec_auto_detect(void);
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 5ec41ec3d761..06416b3f94f5 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -174,12 +174,10 @@ static inline int devmem_is_allowed(unsigned long pfn)
#define HAVE_ARCH_FREE_PAGE
#define HAVE_ARCH_ALLOC_PAGE
-#if IS_ENABLED(CONFIG_PGSTE)
int arch_make_folio_accessible(struct folio *folio);
#define HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
int arch_make_page_accessible(struct page *page);
#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
-#endif
struct vm_layout {
unsigned long kaslr_offset;
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index c87cf2b8e81a..5ecd442535b9 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -14,13 +14,11 @@
#include <linux/bits.h>
-#define CIF_SIE 0 /* CPU needs SIE exit cleanup */
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
#define CIF_ENABLED_WAIT 5 /* in enabled wait state */
#define CIF_MCCK_GUEST 6 /* machine check happening in guest */
#define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */
-#define _CIF_SIE BIT(CIF_SIE)
#define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY)
#define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT)
#define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST)
@@ -42,21 +40,37 @@
#include <asm/irqflags.h>
#include <asm/alternative.h>
+struct pcpu {
+ unsigned long ec_mask; /* bit mask for ec_xxx functions */
+ unsigned long ec_clk; /* sigp timestamp for ec_xxx */
+ unsigned long flags; /* per CPU flags */
+ signed char state; /* physical cpu state */
+ signed char polarization; /* physical polarization */
+ u16 address; /* physical cpu address */
+};
+
+DECLARE_PER_CPU(struct pcpu, pcpu_devices);
+
typedef long (*sys_call_ptr_t)(struct pt_regs *regs);
+static __always_inline struct pcpu *this_pcpu(void)
+{
+ return (struct pcpu *)(get_lowcore()->pcpu);
+}
+
static __always_inline void set_cpu_flag(int flag)
{
- get_lowcore()->cpu_flags |= (1UL << flag);
+ this_pcpu()->flags |= (1UL << flag);
}
static __always_inline void clear_cpu_flag(int flag)
{
- get_lowcore()->cpu_flags &= ~(1UL << flag);
+ this_pcpu()->flags &= ~(1UL << flag);
}
static __always_inline bool test_cpu_flag(int flag)
{
- return get_lowcore()->cpu_flags & (1UL << flag);
+ return this_pcpu()->flags & (1UL << flag);
}
static __always_inline bool test_and_set_cpu_flag(int flag)
@@ -81,9 +95,7 @@ static __always_inline bool test_and_clear_cpu_flag(int flag)
*/
static __always_inline bool test_cpu_flag_of(int flag, int cpu)
{
- struct lowcore *lc = lowcore_ptr[cpu];
-
- return lc->cpu_flags & (1UL << flag);
+ return per_cpu(pcpu_devices, cpu).flags & (1UL << flag);
}
#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
@@ -405,7 +417,7 @@ static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
static __always_inline void bpon(void)
{
- asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", 82));
+ asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)));
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/runtime-const.h b/arch/s390/include/asm/runtime-const.h
new file mode 100644
index 000000000000..17878b1d048c
--- /dev/null
+++ b/arch/s390/include/asm/runtime-const.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_RUNTIME_CONST_H
+#define _ASM_S390_RUNTIME_CONST_H
+
+#include <linux/uaccess.h>
+
+#define runtime_const_ptr(sym) \
+({ \
+ typeof(sym) __ret; \
+ \
+ asm_inline( \
+ "0: iihf %[__ret],%[c1]\n" \
+ " iilf %[__ret],%[c2]\n" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n" \
+ ".long 0b - .\n" \
+ ".popsection" \
+ : [__ret] "=d" (__ret) \
+ : [c1] "i" (0x01234567UL), \
+ [c2] "i" (0x89abcdefUL)); \
+ __ret; \
+})
+
+#define runtime_const_shift_right_32(val, sym) \
+({ \
+ unsigned int __ret = (val); \
+ \
+ asm_inline( \
+ "0: srl %[__ret],12\n" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n" \
+ ".long 0b - .\n" \
+ ".popsection" \
+ : [__ret] "+d" (__ret)); \
+ __ret; \
+})
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/* 32-bit immediate for iihf and iilf in bits in I2 field */
+static inline void __runtime_fixup_32(u32 *p, unsigned int val)
+{
+ s390_kernel_write(p, &val, sizeof(val));
+}
+
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ __runtime_fixup_32(where + 2, val >> 32);
+ __runtime_fixup_32(where + 8, val);
+}
+
+/* Immediate value is lower 12 bits of D2 field of srl */
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ u32 insn = *(u32 *)where;
+
+ insn &= 0xfffff000;
+ insn |= (val & 63);
+ s390_kernel_write(where, &insn, sizeof(insn));
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif /* _ASM_S390_RUNTIME_CONST_H */
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index c13c79025348..cd835f4fb11a 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -24,7 +24,6 @@ extern int __cpu_up(unsigned int cpu, struct task_struct *tidle);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-extern void smp_call_online_cpu(void (*func)(void *), void *);
extern void smp_call_ipl_cpu(void (*func)(void *), void *);
extern void smp_emergency_stop(void);
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 3e43c90ff135..77d5e804af93 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -79,7 +79,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
typecheck(int, lp->lock);
kcsan_release();
asm_inline volatile(
- ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", ALT_FACILITY(49)) /* NIAI 7 */
" sth %1,%0\n"
: "=R" (((unsigned short *) &lp->lock)[1])
: "d" (0) : "cc", "memory");
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index d02a709717b8..00ac01874a12 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -40,6 +40,7 @@ struct thread_info {
unsigned long flags; /* low level flags */
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
unsigned int cpu; /* current CPU */
+ unsigned char sie; /* running in SIE context */
};
/*
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 9213be0529ee..a81f897a81ce 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -332,7 +332,14 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
return __clear_user(to, n);
}
-void *s390_kernel_write(void *dst, const void *src, size_t size);
+void *__s390_kernel_write(void *dst, const void *src, size_t size);
+
+static inline void *s390_kernel_write(void *dst, const void *src, size_t size)
+{
+ if (__is_defined(__DECOMPRESSOR))
+ return memcpy(dst, src, size);
+ return __s390_kernel_write(dst, src, size);
+}
int __noreturn __put_kernel_bad(void);
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index 0679445cac0b..0b5f8f3e84f1 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -414,7 +414,6 @@ static inline bool uv_has_feature(u8 feature_bit)
return test_bit_inv(feature_bit, &uv_info.uv_feature_indications);
}
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
extern int prot_virt_guest;
static inline int is_prot_virt_guest(void)
@@ -466,13 +465,6 @@ static inline int uv_remove_shared(unsigned long addr)
return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS);
}
-#else
-#define is_prot_virt_guest() 0
-static inline int uv_set_shared(unsigned long addr) { return 0; }
-static inline int uv_remove_shared(unsigned long addr) { return 0; }
-#endif
-
-#if IS_ENABLED(CONFIG_KVM)
extern int prot_virt_host;
static inline int is_prot_virt_host(void)
@@ -489,29 +481,5 @@ int uv_convert_from_secure_pte(pte_t pte);
int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
void setup_uv(void);
-#else
-#define is_prot_virt_host() 0
-static inline void setup_uv(void) {}
-
-static inline int uv_pin_shared(unsigned long paddr)
-{
- return 0;
-}
-
-static inline int uv_destroy_folio(struct folio *folio)
-{
- return 0;
-}
-
-static inline int uv_destroy_pte(pte_t pte)
-{
- return 0;
-}
-
-static inline int uv_convert_from_secure_pte(pte_t pte)
-{
- return 0;
-}
-#endif
#endif /* _ASM_S390_UV_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 7241fa194709..e47a4be54ff8 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -43,7 +43,7 @@ obj-y += sysinfo.o lgr.o os_info.o ctlreg.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o
+obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o
extra-y += vmlinux.lds
@@ -80,7 +80,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
-obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
# vdso
obj-y += vdso64/
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
index f9efc54ec4b7..09cd24cbe74e 100644
--- a/arch/s390/kernel/abs_lowcore.c
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -4,6 +4,7 @@
#include <asm/abs_lowcore.h>
unsigned long __bootdata_preserved(__abs_lowcore);
+int __bootdata_preserved(relocate_lowcore);
int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc)
{
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index 1ac5f707dd70..8d5d0de35de0 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -1,68 +1,41 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/module.h>
-#include <linux/cpu.h>
-#include <linux/smp.h>
-#include <asm/text-patching.h>
+
+#include <linux/uaccess.h>
+#include <asm/nospec-branch.h>
+#include <asm/abs_lowcore.h>
#include <asm/alternative.h>
#include <asm/facility.h>
-#include <asm/nospec-branch.h>
-
-static int __initdata_or_module alt_instr_disabled;
-
-static int __init disable_alternative_instructions(char *str)
-{
- alt_instr_disabled = 1;
- return 0;
-}
-
-early_param("noaltinstr", disable_alternative_instructions);
-static void __init_or_module __apply_alternatives(struct alt_instr *start,
- struct alt_instr *end)
+void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx)
{
- struct alt_instr *a;
u8 *instr, *replacement;
+ struct alt_instr *a;
+ bool replace;
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite previously scanned alternative code.
*/
for (a = start; a < end; a++) {
+ if (!(a->ctx & ctx))
+ continue;
+ switch (a->type) {
+ case ALT_TYPE_FACILITY:
+ replace = test_facility(a->data);
+ break;
+ case ALT_TYPE_SPEC:
+ replace = nobp_enabled();
+ break;
+ case ALT_TYPE_LOWCORE:
+ replace = have_relocated_lowcore();
+ break;
+ default:
+ replace = false;
+ }
+ if (!replace)
+ continue;
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
-
- if (!__test_facility(a->facility, alt_stfle_fac_list))
- continue;
s390_kernel_write(instr, replacement, a->instrlen);
}
}
-
-void __init_or_module apply_alternatives(struct alt_instr *start,
- struct alt_instr *end)
-{
- if (!alt_instr_disabled)
- __apply_alternatives(start, end);
-}
-
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-void __init apply_alternative_instructions(void)
-{
- apply_alternatives(__alt_instructions, __alt_instructions_end);
-}
-
-static void do_sync_core(void *info)
-{
- sync_core();
-}
-
-void text_poke_sync(void)
-{
- on_each_cpu(do_sync_core, NULL, 1);
-}
-
-void text_poke_sync_lock(void)
-{
- cpus_read_lock();
- text_poke_sync();
- cpus_read_unlock();
-}
diff --git a/arch/s390/kernel/alternative.h b/arch/s390/kernel/alternative.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/arch/s390/kernel/alternative.h
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 26bb45d0e6f1..ffa0dd2dbaac 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -28,6 +28,7 @@ int main(void)
BLANK();
/* thread info offsets */
OFFSET(__TI_flags, task_struct, thread_info.flags);
+ OFFSET(__TI_sie, task_struct, thread_info.sie);
BLANK();
/* pt_regs offsets */
OFFSET(__PT_PSW, pt_regs, psw);
@@ -114,7 +115,7 @@ int main(void)
OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync);
OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async);
OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart);
- OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags);
+ OFFSET(__LC_PCPU, lowcore, pcpu);
OFFSET(__LC_RETURN_PSW, lowcore, return_psw);
OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw);
OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer);
@@ -186,5 +187,7 @@ int main(void)
#endif
OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs);
DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs));
+
+ OFFSET(__PCPU_FLAGS, pcpu, flags);
return 0;
}
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 85328a0ef3b6..bce50ca75ea7 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -954,7 +954,7 @@ static int debug_active = 1;
* always allow read, allow write only if debug_stoppable is set or
* if debug_active is already off
*/
-static int s390dbf_procactive(struct ctl_table *table, int write,
+static int s390dbf_procactive(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
if (!write || debug_stoppable || !debug_active)
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 467ed4dba817..14d324865e33 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -48,6 +48,7 @@ decompressor_handled_param(dfltcc);
decompressor_handled_param(facilities);
decompressor_handled_param(nokaslr);
decompressor_handled_param(cmma);
+decompressor_handled_param(relocate_lowcore);
#if IS_ENABLED(CONFIG_KVM)
decompressor_handled_param(prot_virt);
#endif
@@ -190,13 +191,6 @@ static noinline __init void setup_lowcore_early(void)
get_lowcore()->preempt_count = INIT_PREEMPT_COUNT;
}
-static noinline __init void setup_facility_list(void)
-{
- memcpy(alt_stfle_fac_list, stfle_fac_list, sizeof(alt_stfle_fac_list));
- if (!IS_ENABLED(CONFIG_KERNEL_NOBP))
- __clear_facility(82, alt_stfle_fac_list);
-}
-
static __init void detect_diag9c(void)
{
unsigned int cpu_address;
@@ -291,7 +285,6 @@ void __init startup_init(void)
lockdep_off();
sort_amode31_extable();
setup_lowcore_early();
- setup_facility_list();
detect_machine_type();
setup_arch_string();
setup_boot_command_line();
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 454b6b92c7f8..749410cfdbc0 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -12,7 +12,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/asm-extable.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/dwarf.h>
@@ -28,49 +28,54 @@
#include <asm/setup.h>
#include <asm/nmi.h>
#include <asm/nospec-insn.h>
+#include <asm/lowcore.h>
_LPP_OFFSET = __LC_LPP
.macro STBEAR address
- ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193
+ ALTERNATIVE "nop", ".insn s,0xb2010000,\address", ALT_FACILITY(193)
.endm
.macro LBEAR address
- ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193
+ ALTERNATIVE "nop", ".insn s,0xb2000000,\address", ALT_FACILITY(193)
.endm
- .macro LPSWEY address,lpswe
- ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193
+ .macro LPSWEY address, lpswe
+ ALTERNATIVE_2 "b \lpswe;nopr", \
+ ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY_EARLY(193), \
+ __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \
+ ALT_LOWCORE
.endm
- .macro MBEAR reg
- ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
+ .macro MBEAR reg, lowcore
+ ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK(\lowcore)),\
+ ALT_FACILITY(193)
.endm
- .macro CHECK_STACK savearea
+ .macro CHECK_STACK savearea, lowcore
#ifdef CONFIG_CHECK_STACK
tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD
- lghi %r14,\savearea
+ la %r14,\savearea(\lowcore)
jz stack_overflow
#endif
.endm
- .macro CHECK_VMAP_STACK savearea,oklabel
+ .macro CHECK_VMAP_STACK savearea, lowcore, oklabel
#ifdef CONFIG_VMAP_STACK
lgr %r14,%r15
nill %r14,0x10000 - THREAD_SIZE
oill %r14,STACK_INIT_OFFSET
- clg %r14,__LC_KERNEL_STACK
+ clg %r14,__LC_KERNEL_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_ASYNC_STACK
+ clg %r14,__LC_ASYNC_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_MCCK_STACK
+ clg %r14,__LC_MCCK_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_NODAT_STACK
+ clg %r14,__LC_NODAT_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_RESTART_STACK
+ clg %r14,__LC_RESTART_STACK(\lowcore)
je \oklabel
- lghi %r14,\savearea
+ la %r14,\savearea(\lowcore)
j stack_overflow
#else
j \oklabel
@@ -100,30 +105,31 @@ _LPP_OFFSET = __LC_LPP
.endm
.macro BPOFF
- ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", ALT_SPEC(82)
.endm
.macro BPON
- ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
.endm
.macro BPENTER tif_ptr,tif_mask
ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \
- "j .+12; nop; nop", 82
+ "j .+12; nop; nop", ALT_SPEC(82)
.endm
.macro BPEXIT tif_ptr,tif_mask
TSTMSK \tif_ptr,\tif_mask
ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \
- "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82
+ "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
.endm
#if IS_ENABLED(CONFIG_KVM)
- .macro SIEEXIT sie_control
- lg %r9,\sie_control # get control block pointer
- ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
- ni __LC_CPU_FLAGS+7,255-_CIF_SIE
+ .macro SIEEXIT sie_control,lowcore
+ lg %r9,\sie_control # get control block pointer
+ ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce
+ lg %r9,__LC_CURRENT(\lowcore)
+ mvi __TI_sie(%r9),0
larl %r9,sie_exit # skip forward to sie_exit
.endm
#endif
@@ -163,13 +169,14 @@ SYM_FUNC_START(__switch_to_asm)
stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev
lg %r15,0(%r4,%r3) # start of kernel stack of next
agr %r15,%r5 # end of kernel stack of next
- stg %r3,__LC_CURRENT # store task struct of next
- stg %r15,__LC_KERNEL_STACK # store end of kernel stack
+ GET_LC %r13
+ stg %r3,__LC_CURRENT(%r13) # store task struct of next
+ stg %r15,__LC_KERNEL_STACK(%r13) # store end of kernel stack
lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next
aghi %r3,__TASK_pid
- mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next
+ mvc __LC_CURRENT_PID(4,%r13),0(%r3) # store pid of next
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40)
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
- ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
BR_EX %r14
SYM_FUNC_END(__switch_to_asm)
@@ -183,15 +190,16 @@ SYM_FUNC_END(__switch_to_asm)
*/
SYM_FUNC_START(__sie64a)
stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
- lg %r12,__LC_CURRENT
+ GET_LC %r13
+ lg %r14,__LC_CURRENT(%r13)
stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical..
stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses
stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area
stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce
xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
- mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags
+ mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags
lmg %r0,%r13,0(%r4) # load guest gprs 0-13
- oi __LC_CPU_FLAGS+7,_CIF_SIE
+ mvi __TI_sie(%r14),1
lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
@@ -210,8 +218,10 @@ SYM_FUNC_START(__sie64a)
.Lsie_skip:
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
- ni __LC_CPU_FLAGS+7,255-_CIF_SIE
+ GET_LC %r14
+ lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce
+ lg %r14,__LC_CURRENT(%r14)
+ mvi __TI_sie(%r14),0
# some program checks are suppressing. C code (e.g. do_protection_exception)
# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
@@ -254,14 +264,15 @@ EXPORT_SYMBOL(sie_exit)
*/
SYM_CODE_START(system_call)
- stpt __LC_SYS_ENTER_TIMER
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC
+ GET_LC %r13
+ stpt __LC_SYS_ENTER_TIMER(%r13)
BPOFF
lghi %r14,0
.Lsysc_per:
- STBEAR __LC_LAST_BREAK
- lctlg %c1,%c1,__LC_KERNEL_ASCE
- lg %r15,__LC_KERNEL_STACK
+ STBEAR __LC_LAST_BREAK(%r13)
+ lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
+ lg %r15,__LC_KERNEL_STACK(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
# clear user controlled register to prevent speculative use
@@ -276,17 +287,17 @@ SYM_CODE_START(system_call)
xgr %r10,%r10
xgr %r11,%r11
la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
- mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC
- MBEAR %r2
+ mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC(%r13)
+ MBEAR %r2,%r13
lgr %r3,%r14
brasl %r14,__do_syscall
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ lctlg %c1,%c1,__LC_USER_ASCE(%r13)
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+ stpt __LC_EXIT_TIMER(%r13)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- stpt __LC_EXIT_TIMER
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(system_call)
@@ -297,12 +308,13 @@ SYM_CODE_START(ret_from_fork)
lgr %r3,%r11
brasl %r14,__ret_from_fork
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ GET_LC %r13
+ lctlg %c1,%c1,__LC_USER_ASCE(%r13)
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+ stpt __LC_EXIT_TIMER(%r13)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- stpt __LC_EXIT_TIMER
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(ret_from_fork)
@@ -311,39 +323,40 @@ SYM_CODE_END(ret_from_fork)
*/
SYM_CODE_START(pgm_check_handler)
- stpt __LC_SYS_ENTER_TIMER
+ STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC
+ GET_LC %r13
+ stpt __LC_SYS_ENTER_TIMER(%r13)
BPOFF
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
lgr %r10,%r15
- lmg %r8,%r9,__LC_PGM_OLD_PSW
+ lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13)
tmhh %r8,0x0001 # coming from user space?
jno .Lpgm_skip_asce
- lctlg %c1,%c1,__LC_KERNEL_ASCE
+ lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
j 3f # -> fault in user space
.Lpgm_skip_asce:
1: tmhh %r8,0x4000 # PER bit set in old PSW ?
jnz 2f # -> enabled, can't be a double fault
- tm __LC_PGM_ILC+3,0x80 # check for per exception
+ tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
-2: CHECK_STACK __LC_SAVE_AREA_SYNC
+2: CHECK_STACK __LC_SAVE_AREA_SYNC,%r13
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
# CHECK_VMAP_STACK branches to stack_overflow or 4f
- CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
-3: lg %r15,__LC_KERNEL_STACK
+ CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,%r13,4f
+3: lg %r15,__LC_KERNEL_STACK(%r13)
4: la %r11,STACK_FRAME_OVERHEAD(%r15)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
- mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC(%r13)
+ mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13)
stctg %c1,%c1,__PT_CR1(%r11)
#if IS_ENABLED(CONFIG_KVM)
- ltg %r12,__LC_GMAP
+ ltg %r12,__LC_GMAP(%r13)
jz 5f
clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11)
jne 5f
BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST
- SIEEXIT __SF_SIE_CONTROL(%r10)
+ SIEEXIT __SF_SIE_CONTROL(%r10),%r13
#endif
5: stmg %r8,%r9,__PT_PSW(%r11)
# clear user controlled registers to prevent speculative use
@@ -359,11 +372,11 @@ SYM_CODE_START(pgm_check_handler)
tmhh %r8,0x0001 # returning to user space?
jno .Lpgm_exit_kernel
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
+ lctlg %c1,%c1,__LC_USER_ASCE(%r13)
BPON
- stpt __LC_EXIT_TIMER
+ stpt __LC_EXIT_TIMER(%r13)
.Lpgm_exit_kernel:
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
@@ -372,11 +385,11 @@ SYM_CODE_START(pgm_check_handler)
# single stepped system call
#
.Lpgm_svcper:
- mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
+ mvc __LC_RETURN_PSW(8,%r13),__LC_SVC_NEW_PSW(%r13)
larl %r14,.Lsysc_per
- stg %r14,__LC_RETURN_PSW+8
+ stg %r14,__LC_RETURN_PSW+8(%r13)
lghi %r14,1
- LBEAR __LC_PGM_LAST_BREAK
+ LBEAR __LC_PGM_LAST_BREAK(%r13)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per
SYM_CODE_END(pgm_check_handler)
@@ -385,25 +398,27 @@ SYM_CODE_END(pgm_check_handler)
*/
.macro INT_HANDLER name,lc_old_psw,handler
SYM_CODE_START(\name)
- stckf __LC_INT_CLOCK
- stpt __LC_SYS_ENTER_TIMER
- STBEAR __LC_LAST_BREAK
+ STMG_LC %r8,%r15,__LC_SAVE_AREA_ASYNC
+ GET_LC %r13
+ stckf __LC_INT_CLOCK(%r13)
+ stpt __LC_SYS_ENTER_TIMER(%r13)
+ STBEAR __LC_LAST_BREAK(%r13)
BPOFF
- stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
- lmg %r8,%r9,\lc_old_psw
+ lmg %r8,%r9,\lc_old_psw(%r13)
tmhh %r8,0x0001 # interrupting from user ?
jnz 1f
#if IS_ENABLED(CONFIG_KVM)
- TSTMSK __LC_CPU_FLAGS,_CIF_SIE
+ lg %r10,__LC_CURRENT(%r13)
+ tm __TI_sie(%r10),0xff
jz 0f
BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
- SIEEXIT __SF_SIE_CONTROL(%r15)
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
-0: CHECK_STACK __LC_SAVE_AREA_ASYNC
+0: CHECK_STACK __LC_SAVE_AREA_ASYNC,%r13
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
-1: lctlg %c1,%c1,__LC_KERNEL_ASCE
- lg %r15,__LC_KERNEL_STACK
+1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
+ lg %r15,__LC_KERNEL_STACK(%r13)
2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
@@ -417,18 +432,18 @@ SYM_CODE_START(\name)
xgr %r7,%r7
xgr %r10,%r10
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
- MBEAR %r11
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC(%r13)
+ MBEAR %r11,%r13
stmg %r8,%r9,__PT_PSW(%r11)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,\handler
- mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
+ mvc __LC_RETURN_PSW(16,%r13),__PT_PSW(%r11)
tmhh %r8,0x0001 # returning to user ?
jno 2f
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
+ lctlg %c1,%c1,__LC_USER_ASCE(%r13)
BPON
- stpt __LC_EXIT_TIMER
+ stpt __LC_EXIT_TIMER(%r13)
2: LBEAR __PT_LAST_BREAK(%r11)
lmg %r0,%r15,__PT_R0(%r11)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
@@ -443,35 +458,37 @@ INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq
*/
SYM_CODE_START(mcck_int_handler)
BPOFF
- lmg %r8,%r9,__LC_MCK_OLD_PSW
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
+ GET_LC %r13
+ lmg %r8,%r9,__LC_MCK_OLD_PSW(%r13)
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_SYSTEM_DAMAGE
jo .Lmcck_panic # yes -> rest of mcck code invalid
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_CR_VALID
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CR_VALID
jno .Lmcck_panic # control registers invalid -> panic
ptlb
- lghi %r14,__LC_CPU_TIMER_SAVE_AREA
- mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID
+ lay %r14,__LC_CPU_TIMER_SAVE_AREA(%r13)
+ mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CPU_TIMER_VALID
jo 3f
- la %r14,__LC_SYS_ENTER_TIMER
- clc 0(8,%r14),__LC_EXIT_TIMER
+ la %r14,__LC_SYS_ENTER_TIMER(%r13)
+ clc 0(8,%r14),__LC_EXIT_TIMER(%r13)
jl 1f
- la %r14,__LC_EXIT_TIMER
-1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER
+ la %r14,__LC_EXIT_TIMER(%r13)
+1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER(%r13)
jl 2f
- la %r14,__LC_LAST_UPDATE_TIMER
+ la %r14,__LC_LAST_UPDATE_TIMER(%r13)
2: spt 0(%r14)
- mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
-3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID
+ mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
+3: TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_MWP_VALID
jno .Lmcck_panic
tmhh %r8,0x0001 # interrupting from user ?
jnz .Lmcck_user
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_IA_VALID
jno .Lmcck_panic
#if IS_ENABLED(CONFIG_KVM)
- TSTMSK __LC_CPU_FLAGS,_CIF_SIE
+ lg %r10,__LC_CURRENT(%r13)
+ tm __TI_sie(%r10),0xff
jz .Lmcck_user
- # Need to compare the address instead of a CIF_SIE* flag.
+ # Need to compare the address instead of __TI_SIE flag.
# Otherwise there would be a race between setting the flag
# and entering SIE (or leaving and clearing the flag). This
# would cause machine checks targeted at the guest to be
@@ -480,18 +497,19 @@ SYM_CODE_START(mcck_int_handler)
clgrjl %r9,%r14, 4f
larl %r14,.Lsie_leave
clgrjhe %r9,%r14, 4f
- oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
+ lg %r10,__LC_PCPU
+ oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST
4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
- SIEEXIT __SF_SIE_CONTROL(%r15)
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
.Lmcck_user:
- lg %r15,__LC_MCCK_STACK
+ lg %r15,__LC_MCCK_STACK(%r13)
la %r11,STACK_FRAME_OVERHEAD(%r15)
stctg %c1,%c1,__PT_CR1(%r11)
- lctlg %c1,%c1,__LC_KERNEL_ASCE
+ lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- lghi %r14,__LC_GPREGS_SAVE_AREA+64
- stmg %r0,%r7,__PT_R0(%r11)
+ lay %r14,__LC_GPREGS_SAVE_AREA(%r13)
+ mvc __PT_R0(128,%r11),0(%r14)
# clear user controlled registers to prevent speculative use
xgr %r0,%r0
xgr %r1,%r1
@@ -501,7 +519,6 @@ SYM_CODE_START(mcck_int_handler)
xgr %r6,%r6
xgr %r7,%r7
xgr %r10,%r10
- mvc __PT_R8(64,%r11),0(%r14)
stmg %r8,%r9,__PT_PSW(%r11)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
@@ -509,12 +526,13 @@ SYM_CODE_START(mcck_int_handler)
brasl %r14,s390_do_machine_check
lctlg %c1,%c1,__PT_CR1(%r11)
lmg %r0,%r10,__PT_R0(%r11)
- mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
- tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
+ mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW
+ tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ?
jno 0f
BPON
- stpt __LC_EXIT_TIMER
-0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
+ stpt __LC_EXIT_TIMER(%r13)
+0: ALTERNATIVE "brcl 0,0", __stringify(lay %r12,__LC_LAST_BREAK_SAVE_AREA(%r13)),\
+ ALT_FACILITY(193)
LBEAR 0(%r12)
lmg %r11,%r15,__PT_R11(%r11)
LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE
@@ -550,7 +568,7 @@ SYM_CODE_START(mcck_int_handler)
SYM_CODE_END(mcck_int_handler)
SYM_CODE_START(restart_int_handler)
- ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40)
stg %r15,__LC_SAVE_AREA_RESTART
TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4
jz 0f
@@ -558,15 +576,17 @@ SYM_CODE_START(restart_int_handler)
0: larl %r15,daton_psw
lpswe 0(%r15) # turn dat on, keep irqs off
.Ldaton:
- lg %r15,__LC_RESTART_STACK
+ GET_LC %r15
+ lg %r15,__LC_RESTART_STACK(%r15)
xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
- mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW
+ GET_LC %r13
+ mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART(%r13)
+ mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW(%r13)
xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
- lg %r1,__LC_RESTART_FN # load fn, parm & source cpu
- lg %r2,__LC_RESTART_DATA
- lgf %r3,__LC_RESTART_SOURCE
+ lg %r1,__LC_RESTART_FN(%r13) # load fn, parm & source cpu
+ lg %r2,__LC_RESTART_DATA(%r13)
+ lgf %r3,__LC_RESTART_SOURCE(%r13)
ltgr %r3,%r3 # test source cpu address
jm 1f # negative -> skip source stop
0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu
@@ -588,7 +608,8 @@ SYM_CODE_END(restart_int_handler)
* Setup a pt_regs so that show_trace can provide a good call trace.
*/
SYM_CODE_START(stack_overflow)
- lg %r15,__LC_NODAT_STACK # change to panic stack
+ GET_LC %r15
+ lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
stmg %r8,%r9,__PT_PSW(%r11)
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 45413b04efc5..396034b2fe67 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -10,6 +10,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
+#include <asm/lowcore.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/page.h>
@@ -18,14 +19,15 @@
__HEAD
SYM_CODE_START(startup_continue)
larl %r1,tod_clock_base
- mvc 0(16,%r1),__LC_BOOT_CLOCK
+ GET_LC %r2
+ mvc 0(16,%r1),__LC_BOOT_CLOCK(%r2)
#
# Setup stack
#
larl %r14,init_task
- stg %r14,__LC_CURRENT
+ stg %r14,__LC_CURRENT(%r2)
larl %r15,init_thread_union+STACK_INIT_OFFSET
- stg %r15,__LC_KERNEL_STACK
+ stg %r15,__LC_KERNEL_STACK(%r2)
brasl %r14,sclp_early_adjust_va # allow sclp_early_printk
brasl %r14,startup_init # s390 specific early init
brasl %r14,start_kernel # common init code
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 3a7d6e172211..f17bb7bf9392 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2112,7 +2112,7 @@ void do_restart(void *arg)
tracing_off();
debug_locks_off();
lgr_info_log();
- smp_call_online_cpu(__do_restart, arg);
+ smp_call_ipl_cpu(__do_restart, arg);
}
/* on halt */
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index f4cf65da6d49..8f681ccfb83a 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -62,7 +62,7 @@ static void __do_machine_kdump(void *data)
* This need to be done *after* s390_reset_system set the
* prefix register of this CPU to zero
*/
- memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA),
+ memcpy(absolute_pointer(get_lowcore()->floating_pt_save_area),
phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512);
call_nodat(1, int, purgatory, int, 1);
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 9b8c24ebb008..e11ec15960a1 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -4,6 +4,8 @@
#include <linux/cpu.h>
#include <asm/nospec-branch.h>
+int nobp = IS_ENABLED(CONFIG_KERNEL_NOBP);
+
static int __init nobp_setup_early(char *str)
{
bool enabled;
@@ -17,11 +19,11 @@ static int __init nobp_setup_early(char *str)
* The user explicitly requested nobp=1, enable it and
* disable the expoline support.
*/
- __set_facility(82, alt_stfle_fac_list);
+ nobp = 1;
if (IS_ENABLED(CONFIG_EXPOLINE))
nospec_disable = 1;
} else {
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
}
return 0;
}
@@ -29,7 +31,7 @@ early_param("nobp", nobp_setup_early);
static int __init nospec_setup_early(char *str)
{
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
return 0;
}
early_param("nospec", nospec_setup_early);
@@ -40,7 +42,7 @@ static int __init nospec_report(void)
pr_info("Spectre V2 mitigation: etokens\n");
if (nospec_uses_trampoline())
pr_info("Spectre V2 mitigation: execute trampolines\n");
- if (__test_facility(82, alt_stfle_fac_list))
+ if (nobp_enabled())
pr_info("Spectre V2 mitigation: limited branch prediction\n");
return 0;
}
@@ -66,14 +68,14 @@ void __init nospec_auto_detect(void)
*/
if (__is_defined(CC_USING_EXPOLINE))
nospec_disable = 1;
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
} else if (__is_defined(CC_USING_EXPOLINE)) {
/*
* The kernel has been compiled with expolines.
* Keep expolines enabled and disable nobp.
*/
nospec_disable = 0;
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
}
/*
* If the kernel has not been compiled with expolines the
@@ -86,7 +88,7 @@ static int __init spectre_v2_setup_early(char *str)
{
if (str && !strncmp(str, "on", 2)) {
nospec_disable = 0;
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
}
if (str && !strncmp(str, "off", 3))
nospec_disable = 1;
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
index 52d4353188ad..a95188818637 100644
--- a/arch/s390/kernel/nospec-sysfs.c
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -17,7 +17,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
return sprintf(buf, "Mitigation: etokens\n");
if (nospec_uses_trampoline())
return sprintf(buf, "Mitigation: execute trampolines\n");
- if (__test_facility(82, alt_stfle_fac_list))
+ if (nobp_enabled())
return sprintf(buf, "Mitigation: limited branch prediction\n");
return sprintf(buf, "Vulnerable\n");
}
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 1434642e9cba..6968be98af11 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -556,25 +556,31 @@ static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth)
struct cf_trailer_entry *trailer_start, *trailer_stop;
struct cf_ctrset_entry *ctrstart, *ctrstop;
size_t offset = 0;
+ int i;
- auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1;
- do {
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset);
ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset);
+ /* Counter set not authorized */
+ if (!(auth & cpumf_ctr_ctl[i]))
+ continue;
+ /* Counter set size zero was not saved */
+ if (!cpum_cf_read_setsize(i))
+ continue;
+
if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) {
pr_err_once("cpum_cf_diag counter set compare error "
"in set %i\n", ctrstart->set);
return 0;
}
- auth &= ~cpumf_ctr_ctl[ctrstart->set];
if (ctrstart->def == CF_DIAG_CTRSET_DEF) {
cfdiag_diffctrset((u64 *)(ctrstart + 1),
(u64 *)(ctrstop + 1), ctrstart->ctr);
offset += ctrstart->ctr * sizeof(u64) +
sizeof(*ctrstart);
}
- } while (ctrstart->def && auth);
+ }
/* Save time_stamp from start of event in stop's trailer */
trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset);
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 65c1464eea4f..5ce9a795a0fe 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -17,7 +17,8 @@
#include <linux/mm_types.h>
#include <linux/delay.h>
#include <linux/cpu.h>
-
+#include <linux/smp.h>
+#include <asm/text-patching.h>
#include <asm/diag.h>
#include <asm/facility.h>
#include <asm/elf.h>
@@ -79,6 +80,23 @@ void notrace stop_machine_yield(const struct cpumask *cpumask)
}
}
+static void do_sync_core(void *info)
+{
+ sync_core();
+}
+
+void text_poke_sync(void)
+{
+ on_each_cpu(do_sync_core, NULL, 1);
+}
+
+void text_poke_sync_lock(void)
+{
+ cpus_read_lock();
+ text_poke_sync();
+ cpus_read_unlock();
+}
+
/*
* cpu_init - initializes state that is per-CPU.
*/
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 88087a32ebc6..69fcaf54d5ca 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -9,6 +9,7 @@
#include <asm/asm-offsets.h>
#include <asm/nospec-insn.h>
#include <asm/sigp.h>
+#include <asm/lowcore.h>
GEN_BR_THUNK %r9
@@ -20,20 +21,15 @@
# r3 = Parameter for function
#
SYM_CODE_START(store_status)
- /* Save register one and load save area base */
- stg %r1,__LC_SAVE_AREA_RESTART
+ STMG_LC %r0,%r15,__LC_GPREGS_SAVE_AREA
/* General purpose registers */
- lghi %r1,__LC_GPREGS_SAVE_AREA
- stmg %r0,%r15,0(%r1)
- mvc 8(8,%r1),__LC_SAVE_AREA_RESTART
+ GET_LC %r13
/* Control registers */
- lghi %r1,__LC_CREGS_SAVE_AREA
- stctg %c0,%c15,0(%r1)
+ stctg %c0,%c15,__LC_CREGS_SAVE_AREA(%r13)
/* Access registers */
- lghi %r1,__LC_AREGS_SAVE_AREA
- stam %a0,%a15,0(%r1)
+ stamy %a0,%a15,__LC_AREGS_SAVE_AREA(%r13)
/* Floating point registers */
- lghi %r1,__LC_FPREGS_SAVE_AREA
+ lay %r1,__LC_FPREGS_SAVE_AREA(%r13)
std %f0, 0x00(%r1)
std %f1, 0x08(%r1)
std %f2, 0x10(%r1)
@@ -51,21 +47,21 @@ SYM_CODE_START(store_status)
std %f14,0x70(%r1)
std %f15,0x78(%r1)
/* Floating point control register */
- lghi %r1,__LC_FP_CREG_SAVE_AREA
+ lay %r1,__LC_FP_CREG_SAVE_AREA(%r13)
stfpc 0(%r1)
/* CPU timer */
- lghi %r1,__LC_CPU_TIMER_SAVE_AREA
+ lay %r1,__LC_CPU_TIMER_SAVE_AREA(%r13)
stpt 0(%r1)
/* Store prefix register */
- lghi %r1,__LC_PREFIX_SAVE_AREA
+ lay %r1,__LC_PREFIX_SAVE_AREA(%r13)
stpx 0(%r1)
/* Clock comparator - seven bytes */
- lghi %r1,__LC_CLOCK_COMP_SAVE_AREA
larl %r4,clkcmp
stckc 0(%r4)
+ lay %r1,__LC_CLOCK_COMP_SAVE_AREA(%r13)
mvc 1(7,%r1),1(%r4)
/* Program status word */
- lghi %r1,__LC_PSW_SAVE_AREA
+ lay %r1,__LC_PSW_SAVE_AREA(%r13)
epsw %r4,%r5
st %r4,0(%r1)
st %r5,4(%r1)
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 3993f4caf224..4ec99f73fa27 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -149,13 +149,12 @@ unsigned long __bootdata_preserved(max_mappable);
struct physmem_info __bootdata(physmem_info);
struct vm_layout __bootdata_preserved(vm_layout);
-EXPORT_SYMBOL_GPL(vm_layout);
+EXPORT_SYMBOL(vm_layout);
int __bootdata_preserved(__kaslr_enabled);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
-u64 alt_stfle_fac_list[16];
struct oldmem_data __bootdata_preserved(oldmem_data);
unsigned long VMALLOC_START;
@@ -406,6 +405,7 @@ static void __init setup_lowcore(void)
panic("%s: Failed to allocate %zu bytes align=%zx\n",
__func__, sizeof(*lc), sizeof(*lc));
+ lc->pcpu = (unsigned long)per_cpu_ptr(&pcpu_devices, 0);
lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
lc->restart_psw.addr = __pa(restart_int_handler);
lc->external_new_psw.mask = PSW_KERNEL_BITS;
@@ -889,6 +889,9 @@ void __init setup_arch(char **cmdline_p)
else
pr_info("Linux is running as a guest in 64-bit mode\n");
+ if (have_relocated_lowcore())
+ pr_info("Lowcore relocated to 0x%px\n", get_lowcore());
+
log_component_list();
/* Have one command line that is parsed and saved in /proc/cmdline */
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index c3c54adf67bc..fbba37ec53cf 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -74,16 +74,15 @@ enum {
CPU_STATE_CONFIGURED,
};
-struct pcpu {
- unsigned long ec_mask; /* bit mask for ec_xxx functions */
- unsigned long ec_clk; /* sigp timestamp for ec_xxx */
- signed char state; /* physical cpu state */
- signed char polarization; /* physical polarization */
- u16 address; /* physical cpu address */
-};
-
static u8 boot_core_type;
-static struct pcpu pcpu_devices[NR_CPUS];
+DEFINE_PER_CPU(struct pcpu, pcpu_devices);
+/*
+ * Pointer to the pcpu area of the boot CPU. This is required when a restart
+ * interrupt is triggered on an offline CPU. For that case accessing percpu
+ * data with the common primitives does not work, since the percpu offset is
+ * stored in a non existent lowcore.
+ */
+static struct pcpu *ipl_pcpu;
unsigned int smp_cpu_mt_shift;
EXPORT_SYMBOL(smp_cpu_mt_shift);
@@ -174,8 +173,8 @@ static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
int cpu;
for_each_cpu(cpu, mask)
- if (pcpu_devices[cpu].address == address)
- return pcpu_devices + cpu;
+ if (per_cpu(pcpu_devices, cpu).address == address)
+ return &per_cpu(pcpu_devices, cpu);
return NULL;
}
@@ -230,13 +229,11 @@ out:
return -ENOMEM;
}
-static void pcpu_free_lowcore(struct pcpu *pcpu)
+static void pcpu_free_lowcore(struct pcpu *pcpu, int cpu)
{
unsigned long async_stack, nodat_stack, mcck_stack;
struct lowcore *lc;
- int cpu;
- cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET;
async_stack = lc->async_stack - STACK_INIT_OFFSET;
@@ -259,6 +256,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
lc->cpu_nr = cpu;
+ lc->pcpu = (unsigned long)pcpu;
lc->restart_flags = RESTART_FLAG_CTLREGS;
lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->spinlock_index = 0;
@@ -277,12 +275,10 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
arch_spin_lock_setup(cpu);
}
-static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
+static void pcpu_attach_task(int cpu, struct task_struct *tsk)
{
struct lowcore *lc;
- int cpu;
- cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET;
lc->current_task = (unsigned long)tsk;
@@ -296,18 +292,16 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
lc->steal_timer = 0;
}
-static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
+static void pcpu_start_fn(int cpu, void (*func)(void *), void *data)
{
struct lowcore *lc;
- int cpu;
- cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
lc->restart_stack = lc->kernel_stack;
lc->restart_fn = (unsigned long) func;
lc->restart_data = (unsigned long) data;
lc->restart_source = -1U;
- pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
+ pcpu_sigp_retry(per_cpu_ptr(&pcpu_devices, cpu), SIGP_RESTART, 0);
}
typedef void (pcpu_delegate_fn)(void *);
@@ -320,14 +314,14 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data)
func(data); /* should not return */
}
-static void pcpu_delegate(struct pcpu *pcpu,
+static void pcpu_delegate(struct pcpu *pcpu, int cpu,
pcpu_delegate_fn *func,
void *data, unsigned long stack)
{
struct lowcore *lc, *abs_lc;
unsigned int source_cpu;
- lc = lowcore_ptr[pcpu - pcpu_devices];
+ lc = lowcore_ptr[cpu];
source_cpu = stap();
if (pcpu->address == source_cpu) {
@@ -377,38 +371,22 @@ static int pcpu_set_smt(unsigned int mtid)
smp_cpu_mt_shift = 0;
while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
smp_cpu_mt_shift++;
- pcpu_devices[0].address = stap();
+ per_cpu(pcpu_devices, 0).address = stap();
}
return cc;
}
/*
- * Call function on an online CPU.
- */
-void smp_call_online_cpu(void (*func)(void *), void *data)
-{
- struct pcpu *pcpu;
-
- /* Use the current cpu if it is online. */
- pcpu = pcpu_find_address(cpu_online_mask, stap());
- if (!pcpu)
- /* Use the first online cpu. */
- pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
- pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
-}
-
-/*
* Call function on the ipl CPU.
*/
void smp_call_ipl_cpu(void (*func)(void *), void *data)
{
struct lowcore *lc = lowcore_ptr[0];
- if (pcpu_devices[0].address == stap())
+ if (ipl_pcpu->address == stap())
lc = get_lowcore();
- pcpu_delegate(&pcpu_devices[0], func, data,
- lc->nodat_stack);
+ pcpu_delegate(ipl_pcpu, 0, func, data, lc->nodat_stack);
}
int smp_find_processor_id(u16 address)
@@ -416,21 +394,21 @@ int smp_find_processor_id(u16 address)
int cpu;
for_each_present_cpu(cpu)
- if (pcpu_devices[cpu].address == address)
+ if (per_cpu(pcpu_devices, cpu).address == address)
return cpu;
return -1;
}
void schedule_mcck_handler(void)
{
- pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending);
+ pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_mcck_pending);
}
bool notrace arch_vcpu_is_preempted(int cpu)
{
if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
return false;
- if (pcpu_running(pcpu_devices + cpu))
+ if (pcpu_running(per_cpu_ptr(&pcpu_devices, cpu)))
return false;
return true;
}
@@ -442,7 +420,7 @@ void notrace smp_yield_cpu(int cpu)
return;
diag_stat_inc_norecursion(DIAG_STAT_X09C);
asm volatile("diag %0,0,0x9c"
- : : "d" (pcpu_devices[cpu].address));
+ : : "d" (per_cpu(pcpu_devices, cpu).address));
}
EXPORT_SYMBOL_GPL(smp_yield_cpu);
@@ -463,7 +441,7 @@ void notrace smp_emergency_stop(void)
end = get_tod_clock() + (1000000UL << 12);
for_each_cpu(cpu, &cpumask) {
- struct pcpu *pcpu = pcpu_devices + cpu;
+ struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
set_bit(ec_stop_cpu, &pcpu->ec_mask);
while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
0, NULL) == SIGP_CC_BUSY &&
@@ -472,7 +450,7 @@ void notrace smp_emergency_stop(void)
}
while (get_tod_clock() < end) {
for_each_cpu(cpu, &cpumask)
- if (pcpu_stopped(pcpu_devices + cpu))
+ if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu)))
cpumask_clear_cpu(cpu, &cpumask);
if (cpumask_empty(&cpumask))
break;
@@ -487,6 +465,7 @@ NOKPROBE_SYMBOL(smp_emergency_stop);
*/
void smp_send_stop(void)
{
+ struct pcpu *pcpu;
int cpu;
/* Disable all interrupts/machine checks */
@@ -502,8 +481,9 @@ void smp_send_stop(void)
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
- pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0);
- while (!pcpu_stopped(pcpu_devices + cpu))
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
+ pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+ while (!pcpu_stopped(pcpu))
cpu_relax();
}
}
@@ -517,7 +497,7 @@ static void smp_handle_ext_call(void)
unsigned long bits;
/* handle bit signal external calls */
- bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
+ bits = this_cpu_xchg(pcpu_devices.ec_mask, 0);
if (test_bit(ec_stop_cpu, &bits))
smp_stop_cpu();
if (test_bit(ec_schedule, &bits))
@@ -542,12 +522,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
int cpu;
for_each_cpu(cpu, mask)
- pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}
void arch_send_call_function_single_ipi(int cpu)
{
- pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}
/*
@@ -557,13 +537,13 @@ void arch_send_call_function_single_ipi(int cpu)
*/
void arch_smp_send_reschedule(int cpu)
{
- pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_schedule);
}
#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
- pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work);
+ pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_irq_work);
}
#endif
@@ -575,7 +555,7 @@ int smp_store_status(int cpu)
struct pcpu *pcpu;
unsigned long pa;
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
lc = lowcore_ptr[cpu];
pa = __pa(&lc->floating_pt_save_area);
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
@@ -683,17 +663,17 @@ void __init smp_save_dump_secondary_cpus(void)
void smp_cpu_set_polarization(int cpu, int val)
{
- pcpu_devices[cpu].polarization = val;
+ per_cpu(pcpu_devices, cpu).polarization = val;
}
int smp_cpu_get_polarization(int cpu)
{
- return pcpu_devices[cpu].polarization;
+ return per_cpu(pcpu_devices, cpu).polarization;
}
int smp_cpu_get_cpu_address(int cpu)
{
- return pcpu_devices[cpu].address;
+ return per_cpu(pcpu_devices, cpu).address;
}
static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
@@ -732,7 +712,7 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
if (pcpu_find_address(cpu_present_mask, address + i))
continue;
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
pcpu->address = address + i;
if (configured)
pcpu->state = CPU_STATE_CONFIGURED;
@@ -767,7 +747,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
* that all SMT threads get subsequent logical CPU numbers.
*/
if (early) {
- core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
+ core_id = per_cpu(pcpu_devices, 0).address >> smp_cpu_mt_shift;
for (i = 0; i < info->configured; i++) {
core = &info->core[i];
if (core->core_id == core_id) {
@@ -867,7 +847,7 @@ static void smp_start_secondary(void *cpuvoid)
/* Upping and downing of CPUs */
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
- struct pcpu *pcpu = pcpu_devices + cpu;
+ struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
int rc;
if (pcpu->state != CPU_STATE_CONFIGURED)
@@ -885,8 +865,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
*/
system_ctlreg_lock();
pcpu_prepare_secondary(pcpu, cpu);
- pcpu_attach_task(pcpu, tidle);
- pcpu_start_fn(pcpu, smp_start_secondary, NULL);
+ pcpu_attach_task(cpu, tidle);
+ pcpu_start_fn(cpu, smp_start_secondary, NULL);
/* Wait until cpu puts itself in the online & active maps */
while (!cpu_online(cpu))
cpu_relax();
@@ -931,18 +911,19 @@ void __cpu_die(unsigned int cpu)
struct pcpu *pcpu;
/* Wait until target cpu is down */
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
while (!pcpu_stopped(pcpu))
cpu_relax();
- pcpu_free_lowcore(pcpu);
+ pcpu_free_lowcore(pcpu, cpu);
cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
+ pcpu->flags = 0;
}
void __noreturn cpu_die(void)
{
idle_task_exit();
- pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
+ pcpu_sigp_retry(this_cpu_ptr(&pcpu_devices), SIGP_STOP, 0);
for (;;) ;
}
@@ -972,11 +953,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
void __init smp_prepare_boot_cpu(void)
{
- struct pcpu *pcpu = pcpu_devices;
+ struct lowcore *lc = get_lowcore();
WARN_ON(!cpu_present(0) || !cpu_online(0));
- pcpu->state = CPU_STATE_CONFIGURED;
- get_lowcore()->percpu_offset = __per_cpu_offset[0];
+ lc->percpu_offset = __per_cpu_offset[0];
+ ipl_pcpu = per_cpu_ptr(&pcpu_devices, 0);
+ ipl_pcpu->state = CPU_STATE_CONFIGURED;
+ lc->pcpu = (unsigned long)ipl_pcpu;
smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
}
@@ -984,8 +967,8 @@ void __init smp_setup_processor_id(void)
{
struct lowcore *lc = get_lowcore();
- pcpu_devices[0].address = stap();
lc->cpu_nr = 0;
+ per_cpu(pcpu_devices, 0).address = stap();
lc->spinlock_lockval = arch_spin_lockval(0);
lc->spinlock_index = 0;
}
@@ -1007,7 +990,7 @@ static ssize_t cpu_configure_show(struct device *dev,
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
+ count = sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
@@ -1033,7 +1016,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++)
if (cpu_online(cpu + i))
goto out;
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
rc = 0;
switch (val) {
case 0:
@@ -1045,7 +1028,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++) {
if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
continue;
- pcpu[i].state = CPU_STATE_STANDBY;
+ per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_STANDBY;
smp_cpu_set_polarization(cpu + i,
POLARIZATION_UNKNOWN);
}
@@ -1060,7 +1043,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++) {
if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
continue;
- pcpu[i].state = CPU_STATE_CONFIGURED;
+ per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_CONFIGURED;
smp_cpu_set_polarization(cpu + i,
POLARIZATION_UNKNOWN);
}
@@ -1079,7 +1062,7 @@ static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
static ssize_t show_cpu_address(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
+ return sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address);
}
static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
@@ -1105,14 +1088,14 @@ static struct attribute_group cpu_online_attr_group = {
static int smp_cpu_online(unsigned int cpu)
{
- struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group);
}
static int smp_cpu_pre_down(unsigned int cpu)
{
- struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group);
return 0;
@@ -1125,7 +1108,7 @@ bool arch_cpu_is_hotpluggable(int cpu)
int arch_register_cpu(int cpu)
{
- struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
int rc;
c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 98ef6dc7916b..22029ecae1c5 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -594,7 +594,7 @@ static int __init topology_setup(char *str)
}
early_param("topology", topology_setup);
-static int topology_ctl_handler(struct ctl_table *ctl, int write,
+static int topology_ctl_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int enabled = topology_is_enabled();
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index fa62fa0e369f..36db065c7cf7 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -18,11 +18,22 @@
#include <asm/sections.h>
#include <asm/uv.h>
+#if !IS_ENABLED(CONFIG_KVM)
+unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+ return 0;
+}
+
+int gmap_fault(struct gmap *gmap, unsigned long gaddr,
+ unsigned int fault_flags)
+{
+ return 0;
+}
+#endif
+
/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
EXPORT_SYMBOL(prot_virt_guest);
-#endif
/*
* uv_info contains both host and guest information but it's currently only
@@ -35,7 +46,6 @@ EXPORT_SYMBOL(prot_virt_guest);
struct uv_info __bootdata_preserved(uv_info);
EXPORT_SYMBOL(uv_info);
-#if IS_ENABLED(CONFIG_KVM)
int __bootdata_preserved(prot_virt_host);
EXPORT_SYMBOL(prot_virt_host);
@@ -543,9 +553,6 @@ int arch_make_page_accessible(struct page *page)
return arch_make_folio_accessible(page_folio(page));
}
EXPORT_SYMBOL_GPL(arch_make_page_accessible);
-#endif
-
-#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
static ssize_t uv_query_facilities(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -721,24 +728,13 @@ static struct attribute_group uv_query_attr_group = {
static ssize_t uv_is_prot_virt_guest(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- int val = 0;
-
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
- val = prot_virt_guest;
-#endif
- return sysfs_emit(buf, "%d\n", val);
+ return sysfs_emit(buf, "%d\n", prot_virt_guest);
}
static ssize_t uv_is_prot_virt_host(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- int val = 0;
-
-#if IS_ENABLED(CONFIG_KVM)
- val = prot_virt_host;
-#endif
-
- return sysfs_emit(buf, "%d\n", val);
+ return sysfs_emit(buf, "%d\n", prot_virt_host);
}
static struct kobj_attribute uv_prot_virt_guest =
@@ -790,4 +786,3 @@ out_kobj:
return rc;
}
device_initcall(uv_info_init);
-#endif
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index a1ce3925ec71..975c654cf5a5 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -190,6 +190,9 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(0x100)
+ RUNTIME_CONST(shift, d_hash_shift)
+ RUNTIME_CONST(ptr, dentry_hashtable)
+
PERCPU_SECTION(0x100)
. = ALIGN(PAGE_SIZE);
@@ -219,6 +222,8 @@ SECTIONS
QUAD(init_mm)
QUAD(swapper_pg_dir)
QUAD(invalid_pg_dir)
+ QUAD(__alt_instructions)
+ QUAD(__alt_instructions_end)
#ifdef CONFIG_KASAN
QUAD(kasan_early_shadow_page)
QUAD(kasan_early_shadow_pte)
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 0c9a73a18826..9f86ad8fa8b4 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -75,7 +75,7 @@ static inline int arch_load_niai4(int *lock)
int owner;
asm_inline volatile(
- ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */
" l %0,%1\n"
: "=d" (owner) : "Q" (*lock) : "memory");
return owner;
@@ -86,7 +86,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
int expected = old;
asm_inline volatile(
- ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */
" cs %0,%3,%1\n"
: "=d" (old), "=Q" (*lock)
: "0" (old), "d" (new), "Q" (*lock)
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 5cb5e724cde3..75d15bf41d97 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -243,7 +243,7 @@ static int cmm_skip_blanks(char *cp, char **endp)
return str != cp;
}
-static int cmm_pages_handler(struct ctl_table *ctl, int write,
+static int cmm_pages_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
long nr = cmm_get_pages();
@@ -262,7 +262,7 @@ static int cmm_pages_handler(struct ctl_table *ctl, int write,
return 0;
}
-static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
+static int cmm_timed_pages_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -282,7 +282,7 @@ static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
return 0;
}
-static int cmm_timeout_handler(struct ctl_table *ctl, int write,
+static int cmm_timeout_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
char buf[64], *p;
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 45db5f47b22d..98dab3e049de 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -36,6 +36,16 @@ enum address_markers_idx {
VMEMMAP_END_NR,
VMALLOC_NR,
VMALLOC_END_NR,
+#ifdef CONFIG_KMSAN
+ KMSAN_VMALLOC_SHADOW_START_NR,
+ KMSAN_VMALLOC_SHADOW_END_NR,
+ KMSAN_VMALLOC_ORIGIN_START_NR,
+ KMSAN_VMALLOC_ORIGIN_END_NR,
+ KMSAN_MODULES_SHADOW_START_NR,
+ KMSAN_MODULES_SHADOW_END_NR,
+ KMSAN_MODULES_ORIGIN_START_NR,
+ KMSAN_MODULES_ORIGIN_END_NR,
+#endif
MODULES_NR,
MODULES_END_NR,
ABS_LOWCORE_NR,
@@ -65,6 +75,16 @@ static struct addr_marker address_markers[] = {
[VMEMMAP_END_NR] = {0, "vmemmap Area End"},
[VMALLOC_NR] = {0, "vmalloc Area Start"},
[VMALLOC_END_NR] = {0, "vmalloc Area End"},
+#ifdef CONFIG_KMSAN
+ [KMSAN_VMALLOC_SHADOW_START_NR] = {0, "Kmsan vmalloc Shadow Start"},
+ [KMSAN_VMALLOC_SHADOW_END_NR] = {0, "Kmsan vmalloc Shadow End"},
+ [KMSAN_VMALLOC_ORIGIN_START_NR] = {0, "Kmsan vmalloc Origins Start"},
+ [KMSAN_VMALLOC_ORIGIN_END_NR] = {0, "Kmsan vmalloc Origins End"},
+ [KMSAN_MODULES_SHADOW_START_NR] = {0, "Kmsan Modules Shadow Start"},
+ [KMSAN_MODULES_SHADOW_END_NR] = {0, "Kmsan Modules Shadow End"},
+ [KMSAN_MODULES_ORIGIN_START_NR] = {0, "Kmsan Modules Origins Start"},
+ [KMSAN_MODULES_ORIGIN_END_NR] = {0, "Kmsan Modules Origins End"},
+#endif
[MODULES_NR] = {0, "Modules Area Start"},
[MODULES_END_NR] = {0, "Modules Area End"},
[ABS_LOWCORE_NR] = {0, "Lowcore Area Start"},
@@ -307,6 +327,16 @@ static int pt_dump_init(void)
address_markers[KFENCE_START_NR].start_address = kfence_start;
address_markers[KFENCE_END_NR].start_address = kfence_start + KFENCE_POOL_SIZE;
#endif
+#ifdef CONFIG_KMSAN
+ address_markers[KMSAN_VMALLOC_SHADOW_START_NR].start_address = KMSAN_VMALLOC_SHADOW_START;
+ address_markers[KMSAN_VMALLOC_SHADOW_END_NR].start_address = KMSAN_VMALLOC_SHADOW_END;
+ address_markers[KMSAN_VMALLOC_ORIGIN_START_NR].start_address = KMSAN_VMALLOC_ORIGIN_START;
+ address_markers[KMSAN_VMALLOC_ORIGIN_END_NR].start_address = KMSAN_VMALLOC_ORIGIN_END;
+ address_markers[KMSAN_MODULES_SHADOW_START_NR].start_address = KMSAN_MODULES_SHADOW_START;
+ address_markers[KMSAN_MODULES_SHADOW_END_NR].start_address = KMSAN_MODULES_SHADOW_END;
+ address_markers[KMSAN_MODULES_ORIGIN_START_NR].start_address = KMSAN_MODULES_ORIGIN_START;
+ address_markers[KMSAN_MODULES_ORIGIN_END_NR].start_address = KMSAN_MODULES_ORIGIN_END;
+#endif
sort_address_markers();
#ifdef CONFIG_PTDUMP_DEBUGFS
debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 632c3a55feed..28a18c42ba99 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -48,7 +48,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
}
/*
- * s390_kernel_write - write to kernel memory bypassing DAT
+ * __s390_kernel_write - write to kernel memory bypassing DAT
* @dst: destination address
* @src: source address
* @size: number of bytes to copy
@@ -61,7 +61,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
*/
static DEFINE_SPINLOCK(s390_kernel_write_lock);
-notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
+notrace void *__s390_kernel_write(void *dst, const void *src, size_t size)
{
void *tmp = dst;
unsigned long flags;
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 0ef83b6ac0db..84482a921332 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -268,33 +268,20 @@ static void zpci_floating_irq_handler(struct airq_struct *airq,
}
}
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs,
+ unsigned long *bit)
{
- struct zpci_dev *zdev = to_zpci(pdev);
- unsigned int hwirq, msi_vecs, cpu;
- unsigned long bit;
- struct msi_desc *msi;
- struct msi_msg msg;
- int cpu_addr;
- int rc, irq;
-
- zdev->aisb = -1UL;
- zdev->msi_first_bit = -1U;
- if (type == PCI_CAP_ID_MSI && nvec > 1)
- return 1;
- msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
-
if (irq_delivery == DIRECTED) {
/* Allocate cpu vector bits */
- bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
- if (bit == -1UL)
+ *bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
+ if (*bit == -1UL)
return -EIO;
} else {
/* Allocate adapter summary indicator bit */
- bit = airq_iv_alloc_bit(zpci_sbv);
- if (bit == -1UL)
+ *bit = airq_iv_alloc_bit(zpci_sbv);
+ if (*bit == -1UL)
return -EIO;
- zdev->aisb = bit;
+ zdev->aisb = *bit;
/* Create adapter interrupt vector */
zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
@@ -302,27 +289,66 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
return -ENOMEM;
/* Wire up shortcut pointer */
- zpci_ibv[bit] = zdev->aibv;
+ zpci_ibv[*bit] = zdev->aibv;
/* Each function has its own interrupt vector */
- bit = 0;
+ *bit = 0;
}
+ return 0;
+}
- /* Request MSI interrupts */
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+ unsigned int hwirq, msi_vecs, irqs_per_msi, i, cpu;
+ struct zpci_dev *zdev = to_zpci(pdev);
+ struct msi_desc *msi;
+ struct msi_msg msg;
+ unsigned long bit;
+ int cpu_addr;
+ int rc, irq;
+
+ zdev->aisb = -1UL;
+ zdev->msi_first_bit = -1U;
+
+ msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
+ if (msi_vecs < nvec) {
+ pr_info("%s requested %d irqs, allocate system limit of %d",
+ pci_name(pdev), nvec, zdev->max_msi);
+ }
+
+ rc = __alloc_airq(zdev, msi_vecs, &bit);
+ if (rc < 0)
+ return rc;
+
+ /*
+ * Request MSI interrupts:
+ * When using MSI, nvec_used interrupt sources and their irq
+ * descriptors are controlled through one msi descriptor.
+ * Thus the outer loop over msi descriptors shall run only once,
+ * while two inner loops iterate over the interrupt vectors.
+ * When using MSI-X, each interrupt vector/irq descriptor
+ * is bound to exactly one msi descriptor (nvec_used is one).
+ * So the inner loops are executed once, while the outer iterates
+ * over the MSI-X descriptors.
+ */
hwirq = bit;
msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
- rc = -EIO;
if (hwirq - bit >= msi_vecs)
break;
- irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE,
- (irq_delivery == DIRECTED) ?
- msi->affinity : NULL);
+ irqs_per_msi = min_t(unsigned int, msi_vecs, msi->nvec_used);
+ irq = __irq_alloc_descs(-1, 0, irqs_per_msi, 0, THIS_MODULE,
+ (irq_delivery == DIRECTED) ?
+ msi->affinity : NULL);
if (irq < 0)
return -ENOMEM;
- rc = irq_set_msi_desc(irq, msi);
- if (rc)
- return rc;
- irq_set_chip_and_handler(irq, &zpci_irq_chip,
- handle_percpu_irq);
+
+ for (i = 0; i < irqs_per_msi; i++) {
+ rc = irq_set_msi_desc_off(irq, i, msi);
+ if (rc)
+ return rc;
+ irq_set_chip_and_handler(irq + i, &zpci_irq_chip,
+ handle_percpu_irq);
+ }
+
msg.data = hwirq - bit;
if (irq_delivery == DIRECTED) {
if (msi->affinity)
@@ -335,31 +361,35 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
msg.address_lo |= (cpu_addr << 8);
for_each_possible_cpu(cpu) {
- airq_iv_set_data(zpci_ibv[cpu], hwirq, irq);
+ for (i = 0; i < irqs_per_msi; i++)
+ airq_iv_set_data(zpci_ibv[cpu],
+ hwirq + i, irq + i);
}
} else {
msg.address_lo = zdev->msi_addr & 0xffffffff;
- airq_iv_set_data(zdev->aibv, hwirq, irq);
+ for (i = 0; i < irqs_per_msi; i++)
+ airq_iv_set_data(zdev->aibv, hwirq + i, irq + i);
}
msg.address_hi = zdev->msi_addr >> 32;
pci_write_msi_msg(irq, &msg);
- hwirq++;
+ hwirq += irqs_per_msi;
}
zdev->msi_first_bit = bit;
- zdev->msi_nr_irqs = msi_vecs;
+ zdev->msi_nr_irqs = hwirq - bit;
rc = zpci_set_irq(zdev);
if (rc)
return rc;
- return (msi_vecs == nvec) ? 0 : msi_vecs;
+ return (zdev->msi_nr_irqs == nvec) ? 0 : zdev->msi_nr_irqs;
}
void arch_teardown_msi_irqs(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
struct msi_desc *msi;
+ unsigned int i;
int rc;
/* Disable interrupts */
@@ -369,8 +399,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
/* Release MSI interrupts */
msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) {
- irq_set_msi_desc(msi->irq, NULL);
- irq_free_desc(msi->irq);
+ for (i = 0; i < msi->nvec_used; i++) {
+ irq_set_msi_desc(msi->irq + i, NULL);
+ irq_free_desc(msi->irq + i);
+ }
msi->msg.address_lo = 0;
msi->msg.address_hi = 0;
msi->msg.data = 0;
diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h
index 587fb7841096..0ca8c3463166 100644
--- a/arch/sparc/include/asm/vio.h
+++ b/arch/sparc/include/asm/vio.h
@@ -483,11 +483,7 @@ int __vio_register_driver(struct vio_driver *drv, struct module *owner,
__vio_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)
void vio_unregister_driver(struct vio_driver *drv);
-static inline struct vio_driver *to_vio_driver(struct device_driver *drv)
-{
- return container_of(drv, struct vio_driver, driver);
-}
-
+#define to_vio_driver(__drv) container_of_const(__drv, struct vio_driver, driver)
#define to_vio_dev(__dev) container_of_const(__dev, struct vio_dev, dev)
int vio_ldc_send(struct vio_driver_state *vio, void *data, int len);
diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c
index 846a55f942d4..07933d75ac81 100644
--- a/arch/sparc/kernel/vio.c
+++ b/arch/sparc/kernel/vio.c
@@ -54,10 +54,10 @@ static int vio_hotplug(const struct device *dev, struct kobj_uevent_env *env)
return 0;
}
-static int vio_bus_match(struct device *dev, struct device_driver *drv)
+static int vio_bus_match(struct device *dev, const struct device_driver *drv)
{
struct vio_dev *vio_dev = to_vio_dev(dev);
- struct vio_driver *vio_drv = to_vio_driver(drv);
+ const struct vio_driver *vio_drv = to_vio_driver(drv);
const struct vio_device_id *matches = vio_drv->id_table;
if (!matches)
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 93a5a8999b07..dca84fd6d00a 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -11,7 +11,7 @@ config UML
select ARCH_HAS_KCOV
select ARCH_HAS_STRNCPY_FROM_USER
select ARCH_HAS_STRNLEN_USER
- select ARCH_NO_PREEMPT
+ select ARCH_NO_PREEMPT_DYNAMIC
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_KASAN if X86_64
select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
@@ -31,7 +31,8 @@ config UML
select TRACE_IRQFLAGS_SUPPORT
select TTY # Needed for line.c
select HAVE_ARCH_VMAP_STACK
- select HAVE_RUST if X86_64
+ select HAVE_RUST
+ select ARCH_HAS_UBSAN
config MMU
bool
@@ -48,12 +49,13 @@ config NO_IOMEM
config UML_IOMEM_EMULATION
bool
select INDIRECT_IOMEM
+ select HAS_IOPORT
select GENERIC_PCI_IOMAP
select GENERIC_IOMAP
select NO_GENERIC_PCI_IOPORT_MAP
config NO_IOPORT_MAP
- def_bool y
+ def_bool !UML_IOMEM_EMULATION
config ISA
bool
diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
index b94b2618e7d8..ede40a160c5e 100644
--- a/arch/um/drivers/Kconfig
+++ b/arch/um/drivers/Kconfig
@@ -297,26 +297,6 @@ config UML_NET_MCAST
If unsure, say N.
-config UML_NET_PCAP
- bool "pcap transport (obsolete)"
- depends on UML_NET
- depends on !MODVERSIONS
- select MAY_HAVE_RUNTIME_DEPS
- help
- The pcap transport makes a pcap packet stream on the host look
- like an ethernet device inside UML. This is useful for making
- UML act as a network monitor for the host. You must have libcap
- installed in order to build the pcap transport into UML.
-
- For more information, see
- <http://user-mode-linux.sourceforge.net/old/networking.html> That site
- has examples of the UML command line to use to enable this option.
-
- NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
- migrate to UML_NET_VECTOR.
-
- If unsure, say N.
-
config UML_NET_SLIRP
bool "SLiRP transport (obsolete)"
depends on UML_NET
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index 0e6af81096fd..57882e6bc215 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -20,14 +20,9 @@ harddog-objs := harddog_kern.o
harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o
rtc-objs := rtc_kern.o rtc_user.o
-LDFLAGS_pcap.o = $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a)
-
LDFLAGS_vde.o = $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a)
-targets := pcap_kern.o pcap_user.o vde_kern.o vde_user.o
-
-$(obj)/pcap.o: $(obj)/pcap_kern.o $(obj)/pcap_user.o
- $(LD) -r -dp -o $@ $^ $(ld_flags)
+targets := vde_kern.o vde_user.o
$(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o
$(LD) -r -dp -o $@ $^ $(ld_flags)
@@ -49,7 +44,6 @@ obj-$(CONFIG_UML_NET_DAEMON) += daemon.o
obj-$(CONFIG_UML_NET_VECTOR) += vector.o
obj-$(CONFIG_UML_NET_VDE) += vde.o
obj-$(CONFIG_UML_NET_MCAST) += umcast.o
-obj-$(CONFIG_UML_NET_PCAP) += pcap.o
obj-$(CONFIG_UML_NET) += net.o
obj-$(CONFIG_MCONSOLE) += mconsole.o
obj-$(CONFIG_MMAPPER) += mmapper_kern.o
@@ -69,7 +63,7 @@ obj-$(CONFIG_UML_RTC) += rtc.o
obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virt-pci.o
# pcap_user.o must be added explicitly.
-USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
+USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o vde_user.o vector_user.o
CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)
CFLAGS_xterm.o += '-DCONFIG_XTERM_CHAN_DEFAULT_EMULATOR="$(CONFIG_XTERM_CHAN_DEFAULT_EMULATOR)"'
diff --git a/arch/um/drivers/chan.h b/arch/um/drivers/chan.h
index e14b9cdf7a33..5a61db512ffb 100644
--- a/arch/um/drivers/chan.h
+++ b/arch/um/drivers/chan.h
@@ -22,7 +22,8 @@ struct chan {
unsigned int output:1;
unsigned int opened:1;
unsigned int enabled:1;
- int fd;
+ int fd_in;
+ int fd_out; /* only different to fd_in if blocking output is needed */
const struct chan_ops *ops;
void *data;
};
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index 37538b4168da..e78a99816c86 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -81,6 +81,12 @@ static const struct chan_ops not_configged_ops = {
};
#endif /* CONFIG_NOCONFIG_CHAN */
+static inline bool need_output_blocking(void)
+{
+ return time_travel_mode == TT_MODE_INFCPU ||
+ time_travel_mode == TT_MODE_EXTERNAL;
+}
+
static int open_one_chan(struct chan *chan)
{
int fd, err;
@@ -96,15 +102,43 @@ static int open_one_chan(struct chan *chan)
return fd;
err = os_set_fd_block(fd, 0);
- if (err) {
- (*chan->ops->close)(fd, chan->data);
- return err;
- }
+ if (err)
+ goto out_close;
+
+ chan->fd_in = fd;
+ chan->fd_out = fd;
+
+ /*
+ * In time-travel modes infinite-CPU and external we need to guarantee
+ * that any writes to the output succeed immdiately from the point of
+ * the VM. The best way to do this is to put the FD in blocking mode
+ * and simply wait/retry until everything is written.
+ * As every write is guaranteed to complete, we also do not need to
+ * request an IRQ for the output.
+ *
+ * Note that input cannot happen in a time synchronized way. We permit
+ * it, but time passes very quickly if anything waits for a read.
+ */
+ if (chan->output && need_output_blocking()) {
+ err = os_dup_file(chan->fd_out);
+ if (err < 0)
+ goto out_close;
- chan->fd = fd;
+ chan->fd_out = err;
+
+ err = os_set_fd_block(chan->fd_out, 1);
+ if (err) {
+ os_close_file(chan->fd_out);
+ goto out_close;
+ }
+ }
chan->opened = 1;
return 0;
+
+out_close:
+ (*chan->ops->close)(fd, chan->data);
+ return err;
}
static int open_chan(struct list_head *chans)
@@ -125,7 +159,7 @@ static int open_chan(struct list_head *chans)
void chan_enable_winch(struct chan *chan, struct tty_port *port)
{
if (chan && chan->primary && chan->ops->winch)
- register_winch(chan->fd, port);
+ register_winch(chan->fd_in, port);
}
static void line_timer_cb(struct work_struct *work)
@@ -156,8 +190,9 @@ int enable_chan(struct line *line)
if (chan->enabled)
continue;
- err = line_setup_irq(chan->fd, chan->input, chan->output, line,
- chan);
+ err = line_setup_irq(chan->fd_in, chan->input,
+ chan->output && !need_output_blocking(),
+ line, chan);
if (err)
goto out_close;
@@ -196,7 +231,8 @@ void free_irqs(void)
if (chan->input && chan->enabled)
um_free_irq(chan->line->read_irq, chan);
- if (chan->output && chan->enabled)
+ if (chan->output && chan->enabled &&
+ !need_output_blocking())
um_free_irq(chan->line->write_irq, chan);
chan->enabled = 0;
}
@@ -216,15 +252,19 @@ static void close_one_chan(struct chan *chan, int delay_free_irq)
} else {
if (chan->input && chan->enabled)
um_free_irq(chan->line->read_irq, chan);
- if (chan->output && chan->enabled)
+ if (chan->output && chan->enabled &&
+ !need_output_blocking())
um_free_irq(chan->line->write_irq, chan);
chan->enabled = 0;
}
+ if (chan->fd_out != chan->fd_in)
+ os_close_file(chan->fd_out);
if (chan->ops->close != NULL)
- (*chan->ops->close)(chan->fd, chan->data);
+ (*chan->ops->close)(chan->fd_in, chan->data);
chan->opened = 0;
- chan->fd = -1;
+ chan->fd_in = -1;
+ chan->fd_out = -1;
}
void close_chan(struct line *line)
@@ -244,7 +284,7 @@ void close_chan(struct line *line)
void deactivate_chan(struct chan *chan, int irq)
{
if (chan && chan->enabled)
- deactivate_fd(chan->fd, irq);
+ deactivate_fd(chan->fd_in, irq);
}
int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq)
@@ -254,7 +294,7 @@ int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq)
if (len == 0 || !chan || !chan->ops->write)
return 0;
- n = chan->ops->write(chan->fd, buf, len, chan->data);
+ n = chan->ops->write(chan->fd_out, buf, len, chan->data);
if (chan->primary) {
ret = n;
}
@@ -268,7 +308,7 @@ int console_write_chan(struct chan *chan, const char *buf, int len)
if (!chan || !chan->ops->console_write)
return 0;
- n = chan->ops->console_write(chan->fd, buf, len);
+ n = chan->ops->console_write(chan->fd_out, buf, len);
if (chan->primary)
ret = n;
return ret;
@@ -296,14 +336,14 @@ int chan_window_size(struct line *line, unsigned short *rows_out,
if (chan && chan->primary) {
if (chan->ops->window_size == NULL)
return 0;
- return chan->ops->window_size(chan->fd, chan->data,
+ return chan->ops->window_size(chan->fd_in, chan->data,
rows_out, cols_out);
}
chan = line->chan_out;
if (chan && chan->primary) {
if (chan->ops->window_size == NULL)
return 0;
- return chan->ops->window_size(chan->fd, chan->data,
+ return chan->ops->window_size(chan->fd_in, chan->data,
rows_out, cols_out);
}
return 0;
@@ -319,7 +359,7 @@ static void free_one_chan(struct chan *chan)
(*chan->ops->free)(chan->data);
if (chan->primary && chan->output)
- ignore_sigio_fd(chan->fd);
+ ignore_sigio_fd(chan->fd_in);
kfree(chan);
}
@@ -478,7 +518,8 @@ static struct chan *parse_chan(struct line *line, char *str, int device,
.output = 0,
.opened = 0,
.enabled = 0,
- .fd = -1,
+ .fd_in = -1,
+ .fd_out = -1,
.ops = ops,
.data = data });
return chan;
@@ -549,7 +590,7 @@ void chan_interrupt(struct line *line, int irq)
schedule_delayed_work(&line->task, 1);
goto out;
}
- err = chan->ops->read(chan->fd, &c, chan->data);
+ err = chan->ops->read(chan->fd_in, &c, chan->data);
if (err > 0)
tty_insert_flip_char(port, c, TTY_NORMAL);
} while (err > 0);
diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
index ec04e47b9d79..a66e556012c4 100644
--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -23,7 +23,7 @@ int generic_read(int fd, __u8 *c_out, void *unused)
{
int n;
- n = read(fd, c_out, sizeof(*c_out));
+ CATCH_EINTR(n = read(fd, c_out, sizeof(*c_out)));
if (n > 0)
return n;
else if (n == 0)
@@ -37,11 +37,23 @@ int generic_read(int fd, __u8 *c_out, void *unused)
int generic_write(int fd, const __u8 *buf, size_t n, void *unused)
{
+ int written = 0;
int err;
- err = write(fd, buf, n);
- if (err > 0)
- return err;
+ /* The FD may be in blocking mode, as such, need to retry short writes,
+ * they may have been interrupted by a signal.
+ */
+ do {
+ errno = 0;
+ err = write(fd, buf + written, n - written);
+ if (err > 0) {
+ written += err;
+ continue;
+ }
+ } while (err < 0 && errno == EINTR);
+
+ if (written > 0)
+ return written;
else if (errno == EAGAIN)
return 0;
else if (err == 0)
diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c
index 60d1c6cab8a9..99a7144b229f 100644
--- a/arch/um/drivers/harddog_kern.c
+++ b/arch/um/drivers/harddog_kern.c
@@ -49,6 +49,7 @@
#include "mconsole.h"
#include "harddog.h"
+MODULE_DESCRIPTION("UML hardware watchdog");
MODULE_LICENSE("GPL");
static DEFINE_MUTEX(harddog_mutex);
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index d82bc3fdb86e..43d8959cc746 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -383,6 +383,7 @@ int setup_one_line(struct line *lines, int n, char *init,
parse_chan_pair(NULL, line, n, opts, error_out);
err = 0;
}
+ *error_out = "configured as 'none'";
} else {
char *new = kstrdup(init, GFP_KERNEL);
if (!new) {
@@ -406,6 +407,7 @@ int setup_one_line(struct line *lines, int n, char *init,
}
}
if (err) {
+ *error_out = "failed to parse channel pair";
line->init_str = NULL;
line->valid = 0;
kfree(new);
diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c
deleted file mode 100644
index d9bf95d7867b..000000000000
--- a/arch/um/drivers/pcap_kern.c
+++ /dev/null
@@ -1,113 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <net_kern.h>
-#include "pcap_user.h"
-
-struct pcap_init {
- char *host_if;
- int promisc;
- int optimize;
- char *filter;
-};
-
-static void pcap_init_kern(struct net_device *dev, void *data)
-{
- struct uml_net_private *pri;
- struct pcap_data *ppri;
- struct pcap_init *init = data;
-
- pri = netdev_priv(dev);
- ppri = (struct pcap_data *) pri->user;
- ppri->host_if = init->host_if;
- ppri->promisc = init->promisc;
- ppri->optimize = init->optimize;
- ppri->filter = init->filter;
-
- printk("pcap backend, host interface %s\n", ppri->host_if);
-}
-
-static int pcap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return pcap_user_read(fd, skb_mac_header(skb),
- skb->dev->mtu + ETH_HEADER_OTHER,
- (struct pcap_data *) &lp->user);
-}
-
-static int pcap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return -EPERM;
-}
-
-static const struct net_kern_info pcap_kern_info = {
- .init = pcap_init_kern,
- .protocol = eth_protocol,
- .read = pcap_read,
- .write = pcap_write,
-};
-
-static int pcap_setup(char *str, char **mac_out, void *data)
-{
- struct pcap_init *init = data;
- char *remain, *host_if = NULL, *options[2] = { NULL, NULL };
- int i;
-
- *init = ((struct pcap_init)
- { .host_if = "eth0",
- .promisc = 1,
- .optimize = 0,
- .filter = NULL });
-
- remain = split_if_spec(str, &host_if, &init->filter,
- &options[0], &options[1], mac_out, NULL);
- if (remain != NULL) {
- printk(KERN_ERR "pcap_setup - Extra garbage on "
- "specification : '%s'\n", remain);
- return 0;
- }
-
- if (host_if != NULL)
- init->host_if = host_if;
-
- for (i = 0; i < ARRAY_SIZE(options); i++) {
- if (options[i] == NULL)
- continue;
- if (!strcmp(options[i], "promisc"))
- init->promisc = 1;
- else if (!strcmp(options[i], "nopromisc"))
- init->promisc = 0;
- else if (!strcmp(options[i], "optimize"))
- init->optimize = 1;
- else if (!strcmp(options[i], "nooptimize"))
- init->optimize = 0;
- else {
- printk(KERN_ERR "pcap_setup : bad option - '%s'\n",
- options[i]);
- return 0;
- }
- }
-
- return 1;
-}
-
-static struct transport pcap_transport = {
- .list = LIST_HEAD_INIT(pcap_transport.list),
- .name = "pcap",
- .setup = pcap_setup,
- .user = &pcap_user_info,
- .kern = &pcap_kern_info,
- .private_size = sizeof(struct pcap_data),
- .setup_size = sizeof(struct pcap_init),
-};
-
-static int register_pcap(void)
-{
- register_transport(&pcap_transport);
- return 0;
-}
-
-late_initcall(register_pcap);
diff --git a/arch/um/drivers/pcap_user.c b/arch/um/drivers/pcap_user.c
deleted file mode 100644
index 52ddda3e3b10..000000000000
--- a/arch/um/drivers/pcap_user.c
+++ /dev/null
@@ -1,137 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <errno.h>
-#include <pcap.h>
-#include <string.h>
-#include <asm/types.h>
-#include <net_user.h>
-#include "pcap_user.h"
-#include <um_malloc.h>
-
-#define PCAP_FD(p) (*(int *)(p))
-
-static int pcap_user_init(void *data, void *dev)
-{
- struct pcap_data *pri = data;
- pcap_t *p;
- char errors[PCAP_ERRBUF_SIZE];
-
- p = pcap_open_live(pri->host_if, ETH_MAX_PACKET + ETH_HEADER_OTHER,
- pri->promisc, 0, errors);
- if (p == NULL) {
- printk(UM_KERN_ERR "pcap_user_init : pcap_open_live failed - "
- "'%s'\n", errors);
- return -EINVAL;
- }
-
- pri->dev = dev;
- pri->pcap = p;
- return 0;
-}
-
-static int pcap_user_open(void *data)
-{
- struct pcap_data *pri = data;
- __u32 netmask;
- int err;
-
- if (pri->pcap == NULL)
- return -ENODEV;
-
- if (pri->filter != NULL) {
- err = dev_netmask(pri->dev, &netmask);
- if (err < 0) {
- printk(UM_KERN_ERR "pcap_user_open : dev_netmask failed\n");
- return -EIO;
- }
-
- pri->compiled = uml_kmalloc(sizeof(struct bpf_program),
- UM_GFP_KERNEL);
- if (pri->compiled == NULL) {
- printk(UM_KERN_ERR "pcap_user_open : kmalloc failed\n");
- return -ENOMEM;
- }
-
- err = pcap_compile(pri->pcap,
- (struct bpf_program *) pri->compiled,
- pri->filter, pri->optimize, netmask);
- if (err < 0) {
- printk(UM_KERN_ERR "pcap_user_open : pcap_compile failed - "
- "'%s'\n", pcap_geterr(pri->pcap));
- goto out;
- }
-
- err = pcap_setfilter(pri->pcap, pri->compiled);
- if (err < 0) {
- printk(UM_KERN_ERR "pcap_user_open : pcap_setfilter "
- "failed - '%s'\n", pcap_geterr(pri->pcap));
- goto out;
- }
- }
-
- return PCAP_FD(pri->pcap);
-
- out:
- kfree(pri->compiled);
- return -EIO;
-}
-
-static void pcap_remove(void *data)
-{
- struct pcap_data *pri = data;
-
- if (pri->compiled != NULL)
- pcap_freecode(pri->compiled);
-
- if (pri->pcap != NULL)
- pcap_close(pri->pcap);
-}
-
-struct pcap_handler_data {
- char *buffer;
- int len;
-};
-
-static void handler(u_char *data, const struct pcap_pkthdr *header,
- const u_char *packet)
-{
- int len;
-
- struct pcap_handler_data *hdata = (struct pcap_handler_data *) data;
-
- len = hdata->len < header->caplen ? hdata->len : header->caplen;
- memcpy(hdata->buffer, packet, len);
- hdata->len = len;
-}
-
-int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri)
-{
- struct pcap_handler_data hdata = ((struct pcap_handler_data)
- { .buffer = buffer,
- .len = len });
- int n;
-
- n = pcap_dispatch(pri->pcap, 1, handler, (u_char *) &hdata);
- if (n < 0) {
- printk(UM_KERN_ERR "pcap_dispatch failed - %s\n",
- pcap_geterr(pri->pcap));
- return -EIO;
- }
- else if (n == 0)
- return 0;
- return hdata.len;
-}
-
-const struct net_user_info pcap_user_info = {
- .init = pcap_user_init,
- .open = pcap_user_open,
- .close = NULL,
- .remove = pcap_remove,
- .add_address = NULL,
- .delete_address = NULL,
- .mtu = ETH_MAX_PACKET,
- .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
diff --git a/arch/um/drivers/pcap_user.h b/arch/um/drivers/pcap_user.h
deleted file mode 100644
index 216246f5f09b..000000000000
--- a/arch/um/drivers/pcap_user.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- */
-
-#include <net_user.h>
-
-struct pcap_data {
- char *host_if;
- int promisc;
- int optimize;
- char *filter;
- void *compiled;
- void *pcap;
- void *dev;
-};
-
-extern const struct net_user_info pcap_user_info;
-
-extern int pcap_user_read(int fd, void *buf, int len, struct pcap_data *pri);
-
diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
index c52b3ff3c092..a4508470df78 100644
--- a/arch/um/drivers/port_kern.c
+++ b/arch/um/drivers/port_kern.c
@@ -45,15 +45,17 @@ struct connection {
static irqreturn_t pipe_interrupt(int irq, void *data)
{
struct connection *conn = data;
- int fd;
+ int n_fds = 1, fd = -1;
+ ssize_t ret;
- fd = os_rcv_fd(conn->socket[0], &conn->helper_pid);
- if (fd < 0) {
- if (fd == -EAGAIN)
+ ret = os_rcv_fd_msg(conn->socket[0], &fd, n_fds, &conn->helper_pid,
+ sizeof(conn->helper_pid));
+ if (ret != sizeof(conn->helper_pid)) {
+ if (ret == -EAGAIN)
return IRQ_NONE;
- printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n",
- -fd);
+ printk(KERN_ERR "pipe_interrupt : os_rcv_fd_msg returned %zd\n",
+ ret);
os_close_file(conn->fd);
}
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 9f1e76ddda5a..7f28ec1929dc 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -36,7 +36,6 @@
#include <linux/vmalloc.h>
#include <linux/platform_device.h>
#include <linux/scatterlist.h>
-#include <asm/tlbflush.h>
#include <kern_util.h>
#include "mconsole_kern.h"
#include <init.h>
@@ -106,7 +105,6 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data)
#define DRIVER_NAME "uml-blkdev"
static DEFINE_MUTEX(ubd_lock);
-static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg);
@@ -759,7 +757,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
goto error;
}
- flush_tlb_kernel_vm();
err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
ubd_dev->cow.bitmap_offset,
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 4279793b11b7..2d473282ab51 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1115,11 +1115,12 @@ static int irq_rr;
static int vector_net_close(struct net_device *dev)
{
struct vector_private *vp = netdev_priv(dev);
- unsigned long flags;
netif_stop_queue(dev);
del_timer(&vp->tl);
+ vp->opened = false;
+
if (vp->fds == NULL)
return 0;
@@ -1158,10 +1159,7 @@ static int vector_net_close(struct net_device *dev)
destroy_queue(vp->tx_queue);
kfree(vp->fds);
vp->fds = NULL;
- spin_lock_irqsave(&vp->lock, flags);
- vp->opened = false;
vp->in_error = false;
- spin_unlock_irqrestore(&vp->lock, flags);
return 0;
}
@@ -1203,17 +1201,12 @@ static void vector_reset_tx(struct work_struct *work)
static int vector_net_open(struct net_device *dev)
{
struct vector_private *vp = netdev_priv(dev);
- unsigned long flags;
int err = -EINVAL;
struct vector_device *vdevice;
- spin_lock_irqsave(&vp->lock, flags);
- if (vp->opened) {
- spin_unlock_irqrestore(&vp->lock, flags);
+ if (vp->opened)
return -ENXIO;
- }
vp->opened = true;
- spin_unlock_irqrestore(&vp->lock, flags);
vp->bpf = uml_vector_user_bpf(get_bpf_file(vp->parsed));
@@ -1387,8 +1380,6 @@ static int vector_net_load_bpf_flash(struct net_device *dev,
return -1;
}
- spin_lock(&vp->lock);
-
if (vp->bpf != NULL) {
if (vp->opened)
uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
@@ -1417,15 +1408,12 @@ static int vector_net_load_bpf_flash(struct net_device *dev,
if (vp->opened)
result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
- spin_unlock(&vp->lock);
-
return result;
free_buffer:
release_firmware(fw);
flash_fail:
- spin_unlock(&vp->lock);
if (vp->bpf != NULL)
kfree(vp->bpf->filter);
kfree(vp->bpf);
@@ -1631,7 +1619,6 @@ static void vector_eth_configure(
INIT_WORK(&vp->reset_tx, vector_reset_tx);
timer_setup(&vp->tl, vector_timer_expire, 0);
- spin_lock_init(&vp->lock);
/* FIXME */
dev->netdev_ops = &vector_netdev_ops;
diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h
index 2a1fa8e0f3e1..806df551be0b 100644
--- a/arch/um/drivers/vector_kern.h
+++ b/arch/um/drivers/vector_kern.h
@@ -71,7 +71,6 @@ struct vector_estats {
struct vector_private {
struct list_head list;
- spinlock_t lock;
struct net_device *dev;
struct napi_struct napi ____cacheline_aligned;
diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c
index 6918de5e2956..e4316c7981e8 100644
--- a/arch/um/drivers/xterm.c
+++ b/arch/um/drivers/xterm.c
@@ -156,7 +156,7 @@ static int xterm_open(int input, int output, int primary, void *d,
new = xterm_fd(fd, &data->helper_pid);
if (new < 0) {
err = new;
- printk(UM_KERN_ERR "xterm_open : os_rcv_fd failed, err = %d\n",
+ printk(UM_KERN_ERR "xterm_open : xterm_fd failed, err = %d\n",
-err);
goto out_kill;
}
diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c
index 8011e51993d5..3971252cb1a6 100644
--- a/arch/um/drivers/xterm_kern.c
+++ b/arch/um/drivers/xterm_kern.c
@@ -21,12 +21,19 @@ struct xterm_wait {
static irqreturn_t xterm_interrupt(int irq, void *data)
{
struct xterm_wait *xterm = data;
- int fd;
+ int fd = -1, n_fds = 1;
+ ssize_t ret;
- fd = os_rcv_fd(xterm->fd, &xterm->pid);
- if (fd == -EAGAIN)
+ ret = os_rcv_fd_msg(xterm->fd, &fd, n_fds,
+ &xterm->pid, sizeof(xterm->pid));
+ if (ret == -EAGAIN)
return IRQ_NONE;
+ if (ret < 0)
+ fd = ret;
+ else if (ret != sizeof(xterm->pid))
+ fd = -EMSGSIZE;
+
xterm->new_fd = fd;
complete(&xterm->ready);
diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
index f2923c767bb9..a3eaca41ff61 100644
--- a/arch/um/include/asm/mmu.h
+++ b/arch/um/include/asm/mmu.h
@@ -7,15 +7,13 @@
#define __ARCH_UM_MMU_H
#include <mm_id.h>
-#include <asm/mm_context.h>
typedef struct mm_context {
struct mm_id id;
- struct uml_arch_mm_context arch;
-} mm_context_t;
-/* Avoid tangled inclusion with asm/ldt.h */
-extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm);
-extern void free_ldt(struct mm_context *mm);
+ /* Address range in need of a TLB sync */
+ unsigned long sync_tlb_range_from;
+ unsigned long sync_tlb_range_to;
+} mm_context_t;
#endif
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index 68e2eb9cfb47..23dcc914d44e 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -13,8 +13,6 @@
#include <asm/mm_hooks.h>
#include <asm/mmu.h>
-extern void force_flush_all(void);
-
#define activate_mm activate_mm
static inline void activate_mm(struct mm_struct *old, struct mm_struct *new)
{
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index e1ece21dbe3f..5bb397b65efb 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -244,6 +244,38 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
#define PFN_PTE_SHIFT PAGE_SHIFT
+static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start,
+ unsigned long end)
+{
+ if (!mm->context.sync_tlb_range_to) {
+ mm->context.sync_tlb_range_from = start;
+ mm->context.sync_tlb_range_to = end;
+ } else {
+ if (start < mm->context.sync_tlb_range_from)
+ mm->context.sync_tlb_range_from = start;
+ if (end > mm->context.sync_tlb_range_to)
+ mm->context.sync_tlb_range_to = end;
+ }
+}
+
+#define set_ptes set_ptes
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, int nr)
+{
+ /* Basically the default implementation */
+ size_t length = nr * PAGE_SIZE;
+
+ for (;;) {
+ set_pte(ptep, pte);
+ if (--nr == 0)
+ break;
+ ptep++;
+ pte = __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT));
+ }
+
+ um_tlb_mark_sync(mm, addr, addr + length);
+}
+
#define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
diff --git a/arch/um/include/asm/tlbflush.h b/arch/um/include/asm/tlbflush.h
index a5bda890390d..db997976b6ea 100644
--- a/arch/um/include/asm/tlbflush.h
+++ b/arch/um/include/asm/tlbflush.h
@@ -9,23 +9,51 @@
#include <linux/mm.h>
/*
- * TLB flushing:
+ * In UML, we need to sync the TLB over by using mmap/munmap/mprotect syscalls
+ * from the process handling the MM (which can be the kernel itself).
+ *
+ * To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes
+ * we catch all PTE transitions where memory that was unusable becomes usable.
+ * While with flush_tlb_* we can track any memory that becomes unusable and
+ * even if a higher layer of the page table was modified.
+ *
+ * So, we simply track updates using both methods and mark the memory area to
+ * be synced later on. The only special case is that flush_tlb_kern_* needs to
+ * be executed immediately as there is no good synchronization point in that
+ * case. In contrast, in the set_ptes case we can wait for the next kernel
+ * segfault before we do the synchornization.
*
- * - flush_tlb() flushes the current mm struct TLBs
* - flush_tlb_all() flushes all processes TLBs
* - flush_tlb_mm(mm) flushes the specified mm context TLB's
* - flush_tlb_page(vma, vmaddr) flushes one page
- * - flush_tlb_kernel_vm() flushes the kernel vm area
* - flush_tlb_range(vma, start, end) flushes a range of pages
+ * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
*/
+extern int um_tlb_sync(struct mm_struct *mm);
+
extern void flush_tlb_all(void);
extern void flush_tlb_mm(struct mm_struct *mm);
-extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end);
-extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long address);
-extern void flush_tlb_kernel_vm(void);
-extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
-extern void __flush_tlb_one(unsigned long addr);
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long address)
+{
+ um_tlb_mark_sync(vma->vm_mm, address, address + PAGE_SIZE);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ um_tlb_mark_sync(vma->vm_mm, start, end);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+ unsigned long end)
+{
+ um_tlb_mark_sync(&init_mm, start, end);
+
+ /* Kernel needs to be synced immediately */
+ um_tlb_sync(&init_mm);
+}
#endif
diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h
index c22f46a757dc..06292fca5a4d 100644
--- a/arch/um/include/shared/as-layout.h
+++ b/arch/um/include/shared/as-layout.h
@@ -23,7 +23,7 @@
#define STUB_START stub_start
#define STUB_CODE STUB_START
#define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE)
-#define STUB_DATA_PAGES 1 /* must be a power of two */
+#define STUB_DATA_PAGES 2 /* must be a power of two */
#define STUB_END (STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE)
#ifndef __ASSEMBLY__
diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h
index 96195483fbd0..579ed946a3a9 100644
--- a/arch/um/include/shared/common-offsets.h
+++ b/arch/um/include/shared/common-offsets.h
@@ -1,6 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* for use by sys-$SUBARCH/kernel-offsets.c */
-#include <stub-data.h>
DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE);
@@ -30,7 +29,3 @@ DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT);
DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT);
#endif
-/* for stub */
-DEFINE(UML_STUB_FIELD_OFFSET, offsetof(struct stub_data, offset));
-DEFINE(UML_STUB_FIELD_CHILD_ERR, offsetof(struct stub_data, child_err));
-DEFINE(UML_STUB_FIELD_FD, offsetof(struct stub_data, fd));
diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h
index 95521b1f5b20..d8ffd2db168e 100644
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -13,7 +13,6 @@ struct siginfo;
extern int uml_exitcode;
-extern int ncpus;
extern int kmalloc_ok;
#define UML_ROUND_UP(addr) \
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index aff8906304ea..9a039d6f1f74 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -163,8 +163,10 @@ extern int os_set_fd_block(int fd, int blocking);
extern int os_accept_connection(int fd);
extern int os_create_unix_socket(const char *file, int len, int close_on_exec);
extern int os_shutdown_socket(int fd, int r, int w);
+extern int os_dup_file(int fd);
extern void os_close_file(int fd);
-extern int os_rcv_fd(int fd, int *helper_pid_out);
+ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
+ void *data, size_t data_len);
extern int os_connect_socket(const char *name);
extern int os_file_type(char *file);
extern int os_file_mode(const char *file, struct openflags *mode_out);
@@ -179,6 +181,8 @@ extern int os_eventfd(unsigned int initval, int flags);
extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
const int *fds, unsigned int fds_num);
int os_poll(unsigned int n, const int *fds);
+void *os_mmap_rw_shared(int fd, size_t size);
+void *os_mremap_rw_shared(void *old_addr, size_t old_size, size_t new_size);
/* start_up.c */
extern void os_early_checks(void);
@@ -191,6 +195,9 @@ extern void get_host_cpu_features(
/* mem.c */
extern int create_mem_file(unsigned long long len);
+/* tlb.c */
+extern void report_enomem(void);
+
/* process.c */
extern unsigned long os_process_pc(int pid);
extern int os_process_parent(int pid);
@@ -268,24 +275,20 @@ extern long long os_persistent_clock_emulation(void);
extern long long os_nsecs(void);
/* skas/mem.c */
-extern long run_syscall_stub(struct mm_id * mm_idp,
- int syscall, unsigned long *args, long expected,
- void **addr, int done);
-extern long syscall_stub_data(struct mm_id * mm_idp,
- unsigned long *data, int data_count,
- void **addr, void **stub_addr);
-extern int map(struct mm_id * mm_idp, unsigned long virt,
- unsigned long len, int prot, int phys_fd,
- unsigned long long offset, int done, void **data);
-extern int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
- int done, void **data);
-extern int protect(struct mm_id * mm_idp, unsigned long addr,
- unsigned long len, unsigned int prot, int done, void **data);
+int syscall_stub_flush(struct mm_id *mm_idp);
+struct stub_syscall *syscall_stub_alloc(struct mm_id *mm_idp);
+void syscall_stub_dump_error(struct mm_id *mm_idp);
+
+int map(struct mm_id *mm_idp, unsigned long virt,
+ unsigned long len, int prot, int phys_fd,
+ unsigned long long offset);
+int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len);
+int protect(struct mm_id *mm_idp, unsigned long addr,
+ unsigned long len, unsigned int prot);
/* skas/process.c */
extern int is_skas_winch(int pid, int fd, void *data);
extern int start_userspace(unsigned long stub_stack);
-extern int copy_context_skas0(unsigned long stack, int pid);
extern void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs);
extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
extern void switch_threads(jmp_buf *me, jmp_buf *you);
diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h
index 92dbf727e384..1e76ba40feba 100644
--- a/arch/um/include/shared/skas/mm_id.h
+++ b/arch/um/include/shared/skas/mm_id.h
@@ -12,7 +12,7 @@ struct mm_id {
int pid;
} u;
unsigned long stack;
- int kill;
+ int syscall_data_len;
};
void __switch_mm(struct mm_id *mm_idp);
diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h
index c93d2cbc8f32..ebaa116de30b 100644
--- a/arch/um/include/shared/skas/skas.h
+++ b/arch/um/include/shared/skas/skas.h
@@ -15,5 +15,7 @@ extern void new_thread_handler(void);
extern void handle_syscall(struct uml_pt_regs *regs);
extern long execute_syscall_skas(void *r);
extern unsigned long current_stub_stack(void);
+extern struct mm_id *current_mm_id(void);
+extern void current_mm_sync(void);
#endif
diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h
index 5e3ade3fb38b..2b6b44759dfa 100644
--- a/arch/um/include/shared/skas/stub-data.h
+++ b/arch/um/include/shared/skas/stub-data.h
@@ -8,10 +8,42 @@
#ifndef __STUB_DATA_H
#define __STUB_DATA_H
+#include <linux/compiler_types.h>
+#include <as-layout.h>
+#include <sysdep/tls.h>
+
+#define STUB_NEXT_SYSCALL(s) \
+ ((struct stub_syscall *) (((unsigned long) s) + (s)->cmd_len))
+
+enum stub_syscall_type {
+ STUB_SYSCALL_UNSET = 0,
+ STUB_SYSCALL_MMAP,
+ STUB_SYSCALL_MUNMAP,
+ STUB_SYSCALL_MPROTECT,
+};
+
+struct stub_syscall {
+ struct {
+ unsigned long addr;
+ unsigned long length;
+ unsigned long offset;
+ int fd;
+ int prot;
+ } mem;
+
+ enum stub_syscall_type syscall;
+};
+
struct stub_data {
unsigned long offset;
- int fd;
- long parent_err, child_err;
+ long err, child_err;
+
+ int syscall_data_len;
+ /* 128 leaves enough room for additional fields in the struct */
+ struct stub_syscall syscall_data[(UM_KERN_PAGE_SIZE - 128) / sizeof(struct stub_syscall)] __aligned(16);
+
+ /* Stack for our signal handlers and for calling into . */
+ unsigned char sigstack[UM_KERN_PAGE_SIZE] __aligned(UM_KERN_PAGE_SIZE);
};
#endif
diff --git a/arch/um/include/shared/timetravel.h b/arch/um/include/shared/timetravel.h
index e5c3d69f1b69..c8db2f213dba 100644
--- a/arch/um/include/shared/timetravel.h
+++ b/arch/um/include/shared/timetravel.h
@@ -15,8 +15,17 @@ enum time_travel_mode {
#if defined(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT) || \
defined(CONFIG_UML_TIME_TRAVEL_SUPPORT)
extern enum time_travel_mode time_travel_mode;
+extern int time_travel_should_print_bc_msg;
#else
#define time_travel_mode TT_MODE_OFF
+#define time_travel_should_print_bc_msg 0
#endif /* (UML_)CONFIG_UML_TIME_TRAVEL_SUPPORT */
+void _time_travel_print_bc_msg(void);
+static inline void time_travel_print_bc_msg(void)
+{
+ if (time_travel_should_print_bc_msg)
+ _time_travel_print_bc_msg();
+}
+
#endif /* _UM_TIME_TRAVEL_H_ */
diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h
index 326e52450e41..bbab79c0c074 100644
--- a/arch/um/include/shared/user.h
+++ b/arch/um/include/shared/user.h
@@ -42,11 +42,19 @@ extern void panic(const char *fmt, ...)
#define printk(...) _printk(__VA_ARGS__)
extern int _printk(const char *fmt, ...)
__attribute__ ((format (printf, 1, 2)));
+extern void print_hex_dump(const char *level, const char *prefix_str,
+ int prefix_type, int rowsize, int groupsize,
+ const void *buf, size_t len, _Bool ascii);
#else
static inline int printk(const char *fmt, ...)
{
return 0;
}
+static inline void print_hex_dump(const char *level, const char *prefix_str,
+ int prefix_type, int rowsize, int groupsize,
+ const void *buf, size_t len, _Bool ascii)
+{
+}
#endif
extern int in_aton(char *str);
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 827a0d3fa589..2c15bb2c104c 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -22,17 +22,8 @@
void flush_thread(void)
{
- void *data = NULL;
- int ret;
-
arch_flush_thread(&current->thread.arch);
- ret = unmap(&current->mm->context.id, 0, TASK_SIZE, 1, &data);
- if (ret) {
- printk(KERN_ERR "%s - clearing address space failed, err = %d\n",
- __func__, ret);
- force_sig(SIGKILL);
- }
get_safe_registers(current_pt_regs()->regs.gp,
current_pt_regs()->regs.fp);
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 635d44606bfe..534e91797f89 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -37,7 +37,7 @@ struct irq_reg {
bool pending;
bool wakeup;
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
- bool pending_on_resume;
+ bool pending_event;
void (*timetravel_handler)(int, int, void *,
struct time_travel_event *);
struct time_travel_event event;
@@ -56,6 +56,9 @@ static DEFINE_SPINLOCK(irq_lock);
static LIST_HEAD(active_fds);
static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ);
static bool irqs_suspended;
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+static bool irqs_pending;
+#endif
static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
{
@@ -84,9 +87,12 @@ static void irq_event_handler(struct time_travel_event *ev)
{
struct irq_reg *reg = container_of(ev, struct irq_reg, event);
- /* do nothing if suspended - just to cause a wakeup */
- if (irqs_suspended)
+ /* do nothing if suspended; just cause a wakeup and mark as pending */
+ if (irqs_suspended) {
+ irqs_pending = true;
+ reg->pending_event = true;
return;
+ }
generic_handle_irq(reg->irq);
}
@@ -110,16 +116,47 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry,
if (!reg->event.pending)
return false;
- if (irqs_suspended)
- reg->pending_on_resume = true;
return true;
}
+
+static void irq_do_pending_events(bool timetravel_handlers_only)
+{
+ struct irq_entry *entry;
+
+ if (!irqs_pending || timetravel_handlers_only)
+ return;
+
+ irqs_pending = false;
+
+ list_for_each_entry(entry, &active_fds, list) {
+ enum um_irq_type t;
+
+ for (t = 0; t < NUM_IRQ_TYPES; t++) {
+ struct irq_reg *reg = &entry->reg[t];
+
+ /*
+ * Any timetravel_handler was invoked already, just
+ * directly run the IRQ.
+ */
+ if (reg->pending_event) {
+ irq_enter();
+ generic_handle_irq(reg->irq);
+ irq_exit();
+ reg->pending_event = false;
+ }
+ }
+ }
+}
#else
static bool irq_do_timetravel_handler(struct irq_entry *entry,
enum um_irq_type t)
{
return false;
}
+
+static void irq_do_pending_events(bool timetravel_handlers_only)
+{
+}
#endif
static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t,
@@ -145,6 +182,8 @@ static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type
*/
if (timetravel_handlers_only) {
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+ reg->pending_event = true;
+ irqs_pending = true;
mark_sigio_pending();
#endif
return;
@@ -162,6 +201,10 @@ static void _sigio_handler(struct uml_pt_regs *regs,
if (timetravel_handlers_only && !um_irq_timetravel_handler_used())
return;
+ /* Flush out pending events that were ignored due to time-travel. */
+ if (!irqs_suspended)
+ irq_do_pending_events(timetravel_handlers_only);
+
while (1) {
/* This is now lockless - epoll keeps back-referencesto the irqs
* which have trigger it so there is no need to walk the irq
@@ -195,7 +238,9 @@ static void _sigio_handler(struct uml_pt_regs *regs,
void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
+ preempt_disable();
_sigio_handler(regs, irqs_suspended);
+ preempt_enable();
}
static struct irq_entry *get_irq_entry_by_fd(int fd)
@@ -543,30 +588,7 @@ void um_irqs_resume(void)
unsigned long flags;
- local_irq_save(flags);
-#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
- /*
- * We don't need to lock anything here since we're in resume
- * and nothing else is running, but have disabled IRQs so we
- * don't try anything else with the interrupt list from there.
- */
- list_for_each_entry(entry, &active_fds, list) {
- enum um_irq_type t;
-
- for (t = 0; t < NUM_IRQ_TYPES; t++) {
- struct irq_reg *reg = &entry->reg[t];
-
- if (reg->pending_on_resume) {
- irq_enter();
- generic_handle_irq(reg->irq);
- irq_exit();
- reg->pending_on_resume = false;
- }
- }
- }
-#endif
-
- spin_lock(&irq_lock);
+ spin_lock_irqsave(&irq_lock, flags);
list_for_each_entry(entry, &active_fds, list) {
if (entry->suspended) {
int err = os_set_fd_async(entry->fd);
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index 3a85bde3e173..f2fb77da08cf 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(os_shutdown_socket);
EXPORT_SYMBOL(os_create_unix_socket);
EXPORT_SYMBOL(os_connect_socket);
EXPORT_SYMBOL(os_accept_connection);
-EXPORT_SYMBOL(os_rcv_fd);
+EXPORT_SYMBOL(os_rcv_fd_msg);
EXPORT_SYMBOL(run_helper);
EXPORT_SYMBOL(os_major);
EXPORT_SYMBOL(os_minor);
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index ca91accd64fc..a5b4fe2ad931 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -73,7 +73,6 @@ void __init mem_init(void)
/* this will put all low memory onto the freelists */
memblock_free_all();
- max_low_pfn = totalram_pages();
max_pfn = max_low_pfn;
kmalloc_ok = 1;
}
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index d2134802f6a8..f36b63f53bab 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -122,8 +122,6 @@ void new_thread_handler(void)
/* Called magically, see new_thread_handler above */
static void fork_handler(void)
{
- force_flush_all();
-
schedule_tail(current->thread.prev_sched);
/*
@@ -237,73 +235,6 @@ int copy_from_user_proc(void *to, void __user *from, int size)
return copy_from_user(to, from, size);
}
-static atomic_t using_sysemu = ATOMIC_INIT(0);
-int sysemu_supported;
-
-static void set_using_sysemu(int value)
-{
- if (value > sysemu_supported)
- return;
- atomic_set(&using_sysemu, value);
-}
-
-static int get_using_sysemu(void)
-{
- return atomic_read(&using_sysemu);
-}
-
-static int sysemu_proc_show(struct seq_file *m, void *v)
-{
- seq_printf(m, "%d\n", get_using_sysemu());
- return 0;
-}
-
-static int sysemu_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, sysemu_proc_show, NULL);
-}
-
-static ssize_t sysemu_proc_write(struct file *file, const char __user *buf,
- size_t count, loff_t *pos)
-{
- char tmp[2];
-
- if (copy_from_user(tmp, buf, 1))
- return -EFAULT;
-
- if (tmp[0] >= '0' && tmp[0] <= '2')
- set_using_sysemu(tmp[0] - '0');
- /* We use the first char, but pretend to write everything */
- return count;
-}
-
-static const struct proc_ops sysemu_proc_ops = {
- .proc_open = sysemu_proc_open,
- .proc_read = seq_read,
- .proc_lseek = seq_lseek,
- .proc_release = single_release,
- .proc_write = sysemu_proc_write,
-};
-
-static int __init make_proc_sysemu(void)
-{
- struct proc_dir_entry *ent;
- if (!sysemu_supported)
- return 0;
-
- ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_ops);
-
- if (ent == NULL)
- {
- printk(KERN_WARNING "Failed to register /proc/sysemu\n");
- return 0;
- }
-
- return 0;
-}
-
-late_initcall(make_proc_sysemu);
-
int singlestepping(void)
{
return test_thread_flag(TIF_SINGLESTEP);
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index 25840eee1068..3736bca626ba 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -59,3 +59,18 @@ void machine_halt(void)
{
machine_power_off();
}
+
+static int sys_power_off_handler(struct sys_off_data *data)
+{
+ machine_power_off();
+ return 0;
+}
+
+static int register_power_off(void)
+{
+ register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
+ SYS_OFF_PRIO_DEFAULT,
+ sys_power_off_handler, NULL);
+ return 0;
+}
+__initcall(register_power_off);
diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
index f93972a25765..6f86d53e3d69 100644
--- a/arch/um/kernel/skas/Makefile
+++ b/arch/um/kernel/skas/Makefile
@@ -3,15 +3,14 @@
# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
#
-obj-y := clone.o mmu.o process.o syscall.o uaccess.o
+obj-y := stub.o mmu.o process.o syscall.o uaccess.o
-# clone.o is in the stub, so it can't be built with profiling
+# stub.o is in the stub, so it can't be built with profiling
# GCC hardened also auto-enables -fpic, but we need %ebx so it can't work ->
# disable it
-CFLAGS_clone.o := $(CFLAGS_NO_HARDENING)
-UNPROFILE_OBJS := clone.o
-
+CFLAGS_stub.o := $(CFLAGS_NO_HARDENING)
+UNPROFILE_OBJS := stub.o
KCOV_INSTRUMENT := n
include $(srctree)/arch/um/scripts/Makefile.rules
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
deleted file mode 100644
index 62435187dda4..000000000000
--- a/arch/um/kernel/skas/clone.c
+++ /dev/null
@@ -1,48 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <signal.h>
-#include <sched.h>
-#include <asm/unistd.h>
-#include <sys/time.h>
-#include <as-layout.h>
-#include <ptrace_user.h>
-#include <stub-data.h>
-#include <sysdep/stub.h>
-
-/*
- * This is in a separate file because it needs to be compiled with any
- * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled
- *
- * Use UM_KERN_PAGE_SIZE instead of PAGE_SIZE because that calls getpagesize
- * on some systems.
- */
-
-void __attribute__ ((__section__ (".__syscall_stub")))
-stub_clone_handler(void)
-{
- struct stub_data *data = get_stub_data();
- long err;
-
- err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
- (unsigned long)data +
- STUB_DATA_PAGES * UM_KERN_PAGE_SIZE / 2);
- if (err) {
- data->parent_err = err;
- goto done;
- }
-
- err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
- if (err) {
- data->child_err = err;
- goto done;
- }
-
- remap_stack_and_trap();
-
- done:
- trap_myself();
-}
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index aeed1c2aaf3c..47f98d87ea3c 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -14,11 +14,14 @@
#include <as-layout.h>
#include <os.h>
#include <skas.h>
+#include <stub-data.h>
+
+/* Ensure the stub_data struct covers the allocated area */
+static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
int init_new_context(struct task_struct *task, struct mm_struct *mm)
{
- struct mm_context *from_mm = NULL;
- struct mm_context *to_mm = &mm->context;
+ struct mm_id *new_id = &mm->context.id;
unsigned long stack = 0;
int ret = -ENOMEM;
@@ -26,34 +29,46 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
if (stack == 0)
goto out;
- to_mm->id.stack = stack;
- if (current->mm != NULL && current->mm != &init_mm)
- from_mm = &current->mm->context;
+ new_id->stack = stack;
block_signals_trace();
- if (from_mm)
- to_mm->id.u.pid = copy_context_skas0(stack,
- from_mm->id.u.pid);
- else to_mm->id.u.pid = start_userspace(stack);
+ new_id->u.pid = start_userspace(stack);
unblock_signals_trace();
- if (to_mm->id.u.pid < 0) {
- ret = to_mm->id.u.pid;
+ if (new_id->u.pid < 0) {
+ ret = new_id->u.pid;
goto out_free;
}
- ret = init_new_ldt(to_mm, from_mm);
- if (ret < 0) {
- printk(KERN_ERR "init_new_context_skas - init_ldt"
- " failed, errno = %d\n", ret);
- goto out_free;
- }
+ /*
+ * Ensure the new MM is clean and nothing unwanted is mapped.
+ *
+ * TODO: We should clear the memory up to STUB_START to ensure there is
+ * nothing mapped there, i.e. we (currently) have:
+ *
+ * |- user memory -|- unused -|- stub -|- unused -|
+ * ^ TASK_SIZE ^ STUB_START
+ *
+ * Meaning we have two unused areas where we may still have valid
+ * mappings from our internal clone(). That isn't really a problem as
+ * userspace is not going to access them, but it is definitely not
+ * correct.
+ *
+ * However, we are "lucky" and if rseq is configured, then on 32 bit
+ * it will fall into the first empty range while on 64 bit it is going
+ * to use an anonymous mapping in the second range. As such, things
+ * continue to work for now as long as we don't start unmapping these
+ * areas.
+ *
+ * Change this to STUB_START once we have a clean userspace.
+ */
+ unmap(new_id, 0, TASK_SIZE);
return 0;
out_free:
- if (to_mm->id.stack != 0)
- free_pages(to_mm->id.stack, ilog2(STUB_DATA_PAGES));
+ if (new_id->stack != 0)
+ free_pages(new_id->stack, ilog2(STUB_DATA_PAGES));
out:
return ret;
}
@@ -76,5 +91,4 @@ void destroy_context(struct mm_struct *mm)
os_kill_ptraced_process(mmu->id.u.pid, 1);
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
- free_ldt(mmu);
}
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index 99a5cbb36083..5f9c1c5f36e2 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -8,6 +8,8 @@
#include <linux/sched/task_stack.h>
#include <linux/sched/task.h>
+#include <asm/tlbflush.h>
+
#include <as-layout.h>
#include <kern.h>
#include <os.h>
@@ -50,3 +52,19 @@ unsigned long current_stub_stack(void)
return current->mm->context.id.stack;
}
+
+struct mm_id *current_mm_id(void)
+{
+ if (current->mm == NULL)
+ return NULL;
+
+ return &current->mm->context.id;
+}
+
+void current_mm_sync(void)
+{
+ if (current->mm == NULL)
+ return;
+
+ um_tlb_sync(current->mm);
+}
diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c
new file mode 100644
index 000000000000..5d52ffa682dc
--- /dev/null
+++ b/arch/um/kernel/skas/stub.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
+ */
+
+#include <sysdep/stub.h>
+
+static __always_inline int syscall_handler(struct stub_data *d)
+{
+ int i;
+ unsigned long res;
+
+ for (i = 0; i < d->syscall_data_len; i++) {
+ struct stub_syscall *sc = &d->syscall_data[i];
+
+ switch (sc->syscall) {
+ case STUB_SYSCALL_MMAP:
+ res = stub_syscall6(STUB_MMAP_NR,
+ sc->mem.addr, sc->mem.length,
+ sc->mem.prot,
+ MAP_SHARED | MAP_FIXED,
+ sc->mem.fd, sc->mem.offset);
+ if (res != sc->mem.addr) {
+ d->err = res;
+ d->syscall_data_len = i;
+ return -1;
+ }
+ break;
+ case STUB_SYSCALL_MUNMAP:
+ res = stub_syscall2(__NR_munmap,
+ sc->mem.addr, sc->mem.length);
+ if (res) {
+ d->err = res;
+ d->syscall_data_len = i;
+ return -1;
+ }
+ break;
+ case STUB_SYSCALL_MPROTECT:
+ res = stub_syscall3(__NR_mprotect,
+ sc->mem.addr, sc->mem.length,
+ sc->mem.prot);
+ if (res) {
+ d->err = res;
+ d->syscall_data_len = i;
+ return -1;
+ }
+ break;
+ default:
+ d->err = -95; /* EOPNOTSUPP */
+ d->syscall_data_len = i;
+ return -1;
+ }
+ }
+
+ d->err = 0;
+ d->syscall_data_len = 0;
+
+ return 0;
+}
+
+void __section(".__syscall_stub")
+stub_syscall_handler(void)
+{
+ struct stub_data *d = get_stub_data();
+
+ syscall_handler(d);
+
+ trap_myself();
+}
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index a8bfe8be1526..47b9f5e63566 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -31,6 +31,7 @@ EXPORT_SYMBOL_GPL(time_travel_mode);
static bool time_travel_start_set;
static unsigned long long time_travel_start;
static unsigned long long time_travel_time;
+static unsigned long long time_travel_shm_offset;
static LIST_HEAD(time_travel_events);
static LIST_HEAD(time_travel_irqs);
static unsigned long long time_travel_timer_interval;
@@ -40,8 +41,11 @@ static int time_travel_ext_fd = -1;
static unsigned int time_travel_ext_waiting;
static bool time_travel_ext_prev_request_valid;
static unsigned long long time_travel_ext_prev_request;
-static bool time_travel_ext_free_until_valid;
-static unsigned long long time_travel_ext_free_until;
+static unsigned long long *time_travel_ext_free_until;
+static unsigned long long _time_travel_ext_free_until;
+static u16 time_travel_shm_id;
+static struct um_timetravel_schedshm *time_travel_shm;
+static union um_timetravel_schedshm_client *time_travel_shm_client;
static void time_travel_set_time(unsigned long long ns)
{
@@ -58,8 +62,52 @@ enum time_travel_message_handling {
TTMH_IDLE,
TTMH_POLL,
TTMH_READ,
+ TTMH_READ_START_ACK,
};
+static u64 bc_message;
+int time_travel_should_print_bc_msg;
+
+void _time_travel_print_bc_msg(void)
+{
+ time_travel_should_print_bc_msg = 0;
+ printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message);
+}
+
+static void time_travel_setup_shm(int fd, u16 id)
+{
+ u32 len;
+
+ time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm));
+
+ if (!time_travel_shm)
+ goto out;
+
+ len = time_travel_shm->len;
+
+ if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION ||
+ len < struct_size(time_travel_shm, clients, id + 1)) {
+ os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm));
+ time_travel_shm = NULL;
+ goto out;
+ }
+
+ time_travel_shm = os_mremap_rw_shared(time_travel_shm,
+ sizeof(*time_travel_shm),
+ len);
+ if (!time_travel_shm)
+ goto out;
+
+ time_travel_shm_offset = time_travel_shm->current_time;
+ time_travel_shm_client = &time_travel_shm->clients[id];
+ time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE;
+ time_travel_shm_id = id;
+ /* always look at that free_until from now on */
+ time_travel_ext_free_until = &time_travel_shm->free_until;
+out:
+ os_close_file(fd);
+}
+
static void time_travel_handle_message(struct um_timetravel_msg *msg,
enum time_travel_message_handling mode)
{
@@ -80,7 +128,20 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
}
}
- ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
+ if (unlikely(mode == TTMH_READ_START_ACK)) {
+ int fd[UM_TIMETRAVEL_SHARED_MAX_FDS];
+
+ ret = os_rcv_fd_msg(time_travel_ext_fd, fd,
+ ARRAY_SIZE(fd), msg, sizeof(*msg));
+ if (ret == sizeof(*msg)) {
+ time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD],
+ msg->time & UM_TIMETRAVEL_START_ACK_ID);
+ /* we don't use the logging for now */
+ os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]);
+ }
+ } else {
+ ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
+ }
if (ret == 0)
panic("time-travel external link is broken\n");
@@ -96,10 +157,24 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
return;
case UM_TIMETRAVEL_RUN:
time_travel_set_time(msg->time);
+ if (time_travel_shm) {
+ /* no request right now since we're running */
+ time_travel_shm_client->flags &=
+ ~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
+ /* no ack for shared memory RUN */
+ return;
+ }
break;
case UM_TIMETRAVEL_FREE_UNTIL:
- time_travel_ext_free_until_valid = true;
- time_travel_ext_free_until = msg->time;
+ /* not supposed to get this with shm, but ignore it */
+ if (time_travel_shm)
+ break;
+ time_travel_ext_free_until = &_time_travel_ext_free_until;
+ _time_travel_ext_free_until = msg->time;
+ break;
+ case UM_TIMETRAVEL_BROADCAST:
+ bc_message = msg->time;
+ time_travel_should_print_bc_msg = 1;
break;
}
@@ -136,8 +211,15 @@ static u64 time_travel_ext_req(u32 op, u64 time)
block_signals_hard();
os_write_file(time_travel_ext_fd, &msg, sizeof(msg));
+ /* no ACK expected for WAIT in shared memory mode */
+ if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm)
+ goto done;
+
while (msg.op != UM_TIMETRAVEL_ACK)
- time_travel_handle_message(&msg, TTMH_READ);
+ time_travel_handle_message(&msg,
+ op == UM_TIMETRAVEL_START ?
+ TTMH_READ_START_ACK :
+ TTMH_READ);
if (msg.seq != mseq)
panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
@@ -145,6 +227,7 @@ static u64 time_travel_ext_req(u32 op, u64 time)
if (op == UM_TIMETRAVEL_GET)
time_travel_set_time(msg.time);
+done:
unblock_signals_hard();
return msg.time;
@@ -180,13 +263,33 @@ static void time_travel_ext_update_request(unsigned long long time)
/*
* if we're running and are allowed to run past the request
* then we don't need to update it either
+ *
+ * Note for shm we ignore FREE_UNTIL messages and leave the pointer
+ * to shared memory, and for non-shm the offset is 0.
*/
- if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
- time < time_travel_ext_free_until)
+ if (!time_travel_ext_waiting && time_travel_ext_free_until &&
+ time < (*time_travel_ext_free_until - time_travel_shm_offset))
return;
time_travel_ext_prev_request = time;
time_travel_ext_prev_request_valid = true;
+
+ if (time_travel_shm) {
+ union um_timetravel_schedshm_client *running;
+
+ running = &time_travel_shm->clients[time_travel_shm->running_id];
+
+ if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) {
+ time_travel_shm_client->flags |=
+ UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
+ time += time_travel_shm_offset;
+ time_travel_shm_client->req_time = time;
+ if (time < time_travel_shm->free_until)
+ time_travel_shm->free_until = time;
+ return;
+ }
+ }
+
time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
}
@@ -194,6 +297,14 @@ void __time_travel_propagate_time(void)
{
static unsigned long long last_propagated;
+ if (time_travel_shm) {
+ if (time_travel_shm->running_id != time_travel_shm_id)
+ panic("time-travel: setting time while not running\n");
+ time_travel_shm->current_time = time_travel_time +
+ time_travel_shm_offset;
+ return;
+ }
+
if (last_propagated == time_travel_time)
return;
@@ -209,9 +320,12 @@ static bool time_travel_ext_request(unsigned long long time)
* If we received an external sync point ("free until") then we
* don't have to request/wait for anything until then, unless
* we're already waiting.
+ *
+ * Note for shm we ignore FREE_UNTIL messages and leave the pointer
+ * to shared memory, and for non-shm the offset is 0.
*/
- if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
- time < time_travel_ext_free_until)
+ if (!time_travel_ext_waiting && time_travel_ext_free_until &&
+ time < (*time_travel_ext_free_until - time_travel_shm_offset))
return false;
time_travel_ext_update_request(time);
@@ -225,7 +339,8 @@ static void time_travel_ext_wait(bool idle)
};
time_travel_ext_prev_request_valid = false;
- time_travel_ext_free_until_valid = false;
+ if (!time_travel_shm)
+ time_travel_ext_free_until = NULL;
time_travel_ext_waiting++;
time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);
@@ -248,7 +363,11 @@ static void time_travel_ext_wait(bool idle)
static void time_travel_ext_get_time(void)
{
- time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
+ if (time_travel_shm)
+ time_travel_set_time(time_travel_shm->current_time -
+ time_travel_shm_offset);
+ else
+ time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
}
static void __time_travel_update_time(unsigned long long ns, bool idle)
@@ -875,9 +994,49 @@ static int setup_time_travel_start(char *str)
return 1;
}
-__setup("time-travel-start", setup_time_travel_start);
+__setup("time-travel-start=", setup_time_travel_start);
__uml_help(setup_time_travel_start,
-"time-travel-start=<seconds>\n"
+"time-travel-start=<nanoseconds>\n"
"Configure the UML instance's wall clock to start at this value rather than\n"
"the host's wall clock at the time of UML boot.\n");
+static struct kobject *bc_time_kobject;
+
+static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "0x%llx", bc_message);
+}
+
+static ssize_t bc_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ int ret;
+ u64 user_bc_message;
+
+ ret = kstrtou64(buf, 0, &user_bc_message);
+ if (ret)
+ return ret;
+
+ bc_message = user_bc_message;
+
+ time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, bc_message);
+ pr_info("um: time: sent broadcast message: 0x%llx\n", bc_message);
+ return count;
+}
+
+static struct kobj_attribute bc_attribute = __ATTR(bc-message, 0660, bc_show, bc_store);
+
+static int __init um_bc_start(void)
+{
+ if (time_travel_mode != TT_MODE_EXTERNAL)
+ return 0;
+
+ bc_time_kobject = kobject_create_and_add("um-ext-time", kernel_kobj);
+ if (!bc_time_kobject)
+ return 0;
+
+ if (sysfs_create_file(bc_time_kobject, &bc_attribute.attr))
+ pr_debug("failed to create the bc file in /sys/kernel/um_time");
+
+ return 0;
+}
+late_initcall(um_bc_start);
#endif
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 8784f03fa4a6..44c6fc697f3a 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -15,209 +15,54 @@
#include <skas.h>
#include <kern_util.h>
-struct host_vm_change {
- struct host_vm_op {
- enum { NONE, MMAP, MUNMAP, MPROTECT } type;
- union {
- struct {
- unsigned long addr;
- unsigned long len;
- unsigned int prot;
- int fd;
- __u64 offset;
- } mmap;
- struct {
- unsigned long addr;
- unsigned long len;
- } munmap;
- struct {
- unsigned long addr;
- unsigned long len;
- unsigned int prot;
- } mprotect;
- } u;
- } ops[1];
- int userspace;
- int index;
- struct mm_struct *mm;
- void *data;
- int force;
+struct vm_ops {
+ struct mm_id *mm_idp;
+
+ int (*mmap)(struct mm_id *mm_idp,
+ unsigned long virt, unsigned long len, int prot,
+ int phys_fd, unsigned long long offset);
+ int (*unmap)(struct mm_id *mm_idp,
+ unsigned long virt, unsigned long len);
+ int (*mprotect)(struct mm_id *mm_idp,
+ unsigned long virt, unsigned long len,
+ unsigned int prot);
};
-#define INIT_HVC(mm, force, userspace) \
- ((struct host_vm_change) \
- { .ops = { { .type = NONE } }, \
- .mm = mm, \
- .data = NULL, \
- .userspace = userspace, \
- .index = 0, \
- .force = force })
-
-static void report_enomem(void)
+static int kern_map(struct mm_id *mm_idp,
+ unsigned long virt, unsigned long len, int prot,
+ int phys_fd, unsigned long long offset)
{
- printk(KERN_ERR "UML ran out of memory on the host side! "
- "This can happen due to a memory limitation or "
- "vm.max_map_count has been reached.\n");
-}
-
-static int do_ops(struct host_vm_change *hvc, int end,
- int finished)
-{
- struct host_vm_op *op;
- int i, ret = 0;
-
- for (i = 0; i < end && !ret; i++) {
- op = &hvc->ops[i];
- switch (op->type) {
- case MMAP:
- if (hvc->userspace)
- ret = map(&hvc->mm->context.id, op->u.mmap.addr,
- op->u.mmap.len, op->u.mmap.prot,
- op->u.mmap.fd,
- op->u.mmap.offset, finished,
- &hvc->data);
- else
- map_memory(op->u.mmap.addr, op->u.mmap.offset,
- op->u.mmap.len, 1, 1, 1);
- break;
- case MUNMAP:
- if (hvc->userspace)
- ret = unmap(&hvc->mm->context.id,
- op->u.munmap.addr,
- op->u.munmap.len, finished,
- &hvc->data);
- else
- ret = os_unmap_memory(
- (void *) op->u.munmap.addr,
- op->u.munmap.len);
-
- break;
- case MPROTECT:
- if (hvc->userspace)
- ret = protect(&hvc->mm->context.id,
- op->u.mprotect.addr,
- op->u.mprotect.len,
- op->u.mprotect.prot,
- finished, &hvc->data);
- else
- ret = os_protect_memory(
- (void *) op->u.mprotect.addr,
- op->u.mprotect.len,
- 1, 1, 1);
- break;
- default:
- printk(KERN_ERR "Unknown op type %d in do_ops\n",
- op->type);
- BUG();
- break;
- }
- }
-
- if (ret == -ENOMEM)
- report_enomem();
-
- return ret;
+ /* TODO: Why is executable needed to be always set in the kernel? */
+ return os_map_memory((void *)virt, phys_fd, offset, len,
+ prot & UM_PROT_READ, prot & UM_PROT_WRITE,
+ 1);
}
-static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
- unsigned int prot, struct host_vm_change *hvc)
+static int kern_unmap(struct mm_id *mm_idp,
+ unsigned long virt, unsigned long len)
{
- __u64 offset;
- struct host_vm_op *last;
- int fd = -1, ret = 0;
-
- if (hvc->userspace)
- fd = phys_mapping(phys, &offset);
- else
- offset = phys;
- if (hvc->index != 0) {
- last = &hvc->ops[hvc->index - 1];
- if ((last->type == MMAP) &&
- (last->u.mmap.addr + last->u.mmap.len == virt) &&
- (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
- (last->u.mmap.offset + last->u.mmap.len == offset)) {
- last->u.mmap.len += len;
- return 0;
- }
- }
-
- if (hvc->index == ARRAY_SIZE(hvc->ops)) {
- ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
- hvc->index = 0;
- }
-
- hvc->ops[hvc->index++] = ((struct host_vm_op)
- { .type = MMAP,
- .u = { .mmap = { .addr = virt,
- .len = len,
- .prot = prot,
- .fd = fd,
- .offset = offset }
- } });
- return ret;
+ return os_unmap_memory((void *)virt, len);
}
-static int add_munmap(unsigned long addr, unsigned long len,
- struct host_vm_change *hvc)
+static int kern_mprotect(struct mm_id *mm_idp,
+ unsigned long virt, unsigned long len,
+ unsigned int prot)
{
- struct host_vm_op *last;
- int ret = 0;
-
- if (hvc->index != 0) {
- last = &hvc->ops[hvc->index - 1];
- if ((last->type == MUNMAP) &&
- (last->u.munmap.addr + last->u.mmap.len == addr)) {
- last->u.munmap.len += len;
- return 0;
- }
- }
-
- if (hvc->index == ARRAY_SIZE(hvc->ops)) {
- ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
- hvc->index = 0;
- }
-
- hvc->ops[hvc->index++] = ((struct host_vm_op)
- { .type = MUNMAP,
- .u = { .munmap = { .addr = addr,
- .len = len } } });
- return ret;
+ return os_protect_memory((void *)virt, len,
+ prot & UM_PROT_READ, prot & UM_PROT_WRITE,
+ 1);
}
-static int add_mprotect(unsigned long addr, unsigned long len,
- unsigned int prot, struct host_vm_change *hvc)
+void report_enomem(void)
{
- struct host_vm_op *last;
- int ret = 0;
-
- if (hvc->index != 0) {
- last = &hvc->ops[hvc->index - 1];
- if ((last->type == MPROTECT) &&
- (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
- (last->u.mprotect.prot == prot)) {
- last->u.mprotect.len += len;
- return 0;
- }
- }
-
- if (hvc->index == ARRAY_SIZE(hvc->ops)) {
- ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
- hvc->index = 0;
- }
-
- hvc->ops[hvc->index++] = ((struct host_vm_op)
- { .type = MPROTECT,
- .u = { .mprotect = { .addr = addr,
- .len = len,
- .prot = prot } } });
- return ret;
+ printk(KERN_ERR "UML ran out of memory on the host side! "
+ "This can happen due to a memory limitation or "
+ "vm.max_map_count has been reached.\n");
}
-#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
-
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end,
- struct host_vm_change *hvc)
+ struct vm_ops *ops)
{
pte_t *pte;
int r, w, x, prot, ret = 0;
@@ -235,15 +80,22 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
(x ? UM_PROT_EXEC : 0));
- if (hvc->force || pte_newpage(*pte)) {
+ if (pte_newpage(*pte)) {
if (pte_present(*pte)) {
- if (pte_newpage(*pte))
- ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
- PAGE_SIZE, prot, hvc);
+ if (pte_newpage(*pte)) {
+ __u64 offset;
+ unsigned long phys =
+ pte_val(*pte) & PAGE_MASK;
+ int fd = phys_mapping(phys, &offset);
+
+ ret = ops->mmap(ops->mm_idp, addr,
+ PAGE_SIZE, prot, fd,
+ offset);
+ }
} else
- ret = add_munmap(addr, PAGE_SIZE, hvc);
+ ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE);
} else if (pte_newprot(*pte))
- ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
+ ret = ops->mprotect(ops->mm_idp, addr, PAGE_SIZE, prot);
*pte = pte_mkuptodate(*pte);
} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
return ret;
@@ -251,7 +103,7 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
static inline int update_pmd_range(pud_t *pud, unsigned long addr,
unsigned long end,
- struct host_vm_change *hvc)
+ struct vm_ops *ops)
{
pmd_t *pmd;
unsigned long next;
@@ -261,19 +113,20 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr,
do {
next = pmd_addr_end(addr, end);
if (!pmd_present(*pmd)) {
- if (hvc->force || pmd_newpage(*pmd)) {
- ret = add_munmap(addr, next - addr, hvc);
+ if (pmd_newpage(*pmd)) {
+ ret = ops->unmap(ops->mm_idp, addr,
+ next - addr);
pmd_mkuptodate(*pmd);
}
}
- else ret = update_pte_range(pmd, addr, next, hvc);
+ else ret = update_pte_range(pmd, addr, next, ops);
} while (pmd++, addr = next, ((addr < end) && !ret));
return ret;
}
static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
unsigned long end,
- struct host_vm_change *hvc)
+ struct vm_ops *ops)
{
pud_t *pud;
unsigned long next;
@@ -283,19 +136,20 @@ static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
do {
next = pud_addr_end(addr, end);
if (!pud_present(*pud)) {
- if (hvc->force || pud_newpage(*pud)) {
- ret = add_munmap(addr, next - addr, hvc);
+ if (pud_newpage(*pud)) {
+ ret = ops->unmap(ops->mm_idp, addr,
+ next - addr);
pud_mkuptodate(*pud);
}
}
- else ret = update_pmd_range(pud, addr, next, hvc);
+ else ret = update_pmd_range(pud, addr, next, ops);
} while (pud++, addr = next, ((addr < end) && !ret));
return ret;
}
static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
unsigned long end,
- struct host_vm_change *hvc)
+ struct vm_ops *ops)
{
p4d_t *p4d;
unsigned long next;
@@ -305,227 +159,59 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
do {
next = p4d_addr_end(addr, end);
if (!p4d_present(*p4d)) {
- if (hvc->force || p4d_newpage(*p4d)) {
- ret = add_munmap(addr, next - addr, hvc);
+ if (p4d_newpage(*p4d)) {
+ ret = ops->unmap(ops->mm_idp, addr,
+ next - addr);
p4d_mkuptodate(*p4d);
}
} else
- ret = update_pud_range(p4d, addr, next, hvc);
+ ret = update_pud_range(p4d, addr, next, ops);
} while (p4d++, addr = next, ((addr < end) && !ret));
return ret;
}
-static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
- unsigned long end_addr, int force)
+int um_tlb_sync(struct mm_struct *mm)
{
pgd_t *pgd;
- struct host_vm_change hvc;
- unsigned long addr = start_addr, next;
- int ret = 0, userspace = 1;
+ struct vm_ops ops;
+ unsigned long addr = mm->context.sync_tlb_range_from, next;
+ int ret = 0;
+
+ if (mm->context.sync_tlb_range_to == 0)
+ return 0;
+
+ ops.mm_idp = &mm->context.id;
+ if (mm == &init_mm) {
+ ops.mmap = kern_map;
+ ops.unmap = kern_unmap;
+ ops.mprotect = kern_mprotect;
+ } else {
+ ops.mmap = map;
+ ops.unmap = unmap;
+ ops.mprotect = protect;
+ }
- hvc = INIT_HVC(mm, force, userspace);
pgd = pgd_offset(mm, addr);
do {
- next = pgd_addr_end(addr, end_addr);
+ next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
if (!pgd_present(*pgd)) {
- if (force || pgd_newpage(*pgd)) {
- ret = add_munmap(addr, next - addr, &hvc);
+ if (pgd_newpage(*pgd)) {
+ ret = ops.unmap(ops.mm_idp, addr,
+ next - addr);
pgd_mkuptodate(*pgd);
}
} else
- ret = update_p4d_range(pgd, addr, next, &hvc);
- } while (pgd++, addr = next, ((addr < end_addr) && !ret));
+ ret = update_p4d_range(pgd, addr, next, &ops);
+ } while (pgd++, addr = next,
+ ((addr < mm->context.sync_tlb_range_to) && !ret));
- if (!ret)
- ret = do_ops(&hvc, hvc.index, 1);
-
- /* This is not an else because ret is modified above */
- if (ret) {
- struct mm_id *mm_idp = &current->mm->context.id;
-
- printk(KERN_ERR "fix_range_common: failed, killing current "
- "process: %d\n", task_tgid_vnr(current));
- mm_idp->kill = 1;
- }
-}
-
-static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
-{
- struct mm_struct *mm;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- unsigned long addr, last;
- int updated = 0, err = 0, force = 0, userspace = 0;
- struct host_vm_change hvc;
-
- mm = &init_mm;
- hvc = INIT_HVC(mm, force, userspace);
- for (addr = start; addr < end;) {
- pgd = pgd_offset(mm, addr);
- if (!pgd_present(*pgd)) {
- last = ADD_ROUND(addr, PGDIR_SIZE);
- if (last > end)
- last = end;
- if (pgd_newpage(*pgd)) {
- updated = 1;
- err = add_munmap(addr, last - addr, &hvc);
- if (err < 0)
- panic("munmap failed, errno = %d\n",
- -err);
- }
- addr = last;
- continue;
- }
-
- p4d = p4d_offset(pgd, addr);
- if (!p4d_present(*p4d)) {
- last = ADD_ROUND(addr, P4D_SIZE);
- if (last > end)
- last = end;
- if (p4d_newpage(*p4d)) {
- updated = 1;
- err = add_munmap(addr, last - addr, &hvc);
- if (err < 0)
- panic("munmap failed, errno = %d\n",
- -err);
- }
- addr = last;
- continue;
- }
-
- pud = pud_offset(p4d, addr);
- if (!pud_present(*pud)) {
- last = ADD_ROUND(addr, PUD_SIZE);
- if (last > end)
- last = end;
- if (pud_newpage(*pud)) {
- updated = 1;
- err = add_munmap(addr, last - addr, &hvc);
- if (err < 0)
- panic("munmap failed, errno = %d\n",
- -err);
- }
- addr = last;
- continue;
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- last = ADD_ROUND(addr, PMD_SIZE);
- if (last > end)
- last = end;
- if (pmd_newpage(*pmd)) {
- updated = 1;
- err = add_munmap(addr, last - addr, &hvc);
- if (err < 0)
- panic("munmap failed, errno = %d\n",
- -err);
- }
- addr = last;
- continue;
- }
-
- pte = pte_offset_kernel(pmd, addr);
- if (!pte_present(*pte) || pte_newpage(*pte)) {
- updated = 1;
- err = add_munmap(addr, PAGE_SIZE, &hvc);
- if (err < 0)
- panic("munmap failed, errno = %d\n",
- -err);
- if (pte_present(*pte))
- err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
- PAGE_SIZE, 0, &hvc);
- }
- else if (pte_newprot(*pte)) {
- updated = 1;
- err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
- }
- addr += PAGE_SIZE;
- }
- if (!err)
- err = do_ops(&hvc, hvc.index, 1);
-
- if (err < 0)
- panic("flush_tlb_kernel failed, errno = %d\n", err);
- return updated;
-}
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
-{
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- struct mm_struct *mm = vma->vm_mm;
- void *flush = NULL;
- int r, w, x, prot, err = 0;
- struct mm_id *mm_id;
-
- address &= PAGE_MASK;
-
- pgd = pgd_offset(mm, address);
- if (!pgd_present(*pgd))
- goto kill;
-
- p4d = p4d_offset(pgd, address);
- if (!p4d_present(*p4d))
- goto kill;
-
- pud = pud_offset(p4d, address);
- if (!pud_present(*pud))
- goto kill;
-
- pmd = pmd_offset(pud, address);
- if (!pmd_present(*pmd))
- goto kill;
-
- pte = pte_offset_kernel(pmd, address);
-
- r = pte_read(*pte);
- w = pte_write(*pte);
- x = pte_exec(*pte);
- if (!pte_young(*pte)) {
- r = 0;
- w = 0;
- } else if (!pte_dirty(*pte)) {
- w = 0;
- }
-
- mm_id = &mm->context.id;
- prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
- (x ? UM_PROT_EXEC : 0));
- if (pte_newpage(*pte)) {
- if (pte_present(*pte)) {
- unsigned long long offset;
- int fd;
-
- fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
- err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
- 1, &flush);
- }
- else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
- }
- else if (pte_newprot(*pte))
- err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);
-
- if (err) {
- if (err == -ENOMEM)
- report_enomem();
-
- goto kill;
- }
-
- *pte = pte_mkuptodate(*pte);
+ if (ret == -ENOMEM)
+ report_enomem();
- return;
+ mm->context.sync_tlb_range_from = 0;
+ mm->context.sync_tlb_range_to = 0;
-kill:
- printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
- force_sig(SIGKILL);
+ return ret;
}
void flush_tlb_all(void)
@@ -540,60 +226,11 @@ void flush_tlb_all(void)
flush_tlb_mm(current->mm);
}
-void flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
- flush_tlb_kernel_range_common(start, end);
-}
-
-void flush_tlb_kernel_vm(void)
-{
- flush_tlb_kernel_range_common(start_vm, end_vm);
-}
-
-void __flush_tlb_one(unsigned long addr)
-{
- flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
-}
-
-static void fix_range(struct mm_struct *mm, unsigned long start_addr,
- unsigned long end_addr, int force)
-{
- /*
- * Don't bother flushing if this address space is about to be
- * destroyed.
- */
- if (atomic_read(&mm->mm_users) == 0)
- return;
-
- fix_range_common(mm, start_addr, end_addr, force);
-}
-
-void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
-{
- if (vma->vm_mm == NULL)
- flush_tlb_kernel_range_common(start, end);
- else fix_range(vma->vm_mm, start, end, 0);
-}
-EXPORT_SYMBOL(flush_tlb_range);
-
void flush_tlb_mm(struct mm_struct *mm)
{
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, mm, 0);
for_each_vma(vmi, vma)
- fix_range(mm, vma->vm_start, vma->vm_end, 0);
-}
-
-void force_flush_all(void)
-{
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, mm, 0);
-
- mmap_read_lock(mm);
- for_each_vma(vmi, vma)
- fix_range(mm, vma->vm_start, vma->vm_end, 1);
- mmap_read_unlock(mm);
+ um_tlb_mark_sync(mm, vma->vm_start, vma->vm_end);
}
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 6d8ae86ae978..97c8df9c4401 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -113,7 +113,7 @@ good_area:
#if 0
WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
#endif
- flush_tlb_page(vma, address);
+
out:
mmap_read_unlock(mm);
out_nosemaphore:
@@ -210,8 +210,17 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
if (!is_user && regs)
current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
- if (!is_user && (address >= start_vm) && (address < end_vm)) {
- flush_tlb_kernel_vm();
+ if (!is_user && init_mm.context.sync_tlb_range_to) {
+ /*
+ * Kernel has pending updates from set_ptes that were not
+ * flushed yet. Syncing them should fix the pagefault (if not
+ * we'll get here again and panic).
+ */
+ err = um_tlb_sync(&init_mm);
+ if (err == -ENOMEM)
+ report_enomem();
+ if (err)
+ panic("Failed to sync kernel TLBs: %d", err);
goto out;
}
else if (current->mm == NULL) {
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index e95f805e5004..8e594cda6d77 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -126,9 +126,6 @@ unsigned long uml_reserved; /* Also modified in mem_init */
unsigned long start_vm;
unsigned long end_vm;
-/* Set in uml_ncpus_setup */
-int ncpus = 1;
-
/* Set in early boot */
static int have_root __initdata;
static int have_console __initdata;
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index fc4450db59bd..5adf8f630049 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -17,6 +17,7 @@
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/un.h>
+#include <sys/mman.h>
#include <sys/types.h>
#include <sys/eventfd.h>
#include <poll.h>
@@ -240,6 +241,16 @@ out:
return err;
}
+int os_dup_file(int fd)
+{
+ int new_fd = dup(fd);
+
+ if (new_fd < 0)
+ return -errno;
+
+ return new_fd;
+}
+
void os_close_file(int fd)
{
close(fd);
@@ -502,44 +513,47 @@ int os_shutdown_socket(int fd, int r, int w)
return 0;
}
-int os_rcv_fd(int fd, int *helper_pid_out)
+/**
+ * os_rcv_fd_msg - receive message with (optional) FDs
+ * @fd: the FD to receive from
+ * @fds: the array for FDs to write to
+ * @n_fds: number of FDs to receive (@fds array size)
+ * @data: the message buffer
+ * @data_len: the size of the message to receive
+ *
+ * Receive a message with FDs.
+ *
+ * Returns: the size of the received message, or an error code
+ */
+ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
+ void *data, size_t data_len)
{
- int new, n;
- char buf[CMSG_SPACE(sizeof(new))];
- struct msghdr msg;
+ char buf[CMSG_SPACE(sizeof(*fds) * n_fds)];
struct cmsghdr *cmsg;
- struct iovec iov;
-
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- iov = ((struct iovec) { .iov_base = helper_pid_out,
- .iov_len = sizeof(*helper_pid_out) });
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- msg.msg_control = buf;
- msg.msg_controllen = sizeof(buf);
- msg.msg_flags = 0;
+ struct iovec iov = {
+ .iov_base = data,
+ .iov_len = data_len,
+ };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = buf,
+ .msg_controllen = sizeof(buf),
+ };
+ int n;
n = recvmsg(fd, &msg, 0);
if (n < 0)
return -errno;
- else if (n != iov.iov_len)
- *helper_pid_out = -1;
cmsg = CMSG_FIRSTHDR(&msg);
- if (cmsg == NULL) {
- printk(UM_KERN_ERR "rcv_fd didn't receive anything, "
- "error = %d\n", errno);
- return -1;
- }
- if ((cmsg->cmsg_level != SOL_SOCKET) ||
- (cmsg->cmsg_type != SCM_RIGHTS)) {
- printk(UM_KERN_ERR "rcv_fd didn't receive a descriptor\n");
- return -1;
- }
+ if (!cmsg ||
+ cmsg->cmsg_level != SOL_SOCKET ||
+ cmsg->cmsg_type != SCM_RIGHTS)
+ return n;
- new = ((int *) CMSG_DATA(cmsg))[0];
- return new;
+ memcpy(fds, CMSG_DATA(cmsg), cmsg->cmsg_len);
+ return n;
}
int os_create_unix_socket(const char *file, int len, int close_on_exec)
@@ -705,3 +719,25 @@ int os_poll(unsigned int n, const int *fds)
return -EIO;
}
+
+void *os_mmap_rw_shared(int fd, size_t size)
+{
+ void *res = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+ if (res == MAP_FAILED)
+ return NULL;
+
+ return res;
+}
+
+void *os_mremap_rw_shared(void *old_addr, size_t old_size, size_t new_size)
+{
+ void *res;
+
+ res = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE, NULL);
+
+ if (res == MAP_FAILED)
+ return NULL;
+
+ return res;
+}
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 787cfb9a0308..b11ed66c8bb0 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -8,6 +8,7 @@
#include <stdlib.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <errno.h>
#include <signal.h>
#include <string.h>
@@ -65,9 +66,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
int signals_enabled;
#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
-static int signals_blocked;
-#else
-#define signals_blocked 0
+static int signals_blocked, signals_blocked_pending;
#endif
static unsigned int signals_pending;
static unsigned int signals_active = 0;
@@ -76,14 +75,27 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
{
int enabled = signals_enabled;
- if ((signals_blocked || !enabled) && (sig == SIGIO)) {
+#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
+ if ((signals_blocked ||
+ __atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) &&
+ (sig == SIGIO)) {
+ /* increment so unblock will do another round */
+ __atomic_add_fetch(&signals_blocked_pending, 1,
+ __ATOMIC_SEQ_CST);
+ return;
+ }
+#endif
+
+ if (!enabled && (sig == SIGIO)) {
/*
* In TT_MODE_EXTERNAL, need to still call time-travel
- * handlers unless signals are also blocked for the
- * external time message processing. This will mark
- * signals_pending by itself (only if necessary.)
+ * handlers. This will mark signals_pending by itself
+ * (only if necessary.)
+ * Note we won't get here if signals are hard-blocked
+ * (which is handled above), in that case the hard-
+ * unblock will handle things.
*/
- if (!signals_blocked && time_travel_mode == TT_MODE_EXTERNAL)
+ if (time_travel_mode == TT_MODE_EXTERNAL)
sigio_run_timetravel_handlers();
else
signals_pending |= SIGIO_MASK;
@@ -380,33 +392,99 @@ int um_set_signals_trace(int enable)
#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
void mark_sigio_pending(void)
{
+ /*
+ * It would seem that this should be atomic so
+ * it isn't a read-modify-write with a signal
+ * that could happen in the middle, losing the
+ * value set by the signal.
+ *
+ * However, this function is only called when in
+ * time-travel=ext simulation mode, in which case
+ * the only signal ever pending is SIGIO, which
+ * is blocked while this can be called, and the
+ * timer signal (SIGALRM) cannot happen.
+ */
signals_pending |= SIGIO_MASK;
}
void block_signals_hard(void)
{
- if (signals_blocked)
- return;
- signals_blocked = 1;
+ signals_blocked++;
barrier();
}
void unblock_signals_hard(void)
{
+ static bool unblocking;
+
if (!signals_blocked)
+ panic("unblocking signals while not blocked");
+
+ if (--signals_blocked)
return;
- /* Must be set to 0 before we check the pending bits etc. */
- signals_blocked = 0;
+ /*
+ * Must be set to 0 before we check pending so the
+ * SIGIO handler will run as normal unless we're still
+ * going to process signals_blocked_pending.
+ */
barrier();
- if (signals_pending && signals_enabled) {
- /* this is a bit inefficient, but that's not really important */
- block_signals();
- unblock_signals();
- } else if (signals_pending & SIGIO_MASK) {
- /* we need to run time-travel handlers even if not enabled */
- sigio_run_timetravel_handlers();
+ /*
+ * Note that block_signals_hard()/unblock_signals_hard() can be called
+ * within the unblock_signals()/sigio_run_timetravel_handlers() below.
+ * This would still be prone to race conditions since it's actually a
+ * call _within_ e.g. vu_req_read_message(), where we observed this
+ * issue, which loops. Thus, if the inner call handles the recorded
+ * pending signals, we can get out of the inner call with the real
+ * signal hander no longer blocked, and still have a race. Thus don't
+ * handle unblocking in the inner call, if it happens, but only in
+ * the outermost call - 'unblocking' serves as an ownership for the
+ * signals_blocked_pending decrement.
+ */
+ if (unblocking)
+ return;
+ unblocking = true;
+
+ while (__atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) {
+ if (signals_enabled) {
+ /* signals are enabled so we can touch this */
+ signals_pending |= SIGIO_MASK;
+ /*
+ * this is a bit inefficient, but that's
+ * not really important
+ */
+ block_signals();
+ unblock_signals();
+ } else {
+ /*
+ * we need to run time-travel handlers even
+ * if not enabled
+ */
+ sigio_run_timetravel_handlers();
+ }
+
+ /*
+ * The decrement of signals_blocked_pending must be atomic so
+ * that the signal handler will either happen before or after
+ * the decrement, not during a read-modify-write:
+ * - If it happens before, it can increment it and we'll
+ * decrement it and do another round in the loop.
+ * - If it happens after it'll see 0 for both signals_blocked
+ * and signals_blocked_pending and thus run the handler as
+ * usual (subject to signals_enabled, but that's unrelated.)
+ *
+ * Note that a call to unblock_signals_hard() within the calls
+ * to unblock_signals() or sigio_run_timetravel_handlers() above
+ * will do nothing due to the 'unblocking' state, so this cannot
+ * underflow as the only one decrementing will be the outermost
+ * one.
+ */
+ if (__atomic_sub_fetch(&signals_blocked_pending, 1,
+ __ATOMIC_SEQ_CST) < 0)
+ panic("signals_blocked_pending underflow");
}
+
+ unblocking = false;
}
#endif
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index 1f9c1bffc3a6..c55430775efd 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
* Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
@@ -19,7 +20,30 @@
#include <sysdep/stub.h>
#include "../internal.h"
-extern char batch_syscall_stub[], __syscall_stub_start[];
+extern char __syscall_stub_start[];
+
+void syscall_stub_dump_error(struct mm_id *mm_idp)
+{
+ struct stub_data *proc_data = (void *)mm_idp->stack;
+ struct stub_syscall *sc;
+
+ if (proc_data->syscall_data_len < 0 ||
+ proc_data->syscall_data_len >= ARRAY_SIZE(proc_data->syscall_data))
+ panic("Syscall data was corrupted by stub (len is: %d, expected maximum: %d)!",
+ proc_data->syscall_data_len,
+ mm_idp->syscall_data_len);
+
+ sc = &proc_data->syscall_data[proc_data->syscall_data_len];
+
+ printk(UM_KERN_ERR "%s : length = %d, last offset = %d",
+ __func__, mm_idp->syscall_data_len,
+ proc_data->syscall_data_len);
+ printk(UM_KERN_ERR "%s : stub syscall type %d failed, return value = 0x%lx\n",
+ __func__, sc->syscall, proc_data->err);
+
+ print_hex_dump(UM_KERN_ERR, " syscall data: ", 0,
+ 16, 4, sc, sizeof(*sc), 0);
+}
static inline unsigned long *check_init_stack(struct mm_id * mm_idp,
unsigned long *stack)
@@ -36,22 +60,24 @@ static unsigned long syscall_regs[MAX_REG_NR];
static int __init init_syscall_regs(void)
{
get_safe_registers(syscall_regs, NULL);
+
syscall_regs[REGS_IP_INDEX] = STUB_CODE +
- ((unsigned long) batch_syscall_stub -
+ ((unsigned long) stub_syscall_handler -
(unsigned long) __syscall_stub_start);
- syscall_regs[REGS_SP_INDEX] = STUB_DATA;
+ syscall_regs[REGS_SP_INDEX] = STUB_DATA +
+ offsetof(struct stub_data, sigstack) +
+ sizeof(((struct stub_data *) 0)->sigstack) -
+ sizeof(void *);
return 0;
}
__initcall(init_syscall_regs);
-static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
+static inline long do_syscall_stub(struct mm_id *mm_idp)
{
+ struct stub_data *proc_data = (void *)mm_idp->stack;
int n, i;
- long ret, offset;
- unsigned long * data;
- unsigned long * syscall;
int err, pid = mm_idp->u.pid;
n = ptrace_setregs(pid, syscall_regs);
@@ -63,6 +89,9 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
__func__, -n);
}
+ /* Inform process how much we have filled in. */
+ proc_data->syscall_data_len = mm_idp->syscall_data_len;
+
err = ptrace(PTRACE_CONT, pid, 0, 0);
if (err)
panic("Failed to continue stub, pid = %d, errno = %d\n", pid,
@@ -71,135 +100,141 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
wait_stub_done(pid);
/*
- * When the stub stops, we find the following values on the
- * beginning of the stack:
- * (long )return_value
- * (long )offset to failed sycall-data (0, if no error)
+ * proc_data->err will be non-zero if there was an (unexpected) error.
+ * In that case, syscall_data_len points to the last executed syscall,
+ * otherwise it will be zero (but we do not need to rely on that).
*/
- ret = *((unsigned long *) mm_idp->stack);
- offset = *((unsigned long *) mm_idp->stack + 1);
- if (offset) {
- data = (unsigned long *)(mm_idp->stack + offset - STUB_DATA);
- printk(UM_KERN_ERR "%s : ret = %ld, offset = %ld, data = %p\n",
- __func__, ret, offset, data);
- syscall = (unsigned long *)((unsigned long)data + data[0]);
- printk(UM_KERN_ERR "%s: syscall %ld failed, return value = 0x%lx, expected return value = 0x%lx\n",
- __func__, syscall[0], ret, syscall[7]);
- printk(UM_KERN_ERR " syscall parameters: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
- syscall[1], syscall[2], syscall[3],
- syscall[4], syscall[5], syscall[6]);
- for (n = 1; n < data[0]/sizeof(long); n++) {
- if (n == 1)
- printk(UM_KERN_ERR " additional syscall data:");
- if (n % 4 == 1)
- printk("\n" UM_KERN_ERR " ");
- printk(" 0x%lx", data[n]);
- }
- if (n > 1)
- printk("\n");
- }
- else ret = 0;
+ if (proc_data->err < 0) {
+ syscall_stub_dump_error(mm_idp);
- *addr = check_init_stack(mm_idp, NULL);
+ /* Store error code in case someone tries to add more syscalls */
+ mm_idp->syscall_data_len = proc_data->err;
+ } else {
+ mm_idp->syscall_data_len = 0;
+ }
- return ret;
+ return mm_idp->syscall_data_len;
}
-long run_syscall_stub(struct mm_id * mm_idp, int syscall,
- unsigned long *args, long expected, void **addr,
- int done)
+int syscall_stub_flush(struct mm_id *mm_idp)
{
- unsigned long *stack = check_init_stack(mm_idp, *addr);
-
- *stack += sizeof(long);
- stack += *stack / sizeof(long);
-
- *stack++ = syscall;
- *stack++ = args[0];
- *stack++ = args[1];
- *stack++ = args[2];
- *stack++ = args[3];
- *stack++ = args[4];
- *stack++ = args[5];
- *stack++ = expected;
- *stack = 0;
-
- if (!done && ((((unsigned long) stack) & ~UM_KERN_PAGE_MASK) <
- UM_KERN_PAGE_SIZE - 10 * sizeof(long))) {
- *addr = stack;
+ int res;
+
+ if (mm_idp->syscall_data_len == 0)
return 0;
+
+ /* If an error happened already, report it and reset the state. */
+ if (mm_idp->syscall_data_len < 0) {
+ res = mm_idp->syscall_data_len;
+ mm_idp->syscall_data_len = 0;
+ return res;
}
- return do_syscall_stub(mm_idp, addr);
+ res = do_syscall_stub(mm_idp);
+ mm_idp->syscall_data_len = 0;
+
+ return res;
}
-long syscall_stub_data(struct mm_id * mm_idp,
- unsigned long *data, int data_count,
- void **addr, void **stub_addr)
+struct stub_syscall *syscall_stub_alloc(struct mm_id *mm_idp)
{
- unsigned long *stack;
- int ret = 0;
-
- /*
- * If *addr still is uninitialized, it *must* contain NULL.
- * Thus in this case do_syscall_stub correctly won't be called.
- */
- if ((((unsigned long) *addr) & ~UM_KERN_PAGE_MASK) >=
- UM_KERN_PAGE_SIZE - (10 + data_count) * sizeof(long)) {
- ret = do_syscall_stub(mm_idp, addr);
- /* in case of error, don't overwrite data on stack */
- if (ret)
- return ret;
+ struct stub_syscall *sc;
+ struct stub_data *proc_data = (struct stub_data *) mm_idp->stack;
+
+ if (mm_idp->syscall_data_len > 0 &&
+ mm_idp->syscall_data_len == ARRAY_SIZE(proc_data->syscall_data))
+ do_syscall_stub(mm_idp);
+
+ if (mm_idp->syscall_data_len < 0) {
+ /* Return dummy to retain error state. */
+ sc = &proc_data->syscall_data[0];
+ } else {
+ sc = &proc_data->syscall_data[mm_idp->syscall_data_len];
+ mm_idp->syscall_data_len += 1;
}
+ memset(sc, 0, sizeof(*sc));
- stack = check_init_stack(mm_idp, *addr);
- *addr = stack;
+ return sc;
+}
- *stack = data_count * sizeof(long);
+static struct stub_syscall *syscall_stub_get_previous(struct mm_id *mm_idp,
+ int syscall_type,
+ unsigned long virt)
+{
+ if (mm_idp->syscall_data_len > 0) {
+ struct stub_data *proc_data = (void *) mm_idp->stack;
+ struct stub_syscall *sc;
- memcpy(stack + 1, data, data_count * sizeof(long));
+ sc = &proc_data->syscall_data[mm_idp->syscall_data_len - 1];
- *stub_addr = (void *)(((unsigned long)(stack + 1) &
- ~UM_KERN_PAGE_MASK) + STUB_DATA);
+ if (sc->syscall == syscall_type &&
+ sc->mem.addr + sc->mem.length == virt)
+ return sc;
+ }
- return 0;
+ return NULL;
}
-int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, int prot,
- int phys_fd, unsigned long long offset, int done, void **data)
+int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot,
+ int phys_fd, unsigned long long offset)
{
- int ret;
- unsigned long args[] = { virt, len, prot,
- MAP_SHARED | MAP_FIXED, phys_fd,
- MMAP_OFFSET(offset) };
+ struct stub_syscall *sc;
- ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt,
- data, done);
+ /* Compress with previous syscall if that is possible */
+ sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MMAP, virt);
+ if (sc && sc->mem.prot == prot && sc->mem.fd == phys_fd &&
+ sc->mem.offset == MMAP_OFFSET(offset - sc->mem.length)) {
+ sc->mem.length += len;
+ return 0;
+ }
- return ret;
+ sc = syscall_stub_alloc(mm_idp);
+ sc->syscall = STUB_SYSCALL_MMAP;
+ sc->mem.addr = virt;
+ sc->mem.length = len;
+ sc->mem.prot = prot;
+ sc->mem.fd = phys_fd;
+ sc->mem.offset = MMAP_OFFSET(offset);
+
+ return 0;
}
-int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
- int done, void **data)
+int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len)
{
- int ret;
- unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0,
- 0 };
+ struct stub_syscall *sc;
- ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0,
- data, done);
+ /* Compress with previous syscall if that is possible */
+ sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MUNMAP, addr);
+ if (sc) {
+ sc->mem.length += len;
+ return 0;
+ }
- return ret;
+ sc = syscall_stub_alloc(mm_idp);
+ sc->syscall = STUB_SYSCALL_MUNMAP;
+ sc->mem.addr = addr;
+ sc->mem.length = len;
+
+ return 0;
}
-int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
- unsigned int prot, int done, void **data)
+int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len,
+ unsigned int prot)
{
- int ret;
- unsigned long args[] = { addr, len, prot, 0, 0, 0 };
+ struct stub_syscall *sc;
- ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0,
- data, done);
+ /* Compress with previous syscall if that is possible */
+ sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MPROTECT, addr);
+ if (sc && sc->mem.prot == prot) {
+ sc->mem.length += len;
+ return 0;
+ }
- return ret;
+ sc = syscall_stub_alloc(mm_idp);
+ sc->syscall = STUB_SYSCALL_MPROTECT;
+ sc->mem.addr = addr;
+ sc->mem.length = len;
+ sc->mem.prot = prot;
+
+ return 0;
}
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 41a288dcfc34..f7088345b3fc 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -23,6 +23,7 @@
#include <skas.h>
#include <sysdep/stub.h>
#include <linux/threads.h>
+#include <timetravel.h>
#include "../internal.h"
int is_skas_winch(int pid, int fd, void *data)
@@ -253,7 +254,6 @@ static int userspace_tramp(void *stack)
}
int userspace_pid[NR_CPUS];
-int kill_userspace_mm[NR_CPUS];
/**
* start_userspace() - prepare a new userspace process
@@ -345,8 +345,20 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
interrupt_end();
while (1) {
- if (kill_userspace_mm[0])
+ time_travel_print_bc_msg();
+
+ current_mm_sync();
+
+ /* Flush out any pending syscalls */
+ err = syscall_stub_flush(current_mm_id());
+ if (err) {
+ if (err == -ENOMEM)
+ report_enomem();
+
+ printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
+ __func__, -err);
fatal_sigsegv();
+ }
/*
* This can legitimately fail if the process loads a
@@ -461,113 +473,6 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
}
}
-static unsigned long thread_regs[MAX_REG_NR];
-static unsigned long thread_fp_regs[FP_SIZE];
-
-static int __init init_thread_regs(void)
-{
- get_safe_registers(thread_regs, thread_fp_regs);
- /* Set parent's instruction pointer to start of clone-stub */
- thread_regs[REGS_IP_INDEX] = STUB_CODE +
- (unsigned long) stub_clone_handler -
- (unsigned long) __syscall_stub_start;
- thread_regs[REGS_SP_INDEX] = STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE -
- sizeof(void *);
-#ifdef __SIGNAL_FRAMESIZE
- thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE;
-#endif
- return 0;
-}
-
-__initcall(init_thread_regs);
-
-int copy_context_skas0(unsigned long new_stack, int pid)
-{
- int err;
- unsigned long current_stack = current_stub_stack();
- struct stub_data *data = (struct stub_data *) current_stack;
- struct stub_data *child_data = (struct stub_data *) new_stack;
- unsigned long long new_offset;
- int new_fd = phys_mapping(uml_to_phys((void *)new_stack), &new_offset);
-
- /*
- * prepare offset and fd of child's stack as argument for parent's
- * and child's mmap2 calls
- */
- *data = ((struct stub_data) {
- .offset = MMAP_OFFSET(new_offset),
- .fd = new_fd,
- .parent_err = -ESRCH,
- .child_err = 0,
- });
-
- *child_data = ((struct stub_data) {
- .child_err = -ESRCH,
- });
-
- err = ptrace_setregs(pid, thread_regs);
- if (err < 0) {
- err = -errno;
- printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno = %d\n",
- __func__, pid, -err);
- return err;
- }
-
- err = put_fp_registers(pid, thread_fp_regs);
- if (err < 0) {
- printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err = %d\n",
- __func__, pid, err);
- return err;
- }
-
- /*
- * Wait, until parent has finished its work: read child's pid from
- * parent's stack, and check, if bad result.
- */
- err = ptrace(PTRACE_CONT, pid, 0, 0);
- if (err) {
- err = -errno;
- printk(UM_KERN_ERR "Failed to continue new process, pid = %d, errno = %d\n",
- pid, errno);
- return err;
- }
-
- wait_stub_done(pid);
-
- pid = data->parent_err;
- if (pid < 0) {
- printk(UM_KERN_ERR "%s - stub-parent reports error %d\n",
- __func__, -pid);
- return pid;
- }
-
- /*
- * Wait, until child has finished too: read child's result from
- * child's stack and check it.
- */
- wait_stub_done(pid);
- if (child_data->child_err != STUB_DATA) {
- printk(UM_KERN_ERR "%s - stub-child %d reports error %ld\n",
- __func__, pid, data->child_err);
- err = data->child_err;
- goto out_kill;
- }
-
- if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
- (void *)PTRACE_O_TRACESYSGOOD) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
- __func__, errno);
- goto out_kill;
- }
-
- return pid;
-
- out_kill:
- os_kill_ptraced_process(pid, 1);
- return err;
-}
-
void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
{
(*buf)[0].JB_IP = (unsigned long) handler;
@@ -684,5 +589,4 @@ void reboot_skas(void)
void __switch_mm(struct mm_id *mm_idp)
{
userspace_pid[0] = mm_idp->u.pid;
- kill_userspace_mm[0] = mm_idp->kill;
}
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 89ad9f4f865c..93fc82c01aba 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -17,6 +17,7 @@
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/resource.h>
+#include <asm/ldt.h>
#include <asm/unistd.h>
#include <init.h>
#include <os.h>
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
index 2106a2bd152b..a46b1397ad01 100644
--- a/arch/x86/Makefile.um
+++ b/arch/x86/Makefile.um
@@ -9,6 +9,7 @@ core-y += arch/x86/crypto/
#
ifeq ($(CONFIG_CC_IS_CLANG),y)
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
+KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
endif
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 64fbd2dbc5b7..a9088250770f 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -62,11 +62,6 @@ void xen_arch_unregister_cpu(int num);
#ifdef CONFIG_PVH
void __init xen_pvh_init(struct boot_params *boot_params);
void __init mem_map_via_hcall(struct boot_params *boot_params_p);
-#ifdef CONFIG_XEN_PVH
-void __init xen_reserve_extra_memory(struct boot_params *bootp);
-#else
-static inline void xen_reserve_extra_memory(struct boot_params *bootp) { }
-#endif
#endif
/* Lazy mode for batching updates / context switch */
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index 9a7c03d47861..51b805c727fc 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -38,7 +38,7 @@ static bool __read_mostly sched_itmt_capable;
*/
unsigned int __read_mostly sysctl_sched_itmt_enabled;
-static int sched_itmt_update_handler(struct ctl_table *table, int write,
+static int sched_itmt_update_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
unsigned int old_sysctl;
diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c
index 8c2d4b8de25d..944e0290f2c0 100644
--- a/arch/x86/platform/pvh/enlighten.c
+++ b/arch/x86/platform/pvh/enlighten.c
@@ -75,9 +75,6 @@ static void __init init_pvh_bootparams(bool xen_guest)
} else
xen_raw_printk("Warning: Can fit ISA range into e820\n");
- if (xen_guest)
- xen_reserve_extra_memory(&pvh_bootparams);
-
pvh_bootparams.hdr.cmd_line_ptr =
pvh_start_info.cmdline_paddr;
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 8bc72a51b257..36e67fc97c22 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -9,9 +9,9 @@ else
BITS := 64
endif
-obj-y = bugs_$(BITS).o delay.o fault.o ldt.o \
+obj-y = bugs_$(BITS).o delay.o fault.o \
ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
- stub_$(BITS).o stub_segv.o \
+ stub_segv.o \
sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
mem_$(BITS).o subarch.o os-Linux/
@@ -31,7 +31,6 @@ obj-y += syscalls_64.o vdso/
subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o \
../lib/memmove_64.o ../lib/memset_64.o
-subarch-$(CONFIG_PREEMPTION) += ../entry/thunk_64.o
endif
diff --git a/arch/x86/um/asm/mm_context.h b/arch/x86/um/asm/mm_context.h
deleted file mode 100644
index dc32dc023c2f..000000000000
--- a/arch/x86/um/asm/mm_context.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (C) 2004 Fujitsu Siemens Computers GmbH
- * Licensed under the GPL
- *
- * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
- */
-
-#ifndef __ASM_LDT_H
-#define __ASM_LDT_H
-
-#include <linux/mutex.h>
-#include <asm/ldt.h>
-
-#define LDT_PAGES_MAX \
- ((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE)
-#define LDT_ENTRIES_PER_PAGE \
- (PAGE_SIZE/LDT_ENTRY_SIZE)
-#define LDT_DIRECT_ENTRIES \
- ((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE)
-
-struct ldt_entry {
- __u32 a;
- __u32 b;
-};
-
-typedef struct uml_ldt {
- int entry_count;
- struct mutex lock;
- union {
- struct ldt_entry * pages[LDT_PAGES_MAX];
- struct ldt_entry entries[LDT_DIRECT_ENTRIES];
- } u;
-} uml_ldt_t;
-
-#define LDT_entry_a(info) \
- ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-
-#define LDT_entry_b(info) \
- (((info)->base_addr & 0xff000000) | \
- (((info)->base_addr & 0x00ff0000) >> 16) | \
- ((info)->limit & 0xf0000) | \
- (((info)->read_exec_only ^ 1) << 9) | \
- ((info)->contents << 10) | \
- (((info)->seg_not_present ^ 1) << 15) | \
- ((info)->seg_32bit << 22) | \
- ((info)->limit_in_pages << 23) | \
- ((info)->useable << 20) | \
- 0x7000)
-
-#define _LDT_empty(info) (\
- (info)->base_addr == 0 && \
- (info)->limit == 0 && \
- (info)->contents == 0 && \
- (info)->read_exec_only == 1 && \
- (info)->seg_32bit == 0 && \
- (info)->limit_in_pages == 0 && \
- (info)->seg_not_present == 1 && \
- (info)->useable == 0 )
-
-#ifdef CONFIG_X86_64
-#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
-#else
-#define LDT_empty(info) (_LDT_empty(info))
-#endif
-
-struct uml_arch_mm_context {
- uml_ldt_t ldt;
-};
-
-#endif
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
deleted file mode 100644
index 255a44dd415a..000000000000
--- a/arch/x86/um/ldt.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/syscalls.h>
-#include <linux/uaccess.h>
-#include <asm/unistd.h>
-#include <os.h>
-#include <skas.h>
-#include <sysdep/tls.h>
-
-static inline int modify_ldt (int func, void *ptr, unsigned long bytecount)
-{
- return syscall(__NR_modify_ldt, func, ptr, bytecount);
-}
-
-static long write_ldt_entry(struct mm_id *mm_idp, int func,
- struct user_desc *desc, void **addr, int done)
-{
- long res;
- void *stub_addr;
-
- BUILD_BUG_ON(sizeof(*desc) % sizeof(long));
-
- res = syscall_stub_data(mm_idp, (unsigned long *)desc,
- sizeof(*desc) / sizeof(long),
- addr, &stub_addr);
- if (!res) {
- unsigned long args[] = { func,
- (unsigned long)stub_addr,
- sizeof(*desc),
- 0, 0, 0 };
- res = run_syscall_stub(mm_idp, __NR_modify_ldt, args,
- 0, addr, done);
- }
-
- return res;
-}
-
-/*
- * In skas mode, we hold our own ldt data in UML.
- * Thus, the code implementing sys_modify_ldt_skas
- * is very similar to (and mostly stolen from) sys_modify_ldt
- * for arch/i386/kernel/ldt.c
- * The routines copied and modified in part are:
- * - read_ldt
- * - read_default_ldt
- * - write_ldt
- * - sys_modify_ldt_skas
- */
-
-static int read_ldt(void __user * ptr, unsigned long bytecount)
-{
- int i, err = 0;
- unsigned long size;
- uml_ldt_t *ldt = &current->mm->context.arch.ldt;
-
- if (!ldt->entry_count)
- goto out;
- if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
- bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
- err = bytecount;
-
- mutex_lock(&ldt->lock);
- if (ldt->entry_count <= LDT_DIRECT_ENTRIES) {
- size = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES;
- if (size > bytecount)
- size = bytecount;
- if (copy_to_user(ptr, ldt->u.entries, size))
- err = -EFAULT;
- bytecount -= size;
- ptr += size;
- }
- else {
- for (i=0; i<ldt->entry_count/LDT_ENTRIES_PER_PAGE && bytecount;
- i++) {
- size = PAGE_SIZE;
- if (size > bytecount)
- size = bytecount;
- if (copy_to_user(ptr, ldt->u.pages[i], size)) {
- err = -EFAULT;
- break;
- }
- bytecount -= size;
- ptr += size;
- }
- }
- mutex_unlock(&ldt->lock);
-
- if (bytecount == 0 || err == -EFAULT)
- goto out;
-
- if (clear_user(ptr, bytecount))
- err = -EFAULT;
-
-out:
- return err;
-}
-
-static int read_default_ldt(void __user * ptr, unsigned long bytecount)
-{
- int err;
-
- if (bytecount > 5*LDT_ENTRY_SIZE)
- bytecount = 5*LDT_ENTRY_SIZE;
-
- err = bytecount;
- /*
- * UML doesn't support lcall7 and lcall27.
- * So, we don't really have a default ldt, but emulate
- * an empty ldt of common host default ldt size.
- */
- if (clear_user(ptr, bytecount))
- err = -EFAULT;
-
- return err;
-}
-
-static int write_ldt(void __user * ptr, unsigned long bytecount, int func)
-{
- uml_ldt_t *ldt = &current->mm->context.arch.ldt;
- struct mm_id * mm_idp = &current->mm->context.id;
- int i, err;
- struct user_desc ldt_info;
- struct ldt_entry entry0, *ldt_p;
- void *addr = NULL;
-
- err = -EINVAL;
- if (bytecount != sizeof(ldt_info))
- goto out;
- err = -EFAULT;
- if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
- goto out;
-
- err = -EINVAL;
- if (ldt_info.entry_number >= LDT_ENTRIES)
- goto out;
- if (ldt_info.contents == 3) {
- if (func == 1)
- goto out;
- if (ldt_info.seg_not_present == 0)
- goto out;
- }
-
- mutex_lock(&ldt->lock);
-
- err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1);
- if (err)
- goto out_unlock;
-
- if (ldt_info.entry_number >= ldt->entry_count &&
- ldt_info.entry_number >= LDT_DIRECT_ENTRIES) {
- for (i=ldt->entry_count/LDT_ENTRIES_PER_PAGE;
- i*LDT_ENTRIES_PER_PAGE <= ldt_info.entry_number;
- i++) {
- if (i == 0)
- memcpy(&entry0, ldt->u.entries,
- sizeof(entry0));
- ldt->u.pages[i] = (struct ldt_entry *)
- __get_free_page(GFP_KERNEL|__GFP_ZERO);
- if (!ldt->u.pages[i]) {
- err = -ENOMEM;
- /* Undo the change in host */
- memset(&ldt_info, 0, sizeof(ldt_info));
- write_ldt_entry(mm_idp, 1, &ldt_info, &addr, 1);
- goto out_unlock;
- }
- if (i == 0) {
- memcpy(ldt->u.pages[0], &entry0,
- sizeof(entry0));
- memcpy(ldt->u.pages[0]+1, ldt->u.entries+1,
- sizeof(entry0)*(LDT_DIRECT_ENTRIES-1));
- }
- ldt->entry_count = (i + 1) * LDT_ENTRIES_PER_PAGE;
- }
- }
- if (ldt->entry_count <= ldt_info.entry_number)
- ldt->entry_count = ldt_info.entry_number + 1;
-
- if (ldt->entry_count <= LDT_DIRECT_ENTRIES)
- ldt_p = ldt->u.entries + ldt_info.entry_number;
- else
- ldt_p = ldt->u.pages[ldt_info.entry_number/LDT_ENTRIES_PER_PAGE] +
- ldt_info.entry_number%LDT_ENTRIES_PER_PAGE;
-
- if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
- (func == 1 || LDT_empty(&ldt_info))) {
- ldt_p->a = 0;
- ldt_p->b = 0;
- }
- else{
- if (func == 1)
- ldt_info.useable = 0;
- ldt_p->a = LDT_entry_a(&ldt_info);
- ldt_p->b = LDT_entry_b(&ldt_info);
- }
- err = 0;
-
-out_unlock:
- mutex_unlock(&ldt->lock);
-out:
- return err;
-}
-
-static long do_modify_ldt_skas(int func, void __user *ptr,
- unsigned long bytecount)
-{
- int ret = -ENOSYS;
-
- switch (func) {
- case 0:
- ret = read_ldt(ptr, bytecount);
- break;
- case 1:
- case 0x11:
- ret = write_ldt(ptr, bytecount, func);
- break;
- case 2:
- ret = read_default_ldt(ptr, bytecount);
- break;
- }
- return ret;
-}
-
-static DEFINE_SPINLOCK(host_ldt_lock);
-static short dummy_list[9] = {0, -1};
-static short * host_ldt_entries = NULL;
-
-static void ldt_get_host_info(void)
-{
- long ret;
- struct ldt_entry * ldt;
- short *tmp;
- int i, size, k, order;
-
- spin_lock(&host_ldt_lock);
-
- if (host_ldt_entries != NULL) {
- spin_unlock(&host_ldt_lock);
- return;
- }
- host_ldt_entries = dummy_list+1;
-
- spin_unlock(&host_ldt_lock);
-
- for (i = LDT_PAGES_MAX-1, order=0; i; i>>=1, order++)
- ;
-
- ldt = (struct ldt_entry *)
- __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
- if (ldt == NULL) {
- printk(KERN_ERR "ldt_get_host_info: couldn't allocate buffer "
- "for host ldt\n");
- return;
- }
-
- ret = modify_ldt(0, ldt, (1<<order)*PAGE_SIZE);
- if (ret < 0) {
- printk(KERN_ERR "ldt_get_host_info: couldn't read host ldt\n");
- goto out_free;
- }
- if (ret == 0) {
- /* default_ldt is active, simply write an empty entry 0 */
- host_ldt_entries = dummy_list;
- goto out_free;
- }
-
- for (i=0, size=0; i<ret/LDT_ENTRY_SIZE; i++) {
- if (ldt[i].a != 0 || ldt[i].b != 0)
- size++;
- }
-
- if (size < ARRAY_SIZE(dummy_list))
- host_ldt_entries = dummy_list;
- else {
- size = (size + 1) * sizeof(dummy_list[0]);
- tmp = kmalloc(size, GFP_KERNEL);
- if (tmp == NULL) {
- printk(KERN_ERR "ldt_get_host_info: couldn't allocate "
- "host ldt list\n");
- goto out_free;
- }
- host_ldt_entries = tmp;
- }
-
- for (i=0, k=0; i<ret/LDT_ENTRY_SIZE; i++) {
- if (ldt[i].a != 0 || ldt[i].b != 0)
- host_ldt_entries[k++] = i;
- }
- host_ldt_entries[k] = -1;
-
-out_free:
- free_pages((unsigned long)ldt, order);
-}
-
-long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
-{
- struct user_desc desc;
- short * num_p;
- int i;
- long page, err=0;
- void *addr = NULL;
-
-
- mutex_init(&new_mm->arch.ldt.lock);
-
- if (!from_mm) {
- memset(&desc, 0, sizeof(desc));
- /*
- * Now we try to retrieve info about the ldt, we
- * inherited from the host. All ldt-entries found
- * will be reset in the following loop
- */
- ldt_get_host_info();
- for (num_p=host_ldt_entries; *num_p != -1; num_p++) {
- desc.entry_number = *num_p;
- err = write_ldt_entry(&new_mm->id, 1, &desc,
- &addr, *(num_p + 1) == -1);
- if (err)
- break;
- }
- new_mm->arch.ldt.entry_count = 0;
-
- goto out;
- }
-
- /*
- * Our local LDT is used to supply the data for
- * modify_ldt(READLDT), if PTRACE_LDT isn't available,
- * i.e., we have to use the stub for modify_ldt, which
- * can't handle the big read buffer of up to 64kB.
- */
- mutex_lock(&from_mm->arch.ldt.lock);
- if (from_mm->arch.ldt.entry_count <= LDT_DIRECT_ENTRIES)
- memcpy(new_mm->arch.ldt.u.entries, from_mm->arch.ldt.u.entries,
- sizeof(new_mm->arch.ldt.u.entries));
- else {
- i = from_mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
- while (i-->0) {
- page = __get_free_page(GFP_KERNEL|__GFP_ZERO);
- if (!page) {
- err = -ENOMEM;
- break;
- }
- new_mm->arch.ldt.u.pages[i] =
- (struct ldt_entry *) page;
- memcpy(new_mm->arch.ldt.u.pages[i],
- from_mm->arch.ldt.u.pages[i], PAGE_SIZE);
- }
- }
- new_mm->arch.ldt.entry_count = from_mm->arch.ldt.entry_count;
- mutex_unlock(&from_mm->arch.ldt.lock);
-
- out:
- return err;
-}
-
-
-void free_ldt(struct mm_context *mm)
-{
- int i;
-
- if (mm->arch.ldt.entry_count > LDT_DIRECT_ENTRIES) {
- i = mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
- while (i-- > 0)
- free_page((long) mm->arch.ldt.u.pages[i]);
- }
- mm->arch.ldt.entry_count = 0;
-}
-
-SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
- unsigned long , bytecount)
-{
- /* See non-um modify_ldt() for why we do this cast */
- return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount);
-}
diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h
index ce0ca46ad383..dc89f4423454 100644
--- a/arch/x86/um/shared/sysdep/stub.h
+++ b/arch/x86/um/shared/sysdep/stub.h
@@ -12,4 +12,4 @@
#endif
extern void stub_segv_handler(int, siginfo_t *, void *);
-extern void stub_clone_handler(void);
+extern void stub_syscall_handler(void);
diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h
index ea8b5a2d67af..0b44a86dd346 100644
--- a/arch/x86/um/shared/sysdep/stub_32.h
+++ b/arch/x86/um/shared/sysdep/stub_32.h
@@ -6,6 +6,7 @@
#ifndef __SYSDEP_STUB_H
#define __SYSDEP_STUB_H
+#include <stddef.h>
#include <asm/ptrace.h>
#include <generated/asm-offsets.h>
@@ -79,33 +80,31 @@ static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
return ret;
}
-static __always_inline void trap_myself(void)
+static __always_inline long stub_syscall6(long syscall, long arg1, long arg2,
+ long arg3, long arg4, long arg5,
+ long arg6)
{
- __asm("int3");
+ struct syscall_args {
+ int ebx, ebp;
+ } args = { arg1, arg6 };
+ long ret;
+
+ __asm__ volatile ("pushl %%ebp;"
+ "movl 0x4(%%ebx),%%ebp;"
+ "movl (%%ebx),%%ebx;"
+ "int $0x80;"
+ "popl %%ebp"
+ : "=a" (ret)
+ : "0" (syscall), "b" (&args),
+ "c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5)
+ : "memory");
+
+ return ret;
}
-static __always_inline void remap_stack_and_trap(void)
+static __always_inline void trap_myself(void)
{
- __asm__ volatile (
- "movl %%esp,%%ebx ;"
- "andl %0,%%ebx ;"
- "movl %1,%%eax ;"
- "movl %%ebx,%%edi ; addl %2,%%edi ; movl (%%edi),%%edi ;"
- "movl %%ebx,%%ebp ; addl %3,%%ebp ; movl (%%ebp),%%ebp ;"
- "int $0x80 ;"
- "addl %4,%%ebx ; movl %%eax, (%%ebx) ;"
- "int $3"
- : :
- "g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1)),
- "g" (STUB_MMAP_NR),
- "g" (UML_STUB_FIELD_FD),
- "g" (UML_STUB_FIELD_OFFSET),
- "g" (UML_STUB_FIELD_CHILD_ERR),
- "c" (STUB_DATA_PAGES * UM_KERN_PAGE_SIZE),
- "d" (PROT_READ | PROT_WRITE),
- "S" (MAP_FIXED | MAP_SHARED)
- :
- "memory");
+ __asm("int3");
}
static __always_inline void *get_stub_data(void)
diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h
index b24168ef0ac4..67f44284f1aa 100644
--- a/arch/x86/um/shared/sysdep/stub_64.h
+++ b/arch/x86/um/shared/sysdep/stub_64.h
@@ -6,6 +6,7 @@
#ifndef __SYSDEP_STUB_H
#define __SYSDEP_STUB_H
+#include <stddef.h>
#include <sysdep/ptrace_user.h>
#include <generated/asm-offsets.h>
#include <linux/stddef.h>
@@ -79,35 +80,25 @@ static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
return ret;
}
-static __always_inline void trap_myself(void)
+static __always_inline long stub_syscall6(long syscall, long arg1, long arg2,
+ long arg3, long arg4, long arg5,
+ long arg6)
{
- __asm("int3");
+ long ret;
+
+ __asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; movq %7,%%r9 ; "
+ __syscall
+ : "=a" (ret)
+ : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3),
+ "g" (arg4), "g" (arg5), "g" (arg6)
+ : __syscall_clobber, "r10", "r8", "r9");
+
+ return ret;
}
-static __always_inline void remap_stack_and_trap(void)
+static __always_inline void trap_myself(void)
{
- __asm__ volatile (
- "movq %0,%%rax ;"
- "movq %%rsp,%%rdi ;"
- "andq %1,%%rdi ;"
- "movq %2,%%r10 ;"
- "movq %%rdi,%%r8 ; addq %3,%%r8 ; movq (%%r8),%%r8 ;"
- "movq %%rdi,%%r9 ; addq %4,%%r9 ; movq (%%r9),%%r9 ;"
- __syscall ";"
- "movq %%rsp,%%rdi ; andq %1,%%rdi ;"
- "addq %5,%%rdi ; movq %%rax, (%%rdi) ;"
- "int3"
- : :
- "g" (STUB_MMAP_NR),
- "g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1)),
- "g" (MAP_FIXED | MAP_SHARED),
- "g" (UML_STUB_FIELD_FD),
- "g" (UML_STUB_FIELD_OFFSET),
- "g" (UML_STUB_FIELD_CHILD_ERR),
- "S" (STUB_DATA_PAGES * UM_KERN_PAGE_SIZE),
- "d" (PROT_READ | PROT_WRITE)
- :
- __syscall_clobber, "r10", "r8", "r9");
+ __asm("int3");
}
static __always_inline void *get_stub_data(void)
diff --git a/arch/x86/um/stub_32.S b/arch/x86/um/stub_32.S
deleted file mode 100644
index 8291899e6aaf..000000000000
--- a/arch/x86/um/stub_32.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <as-layout.h>
-
-.section .__syscall_stub, "ax"
-
- .globl batch_syscall_stub
-batch_syscall_stub:
- /* %esp comes in as "top of page" */
- mov %esp, %ecx
- /* %esp has pointer to first operation */
- add $8, %esp
-again:
- /* load length of additional data */
- mov 0x0(%esp), %eax
-
- /* if(length == 0) : end of list */
- /* write possible 0 to header */
- mov %eax, 0x4(%ecx)
- cmpl $0, %eax
- jz done
-
- /* save current pointer */
- mov %esp, 0x4(%ecx)
-
- /* skip additional data */
- add %eax, %esp
-
- /* load syscall-# */
- pop %eax
-
- /* load syscall params */
- pop %ebx
- pop %ecx
- pop %edx
- pop %esi
- pop %edi
- pop %ebp
-
- /* execute syscall */
- int $0x80
-
- /* restore top of page pointer in %ecx */
- mov %esp, %ecx
- andl $(~UM_KERN_PAGE_SIZE) + 1, %ecx
-
- /* check return value */
- pop %ebx
- cmp %ebx, %eax
- je again
-
-done:
- /* save return value */
- mov %eax, (%ecx)
-
- /* stop */
- int3
diff --git a/arch/x86/um/stub_64.S b/arch/x86/um/stub_64.S
deleted file mode 100644
index f3404640197a..000000000000
--- a/arch/x86/um/stub_64.S
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <as-layout.h>
-
-.section .__syscall_stub, "ax"
- .globl batch_syscall_stub
-batch_syscall_stub:
- /* %rsp has the pointer to first operation */
- mov %rsp, %rbx
- add $0x10, %rsp
-again:
- /* load length of additional data */
- mov 0x0(%rsp), %rax
-
- /* if(length == 0) : end of list */
- /* write possible 0 to header */
- mov %rax, 8(%rbx)
- cmp $0, %rax
- jz done
-
- /* save current pointer */
- mov %rsp, 8(%rbx)
-
- /* skip additional data */
- add %rax, %rsp
-
- /* load syscall-# */
- pop %rax
-
- /* load syscall params */
- pop %rdi
- pop %rsi
- pop %rdx
- pop %r10
- pop %r8
- pop %r9
-
- /* execute syscall */
- syscall
-
- /* check return value */
- pop %rcx
- cmp %rcx, %rax
- je again
-
-done:
- /* save return value */
- mov %rax, (%rbx)
-
- /* stop */
- int3
diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c
index d301deee041f..fbb129023080 100644
--- a/arch/x86/um/tls_32.c
+++ b/arch/x86/um/tls_32.c
@@ -11,6 +11,7 @@
#include <os.h>
#include <skas.h>
#include <sysdep/tls.h>
+#include <asm/desc.h>
/*
* If needed we can detect when it's uninitialized.
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 27a2a02ef8fb..728a4366ca85 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -9,6 +9,7 @@
#include <asm/io_apic.h>
#include <asm/hypervisor.h>
#include <asm/e820/api.h>
+#include <asm/setup.h>
#include <xen/xen.h>
#include <asm/xen/interface.h>
@@ -27,54 +28,6 @@
bool __ro_after_init xen_pvh;
EXPORT_SYMBOL_GPL(xen_pvh);
-void __init xen_pvh_init(struct boot_params *boot_params)
-{
- u32 msr;
- u64 pfn;
-
- xen_pvh = 1;
- xen_domain_type = XEN_HVM_DOMAIN;
- xen_start_flags = pvh_start_info.flags;
-
- msr = cpuid_ebx(xen_cpuid_base() + 2);
- pfn = __pa(hypercall_page);
- wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
-
- if (xen_initial_domain())
- x86_init.oem.arch_setup = xen_add_preferred_consoles;
- x86_init.oem.banner = xen_banner;
-
- xen_efi_init(boot_params);
-
- if (xen_initial_domain()) {
- struct xen_platform_op op = {
- .cmd = XENPF_get_dom0_console,
- };
- int ret = HYPERVISOR_platform_op(&op);
-
- if (ret > 0)
- xen_init_vga(&op.u.dom0_console,
- min(ret * sizeof(char),
- sizeof(op.u.dom0_console)),
- &boot_params->screen_info);
- }
-}
-
-void __init mem_map_via_hcall(struct boot_params *boot_params_p)
-{
- struct xen_memory_map memmap;
- int rc;
-
- memmap.nr_entries = ARRAY_SIZE(boot_params_p->e820_table);
- set_xen_guest_handle(memmap.buffer, boot_params_p->e820_table);
- rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
- if (rc) {
- xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
- BUG();
- }
- boot_params_p->e820_entries = memmap.nr_entries;
-}
-
/*
* Reserve e820 UNUSABLE regions to inflate the memory balloon.
*
@@ -89,8 +42,9 @@ void __init mem_map_via_hcall(struct boot_params *boot_params_p)
* hypervisor should notify us which memory ranges are suitable for creating
* foreign mappings, but that's not yet implemented.
*/
-void __init xen_reserve_extra_memory(struct boot_params *bootp)
+static void __init pvh_reserve_extra_memory(void)
{
+ struct boot_params *bootp = &boot_params;
unsigned int i, ram_pages = 0, extra_pages;
for (i = 0; i < bootp->e820_entries; i++) {
@@ -141,3 +95,58 @@ void __init xen_reserve_extra_memory(struct boot_params *bootp)
xen_add_extra_mem(PFN_UP(e->addr), pages);
}
}
+
+static void __init pvh_arch_setup(void)
+{
+ pvh_reserve_extra_memory();
+
+ if (xen_initial_domain())
+ xen_add_preferred_consoles();
+}
+
+void __init xen_pvh_init(struct boot_params *boot_params)
+{
+ u32 msr;
+ u64 pfn;
+
+ xen_pvh = 1;
+ xen_domain_type = XEN_HVM_DOMAIN;
+ xen_start_flags = pvh_start_info.flags;
+
+ msr = cpuid_ebx(xen_cpuid_base() + 2);
+ pfn = __pa(hypercall_page);
+ wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+
+ x86_init.oem.arch_setup = pvh_arch_setup;
+ x86_init.oem.banner = xen_banner;
+
+ xen_efi_init(boot_params);
+
+ if (xen_initial_domain()) {
+ struct xen_platform_op op = {
+ .cmd = XENPF_get_dom0_console,
+ };
+ int ret = HYPERVISOR_platform_op(&op);
+
+ if (ret > 0)
+ xen_init_vga(&op.u.dom0_console,
+ min(ret * sizeof(char),
+ sizeof(op.u.dom0_console)),
+ &boot_params->screen_info);
+ }
+}
+
+void __init mem_map_via_hcall(struct boot_params *boot_params_p)
+{
+ struct xen_memory_map memmap;
+ int rc;
+
+ memmap.nr_entries = ARRAY_SIZE(boot_params_p->e820_table);
+ set_xen_guest_handle(memmap.buffer, boot_params_p->e820_table);
+ rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
+ if (rc) {
+ xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
+ BUG();
+ }
+ boot_params_p->e820_entries = memmap.nr_entries;
+}
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index d4cefd8a9af4..10c660fae8b3 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -54,8 +54,9 @@ struct mc_debug_data {
static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
static struct mc_debug_data mc_debug_data_early __initdata;
-static struct mc_debug_data __percpu *mc_debug_data __refdata =
+static DEFINE_PER_CPU(struct mc_debug_data *, mc_debug_data) =
&mc_debug_data_early;
+static struct mc_debug_data __percpu *mc_debug_data_ptr;
DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
static struct static_key mc_debug __ro_after_init;
@@ -70,16 +71,20 @@ static int __init xen_parse_mc_debug(char *arg)
}
early_param("xen_mc_debug", xen_parse_mc_debug);
+void mc_percpu_init(unsigned int cpu)
+{
+ per_cpu(mc_debug_data, cpu) = per_cpu_ptr(mc_debug_data_ptr, cpu);
+}
+
static int __init mc_debug_enable(void)
{
- struct mc_debug_data __percpu *mcdb;
unsigned long flags;
if (!mc_debug_enabled)
return 0;
- mcdb = alloc_percpu(struct mc_debug_data);
- if (!mcdb) {
+ mc_debug_data_ptr = alloc_percpu(struct mc_debug_data);
+ if (!mc_debug_data_ptr) {
pr_err("xen_mc_debug inactive\n");
static_key_slow_dec(&mc_debug);
return -ENOMEM;
@@ -88,7 +93,7 @@ static int __init mc_debug_enable(void)
/* Be careful when switching to percpu debug data. */
local_irq_save(flags);
xen_mc_flush();
- mc_debug_data = mcdb;
+ mc_percpu_init(0);
local_irq_restore(flags);
pr_info("xen_mc_debug active\n");
@@ -150,7 +155,7 @@ void xen_mc_flush(void)
trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx);
if (static_key_false(&mc_debug)) {
- mcdb = this_cpu_ptr(mc_debug_data);
+ mcdb = __this_cpu_read(mc_debug_data);
memcpy(mcdb->entries, b->entries,
b->mcidx * sizeof(struct multicall_entry));
}
@@ -230,7 +235,7 @@ struct multicall_space __xen_mc_entry(size_t args)
ret.mc = &b->entries[b->mcidx];
if (static_key_false(&mc_debug)) {
- struct mc_debug_data *mcdb = this_cpu_ptr(mc_debug_data);
+ struct mc_debug_data *mcdb = __this_cpu_read(mc_debug_data);
mcdb->caller[b->mcidx] = __builtin_return_address(0);
mcdb->argsz[b->mcidx] = args;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a0c3e77e3d5b..806ddb2391d9 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -690,6 +690,7 @@ char * __init xen_memory_setup(void)
struct xen_memory_map memmap;
unsigned long max_pages;
unsigned long extra_pages = 0;
+ unsigned long maxmem_pages;
int i;
int op;
@@ -761,8 +762,8 @@ char * __init xen_memory_setup(void)
* Make sure we have no memory above max_pages, as this area
* isn't handled by the p2m management.
*/
- extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
- extra_pages, max_pages - max_pfn);
+ maxmem_pages = EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM));
+ extra_pages = min3(maxmem_pages, extra_pages, max_pages - max_pfn);
i = 0;
addr = xen_e820_table.entries[0].addr;
size = xen_e820_table.entries[0].size;
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 7ea57f728b89..6863d3da7dec 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -305,6 +305,7 @@ static int xen_pv_kick_ap(unsigned int cpu, struct task_struct *idle)
return rc;
xen_pmu_init(cpu);
+ mc_percpu_init(cpu);
/*
* Why is this a BUG? If the hypercall fails then everything can be
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index e7775dff9452..0cf16fc79e0b 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -257,6 +257,9 @@ void xen_mc_callback(void (*fn)(void *), void *data);
*/
struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size);
+/* Do percpu data initialization for multicalls. */
+void mc_percpu_init(unsigned int cpu);
+
extern bool is_xen_pmu;
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);