25 files changed, 978 insertions, 352 deletions
diff --git a/arch/s390/include/asm/access-regs.h b/arch/s390/include/asm/access-regs.h
new file mode 100644
index 000000000000..1a6412d9f5ad
--- /dev/null
+++ b/arch/s390/include/asm/access-regs.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2024
+ */
+
+#ifndef __ASM_S390_ACCESS_REGS_H
+#define __ASM_S390_ACCESS_REGS_H
+
+#include <linux/instrumented.h>
+#include <asm/sigcontext.h>
+
+struct access_regs {
+	unsigned int regs[NUM_ACRS];
+};
+
+static inline void save_access_regs(unsigned int *acrs)
+{
+	struct access_regs *regs = (struct access_regs *)acrs;
+
+	instrument_write(regs, sizeof(*regs));
+	asm volatile("stamy	0,15,%[regs]"
+		     : [regs] "=QS" (*regs)
+		     :
+		     : "memory");
+}
+
+static inline void restore_access_regs(unsigned int *acrs)
+{
+	struct access_regs *regs = (struct access_regs *)acrs;
+
+	instrument_read(regs, sizeof(*regs));
+	asm volatile("lamy	0,15,%[regs]"
+		     :
+		     : [regs] "QS" (*regs)
+		     : "memory");
+}
+
+#endif /* __ASM_S390_ACCESS_REGS_H */
diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h
index f2240392c708..a92ebbc7aa7a 100644
--- a/arch/s390/include/asm/appldata.h
+++ b/arch/s390/include/asm/appldata.h
@@ -54,13 +54,13 @@ static inline int appldata_asm(struct appldata_parameter_list *parm_list,
 	parm_list->function = fn;
 	parm_list->parlist_length = sizeof(*parm_list);
 	parm_list->buffer_length = length;
-	parm_list->product_id_addr = (unsigned long) id;
+	parm_list->product_id_addr = virt_to_phys(id);
 	parm_list->buffer_addr = virt_to_phys(buffer);
 	diag_stat_inc(DIAG_STAT_X0DC);
 	asm volatile(
 		"	diag	%1,%0,0xdc"
 		: "=d" (ry)
-		: "d" (parm_list), "m" (*parm_list), "m" (*id)
+		: "d" (virt_to_phys(parm_list)), "m" (*parm_list), "m" (*id)
 		: "cc");
 	return ry;
 }
diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h
index a873e873e1ee..56096ae26f29 100644
--- a/arch/s390/include/asm/asm-prototypes.h
+++ b/arch/s390/include/asm/asm-prototypes.h
@@ -3,7 +3,7 @@
 
 #include <linux/kvm_host.h>
 #include <linux/ftrace.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
 #include <asm-generic/asm-prototypes.h>
 
 __int128_t __ashlti3(__int128_t a, int b);
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index aebe1e22c7be..c500d45fb465 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -14,7 +14,7 @@
 		".section .rodata.str,\"aMS\",@progbits,1\n"	\
 		"1:	.asciz	\""__FILE__"\"\n"		\
 		".previous\n"					\
-		".section __bug_table,\"awM\",@progbits,%2\n"	\
+		".section __bug_table,\"aw\"\n"			\
 		"2:	.long	0b-.\n"				\
 		"	.long	1b-.\n"				\
 		"	.short	%0,%1\n"			\
@@ -30,7 +30,7 @@
 #define __EMIT_BUG(x) do {					\
 	asm_inline volatile(					\
 		"0:	mc	0,0\n"				\
-		".section __bug_table,\"awM\",@progbits,%1\n"	\
+		".section __bug_table,\"aw\"\n"			\
 		"1:	.long	0b-.\n"				\
 		"	.short	%0\n"				\
 		"	.org	1b+%1\n"			\
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index 69837eec2ff5..b89159591ca0 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -12,36 +12,29 @@
 #ifndef _S390_CHECKSUM_H
 #define _S390_CHECKSUM_H
 
-#include <linux/kasan-checks.h>
+#include <linux/instrumented.h>
 #include <linux/in6.h>
 
-/*
- * Computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit).
- *
- * Returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic.
- *
- * This function must be called with even lengths, except
- * for the last fragment, which may be odd.
- *
- * It's best to have buff aligned on a 32-bit boundary.
- */
-static inline __wsum csum_partial(const void *buff, int len, __wsum sum)
+static inline __wsum cksm(const void *buff, int len, __wsum sum)
 {
 	union register_pair rp = {
-		.even = (unsigned long) buff,
-		.odd = (unsigned long) len,
+		.even = (unsigned long)buff,
+		.odd = (unsigned long)len,
 	};
 
-	kasan_check_read(buff, len);
-	asm volatile(
+	instrument_read(buff, len);
+	asm volatile("\n"
 		"0:	cksm	%[sum],%[rp]\n"
 		"	jo	0b\n"
 		: [sum] "+&d" (sum), [rp] "+&d" (rp.pair) : : "cc", "memory");
 	return sum;
 }
 
+__wsum csum_partial(const void *buff, int len, __wsum sum);
+
+#define _HAVE_ARCH_CSUM_AND_COPY
+__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
+
 /*
  * Fold a partial checksum without adding pseudo headers.
  */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index bed804137537..20b94220113b 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -44,6 +44,13 @@ enum diag_stat_enum {
 void diag_stat_inc(enum diag_stat_enum nr);
 void diag_stat_inc_norecursion(enum diag_stat_enum nr);
 
+struct hypfs_diag0c_entry;
+
+/*
+ * Diagnose 0c: Pseudo Timer
+ */
+void diag0c(struct hypfs_diag0c_entry *data);
+
 /*
  * Diagnose 10: Release page range
  */
@@ -331,10 +338,10 @@ struct hypfs_diag0c_entry;
  */
 struct diag_ops {
 	int (*diag210)(struct diag210 *addr);
-	int (*diag26c)(void *req, void *resp, enum diag26c_sc subcode);
+	int (*diag26c)(unsigned long rx, unsigned long rx1, enum diag26c_sc subcode);
 	int (*diag14)(unsigned long rx, unsigned long ry1, unsigned long subcode);
 	int (*diag8c)(struct diag8c *addr, struct ccw_dev_id *devno, size_t len);
-	void (*diag0c)(struct hypfs_diag0c_entry *entry);
+	void (*diag0c)(unsigned long rx);
 	void (*diag308_reset)(void);
 };
 
@@ -342,9 +349,9 @@ extern struct diag_ops diag_amode31_ops;
 extern struct diag210 *__diag210_tmp_amode31;
 
 int _diag210_amode31(struct diag210 *addr);
-int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode);
+int _diag26c_amode31(unsigned long rx, unsigned long rx1, enum diag26c_sc subcode);
 int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode);
-void _diag0c_amode31(struct hypfs_diag0c_entry *entry);
+void _diag0c_amode31(unsigned long rx);
 void _diag308_reset_amode31(void);
 int _diag8c_amode31(struct diag8c *addr, struct ccw_dev_id *devno, size_t len);
 
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index fdd319a622b0..7f5004065e8a 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -8,7 +8,7 @@
 #include <linux/processor.h>
 #include <linux/uaccess.h>
 #include <asm/timex.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
 #include <asm/pai.h>
 
 #define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP)
@@ -41,8 +41,7 @@ static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
 
 static __always_inline void arch_exit_to_user_mode(void)
 {
-	if (test_cpu_flag(CIF_FPU))
-		__load_fpu_regs();
+	load_user_fpu_regs();
 
 	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
 		debug_user_asce(1);
diff --git a/arch/s390/include/asm/vx-insn-asm.h b/arch/s390/include/asm/fpu-insn-asm.h
index 360f8b36d962..02ccfe46050a 100644
--- a/arch/s390/include/asm/vx-insn-asm.h
+++ b/arch/s390/include/asm/fpu-insn-asm.h
@@ -9,11 +9,11 @@
  * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  */
 
-#ifndef __ASM_S390_VX_INSN_INTERNAL_H
-#define __ASM_S390_VX_INSN_INTERNAL_H
+#ifndef __ASM_S390_FPU_INSN_ASM_H
+#define __ASM_S390_FPU_INSN_ASM_H
 
-#ifndef __ASM_S390_VX_INSN_H
-#error only <asm/vx-insn.h> can be included directly
+#ifndef __ASM_S390_FPU_INSN_H
+#error only <asm/fpu-insn.h> can be included directly
 #endif
 
 #ifdef __ASSEMBLY__
@@ -195,10 +195,26 @@
 /* RXB - Compute most significant bit used vector registers
  *
  * @rxb:	Operand to store computed RXB value
- * @v1:		First vector register designated operand
- * @v2:		Second vector register designated operand
- * @v3:		Third vector register designated operand
- * @v4:		Fourth vector register designated operand
+ * @v1:		Vector register designated operand whose MSB is stored in
+ *		RXB bit 0 (instruction bit 36) and whose remaining bits
+ *		are stored in instruction bits 8-11.
+ * @v2:		Vector register designated operand whose MSB is stored in
+ *		RXB bit 1 (instruction bit 37) and whose remaining bits
+ *		are stored in instruction bits 12-15.
+ * @v3:		Vector register designated operand whose MSB is stored in
+ *		RXB bit 2 (instruction bit 38) and whose remaining bits
+ *		are stored in instruction bits 16-19.
+ * @v4:		Vector register designated operand whose MSB is stored in
+ *		RXB bit 3 (instruction bit 39) and whose remaining bits
+ *		are stored in instruction bits 32-35.
+ *
+ * Note: In most vector instruction formats [1] V1, V2, V3, and V4 directly
+ * correspond to @v1, @v2, @v3, and @v4. But there are exceptions, such as but
+ * not limited to the vector instruction formats VRR-g, VRR-h, VRS-a, VRS-d,
+ * and VSI.
+ *
+ * [1] IBM z/Architecture Principles of Operation, chapter "Program
+ * Execution, section "Instructions", subsection "Instruction Formats".
  */
 .macro	RXB	rxb v1 v2=0 v3=0 v4=0
 	\rxb = 0
@@ -223,6 +239,9 @@
  * @v2:		Second vector register designated operand (for RXB)
  * @v3:		Third vector register designated operand (for RXB)
  * @v4:		Fourth vector register designated operand (for RXB)
+ *
+ * Note: For @v1, @v2, @v3, and @v4 also refer to the RXB macro
+ * description for further details.
  */
 .macro	MRXB	m v1 v2=0 v3=0 v4=0
 	rxb = 0
@@ -238,6 +257,9 @@
  * @v2:		Second vector register designated operand (for RXB)
  * @v3:		Third vector register designated operand (for RXB)
  * @v4:		Fourth vector register designated operand (for RXB)
+ *
+ * Note: For @v1, @v2, @v3, and @v4 also refer to the RXB macro
+ * description for further details.
  */
 .macro	MRXBOPC	m opc v1 v2=0 v3=0 v4=0
 	MRXB	\m, \v1, \v2, \v3, \v4
@@ -350,7 +372,7 @@
 	VX_NUM	v3, \vr
 	.word	0xE700 | (r1 << 4) | (v3&15)
 	.word	(b2 << 12) | (\disp)
-	MRXBOPC	\m, 0x21, v3
+	MRXBOPC	\m, 0x21, 0, v3
 .endm
 .macro	VLGVB	gr, vr, disp, base="%r0"
 	VLGV	\gr, \vr, \disp, \base, 0
@@ -499,6 +521,25 @@
 	VMRL	\vr1, \vr2, \vr3, 3
 .endm
 
+/* VECTOR LOAD WITH LENGTH */
+.macro VLL	v, gr, disp, base
+	VX_NUM	v1, \v
+	GR_NUM	b2, \base
+	GR_NUM	r3, \gr
+	.word	0xE700 | ((v1&15) << 4) | r3
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC 0, 0x37, v1
+.endm
+
+/* VECTOR STORE WITH LENGTH */
+.macro VSTL	v, gr, disp, base
+	VX_NUM	v1, \v
+	GR_NUM	b2, \base
+	GR_NUM	r3, \gr
+	.word	0xE700 | ((v1&15) << 4) | r3
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC 0, 0x3f, v1
+.endm
 
 /* Vector integer instructions */
 
@@ -512,6 +553,16 @@
 	MRXBOPC	0, 0x68, v1, v2, v3
 .endm
 
+/* VECTOR CHECKSUM */
+.macro VCKSM	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC 0, 0x66, v1, v2, v3
+.endm
+
 /* VECTOR EXCLUSIVE OR */
 .macro	VX	vr1, vr2, vr3
 	VX_NUM	v1, \vr1
@@ -678,4 +729,4 @@
 .endm
 
 #endif	/* __ASSEMBLY__ */
-#endif	/* __ASM_S390_VX_INSN_INTERNAL_H */
+#endif	/* __ASM_S390_FPU_INSN_ASM_H */
diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h
new file mode 100644
index 000000000000..c1e2e521d9af
--- /dev/null
+++ b/arch/s390/include/asm/fpu-insn.h
@@ -0,0 +1,486 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for Floating Point and Vector Instructions
+ *
+ */
+
+#ifndef __ASM_S390_FPU_INSN_H
+#define __ASM_S390_FPU_INSN_H
+
+#include <asm/fpu-insn-asm.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/instrumented.h>
+#include <asm/asm-extable.h>
+
+asm(".include \"asm/fpu-insn-asm.h\"\n");
+
+/*
+ * Various small helper functions, which can and should be used within
+ * kernel fpu code sections. Each function represents only one floating
+ * point or vector instruction (except for helper functions which require
+ * exception handling).
+ *
+ * This allows to use floating point and vector instructions like C
+ * functions, which has the advantage that all supporting code, like
+ * e.g. loops, can be written in easy to read C code.
+ *
+ * Each of the helper functions provides support for code instrumentation,
+ * like e.g. KASAN. Therefore instrumentation is also covered automatically
+ * when using these functions.
+ *
+ * In order to ensure that code generated with the helper functions stays
+ * within kernel fpu sections, which are guarded with kernel_fpu_begin()
+ * and kernel_fpu_end() calls, each function has a mandatory "memory"
+ * barrier.
+ */
+
+static __always_inline void fpu_cefbr(u8 f1, s32 val)
+{
+	asm volatile("cefbr	%[f1],%[val]\n"
+		     :
+		     : [f1] "I" (f1), [val] "d" (val)
+		     : "memory");
+}
+
+static __always_inline unsigned long fpu_cgebr(u8 f2, u8 mode)
+{
+	unsigned long val;
+
+	asm volatile("cgebr	%[val],%[mode],%[f2]\n"
+		     : [val] "=d" (val)
+		     : [f2] "I" (f2), [mode] "I" (mode)
+		     : "memory");
+	return val;
+}
+
+static __always_inline void fpu_debr(u8 f1, u8 f2)
+{
+	asm volatile("debr	%[f1],%[f2]\n"
+		     :
+		     : [f1] "I" (f1), [f2] "I" (f2)
+		     : "memory");
+}
+
+static __always_inline void fpu_ld(unsigned short fpr, freg_t *reg)
+{
+	instrument_read(reg, sizeof(*reg));
+	asm volatile("ld	 %[fpr],%[reg]\n"
+		     :
+		     : [fpr] "I" (fpr), [reg] "Q" (reg->ui)
+		     : "memory");
+}
+
+static __always_inline void fpu_ldgr(u8 f1, u32 val)
+{
+	asm volatile("ldgr	%[f1],%[val]\n"
+		     :
+		     : [f1] "I" (f1), [val] "d" (val)
+		     : "memory");
+}
+
+static __always_inline void fpu_lfpc(unsigned int *fpc)
+{
+	instrument_read(fpc, sizeof(*fpc));
+	asm volatile("lfpc	%[fpc]"
+		     :
+		     : [fpc] "Q" (*fpc)
+		     : "memory");
+}
+
+/**
+ * fpu_lfpc_safe - Load floating point control register safely.
+ * @fpc: new value for floating point control register
+ *
+ * Load floating point control register. This may lead to an exception,
+ * since a saved value may have been modified by user space (ptrace,
+ * signal return, kvm registers) to an invalid value. In such a case
+ * set the floating point control register to zero.
+ */
+static inline void fpu_lfpc_safe(unsigned int *fpc)
+{
+	u32 tmp;
+
+	instrument_read(fpc, sizeof(*fpc));
+	asm volatile("\n"
+		"0:	lfpc	%[fpc]\n"
+		"1:	nopr	%%r7\n"
+		".pushsection .fixup, \"ax\"\n"
+		"2:	lghi	%[tmp],0\n"
+		"	sfpc	%[tmp]\n"
+		"	jg	1b\n"
+		".popsection\n"
+		EX_TABLE(1b, 2b)
+		: [tmp] "=d" (tmp)
+		: [fpc] "Q" (*fpc)
+		: "memory");
+}
+
+static __always_inline void fpu_std(unsigned short fpr, freg_t *reg)
+{
+	instrument_write(reg, sizeof(*reg));
+	asm volatile("std	 %[fpr],%[reg]\n"
+		     : [reg] "=Q" (reg->ui)
+		     : [fpr] "I" (fpr)
+		     : "memory");
+}
+
+static __always_inline void fpu_sfpc(unsigned int fpc)
+{
+	asm volatile("sfpc	%[fpc]"
+		     :
+		     : [fpc] "d" (fpc)
+		     : "memory");
+}
+
+static __always_inline void fpu_stfpc(unsigned int *fpc)
+{
+	instrument_write(fpc, sizeof(*fpc));
+	asm volatile("stfpc	%[fpc]"
+		     : [fpc] "=Q" (*fpc)
+		     :
+		     : "memory");
+}
+
+static __always_inline void fpu_vab(u8 v1, u8 v2, u8 v3)
+{
+	asm volatile("VAB	%[v1],%[v2],%[v3]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+		     : "memory");
+}
+
+static __always_inline void fpu_vcksm(u8 v1, u8 v2, u8 v3)
+{
+	asm volatile("VCKSM	%[v1],%[v2],%[v3]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+		     : "memory");
+}
+
+static __always_inline void fpu_vesravb(u8 v1, u8 v2, u8 v3)
+{
+	asm volatile("VESRAVB	%[v1],%[v2],%[v3]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+		     : "memory");
+}
+
+static __always_inline void fpu_vgfmag(u8 v1, u8 v2, u8 v3, u8 v4)
+{
+	asm volatile("VGFMAG	%[v1],%[v2],%[v3],%[v4]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3), [v4] "I" (v4)
+		     : "memory");
+}
+
+static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3)
+{
+	asm volatile("VGFMG	%[v1],%[v2],%[v3]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+		     : "memory");
+}
+
+#ifdef CONFIG_CC_IS_CLANG
+
+static __always_inline void fpu_vl(u8 v1, const void *vxr)
+{
+	instrument_read(vxr, sizeof(__vector128));
+	asm volatile("\n"
+		"	la	1,%[vxr]\n"
+		"	VL	%[v1],0,,1\n"
+		:
+		: [vxr] "R" (*(__vector128 *)vxr),
+		  [v1] "I" (v1)
+		: "memory", "1");
+}
+
+#else /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vl(u8 v1, const void *vxr)
+{
+	instrument_read(vxr, sizeof(__vector128));
+	asm volatile("VL	%[v1],%O[vxr],,%R[vxr]\n"
+		     :
+		     : [vxr] "Q" (*(__vector128 *)vxr),
+		       [v1] "I" (v1)
+		     : "memory");
+}
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vleib(u8 v, s16 val, u8 index)
+{
+	asm volatile("VLEIB	%[v],%[val],%[index]"
+		     :
+		     : [v] "I" (v), [val] "K" (val), [index] "I" (index)
+		     : "memory");
+}
+
+static __always_inline void fpu_vleig(u8 v, s16 val, u8 index)
+{
+	asm volatile("VLEIG	%[v],%[val],%[index]"
+		     :
+		     : [v] "I" (v), [val] "K" (val), [index] "I" (index)
+		     : "memory");
+}
+
+static __always_inline u64 fpu_vlgvf(u8 v, u16 index)
+{
+	u64 val;
+
+	asm volatile("VLGVF	%[val],%[v],%[index]"
+		     : [val] "=d" (val)
+		     : [v] "I" (v), [index] "L" (index)
+		     : "memory");
+	return val;
+}
+
+#ifdef CONFIG_CC_IS_CLANG
+
+static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
+{
+	unsigned int size;
+
+	size = min(index + 1, sizeof(__vector128));
+	instrument_read(vxr, size);
+	asm volatile("\n"
+		"	la	1,%[vxr]\n"
+		"	VLL	%[v1],%[index],0,1\n"
+		:
+		: [vxr] "R" (*(u8 *)vxr),
+		  [index] "d" (index),
+		  [v1] "I" (v1)
+		: "memory", "1");
+}
+
+#else /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
+{
+	unsigned int size;
+
+	size = min(index + 1, sizeof(__vector128));
+	instrument_read(vxr, size);
+	asm volatile("VLL	%[v1],%[index],%O[vxr],%R[vxr]\n"
+		     :
+		     : [vxr] "Q" (*(u8 *)vxr),
+		       [index] "d" (index),
+		       [v1] "I" (v1)
+		     : "memory");
+}
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+#ifdef CONFIG_CC_IS_CLANG
+
+#define fpu_vlm(_v1, _v3, _vxrs)					\
+({									\
+	unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128);	\
+	struct {							\
+		__vector128 _v[(_v3) - (_v1) + 1];			\
+	} *_v = (void *)(_vxrs);					\
+									\
+	instrument_read(_v, size);					\
+	asm volatile("\n"						\
+		"	la	1,%[vxrs]\n"				\
+		"	VLM	%[v1],%[v3],0,1\n"			\
+		:							\
+		: [vxrs] "R" (*_v),					\
+		  [v1] "I" (_v1), [v3] "I" (_v3)			\
+		: "memory", "1");					\
+	(_v3) - (_v1) + 1;						\
+})
+
+#else /* CONFIG_CC_IS_CLANG */
+
+#define fpu_vlm(_v1, _v3, _vxrs)					\
+({									\
+	unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128);	\
+	struct {							\
+		__vector128 _v[(_v3) - (_v1) + 1];			\
+	} *_v = (void *)(_vxrs);					\
+									\
+	instrument_read(_v, size);					\
+	asm volatile("VLM	%[v1],%[v3],%O[vxrs],%R[vxrs]\n"	\
+		     :							\
+		     : [vxrs] "Q" (*_v),				\
+		       [v1] "I" (_v1), [v3] "I" (_v3)			\
+		     : "memory");					\
+	(_v3) - (_v1) + 1;						\
+})
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vlr(u8 v1, u8 v2)
+{
+	asm volatile("VLR	%[v1],%[v2]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2)
+		     : "memory");
+}
+
+static __always_inline void fpu_vlvgf(u8 v, u32 val, u16 index)
+{
+	asm volatile("VLVGF	%[v],%[val],%[index]"
+		     :
+		     : [v] "I" (v), [val] "d" (val), [index] "L" (index)
+		     : "memory");
+}
+
+static __always_inline void fpu_vn(u8 v1, u8 v2, u8 v3)
+{
+	asm volatile("VN	%[v1],%[v2],%[v3]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+		     : "memory");
+}
+
+static __always_inline void fpu_vperm(u8 v1, u8 v2, u8 v3, u8 v4)
+{
+	asm volatile("VPERM	%[v1],%[v2],%[v3],%[v4]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3), [v4] "I" (v4)
+		     : "memory");
+}
+
+static __always_inline void fpu_vrepib(u8 v1, s16 i2)
+{
+	asm volatile("VREPIB	%[v1],%[i2]"
+		     :
+		     : [v1] "I" (v1), [i2] "K" (i2)
+		     : "memory");
+}
+
+static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3)
+{
+	asm volatile("VSRLB	%[v1],%[v2],%[v3]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+		     : "memory");
+}
+
+#ifdef CONFIG_CC_IS_CLANG
+
+static __always_inline void fpu_vst(u8 v1, const void *vxr)
+{
+	instrument_write(vxr, sizeof(__vector128));
+	asm volatile("\n"
+		"	la	1,%[vxr]\n"
+		"	VST	%[v1],0,,1\n"
+		: [vxr] "=R" (*(__vector128 *)vxr)
+		: [v1] "I" (v1)
+		: "memory", "1");
+}
+
+#else /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vst(u8 v1, const void *vxr)
+{
+	instrument_write(vxr, sizeof(__vector128));
+	asm volatile("VST	%[v1],%O[vxr],,%R[vxr]\n"
+		     : [vxr] "=Q" (*(__vector128 *)vxr)
+		     : [v1] "I" (v1)
+		     : "memory");
+}
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+#ifdef CONFIG_CC_IS_CLANG
+
+static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
+{
+	unsigned int size;
+
+	size = min(index + 1, sizeof(__vector128));
+	instrument_write(vxr, size);
+	asm volatile("\n"
+		"	la	1,%[vxr]\n"
+		"	VSTL	%[v1],%[index],0,1\n"
+		: [vxr] "=R" (*(u8 *)vxr)
+		: [index] "d" (index), [v1] "I" (v1)
+		: "memory", "1");
+}
+
+#else /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
+{
+	unsigned int size;
+
+	size = min(index + 1, sizeof(__vector128));
+	instrument_write(vxr, size);
+	asm volatile("VSTL	%[v1],%[index],%O[vxr],%R[vxr]\n"
+		     : [vxr] "=Q" (*(u8 *)vxr)
+		     : [index] "d" (index), [v1] "I" (v1)
+		     : "memory");
+}
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+#ifdef CONFIG_CC_IS_CLANG
+
+#define fpu_vstm(_v1, _v3, _vxrs)					\
+({									\
+	unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128);	\
+	struct {							\
+		__vector128 _v[(_v3) - (_v1) + 1];			\
+	} *_v = (void *)(_vxrs);					\
+									\
+	instrument_write(_v, size);					\
+	asm volatile("\n"						\
+		"	la	1,%[vxrs]\n"				\
+		"	VSTM	%[v1],%[v3],0,1\n"			\
+		: [vxrs] "=R" (*_v)					\
+		: [v1] "I" (_v1), [v3] "I" (_v3)			\
+		: "memory", "1");					\
+	(_v3) - (_v1) + 1;						\
+})
+
+#else /* CONFIG_CC_IS_CLANG */
+
+#define fpu_vstm(_v1, _v3, _vxrs)					\
+({									\
+	unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128);	\
+	struct {							\
+		__vector128 _v[(_v3) - (_v1) + 1];			\
+	} *_v = (void *)(_vxrs);					\
+									\
+	instrument_write(_v, size);					\
+	asm volatile("VSTM	%[v1],%[v3],%O[vxrs],%R[vxrs]\n"	\
+		     : [vxrs] "=Q" (*_v)				\
+		     : [v1] "I" (_v1), [v3] "I" (_v3)			\
+		     : "memory");					\
+	(_v3) - (_v1) + 1;						\
+})
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+static __always_inline void fpu_vupllf(u8 v1, u8 v2)
+{
+	asm volatile("VUPLLF	%[v1],%[v2]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2)
+		     : "memory");
+}
+
+static __always_inline void fpu_vx(u8 v1, u8 v2, u8 v3)
+{
+	asm volatile("VX	%[v1],%[v2],%[v3]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+		     : "memory");
+}
+
+static __always_inline void fpu_vzero(u8 v)
+{
+	asm volatile("VZERO	%[v]"
+		     :
+		     : [v] "I" (v)
+		     : "memory");
+}
+
+#endif /* __ASSEMBLY__ */
+#endif	/* __ASM_S390_FPU_INSN_H */
diff --git a/arch/s390/include/asm/fpu-types.h b/arch/s390/include/asm/fpu-types.h
new file mode 100644
index 000000000000..8d58d5a95399
--- /dev/null
+++ b/arch/s390/include/asm/fpu-types.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * FPU data structures
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_TYPES_H
+#define _ASM_S390_FPU_TYPES_H
+
+#include <asm/sigcontext.h>
+
+struct fpu {
+	u32 fpc;
+	__vector128 vxrs[__NUM_VXRS] __aligned(8);
+};
+
+struct kernel_fpu_hdr {
+	int	mask;
+	u32	fpc;
+};
+
+struct kernel_fpu {
+	struct kernel_fpu_hdr hdr;
+	__vector128 vxrs[] __aligned(8);
+};
+
+#define KERNEL_FPU_STRUCT(vxr_size)				\
+struct kernel_fpu_##vxr_size {					\
+	struct kernel_fpu_hdr hdr;				\
+	__vector128 vxrs[vxr_size] __aligned(8);		\
+}
+
+KERNEL_FPU_STRUCT(8);
+KERNEL_FPU_STRUCT(16);
+KERNEL_FPU_STRUCT(32);
+
+#define DECLARE_KERNEL_FPU_ONSTACK(vxr_size, name)		\
+	struct kernel_fpu_##vxr_size name __uninitialized
+
+#define DECLARE_KERNEL_FPU_ONSTACK8(name)			\
+	DECLARE_KERNEL_FPU_ONSTACK(8, name)
+
+#define DECLARE_KERNEL_FPU_ONSTACK16(name)			\
+	DECLARE_KERNEL_FPU_ONSTACK(16, name)
+
+#define DECLARE_KERNEL_FPU_ONSTACK32(name)			\
+	DECLARE_KERNEL_FPU_ONSTACK(32, name)
+
+#endif /* _ASM_S390_FPU_TYPES_H */
diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h
new file mode 100644
index 000000000000..c84cb33913e2
--- /dev/null
+++ b/arch/s390/include/asm/fpu.h
@@ -0,0 +1,295 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * In-kernel FPU support functions
+ *
+ *
+ * Consider these guidelines before using in-kernel FPU functions:
+ *
+ *  1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel
+ *     use of floating-point or vector registers and instructions.
+ *
+ *  2. For kernel_fpu_begin(), specify the vector register range you want to
+ *     use with the KERNEL_VXR_* constants. Consider these usage guidelines:
+ *
+ *     a) If your function typically runs in process-context, use the lower
+ *	  half of the vector registers, for example, specify KERNEL_VXR_LOW.
+ *     b) If your function typically runs in soft-irq or hard-irq context,
+ *	  prefer using the upper half of the vector registers, for example,
+ *	  specify KERNEL_VXR_HIGH.
+ *
+ *     If you adhere to these guidelines, an interrupted process context
+ *     does not require to save and restore vector registers because of
+ *     disjoint register ranges.
+ *
+ *     Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions
+ *     includes logic to save and restore up to 16 vector registers at once.
+ *
+ *  3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different
+ *     struct kernel_fpu states.  Vector registers that are in use by outer
+ *     levels are saved and restored.  You can minimize the save and restore
+ *     effort by choosing disjoint vector register ranges.
+ *
+ *  5. To use vector floating-point instructions, specify the KERNEL_FPC
+ *     flag to save and restore floating-point controls in addition to any
+ *     vector register range.
+ *
+ *  6. To use floating-point registers and instructions only, specify the
+ *     KERNEL_FPR flag.  This flag triggers a save and restore of vector
+ *     registers V0 to V15 and floating-point controls.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_H
+#define _ASM_S390_FPU_H
+
+#include <linux/processor.h>
+#include <linux/preempt.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <asm/sigcontext.h>
+#include <asm/fpu-types.h>
+#include <asm/fpu-insn.h>
+#include <asm/facility.h>
+
+static inline bool cpu_has_vx(void)
+{
+	return likely(test_facility(129));
+}
+
+enum {
+	KERNEL_FPC_BIT = 0,
+	KERNEL_VXR_V0V7_BIT,
+	KERNEL_VXR_V8V15_BIT,
+	KERNEL_VXR_V16V23_BIT,
+	KERNEL_VXR_V24V31_BIT,
+};
+
+#define KERNEL_FPC		BIT(KERNEL_FPC_BIT)
+#define KERNEL_VXR_V0V7		BIT(KERNEL_VXR_V0V7_BIT)
+#define KERNEL_VXR_V8V15	BIT(KERNEL_VXR_V8V15_BIT)
+#define KERNEL_VXR_V16V23	BIT(KERNEL_VXR_V16V23_BIT)
+#define KERNEL_VXR_V24V31	BIT(KERNEL_VXR_V24V31_BIT)
+
+#define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7   | KERNEL_VXR_V8V15)
+#define KERNEL_VXR_MID		(KERNEL_VXR_V8V15  | KERNEL_VXR_V16V23)
+#define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23 | KERNEL_VXR_V24V31)
+
+#define KERNEL_VXR		(KERNEL_VXR_LOW	   | KERNEL_VXR_HIGH)
+#define KERNEL_FPR		(KERNEL_FPC	   | KERNEL_VXR_LOW)
+
+void load_fpu_state(struct fpu *state, int flags);
+void save_fpu_state(struct fpu *state, int flags);
+void __kernel_fpu_begin(struct kernel_fpu *state, int flags);
+void __kernel_fpu_end(struct kernel_fpu *state, int flags);
+
+static __always_inline void save_vx_regs(__vector128 *vxrs)
+{
+	fpu_vstm(0, 15, &vxrs[0]);
+	fpu_vstm(16, 31, &vxrs[16]);
+}
+
+static __always_inline void load_vx_regs(__vector128 *vxrs)
+{
+	fpu_vlm(0, 15, &vxrs[0]);
+	fpu_vlm(16, 31, &vxrs[16]);
+}
+
+static __always_inline void __save_fp_regs(freg_t *fprs, unsigned int offset)
+{
+	fpu_std(0, &fprs[0 * offset]);
+	fpu_std(1, &fprs[1 * offset]);
+	fpu_std(2, &fprs[2 * offset]);
+	fpu_std(3, &fprs[3 * offset]);
+	fpu_std(4, &fprs[4 * offset]);
+	fpu_std(5, &fprs[5 * offset]);
+	fpu_std(6, &fprs[6 * offset]);
+	fpu_std(7, &fprs[7 * offset]);
+	fpu_std(8, &fprs[8 * offset]);
+	fpu_std(9, &fprs[9 * offset]);
+	fpu_std(10, &fprs[10 * offset]);
+	fpu_std(11, &fprs[11 * offset]);
+	fpu_std(12, &fprs[12 * offset]);
+	fpu_std(13, &fprs[13 * offset]);
+	fpu_std(14, &fprs[14 * offset]);
+	fpu_std(15, &fprs[15 * offset]);
+}
+
+static __always_inline void __load_fp_regs(freg_t *fprs, unsigned int offset)
+{
+	fpu_ld(0, &fprs[0 * offset]);
+	fpu_ld(1, &fprs[1 * offset]);
+	fpu_ld(2, &fprs[2 * offset]);
+	fpu_ld(3, &fprs[3 * offset]);
+	fpu_ld(4, &fprs[4 * offset]);
+	fpu_ld(5, &fprs[5 * offset]);
+	fpu_ld(6, &fprs[6 * offset]);
+	fpu_ld(7, &fprs[7 * offset]);
+	fpu_ld(8, &fprs[8 * offset]);
+	fpu_ld(9, &fprs[9 * offset]);
+	fpu_ld(10, &fprs[10 * offset]);
+	fpu_ld(11, &fprs[11 * offset]);
+	fpu_ld(12, &fprs[12 * offset]);
+	fpu_ld(13, &fprs[13 * offset]);
+	fpu_ld(14, &fprs[14 * offset]);
+	fpu_ld(15, &fprs[15 * offset]);
+}
+
+static __always_inline void save_fp_regs(freg_t *fprs)
+{
+	__save_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
+}
+
+static __always_inline void load_fp_regs(freg_t *fprs)
+{
+	__load_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
+}
+
+static __always_inline void save_fp_regs_vx(__vector128 *vxrs)
+{
+	freg_t *fprs = (freg_t *)&vxrs[0].high;
+
+	__save_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
+}
+
+static __always_inline void load_fp_regs_vx(__vector128 *vxrs)
+{
+	freg_t *fprs = (freg_t *)&vxrs[0].high;
+
+	__load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
+}
+
+static inline void load_user_fpu_regs(void)
+{
+	struct thread_struct *thread = &current->thread;
+
+	if (!thread->ufpu_flags)
+		return;
+	load_fpu_state(&thread->ufpu, thread->ufpu_flags);
+	thread->ufpu_flags = 0;
+}
+
+static __always_inline void __save_user_fpu_regs(struct thread_struct *thread, int flags)
+{
+	save_fpu_state(&thread->ufpu, flags);
+	__atomic_or(flags, &thread->ufpu_flags);
+}
+
+static inline void save_user_fpu_regs(void)
+{
+	struct thread_struct *thread = &current->thread;
+	int mask, flags;
+
+	mask = __atomic_or(KERNEL_FPC | KERNEL_VXR, &thread->kfpu_flags);
+	flags = ~READ_ONCE(thread->ufpu_flags) & (KERNEL_FPC | KERNEL_VXR);
+	if (flags)
+		__save_user_fpu_regs(thread, flags);
+	barrier();
+	WRITE_ONCE(thread->kfpu_flags, mask);
+}
+
+static __always_inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
+{
+	struct thread_struct *thread = &current->thread;
+	int mask, uflags;
+
+	mask = __atomic_or(flags, &thread->kfpu_flags);
+	state->hdr.mask = mask;
+	uflags = READ_ONCE(thread->ufpu_flags);
+	if ((uflags & flags) != flags)
+		__save_user_fpu_regs(thread, ~uflags & flags);
+	if (mask & flags)
+		__kernel_fpu_begin(state, flags);
+}
+
+static __always_inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
+{
+	int mask = state->hdr.mask;
+
+	if (mask & flags)
+		__kernel_fpu_end(state, flags);
+	barrier();
+	WRITE_ONCE(current->thread.kfpu_flags, mask);
+}
+
+void __kernel_fpu_invalid_size(void);
+
+static __always_inline void kernel_fpu_check_size(int flags, unsigned int size)
+{
+	unsigned int cnt = 0;
+
+	if (flags & KERNEL_VXR_V0V7)
+		cnt += 8;
+	if (flags & KERNEL_VXR_V8V15)
+		cnt += 8;
+	if (flags & KERNEL_VXR_V16V23)
+		cnt += 8;
+	if (flags & KERNEL_VXR_V24V31)
+		cnt += 8;
+	if (cnt != size)
+		__kernel_fpu_invalid_size();
+}
+
+#define kernel_fpu_begin(state, flags)					\
+{									\
+	typeof(state) s = (state);					\
+	int _flags = (flags);						\
+									\
+	kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs));		\
+	_kernel_fpu_begin((struct kernel_fpu *)s, _flags);		\
+}
+
+#define kernel_fpu_end(state, flags)					\
+{									\
+	typeof(state) s = (state);					\
+	int _flags = (flags);						\
+									\
+	kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs));		\
+	_kernel_fpu_end((struct kernel_fpu *)s, _flags);		\
+}
+
+static inline void save_kernel_fpu_regs(struct thread_struct *thread)
+{
+	if (!thread->kfpu_flags)
+		return;
+	save_fpu_state(&thread->kfpu, thread->kfpu_flags);
+}
+
+static inline void restore_kernel_fpu_regs(struct thread_struct *thread)
+{
+	if (!thread->kfpu_flags)
+		return;
+	load_fpu_state(&thread->kfpu, thread->kfpu_flags);
+}
+
+static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		fprs[i].ui = vxrs[i].high;
+}
+
+static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		vxrs[i].high = fprs[i].ui;
+}
+
+static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	fpregs->pad = 0;
+	fpregs->fpc = fpu->fpc;
+	convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
+}
+
+static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	fpu->fpc = fpregs->fpc;
+	convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
+}
+
+#endif /* _ASM_S390_FPU_H */
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
deleted file mode 100644
index d6ca8bc6ca68..000000000000
--- a/arch/s390/include/asm/fpu/api.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * In-kernel FPU support functions
- *
- *
- * Consider these guidelines before using in-kernel FPU functions:
- *
- *  1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel
- *     use of floating-point or vector registers and instructions.
- *
- *  2. For kernel_fpu_begin(), specify the vector register range you want to
- *     use with the KERNEL_VXR_* constants. Consider these usage guidelines:
- *
- *     a) If your function typically runs in process-context, use the lower
- *	  half of the vector registers, for example, specify KERNEL_VXR_LOW.
- *     b) If your function typically runs in soft-irq or hard-irq context,
- *	  prefer using the upper half of the vector registers, for example,
- *	  specify KERNEL_VXR_HIGH.
- *
- *     If you adhere to these guidelines, an interrupted process context
- *     does not require to save and restore vector registers because of
- *     disjoint register ranges.
- *
- *     Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions
- *     includes logic to save and restore up to 16 vector registers at once.
- *
- *  3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different
- *     struct kernel_fpu states.  Vector registers that are in use by outer
- *     levels are saved and restored.  You can minimize the save and restore
- *     effort by choosing disjoint vector register ranges.
- *
- *  5. To use vector floating-point instructions, specify the KERNEL_FPC
- *     flag to save and restore floating-point controls in addition to any
- *     vector register range.
- *
- *  6. To use floating-point registers and instructions only, specify the
- *     KERNEL_FPR flag.  This flag triggers a save and restore of vector
- *     registers V0 to V15 and floating-point controls.
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#ifndef _ASM_S390_FPU_API_H
-#define _ASM_S390_FPU_API_H
-
-#include <linux/preempt.h>
-#include <asm/asm-extable.h>
-#include <asm/fpu/internal.h>
-
-void save_fpu_regs(void);
-void load_fpu_regs(void);
-void __load_fpu_regs(void);
-
-/**
- * sfpc_safe - Set floating point control register safely.
- * @fpc: new value for floating point control register
- *
- * Set floating point control register. This may lead to an exception,
- * since a saved value may have been modified by user space (ptrace,
- * signal return, kvm registers) to an invalid value. In such a case
- * set the floating point control register to zero.
- */
-static inline void sfpc_safe(u32 fpc)
-{
-	asm volatile("\n"
-		"0:	sfpc	%[fpc]\n"
-		"1:	nopr	%%r7\n"
-		".pushsection .fixup, \"ax\"\n"
-		"2:	lghi	%[fpc],0\n"
-		"	jg	0b\n"
-		".popsection\n"
-		EX_TABLE(1b, 2b)
-		: [fpc] "+d" (fpc)
-		: : "memory");
-}
-
-#define KERNEL_FPC		1
-#define KERNEL_VXR_V0V7		2
-#define KERNEL_VXR_V8V15	4
-#define KERNEL_VXR_V16V23	8
-#define KERNEL_VXR_V24V31	16
-
-#define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7|KERNEL_VXR_V8V15)
-#define KERNEL_VXR_MID		(KERNEL_VXR_V8V15|KERNEL_VXR_V16V23)
-#define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23|KERNEL_VXR_V24V31)
-
-#define KERNEL_VXR		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH)
-#define KERNEL_FPR		(KERNEL_FPC|KERNEL_VXR_LOW)
-
-struct kernel_fpu;
-
-/*
- * Note the functions below must be called with preemption disabled.
- * Do not enable preemption before calling __kernel_fpu_end() to prevent
- * an corruption of an existing kernel FPU state.
- *
- * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions.
- */
-void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags);
-void __kernel_fpu_end(struct kernel_fpu *state, u32 flags);
-
-
-static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
-{
-	preempt_disable();
-	state->mask = S390_lowcore.fpu_flags;
-	if (!test_cpu_flag(CIF_FPU))
-		/* Save user space FPU state and register contents */
-		save_fpu_regs();
-	else if (state->mask & flags)
-		/* Save FPU/vector register in-use by the kernel */
-		__kernel_fpu_begin(state, flags);
-	S390_lowcore.fpu_flags |= flags;
-}
-
-static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags)
-{
-	S390_lowcore.fpu_flags = state->mask;
-	if (state->mask & flags)
-		/* Restore FPU/vector register in-use by the kernel */
-		__kernel_fpu_end(state, flags);
-	preempt_enable();
-}
-
-#endif /* _ASM_S390_FPU_API_H */
diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h
deleted file mode 100644
index d511c4cf5afb..000000000000
--- a/arch/s390/include/asm/fpu/internal.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * FPU state and register content conversion primitives
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#ifndef _ASM_S390_FPU_INTERNAL_H
-#define _ASM_S390_FPU_INTERNAL_H
-
-#include <linux/string.h>
-#include <asm/facility.h>
-#include <asm/fpu/types.h>
-
-static inline bool cpu_has_vx(void)
-{
-	return likely(test_facility(129));
-}
-
-static inline void save_vx_regs(__vector128 *vxrs)
-{
-	asm volatile(
-		"	la	1,%0\n"
-		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
-		: "=Q" (*(struct vx_array *) vxrs) : : "1");
-}
-
-static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
-{
-	int i;
-
-	for (i = 0; i < __NUM_FPRS; i++)
-		fprs[i].ui = vxrs[i].high;
-}
-
-static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
-{
-	int i;
-
-	for (i = 0; i < __NUM_FPRS; i++)
-		vxrs[i].high = fprs[i].ui;
-}
-
-static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
-{
-	fpregs->pad = 0;
-	fpregs->fpc = fpu->fpc;
-	if (cpu_has_vx())
-		convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
-	else
-		memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
-		       sizeof(fpregs->fprs));
-}
-
-static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
-{
-	fpu->fpc = fpregs->fpc;
-	if (cpu_has_vx())
-		convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
-	else
-		memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
-		       sizeof(fpregs->fprs));
-}
-
-#endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h
deleted file mode 100644
index d889e9436865..000000000000
--- a/arch/s390/include/asm/fpu/types.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * FPU data structures
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#ifndef _ASM_S390_FPU_TYPES_H
-#define _ASM_S390_FPU_TYPES_H
-
-#include <asm/sigcontext.h>
-
-struct fpu {
-	__u32 fpc;		/* Floating-point control */
-	void *regs;		/* Pointer to the current save area */
-	union {
-		/* Floating-point register save area */
-		freg_t fprs[__NUM_FPRS];
-		/* Vector register save area */
-		__vector128 vxrs[__NUM_VXRS];
-	};
-};
-
-/* VX array structure for address operand constraints in inline assemblies */
-struct vx_array { __vector128 _[__NUM_VXRS]; };
-
-/* In-kernel FPU state structure */
-struct kernel_fpu {
-	u32	    mask;
-	u32	    fpc;
-	union {
-		freg_t fprs[__NUM_FPRS];
-		__vector128 vxrs[__NUM_VXRS];
-	};
-};
-
-#endif /* _ASM_S390_FPU_TYPES_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 52664105a473..95990461888f 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -23,7 +23,7 @@
 #include <linux/mmu_notifier.h>
 #include <asm/debug.h>
 #include <asm/cpu.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
 #include <asm/isc.h>
 #include <asm/guarded_storage.h>
 
@@ -743,7 +743,6 @@ struct kvm_vcpu_arch {
 	struct kvm_s390_sie_block *vsie_block;
 	unsigned int      host_acrs[NUM_ACRS];
 	struct gs_cb      *host_gscb;
-	struct fpu	  host_fpregs;
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct kvm_s390_pgm_info pgm;
@@ -765,6 +764,8 @@ struct kvm_vcpu_arch {
 	__u64 cputm_start;
 	bool gs_enabled;
 	bool skey_enabled;
+	/* Indicator if the access registers have been loaded from guest */
+	bool acrs_loaded;
 	struct kvm_s390_pv_vcpu pv;
 	union diag318_info diag318_info;
 };
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 5dc1b6345006..8c5f16857539 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -157,7 +157,7 @@ struct lowcore {
 	__s32	preempt_count;			/* 0x03a8 */
 	__u32	spinlock_lockval;		/* 0x03ac */
 	__u32	spinlock_index;			/* 0x03b0 */
-	__u32	fpu_flags;			/* 0x03b4 */
+	__u8	pad_0x03b4[0x03b8-0x03b4];	/* 0x03b4 */
 	__u64	percpu_offset;			/* 0x03b8 */
 	__u8	pad_0x03c0[0x03c8-0x03c0];	/* 0x03c0 */
 	__u64	machine_flags;			/* 0x03c8 */
diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h
index 7d1888e3dee6..3f609565734b 100644
--- a/arch/s390/include/asm/pai.h
+++ b/arch/s390/include/asm/pai.h
@@ -16,7 +16,7 @@ struct qpaci_info_block {
 	u64 header;
 	struct {
 		u64 : 8;
-		u64 num_cc : 8;	/* # of supported crypto counters */
+		u64 num_cc : 8;		/* # of supported crypto counters */
 		u64 : 9;
 		u64 num_nnpa : 7;	/* # of supported NNPA counters */
 		u64 : 32;
@@ -81,4 +81,5 @@ enum paievt_mode {
 	PAI_MODE_COUNTING,
 };
 
+#define PAI_SAVE_AREA(x)	((x)->hw.event_base)
 #endif
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index e91cd6bbc330..30820a649e6e 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -122,6 +122,7 @@ struct zpci_dev {
 	struct rcu_head rcu;
 	struct hotplug_slot hotplug_slot;
 
+	struct mutex state_lock;	/* protect state changes */
 	enum zpci_state state;
 	u32		fid;		/* function ID, used by sclp */
 	u32		fh;		/* function handle, used by insn's */
@@ -142,7 +143,6 @@ struct zpci_dev {
 	u8		reserved	: 2;
 	unsigned int	devfn;		/* DEVFN part of the RID*/
 
-	struct mutex lock;
 	u8 pfip[CLP_PFIP_NR_SEGMENTS];	/* pci function internal path */
 	u32 uid;			/* user defined id */
 	u8 util_str[CLP_UTIL_STR_LEN];	/* utility string */
@@ -170,6 +170,7 @@ struct zpci_dev {
 	u64		dma_mask;	/* DMA address space mask */
 
 	/* Function measurement block */
+	struct mutex fmb_lock;
 	struct zpci_fmb *fmb;
 	u16		fmb_update;	/* update interval */
 	u16		fmb_length;
diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h
index 9e41a74fce9a..e747b067f8db 100644
--- a/arch/s390/include/asm/physmem_info.h
+++ b/arch/s390/include/asm/physmem_info.h
@@ -22,6 +22,7 @@ enum reserved_range_type {
 	RR_DECOMPRESSOR,
 	RR_INITRD,
 	RR_VMLINUX,
+	RR_RELOC,
 	RR_AMODE31,
 	RR_IPLREPORT,
 	RR_CERT_COMP_LIST,
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index c0b6e74d899a..7cf00cf8fb0b 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -15,13 +15,11 @@
 #include <linux/bits.h>
 
 #define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
-#define CIF_FPU			3	/* restore FPU registers */
 #define CIF_ENABLED_WAIT	5	/* in enabled wait state */
 #define CIF_MCCK_GUEST		6	/* machine check happening in guest */
 #define CIF_DEDICATED_CPU	7	/* this CPU is dedicated */
 
 #define _CIF_NOHZ_DELAY		BIT(CIF_NOHZ_DELAY)
-#define _CIF_FPU		BIT(CIF_FPU)
 #define _CIF_ENABLED_WAIT	BIT(CIF_ENABLED_WAIT)
 #define _CIF_MCCK_GUEST		BIT(CIF_MCCK_GUEST)
 #define _CIF_DEDICATED_CPU	BIT(CIF_DEDICATED_CPU)
@@ -33,13 +31,12 @@
 #include <linux/cpumask.h>
 #include <linux/linkage.h>
 #include <linux/irqflags.h>
+#include <asm/fpu-types.h>
 #include <asm/cpu.h>
 #include <asm/page.h>
 #include <asm/ptrace.h>
 #include <asm/setup.h>
 #include <asm/runtime_instr.h>
-#include <asm/fpu/types.h>
-#include <asm/fpu/internal.h>
 #include <asm/irqflags.h>
 
 typedef long (*sys_call_ptr_t)(struct pt_regs *regs);
@@ -169,6 +166,8 @@ struct thread_struct {
 	unsigned int gmap_write_flag;		/* gmap fault write indication */
 	unsigned int gmap_int_code;		/* int code of last gmap fault */
 	unsigned int gmap_pfault;		/* signal of a pending guest pfault */
+	int ufpu_flags;				/* user fpu flags */
+	int kfpu_flags;				/* kernel fpu flags */
 
 	/* Per-thread information related to debugging */
 	struct per_regs per_user;		/* User specified PER registers */
@@ -184,7 +183,8 @@ struct thread_struct {
 	struct gs_cb *gs_cb;			/* Current guarded storage cb */
 	struct gs_cb *gs_bc_cb;			/* Broadcast guarded storage cb */
 	struct pgm_tdb trap_tdb;		/* Transaction abort diagnose block */
-	struct fpu fpu;				/* FP and VX register save area */
+	struct fpu ufpu;			/* User FP and VX register save area */
+	struct fpu kfpu;			/* Kernel FP and VX register save area */
 };
 
 /* Flag to disable transactions. */
@@ -203,7 +203,6 @@ typedef struct thread_struct thread_struct;
 
 #define INIT_THREAD {							\
 	.ksp = sizeof(init_stack) + (unsigned long) &init_stack,	\
-	.fpu.regs = (void *) init_task.thread.fpu.fprs,			\
 	.last_break = 1,						\
 }
 
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index d28bf8fb2799..788bc4467445 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -203,6 +203,10 @@ static inline int test_and_clear_pt_regs_flag(struct pt_regs *regs, int flag)
 	return ret;
 }
 
+struct task_struct;
+
+void update_cr_regs(struct task_struct *task);
+
 /*
  * These are defined as per linux/ptrace.h, which see.
  */
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 31ec4f545e03..433fde85b14e 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -4,7 +4,6 @@
 
 #include <linux/uaccess.h>
 #include <linux/ptrace.h>
-#include <asm/switch_to.h>
 
 struct stack_frame_user {
 	unsigned long back_chain;
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
deleted file mode 100644
index c61b2cc1a8a8..000000000000
--- a/arch/s390/include/asm/switch_to.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright IBM Corp. 1999, 2009
- *
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-
-#ifndef __ASM_SWITCH_TO_H
-#define __ASM_SWITCH_TO_H
-
-#include <linux/thread_info.h>
-#include <asm/fpu/api.h>
-#include <asm/ptrace.h>
-#include <asm/guarded_storage.h>
-
-extern struct task_struct *__switch_to(void *, void *);
-extern void update_cr_regs(struct task_struct *task);
-
-static inline void save_access_regs(unsigned int *acrs)
-{
-	typedef struct { int _[NUM_ACRS]; } acrstype;
-
-	asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs));
-}
-
-static inline void restore_access_regs(unsigned int *acrs)
-{
-	typedef struct { int _[NUM_ACRS]; } acrstype;
-
-	asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
-}
-
-#define switch_to(prev, next, last) do {				\
-	/* save_fpu_regs() sets the CIF_FPU flag, which enforces	\
-	 * a restore of the floating point / vector registers as	\
-	 * soon as the next task returns to user space			\
-	 */								\
-	save_fpu_regs();						\
-	save_access_regs(&prev->thread.acrs[0]);			\
-	save_ri_cb(prev->thread.ri_cb);					\
-	save_gs_cb(prev->thread.gs_cb);					\
-	update_cr_regs(next);						\
-	restore_access_regs(&next->thread.acrs[0]);			\
-	restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);		\
-	restore_gs_cb(next->thread.gs_cb);				\
-	prev = __switch_to(prev, next);					\
-} while (0)
-
-#endif /* __ASM_SWITCH_TO_H */
diff --git a/arch/s390/include/asm/vdso/data.h b/arch/s390/include/asm/vdso/data.h
index 73ee89142666..0e2b40ef69b0 100644
--- a/arch/s390/include/asm/vdso/data.h
+++ b/arch/s390/include/asm/vdso/data.h
@@ -3,7 +3,6 @@
 #define __S390_ASM_VDSO_DATA_H
 
 #include <linux/types.h>
-#include <vdso/datapage.h>
 
 struct arch_vdso_data {
 	__s64 tod_steering_delta;
diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h
deleted file mode 100644
index 8c188f1c6d27..000000000000
--- a/arch/s390/include/asm/vx-insn.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Support for Vector Instructions
- *
- * This wrapper header file allows to use the vector instruction macros in
- * both assembler files as well as in inline assemblies in C files.
- */
-
-#ifndef __ASM_S390_VX_INSN_H
-#define __ASM_S390_VX_INSN_H
-
-#include <asm/vx-insn-asm.h>
-
-#ifndef __ASSEMBLY__
-
-asm(".include \"asm/vx-insn-asm.h\"\n");
-
-#endif /* __ASSEMBLY__ */
-#endif	/* __ASM_S390_VX_INSN_H */