1049 files changed, 34422 insertions, 10983 deletions
diff --git a/.mailmap b/.mailmap
index 10ee1103c823..97ccdf147111 100644
--- a/.mailmap
+++ b/.mailmap
@@ -187,6 +187,8 @@ Jiri Slaby <[email protected]> <[email protected]>
 Jiri Slaby <[email protected]> <[email protected]>
 Jiri Slaby <[email protected]> <[email protected]>
 Jiri Slaby <[email protected]> <[email protected]>
+Jisheng Zhang <[email protected]> <[email protected]>
+Jisheng Zhang <[email protected]> <[email protected]>
 Johan Hovold <[email protected]> <[email protected]>
 Johan Hovold <[email protected]> <[email protected]>
 John Paul Adrian Glaubitz <[email protected]>
@@ -216,6 +218,7 @@ Koushik <[email protected]>
 Krishna Manikandan <[email protected]> <[email protected]>
 Krzysztof Kozlowski <[email protected]> <[email protected]>
 Krzysztof Kozlowski <[email protected]> <[email protected]>
+Krzysztof Kozlowski <[email protected]> <[email protected]>
 Kuninori Morimoto <[email protected]>
 Kuogee Hsieh <[email protected]> <[email protected]>
 Leonardo Bras <[email protected]> <[email protected]>
diff --git a/Documentation/ABI/stable/sysfs-devices-system-cpu b/Documentation/ABI/stable/sysfs-devices-system-cpu
index 3965ce504484..902392d7eddf 100644
--- a/Documentation/ABI/stable/sysfs-devices-system-cpu
+++ b/Documentation/ABI/stable/sysfs-devices-system-cpu
@@ -86,6 +86,10 @@ What:           /sys/devices/system/cpu/cpuX/topology/die_cpus
 Description:    internal kernel map of CPUs within the same die.
 Values:         hexadecimal bitmask.
 
+What:           /sys/devices/system/cpu/cpuX/topology/ppin
+Description:    per-socket protected processor inventory number
+Values:         hexadecimal.
+
 What:           /sys/devices/system/cpu/cpuX/topology/die_cpus_list
 Description:    human-readable list of CPUs within the same die.
                 The format is like 0-3, 8-11, 14,17.
diff --git a/Documentation/ABI/testing/debugfs-hisi-hpre b/Documentation/ABI/testing/debugfs-hisi-hpre
index b4be5f1db4b7..396de7bc735d 100644
--- a/Documentation/ABI/testing/debugfs-hisi-hpre
+++ b/Documentation/ABI/testing/debugfs-hisi-hpre
@@ -1,140 +1,150 @@
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/regs
-Date:           Sep 2019
-Contact:        [email protected]
-Description:    Dump debug registers from the HPRE cluster.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/regs
+Date:		Sep 2019
+Contact:	[email protected]
+Description:	Dump debug registers from the HPRE cluster.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/cluster_ctrl
-Date:           Sep 2019
-Contact:        [email protected]
-Description:    Write the HPRE core selection in the cluster into this file,
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/cluster_ctrl
+Date:		Sep 2019
+Contact:	[email protected]
+Description:	Write the HPRE core selection in the cluster into this file,
 		and then we can read the debug information of the core.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/rdclr_en
-Date:           Sep 2019
-Contact:        [email protected]
-Description:    HPRE cores debug registers read clear control. 1 means enable
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/rdclr_en
+Date:		Sep 2019
+Contact:	[email protected]
+Description:	HPRE cores debug registers read clear control. 1 means enable
 		register read clear, otherwise 0. Writing to this file has no
 		functional effect, only enable or disable counters clear after
 		reading of these registers.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/current_qm
-Date:           Sep 2019
-Contact:        [email protected]
-Description:    One HPRE controller has one PF and multiple VFs, each function
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/current_qm
+Date:		Sep 2019
+Contact:	[email protected]
+Description:	One HPRE controller has one PF and multiple VFs, each function
 		has a QM. Select the QM which below qm refers to.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/regs
-Date:           Sep 2019
-Contact:        [email protected]
-Description:    Dump debug registers from the HPRE.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/alg_qos
+Date:		Jun 2021
+Contact:	[email protected]
+Description:	The <bdf> is related the function for PF and VF.
+		HPRE driver supports to configure each function's QoS, the driver
+		supports to write <bdf> value to alg_qos in the host. Such as
+		"echo <bdf> value > alg_qos". The qos value is 1~1000, means
+		1/1000~1000/1000 of total QoS. The driver reading alg_qos to
+		get related QoS in the host and VM, Such as "cat alg_qos".
+
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/regs
+Date:		Sep 2019
+Contact:	[email protected]
+Description:	Dump debug registers from the HPRE.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/regs
-Date:           Sep 2019
-Contact:        [email protected]
-Description:    Dump debug registers from the QM.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/regs
+Date:		Sep 2019
+Contact:	[email protected]
+Description:	Dump debug registers from the QM.
 		Available for PF and VF in host. VF in guest currently only
 		has one debug register.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/current_q
-Date:           Sep 2019
-Contact:        [email protected]
-Description:    One QM may contain multiple queues. Select specific queue to
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/current_q
+Date:		Sep 2019
+Contact:	[email protected]
+Description:	One QM may contain multiple queues. Select specific queue to
 		show its debug registers in above regs.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/clear_enable
-Date:           Sep 2019
-Contact:        [email protected]
-Description:    QM debug registers(regs) read clear control. 1 means enable
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/clear_enable
+Date:		Sep 2019
+Contact:	[email protected]
+Description:	QM debug registers(regs) read clear control. 1 means enable
 		register read clear, otherwise 0.
 		Writing to this file has no functional effect, only enable or
 		disable counters clear after reading of these registers.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/err_irq
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the number of invalid interrupts for
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/err_irq
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the number of invalid interrupts for
 		QM task completion.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/aeq_irq
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the number of QM async event queue interrupts.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/aeq_irq
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the number of QM async event queue interrupts.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/abnormal_irq
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the number of interrupts for QM abnormal event.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/abnormal_irq
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the number of interrupts for QM abnormal event.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/create_qp_err
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the number of queue allocation errors.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/create_qp_err
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the number of queue allocation errors.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/mb_err
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the number of failed QM mailbox commands.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/mb_err
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the number of failed QM mailbox commands.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/status
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the status of the QM.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/status
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the status of the QM.
 		Four states: initiated, started, stopped and closed.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of sent requests.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of sent requests.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/recv_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of received requests.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/recv_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of received requests.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_busy_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of requests sent
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_busy_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of requests sent
 		with returning busy.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_fail_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of completed but error requests.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_fail_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of completed but error requests.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/invalid_req_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of invalid requests being received.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/invalid_req_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of invalid requests being received.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/overtime_thrhld
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Set the threshold time for counting the request which is
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/overtime_thrhld
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Set the threshold time for counting the request which is
 		processed longer than the threshold.
 		0: disable(default), 1: 1 microsecond.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/over_thrhld_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of time out requests.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/over_thrhld_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of time out requests.
 		Available for both PF and VF, and take no other effect on HPRE.
diff --git a/Documentation/ABI/testing/debugfs-hisi-sec b/Documentation/ABI/testing/debugfs-hisi-sec
index 85feb4408e0f..2bf84ced484b 100644
--- a/Documentation/ABI/testing/debugfs-hisi-sec
+++ b/Documentation/ABI/testing/debugfs-hisi-sec
@@ -1,113 +1,123 @@
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/clear_enable
-Date:           Oct 2019
-Contact:        [email protected]
-Description:    Enabling/disabling of clear action after reading
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/clear_enable
+Date:		Oct 2019
+Contact:	[email protected]
+Description:	Enabling/disabling of clear action after reading
 		the SEC debug registers.
 		0: disable, 1: enable.
 		Only available for PF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/current_qm
-Date:           Oct 2019
-Contact:        [email protected]
-Description:    One SEC controller has one PF and multiple VFs, each function
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/current_qm
+Date:		Oct 2019
+Contact:	[email protected]
+Description:	One SEC controller has one PF and multiple VFs, each function
 		has a QM. This file can be used to select the QM which below
 		qm refers to.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs
-Date:           Oct 2019
-Contact:        [email protected]
-Description:    Dump of QM related debug registers.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/alg_qos
+Date:		Jun 2021
+Contact:	[email protected]
+Description:	The <bdf> is related the function for PF and VF.
+		SEC driver supports to configure each function's QoS, the driver
+		supports to write <bdf> value to alg_qos in the host. Such as
+		"echo <bdf> value > alg_qos". The qos value is 1~1000, means
+		1/1000~1000/1000 of total QoS. The driver reading alg_qos to
+		get related QoS in the host and VM, Such as "cat alg_qos".
+
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs
+Date:		Oct 2019
+Contact:	[email protected]
+Description:	Dump of QM related debug registers.
 		Available for PF and VF in host. VF in guest currently only
 		has one debug register.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/current_q
-Date:           Oct 2019
-Contact:        [email protected]
-Description:    One QM of SEC may contain multiple queues. Select specific
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/current_q
+Date:		Oct 2019
+Contact:	[email protected]
+Description:	One QM of SEC may contain multiple queues. Select specific
 		queue to show its debug registers in above 'regs'.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/clear_enable
-Date:           Oct 2019
-Contact:        [email protected]
-Description:    Enabling/disabling of clear action after reading
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/clear_enable
+Date:		Oct 2019
+Contact:	[email protected]
+Description:	Enabling/disabling of clear action after reading
 		the SEC's QM debug registers.
 		0: disable, 1: enable.
 		Only available for PF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/err_irq
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the number of invalid interrupts for
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/err_irq
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the number of invalid interrupts for
 		QM task completion.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/aeq_irq
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the number of QM async event queue interrupts.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/aeq_irq
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the number of QM async event queue interrupts.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/abnormal_irq
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the number of interrupts for QM abnormal event.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/abnormal_irq
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the number of interrupts for QM abnormal event.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/create_qp_err
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the number of queue allocation errors.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/create_qp_err
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the number of queue allocation errors.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/mb_err
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the number of failed QM mailbox commands.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/mb_err
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the number of failed QM mailbox commands.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/status
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the status of the QM.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/status
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the status of the QM.
 		Four states: initiated, started, stopped and closed.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of sent requests.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of sent requests.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/recv_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of received requests.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/recv_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of received requests.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_busy_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of requests sent with returning busy.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_busy_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of requests sent with returning busy.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/err_bd_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of BD type error requests
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/err_bd_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of BD type error requests
 		to be received.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/invalid_req_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of invalid requests being received.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/invalid_req_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of invalid requests being received.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/done_flag_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of completed but marked error requests
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/done_flag_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of completed but marked error requests
 		to be received.
 		Available for both PF and VF, and take no other effect on SEC.
diff --git a/Documentation/ABI/testing/debugfs-hisi-zip b/Documentation/ABI/testing/debugfs-hisi-zip
index 3034a2bf99ca..bf1258bc6495 100644
--- a/Documentation/ABI/testing/debugfs-hisi-zip
+++ b/Documentation/ABI/testing/debugfs-hisi-zip
@@ -1,114 +1,124 @@
-What:           /sys/kernel/debug/hisi_zip/<bdf>/comp_core[01]/regs
-Date:           Nov 2018
-Contact:        [email protected]
-Description:    Dump of compression cores related debug registers.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/comp_core[01]/regs
+Date:		Nov 2018
+Contact:	[email protected]
+Description:	Dump of compression cores related debug registers.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/decomp_core[0-5]/regs
-Date:           Nov 2018
-Contact:        [email protected]
-Description:    Dump of decompression cores related debug registers.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/decomp_core[0-5]/regs
+Date:		Nov 2018
+Contact:	[email protected]
+Description:	Dump of decompression cores related debug registers.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/clear_enable
-Date:           Nov 2018
-Contact:        [email protected]
-Description:    Compression/decompression core debug registers read clear
+What:		/sys/kernel/debug/hisi_zip/<bdf>/clear_enable
+Date:		Nov 2018
+Contact:	[email protected]
+Description:	Compression/decompression core debug registers read clear
 		control. 1 means enable register read clear, otherwise 0.
 		Writing to this file has no functional effect, only enable or
 		disable counters clear after reading of these registers.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/current_qm
-Date:           Nov 2018
-Contact:        [email protected]
-Description:    One ZIP controller has one PF and multiple VFs, each function
+What:		/sys/kernel/debug/hisi_zip/<bdf>/current_qm
+Date:		Nov 2018
+Contact:	[email protected]
+Description:	One ZIP controller has one PF and multiple VFs, each function
 		has a QM. Select the QM which below qm refers to.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/regs
-Date:           Nov 2018
-Contact:        [email protected]
-Description:    Dump of QM related debug registers.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/alg_qos
+Date:		Jun 2021
+Contact:	[email protected]
+Description:	The <bdf> is related the function for PF and VF.
+		ZIP driver supports to configure each function's QoS, the driver
+		supports to write <bdf> value to alg_qos in the host. Such as
+		"echo <bdf> value > alg_qos". The qos value is 1~1000, means
+		1/1000~1000/1000 of total QoS. The driver reading alg_qos to
+		get related QoS in the host and VM, Such as "cat alg_qos".
+
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/regs
+Date:		Nov 2018
+Contact:	[email protected]
+Description:	Dump of QM related debug registers.
 		Available for PF and VF in host. VF in guest currently only
 		has one debug register.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/current_q
-Date:           Nov 2018
-Contact:        [email protected]
-Description:    One QM may contain multiple queues. Select specific queue to
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/current_q
+Date:		Nov 2018
+Contact:	[email protected]
+Description:	One QM may contain multiple queues. Select specific queue to
 		show its debug registers in above regs.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/clear_enable
-Date:           Nov 2018
-Contact:        [email protected]
-Description:    QM debug registers(regs) read clear control. 1 means enable
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/clear_enable
+Date:		Nov 2018
+Contact:	[email protected]
+Description:	QM debug registers(regs) read clear control. 1 means enable
 		register read clear, otherwise 0.
 		Writing to this file has no functional effect, only enable or
 		disable counters clear after reading of these registers.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/err_irq
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the number of invalid interrupts for
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/err_irq
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the number of invalid interrupts for
 		QM task completion.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/aeq_irq
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the number of QM async event queue interrupts.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/aeq_irq
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the number of QM async event queue interrupts.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/abnormal_irq
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the number of interrupts for QM abnormal event.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/abnormal_irq
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the number of interrupts for QM abnormal event.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/create_qp_err
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the number of queue allocation errors.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/create_qp_err
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the number of queue allocation errors.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/mb_err
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the number of failed QM mailbox commands.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/mb_err
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the number of failed QM mailbox commands.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/status
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the status of the QM.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/status
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the status of the QM.
 		Four states: initiated, started, stopped and closed.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of sent requests.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of sent requests.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/recv_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of received requests.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/recv_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of received requests.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_busy_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of requests received
+What:		/sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_busy_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of requests received
 		with returning busy.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/err_bd_cnt
-Date:           Apr 2020
-Contact:        [email protected]
-Description:    Dump the total number of BD type error requests
+What:		/sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/err_bd_cnt
+Date:		Apr 2020
+Contact:	[email protected]
+Description:	Dump the total number of BD type error requests
 		to be received.
 		Available for both PF and VF, and take no other effect on ZIP.
diff --git a/Documentation/ABI/testing/sysfs-class-thermal b/Documentation/ABI/testing/sysfs-class-thermal
index 2c52bb1f864c..8eee37982b2a 100644
--- a/Documentation/ABI/testing/sysfs-class-thermal
+++ b/Documentation/ABI/testing/sysfs-class-thermal
@@ -203,7 +203,7 @@ Description:
 
 		- for generic ACPI: should be "Fan", "Processor" or "LCD"
 		- for memory controller device on intel_menlow platform:
-		should be "Memory controller".
+		  should be "Memory controller".
 
 		RO, Required
 
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 61f5676a7429..2ad01cad7f1c 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -73,6 +73,7 @@ What:		/sys/devices/system/cpu/cpuX/topology/core_id
 		/sys/devices/system/cpu/cpuX/topology/physical_package_id
 		/sys/devices/system/cpu/cpuX/topology/thread_siblings
 		/sys/devices/system/cpu/cpuX/topology/thread_siblings_list
+		/sys/devices/system/cpu/cpuX/topology/ppin
 Date:		December 2008
 Contact:	Linux kernel mailing list <[email protected]>
 Description:	CPU topology files that describe a logical CPU's relationship
@@ -103,6 +104,11 @@ Description:	CPU topology files that describe a logical CPU's relationship
 		thread_siblings_list: human-readable list of cpuX's hardware
 		threads within the same core as cpuX
 
+		ppin: human-readable Protected Processor Identification
+		Number of the socket the cpu# belongs to. There should be
+		one per physical_package_id. File is readable only to
+		admin.
+
 		See Documentation/admin-guide/cputopology.rst for more information.
 
 
@@ -662,6 +668,7 @@ Description:	Preferred MTE tag checking mode
 
 		================  ==============================================
 		"sync"	  	  Prefer synchronous mode
+		"asymm"	  	  Prefer asymmetric mode
 		"async"	  	  Prefer asynchronous mode
 		================  ==============================================
 
diff --git a/Documentation/Makefile b/Documentation/Makefile
index 9f4bd42cef18..64d44c1ecad3 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -26,7 +26,7 @@ SPHINX_CONF   = conf.py
 PAPER         =
 BUILDDIR      = $(obj)/output
 PDFLATEX      = xelatex
-LATEXOPTS     = -interaction=batchmode
+LATEXOPTS     = -interaction=batchmode -no-shell-escape
 
 ifeq ($(KBUILD_VERBOSE),0)
 SPHINXOPTS    += "-q"
diff --git a/Documentation/admin-guide/acpi/fan_performance_states.rst b/Documentation/admin-guide/acpi/fan_performance_states.rst
index 98fe5c333121..b9e4b4d146c1 100644
--- a/Documentation/admin-guide/acpi/fan_performance_states.rst
+++ b/Documentation/admin-guide/acpi/fan_performance_states.rst
@@ -60,3 +60,31 @@ For example::
 
 When a given field is not populated or its value provided by the platform
 firmware is invalid, the "not-defined" string is shown instead of the value.
+
+ACPI Fan Fine Grain Control
+=============================
+
+When _FIF object specifies support for fine grain control, then fan speed
+can be set from 0 to 100% with the recommended minimum "step size" via
+_FSL object. User can adjust fan speed using thermal sysfs cooling device.
+
+Here use can look at fan performance states for a reference speed (speed_rpm)
+and set it by changing cooling device cur_state. If the fine grain control
+is supported then user can also adjust to some other speeds which are
+not defined in the performance states.
+
+The support of fine grain control is presented via sysfs attribute
+"fine_grain_control". If fine grain control is present, this attribute
+will show "1" otherwise "0".
+
+This sysfs attribute is presented in the same directory as performance states.
+
+ACPI Fan Performance Feedback
+=============================
+
+The optional _FST object provides status information for the fan device.
+This includes field to provide current fan speed in revolutions per minute
+at which the fan is rotating.
+
+This speed is presented in the sysfs using the attribute "fan_speed_rpm",
+in the same directory as performance states.
diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index 3e11926a4df9..54fe63745ed8 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -315,8 +315,8 @@ To use the feature, admin should set up backing device via::
 
 	echo /dev/sda5 > /sys/block/zramX/backing_dev
 
-before disksize setting. It supports only partition at this moment.
-If admin wants to use incompressible page writeback, they could do via::
+before disksize setting. It supports only partitions at this moment.
+If admin wants to use incompressible page writeback, they could do it via::
 
 	echo huge > /sys/block/zramX/writeback
 
@@ -341,9 +341,9 @@ Admin can request writeback of those idle pages at right timing via::
 
 	echo idle > /sys/block/zramX/writeback
 
-With the command, zram writeback idle pages from memory to the storage.
+With the command, zram will writeback idle pages from memory to the storage.
 
-If admin want to write a specific page in zram device to backing device,
+If an admin wants to write a specific page in zram device to the backing device,
 they could write a page index into the interface.
 
 	echo "page_index=1251" > /sys/block/zramX/writeback
@@ -354,7 +354,7 @@ to guarantee storage health for entire product life.
 
 To overcome the concern, zram supports "writeback_limit" feature.
 The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
-any writeback. IOW, if admin wants to apply writeback budget, he should
+any writeback. IOW, if admin wants to apply writeback budget, they should
 enable writeback_limit_enable via::
 
 	$ echo 1 > /sys/block/zramX/writeback_limit_enable
@@ -365,7 +365,7 @@ until admin sets the budget via /sys/block/zramX/writeback_limit.
 (If admin doesn't enable writeback_limit_enable, writeback_limit's value
 assigned via /sys/block/zramX/writeback_limit is meaningless.)
 
-If admin want to limit writeback as per-day 400M, he could do it
+If admin wants to limit writeback as per-day 400M, they could do it
 like below::
 
 	$ MB_SHIFT=20
@@ -375,16 +375,16 @@ like below::
 	$ echo 1 > /sys/block/zram0/writeback_limit_enable
 
 If admins want to allow further write again once the budget is exhausted,
-he could do it like below::
+they could do it like below::
 
 	$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
 		/sys/block/zram0/writeback_limit
 
-If admin wants to see remaining writeback budget since last set::
+If an admin wants to see the remaining writeback budget since last set::
 
 	$ cat /sys/block/zramX/writeback_limit
 
-If admin want to disable writeback limit, he could do::
+If an admin wants to disable writeback limit, they could do::
 
 	$ echo 0 > /sys/block/zramX/writeback_limit_enable
 
@@ -393,7 +393,7 @@ system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
 writeback happened until you reset the zram to allocate extra writeback
 budget in next setting is user's job.
 
-If admin wants to measure writeback count in a certain period, he could
+If admin wants to measure writeback count in a certain period, they could
 know it via /sys/block/zram0/bd_stat's 3rd column.
 
 memory tracking
diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
index a2b22d5640ec..9e9556826450 100644
--- a/Documentation/admin-guide/hw-vuln/spectre.rst
+++ b/Documentation/admin-guide/hw-vuln/spectre.rst
@@ -60,8 +60,8 @@ privileged data touched during the speculative execution.
 Spectre variant 1 attacks take advantage of speculative execution of
 conditional branches, while Spectre variant 2 attacks use speculative
 execution of indirect branches to leak privileged memory.
-See :ref:`[1] <spec_ref1>` :ref:`[5] <spec_ref5>` :ref:`[7] <spec_ref7>`
-:ref:`[10] <spec_ref10>` :ref:`[11] <spec_ref11>`.
+See :ref:`[1] <spec_ref1>` :ref:`[5] <spec_ref5>` :ref:`[6] <spec_ref6>`
+:ref:`[7] <spec_ref7>` :ref:`[10] <spec_ref10>` :ref:`[11] <spec_ref11>`.
 
 Spectre variant 1 (Bounds Check Bypass)
 ---------------------------------------
@@ -131,6 +131,19 @@ steer its indirect branch speculations to gadget code, and measure the
 speculative execution's side effects left in level 1 cache to infer the
 victim's data.
 
+Yet another variant 2 attack vector is for the attacker to poison the
+Branch History Buffer (BHB) to speculatively steer an indirect branch
+to a specific Branch Target Buffer (BTB) entry, even if the entry isn't
+associated with the source address of the indirect branch. Specifically,
+the BHB might be shared across privilege levels even in the presence of
+Enhanced IBRS.
+
+Currently the only known real-world BHB attack vector is via
+unprivileged eBPF. Therefore, it's highly recommended to not enable
+unprivileged eBPF, especially when eIBRS is used (without retpolines).
+For a full mitigation against BHB attacks, it's recommended to use
+retpolines (or eIBRS combined with retpolines).
+
 Attack scenarios
 ----------------
 
@@ -364,13 +377,15 @@ The possible values in this file are:
 
   - Kernel status:
 
-  ====================================  =================================
-  'Not affected'                        The processor is not vulnerable
-  'Vulnerable'                          Vulnerable, no mitigation
-  'Mitigation: Full generic retpoline'  Software-focused mitigation
-  'Mitigation: Full AMD retpoline'      AMD-specific software mitigation
-  'Mitigation: Enhanced IBRS'           Hardware-focused mitigation
-  ====================================  =================================
+  ========================================  =================================
+  'Not affected'                            The processor is not vulnerable
+  'Mitigation: None'                        Vulnerable, no mitigation
+  'Mitigation: Retpolines'                  Use Retpoline thunks
+  'Mitigation: LFENCE'                      Use LFENCE instructions
+  'Mitigation: Enhanced IBRS'               Hardware-focused mitigation
+  'Mitigation: Enhanced IBRS + Retpolines'  Hardware-focused + Retpolines
+  'Mitigation: Enhanced IBRS + LFENCE'      Hardware-focused + LFENCE
+  ========================================  =================================
 
   - Firmware status: Show if Indirect Branch Restricted Speculation (IBRS) is
     used to protect against Spectre variant 2 attacks when calling firmware (x86 only).
@@ -583,12 +598,13 @@ kernel command line.
 
 		Specific mitigations can also be selected manually:
 
-		retpoline
-					replace indirect branches
-		retpoline,generic
-					google's original retpoline
-		retpoline,amd
-					AMD-specific minimal thunk
+                retpoline               auto pick between generic,lfence
+                retpoline,generic       Retpolines
+                retpoline,lfence        LFENCE; indirect branch
+                retpoline,amd           alias for retpoline,lfence
+                eibrs                   enhanced IBRS
+                eibrs,retpoline         enhanced IBRS + Retpolines
+                eibrs,lfence            enhanced IBRS + LFENCE
 
 		Not specifying this option is equivalent to
 		spectre_v2=auto.
@@ -599,7 +615,7 @@ kernel command line.
 		spectre_v2=off. Spectre variant 1 mitigations
 		cannot be disabled.
 
-For spectre_v2_user see :doc:`/admin-guide/kernel-parameters`.
+For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
 
 Mitigation selection guide
 --------------------------
@@ -681,7 +697,7 @@ AMD white papers:
 
 .. _spec_ref6:
 
-[6] `Software techniques for managing speculation on AMD processors <https://developer.amd.com/wp-content/resources/90343-B_SoftwareTechniquesforManagingSpeculation_WP_7-18Update_FNL.pdf>`_.
+[6] `Software techniques for managing speculation on AMD processors <https://developer.amd.com/wp-content/resources/Managing-Speculation-on-AMD-Processors.pdf>`_.
 
 ARM white papers:
 
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 1bedab498104..5bfafcbb9562 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -35,6 +35,7 @@ problems and bugs in particular.
    :maxdepth: 1
 
    reporting-issues
+   reporting-regressions
    security-bugs
    bug-hunting
    bug-bisect
diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst
index 9b14b0c2c9c4..609a3201fd4e 100644
--- a/Documentation/admin-guide/iostats.rst
+++ b/Documentation/admin-guide/iostats.rst
@@ -76,7 +76,7 @@ Field  3 -- # of sectors read (unsigned long)
 
 Field  4 -- # of milliseconds spent reading (unsigned int)
     This is the total number of milliseconds spent by all reads (as
-    measured from __make_request() to end_that_request_last()).
+    measured from blk_mq_alloc_request() to __blk_mq_end_request()).
 
 Field  5 -- # of writes completed (unsigned long)
     This is the total number of writes completed successfully.
@@ -89,7 +89,7 @@ Field  7 -- # of sectors written (unsigned long)
 
 Field  8 -- # of milliseconds spent writing (unsigned int)
     This is the total number of milliseconds spent by all writes (as
-    measured from __make_request() to end_that_request_last()).
+    measured from blk_mq_alloc_request() to __blk_mq_end_request()).
 
 Field  9 -- # of I/Os currently in progress (unsigned int)
     The only field that should go to zero. Incremented as requests are
@@ -120,7 +120,7 @@ Field 14 -- # of sectors discarded (unsigned long)
 
 Field 15 -- # of milliseconds spent discarding (unsigned int)
     This is the total number of milliseconds spent by all discards (as
-    measured from __make_request() to end_that_request_last()).
+    measured from blk_mq_alloc_request() to __blk_mq_end_request()).
 
 Field 16 -- # of flush requests completed
     This is the total number of flush requests completed successfully.
diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
index 3861a25faae1..8419019b6a88 100644
--- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@@ -494,6 +494,14 @@ architecture which is used to lookup the page-tables for the Virtual
 addresses in the higher VA range (refer to ARMv8 ARM document for
 more details).
 
+MODULES_VADDR|MODULES_END|VMALLOC_START|VMALLOC_END|VMEMMAP_START|VMEMMAP_END
+-----------------------------------------------------------------------------
+
+Used to get the correct ranges:
+	MODULES_VADDR ~ MODULES_END-1 : Kernel module space.
+	VMALLOC_START ~ VMALLOC_END-1 : vmalloc() / ioremap() space.
+	VMEMMAP_START ~ VMEMMAP_END-1 : vmemmap region, used for struct page array.
+
 arm
 ===
 
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f5a27f067db9..c2d1f8b5e8f3 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2827,6 +2827,9 @@
 
 			For details see: Documentation/admin-guide/hw-vuln/mds.rst
 
+	mem=nn[KMG]	[HEXAGON] Set the memory size.
+			Must be specified, otherwise memory size will be 0.
+
 	mem=nn[KMG]	[KNL,BOOT] Force usage of a specific amount of memory
 			Amount of memory to be used in cases as follows:
 
@@ -2834,6 +2837,13 @@
 			2 when the kernel is not able to see the whole system memory;
 			3 memory that lies after 'mem=' boundary is excluded from
 			 the hypervisor, then assigned to KVM guests.
+			4 to limit the memory available for kdump kernel.
+
+			[ARC,MICROBLAZE] - the limit applies only to low memory,
+			high memory is not affected.
+
+			[ARM64] - only limits memory covered by the linear
+			mapping. The NOMAP regions are not affected.
 
 			[X86] Work as limiting max address. Use together
 			with memmap= to avoid physical address space collisions.
@@ -2844,6 +2854,14 @@
 			in above case 3, memory may need be hot added after boot
 			if system memory of hypervisor is not sufficient.
 
+	mem=nn[KMG]@ss[KMG]
+			[ARM,MIPS] - override the memory layout reported by
+			firmware.
+			Define a memory region of size nn[KMG] starting at
+			ss[KMG].
+			Multiple different regions can be specified with
+			multiple mem= parameters on the command line.
+
 	mem=nopentium	[BUGS=X86-32] Disable usage of 4MB pages for kernel
 			memory.
 
@@ -4504,6 +4522,8 @@
 			(the least-favored priority).  Otherwise, when
 			RCU_BOOST is not set, valid values are 0-99 and
 			the default is zero (non-realtime operation).
+			When RCU_NOCB_CPU is set, also adjust the
+			priority of NOCB callback kthreads.
 
 	rcutree.rcu_nocb_gp_stride= [KNL]
 			Set the number of NOCB callback kthreads in
@@ -5361,8 +5381,12 @@
 			Specific mitigations can also be selected manually:
 
 			retpoline	  - replace indirect branches
-			retpoline,generic - google's original retpoline
-			retpoline,amd     - AMD-specific minimal thunk
+			retpoline,generic - Retpolines
+			retpoline,lfence  - LFENCE; indirect branch
+			retpoline,amd     - alias for retpoline,lfence
+			eibrs		  - enhanced IBRS
+			eibrs,retpoline   - enhanced IBRS + Retpolines
+			eibrs,lfence      - enhanced IBRS + LFENCE
 
 			Not specifying this option is equivalent to
 			spectre_v2=auto.
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index 5a8f2529a033..69b23f087c05 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -8,6 +8,7 @@ Performance monitor support
    :maxdepth: 1
 
    hisi-pmu
+   hisi-pcie-pmu
    imx-ddr
    qcom_l2_pmu
    qcom_l3_pmu
diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index 2f066df4ee9c..1923cb25073b 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -369,6 +369,32 @@ governor (for the policies it is attached to), or by the ``CPUFreq`` core (for t
 policies with other scaling governors).
 
 
+Tracer Tool
+-------------
+
+``amd_pstate_tracer.py`` can record and parse ``amd-pstate`` trace log, then
+generate performance plots. This utility can be used to debug and tune the
+performance of ``amd-pstate`` driver. The tracer tool needs to import intel
+pstate tracer.
+
+Tracer tool located in ``linux/tools/power/x86/amd_pstate_tracer``. It can be
+used in two ways. If trace file is available, then directly parse the file
+with command ::
+
+ ./amd_pstate_trace.py [-c cpus] -t <trace_file> -n <test_name>
+
+Or generate trace file with root privilege, then parse and plot with command ::
+
+ sudo ./amd_pstate_trace.py [-c cpus] -n <test_name> -i <interval> [-m kbytes]
+
+The test result can be found in ``results/test_name``. Following is the example
+about part of the output. ::
+
+ common_cpu  common_secs  common_usecs  min_perf  des_perf  max_perf  freq    mperf   apef    tsc       load   duration_ms  sample_num  elapsed_time  common_comm
+ CPU_005     712          116384        39        49        166       0.7565  9645075 2214891 38431470  25.1   11.646       469         2.496         kworker/5:0-40
+ CPU_006     712          116408        39        49        166       0.6769  8950227 1839034 37192089  24.06  11.272       470         2.496         kworker/6:0-1264
+
+
 Reference
 ===========
 
diff --git a/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst b/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst
new file mode 100644
index 000000000000..09169d935835
--- /dev/null
+++ b/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst
@@ -0,0 +1,60 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+==============================
+Intel Uncore Frequency Scaling
+==============================
+
+:Copyright: |copy| 2022 Intel Corporation
+
+:Author: Srinivas Pandruvada <[email protected]>
+
+Introduction
+------------
+
+The uncore can consume significant amount of power in Intel's Xeon servers based
+on the workload characteristics. To optimize the total power and improve overall
+performance, SoCs have internal algorithms for scaling uncore frequency. These
+algorithms monitor workload usage of uncore and set a desirable frequency.
+
+It is possible that users have different expectations of uncore performance and
+want to have control over it. The objective is similar to allowing users to set
+the scaling min/max frequencies via cpufreq sysfs to improve CPU performance.
+Users may have some latency sensitive workloads where they do not want any
+change to uncore frequency. Also, users may have workloads which require
+different core and uncore performance at distinct phases and they may want to
+use both cpufreq and the uncore scaling interface to distribute power and
+improve overall performance.
+
+Sysfs Interface
+---------------
+
+To control uncore frequency, a sysfs interface is provided in the directory:
+`/sys/devices/system/cpu/intel_uncore_frequency/`.
+
+There is one directory for each package and die combination as the scope of
+uncore scaling control is per die in multiple die/package SoCs or per
+package for single die per package SoCs. The name represents the
+scope of control. For example: 'package_00_die_00' is for package id 0 and
+die 0.
+
+Each package_*_die_* contains the following attributes:
+
+``initial_max_freq_khz``
+	Out of reset, this attribute represent the maximum possible frequency.
+	This is a read-only attribute. If users adjust max_freq_khz,
+	they can always go back to maximum using the value from this attribute.
+
+``initial_min_freq_khz``
+	Out of reset, this attribute represent the minimum possible frequency.
+	This is a read-only attribute. If users adjust min_freq_khz,
+	they can always go back to minimum using the value from this attribute.
+
+``max_freq_khz``
+	This attribute is used to set the maximum uncore frequency.
+
+``min_freq_khz``
+	This attribute is used to set the minimum uncore frequency.
+
+``current_freq_khz``
+	This attribute is used to get the current uncore frequency.
diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst
index 5d2757e2de65..ee45887811ff 100644
--- a/Documentation/admin-guide/pm/working-state.rst
+++ b/Documentation/admin-guide/pm/working-state.rst
@@ -15,3 +15,4 @@ Working-State Power Management
    cpufreq_drivers
    intel_epb
    intel-speed-select
+   intel_uncore_frequency_scaling
diff --git a/Documentation/admin-guide/reporting-issues.rst b/Documentation/admin-guide/reporting-issues.rst
index d7ac13f789cc..ec62151fe672 100644
--- a/Documentation/admin-guide/reporting-issues.rst
+++ b/Documentation/admin-guide/reporting-issues.rst
@@ -1,14 +1,5 @@
 .. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
-..
-   If you want to distribute this text under CC-BY-4.0 only, please use 'The
-   Linux kernel developers' for author attribution and link this as source:
-   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst
-..
-   Note: Only the content of this RST file as found in the Linux kernel sources
-   is available under CC-BY-4.0, as versions of this text that were processed
-   (for example by the kernel's build system) might contain content taken from
-   files which use a more restrictive license.
-
+.. See the bottom of this file for additional redistribution information.
 
 Reporting issues
 ++++++++++++++++
@@ -395,22 +386,16 @@ fixed as soon as possible, hence there are 'issues of high priority' that get
 handled slightly differently in the reporting process. Three type of cases
 qualify: regressions, security issues, and really severe problems.
 
-You deal with a 'regression' if something that worked with an older version of
-the Linux kernel does not work with a newer one or somehow works worse with it.
-It thus is a regression when a WiFi driver that did a fine job with Linux 5.7
-somehow misbehaves with 5.8 or doesn't work at all. It's also a regression if
-an application shows erratic behavior with a newer kernel, which might happen
-due to incompatible changes in the interface between the kernel and the
-userland (like procfs and sysfs). Significantly reduced performance or
-increased power consumption also qualify as regression. But keep in mind: the
-new kernel needs to be built with a configuration that is similar to the one
-from the old kernel (see below how to achieve that). That's because the kernel
-developers sometimes can not avoid incompatibilities when implementing new
-features; but to avoid regressions such features have to be enabled explicitly
-during build time configuration.
+You deal with a regression if some application or practical use case running
+fine with one Linux kernel works worse or not at all with a newer version
+compiled using a similar configuration. The document
+Documentation/admin-guide/reporting-regressions.rst explains this in more
+detail. It also provides a good deal of other information about regressions you
+might want to be aware of; it for example explains how to add your issue to the
+list of tracked regressions, to ensure it won't fall through the cracks.
 
 What qualifies as security issue is left to your judgment. Consider reading
-'Documentation/admin-guide/security-bugs.rst' before proceeding, as it
+Documentation/admin-guide/security-bugs.rst before proceeding, as it
 provides additional details how to best handle security issues.
 
 An issue is a 'really severe problem' when something totally unacceptably bad
@@ -517,7 +502,7 @@ line starting with 'CPU:'. It should end with 'Not tainted' if the kernel was
 not tainted when it noticed the problem; it was tainted if you see 'Tainted:'
 followed by a few spaces and some letters.
 
-If your kernel is tainted, study 'Documentation/admin-guide/tainted-kernels.rst'
+If your kernel is tainted, study Documentation/admin-guide/tainted-kernels.rst
 to find out why. Try to eliminate the reason. Often it's caused by one these
 three things:
 
@@ -1043,7 +1028,7 @@ down the culprit, as maintainers often won't have the time or setup at hand to
 reproduce it themselves.
 
 To find the change there is a process called 'bisection' which the document
-'Documentation/admin-guide/bug-bisect.rst' describes in detail. That process
+Documentation/admin-guide/bug-bisect.rst describes in detail. That process
 will often require you to build about ten to twenty kernel images, trying to
 reproduce the issue with each of them before building the next. Yes, that takes
 some time, but don't worry, it works a lot quicker than most people assume.
@@ -1073,10 +1058,11 @@ When dealing with regressions make sure the issue you face is really caused by
 the kernel and not by something else, as outlined above already.
 
 In the whole process keep in mind: an issue only qualifies as regression if the
-older and the newer kernel got built with a similar configuration. The best way
-to archive this: copy the configuration file (``.config``) from the old working
-kernel freshly to each newer kernel version you try. Afterwards run ``make
-olddefconfig`` to adjust it for the needs of the new version.
+older and the newer kernel got built with a similar configuration. This can be
+achieved by using ``make olddefconfig``, as explained in more detail by
+Documentation/admin-guide/reporting-regressions.rst; that document also
+provides a good deal of other information about regressions you might want to be
+aware of.
 
 
 Write and send the report
@@ -1283,7 +1269,7 @@ them when sending the report by mail. If you filed it in a bug tracker, forward
 the report's text to these addresses; but on top of it put a small note where
 you mention that you filed it with a link to the ticket.
 
-See 'Documentation/admin-guide/security-bugs.rst' for more information.
+See Documentation/admin-guide/security-bugs.rst for more information.
 
 
 Duties after the report went out
@@ -1571,7 +1557,7 @@ Once your report is out your might get asked to do a proper one, as it allows to
 pinpoint the exact change that causes the issue (which then can easily get
 reverted to fix the issue quickly). Hence consider to do a proper bisection
 right away if time permits. See the section 'Special care for regressions' and
-the document 'Documentation/admin-guide/bug-bisect.rst' for details how to
+the document Documentation/admin-guide/bug-bisect.rst for details how to
 perform one. In case of a successful bisection add the author of the culprit to
 the recipients; also CC everyone in the signed-off-by chain, which you find at
 the end of its commit message.
@@ -1594,7 +1580,7 @@ Some fixes are too complex
 Even small and seemingly obvious code-changes sometimes introduce new and
 totally unexpected problems. The maintainers of the stable and longterm kernels
 are very aware of that and thus only apply changes to these kernels that are
-within rules outlined in 'Documentation/process/stable-kernel-rules.rst'.
+within rules outlined in Documentation/process/stable-kernel-rules.rst.
 
 Complex or risky changes for example do not qualify and thus only get applied
 to mainline. Other fixes are easy to get backported to the newest stable and
@@ -1756,10 +1742,23 @@ art will lay some groundwork to improve the situation over time.
 
 
 ..
-   This text is maintained by Thorsten Leemhuis <[email protected]>. If you
-   spot a typo or small mistake, feel free to let him know directly and he'll
-   fix it. You are free to do the same in a mostly informal way if you want
-   to contribute changes to the text, but for copyright reasons please CC
+   end-of-content
+..
+   This document is maintained by Thorsten Leemhuis <[email protected]>. If
+   you spot a typo or small mistake, feel free to let him know directly and
+   he'll fix it. You are free to do the same in a mostly informal way if you
+   want to contribute changes to the text, but for copyright reasons please CC
    [email protected] and "sign-off" your contribution as
    Documentation/process/submitting-patches.rst outlines in the section "Sign
    your work - the Developer's Certificate of Origin".
+..
+   This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+   of the file. If you want to distribute this text under CC-BY-4.0 only,
+   please use "The Linux kernel developers" for author attribution and link
+   this as source:
+   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst
+..
+   Note: Only the content of this RST file as found in the Linux kernel sources
+   is available under CC-BY-4.0, as versions of this text that were processed
+   (for example by the kernel's build system) might contain content taken from
+   files which use a more restrictive license.
diff --git a/Documentation/admin-guide/reporting-regressions.rst b/Documentation/admin-guide/reporting-regressions.rst
new file mode 100644
index 000000000000..d8adccdae23f
--- /dev/null
+++ b/Documentation/admin-guide/reporting-regressions.rst
@@ -0,0 +1,451 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+.. [see the bottom of this file for redistribution information]
+
+Reporting regressions
++++++++++++++++++++++
+
+"*We don't cause regressions*" is the first rule of Linux kernel development;
+Linux founder and lead developer Linus Torvalds established it himself and
+ensures it's obeyed.
+
+This document describes what the rule means for users and how the Linux kernel's
+development model ensures to address all reported regressions; aspects relevant
+for kernel developers are left to Documentation/process/handling-regressions.rst.
+
+
+The important bits (aka "TL;DR")
+================================
+
+#. It's a regression if something running fine with one Linux kernel works worse
+   or not at all with a newer version. Note, the newer kernel has to be compiled
+   using a similar configuration; the detailed explanations below describes this
+   and other fine print in more detail.
+
+#. Report your issue as outlined in Documentation/admin-guide/reporting-issues.rst,
+   it already covers all aspects important for regressions and repeated
+   below for convenience. Two of them are important: start your report's subject
+   with "[REGRESSION]" and CC or forward it to `the regression mailing list
+   <https://lore.kernel.org/regressions/>`_ ([email protected]).
+
+#. Optional, but recommended: when sending or forwarding your report, make the
+   Linux kernel regression tracking bot "regzbot" track the issue by specifying
+   when the regression started like this::
+
+       #regzbot introduced v5.13..v5.14-rc1
+
+
+All the details on Linux kernel regressions relevant for users
+==============================================================
+
+
+The important basics
+--------------------
+
+
+What is a "regression" and what is the "no regressions rule"?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It's a regression if some application or practical use case running fine with
+one Linux kernel works worse or not at all with a newer version compiled using a
+similar configuration. The "no regressions rule" forbids this to take place; if
+it happens by accident, developers that caused it are expected to quickly fix
+the issue.
+
+It thus is a regression when a WiFi driver from Linux 5.13 works fine, but with
+5.14 doesn't work at all, works significantly slower, or misbehaves somehow.
+It's also a regression if a perfectly working application suddenly shows erratic
+behavior with a newer kernel version; such issues can be caused by changes in
+procfs, sysfs, or one of the many other interfaces Linux provides to userland
+software. But keep in mind, as mentioned earlier: 5.14 in this example needs to
+be built from a configuration similar to the one from 5.13. This can be achieved
+using ``make olddefconfig``, as explained in more detail below.
+
+Note the "practical use case" in the first sentence of this section: developers
+despite the "no regressions" rule are free to change any aspect of the kernel
+and even APIs or ABIs to userland, as long as no existing application or use
+case breaks.
+
+Also be aware the "no regressions" rule covers only interfaces the kernel
+provides to the userland. It thus does not apply to kernel-internal interfaces
+like the module API, which some externally developed drivers use to hook into
+the kernel.
+
+How do I report a regression?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Just report the issue as outlined in
+Documentation/admin-guide/reporting-issues.rst, it already describes the
+important points. The following aspects outlined there are especially relevant
+for regressions:
+
+ * When checking for existing reports to join, also search the `archives of the
+   Linux regressions mailing list <https://lore.kernel.org/regressions/>`_ and
+   `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_.
+
+ * Start your report's subject with "[REGRESSION]".
+
+ * In your report, clearly mention the last kernel version that worked fine and
+   the first broken one. Ideally try to find the exact change causing the
+   regression using a bisection, as explained below in more detail.
+
+ * Remember to let the Linux regressions mailing list
+   ([email protected]) know about your report:
+
+   * If you report the regression by mail, CC the regressions list.
+
+   * If you report your regression to some bug tracker, forward the submitted
+     report by mail to the regressions list while CCing the maintainer and the
+     mailing list for the subsystem in question.
+
+   If it's a regression within a stable or longterm series (e.g.
+   v5.15.3..v5.15.5), remember to CC the `Linux stable mailing list
+   <https://lore.kernel.org/stable/>`_ ([email protected]).
+
+  In case you performed a successful bisection, add everyone to the CC the
+  culprit's commit message mentions in lines starting with "Signed-off-by:".
+
+When CCing for forwarding your report to the list, consider directly telling the
+aforementioned Linux kernel regression tracking bot about your report. To do
+that, include a paragraph like this in your mail::
+
+       #regzbot introduced: v5.13..v5.14-rc1
+
+Regzbot will then consider your mail a report for a regression introduced in the
+specified version range. In above case Linux v5.13 still worked fine and Linux
+v5.14-rc1 was the first version where you encountered the issue. If you
+performed a bisection to find the commit that caused the regression, specify the
+culprit's commit-id instead::
+
+       #regzbot introduced: 1f2e3d4c5d
+
+Placing such a "regzbot command" is in your interest, as it will ensure the
+report won't fall through the cracks unnoticed. If you omit this, the Linux
+kernel's regressions tracker will take care of telling regzbot about your
+regression, as long as you send a copy to the regressions mailing lists. But the
+regression tracker is just one human which sometimes has to rest or occasionally
+might even enjoy some time away from computers (as crazy as that might sound).
+Relying on this person thus will result in an unnecessary delay before the
+regressions becomes mentioned `on the list of tracked and unresolved Linux
+kernel regressions <https://linux-regtracking.leemhuis.info/regzbot/>`_ and the
+weekly regression reports sent by regzbot. Such delays can result in Linus
+Torvalds being unaware of important regressions when deciding between "continue
+development or call this finished and release the final?".
+
+Are really all regressions fixed?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Nearly all of them are, as long as the change causing the regression (the
+"culprit commit") is reliably identified. Some regressions can be fixed without
+this, but often it's required.
+
+Who needs to find the root cause of a regression?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Developers of the affected code area should try to locate the culprit on their
+own. But for them that's often impossible to do with reasonable effort, as quite
+a lot of issues only occur in a particular environment outside the developer's
+reach -- for example, a specific hardware platform, firmware, Linux distro,
+system's configuration, or application. That's why in the end it's often up to
+the reporter to locate the culprit commit; sometimes users might even need to
+run additional tests afterwards to pinpoint the exact root cause. Developers
+should offer advice and reasonably help where they can, to make this process
+relatively easy and achievable for typical users.
+
+How can I find the culprit?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Perform a bisection, as roughly outlined in
+Documentation/admin-guide/reporting-issues.rst and described in more detail by
+Documentation/admin-guide/bug-bisect.rst. It might sound like a lot of work, but
+in many cases finds the culprit relatively quickly. If it's hard or
+time-consuming to reliably reproduce the issue, consider teaming up with other
+affected users to narrow down the search range together.
+
+Who can I ask for advice when it comes to regressions?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Send a mail to the regressions mailing list ([email protected]) while
+CCing the Linux kernel's regression tracker ([email protected]); if the
+issue might better be dealt with in private, feel free to omit the list.
+
+
+Additional details about regressions
+------------------------------------
+
+
+What is the goal of the "no regressions rule"?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Users should feel safe when updating kernel versions and not have to worry
+something might break. This is in the interest of the kernel developers to make
+updating attractive: they don't want users to stay on stable or longterm Linux
+series that are either abandoned or more than one and a half years old. That's
+in everybody's interest, as `those series might have known bugs, security
+issues, or other problematic aspects already fixed in later versions
+<http://www.kroah.com/log/blog/2018/08/24/what-stable-kernel-should-i-use/>`_.
+Additionally, the kernel developers want to make it simple and appealing for
+users to test the latest pre-release or regular release. That's also in
+everybody's interest, as it's a lot easier to track down and fix problems, if
+they are reported shortly after being introduced.
+
+Is the "no regressions" rule really adhered in practice?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It's taken really seriously, as can be seen by many mailing list posts from
+Linux creator and lead developer Linus Torvalds, some of which are quoted in
+Documentation/process/handling-regressions.rst.
+
+Exceptions to this rule are extremely rare; in the past developers almost always
+turned out to be wrong when they assumed a particular situation was warranting
+an exception.
+
+Who ensures the "no regressions" is actually followed?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The subsystem maintainers should take care of that, which are watched and
+supported by the tree maintainers -- e.g. Linus Torvalds for mainline and
+Greg Kroah-Hartman et al. for various stable/longterm series.
+
+All of them are helped by people trying to ensure no regression report falls
+through the cracks. One of them is Thorsten Leemhuis, who's currently acting as
+the Linux kernel's "regressions tracker"; to facilitate this work he relies on
+regzbot, the Linux kernel regression tracking bot. That's why you want to bring
+your report on the radar of these people by CCing or forwarding each report to
+the regressions mailing list, ideally with a "regzbot command" in your mail to
+get it tracked immediately.
+
+How quickly are regressions normally fixed?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Developers should fix any reported regression as quickly as possible, to provide
+affected users with a solution in a timely manner and prevent more users from
+running into the issue; nevertheless developers need to take enough time and
+care to ensure regression fixes do not cause additional damage.
+
+The answer thus depends on various factors like the impact of a regression, its
+age, or the Linux series in which it occurs. In the end though, most regressions
+should be fixed within two weeks.
+
+Is it a regression, if the issue can be avoided by updating some software?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Almost always: yes. If a developer tells you otherwise, ask the regression
+tracker for advice as outlined above.
+
+Is it a regression, if a newer kernel works slower or consumes more energy?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Yes, but the difference has to be significant. A five percent slow-down in a
+micro-benchmark thus is unlikely to qualify as regression, unless it also
+influences the results of a broad benchmark by more than one percent. If in
+doubt, ask for advice.
+
+Is it a regression, if an external kernel module breaks when updating Linux?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+No, as the "no regression" rule is about interfaces and services the Linux
+kernel provides to the userland. It thus does not cover building or running
+externally developed kernel modules, as they run in kernel-space and hook into
+the kernel using internal interfaces occasionally changed.
+
+How are regressions handled that are caused by security fixes?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In extremely rare situations security issues can't be fixed without causing
+regressions; those fixes are given way, as they are the lesser evil in the end.
+Luckily this middling almost always can be avoided, as key developers for the
+affected area and often Linus Torvalds himself try very hard to fix security
+issues without causing regressions.
+
+If you nevertheless face such a case, check the mailing list archives if people
+tried their best to avoid the regression. If not, report it; if in doubt, ask
+for advice as outlined above.
+
+What happens if fixing a regression is impossible without causing another?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Sadly these things happen, but luckily not very often; if they occur, expert
+developers of the affected code area should look into the issue to find a fix
+that avoids regressions or at least their impact. If you run into such a
+situation, do what was outlined already for regressions caused by security
+fixes: check earlier discussions if people already tried their best and ask for
+advice if in doubt.
+
+A quick note while at it: these situations could be avoided, if people would
+regularly give mainline pre-releases (say v5.15-rc1 or -rc3) from each
+development cycle a test run. This is best explained by imagining a change
+integrated between Linux v5.14 and v5.15-rc1 which causes a regression, but at
+the same time is a hard requirement for some other improvement applied for
+5.15-rc1. All these changes often can simply be reverted and the regression thus
+solved, if someone finds and reports it before 5.15 is released. A few days or
+weeks later this solution can become impossible, as some software might have
+started to rely on aspects introduced by one of the follow-up changes: reverting
+all changes would then cause a regression for users of said software and thus is
+out of the question.
+
+Is it a regression, if some feature I relied on was removed months ago?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It is, but often it's hard to fix such regressions due to the aspects outlined
+in the previous section. It hence needs to be dealt with on a case-by-case
+basis. This is another reason why it's in everybody's interest to regularly test
+mainline pre-releases.
+
+Does the "no regression" rule apply if I seem to be the only affected person?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It does, but only for practical usage: the Linux developers want to be free to
+remove support for hardware only to be found in attics and museums anymore.
+
+Note, sometimes regressions can't be avoided to make progress -- and the latter
+is needed to prevent Linux from stagnation. Hence, if only very few users seem
+to be affected by a regression, it for the greater good might be in their and
+everyone else's interest to lettings things pass. Especially if there is an
+easy way to circumvent the regression somehow, for example by updating some
+software or using a kernel parameter created just for this purpose.
+
+Does the regression rule apply for code in the staging tree as well?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Not according to the `help text for the configuration option covering all
+staging code <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/staging/Kconfig>`_,
+which since its early days states::
+
+       Please note that these drivers are under heavy development, may or
+       may not work, and may contain userspace interfaces that most likely
+       will be changed in the near future.
+
+The staging developers nevertheless often adhere to the "no regressions" rule,
+but sometimes bend it to make progress. That's for example why some users had to
+deal with (often negligible) regressions when a WiFi driver from the staging
+tree was replaced by a totally different one written from scratch.
+
+Why do later versions have to be "compiled with a similar configuration"?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Because the Linux kernel developers sometimes integrate changes known to cause
+regressions, but make them optional and disable them in the kernel's default
+configuration. This trick allows progress, as the "no regressions" rule
+otherwise would lead to stagnation.
+
+Consider for example a new security feature blocking access to some kernel
+interfaces often abused by malware, which at the same time are required to run a
+few rarely used applications. The outlined approach makes both camps happy:
+people using these applications can leave the new security feature off, while
+everyone else can enable it without running into trouble.
+
+How to create a configuration similar to the one of an older kernel?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Start your machine with a known-good kernel and configure the newer Linux
+version with ``make olddefconfig``. This makes the kernel's build scripts pick
+up the configuration file (the ".config" file) from the running kernel as base
+for the new one you are about to compile; afterwards they set all new
+configuration options to their default value, which should disable new features
+that might cause regressions.
+
+Can I report a regression I found with pre-compiled vanilla kernels?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You need to ensure the newer kernel was compiled with a similar configuration
+file as the older one (see above), as those that built them might have enabled
+some known-to-be incompatible feature for the newer kernel. If in doubt, report
+the matter to the kernel's provider and ask for advice.
+
+
+More about regression tracking with "regzbot"
+---------------------------------------------
+
+What is regression tracking and why should I care about it?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Rules like "no regressions" need someone to ensure they are followed, otherwise
+they are broken either accidentally or on purpose. History has shown this to be
+true for Linux kernel development as well. That's why Thorsten Leemhuis, the
+Linux Kernel's regression tracker, and some people try to ensure all regression
+are fixed by keeping an eye on them until they are resolved. Neither of them are
+paid for this, that's why the work is done on a best effort basis.
+
+Why and how are Linux kernel regressions tracked using a bot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Tracking regressions completely manually has proven to be quite hard due to the
+distributed and loosely structured nature of Linux kernel development process.
+That's why the Linux kernel's regression tracker developed regzbot to facilitate
+the work, with the long term goal to automate regression tracking as much as
+possible for everyone involved.
+
+Regzbot works by watching for replies to reports of tracked regressions.
+Additionally, it's looking out for posted or committed patches referencing such
+reports with "Link:" tags; replies to such patch postings are tracked as well.
+Combined this data provides good insights into the current state of the fixing
+process.
+
+How to see which regressions regzbot tracks currently?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Check out `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_.
+
+What kind of issues are supposed to be tracked by regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The bot is meant to track regressions, hence please don't involve regzbot for
+regular issues. But it's okay for the Linux kernel's regression tracker if you
+involve regzbot to track severe issues, like reports about hangs, corrupted
+data, or internal errors (Panic, Oops, BUG(), warning, ...).
+
+How to change aspects of a tracked regression?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By using a 'regzbot command' in a direct or indirect reply to the mail with the
+report. The easiest way to do that: find the report in your "Sent" folder or the
+mailing list archive and reply to it using your mailer's "Reply-all" function.
+In that mail, use one of the following commands in a stand-alone paragraph (IOW:
+use blank lines to separate one or multiple of these commands from the rest of
+the mail's text).
+
+ * Update when the regression started to happen, for example after performing a
+   bisection::
+
+       #regzbot introduced: 1f2e3d4c5d
+
+ * Set or update the title::
+
+       #regzbot title: foo
+
+ * Monitor a discussion or bugzilla.kernel.org ticket where additions aspects of
+   the issue or a fix are discussed:::
+
+       #regzbot monitor: https://lore.kernel.org/r/[email protected]/
+       #regzbot monitor: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
+
+ * Point to a place with further details of interest, like a mailing list post
+   or a ticket in a bug tracker that are slightly related, but about a different
+   topic::
+
+       #regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
+
+ * Mark a regression as invalid::
+
+       #regzbot invalid: wasn't a regression, problem has always existed
+
+Regzbot supports a few other commands primarily used by developers or people
+tracking regressions. They and more details about the aforementioned regzbot
+commands can be found in the `getting started guide
+<https://gitlab.com/knurd42/regzbot/-/blob/main/docs/getting_started.md>`_ and
+the `reference documentation <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/reference.md>`_
+for regzbot.
+
+..
+   end-of-content
+..
+   This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+   of the file. If you want to distribute this text under CC-BY-4.0 only,
+   please use "The Linux kernel developers" for author attribution and link
+   this as source:
+   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-regressions.rst
+..
+   Note: Only the content of this RST file as found in the Linux kernel sources
+   is available under CC-BY-4.0, as versions of this text that were processed
+   (for example by the kernel's build system) might contain content taken from
+   files which use a more restrictive license.
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index d359bcfadd39..5dd660aac0ae 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1029,23 +1029,17 @@ This is a directory, with the following entries:
 * ``poolsize``: the entropy pool size, in bits;
 
 * ``urandom_min_reseed_secs``: obsolete (used to determine the minimum
-  number of seconds between urandom pool reseeding).
+  number of seconds between urandom pool reseeding). This file is
+  writable for compatibility purposes, but writing to it has no effect
+  on any RNG behavior.
 
 * ``uuid``: a UUID generated every time this is retrieved (this can
   thus be used to generate UUIDs at will);
 
 * ``write_wakeup_threshold``: when the entropy count drops below this
   (as a number of bits), processes waiting to write to ``/dev/random``
-  are woken up.
-
-If ``drivers/char/random.c`` is built with ``ADD_INTERRUPT_BENCH``
-defined, these additional entries are present:
-
-* ``add_interrupt_avg_cycles``: the average number of cycles between
-  interrupts used to feed the pool;
-
-* ``add_interrupt_avg_deviation``: the standard deviation seen on the
-  number of cycles between interrupts used to feed the pool.
+  are woken up. This file is writable for compatibility purposes, but
+  writing to it has no effect on any RNG behavior.
 
 
 randomize_va_space
diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
index 52d060caf8bb..29884b261aa9 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -10,9 +10,9 @@ This document is based on the ARM booting document by Russell King and
 is relevant to all public releases of the AArch64 Linux kernel.
 
 The AArch64 exception model is made up of a number of exception levels
-(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure
-counterpart.  EL2 is the hypervisor level and exists only in non-secure
-mode. EL3 is the highest priority level and exists only in secure mode.
+(EL0 - EL3), with EL0, EL1 and EL2 having a secure and a non-secure
+counterpart.  EL2 is the hypervisor level, EL3 is the highest priority
+level and exists only in secure mode. Both are architecturally optional.
 
 For the purposes of this document, we will use the term `boot loader`
 simply to define all software that executes on the CPU(s) before control
@@ -167,8 +167,8 @@ Before jumping into the kernel, the following conditions must be met:
 
   All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError,
   IRQ and FIQ).
-  The CPU must be in either EL2 (RECOMMENDED in order to have access to
-  the virtualisation extensions) or non-secure EL1.
+  The CPU must be in non-secure state, either in EL2 (RECOMMENDED in order
+  to have access to the virtualisation extensions), or in EL1.
 
 - Caches, MMUs
 
diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
index b72ff17d600a..a8f30963e550 100644
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@@ -259,6 +259,11 @@ HWCAP2_RPRES
 
     Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001.
 
+HWCAP2_MTE3
+
+    Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
+    by Documentation/arm64/memory-tagging-extension.rst.
+
 4. Unused AT_HWCAP bits
 -----------------------
 
diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst
index 7b99c8f428eb..dd27f78d7608 100644
--- a/Documentation/arm64/memory-tagging-extension.rst
+++ b/Documentation/arm64/memory-tagging-extension.rst
@@ -76,6 +76,9 @@ configurable behaviours:
   with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting
   address is unknown).
 
+- *Asymmetric* - Reads are handled as for synchronous mode while writes
+  are handled as for asynchronous mode.
+
 The user can select the above modes, per thread, using the
 ``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where ``flags``
 contains any number of the following values in the ``PR_MTE_TCF_MASK``
@@ -91,8 +94,9 @@ mode is specified, the program will run in that mode. If multiple
 modes are specified, the mode is selected as described in the "Per-CPU
 preferred tag checking modes" section below.
 
-The current tag check fault mode can be read using the
-``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call.
+The current tag check fault configuration can be read using the
+``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call. If
+multiple modes were requested then all will be reported.
 
 Tag checking can also be disabled for a user thread by setting the
 ``PSTATE.TCO`` bit with ``MSR TCO, #1``.
@@ -139,18 +143,25 @@ tag checking mode as the CPU's preferred tag checking mode.
 
 The preferred tag checking mode for each CPU is controlled by
 ``/sys/devices/system/cpu/cpu<N>/mte_tcf_preferred``, to which a
-privileged user may write the value ``async`` or ``sync``.  The default
-preferred mode for each CPU is ``async``.
+privileged user may write the value ``async``, ``sync`` or ``asymm``.  The
+default preferred mode for each CPU is ``async``.
 
 To allow a program to potentially run in the CPU's preferred tag
 checking mode, the user program may set multiple tag check fault mode
 bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL,
-flags, 0, 0, 0)`` system call. If the CPU's preferred tag checking
-mode is in the task's set of provided tag checking modes (this will
-always be the case at present because the kernel only supports two
-tag checking modes, but future kernels may support more modes), that
-mode will be selected. Otherwise, one of the modes in the task's mode
-set will be selected in a currently unspecified manner.
+flags, 0, 0, 0)`` system call. If both synchronous and asynchronous
+modes are requested then asymmetric mode may also be selected by the
+kernel. If the CPU's preferred tag checking mode is in the task's set
+of provided tag checking modes, that mode will be selected. Otherwise,
+one of the modes in the task's mode will be selected by the kernel
+from the task's mode set using the preference order:
+
+	1. Asynchronous
+	2. Asymmetric
+	3. Synchronous
+
+Note that there is no way for userspace to request multiple modes and
+also disable asymmetric mode.
 
 Initial process state
 ---------------------
@@ -213,6 +224,29 @@ address ABI control and MTE configuration of a process as per the
 Documentation/arm64/tagged-address-abi.rst and above. The corresponding
 ``regset`` is 1 element of 8 bytes (``sizeof(long))``).
 
+Core dump support
+-----------------
+
+The allocation tags for user memory mapped with ``PROT_MTE`` are dumped
+in the core file as additional ``PT_ARM_MEMTAG_MTE`` segments. The
+program header for such segment is defined as:
+
+:``p_type``: ``PT_ARM_MEMTAG_MTE``
+:``p_flags``: 0
+:``p_offset``: segment file offset
+:``p_vaddr``: segment virtual address, same as the corresponding
+  ``PT_LOAD`` segment
+:``p_paddr``: 0
+:``p_filesz``: segment size in file, calculated as ``p_mem_sz / 32``
+  (two 4-bit tags cover 32 bytes of memory)
+:``p_memsz``: segment size in memory, same as the corresponding
+  ``PT_LOAD`` segment
+:``p_align``: 0
+
+The tags are stored in the core file at ``p_offset`` as two 4-bit tags
+in a byte. With the tag granule of 16 bytes, a 4K page requires 128
+bytes in the core file.
+
 Example of correct usage
 ========================
 
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index ea281dd75517..466cb9e89047 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -136,7 +136,7 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144        |
 +----------------+-----------------+-----------------+-----------------------------+
-| Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154        |
+| Cavium         | ThunderX GICv3  | #23154,38545    | CAVIUM_ERRATUM_23154        |
 +----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX GICv3  | #38539          | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/asm-annotations.rst b/Documentation/asm-annotations.rst
index f4bf0f6395fb..a64f2ca469d4 100644
--- a/Documentation/asm-annotations.rst
+++ b/Documentation/asm-annotations.rst
@@ -130,14 +130,13 @@ denoting a range of code via ``SYM_*_START/END`` annotations.
   In fact, this kind of annotation corresponds to the now deprecated ``ENTRY``
   and ``ENDPROC`` macros.
 
-* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` serve for those
-  who decided to have two or more names for one function. The typical use is::
+* ``SYM_FUNC_ALIAS``, ``SYM_FUNC_ALIAS_LOCAL``, and ``SYM_FUNC_ALIAS_WEAK`` can
+  be used to define multiple names for a function. The typical use is::
 
-    SYM_FUNC_START_ALIAS(__memset)
-    SYM_FUNC_START(memset)
+    SYM_FUNC_START(__memset)
         ... asm insns ...
-    SYM_FUNC_END(memset)
-    SYM_FUNC_END_ALIAS(__memset)
+    SYN_FUNC_END(__memset)
+    SYM_FUNC_ALIAS(memset, __memset)
 
   In this example, one can call ``__memset`` or ``memset`` with the same
   result, except the debug information for the instructions is generated to
diff --git a/Documentation/cdrom/packet-writing.rst b/Documentation/cdrom/packet-writing.rst
index c5c957195a5a..43db58c50d29 100644
--- a/Documentation/cdrom/packet-writing.rst
+++ b/Documentation/cdrom/packet-writing.rst
@@ -11,7 +11,7 @@ Getting started quick
 - Compile and install kernel and modules, reboot.
 
 - You need the udftools package (pktsetup, mkudffs, cdrwtool).
-  Download from http://sourceforge.net/projects/linux-udf/
+  Download from https://github.com/pali/udftools
 
 - Grab a new CD-RW disc and format it (assuming CD-RW is hdc, substitute
   as appropriate)::
@@ -102,7 +102,7 @@ Using the pktcdvd sysfs interface
 
 Since Linux 2.6.20, the pktcdvd module has a sysfs interface
 and can be controlled by it. For example the "pktcdvd" tool uses
-this interface. (see http://tom.ist-im-web.de/download/pktcdvd )
+this interface. (see http://tom.ist-im-web.de/linux/software/pktcdvd )
 
 "pktcdvd" works similar to "pktsetup", e.g.::
 
diff --git a/Documentation/conf.py b/Documentation/conf.py
index f07f2e9b9f2c..072ee31a301d 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -409,135 +409,25 @@ latex_elements = {
 
     # Additional stuff for the LaTeX preamble.
     'preamble': '''
-	% Prevent column squeezing of tabulary.
-	\\setlength{\\tymin}{20em}
         % Use some font with UTF-8 support with XeLaTeX
         \\usepackage{fontspec}
         \\setsansfont{DejaVu Sans}
         \\setromanfont{DejaVu Serif}
         \\setmonofont{DejaVu Sans Mono}
-        % Adjust \\headheight for fancyhdr
-        \\addtolength{\\headheight}{1.6pt}
-        \\addtolength{\\topmargin}{-1.6pt}
-     ''',
+    ''',
 }
 
-# Translations have Asian (CJK) characters which are only displayed if
-# xeCJK is used
-
-latex_elements['preamble']  += '''
-    \\IfFontExistsTF{Noto Sans CJK SC}{
-	% This is needed for translations
-	\\usepackage{xeCJK}
-	\\IfFontExistsTF{Noto Serif CJK SC}{
-	    \\setCJKmainfont{Noto Serif CJK SC}[AutoFakeSlant]
-	}{
-	    \\setCJKmainfont{Noto Sans CJK SC}[AutoFakeSlant]
-	}
-	\\setCJKsansfont{Noto Sans CJK SC}[AutoFakeSlant]
-	\\setCJKmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]
-	% CJK Language-specific font choices
-	\\IfFontExistsTF{Noto Serif CJK SC}{
-	    \\newCJKfontfamily[SCmain]\\scmain{Noto Serif CJK SC}[AutoFakeSlant]
-	    \\newCJKfontfamily[SCserif]\\scserif{Noto Serif CJK SC}[AutoFakeSlant]
-	}{
-	    \\newCJKfontfamily[SCmain]\\scmain{Noto Sans CJK SC}[AutoFakeSlant]
-	    \\newCJKfontfamily[SCserif]\\scserif{Noto Sans CJK SC}[AutoFakeSlant]
-	}
-	\\newCJKfontfamily[SCsans]\\scsans{Noto Sans CJK SC}[AutoFakeSlant]
-	\\newCJKfontfamily[SCmono]\\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant]
-	\\IfFontExistsTF{Noto Serif CJK TC}{
-	    \\newCJKfontfamily[TCmain]\\tcmain{Noto Serif CJK TC}[AutoFakeSlant]
-	    \\newCJKfontfamily[TCserif]\\tcserif{Noto Serif CJK TC}[AutoFakeSlant]
-	}{
-	    \\newCJKfontfamily[TCmain]\\tcmain{Noto Sans CJK TC}[AutoFakeSlant]
-	    \\newCJKfontfamily[TCserif]\\tcserif{Noto Sans CJK TC}[AutoFakeSlant]
-	}
-	\\newCJKfontfamily[TCsans]\\tcsans{Noto Sans CJK TC}[AutoFakeSlant]
-	\\newCJKfontfamily[TCmono]\\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant]
-	\\IfFontExistsTF{Noto Serif CJK KR}{
-	    \\newCJKfontfamily[KRmain]\\krmain{Noto Serif CJK KR}[AutoFakeSlant]
-	    \\newCJKfontfamily[KRserif]\\krserif{Noto Serif CJK KR}[AutoFakeSlant]
-	}{
-	    \\newCJKfontfamily[KRmain]\\krmain{Noto Sans CJK KR}[AutoFakeSlant]
-	    \\newCJKfontfamily[KRserif]\\krserif{Noto Sans CJK KR}[AutoFakeSlant]
-	}
-	\\newCJKfontfamily[KRsans]\\krsans{Noto Sans CJK KR}[AutoFakeSlant]
-	\\newCJKfontfamily[KRmono]\\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant]
-	\\IfFontExistsTF{Noto Serif CJK JP}{
-	    \\newCJKfontfamily[JPmain]\\jpmain{Noto Serif CJK JP}[AutoFakeSlant]
-	    \\newCJKfontfamily[JPserif]\\jpserif{Noto Serif CJK JP}[AutoFakeSlant]
-	}{
-	    \\newCJKfontfamily[JPmain]\\jpmain{Noto Sans CJK JP}[AutoFakeSlant]
-	    \\newCJKfontfamily[JPserif]\\jpserif{Noto Sans CJK JP}[AutoFakeSlant]
-	}
-	\\newCJKfontfamily[JPsans]\\jpsans{Noto Sans CJK JP}[AutoFakeSlant]
-	\\newCJKfontfamily[JPmono]\\jpmono{Noto Sans Mono CJK JP}[AutoFakeSlant]
-	% Dummy commands for Sphinx < 2.3 (no 'extrapackages' support)
-	\\providecommand{\\onehalfspacing}{}
-	\\providecommand{\\singlespacing}{}
-	% Define custom macros to on/off CJK
-	\\newcommand{\\kerneldocCJKon}{\\makexeCJKactive\\onehalfspacing}
-	\\newcommand{\\kerneldocCJKoff}{\\makexeCJKinactive\\singlespacing}
-	\\newcommand{\\kerneldocBeginSC}{%
-	    \\begingroup%
-	    \\scmain%
-	}
-	\\newcommand{\\kerneldocEndSC}{\\endgroup}
-	\\newcommand{\\kerneldocBeginTC}{%
-	    \\begingroup%
-	    \\tcmain%
-	    \\renewcommand{\\CJKrmdefault}{TCserif}%
-	    \\renewcommand{\\CJKsfdefault}{TCsans}%
-	    \\renewcommand{\\CJKttdefault}{TCmono}%
-	}
-	\\newcommand{\\kerneldocEndTC}{\\endgroup}
-	\\newcommand{\\kerneldocBeginKR}{%
-	    \\begingroup%
-	    \\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}%
-	    \\xeCJKDeclareCharClass{HalfRight}{`”,`’}%
-	    \\krmain%
-	    \\renewcommand{\\CJKrmdefault}{KRserif}%
-	    \\renewcommand{\\CJKsfdefault}{KRsans}%
-	    \\renewcommand{\\CJKttdefault}{KRmono}%
-	    \\xeCJKsetup{CJKspace = true} % For inter-phrase space
-	}
-	\\newcommand{\\kerneldocEndKR}{\\endgroup}
-	\\newcommand{\\kerneldocBeginJP}{%
-	    \\begingroup%
-	    \\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}%
-	    \\xeCJKDeclareCharClass{HalfRight}{`”,`’}%
-	    \\jpmain%
-	    \\renewcommand{\\CJKrmdefault}{JPserif}%
-	    \\renewcommand{\\CJKsfdefault}{JPsans}%
-	    \\renewcommand{\\CJKttdefault}{JPmono}%
-	}
-	\\newcommand{\\kerneldocEndJP}{\\endgroup}
-	% Single spacing in literal blocks
-	\\fvset{baselinestretch=1}
-	% To customize \\sphinxtableofcontents
-	\\usepackage{etoolbox}
-	% Inactivate CJK after tableofcontents
-	\\apptocmd{\\sphinxtableofcontents}{\\kerneldocCJKoff}{}{}
-    }{ % No CJK font found
-	% Custom macros to on/off CJK (Dummy)
-	\\newcommand{\\kerneldocCJKon}{}
-	\\newcommand{\\kerneldocCJKoff}{}
-	\\newcommand{\\kerneldocBeginSC}{}
-	\\newcommand{\\kerneldocEndSC}{}
-	\\newcommand{\\kerneldocBeginTC}{}
-	\\newcommand{\\kerneldocEndTC}{}
-	\\newcommand{\\kerneldocBeginKR}{}
-	\\newcommand{\\kerneldocEndKR}{}
-	\\newcommand{\\kerneldocBeginJP}{}
-	\\newcommand{\\kerneldocEndJP}{}
-    }
-'''
-
 # Fix reference escape troubles with Sphinx 1.4.x
 if major == 1:
     latex_elements['preamble']  += '\\renewcommand*{\\DUrole}[2]{ #2 }\n'
 
+
+# Load kerneldoc specific LaTeX settings
+latex_elements['preamble'] += '''
+        % Load kerneldoc specific LaTeX settings
+	\\input{kerneldoc-preamble.sty}
+'''
+
 # With Sphinx 1.6, it is possible to change the Bg color directly
 # by using:
 #	\definecolor{sphinxnoteBgColor}{RGB}{204,255,255}
@@ -599,6 +489,11 @@ for fn in os.listdir('.'):
 # If false, no module index is generated.
 #latex_domain_indices = True
 
+# Additional LaTeX stuff to be copied to build directory
+latex_additional_files = [
+    'sphinx/kerneldoc-preamble.sty',
+]
+
 
 # -- Options for manual page output ---------------------------------------
 
diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst
index 17706dc91ec9..1887d92e8e92 100644
--- a/Documentation/core-api/dma-attributes.rst
+++ b/Documentation/core-api/dma-attributes.rst
@@ -130,11 +130,3 @@ accesses to DMA buffers in both privileged "supervisor" and unprivileged
 subsystem that the buffer is fully accessible at the elevated privilege
 level (and ideally inaccessible or at least read-only at the
 lesser-privileged levels).
-
-DMA_ATTR_OVERWRITE
-------------------
-
-This is a hint to the DMA-mapping subsystem that the device is expected to
-overwrite the entire mapped size, thus the caller does not require any of the
-previous buffer contents to be preserved. This allows bounce-buffering
-implementations to optimise DMA_FROM_DEVICE transfers.
diff --git a/Documentation/core-api/entry.rst b/Documentation/core-api/entry.rst
new file mode 100644
index 000000000000..e12f22ab33c7
--- /dev/null
+++ b/Documentation/core-api/entry.rst
@@ -0,0 +1,279 @@
+Entry/exit handling for exceptions, interrupts, syscalls and KVM
+================================================================
+
+All transitions between execution domains require state updates which are
+subject to strict ordering constraints. State updates are required for the
+following:
+
+  * Lockdep
+  * RCU / Context tracking
+  * Preemption counter
+  * Tracing
+  * Time accounting
+
+The update order depends on the transition type and is explained below in
+the transition type sections: `Syscalls`_, `KVM`_, `Interrupts and regular
+exceptions`_, `NMI and NMI-like exceptions`_.
+
+Non-instrumentable code - noinstr
+---------------------------------
+
+Most instrumentation facilities depend on RCU, so intrumentation is prohibited
+for entry code before RCU starts watching and exit code after RCU stops
+watching. In addition, many architectures must save and restore register state,
+which means that (for example) a breakpoint in the breakpoint entry code would
+overwrite the debug registers of the initial breakpoint.
+
+Such code must be marked with the 'noinstr' attribute, placing that code into a
+special section inaccessible to instrumentation and debug facilities. Some
+functions are partially instrumentable, which is handled by marking them
+noinstr and using instrumentation_begin() and instrumentation_end() to flag the
+instrumentable ranges of code:
+
+.. code-block:: c
+
+  noinstr void entry(void)
+  {
+  	handle_entry();     // <-- must be 'noinstr' or '__always_inline'
+	...
+
+	instrumentation_begin();
+	handle_context();   // <-- instrumentable code
+	instrumentation_end();
+
+	...
+	handle_exit();      // <-- must be 'noinstr' or '__always_inline'
+  }
+
+This allows verification of the 'noinstr' restrictions via objtool on
+supported architectures.
+
+Invoking non-instrumentable functions from instrumentable context has no
+restrictions and is useful to protect e.g. state switching which would
+cause malfunction if instrumented.
+
+All non-instrumentable entry/exit code sections before and after the RCU
+state transitions must run with interrupts disabled.
+
+Syscalls
+--------
+
+Syscall-entry code starts in assembly code and calls out into low-level C code
+after establishing low-level architecture-specific state and stack frames. This
+low-level C code must not be instrumented. A typical syscall handling function
+invoked from low-level assembly code looks like this:
+
+.. code-block:: c
+
+  noinstr void syscall(struct pt_regs *regs, int nr)
+  {
+	arch_syscall_enter(regs);
+	nr = syscall_enter_from_user_mode(regs, nr);
+
+	instrumentation_begin();
+	if (!invoke_syscall(regs, nr) && nr != -1)
+	 	result_reg(regs) = __sys_ni_syscall(regs);
+	instrumentation_end();
+
+	syscall_exit_to_user_mode(regs);
+  }
+
+syscall_enter_from_user_mode() first invokes enter_from_user_mode() which
+establishes state in the following order:
+
+  * Lockdep
+  * RCU / Context tracking
+  * Tracing
+
+and then invokes the various entry work functions like ptrace, seccomp, audit,
+syscall tracing, etc. After all that is done, the instrumentable invoke_syscall
+function can be invoked. The instrumentable code section then ends, after which
+syscall_exit_to_user_mode() is invoked.
+
+syscall_exit_to_user_mode() handles all work which needs to be done before
+returning to user space like tracing, audit, signals, task work etc. After
+that it invokes exit_to_user_mode() which again handles the state
+transition in the reverse order:
+
+  * Tracing
+  * RCU / Context tracking
+  * Lockdep
+
+syscall_enter_from_user_mode() and syscall_exit_to_user_mode() are also
+available as fine grained subfunctions in cases where the architecture code
+has to do extra work between the various steps. In such cases it has to
+ensure that enter_from_user_mode() is called first on entry and
+exit_to_user_mode() is called last on exit.
+
+Do not nest syscalls. Nested systcalls will cause RCU and/or context tracking
+to print a warning.
+
+KVM
+---
+
+Entering or exiting guest mode is very similar to syscalls. From the host
+kernel point of view the CPU goes off into user space when entering the
+guest and returns to the kernel on exit.
+
+kvm_guest_enter_irqoff() is a KVM-specific variant of exit_to_user_mode()
+and kvm_guest_exit_irqoff() is the KVM variant of enter_from_user_mode().
+The state operations have the same ordering.
+
+Task work handling is done separately for guest at the boundary of the
+vcpu_run() loop via xfer_to_guest_mode_handle_work() which is a subset of
+the work handled on return to user space.
+
+Do not nest KVM entry/exit transitions because doing so is nonsensical.
+
+Interrupts and regular exceptions
+---------------------------------
+
+Interrupts entry and exit handling is slightly more complex than syscalls
+and KVM transitions.
+
+If an interrupt is raised while the CPU executes in user space, the entry
+and exit handling is exactly the same as for syscalls.
+
+If the interrupt is raised while the CPU executes in kernel space the entry and
+exit handling is slightly different. RCU state is only updated when the
+interrupt is raised in the context of the CPU's idle task. Otherwise, RCU will
+already be watching. Lockdep and tracing have to be updated unconditionally.
+
+irqentry_enter() and irqentry_exit() provide the implementation for this.
+
+The architecture-specific part looks similar to syscall handling:
+
+.. code-block:: c
+
+  noinstr void interrupt(struct pt_regs *regs, int nr)
+  {
+	arch_interrupt_enter(regs);
+	state = irqentry_enter(regs);
+
+	instrumentation_begin();
+
+	irq_enter_rcu();
+	invoke_irq_handler(regs, nr);
+	irq_exit_rcu();
+
+	instrumentation_end();
+
+	irqentry_exit(regs, state);
+  }
+
+Note that the invocation of the actual interrupt handler is within a
+irq_enter_rcu() and irq_exit_rcu() pair.
+
+irq_enter_rcu() updates the preemption count which makes in_hardirq()
+return true, handles NOHZ tick state and interrupt time accounting. This
+means that up to the point where irq_enter_rcu() is invoked in_hardirq()
+returns false.
+
+irq_exit_rcu() handles interrupt time accounting, undoes the preemption
+count update and eventually handles soft interrupts and NOHZ tick state.
+
+In theory, the preemption count could be updated in irqentry_enter(). In
+practice, deferring this update to irq_enter_rcu() allows the preemption-count
+code to be traced, while also maintaining symmetry with irq_exit_rcu() and
+irqentry_exit(), which are described in the next paragraph. The only downside
+is that the early entry code up to irq_enter_rcu() must be aware that the
+preemption count has not yet been updated with the HARDIRQ_OFFSET state.
+
+Note that irq_exit_rcu() must remove HARDIRQ_OFFSET from the preemption count
+before it handles soft interrupts, whose handlers must run in BH context rather
+than irq-disabled context. In addition, irqentry_exit() might schedule, which
+also requires that HARDIRQ_OFFSET has been removed from the preemption count.
+
+Even though interrupt handlers are expected to run with local interrupts
+disabled, interrupt nesting is common from an entry/exit perspective. For
+example, softirq handling happens within an irqentry_{enter,exit}() block with
+local interrupts enabled. Also, although uncommon, nothing prevents an
+interrupt handler from re-enabling interrupts.
+
+Interrupt entry/exit code doesn't strictly need to handle reentrancy, since it
+runs with local interrupts disabled. But NMIs can happen anytime, and a lot of
+the entry code is shared between the two.
+
+NMI and NMI-like exceptions
+---------------------------
+
+NMIs and NMI-like exceptions (machine checks, double faults, debug
+interrupts, etc.) can hit any context and must be extra careful with
+the state.
+
+State changes for debug exceptions and machine-check exceptions depend on
+whether these exceptions happened in user-space (breakpoints or watchpoints) or
+in kernel mode (code patching). From user-space, they are treated like
+interrupts, while from kernel mode they are treated like NMIs.
+
+NMIs and other NMI-like exceptions handle state transitions without
+distinguishing between user-mode and kernel-mode origin.
+
+The state update on entry is handled in irqentry_nmi_enter() which updates
+state in the following order:
+
+  * Preemption counter
+  * Lockdep
+  * RCU / Context tracking
+  * Tracing
+
+The exit counterpart irqentry_nmi_exit() does the reverse operation in the
+reverse order.
+
+Note that the update of the preemption counter has to be the first
+operation on enter and the last operation on exit. The reason is that both
+lockdep and RCU rely on in_nmi() returning true in this case. The
+preemption count modification in the NMI entry/exit case must not be
+traced.
+
+Architecture-specific code looks like this:
+
+.. code-block:: c
+
+  noinstr void nmi(struct pt_regs *regs)
+  {
+	arch_nmi_enter(regs);
+	state = irqentry_nmi_enter(regs);
+
+	instrumentation_begin();
+	nmi_handler(regs);
+	instrumentation_end();
+
+	irqentry_nmi_exit(regs);
+  }
+
+and for e.g. a debug exception it can look like this:
+
+.. code-block:: c
+
+  noinstr void debug(struct pt_regs *regs)
+  {
+	arch_nmi_enter(regs);
+
+	debug_regs = save_debug_regs();
+
+	if (user_mode(regs)) {
+		state = irqentry_enter(regs);
+
+		instrumentation_begin();
+		user_mode_debug_handler(regs, debug_regs);
+		instrumentation_end();
+
+		irqentry_exit(regs, state);
+  	} else {
+  		state = irqentry_nmi_enter(regs);
+
+		instrumentation_begin();
+		kernel_mode_debug_handler(regs, debug_regs);
+		instrumentation_end();
+
+		irqentry_nmi_exit(regs, state);
+	}
+  }
+
+There is no combined irqentry_nmi_if_kernel() function available as the
+above cannot be handled in an exception-agnostic way.
+
+NMIs can happen in any context. For example, an NMI-like exception triggered
+while handling an NMI. So NMI entry code has to be reentrant and state updates
+need to handle nesting.
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 5de2c7a4b1b3..972d46a5ddf6 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -44,6 +44,14 @@ Library functionality that is used throughout the kernel.
    timekeeping
    errseq
 
+Low level entry and exit
+========================
+
+.. toctree::
+   :maxdepth: 1
+
+   entry
+
 Concurrency primitives
 ======================
 
diff --git a/Documentation/dev-tools/ktap.rst b/Documentation/dev-tools/ktap.rst
index 878530cb9c27..5ee735c6687f 100644
--- a/Documentation/dev-tools/ktap.rst
+++ b/Documentation/dev-tools/ktap.rst
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0
 
-========================================
-The Kernel Test Anything Protocol (KTAP)
-========================================
+===================================================
+The Kernel Test Anything Protocol (KTAP), version 1
+===================================================
 
 TAP, or the Test Anything Protocol is a format for specifying test results used
 by a number of projects. It's website and specification are found at this `link
@@ -68,7 +68,7 @@ Test case result lines
 Test case result lines indicate the final status of a test.
 They are required and must have the format:
 
-.. code-block::
+.. code-block:: none
 
 	<result> <number> [<description>][ # [<directive>] [<diagnostic data>]]
 
@@ -117,32 +117,32 @@ separator.
 
 Example result lines include:
 
-.. code-block::
+.. code-block:: none
 
 	ok 1 test_case_name
 
 The test "test_case_name" passed.
 
-.. code-block::
+.. code-block:: none
 
 	not ok 1 test_case_name
 
 The test "test_case_name" failed.
 
-.. code-block::
+.. code-block:: none
 
 	ok 1 test # SKIP necessary dependency unavailable
 
 The test "test" was SKIPPED with the diagnostic message "necessary dependency
 unavailable".
 
-.. code-block::
+.. code-block:: none
 
 	not ok 1 test # TIMEOUT 30 seconds
 
 The test "test" timed out, with diagnostic data "30 seconds".
 
-.. code-block::
+.. code-block:: none
 
 	ok 5 check return code # rcode=0
 
@@ -174,6 +174,13 @@ There may be lines within KTAP output that do not follow the format of one of
 the four formats for lines described above. This is allowed, however, they will
 not influence the status of the tests.
 
+This is an important difference from TAP.  Kernel tests may print messages
+to the system console or a log file.  Both of these destinations may contain
+messages either from unrelated kernel or userspace activity, or kernel
+messages from non-test code that is invoked by the test.  The kernel code
+invoked by the test likely is not aware that a test is in progress and
+thus can not print the message as a diagnostic message.
+
 Nested tests
 ------------
 
@@ -186,13 +193,16 @@ starting with another KTAP version line and test plan, and end with the overall
 result. If one of the subtests fail, for example, the parent test should also
 fail.
 
-Additionally, all result lines in a subtest should be indented. One level of
+Additionally, all lines in a subtest should be indented. One level of
 indentation is two spaces: "  ". The indentation should begin at the version
 line and should end before the parent test's result line.
 
+"Unknown lines" are not considered to be lines in a subtest and thus are
+allowed to be either indented or not indented.
+
 An example of a test with two nested subtests:
 
-.. code-block::
+.. code-block:: none
 
 	KTAP version 1
 	1..1
@@ -205,7 +215,7 @@ An example of a test with two nested subtests:
 
 An example format with multiple levels of nested testing:
 
-.. code-block::
+.. code-block:: none
 
 	KTAP version 1
 	1..2
@@ -224,10 +234,15 @@ An example format with multiple levels of nested testing:
 Major differences between TAP and KTAP
 --------------------------------------
 
-Note the major differences between the TAP and KTAP specification:
-- yaml and json are not recommended in diagnostic messages
-- TODO directive not recognized
-- KTAP allows for an arbitrary number of tests to be nested
+==================================================   =========  ===============
+Feature                                              TAP        KTAP
+==================================================   =========  ===============
+yaml and json in diagnosic message                   ok         not recommended
+TODO directive                                       ok         not recognized
+allows an arbitrary number of tests to be nested     no         yes
+"Unknown lines" are in category of "Anything else"   yes        no
+"Unknown lines" are                                  incorrect  allowed
+==================================================   =========  ===============
 
 The TAP14 specification does permit nested tests, but instead of using another
 nested version line, uses a line of the form
@@ -235,7 +250,7 @@ nested version line, uses a line of the form
 
 Example KTAP output
 --------------------
-.. code-block::
+.. code-block:: none
 
 	KTAP version 1
 	1..1
diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml
index 981bac451698..7a04b8aaaec3 100644
--- a/Documentation/devicetree/bindings/arm/pmu.yaml
+++ b/Documentation/devicetree/bindings/arm/pmu.yaml
@@ -20,6 +20,8 @@ properties:
     items:
       - enum:
           - apm,potenza-pmu
+          - apple,firestorm-pmu
+          - apple,icestorm-pmu
           - arm,armv8-pmuv3 # Only for s/w models
           - arm,arm1136-pmu
           - arm,arm1176-pmu
diff --git a/Documentation/devicetree/bindings/arm/qcom.yaml b/Documentation/devicetree/bindings/arm/qcom.yaml
index 370aab274cd1..04ff0b55bb85 100644
--- a/Documentation/devicetree/bindings/arm/qcom.yaml
+++ b/Documentation/devicetree/bindings/arm/qcom.yaml
@@ -48,7 +48,6 @@ description: |
         sdx65
         sm7225
         sm8150
-        sdx65
         sm8250
         sm8350
         sm8450
@@ -230,11 +229,6 @@ properties:
 
       - items:
           - enum:
-              - qcom,sdx65-mtp
-          - const: qcom,sdx65
-
-      - items:
-          - enum:
               - qcom,ipq6018-cp01
               - qcom,ipq6018-cp01-c1
           - const: qcom,ipq6018
diff --git a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
index 1d3e88daca04..25b5ef3f759c 100644
--- a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
@@ -91,22 +91,7 @@ properties:
         $ref: /schemas/graph.yaml#/$defs/port-base
         unevaluatedProperties: false
         description:
-          MIPI DSI/DPI input.
-
-        properties:
-          endpoint:
-            $ref: /schemas/media/video-interfaces.yaml#
-            type: object
-            additionalProperties: false
-
-            properties:
-              remote-endpoint: true
-
-              bus-type:
-                enum: [1, 5]
-                default: 1
-
-              data-lanes: true
+          Video port for MIPI DSI input.
 
       port@1:
         $ref: /schemas/graph.yaml#/properties/port
@@ -155,8 +140,6 @@ examples:
                     reg = <0>;
                     anx7625_in: endpoint {
                         remote-endpoint = <&mipi_dsi>;
-                        bus-type = <5>;
-                        data-lanes = <0 1 2 3>;
                     };
                 };
 
diff --git a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
index 23b18b92c558..bde63f8f090e 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
@@ -18,6 +18,7 @@ Required properties:
     "amlogic,meson-g12a-gpio-intc" for G12A SoCs (S905D2, S905X2, S905Y2)
     "amlogic,meson-sm1-gpio-intc" for SM1 SoCs (S905D3, S905X3, S905Y3)
     "amlogic,meson-a1-gpio-intc" for A1 SoCs (A113L)
+    "amlogic,meson-s4-gpio-intc" for S4 SoCs (S802X2, S905Y4, S805X2G, S905W2)
 - reg : Specifies base physical address and size of the registers.
 - interrupt-controller : Identifies the node as an interrupt controller.
 - #interrupt-cells : Specifies the number of cells needed to encode an
diff --git a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
index 97359024709a..85c85b694217 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
@@ -56,6 +56,8 @@ properties:
           - 1: virtual HV timer
           - 2: physical guest timer
           - 3: virtual guest timer
+          - 4: 'efficient' CPU PMU
+          - 5: 'performance' CPU PMU
 
       The 3rd cell contains the interrupt flags. This is normally
       IRQ_TYPE_LEVEL_HIGH (4).
@@ -68,6 +70,35 @@ properties:
   power-domains:
     maxItems: 1
 
+  affinities:
+    type: object
+    additionalProperties: false
+    description:
+      FIQ affinity can be expressed as a single "affinities" node,
+      containing a set of sub-nodes, one per FIQ with a non-default
+      affinity.
+    patternProperties:
+      "^.+-affinity$":
+        type: object
+        additionalProperties: false
+        properties:
+          apple,fiq-index:
+            description:
+              The interrupt number specified as a FIQ, and for which
+              the affinity is not the default.
+            $ref: /schemas/types.yaml#/definitions/uint32
+            maximum: 5
+
+          cpus:
+            $ref: /schemas/types.yaml#/definitions/phandle-array
+            description:
+              Should be a list of phandles to CPU nodes (as described in
+              Documentation/devicetree/bindings/arm/cpus.yaml).
+
+        required:
+          - fiq-index
+          - cpus
+
 required:
   - compatible
   - '#interrupt-cells'
diff --git a/Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml b/Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml
new file mode 100644
index 000000000000..47a78a167aba
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/apple,aic2.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Apple Interrupt Controller 2
+
+maintainers:
+  - Hector Martin <[email protected]>
+
+description: |
+  The Apple Interrupt Controller 2 is a simple interrupt controller present on
+  Apple ARM SoC platforms starting with t600x (M1 Pro and Max).
+
+  It provides the following features:
+
+  - Level-triggered hardware IRQs wired to SoC blocks
+    - Single mask bit per IRQ
+    - Automatic masking on event delivery (auto-ack)
+    - Software triggering (ORed with hw line)
+  - Automatic prioritization (single event/ack register per CPU, lower IRQs =
+    higher priority)
+  - Automatic masking on ack
+  - Support for multiple dies
+
+  This device also represents the FIQ interrupt sources on platforms using AIC,
+  which do not go through a discrete interrupt controller. It also handles
+  FIQ-based Fast IPIs.
+
+properties:
+  compatible:
+    items:
+      - const: apple,t6000-aic
+      - const: apple,aic2
+
+  interrupt-controller: true
+
+  '#interrupt-cells':
+    const: 4
+    description: |
+      The 1st cell contains the interrupt type:
+        - 0: Hardware IRQ
+        - 1: FIQ
+
+      The 2nd cell contains the die ID.
+
+      The next cell contains the interrupt number.
+        - HW IRQs: interrupt number
+        - FIQs:
+          - 0: physical HV timer
+          - 1: virtual HV timer
+          - 2: physical guest timer
+          - 3: virtual guest timer
+
+      The last cell contains the interrupt flags. This is normally
+      IRQ_TYPE_LEVEL_HIGH (4).
+
+  reg:
+    items:
+      - description: Address and size of the main AIC2 registers.
+      - description: Address and size of the AIC2 Event register.
+
+  reg-names:
+    items:
+      - const: core
+      - const: event
+
+  power-domains:
+    maxItems: 1
+
+required:
+  - compatible
+  - '#interrupt-cells'
+  - interrupt-controller
+  - reg
+  - reg-names
+
+additionalProperties: false
+
+allOf:
+  - $ref: /schemas/interrupt-controller.yaml#
+
+examples:
+  - |
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        aic: interrupt-controller@28e100000 {
+            compatible = "apple,t6000-aic", "apple,aic2";
+            #interrupt-cells = <4>;
+            interrupt-controller;
+            reg = <0x2 0x8e100000 0x0 0xc000>,
+                  <0x2 0x8e10c000 0x0 0x4>;
+            reg-names = "core", "event";
+        };
+    };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml
new file mode 100644
index 000000000000..509d20c091af
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/qcom,mpm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcom MPM Interrupt Controller
+
+maintainers:
+  - Shawn Guo <[email protected]>
+
+description:
+  Qualcomm Technologies Inc. SoCs based on the RPM architecture have a
+  MSM Power Manager (MPM) that is in always-on domain. In addition to managing
+  resources during sleep, the hardware also has an interrupt controller that
+  monitors the interrupts when the system is asleep, wakes up the APSS when
+  one of these interrupts occur and replays it to GIC interrupt controller
+  after GIC becomes operational.
+
+allOf:
+  - $ref: /schemas/interrupt-controller.yaml#
+
+properties:
+  compatible:
+    items:
+      - const: qcom,mpm
+
+  reg:
+    maxItems: 1
+    description:
+      Specifies the base address and size of vMPM registers in RPM MSG RAM.
+
+  interrupts:
+    maxItems: 1
+    description:
+      Specify the IRQ used by RPM to wakeup APSS.
+
+  mboxes:
+    maxItems: 1
+    description:
+      Specify the mailbox used to notify RPM for writing vMPM registers.
+
+  interrupt-controller: true
+
+  '#interrupt-cells':
+    const: 2
+    description:
+      The first cell is the MPM pin number for the interrupt, and the second
+      is the trigger type.
+
+  qcom,mpm-pin-count:
+    description:
+      Specify the total MPM pin count that a SoC supports.
+    $ref: /schemas/types.yaml#/definitions/uint32
+
+  qcom,mpm-pin-map:
+    description:
+      A set of MPM pin numbers and the corresponding GIC SPIs.
+    $ref: /schemas/types.yaml#/definitions/uint32-matrix
+    items:
+      items:
+        - description: MPM pin number
+        - description: GIC SPI number for the MPM pin
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - mboxes
+  - interrupt-controller
+  - '#interrupt-cells'
+  - qcom,mpm-pin-count
+  - qcom,mpm-pin-map
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    mpm: interrupt-controller@45f01b8 {
+        compatible = "qcom,mpm";
+        interrupts = <GIC_SPI 197 IRQ_TYPE_EDGE_RISING>;
+        reg = <0x45f01b8 0x1000>;
+        mboxes = <&apcs_glb 1>;
+        interrupt-controller;
+        #interrupt-cells = <2>;
+        interrupt-parent = <&intc>;
+        qcom,mpm-pin-count = <96>;
+        qcom,mpm-pin-map = <2 275>,
+                           <5 296>,
+                           <12 422>,
+                           <24 79>,
+                           <86 183>,
+                           <90 260>,
+                           <91 260>;
+    };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml
index d19c881b4abc..e44daa09b137 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml
@@ -20,6 +20,7 @@ properties:
       - items:
           - enum:
               - st,stm32mp1-exti
+              - st,stm32mp13-exti
           - const: syscon
 
   "#interrupt-cells":
diff --git a/Documentation/devicetree/bindings/mfd/brcm,cru.yaml b/Documentation/devicetree/bindings/mfd/brcm,cru.yaml
index be4a2df71c25..b85819fbb07c 100644
--- a/Documentation/devicetree/bindings/mfd/brcm,cru.yaml
+++ b/Documentation/devicetree/bindings/mfd/brcm,cru.yaml
@@ -39,7 +39,7 @@ patternProperties:
   '^phy@[a-f0-9]+$':
     $ref: ../phy/bcm-ns-usb2-phy.yaml
 
-  '^pin-controller@[a-f0-9]+$':
+  '^pinctrl@[a-f0-9]+$':
     $ref: ../pinctrl/brcm,ns-pinmux.yaml
 
   '^syscon@[a-f0-9]+$':
@@ -94,7 +94,7 @@ examples:
             reg = <0x180 0x4>;
         };
 
-        pin-controller@1c0 {
+        pinctrl@1c0 {
             compatible = "brcm,bcm4708-pinmux";
             reg = <0x1c0 0x24>;
             reg-names = "cru_gpio_control";
diff --git a/Documentation/devicetree/bindings/mfd/cirrus,lochnagar.yaml b/Documentation/devicetree/bindings/mfd/cirrus,lochnagar.yaml
index c00ad3e21c21..ad285cb480c9 100644
--- a/Documentation/devicetree/bindings/mfd/cirrus,lochnagar.yaml
+++ b/Documentation/devicetree/bindings/mfd/cirrus,lochnagar.yaml
@@ -126,7 +126,7 @@ properties:
       clock-frequency:
         const: 12288000
 
-  lochnagar-pinctrl:
+  pinctrl:
     type: object
     $ref: /schemas/pinctrl/cirrus,lochnagar.yaml#
 
@@ -255,7 +255,7 @@ required:
   - reg
   - reset-gpios
   - lochnagar-clk
-  - lochnagar-pinctrl
+  - pinctrl
 
 additionalProperties: false
 
@@ -293,7 +293,7 @@ examples:
                 clock-frequency = <32768>;
             };
 
-            lochnagar-pinctrl {
+            pinctrl {
                 compatible = "cirrus,lochnagar-pinctrl";
 
                 gpio-controller;
diff --git a/Documentation/devicetree/bindings/perf/marvell-cn10k-ddr.yaml b/Documentation/devicetree/bindings/perf/marvell-cn10k-ddr.yaml
new file mode 100644
index 000000000000..a18dd0a8c43a
--- /dev/null
+++ b/Documentation/devicetree/bindings/perf/marvell-cn10k-ddr.yaml
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/perf/marvell-cn10k-ddr.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell CN10K DDR performance monitor
+
+maintainers:
+  - Bharat Bhushan <[email protected]>
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - marvell,cn10k-ddr-pmu
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    bus {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        pmu@87e1c0000000 {
+            compatible = "marvell,cn10k-ddr-pmu";
+            reg = <0x87e1 0xc0000000 0x0 0x10000>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/phy/ti,tcan104x-can.yaml b/Documentation/devicetree/bindings/phy/ti,tcan104x-can.yaml
index 6107880e5246..02b76f15e717 100644
--- a/Documentation/devicetree/bindings/phy/ti,tcan104x-can.yaml
+++ b/Documentation/devicetree/bindings/phy/ti,tcan104x-can.yaml
@@ -37,6 +37,12 @@ properties:
       max bit rate supported in bps
     minimum: 1
 
+  mux-states:
+    description:
+      mux controller node to route the signals from controller to
+      transceiver.
+    maxItems: 1
+
 required:
   - compatible
   - '#phy-cells'
@@ -53,4 +59,5 @@ examples:
       max-bitrate = <5000000>;
       standby-gpios = <&wakeup_gpio1 16 GPIO_ACTIVE_LOW>;
       enable-gpios = <&main_gpio1 67 GPIO_ACTIVE_HIGH>;
+      mux-states = <&mux0 1>;
     };
diff --git a/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml b/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml
index c85f759ae5a3..8a90d8273767 100644
--- a/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml
@@ -107,9 +107,6 @@ properties:
 
     additionalProperties: false
 
-allOf:
-  - $ref: "pinctrl.yaml#"
-
 required:
   - pinctrl-0
   - pinctrl-names
diff --git a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
deleted file mode 100644
index 33004ce7e5df..000000000000
--- a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
+++ /dev/null
@@ -1,106 +0,0 @@
-* Exynos Thermal Management Unit (TMU)
-
-** Required properties:
-
-- compatible : One of the following:
-	       "samsung,exynos3250-tmu"
-	       "samsung,exynos4412-tmu"
-	       "samsung,exynos4210-tmu"
-	       "samsung,exynos5250-tmu"
-	       "samsung,exynos5260-tmu"
-	       "samsung,exynos5420-tmu" for TMU channel 0, 1 on Exynos5420
-	       "samsung,exynos5420-tmu-ext-triminfo" for TMU channels 2, 3 and 4
-			Exynos5420 (Must pass triminfo base and triminfo clock)
-               "samsung,exynos5433-tmu"
-	       "samsung,exynos7-tmu"
-- reg : Address range of the thermal registers. For soc's which has multiple
-	instances of TMU and some registers are shared across all TMU's like
-	interrupt related then 2 set of register has to supplied. First set
-	belongs	to register set of TMU instance and second set belongs to
-	registers shared with the TMU instance.
-
-  NOTE: On Exynos5420, the TRIMINFO register is misplaced for TMU
-	channels 2, 3 and 4
-	Use "samsung,exynos5420-tmu-ext-triminfo" in cases, there is a misplaced
-	register, also provide clock to access that base.
-
-	TRIMINFO at 0x1006c000 contains data for TMU channel 3
-	TRIMINFO at 0x100a0000 contains data for TMU channel 4
-	TRIMINFO at 0x10068000 contains data for TMU channel 2
-
-- interrupts : Should contain interrupt for thermal system
-- clocks : The main clocks for TMU device
-	-- 1. operational clock for TMU channel
-	-- 2. optional clock to access the shared registers of TMU channel
-	-- 3. optional special clock for functional operation
-- clock-names : Thermal system clock name
-	-- "tmu_apbif" operational clock for current TMU channel
-	-- "tmu_triminfo_apbif" clock to access the shared triminfo register
-		for current TMU channel
-	-- "tmu_sclk" clock for functional operation of the current TMU
-		channel
-
-The Exynos TMU supports generating interrupts when reaching given
-temperature thresholds. Number of supported thermal trip points depends
-on the SoC (only first trip points defined in DT will be configured):
- - most of SoC: 4
- - samsung,exynos5433-tmu: 8
- - samsung,exynos7-tmu: 8
-
-** Optional properties:
-
-- vtmu-supply: This entry is optional and provides the regulator node supplying
-		voltage to TMU. If needed this entry can be placed inside
-		board/platform specific dts file.
-
-Example 1):
-
-	tmu@100c0000 {
-		compatible = "samsung,exynos4412-tmu";
-		interrupt-parent = <&combiner>;
-		reg = <0x100C0000 0x100>;
-		interrupts = <2 4>;
-		clocks = <&clock 383>;
-		clock-names = "tmu_apbif";
-		vtmu-supply = <&tmu_regulator_node>;
-		#thermal-sensor-cells = <0>;
-	};
-
-Example 2): (In case of Exynos5420 "with misplaced TRIMINFO register")
-	tmu_cpu2: tmu@10068000 {
-		compatible = "samsung,exynos5420-tmu-ext-triminfo";
-		reg = <0x10068000 0x100>, <0x1006c000 0x4>;
-		interrupts = <0 184 0>;
-		clocks = <&clock 318>, <&clock 318>;
-		clock-names = "tmu_apbif", "tmu_triminfo_apbif";
-		#thermal-sensor-cells = <0>;
-	};
-
-	tmu_cpu3: tmu@1006c000 {
-		compatible = "samsung,exynos5420-tmu-ext-triminfo";
-		reg = <0x1006c000 0x100>, <0x100a0000 0x4>;
-		interrupts = <0 185 0>;
-		clocks = <&clock 318>, <&clock 319>;
-		clock-names = "tmu_apbif", "tmu_triminfo_apbif";
-		#thermal-sensor-cells = <0>;
-	};
-
-	tmu_gpu: tmu@100a0000 {
-		compatible = "samsung,exynos5420-tmu-ext-triminfo";
-		reg = <0x100a0000 0x100>, <0x10068000 0x4>;
-		interrupts = <0 215 0>;
-		clocks = <&clock 319>, <&clock 318>;
-		clock-names = "tmu_apbif", "tmu_triminfo_apbif";
-		#thermal-sensor-cells = <0>;
-	};
-
-Note: For multi-instance tmu each instance should have an alias correctly
-numbered in "aliases" node.
-
-Example:
-
-aliases {
-	tmuctrl0 = &tmuctrl_0;
-	tmuctrl1 = &tmuctrl_1;
-	tmuctrl2 = &tmuctrl_2;
-};
diff --git a/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
index 289e9a845600..a9b7388ca9ac 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
@@ -19,6 +19,7 @@ properties:
   compatible:
     enum:
       - qcom,sdm845-lmh
+      - qcom,sm8150-lmh
 
   reg:
     items:
diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
index d3b9e9b600a2..b6406bcc683f 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
@@ -43,6 +43,7 @@ properties:
       - description: v2 of TSENS
         items:
           - enum:
+              - qcom,msm8953-tsens
               - qcom,msm8996-tsens
               - qcom,msm8998-tsens
               - qcom,sc7180-tsens
diff --git a/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml b/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml
new file mode 100644
index 000000000000..17129f75d962
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml
@@ -0,0 +1,184 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/samsung,exynos-thermal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC Thermal Management Unit (TMU)
+
+maintainers:
+  - Krzysztof Kozlowski <[email protected]>
+
+description: |
+  For multi-instance tmu each instance should have an alias correctly numbered
+  in "aliases" node.
+
+properties:
+  compatible:
+    enum:
+      - samsung,exynos3250-tmu
+      - samsung,exynos4412-tmu
+      - samsung,exynos4210-tmu
+      - samsung,exynos5250-tmu
+      - samsung,exynos5260-tmu
+        # For TMU channel 0, 1 on Exynos5420:
+      - samsung,exynos5420-tmu
+        # For TMU channels 2, 3 and 4 of Exynos5420:
+      - samsung,exynos5420-tmu-ext-triminfo
+      - samsung,exynos5433-tmu
+      - samsung,exynos7-tmu
+
+  clocks:
+    minItems: 1
+    maxItems: 3
+
+  clock-names:
+    minItems: 1
+    maxItems: 3
+
+  interrupts:
+    description: |
+      The Exynos TMU supports generating interrupts when reaching given
+      temperature thresholds. Number of supported thermal trip points depends
+      on the SoC (only first trip points defined in DT will be configured)::
+       - most of SoC: 4
+       - samsung,exynos5433-tmu: 8
+       - samsung,exynos7-tmu: 8
+    maxItems: 1
+
+  reg:
+    items:
+      - description: TMU instance registers.
+      - description: |
+          Shared TMU registers.
+
+          Note:: On Exynos5420, the TRIMINFO register is misplaced for TMU
+          channels 2, 3 and 4 Use "samsung,exynos5420-tmu-ext-triminfo" in
+          cases, there is a misplaced register, also provide clock to access
+          that base.
+          TRIMINFO at 0x1006c000 contains data for TMU channel 3
+          TRIMINFO at 0x100a0000 contains data for TMU channel 4
+          TRIMINFO at 0x10068000 contains data for TMU channel 2
+    minItems: 1
+
+  '#thermal-sensor-cells': true
+
+  vtmu-supply:
+    description: The regulator node supplying voltage to TMU.
+
+required:
+  - compatible
+  - clocks
+  - clock-names
+  - interrupts
+  - reg
+
+allOf:
+  - $ref: /schemas/thermal/thermal-sensor.yaml
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: samsung,exynos5420-tmu-ext-triminfo
+    then:
+      properties:
+        clocks:
+          items:
+            - description:
+                Operational clock for TMU channel.
+            - description:
+                Optional clock to access the shared registers (e.g. TRIMINFO) of TMU
+                channel.
+        clock-names:
+          items:
+            - const: tmu_apbif
+            - const: tmu_triminfo_apbif
+        reg:
+          minItems: 2
+          maxItems: 2
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - samsung,exynos5433-tmu
+              - samsung,exynos7-tmu
+    then:
+      properties:
+        clocks:
+          items:
+            - description:
+                Operational clock for TMU channel.
+            - description:
+                Optional special clock for functional operation of TMU channel.
+        clock-names:
+          items:
+            - const: tmu_apbif
+            - const: tmu_sclk
+        reg:
+          minItems: 1
+          maxItems: 1
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - samsung,exynos3250-tmu
+              - samsung,exynos4412-tmu
+              - samsung,exynos4210-tmu
+              - samsung,exynos5250-tmu
+              - samsung,exynos5260-tmu
+              - samsung,exynos5420-tmu
+    then:
+      properties:
+        clocks:
+          minItems: 1
+          maxItems: 1
+        reg:
+          minItems: 1
+          maxItems: 1
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/exynos4.h>
+
+    tmu@100c0000 {
+        compatible = "samsung,exynos4412-tmu";
+        reg = <0x100C0000 0x100>;
+        interrupt-parent = <&combiner>;
+        interrupts = <2 4>;
+        #thermal-sensor-cells = <0>;
+        clocks = <&clock CLK_TMU_APBIF>;
+        clock-names = "tmu_apbif";
+        vtmu-supply = <&ldo10_reg>;
+    };
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    tmu@10068000 {
+        compatible = "samsung,exynos5420-tmu-ext-triminfo";
+        reg = <0x10068000 0x100>, <0x1006c000 0x4>;
+        interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
+        #thermal-sensor-cells = <0>;
+        clocks = <&clock 318>, <&clock 318>; /* CLK_TMU */
+        clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+        vtmu-supply = <&ldo7_reg>;
+    };
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    tmu@10060000 {
+        compatible = "samsung,exynos5433-tmu";
+        reg = <0x10060000 0x200>;
+        interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
+        #thermal-sensor-cells = <0>;
+        clocks = <&cmu_peris 3>, /* CLK_PCLK_TMU0_APBIF */
+                 <&cmu_peris 35>; /* CLK_SCLK_TMU0 */
+        clock-names = "tmu_apbif", "tmu_sclk";
+        vtmu-supply = <&ldo3_reg>;
+    };
diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml b/Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml
new file mode 100644
index 000000000000..b78209cd0f28
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml
@@ -0,0 +1,150 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/timer/nvidia,tegra-timer.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: NVIDIA Tegra timer
+
+maintainers:
+  - Stephen Warren <[email protected]>
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: nvidia,tegra210-timer
+    then:
+      properties:
+        interrupts:
+          # Either a single combined interrupt or up to 14 individual interrupts
+          minItems: 1
+          maxItems: 14
+          description: >
+            A list of 14 interrupts; one per each timer channels 0 through 13
+
+  - if:
+      properties:
+        compatible:
+          oneOf:
+            - items:
+                - enum:
+                    - nvidia,tegra114-timer
+                    - nvidia,tegra124-timer
+                    - nvidia,tegra132-timer
+                - const: nvidia,tegra30-timer
+            - items:
+                - const: nvidia,tegra30-timer
+                - const: nvidia,tegra20-timer
+    then:
+      properties:
+        interrupts:
+          # Either a single combined interrupt or up to 6 individual interrupts
+          minItems: 1
+          maxItems: 6
+          description: >
+            A list of 6 interrupts; one per each of timer channels 1 through 5,
+            and one for the shared interrupt for the remaining channels.
+
+  - if:
+      properties:
+        compatible:
+          const: nvidia,tegra20-timer
+    then:
+      properties:
+        interrupts:
+          # Either a single combined interrupt or up to 4 individual interrupts
+          minItems: 1
+          maxItems: 4
+          description: |
+            A list of 4 interrupts; one per timer channel.
+
+properties:
+  compatible:
+    oneOf:
+      - const: nvidia,tegra210-timer
+        description: >
+          The Tegra210 timer provides fourteen 29-bit timer counters and one 32-bit
+          timestamp counter. The TMRs run at either a fixed 1 MHz clock rate derived
+          from the oscillator clock (TMR0-TMR9) or directly at the oscillator clock
+          (TMR10-TMR13). Each TMR can be programmed to generate one-shot, periodic,
+          or watchdog interrupts.
+      - items:
+          - enum:
+              - nvidia,tegra114-timer
+              - nvidia,tegra124-timer
+              - nvidia,tegra132-timer
+          - const: nvidia,tegra30-timer
+      - items:
+          - const: nvidia,tegra30-timer
+          - const: nvidia,tegra20-timer
+        description: >
+          The Tegra30 timer provides ten 29-bit timer channels, a single 32-bit free
+          running counter, and 5 watchdog modules. The first two channels may also
+          trigger a legacy watchdog reset.
+      - const: nvidia,tegra20-timer
+        description: >
+          The Tegra20 timer provides four 29-bit timer channels and a single 32-bit free
+          running counter. The first two channels may also trigger a watchdog reset.
+
+  reg:
+    maxItems: 1
+
+  interrupts: true
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: timer
+
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    timer@60005000 {
+        compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer";
+        reg = <0x60005000 0x400>;
+        interrupts = <0 0 IRQ_TYPE_LEVEL_HIGH>,
+                     <0 1 IRQ_TYPE_LEVEL_HIGH>,
+                     <0 41 IRQ_TYPE_LEVEL_HIGH>,
+                     <0 42 IRQ_TYPE_LEVEL_HIGH>,
+                     <0 121 IRQ_TYPE_LEVEL_HIGH>,
+                     <0 122 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&tegra_car 214>;
+    };
+  - |
+    #include <dt-bindings/clock/tegra210-car.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    timer@60005000 {
+        compatible = "nvidia,tegra210-timer";
+        reg = <0x60005000 0x400>;
+        interrupts = <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 178 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 179 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&tegra_car TEGRA210_CLK_TIMER>;
+        clock-names = "timer";
+    };
diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt
deleted file mode 100644
index 4a864bd10d3d..000000000000
--- a/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-NVIDIA Tegra20 timer
-
-The Tegra20 timer provides four 29-bit timer channels and a single 32-bit free
-running counter. The first two channels may also trigger a watchdog reset.
-
-Required properties:
-
-- compatible : should be "nvidia,tegra20-timer".
-- reg : Specifies base physical address and size of the registers.
-- interrupts : A list of 4 interrupts; one per timer channel.
-- clocks : Must contain one entry, for the module clock.
-  See ../clocks/clock-bindings.txt for details.
-
-Example:
-
-timer {
-	compatible = "nvidia,tegra20-timer";
-	reg = <0x60005000 0x60>;
-	interrupts = <0 0 0x04
-			0 1 0x04
-			0 41 0x04
-			0 42 0x04>;
-	clocks = <&tegra_car 132>;
-};
diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt
deleted file mode 100644
index 032cda96fe0d..000000000000
--- a/Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-NVIDIA Tegra210 timer
-
-The Tegra210 timer provides fourteen 29-bit timer counters and one 32-bit
-timestamp counter. The TMRs run at either a fixed 1 MHz clock rate derived
-from the oscillator clock (TMR0-TMR9) or directly at the oscillator clock
-(TMR10-TMR13). Each TMR can be programmed to generate one-shot, periodic,
-or watchdog interrupts.
-
-Required properties:
-- compatible : "nvidia,tegra210-timer".
-- reg : Specifies base physical address and size of the registers.
-- interrupts : A list of 14 interrupts; one per each timer channels 0 through
-  13.
-- clocks : Must contain one entry, for the module clock.
-  See ../clocks/clock-bindings.txt for details.
-
-timer@60005000 {
-	compatible = "nvidia,tegra210-timer";
-	reg = <0x0 0x60005000 0x0 0x400>;
-	interrupts = <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 178 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 179 IRQ_TYPE_LEVEL_HIGH>;
-	clocks = <&tegra_car TEGRA210_CLK_TIMER>;
-	clock-names = "timer";
-};
diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt
deleted file mode 100644
index 1761f53ee36f..000000000000
--- a/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-NVIDIA Tegra30 timer
-
-The Tegra30 timer provides ten 29-bit timer channels, a single 32-bit free
-running counter, and 5 watchdog modules. The first two channels may also
-trigger a legacy watchdog reset.
-
-Required properties:
-
-- compatible : For Tegra30, must contain "nvidia,tegra30-timer".  Otherwise,
-  must contain '"nvidia,<chip>-timer", "nvidia,tegra30-timer"' where
-  <chip> is tegra124 or tegra132.
-- reg : Specifies base physical address and size of the registers.
-- interrupts : A list of 6 interrupts; one per each of timer channels 1
-    through 5, and one for the shared interrupt for the remaining channels.
-- clocks : Must contain one entry, for the module clock.
-  See ../clocks/clock-bindings.txt for details.
-
-timer {
-	compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer";
-	reg = <0x60005000 0x400>;
-	interrupts = <0 0 0x04
-		      0 1 0x04
-		      0 41 0x04
-		      0 42 0x04
-		      0 121 0x04
-		      0 122 0x04>;
-	clocks = <&tegra_car 214>;
-};
diff --git a/Documentation/driver-api/gpio/board.rst b/Documentation/driver-api/gpio/board.rst
index 191fa867826a..4e3adf31c8d1 100644
--- a/Documentation/driver-api/gpio/board.rst
+++ b/Documentation/driver-api/gpio/board.rst
@@ -71,14 +71,14 @@ with the help of _DSD (Device Specific Data), introduced in ACPI 5.1::
 
 	Device (FOO) {
 		Name (_CRS, ResourceTemplate () {
-			GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
-				"\\_SB.GPI0") {15} // red
-			GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
-				"\\_SB.GPI0") {16} // green
-			GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
-				"\\_SB.GPI0") {17} // blue
-			GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
-				"\\_SB.GPI0") {1} // power
+			GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly,
+				"\\_SB.GPI0", 0, ResourceConsumer) { 15 } // red
+			GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly,
+				"\\_SB.GPI0", 0, ResourceConsumer) { 16 } // green
+			GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly,
+				"\\_SB.GPI0", 0, ResourceConsumer) { 17 } // blue
+			GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionOutputOnly,
+				"\\_SB.GPI0", 0, ResourceConsumer) { 1 } // power
 		})
 
 		Name (_DSD, Package () {
@@ -92,10 +92,7 @@ with the help of _DSD (Device Specific Data), introduced in ACPI 5.1::
 						^FOO, 2, 0, 1,
 					}
 				},
-				Package () {
-					"power-gpios",
-					Package () {^FOO, 3, 0, 0},
-				},
+				Package () { "power-gpios", Package () { ^FOO, 3, 0, 0 } },
 			}
 		})
 	}
diff --git a/Documentation/driver-api/serial/driver.rst b/Documentation/driver-api/serial/driver.rst
index 31bd4e16fb1f..06ec04ba086f 100644
--- a/Documentation/driver-api/serial/driver.rst
+++ b/Documentation/driver-api/serial/driver.rst
@@ -311,7 +311,7 @@ hardware.
 	This call must not sleep
 
   set_ldisc(port,termios)
-	Notifier for discipline change. See Documentation/driver-api/serial/tty.rst.
+	Notifier for discipline change. See Documentation/tty/tty_ldisc.rst.
 
 	Locking: caller holds tty_port->mutex
 
diff --git a/Documentation/driver-api/thermal/index.rst b/Documentation/driver-api/thermal/index.rst
index 4cb0b9b6bfb8..030306ffa408 100644
--- a/Documentation/driver-api/thermal/index.rst
+++ b/Documentation/driver-api/thermal/index.rst
@@ -17,3 +17,4 @@ Thermal
    intel_powerclamp
    nouveau_thermal
    x86_pkg_temperature_thermal
+   intel_dptf
diff --git a/Documentation/driver-api/thermal/intel_dptf.rst b/Documentation/driver-api/thermal/intel_dptf.rst
new file mode 100644
index 000000000000..96668dca753a
--- /dev/null
+++ b/Documentation/driver-api/thermal/intel_dptf.rst
@@ -0,0 +1,272 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================================
+Intel(R) Dynamic Platform and Thermal Framework Sysfs Interface
+===============================================================
+
+:Copyright: |copy| 2022 Intel Corporation
+
+:Author: Srinivas Pandruvada <[email protected]>
+
+Introduction
+------------
+
+Intel(R) Dynamic Platform and Thermal Framework (DPTF) is a platform
+level hardware/software solution for power and thermal management.
+
+As a container for multiple power/thermal technologies, DPTF provides
+a coordinated approach for different policies to effect the hardware
+state of a system.
+
+Since it is a platform level framework, this has several components.
+Some parts of the technology is implemented in the firmware and uses
+ACPI and PCI devices to expose various features for monitoring and
+control. Linux has a set of kernel drivers exposing hardware interface
+to user space. This allows user space thermal solutions like
+"Linux Thermal Daemon" to read platform specific thermal and power
+tables to deliver adequate performance while keeping the system under
+thermal limits.
+
+DPTF ACPI Drivers interface
+----------------------------
+
+:file:`/sys/bus/platform/devices/<N>/uuids`, where <N>
+=INT3400|INTC1040|INTC1041|INTC10A0
+
+``available_uuids`` (RO)
+	A set of UUIDs strings presenting available policies
+	which should be notified to the firmware when the
+	user space can support those policies.
+
+	UUID strings:
+
+	"42A441D6-AE6A-462b-A84B-4A8CE79027D3" : Passive 1
+
+	"3A95C389-E4B8-4629-A526-C52C88626BAE" : Active
+
+	"97C68AE7-15FA-499c-B8C9-5DA81D606E0A" : Critical
+
+	"63BE270F-1C11-48FD-A6F7-3AF253FF3E2D" : Adaptive performance
+
+	"5349962F-71E6-431D-9AE8-0A635B710AEE" : Emergency call
+
+	"9E04115A-AE87-4D1C-9500-0F3E340BFE75" : Passive 2
+
+	"F5A35014-C209-46A4-993A-EB56DE7530A1" : Power Boss
+
+	"6ED722A7-9240-48A5-B479-31EEF723D7CF" : Virtual Sensor
+
+	"16CAF1B7-DD38-40ED-B1C1-1B8A1913D531" : Cooling mode
+
+	"BE84BABF-C4D4-403D-B495-3128FD44dAC1" : HDC
+
+``current_uuid`` (RW)
+	User space can write strings from available UUIDs, one at a
+	time.
+
+:file:`/sys/bus/platform/devices/<N>/`, where <N>
+=INT3400|INTC1040|INTC1041|INTC10A0
+
+``imok`` (WO)
+	User space daemon write 1 to respond to firmware event
+	for sending keep alive notification. User space receives
+	THERMAL_EVENT_KEEP_ALIVE kobject uevent notification when
+	firmware calls for user space to respond with imok ACPI
+	method.
+
+``odvp*`` (RO)
+	Firmware thermal status variable values. Thermal tables
+	calls for different processing based on these variable
+	values.
+
+``data_vault`` (RO)
+	Binary thermal table. Refer to
+	https:/github.com/intel/thermal_daemon for decoding
+	thermal table.
+
+
+ACPI Thermal Relationship table interface
+------------------------------------------
+
+:file:`/dev/acpi_thermal_rel`
+
+	This device provides IOCTL interface to read standard ACPI
+	thermal relationship tables via ACPI methods _TRT and _ART.
+	These IOCTLs are defined in
+	drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.h
+
+	IOCTLs:
+
+	ACPI_THERMAL_GET_TRT_LEN: Get length of TRT table
+
+	ACPI_THERMAL_GET_ART_LEN: Get length of ART table
+
+	ACPI_THERMAL_GET_TRT_COUNT: Number of records in TRT table
+
+	ACPI_THERMAL_GET_ART_COUNT: Number of records in ART table
+
+	ACPI_THERMAL_GET_TRT: Read binary TRT table, length to read is
+	provided via argument to ioctl().
+
+	ACPI_THERMAL_GET_ART: Read binary ART table, length to read is
+	provided via argument to ioctl().
+
+DPTF ACPI Sensor drivers
+-------------------------
+
+DPTF Sensor drivers are presented as standard thermal sysfs thermal_zone.
+
+
+DPTF ACPI Cooling drivers
+--------------------------
+
+DPTF cooling drivers are presented as standard thermal sysfs cooling_device.
+
+
+DPTF Processor thermal PCI Driver interface
+--------------------------------------------
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/power_limits/`
+
+Refer to Documentation/power/powercap/powercap.rst for powercap
+ABI.
+
+``power_limit_0_max_uw`` (RO)
+	Maximum powercap sysfs constraint_0_power_limit_uw for Intel RAPL
+
+``power_limit_0_step_uw`` (RO)
+	Power limit increment/decrements for Intel RAPL constraint 0 power limit
+
+``power_limit_0_min_uw`` (RO)
+	Minimum powercap sysfs constraint_0_power_limit_uw for Intel RAPL
+
+``power_limit_0_tmin_us`` (RO)
+	Minimum powercap sysfs constraint_0_time_window_us for Intel RAPL
+
+``power_limit_0_tmax_us`` (RO)
+	Maximum powercap sysfs constraint_0_time_window_us for Intel RAPL
+
+``power_limit_1_max_uw`` (RO)
+	Maximum powercap sysfs constraint_1_power_limit_uw for Intel RAPL
+
+``power_limit_1_step_uw`` (RO)
+	Power limit increment/decrements for Intel RAPL constraint 1 power limit
+
+``power_limit_1_min_uw`` (RO)
+	Minimum powercap sysfs constraint_1_power_limit_uw for Intel RAPL
+
+``power_limit_1_tmin_us`` (RO)
+	Minimum powercap sysfs constraint_1_time_window_us for Intel RAPL
+
+``power_limit_1_tmax_us`` (RO)
+	Maximum powercap sysfs constraint_1_time_window_us for Intel RAPL
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/`
+
+``tcc_offset_degree_celsius`` (RW)
+	TCC offset from the critical temperature where hardware will throttle
+	CPU.
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/workload_request`
+
+``workload_available_types`` (RO)
+	Available workload types. User space can specify one of the workload type
+	it is currently executing via workload_type. For example: idle, bursty,
+	sustained etc.
+
+``workload_type`` (RW)
+	User space can specify any one of the available workload type using
+	this interface.
+
+DPTF Processor thermal RFIM interface
+--------------------------------------------
+
+RFIM interface allows adjustment of FIVR (Fully Integrated Voltage Regulator)
+and DDR (Double Data Rate)frequencies to avoid RF interference with WiFi and 5G.
+
+Switching voltage regulators (VR) generate radiated EMI or RFI at the
+fundamental frequency and its harmonics. Some harmonics may interfere
+with very sensitive wireless receivers such as Wi-Fi and cellular that
+are integrated into host systems like notebook PCs.  One of mitigation
+methods is requesting SOC integrated VR (IVR) switching frequency to a
+small % and shift away the switching noise harmonic interference from
+radio channels.  OEM or ODMs can use the driver to control SOC IVR
+operation within the range where it does not impact IVR performance.
+
+DRAM devices of DDR IO interface and their power plane can generate EMI
+at the data rates. Similar to IVR control mechanism, Intel offers a
+mechanism by which DDR data rates can be changed if several conditions
+are met: there is strong RFI interference because of DDR; CPU power
+management has no other restriction in changing DDR data rates;
+PC ODMs enable this feature (real time DDR RFI Mitigation referred to as
+DDR-RFIM) for Wi-Fi from BIOS.
+
+
+FIVR attributes
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/fivr/`
+
+``vco_ref_code_lo`` (RW)
+	The VCO reference code is an 11-bit field and controls the FIVR
+	switching frequency. This is the 3-bit LSB field.
+
+``vco_ref_code_hi`` (RW)
+	The VCO reference code is an 11-bit field and controls the FIVR
+	switching frequency. This is the 8-bit MSB field.
+
+``spread_spectrum_pct`` (RW)
+	Set the FIVR spread spectrum clocking percentage
+
+``spread_spectrum_clk_enable`` (RW)
+	Enable/disable of the FIVR spread spectrum clocking feature
+
+``rfi_vco_ref_code`` (RW)
+	This field is a read only status register which reflects the
+	current FIVR switching frequency
+
+``fivr_fffc_rev`` (RW)
+	This field indicated the revision of the FIVR HW.
+
+
+DVFS attributes
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/dvfs/`
+
+``rfi_restriction_run_busy`` (RW)
+	Request the restriction of specific DDR data rate and set this
+	value 1. Self reset to 0 after operation.
+
+``rfi_restriction_err_code`` (RW)
+	0 :Request is accepted, 1:Feature disabled,
+	2: the request restricts more points than it is allowed
+
+``rfi_restriction_data_rate_Delta`` (RW)
+	Restricted DDR data rate for RFI protection: Lower Limit
+
+``rfi_restriction_data_rate_Base`` (RW)
+	Restricted DDR data rate for RFI protection: Upper Limit
+
+``ddr_data_rate_point_0`` (RO)
+	DDR data rate selection 1st point
+
+``ddr_data_rate_point_1`` (RO)
+	DDR data rate selection 2nd point
+
+``ddr_data_rate_point_2`` (RO)
+	DDR data rate selection 3rd point
+
+``ddr_data_rate_point_3`` (RO)
+	DDR data rate selection 4th point
+
+``rfi_disable (RW)``
+	Disable DDR rate change feature
+
+DPTF Power supply and Battery Interface
+----------------------------------------
+
+Refer to Documentation/ABI/testing/sysfs-platform-dptf
+
+DPTF Fan Control
+----------------------------------------
+
+Refer to Documentation/admin-guide/acpi/fan_performance_states.rst
diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index 74b830b2fd59..c414646a1bb4 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -19,16 +19,17 @@ possible we decided to do following:
     platform devices.
 
   - Devices behind real busses where there is a connector resource
-    are represented as struct spi_device or struct i2c_device
-    (standard UARTs are not busses so there is no struct uart_device).
+    are represented as struct spi_device or struct i2c_device. Note
+    that standard UARTs are not busses so there is no struct uart_device,
+    although some of them may be represented by sturct serdev_device.
 
 As both ACPI and Device Tree represent a tree of devices (and their
 resources) this implementation follows the Device Tree way as much as
 possible.
 
-The ACPI implementation enumerates devices behind busses (platform, SPI and
-I2C), creates the physical devices and binds them to their ACPI handle in
-the ACPI namespace.
+The ACPI implementation enumerates devices behind busses (platform, SPI,
+I2C, and in some cases UART), creates the physical devices and binds them
+to their ACPI handle in the ACPI namespace.
 
 This means that when ACPI_HANDLE(dev) returns non-NULL the device was
 enumerated from ACPI namespace. This handle can be used to extract other
@@ -46,18 +47,16 @@ some minor changes.
 Adding ACPI support for an existing driver should be pretty
 straightforward. Here is the simplest example::
 
-	#ifdef CONFIG_ACPI
 	static const struct acpi_device_id mydrv_acpi_match[] = {
 		/* ACPI IDs here */
 		{ }
 	};
 	MODULE_DEVICE_TABLE(acpi, mydrv_acpi_match);
-	#endif
 
 	static struct platform_driver my_driver = {
 		...
 		.driver = {
-			.acpi_match_table = ACPI_PTR(mydrv_acpi_match),
+			.acpi_match_table = mydrv_acpi_match,
 		},
 	};
 
@@ -155,7 +154,7 @@ Here is what the ACPI namespace for a SPI slave might look like::
 	Device (EEP0)
 	{
 		Name (_ADR, 1)
-		Name (_CID, Package() {
+		Name (_CID, Package () {
 			"ATML0025",
 			"AT25",
 		})
@@ -172,59 +171,51 @@ The SPI device drivers only need to add ACPI IDs in a similar way than with
 the platform device drivers. Below is an example where we add ACPI support
 to at25 SPI eeprom driver (this is meant for the above ACPI snippet)::
 
-	#ifdef CONFIG_ACPI
 	static const struct acpi_device_id at25_acpi_match[] = {
 		{ "AT25", 0 },
-		{ },
+		{ }
 	};
 	MODULE_DEVICE_TABLE(acpi, at25_acpi_match);
-	#endif
 
 	static struct spi_driver at25_driver = {
 		.driver = {
 			...
-			.acpi_match_table = ACPI_PTR(at25_acpi_match),
+			.acpi_match_table = at25_acpi_match,
 		},
 	};
 
 Note that this driver actually needs more information like page size of the
-eeprom etc. but at the time writing this there is no standard way of
-passing those. One idea is to return this in _DSM method like::
+eeprom, etc. This information can be passed via _DSD method like::
 
 	Device (EEP0)
 	{
 		...
-		Method (_DSM, 4, NotSerialized)
+		Name (_DSD, Package ()
 		{
-			Store (Package (6)
+			ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+			Package ()
 			{
-				"byte-len", 1024,
-				"addr-mode", 2,
-				"page-size, 32
-			}, Local0)
-
-			// Check UUIDs etc.
-
-			Return (Local0)
-		}
-
-Then the at25 SPI driver can get this configuration by calling _DSM on its
-ACPI handle like::
-
-	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
-	struct acpi_object_list input;
-	acpi_status status;
+				Package () { "size", 1024 },
+				Package () { "pagesize", 32 },
+				Package () { "address-width", 16 },
+			}
+		})
+	}
 
-	/* Fill in the input buffer */
+Then the at25 SPI driver can get this configuration by calling device property
+APIs during ->probe() phase like::
 
-	status = acpi_evaluate_object(ACPI_HANDLE(&spi->dev), "_DSM",
-				      &input, &output);
-	if (ACPI_FAILURE(status))
-		/* Handle the error */
+	err = device_property_read_u32(dev, "size", &size);
+	if (err)
+		...error handling...
 
-	/* Extract the data here */
+	err = device_property_read_u32(dev, "pagesize", &page_size);
+	if (err)
+		...error handling...
 
-	kfree(output.pointer);
+	err = device_property_read_u32(dev, "address-width", &addr_width);
+	if (err)
+		...error handling...
 
 I2C serial bus support
 ======================
@@ -237,26 +228,24 @@ registered.
 Below is an example of how to add ACPI support to the existing mpu3050
 input driver::
 
-	#ifdef CONFIG_ACPI
 	static const struct acpi_device_id mpu3050_acpi_match[] = {
 		{ "MPU3050", 0 },
-		{ },
+		{ }
 	};
 	MODULE_DEVICE_TABLE(acpi, mpu3050_acpi_match);
-	#endif
 
 	static struct i2c_driver mpu3050_i2c_driver = {
 		.driver	= {
 			.name	= "mpu3050",
-			.owner	= THIS_MODULE,
 			.pm	= &mpu3050_pm,
 			.of_match_table = mpu3050_of_match,
-			.acpi_match_table = ACPI_PTR(mpu3050_acpi_match),
+			.acpi_match_table = mpu3050_acpi_match,
 		},
 		.probe		= mpu3050_probe,
 		.remove		= mpu3050_remove,
 		.id_table	= mpu3050_ids,
 	};
+	module_i2c_driver(mpu3050_i2c_driver);
 
 Reference to PWM device
 =======================
@@ -282,9 +271,9 @@ introduced, i.e.::
                     }
                 }
             }
-
         })
         ...
+    }
 
 In the above example the PWM-based LED driver references to the PWM channel 0
 of \_SB.PCI0.PWM device with initial period setting equal to 600 ms (note that
@@ -306,26 +295,13 @@ For example::
 		{
 			Name (SBUF, ResourceTemplate()
 			{
-				...
 				// Used to power on/off the device
-				GpioIo (Exclusive, PullDefault, 0x0000, 0x0000,
-					IoRestrictionOutputOnly, "\\_SB.PCI0.GPI0",
-					0x00, ResourceConsumer,,)
-				{
-					// Pin List
-					0x0055
-				}
+				GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionOutputOnly,
+					"\\_SB.PCI0.GPI0", 0, ResourceConsumer) { 85 }
 
 				// Interrupt for the device
-				GpioInt (Edge, ActiveHigh, ExclusiveAndWake, PullNone,
-					0x0000, "\\_SB.PCI0.GPI0", 0x00, ResourceConsumer,,)
-				{
-					// Pin list
-					0x0058
-				}
-
-				...
-
+				GpioInt (Edge, ActiveHigh, ExclusiveAndWake, PullNone, 0,
+					 "\\_SB.PCI0.GPI0", 0, ResourceConsumer) { 88 }
 			}
 
 			Return (SBUF)
@@ -337,11 +313,12 @@ For example::
 			ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
 			Package ()
 			{
-				Package () {"power-gpios", Package() {^DEV, 0, 0, 0 }},
-				Package () {"irq-gpios", Package() {^DEV, 1, 0, 0 }},
+				Package () { "power-gpios", Package () { ^DEV, 0, 0, 0 } },
+				Package () { "irq-gpios", Package () { ^DEV, 1, 0, 0 } },
 			}
 		})
 		...
+	}
 
 These GPIO numbers are controller relative and path "\\_SB.PCI0.GPI0"
 specifies the path to the controller. In order to use these GPIOs in Linux
@@ -460,10 +437,10 @@ namespace link::
 	Device (TMP0)
 	{
 		Name (_HID, "PRP0001")
-		Name (_DSD, Package() {
+		Name (_DSD, Package () {
 			ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
 			Package () {
-				Package (2) { "compatible", "ti,tmp75" },
+				Package () { "compatible", "ti,tmp75" },
 			}
 		})
 		Method (_CRS, 0, Serialized)
diff --git a/Documentation/firmware-guide/acpi/gpio-properties.rst b/Documentation/firmware-guide/acpi/gpio-properties.rst
index df4b711053ee..eaec732cc77c 100644
--- a/Documentation/firmware-guide/acpi/gpio-properties.rst
+++ b/Documentation/firmware-guide/acpi/gpio-properties.rst
@@ -21,18 +21,18 @@ index, like the ASL example below shows::
       Name (_CRS, ResourceTemplate ()
       {
           GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly,
-                  "\\_SB.GPO0", 0, ResourceConsumer) {15}
+                  "\\_SB.GPO0", 0, ResourceConsumer) { 15 }
           GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly,
-                  "\\_SB.GPO0", 0, ResourceConsumer) {27, 31}
+                  "\\_SB.GPO0", 0, ResourceConsumer) { 27, 31 }
       })
 
       Name (_DSD, Package ()
       {
           ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
           Package ()
-	  {
-              Package () {"reset-gpios", Package() {^BTH, 1, 1, 0 }},
-              Package () {"shutdown-gpios", Package() {^BTH, 0, 0, 0 }},
+          {
+              Package () { "reset-gpios", Package () { ^BTH, 1, 1, 0 } },
+              Package () { "shutdown-gpios", Package () { ^BTH, 0, 0, 0 } },
           }
       })
   }
@@ -123,17 +123,17 @@ Example::
       // _DSD Hierarchical Properties Extension UUID
       ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
       Package () {
-          Package () {"hog-gpio8", "G8PU"}
+          Package () { "hog-gpio8", "G8PU" }
       }
   })
 
   Name (G8PU, Package () {
       ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
       Package () {
-          Package () {"gpio-hog", 1},
-          Package () {"gpios", Package () {8, 0}},
-          Package () {"output-high", 1},
-          Package () {"line-name", "gpio8-pullup"},
+          Package () { "gpio-hog", 1 },
+          Package () { "gpios", Package () { 8, 0 } },
+          Package () { "output-high", 1 },
+          Package () { "line-name", "gpio8-pullup" },
       }
   })
 
@@ -266,15 +266,17 @@ have a device like below::
 
       Name (_CRS, ResourceTemplate () {
           GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionNone,
-                  "\\_SB.GPO0", 0, ResourceConsumer) {15}
+                  "\\_SB.GPO0", 0, ResourceConsumer) { 15 }
           GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionNone,
-                  "\\_SB.GPO0", 0, ResourceConsumer) {27}
+                  "\\_SB.GPO0", 0, ResourceConsumer) { 27 }
       })
   }
 
 The driver might expect to get the right GPIO when it does::
 
   desc = gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+  if (IS_ERR(desc))
+	...error handling...
 
 but since there is no way to know the mapping between "reset" and
 the GpioIo() in _CRS desc will hold ERR_PTR(-ENOENT).
diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst
index 4fd7b70fcde1..bfa75ea1b66a 100644
--- a/Documentation/locking/locktypes.rst
+++ b/Documentation/locking/locktypes.rst
@@ -247,7 +247,7 @@ based on rt_mutex which changes the semantics:
    Non-PREEMPT_RT kernels disable preemption to get this effect.
 
    PREEMPT_RT kernels use a per-CPU lock for serialization which keeps
-   preemption disabled. The lock disables softirq handlers and also
+   preemption enabled. The lock disables softirq handlers and also
    prevents reentrancy due to task preemption.
 
 PREEMPT_RT kernels preserve all other spinlock_t semantics:
diff --git a/Documentation/process/applying-patches.rst b/Documentation/process/applying-patches.rst
index c2121c1e55d7..c269f5e1a0a3 100644
--- a/Documentation/process/applying-patches.rst
+++ b/Documentation/process/applying-patches.rst
@@ -249,6 +249,10 @@ The 5.x.y (-stable) and 5.x patches live at
 
 	https://www.kernel.org/pub/linux/kernel/v5.x/
 
+The 5.x.y incremental patches live at
+
+	https://www.kernel.org/pub/linux/kernel/v5.x/incr/
+
 The -rc patches are not stored on the webserver but are generated on
 demand from git tags such as
 
@@ -308,12 +312,11 @@ versions.
 If no 5.x.y kernel is available, then the highest numbered 5.x kernel is
 the current stable kernel.
 
-.. note::
+The -stable team provides normal as well as incremental patches. Below is
+how to apply these patches.
 
- The -stable team usually do make incremental patches available as well
- as patches against the latest mainline release, but I only cover the
- non-incremental ones below. The incremental ones can be found at
- https://www.kernel.org/pub/linux/kernel/v5.x/incr/
+Normal patches
+~~~~~~~~~~~~~~
 
 These patches are not incremental, meaning that for example the 5.7.3
 patch does not apply on top of the 5.7.2 kernel source, but rather on top
@@ -331,6 +334,21 @@ Here's a small example::
 	$ cd ..
 	$ mv linux-5.7.2 linux-5.7.3	# rename the kernel source dir
 
+Incremental patches
+~~~~~~~~~~~~~~~~~~~
+
+Incremental patches are different: instead of being applied on top
+of base 5.x kernel, they are applied on top of previous stable kernel
+(5.x.y-1).
+
+Here's the example to apply these::
+
+	$ cd ~/linux-5.7.2		# change to the kernel source dir
+	$ patch -p1 < ../patch-5.7.2-3	# apply the new 5.7.3 patch
+	$ cd ..
+	$ mv linux-5.7.2 linux-5.7.3	# rename the kernel source dir
+
+
 The -rc kernels
 ===============
 
diff --git a/Documentation/process/handling-regressions.rst b/Documentation/process/handling-regressions.rst
new file mode 100644
index 000000000000..abb741b1aeee
--- /dev/null
+++ b/Documentation/process/handling-regressions.rst
@@ -0,0 +1,746 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+.. See the bottom of this file for additional redistribution information.
+
+Handling regressions
+++++++++++++++++++++
+
+*We don't cause regressions* -- this document describes what this "first rule of
+Linux kernel development" means in practice for developers. It complements
+Documentation/admin-guide/reporting-regressions.rst, which covers the topic from a
+user's point of view; if you never read that text, go and at least skim over it
+before continuing here.
+
+The important bits (aka "The TL;DR")
+====================================
+
+#. Ensure subscribers of the `regression mailing list <https://lore.kernel.org/regressions/>`_
+   ([email protected]) quickly become aware of any new regression
+   report:
+
+    * When receiving a mailed report that did not CC the list, bring it into the
+      loop by immediately sending at least a brief "Reply-all" with the list
+      CCed.
+
+    * Forward or bounce any reports submitted in bug trackers to the list.
+
+#. Make the Linux kernel regression tracking bot "regzbot" track the issue (this
+   is optional, but recommended):
+
+    * For mailed reports, check if the reporter included a line like ``#regzbot
+      introduced v5.13..v5.14-rc1``. If not, send a reply (with the regressions
+      list in CC) containing a paragraph like the following, which tells regzbot
+      when the issue started to happen::
+
+       #regzbot ^introduced 1f2e3d4c5b6a
+
+    * When forwarding reports from a bug tracker to the regressions list (see
+      above), include a paragraph like the following::
+
+       #regzbot introduced: v5.13..v5.14-rc1
+       #regzbot from: Some N. Ice Human <[email protected]>
+       #regzbot monitor: http://some.bugtracker.example.com/ticket?id=123456789
+
+#. When submitting fixes for regressions, add "Link:" tags to the patch
+   description pointing to all places where the issue was reported, as
+   mandated by Documentation/process/submitting-patches.rst and
+   :ref:`Documentation/process/5.Posting.rst <development_posting>`.
+
+#. Try to fix regressions quickly once the culprit has been identified; fixes
+   for most regressions should be merged within two weeks, but some need to be
+   resolved within two or three days.
+
+
+All the details on Linux kernel regressions relevant for developers
+===================================================================
+
+
+The important basics in more detail
+-----------------------------------
+
+
+What to do when receiving regression reports
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Ensure the Linux kernel's regression tracker and others subscribers of the
+`regression mailing list <https://lore.kernel.org/regressions/>`_
+([email protected]) become aware of any newly reported regression:
+
+ * When you receive a report by mail that did not CC the list, immediately bring
+   it into the loop by sending at least a brief "Reply-all" with the list CCed;
+   try to ensure it gets CCed again in case you reply to a reply that omitted
+   the list.
+
+ * If a report submitted in a bug tracker hits your Inbox, forward or bounce it
+   to the list. Consider checking the list archives beforehand, if the reporter
+   already forwarded the report as instructed by
+   Documentation/admin-guide/reporting-issues.rst.
+
+When doing either, consider making the Linux kernel regression tracking bot
+"regzbot" immediately start tracking the issue:
+
+ * For mailed reports, check if the reporter included a "regzbot command" like
+   ``#regzbot introduced 1f2e3d4c5b6a``. If not, send a reply (with the
+   regressions list in CC) with a paragraph like the following:::
+
+       #regzbot ^introduced: v5.13..v5.14-rc1
+
+   This tells regzbot the version range in which the issue started to happen;
+   you can specify a range using commit-ids as well or state a single commit-id
+   in case the reporter bisected the culprit.
+
+   Note the caret (^) before the "introduced": it tells regzbot to treat the
+   parent mail (the one you reply to) as the initial report for the regression
+   you want to see tracked; that's important, as regzbot will later look out
+   for patches with "Link:" tags pointing to the report in the archives on
+   lore.kernel.org.
+
+ * When forwarding a regressions reported to a bug tracker, include a paragraph
+   with these regzbot commands::
+
+       #regzbot introduced: 1f2e3d4c5b6a
+       #regzbot from: Some N. Ice Human <[email protected]>
+       #regzbot monitor: http://some.bugtracker.example.com/ticket?id=123456789
+
+   Regzbot will then automatically associate patches with the report that
+   contain "Link:" tags pointing to your mail or the mentioned ticket.
+
+What's important when fixing regressions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You don't need to do anything special when submitting fixes for regression, just
+remember to do what Documentation/process/submitting-patches.rst,
+:ref:`Documentation/process/5.Posting.rst <development_posting>`, and
+Documentation/process/stable-kernel-rules.rst already explain in more detail:
+
+ * Point to all places where the issue was reported using "Link:" tags::
+
+       Link: https://lore.kernel.org/r/[email protected]/
+       Link: https://bugzilla.kernel.org/show_bug.cgi?id=1234567890
+
+ * Add a "Fixes:" tag to specify the commit causing the regression.
+
+ * If the culprit was merged in an earlier development cycle, explicitly mark
+   the fix for backporting using the ``Cc: [email protected]`` tag.
+
+All this is expected from you and important when it comes to regression, as
+these tags are of great value for everyone (you included) that might be looking
+into the issue weeks, months, or years later. These tags are also crucial for
+tools and scripts used by other kernel developers or Linux distributions; one of
+these tools is regzbot, which heavily relies on the "Link:" tags to associate
+reports for regression with changes resolving them.
+
+Prioritize work on fixing regressions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You should fix any reported regression as quickly as possible, to provide
+affected users with a solution in a timely manner and prevent more users from
+running into the issue; nevertheless developers need to take enough time and
+care to ensure regression fixes do not cause additional damage.
+
+In the end though, developers should give their best to prevent users from
+running into situations where a regression leaves them only three options: "run
+a kernel with a regression that seriously impacts usage", "continue running an
+outdated and thus potentially insecure kernel version for more than two weeks
+after a regression's culprit was identified", and "downgrade to a still
+supported kernel series that lack required features".
+
+How to realize this depends a lot on the situation. Here are a few rules of
+thumb for you, in order or importance:
+
+ * Prioritize work on handling regression reports and fixing regression over all
+   other Linux kernel work, unless the latter concerns acute security issues or
+   bugs causing data loss or damage.
+
+ * Always consider reverting the culprit commits and reapplying them later
+   together with necessary fixes, as this might be the least dangerous and
+   quickest way to fix a regression.
+
+ * Developers should handle regressions in all supported kernel series, but are
+   free to delegate the work to the stable team, if the issue probably at no
+   point in time occurred with mainline.
+
+ * Try to resolve any regressions introduced in the current development before
+   its end. If you fear a fix might be too risky to apply only days before a new
+   mainline release, let Linus decide: submit the fix separately to him as soon
+   as possible with the explanation of the situation. He then can make a call
+   and postpone the release if necessary, for example if multiple such changes
+   show up in his inbox.
+
+ * Address regressions in stable, longterm, or proper mainline releases with
+   more urgency than regressions in mainline pre-releases. That changes after
+   the release of the fifth pre-release, aka "-rc5": mainline then becomes as
+   important, to ensure all the improvements and fixes are ideally tested
+   together for at least one week before Linus releases a new mainline version.
+
+ * Fix regressions within two or three days, if they are critical for some
+   reason -- for example, if the issue is likely to affect many users of the
+   kernel series in question on all or certain architectures. Note, this
+   includes mainline, as issues like compile errors otherwise might prevent many
+   testers or continuous integration systems from testing the series.
+
+ * Aim to fix regressions within one week after the culprit was identified, if
+   the issue was introduced in either:
+
+    * a recent stable/longterm release
+
+    * the development cycle of the latest proper mainline release
+
+   In the latter case (say Linux v5.14), try to address regressions even
+   quicker, if the stable series for the predecessor (v5.13) will be abandoned
+   soon or already was stamped "End-of-Life" (EOL) -- this usually happens about
+   three to four weeks after a new mainline release.
+
+ * Try to fix all other regressions within two weeks after the culprit was
+   found. Two or three additional weeks are acceptable for performance
+   regressions and other issues which are annoying, but don't prevent anyone
+   from running Linux (unless it's an issue in the current development cycle,
+   as those should ideally be addressed before the release). A few weeks in
+   total are acceptable if a regression can only be fixed with a risky change
+   and at the same time is affecting only a few users; as much time is
+   also okay if the regression is already present in the second newest longterm
+   kernel series.
+
+Note: The aforementioned time frames for resolving regressions are meant to
+include getting the fix tested, reviewed, and merged into mainline, ideally with
+the fix being in linux-next at least briefly. This leads to delays you need to
+account for.
+
+Subsystem maintainers are expected to assist in reaching those periods by doing
+timely reviews and quick handling of accepted patches. They thus might have to
+send git-pull requests earlier or more often than usual; depending on the fix,
+it might even be acceptable to skip testing in linux-next. Especially fixes for
+regressions in stable and longterm kernels need to be handled quickly, as fixes
+need to be merged in mainline before they can be backported to older series.
+
+
+More aspects regarding regressions developers should be aware of
+----------------------------------------------------------------
+
+
+How to deal with changes where a risk of regression is known
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Evaluate how big the risk of regressions is, for example by performing a code
+search in Linux distributions and Git forges. Also consider asking other
+developers or projects likely to be affected to evaluate or even test the
+proposed change; if problems surface, maybe some solution acceptable for all
+can be found.
+
+If the risk of regressions in the end seems to be relatively small, go ahead
+with the change, but let all involved parties know about the risk. Hence, make
+sure your patch description makes this aspect obvious. Once the change is
+merged, tell the Linux kernel's regression tracker and the regressions mailing
+list about the risk, so everyone has the change on the radar in case reports
+trickle in. Depending on the risk, you also might want to ask the subsystem
+maintainer to mention the issue in his mainline pull request.
+
+What else is there to known about regressions?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Check out Documentation/admin-guide/reporting-regressions.rst, it covers a lot
+of other aspects you want might want to be aware of:
+
+ * the purpose of the "no regressions rule"
+
+ * what issues actually qualify as regression
+
+ * who's in charge for finding the root cause of a regression
+
+ * how to handle tricky situations, e.g. when a regression is caused by a
+   security fix or when fixing a regression might cause another one
+
+Whom to ask for advice when it comes to regressions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Send a mail to the regressions mailing list ([email protected]) while
+CCing the Linux kernel's regression tracker ([email protected]); if the
+issue might better be dealt with in private, feel free to omit the list.
+
+
+More about regression tracking and regzbot
+------------------------------------------
+
+
+Why the Linux kernel has a regression tracker, and why is regzbot used?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Rules like "no regressions" need someone to ensure they are followed, otherwise
+they are broken either accidentally or on purpose. History has shown this to be
+true for the Linux kernel as well. That's why Thorsten Leemhuis volunteered to
+keep an eye on things as the Linux kernel's regression tracker, who's
+occasionally helped by other people. Neither of them are paid to do this,
+that's why regression tracking is done on a best effort basis.
+
+Earlier attempts to manually track regressions have shown it's an exhausting and
+frustrating work, which is why they were abandoned after a while. To prevent
+this from happening again, Thorsten developed regzbot to facilitate the work,
+with the long term goal to automate regression tracking as much as possible for
+everyone involved.
+
+How does regression tracking work with regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The bot watches for replies to reports of tracked regressions. Additionally,
+it's looking out for posted or committed patches referencing such reports
+with "Link:" tags; replies to such patch postings are tracked as well.
+Combined this data provides good insights into the current state of the fixing
+process.
+
+Regzbot tries to do its job with as little overhead as possible for both
+reporters and developers. In fact, only reporters are burdened with an extra
+duty: they need to tell regzbot about the regression report using the ``#regzbot
+introduced`` command outlined above; if they don't do that, someone else can
+take care of that using ``#regzbot ^introduced``.
+
+For developers there normally is no extra work involved, they just need to make
+sure to do something that was expected long before regzbot came to light: add
+"Link:" tags to the patch description pointing to all reports about the issue
+fixed.
+
+Do I have to use regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It's in the interest of everyone if you do, as kernel maintainers like Linus
+Torvalds partly rely on regzbot's tracking in their work -- for example when
+deciding to release a new version or extend the development phase. For this they
+need to be aware of all unfixed regression; to do that, Linus is known to look
+into the weekly reports sent by regzbot.
+
+Do I have to tell regzbot about every regression I stumble upon?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Ideally yes: we are all humans and easily forget problems when something more
+important unexpectedly comes up -- for example a bigger problem in the Linux
+kernel or something in real life that's keeping us away from keyboards for a
+while. Hence, it's best to tell regzbot about every regression, except when you
+immediately write a fix and commit it to a tree regularly merged to the affected
+kernel series.
+
+How to see which regressions regzbot tracks currently?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Check `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_
+for the latest info; alternatively, `search for the latest regression report
+<https://lore.kernel.org/lkml/?q=%22Linux+regressions+report%22+f%3Aregzbot>`_,
+which regzbot normally sends out once a week on Sunday evening (UTC), which is a
+few hours before Linus usually publishes new (pre-)releases.
+
+What places is regzbot monitoring?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Regzbot is watching the most important Linux mailing lists as well as the git
+repositories of linux-next, mainline, and stable/longterm.
+
+What kind of issues are supposed to be tracked by regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The bot is meant to track regressions, hence please don't involve regzbot for
+regular issues. But it's okay for the Linux kernel's regression tracker if you
+use regzbot to track severe issues, like reports about hangs, corrupted data,
+or internal errors (Panic, Oops, BUG(), warning, ...).
+
+Can I add regressions found by CI systems to regzbot's tracking?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Feel free to do so, if the particular regression likely has impact on practical
+use cases and thus might be noticed by users; hence, please don't involve
+regzbot for theoretical regressions unlikely to show themselves in real world
+usage.
+
+How to interact with regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By using a 'regzbot command' in a direct or indirect reply to the mail with the
+regression report. These commands need to be in their own paragraph (IOW: they
+need to be separated from the rest of the mail using blank lines).
+
+One such command is ``#regzbot introduced <version or commit>``, which makes
+regzbot consider your mail as a regressions report added to the tracking, as
+already described above; ``#regzbot ^introduced <version or commit>`` is another
+such command, which makes regzbot consider the parent mail as a report for a
+regression which it starts to track.
+
+Once one of those two commands has been utilized, other regzbot commands can be
+used in direct or indirect replies to the report. You can write them below one
+of the `introduced` commands or in replies to the mail that used one of them
+or itself is a reply to that mail:
+
+ * Set or update the title::
+
+       #regzbot title: foo
+
+ * Monitor a discussion or bugzilla.kernel.org ticket where additions aspects of
+   the issue or a fix are discussed -- for example the posting of a patch fixing
+   the regression::
+
+       #regzbot monitor: https://lore.kernel.org/all/[email protected]/
+
+   Monitoring only works for lore.kernel.org and bugzilla.kernel.org; regzbot
+   will consider all messages in that thread or ticket as related to the fixing
+   process.
+
+ * Point to a place with further details of interest, like a mailing list post
+   or a ticket in a bug tracker that are slightly related, but about a different
+   topic::
+
+       #regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
+
+ * Mark a regression as fixed by a commit that is heading upstream or already
+   landed::
+
+       #regzbot fixed-by: 1f2e3d4c5d
+
+ * Mark a regression as a duplicate of another one already tracked by regzbot::
+
+       #regzbot dup-of: https://lore.kernel.org/all/[email protected]/
+
+ * Mark a regression as invalid::
+
+       #regzbot invalid: wasn't a regression, problem has always existed
+
+Is there more to tell about regzbot and its commands?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+More detailed and up-to-date information about the Linux
+kernel's regression tracking bot can be found on its
+`project page <https://gitlab.com/knurd42/regzbot>`_, which among others
+contains a `getting started guide <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/getting_started.md>`_
+and `reference documentation <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/reference.md>`_
+which both cover more details than the above section.
+
+Quotes from Linus about regression
+----------------------------------
+
+Find below a few real life examples of how Linus Torvalds expects regressions to
+be handled:
+
+ * From `2017-10-26 (1/2)
+   <https://lore.kernel.org/lkml/CA+55aFwiiQYJ+YoLKCXjN_beDVfu38mg=Ggg5LFOcqHE8Qi7Zw@mail.gmail.com/>`_::
+
+       If you break existing user space setups THAT IS A REGRESSION.
+
+       It's not ok to say "but we'll fix the user space setup".
+
+       Really. NOT OK.
+
+       [...]
+
+       The first rule is:
+
+        - we don't cause regressions
+
+       and the corollary is that when regressions *do* occur, we admit to
+       them and fix them, instead of blaming user space.
+
+       The fact that you have apparently been denying the regression now for
+       three weeks means that I will revert, and I will stop pulling apparmor
+       requests until the people involved understand how kernel development
+       is done.
+
+ * From `2017-10-26 (2/2)
+   <https://lore.kernel.org/lkml/CA+55aFxW7NMAMvYhkvz1UPbUTUJewRt6Yb51QAx5RtrWOwjebg@mail.gmail.com/>`_::
+
+       People should basically always feel like they can update their kernel
+       and simply not have to worry about it.
+
+       I refuse to introduce "you can only update the kernel if you also
+       update that other program" kind of limitations. If the kernel used to
+       work for you, the rule is that it continues to work for you.
+
+       There have been exceptions, but they are few and far between, and they
+       generally have some major and fundamental reasons for having happened,
+       that were basically entirely unavoidable, and people _tried_hard_ to
+       avoid them. Maybe we can't practically support the hardware any more
+       after it is decades old and nobody uses it with modern kernels any
+       more. Maybe there's a serious security issue with how we did things,
+       and people actually depended on that fundamentally broken model. Maybe
+       there was some fundamental other breakage that just _had_ to have a
+       flag day for very core and fundamental reasons.
+
+       And notice that this is very much about *breaking* peoples environments.
+
+       Behavioral changes happen, and maybe we don't even support some
+       feature any more. There's a number of fields in /proc/<pid>/stat that
+       are printed out as zeroes, simply because they don't even *exist* in
+       the kernel any more, or because showing them was a mistake (typically
+       an information leak). But the numbers got replaced by zeroes, so that
+       the code that used to parse the fields still works. The user might not
+       see everything they used to see, and so behavior is clearly different,
+       but things still _work_, even if they might no longer show sensitive
+       (or no longer relevant) information.
+
+       But if something actually breaks, then the change must get fixed or
+       reverted. And it gets fixed in the *kernel*. Not by saying "well, fix
+       your user space then". It was a kernel change that exposed the
+       problem, it needs to be the kernel that corrects for it, because we
+       have a "upgrade in place" model. We don't have a "upgrade with new
+       user space".
+
+       And I seriously will refuse to take code from people who do not
+       understand and honor this very simple rule.
+
+       This rule is also not going to change.
+
+       And yes, I realize that the kernel is "special" in this respect. I'm
+       proud of it.
+
+       I have seen, and can point to, lots of projects that go "We need to
+       break that use case in order to make progress" or "you relied on
+       undocumented behavior, it sucks to be you" or "there's a better way to
+       do what you want to do, and you have to change to that new better
+       way", and I simply don't think that's acceptable outside of very early
+       alpha releases that have experimental users that know what they signed
+       up for. The kernel hasn't been in that situation for the last two
+       decades.
+
+       We do API breakage _inside_ the kernel all the time. We will fix
+       internal problems by saying "you now need to do XYZ", but then it's
+       about internal kernel API's, and the people who do that then also
+       obviously have to fix up all the in-kernel users of that API. Nobody
+       can say "I now broke the API you used, and now _you_ need to fix it
+       up". Whoever broke something gets to fix it too.
+
+       And we simply do not break user space.
+
+ * From `2020-05-21
+   <https://lore.kernel.org/all/CAHk-=wiVi7mSrsMP=fLXQrXK_UimybW=ziLOwSzFTtoXUacWVQ@mail.gmail.com/>`_::
+
+       The rules about regressions have never been about any kind of
+       documented behavior, or where the code lives.
+
+       The rules about regressions are always about "breaks user workflow".
+
+       Users are literally the _only_ thing that matters.
+
+       No amount of "you shouldn't have used this" or "that behavior was
+       undefined, it's your own fault your app broke" or "that used to work
+       simply because of a kernel bug" is at all relevant.
+
+       Now, reality is never entirely black-and-white. So we've had things
+       like "serious security issue" etc that just forces us to make changes
+       that may break user space. But even then the rule is that we don't
+       really have other options that would allow things to continue.
+
+       And obviously, if users take years to even notice that something
+       broke, or if we have sane ways to work around the breakage that
+       doesn't make for too much trouble for users (ie "ok, there are a
+       handful of users, and they can use a kernel command line to work
+       around it" kind of things) we've also been a bit less strict.
+
+       But no, "that was documented to be broken" (whether it's because the
+       code was in staging or because the man-page said something else) is
+       irrelevant. If staging code is so useful that people end up using it,
+       that means that it's basically regular kernel code with a flag saying
+       "please clean this up".
+
+       The other side of the coin is that people who talk about "API
+       stability" are entirely wrong. API's don't matter either. You can make
+       any changes to an API you like - as long as nobody notices.
+
+       Again, the regression rule is not about documentation, not about
+       API's, and not about the phase of the moon.
+
+       It's entirely about "we caused problems for user space that used to work".
+
+ * From `2017-11-05
+   <https://lore.kernel.org/all/CA+55aFzUvbGjD8nQ-+3oiMBx14c_6zOj2n7KLN3UsJ-qsd4Dcw@mail.gmail.com/>`_::
+
+       And our regression rule has never been "behavior doesn't change".
+       That would mean that we could never make any changes at all.
+
+       For example, we do things like add new error handling etc all the
+       time, which we then sometimes even add tests for in our kselftest
+       directory.
+
+       So clearly behavior changes all the time and we don't consider that a
+       regression per se.
+
+       The rule for a regression for the kernel is that some real user
+       workflow breaks. Not some test. Not a "look, I used to be able to do
+       X, now I can't".
+
+ * From `2018-08-03
+   <https://lore.kernel.org/all/CA+55aFwWZX=CXmWDTkDGb36kf12XmTehmQjbiMPCqCRG2hi9kw@mail.gmail.com/>`_::
+
+       YOU ARE MISSING THE #1 KERNEL RULE.
+
+       We do not regress, and we do not regress exactly because your are 100% wrong.
+
+       And the reason you state for your opinion is in fact exactly *WHY* you
+       are wrong.
+
+       Your "good reasons" are pure and utter garbage.
+
+       The whole point of "we do not regress" is so that people can upgrade
+       the kernel and never have to worry about it.
+
+       > Kernel had a bug which has been fixed
+
+       That is *ENTIRELY* immaterial.
+
+       Guys, whether something was buggy or not DOES NOT MATTER.
+
+       Why?
+
+       Bugs happen. That's a fact of life. Arguing that "we had to break
+       something because we were fixing a bug" is completely insane. We fix
+       tens of bugs every single day, thinking that "fixing a bug" means that
+       we can break something is simply NOT TRUE.
+
+       So bugs simply aren't even relevant to the discussion. They happen,
+       they get found, they get fixed, and it has nothing to do with "we
+       break users".
+
+       Because the only thing that matters IS THE USER.
+
+       How hard is that to understand?
+
+       Anybody who uses "but it was buggy" as an argument is entirely missing
+       the point. As far as the USER was concerned, it wasn't buggy - it
+       worked for him/her.
+
+       Maybe it worked *because* the user had taken the bug into account,
+       maybe it worked because the user didn't notice - again, it doesn't
+       matter. It worked for the user.
+
+       Breaking a user workflow for a "bug" is absolutely the WORST reason
+       for breakage you can imagine.
+
+       It's basically saying "I took something that worked, and I broke it,
+       but now it's better". Do you not see how f*cking insane that statement
+       is?
+
+       And without users, your program is not a program, it's a pointless
+       piece of code that you might as well throw away.
+
+       Seriously. This is *why* the #1 rule for kernel development is "we
+       don't break users". Because "I fixed a bug" is absolutely NOT AN
+       ARGUMENT if that bug fix broke a user setup. You actually introduced a
+       MUCH BIGGER bug by "fixing" something that the user clearly didn't
+       even care about.
+
+       And dammit, we upgrade the kernel ALL THE TIME without upgrading any
+       other programs at all. It is absolutely required, because flag-days
+       and dependencies are horribly bad.
+
+       And it is also required simply because I as a kernel developer do not
+       upgrade random other tools that I don't even care about as I develop
+       the kernel, and I want any of my users to feel safe doing the same
+       time.
+
+       So no. Your rule is COMPLETELY wrong. If you cannot upgrade a kernel
+       without upgrading some other random binary, then we have a problem.
+
+ * From `2021-06-05
+   <https://lore.kernel.org/all/CAHk-=wiUVqHN76YUwhkjZzwTdjMMJf_zN4+u7vEJjmEGh3recw@mail.gmail.com/>`_::
+
+       THERE ARE NO VALID ARGUMENTS FOR REGRESSIONS.
+
+       Honestly, security people need to understand that "not working" is not
+       a success case of security. It's a failure case.
+
+       Yes, "not working" may be secure. But security in that case is *pointless*.
+
+ * From `2011-05-06 (1/3)
+   <https://lore.kernel.org/all/[email protected]/>`_::
+
+       Binary compatibility is more important.
+
+       And if binaries don't use the interface to parse the format (or just
+       parse it wrongly - see the fairly recent example of adding uuid's to
+       /proc/self/mountinfo), then it's a regression.
+
+       And regressions get reverted, unless there are security issues or
+       similar that makes us go "Oh Gods, we really have to break things".
+
+       I don't understand why this simple logic is so hard for some kernel
+       developers to understand. Reality matters. Your personal wishes matter
+       NOT AT ALL.
+
+       If you made an interface that can be used without parsing the
+       interface description, then we're stuck with the interface. Theory
+       simply doesn't matter.
+
+       You could help fix the tools, and try to avoid the compatibility
+       issues that way. There aren't that many of them.
+
+   From `2011-05-06 (2/3)
+   <https://lore.kernel.org/all/[email protected]/>`_::
+
+       it's clearly NOT an internal tracepoint. By definition. It's being
+       used by powertop.
+
+   From `2011-05-06 (3/3)
+   <https://lore.kernel.org/all/[email protected]/>`_::
+
+       We have programs that use that ABI and thus it's a regression if they break.
+
+ * From `2012-07-06 <https://lore.kernel.org/all/CA+55aFwnLJ+0sjx92EGREGTWOx84wwKaraSzpTNJwPVV8edw8g@mail.gmail.com/>`_::
+
+       > Now this got me wondering if Debian _unstable_ actually qualifies as a
+       > standard distro userspace.
+
+       Oh, if the kernel breaks some standard user space, that counts. Tons
+       of people run Debian unstable
+
+ * From `2019-09-15
+   <https://lore.kernel.org/lkml/CAHk-=wiP4K8DRJWsCo=20hn_6054xBamGKF2kPgUzpB5aMaofA@mail.gmail.com/>`_::
+
+       One _particularly_ last-minute revert is the top-most commit (ignoring
+       the version change itself) done just before the release, and while
+       it's very annoying, it's perhaps also instructive.
+
+       What's instructive about it is that I reverted a commit that wasn't
+       actually buggy. In fact, it was doing exactly what it set out to do,
+       and did it very well. In fact it did it _so_ well that the much
+       improved IO patterns it caused then ended up revealing a user-visible
+       regression due to a real bug in a completely unrelated area.
+
+       The actual details of that regression are not the reason I point that
+       revert out as instructive, though. It's more that it's an instructive
+       example of what counts as a regression, and what the whole "no
+       regressions" kernel rule means. The reverted commit didn't change any
+       API's, and it didn't introduce any new bugs. But it ended up exposing
+       another problem, and as such caused a kernel upgrade to fail for a
+       user. So it got reverted.
+
+       The point here being that we revert based on user-reported _behavior_,
+       not based on some "it changes the ABI" or "it caused a bug" concept.
+       The problem was really pre-existing, and it just didn't happen to
+       trigger before. The better IO patterns introduced by the change just
+       happened to expose an old bug, and people had grown to depend on the
+       previously benign behavior of that old issue.
+
+       And never fear, we'll re-introduce the fix that improved on the IO
+       patterns once we've decided just how to handle the fact that we had a
+       bad interaction with an interface that people had then just happened
+       to rely on incidental behavior for before. It's just that we'll have
+       to hash through how to do that (there are no less than three different
+       patches by three different developers being discussed, and there might
+       be more coming...). In the meantime, I reverted the thing that exposed
+       the problem to users for this release, even if I hope it will be
+       re-introduced (perhaps even backported as a stable patch) once we have
+       consensus about the issue it exposed.
+
+       Take-away from the whole thing: it's not about whether you change the
+       kernel-userspace ABI, or fix a bug, or about whether the old code
+       "should never have worked in the first place". It's about whether
+       something breaks existing users' workflow.
+
+       Anyway, that was my little aside on the whole regression thing.  Since
+       it's that "first rule of kernel programming", I felt it is perhaps
+       worth just bringing it up every once in a while
+
+..
+   end-of-content
+..
+   This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+   of the file. If you want to distribute this text under CC-BY-4.0 only,
+   please use "The Linux kernel developers" for author attribution and link
+   this as source:
+   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/process/handling-regressions.rst
+..
+   Note: Only the content of this RST file as found in the Linux kernel sources
+   is available under CC-BY-4.0, as versions of this text that were processed
+   (for example by the kernel's build system) might contain content taken from
+   files which use a more restrictive license.
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index 9f1b88492bb3..3587dae4d0ef 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -25,6 +25,7 @@ Below are the essential guides that every developer should read.
    code-of-conduct-interpretation
    development-process
    submitting-patches
+   handling-regressions
    programming-language
    coding-style
    maintainer-handbooks
@@ -48,6 +49,7 @@ Other guides to the community that are of interest to most developers are:
    deprecated
    embargoed-hardware-issues
    maintainers
+   researcher-guidelines
 
 These are some overall technical guides that have been put here for now for
 lack of a better place.
diff --git a/Documentation/process/researcher-guidelines.rst b/Documentation/process/researcher-guidelines.rst
new file mode 100644
index 000000000000..afc944e0e898
--- /dev/null
+++ b/Documentation/process/researcher-guidelines.rst
@@ -0,0 +1,143 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _researcher_guidelines:
+
+Researcher Guidelines
++++++++++++++++++++++
+
+The Linux kernel community welcomes transparent research on the Linux
+kernel, the activities involved in producing it, and any other byproducts
+of its development. Linux benefits greatly from this kind of research, and
+most aspects of Linux are driven by research in one form or another.
+
+The community greatly appreciates if researchers can share preliminary
+findings before making their results public, especially if such research
+involves security. Getting involved early helps both improve the quality
+of research and ability for Linux to improve from it. In any case,
+sharing open access copies of the published research with the community
+is recommended.
+
+This document seeks to clarify what the Linux kernel community considers
+acceptable and non-acceptable practices when conducting such research. At
+the very least, such research and related activities should follow
+standard research ethics rules. For more background on research ethics
+generally, ethics in technology, and research of developer communities
+in particular, see:
+
+* `History of Research Ethics <https://www.unlv.edu/research/ORI-HSR/history-ethics>`_
+* `IEEE Ethics <https://www.ieee.org/about/ethics/index.html>`_
+* `Developer and Researcher Views on the Ethics of Experiments on Open-Source Projects <https://arxiv.org/pdf/2112.13217.pdf>`_
+
+The Linux kernel community expects that everyone interacting with the
+project is participating in good faith to make Linux better. Research on
+any publicly-available artifact (including, but not limited to source
+code) produced by the Linux kernel community is welcome, though research
+on developers must be distinctly opt-in.
+
+Passive research that is based entirely on publicly available sources,
+including posts to public mailing lists and commits to public
+repositories, is clearly permissible. Though, as with any research,
+standard ethics must still be followed.
+
+Active research on developer behavior, however, must be done with the
+explicit agreement of, and full disclosure to, the individual developers
+involved. Developers cannot be interacted with/experimented on without
+consent; this, too, is standard research ethics.
+
+To help clarify: sending patches to developers *is* interacting
+with them, but they have already consented to receiving *good faith
+contributions*. Sending intentionally flawed/vulnerable patches or
+contributing misleading information to discussions is not consented
+to. Such communication can be damaging to the developer (e.g. draining
+time, effort, and morale) and damaging to the project by eroding
+the entire developer community's trust in the contributor (and the
+contributor's organization as a whole), undermining efforts to provide
+constructive feedback to contributors, and putting end users at risk of
+software flaws.
+
+Participation in the development of Linux itself by researchers, as
+with anyone, is welcomed and encouraged. Research into Linux code is
+a common practice, especially when it comes to developing or running
+analysis tools that produce actionable results.
+
+When engaging with the developer community, sending a patch has
+traditionally been the best way to make an impact. Linux already has
+plenty of known bugs -- what's much more helpful is having vetted fixes.
+Before contributing, carefully read the appropriate documentation:
+
+* Documentation/process/development-process.rst
+* Documentation/process/submitting-patches.rst
+* Documentation/admin-guide/reporting-issues.rst
+* Documentation/admin-guide/security-bugs.rst
+
+Then send a patch (including a commit log with all the details listed
+below) and follow up on any feedback from other developers.
+
+When sending patches produced from research, the commit logs should
+contain at least the following details, so that developers have
+appropriate context for understanding the contribution. Answer:
+
+* What is the specific problem that has been found?
+* How could the problem be reached on a running system?
+* What effect would encountering the problem have on the system?
+* How was the problem found? Specifically include details about any
+  testing, static or dynamic analysis programs, and any other tools or
+  methods used to perform the work.
+* Which version of Linux was the problem found on? Using the most recent
+  release or a recent linux-next branch is strongly preferred (see
+  Documentation/process/howto.rst).
+* What was changed to fix the problem, and why it is believed to be correct?
+* How was the change build tested and run-time tested?
+* What prior commit does this change fix? This should go in a "Fixes:"
+  tag as the documentation describes.
+* Who else has reviewed this patch? This should go in appropriate
+  "Reviewed-by:" tags; see below.
+
+For example::
+
+  From: Author <author@email>
+  Subject: [PATCH] drivers/foo_bar: Add missing kfree()
+
+  The error path in foo_bar driver does not correctly free the allocated
+  struct foo_bar_info. This can happen if the attached foo_bar device
+  rejects the initialization packets sent during foo_bar_probe(). This
+  would result in a 64 byte slab memory leak once per device attach,
+  wasting memory resources over time.
+
+  This flaw was found using an experimental static analysis tool we are
+  developing, LeakMagic[1], which reported the following warning when
+  analyzing the v5.15 kernel release:
+
+   path/to/foo_bar.c:187: missing kfree() call?
+
+  Add the missing kfree() to the error path. No other references to
+  this memory exist outside the probe function, so this is the only
+  place it can be freed.
+
+  x86_64 and arm64 defconfig builds with CONFIG_FOO_BAR=y using GCC
+  11.2 show no new warnings, and LeakMagic no longer warns about this
+  code path. As we don't have a FooBar device to test with, no runtime
+  testing was able to be performed.
+
+  [1] https://url/to/leakmagic/details
+
+  Reported-by: Researcher <researcher@email>
+  Fixes: aaaabbbbccccdddd ("Introduce support for FooBar")
+  Signed-off-by: Author <author@email>
+  Reviewed-by: Reviewer <reviewer@email>
+
+If you are a first time contributor it is recommended that the patch
+itself be vetted by others privately before being posted to public lists.
+(This is required if you have been explicitly told your patches need
+more careful internal review.) These people are expected to have their
+"Reviewed-by" tag included in the resulting patch. Finding another
+developer familiar with Linux contribution, especially within your own
+organization, and having them help with reviews before sending them to
+the public mailing lists tends to significantly improve the quality of the
+resulting patches, and there by reduces the burden on other developers.
+
+If no one can be found to internally review patches and you need
+help finding such a person, or if you have any other questions
+related to this document and the developer community's expectations,
+please reach out to the private Technical Advisory Board mailing list:
+<[email protected]>.
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index 31ea120ce531..fb496b2ebfd3 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -495,7 +495,8 @@ Using Reported-by:, Tested-by:, Reviewed-by:, Suggested-by: and Fixes:
 The Reported-by tag gives credit to people who find bugs and report them and it
 hopefully inspires them to help us again in the future.  Please note that if
 the bug was reported in private, then ask for permission first before using the
-Reported-by tag.
+Reported-by tag. The tag is intended for bugs; please do not use it to credit
+feature requests.
 
 A Tested-by: tag indicates that the patch has been successfully tested (in
 some environment) by the person named.  This tag informs maintainers that
diff --git a/Documentation/scheduler/index.rst b/Documentation/scheduler/index.rst
index 88900aabdbf7..693eaac769a7 100644
--- a/Documentation/scheduler/index.rst
+++ b/Documentation/scheduler/index.rst
@@ -14,6 +14,7 @@ Linux Scheduler
     sched-domains
     sched-capacity
     sched-energy
+    schedutil
     sched-nice-design
     sched-rt-group
     sched-stats
diff --git a/Documentation/scheduler/sched-domains.rst b/Documentation/scheduler/sched-domains.rst
index 84dcdcd2911c..e57ad28301bd 100644
--- a/Documentation/scheduler/sched-domains.rst
+++ b/Documentation/scheduler/sched-domains.rst
@@ -37,10 +37,10 @@ rebalancing event for the current runqueue has arrived. The actual load
 balancing workhorse, run_rebalance_domains()->rebalance_domains(), is then run
 in softirq context (SCHED_SOFTIRQ).
 
-The latter function takes two arguments: the current CPU and whether it was idle
-at the time the scheduler_tick() happened and iterates over all sched domains
-our CPU is on, starting from its base domain and going up the ->parent chain.
-While doing that, it checks to see if the current domain has exhausted its
+The latter function takes two arguments: the runqueue of current CPU and whether
+the CPU was idle at the time the scheduler_tick() happened and iterates over all
+sched domains our CPU is on, starting from its base domain and going up the ->parent
+chain. While doing that, it checks to see if the current domain has exhausted its
 rebalance interval. If so, it runs load_balance() on that domain. It then checks
 the parent sched_domain (if it exists), and the parent of the parent and so
 forth.
diff --git a/Documentation/scheduler/schedutil.txt b/Documentation/scheduler/schedutil.rst
index 78f6b91e2291..32c7d69fc86c 100644
--- a/Documentation/scheduler/schedutil.txt
+++ b/Documentation/scheduler/schedutil.rst
@@ -1,11 +1,15 @@
+=========
+Schedutil
+=========
 
+.. note::
 
-NOTE; all this assumes a linear relation between frequency and work capacity,
-we know this is flawed, but it is the best workable approximation.
+   All this assumes a linear relation between frequency and work capacity,
+   we know this is flawed, but it is the best workable approximation.
 
 
 PELT (Per Entity Load Tracking)
--------------------------------
+===============================
 
 With PELT we track some metrics across the various scheduler entities, from
 individual tasks to task-group slices to CPU runqueues. As the basis for this
@@ -38,8 +42,8 @@ while 'runnable' will increase to reflect the amount of contention.
 For more detail see: kernel/sched/pelt.c
 
 
-Frequency- / CPU Invariance
----------------------------
+Frequency / CPU Invariance
+==========================
 
 Because consuming the CPU for 50% at 1GHz is not the same as consuming the CPU
 for 50% at 2GHz, nor is running 50% on a LITTLE CPU the same as running 50% on
@@ -47,7 +51,7 @@ a big CPU, we allow architectures to scale the time delta with two ratios, one
 Dynamic Voltage and Frequency Scaling (DVFS) ratio and one microarch ratio.
 
 For simple DVFS architectures (where software is in full control) we trivially
-compute the ratio as:
+compute the ratio as::
 
 	    f_cur
   r_dvfs := -----
@@ -55,7 +59,7 @@ compute the ratio as:
 
 For more dynamic systems where the hardware is in control of DVFS we use
 hardware counters (Intel APERF/MPERF, ARMv8.4-AMU) to provide us this ratio.
-For Intel specifically, we use:
+For Intel specifically, we use::
 
 	   APERF
   f_cur := ----- * P0
@@ -87,7 +91,7 @@ For more detail see:
 
 
 UTIL_EST / UTIL_EST_FASTUP
---------------------------
+==========================
 
 Because periodic tasks have their averages decayed while they sleep, even
 though when running their expected utilization will be the same, they suffer a
@@ -106,7 +110,7 @@ For more detail see: kernel/sched/fair.c:util_est_dequeue()
 
 
 UCLAMP
-------
+======
 
 It is possible to set effective u_min and u_max clamps on each CFS or RT task;
 the runqueue keeps an max aggregate of these clamps for all running tasks.
@@ -115,7 +119,7 @@ For more detail see: include/uapi/linux/sched/types.h
 
 
 Schedutil / DVFS
-----------------
+================
 
 Every time the scheduler load tracking is updated (task wakeup, task
 migration, time progression) we call out to schedutil to update the hardware
@@ -123,7 +127,7 @@ DVFS state.
 
 The basis is the CPU runqueue's 'running' metric, which per the above it is
 the frequency invariant utilization estimate of the CPU. From this we compute
-a desired frequency like:
+a desired frequency like::
 
              max( running, util_est );	if UTIL_EST
   u_cfs := { running;			otherwise
@@ -135,7 +139,7 @@ a desired frequency like:
 
   f_des := min( f_max, 1.25 u * f_max )
 
-XXX IO-wait; when the update is due to a task wakeup from IO-completion we
+XXX IO-wait: when the update is due to a task wakeup from IO-completion we
 boost 'u' above.
 
 This frequency is then used to select a P-state/OPP or directly munged into a
@@ -153,7 +157,7 @@ For more information see: kernel/sched/cpufreq_schedutil.c
 
 
 NOTES
------
+=====
 
  - On low-load scenarios, where DVFS is most relevant, the 'running' numbers
    will closely reflect utilization.
diff --git a/Documentation/sphinx/kerneldoc-preamble.sty b/Documentation/sphinx/kerneldoc-preamble.sty
new file mode 100644
index 000000000000..9d0204dc38be
--- /dev/null
+++ b/Documentation/sphinx/kerneldoc-preamble.sty
@@ -0,0 +1,226 @@
+% -*- coding: utf-8 -*-
+% SPDX-License-Identifier: GPL-2.0
+%
+% LaTeX preamble for "make latexdocs" or "make pdfdocs" including:
+%   - TOC width settings
+%   - Setting of tabulary (\tymin)
+%   - Headheight setting for fancyhdr
+%   - Fontfamily settings for CJK (Chinese, Japanese, and Korean) translations
+%
+% Note on the suffix of .sty:
+%   This is not implemented as a LaTeX style file, but as a file containing
+%   plain LaTeX code to be included into preamble.
+%   ".sty" is chosen because ".tex" would cause the build scripts to confuse
+%   this file with a LaTeX main file.
+%
+% Copyright (C) 2022  Akira Yokosawa
+
+% Custom width parameters for TOC
+%  - Redefine low-level commands defined in report.cls.
+%  - Indent of 2 chars is preserved for ease of comparison.
+% Summary of changes from default params:
+%   Width of page number (\@pnumwidth): 1.55em -> 2.7em
+%   Width of chapter number:            1.5em  -> 1.8em
+%   Indent of section number:           1.5em  -> 1.8em
+%   Width of section number:            2.6em  -> 3.2em
+%   Indent of sebsection number:        4.1em  -> 5em
+%   Width of subsection number:         3.5em  -> 4.3em
+%
+% These params can have 4 digit page counts, 2 digit chapter counts,
+% section counts of 4 digits + 1 period (e.g., 18.10), and subsection counts
+% of 5 digits + 2 periods (e.g., 18.7.13).
+\makeatletter
+%% Redefine \@pnumwidth (page number width)
+\renewcommand*\@pnumwidth{2.7em}
+%% Redefine \l@chapter (chapter list entry)
+\renewcommand*\l@chapter[2]{%
+  \ifnum \c@tocdepth >\m@ne
+    \addpenalty{-\@highpenalty}%
+    \vskip 1.0em \@plus\p@
+    \setlength\@tempdima{1.8em}%
+    \begingroup
+      \parindent \z@ \rightskip \@pnumwidth
+      \parfillskip -\@pnumwidth
+      \leavevmode \bfseries
+      \advance\leftskip\@tempdima
+      \hskip -\leftskip
+      #1\nobreak\hfil
+      \nobreak\hb@xt@\@pnumwidth{\hss #2%
+                                 \kern-\p@\kern\p@}\par
+      \penalty\@highpenalty
+    \endgroup
+  \fi}
+%% Redefine \l@section and \l@subsection
+\renewcommand*\l@section{\@dottedtocline{1}{1.8em}{3.2em}}
+\renewcommand*\l@subsection{\@dottedtocline{2}{5em}{4.3em}}
+\makeatother
+%% Sphinx < 1.8 doesn't have \sphinxtableofcontentshook
+\providecommand{\sphinxtableofcontentshook}{}
+%% Undefine it for compatibility with Sphinx 1.7.9
+\renewcommand{\sphinxtableofcontentshook}{} % Empty the hook
+
+% Prevent column squeezing of tabulary.  \tymin is set by Sphinx as:
+%   \setlength{\tymin}{3\fontcharwd\font`0 }
+% , which is too short.
+\setlength{\tymin}{20em}
+
+% Adjust \headheight for fancyhdr
+\addtolength{\headheight}{1.6pt}
+\addtolength{\topmargin}{-1.6pt}
+
+% Translations have Asian (CJK) characters which are only displayed if
+% xeCJK is used
+\IfFontExistsTF{Noto Sans CJK SC}{
+    % Load xeCJK when CJK font is available
+    \usepackage{xeCJK}
+    % Noto CJK fonts don't provide slant shape. [AutoFakeSlant] permits
+    % its emulation.
+    % Select KR variant at the beginning of each document so that quotation
+    % and apostorph symbols of half-width is used in TOC of Latin documents.
+    \IfFontExistsTF{Noto Serif CJK KR}{
+	\setCJKmainfont{Noto Serif CJK KR}[AutoFakeSlant]
+    }{
+	\setCJKmainfont{Noto Sans CJK KR}[AutoFakeSlant]
+    }
+    \setCJKsansfont{Noto Sans CJK KR}[AutoFakeSlant]
+    \setCJKmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]
+    % Teach xeCJK of half-width symbols
+    \xeCJKDeclareCharClass{HalfLeft}{`“,`‘}
+    \xeCJKDeclareCharClass{HalfRight}{`”,`’}
+    % CJK Language-specific font choices
+    %% for Simplified Chinese
+    \IfFontExistsTF{Noto Serif CJK SC}{
+	\newCJKfontfamily[SCmain]\scmain{Noto Serif CJK SC}[AutoFakeSlant]
+	\newCJKfontfamily[SCserif]\scserif{Noto Serif CJK SC}[AutoFakeSlant]
+    }{
+	\newCJKfontfamily[SCmain]\scmain{Noto Sans CJK SC}[AutoFakeSlant]
+	\newCJKfontfamily[SCserif]\scserif{Noto Sans CJK SC}[AutoFakeSlant]
+    }
+    \newCJKfontfamily[SCsans]\scsans{Noto Sans CJK SC}[AutoFakeSlant]
+    \newCJKfontfamily[SCmono]\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant]
+    %% for Traditional Chinese
+    \IfFontExistsTF{Noto Serif CJK TC}{
+	\newCJKfontfamily[TCmain]\tcmain{Noto Serif CJK TC}[AutoFakeSlant]
+	\newCJKfontfamily[TCserif]\tcserif{Noto Serif CJK TC}[AutoFakeSlant]
+    }{
+	\newCJKfontfamily[TCmain]\tcmain{Noto Sans CJK TC}[AutoFakeSlant]
+	\newCJKfontfamily[TCserif]\tcserif{Noto Sans CJK TC}[AutoFakeSlant]
+    }
+    \newCJKfontfamily[TCsans]\tcsans{Noto Sans CJK TC}[AutoFakeSlant]
+    \newCJKfontfamily[TCmono]\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant]
+    %% for Korean
+    \IfFontExistsTF{Noto Serif CJK KR}{
+	\newCJKfontfamily[KRmain]\krmain{Noto Serif CJK KR}[AutoFakeSlant]
+	\newCJKfontfamily[KRserif]\krserif{Noto Serif CJK KR}[AutoFakeSlant]
+    }{
+	\newCJKfontfamily[KRmain]\krmain{Noto Sans CJK KR}[AutoFakeSlant]
+	\newCJKfontfamily[KRserif]\krserif{Noto Sans CJK KR}[AutoFakeSlant]
+    }
+    \newCJKfontfamily[KRsans]\krsans{Noto Sans CJK KR}[AutoFakeSlant]
+    \newCJKfontfamily[KRmono]\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant]
+    %% for Japanese
+    \IfFontExistsTF{Noto Serif CJK JP}{
+	\newCJKfontfamily[JPmain]\jpmain{Noto Serif CJK JP}[AutoFakeSlant]
+	\newCJKfontfamily[JPserif]\jpserif{Noto Serif CJK JP}[AutoFakeSlant]
+    }{
+	\newCJKfontfamily[JPmain]\jpmain{Noto Sans CJK JP}[AutoFakeSlant]
+	\newCJKfontfamily[JPserif]\jpserif{Noto Sans CJK JP}[AutoFakeSlant]
+    }
+    \newCJKfontfamily[JPsans]\jpsans{Noto Sans CJK JP}[AutoFakeSlant]
+    \newCJKfontfamily[JPmono]\jpmono{Noto Sans Mono CJK JP}[AutoFakeSlant]
+    % Dummy commands for Sphinx < 2.3 (no 'extrapackages' support)
+    \providecommand{\onehalfspacing}{}
+    \providecommand{\singlespacing}{}
+    % Define custom macros to on/off CJK
+    %% One and half spacing for CJK contents
+    \newcommand{\kerneldocCJKon}{\makexeCJKactive\onehalfspacing}
+    \newcommand{\kerneldocCJKoff}{\makexeCJKinactive\singlespacing}
+    % Define custom macros for switching CJK font setting
+    %% for Simplified Chinese
+    \newcommand{\kerneldocBeginSC}{%
+	\begingroup%
+	\scmain%
+	\xeCJKDeclareCharClass{FullLeft}{`“,`‘}% Full-width in SC
+	\xeCJKDeclareCharClass{FullRight}{`”,`’}% Full-width in SC
+	\renewcommand{\CJKrmdefault}{SCserif}%
+	\renewcommand{\CJKsfdefault}{SCsans}%
+	\renewcommand{\CJKttdefault}{SCmono}%
+	\xeCJKsetup{CJKspace = false}% gobble white spaces by ' '
+	% For CJK ascii-art alignment
+	\setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]%
+    }
+    \newcommand{\kerneldocEndSC}{\endgroup}
+    %% for Traditional Chinese
+    \newcommand{\kerneldocBeginTC}{%
+	\begingroup%
+	\tcmain%
+	\xeCJKDeclareCharClass{FullLeft}{`“,`‘}% Full-width in TC
+	\xeCJKDeclareCharClass{FullRight}{`”,`’}% Full-width in TC
+	\renewcommand{\CJKrmdefault}{TCserif}%
+	\renewcommand{\CJKsfdefault}{TCsans}%
+	\renewcommand{\CJKttdefault}{TCmono}%
+	\xeCJKsetup{CJKspace = false}% gobble white spaces by ' '
+	% For CJK ascii-art alignment
+	\setmonofont{Noto Sans Mono CJK TC}[AutoFakeSlant]%
+    }
+    \newcommand{\kerneldocEndTC}{\endgroup}
+    %% for Korean
+    \newcommand{\kerneldocBeginKR}{%
+	\begingroup%
+	\krmain%
+	\renewcommand{\CJKrmdefault}{KRserif}%
+	\renewcommand{\CJKsfdefault}{KRsans}%
+	\renewcommand{\CJKttdefault}{KRmono}%
+	% \xeCJKsetup{CJKspace = true} % true by default
+	% For CJK ascii-art alignment (still misaligned for Hangul)
+	\setmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]%
+    }
+    \newcommand{\kerneldocEndKR}{\endgroup}
+    %% for Japanese
+    \newcommand{\kerneldocBeginJP}{%
+	\begingroup%
+	\jpmain%
+	\renewcommand{\CJKrmdefault}{JPserif}%
+	\renewcommand{\CJKsfdefault}{JPsans}%
+	\renewcommand{\CJKttdefault}{JPmono}%
+	\xeCJKsetup{CJKspace = false}% gobble white space by ' '
+	% For CJK ascii-art alignment
+	\setmonofont{Noto Sans Mono CJK JP}[AutoFakeSlant]%
+    }
+    \newcommand{\kerneldocEndJP}{\endgroup}
+
+    % Single spacing in literal blocks
+    \fvset{baselinestretch=1}
+    % To customize \sphinxtableofcontents
+    \usepackage{etoolbox}
+    % Inactivate CJK after tableofcontents
+    \apptocmd{\sphinxtableofcontents}{\kerneldocCJKoff}{}{}
+    \xeCJKsetup{CJKspace = true}% For inter-phrase space of Korean TOC
+}{ % No CJK font found
+    % Custom macros to on/off CJK and switch CJK fonts (Dummy)
+    \newcommand{\kerneldocCJKon}{}
+    \newcommand{\kerneldocCJKoff}{}
+    %% By defining \kerneldocBegin(SC|TC|KR|JP) as commands with an argument
+    %% and ignore the argument (#1) in their definitions, whole contents of
+    %% CJK chapters can be ignored.
+    \newcommand{\kerneldocBeginSC}[1]{%
+	%% Put a note on missing CJK fonts in place of zh_CN translation.
+	\begin{sphinxadmonition}{note}{Note on missing fonts:}
+	    Translations of Simplified Chinese (zh\_CN), Traditional Chinese
+	    (zh\_TW), Korean (ko\_KR), and Japanese (ja\_JP) were skipped
+	    due to the lack of suitable font families.
+
+	    If you want them, please install ``Noto Sans CJK'' font families
+	    by following instructions from
+	    \sphinxcode{./scripts/sphinx-pre-install}.
+	    Having optional ``Noto Serif CJK'' font families will improve
+	    the looks of those translations.
+	\end{sphinxadmonition}}
+    \newcommand{\kerneldocEndSC}{}
+    \newcommand{\kerneldocBeginTC}[1]{}
+    \newcommand{\kerneldocEndTC}{}
+    \newcommand{\kerneldocBeginKR}[1]{}
+    \newcommand{\kerneldocEndKR}{}
+    \newcommand{\kerneldocBeginJP}[1]{}
+    \newcommand{\kerneldocEndJP}{}
+}
diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py
index 3c78828330be..24d2b2addcce 100644
--- a/Documentation/sphinx/kfigure.py
+++ b/Documentation/sphinx/kfigure.py
@@ -31,10 +31,13 @@ u"""
 
     * ``dot(1)``: Graphviz (https://www.graphviz.org). If Graphviz is not
       available, the DOT language is inserted as literal-block.
+      For conversion to PDF, ``rsvg-convert(1)`` of librsvg
+      (https://gitlab.gnome.org/GNOME/librsvg) is used when available.
 
     * SVG to PDF: To generate PDF, you need at least one of this tools:
 
       - ``convert(1)``: ImageMagick (https://www.imagemagick.org)
+      - ``inkscape(1)``: Inkscape (https://inkscape.org/)
 
     List of customizations:
 
@@ -49,6 +52,7 @@ import os
 from os import path
 import subprocess
 from hashlib import sha1
+import re
 from docutils import nodes
 from docutils.statemachine import ViewList
 from docutils.parsers.rst import directives
@@ -109,10 +113,20 @@ def pass_handle(self, node):           # pylint: disable=W0613
 
 # Graphviz's dot(1) support
 dot_cmd = None
+# dot(1) -Tpdf should be used
+dot_Tpdf = False
 
 # ImageMagick' convert(1) support
 convert_cmd = None
 
+# librsvg's rsvg-convert(1) support
+rsvg_convert_cmd = None
+
+# Inkscape's inkscape(1) support
+inkscape_cmd = None
+# Inkscape prior to 1.0 uses different command options
+inkscape_ver_one = False
+
 
 def setup(app):
     # check toolchain first
@@ -160,23 +174,62 @@ def setupTools(app):
 
     This function is called once, when the builder is initiated.
     """
-    global dot_cmd, convert_cmd   # pylint: disable=W0603
+    global dot_cmd, dot_Tpdf, convert_cmd, rsvg_convert_cmd   # pylint: disable=W0603
+    global inkscape_cmd, inkscape_ver_one  # pylint: disable=W0603
     kernellog.verbose(app, "kfigure: check installed tools ...")
 
     dot_cmd = which('dot')
     convert_cmd = which('convert')
+    rsvg_convert_cmd = which('rsvg-convert')
+    inkscape_cmd = which('inkscape')
 
     if dot_cmd:
         kernellog.verbose(app, "use dot(1) from: " + dot_cmd)
+
+        try:
+            dot_Thelp_list = subprocess.check_output([dot_cmd, '-Thelp'],
+                                    stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError as err:
+            dot_Thelp_list = err.output
+            pass
+
+        dot_Tpdf_ptn = b'pdf'
+        dot_Tpdf = re.search(dot_Tpdf_ptn, dot_Thelp_list)
     else:
         kernellog.warn(app, "dot(1) not found, for better output quality install "
                        "graphviz from https://www.graphviz.org")
-    if convert_cmd:
-        kernellog.verbose(app, "use convert(1) from: " + convert_cmd)
+    if inkscape_cmd:
+        kernellog.verbose(app, "use inkscape(1) from: " + inkscape_cmd)
+        inkscape_ver = subprocess.check_output([inkscape_cmd, '--version'],
+                                               stderr=subprocess.DEVNULL)
+        ver_one_ptn = b'Inkscape 1'
+        inkscape_ver_one = re.search(ver_one_ptn, inkscape_ver)
+        convert_cmd = None
+        rsvg_convert_cmd = None
+        dot_Tpdf = False
+
     else:
-        kernellog.warn(app,
-            "convert(1) not found, for SVG to PDF conversion install "
-            "ImageMagick (https://www.imagemagick.org)")
+        if convert_cmd:
+            kernellog.verbose(app, "use convert(1) from: " + convert_cmd)
+        else:
+            kernellog.warn(app,
+                "Neither inkscape(1) nor convert(1) found.\n"
+                "For SVG to PDF conversion, "
+                "install either Inkscape (https://inkscape.org/) (preferred) or\n"
+                "ImageMagick (https://www.imagemagick.org)")
+
+        if rsvg_convert_cmd:
+            kernellog.verbose(app, "use rsvg-convert(1) from: " + rsvg_convert_cmd)
+            kernellog.verbose(app, "use 'dot -Tsvg' and rsvg-convert(1) for DOT -> PDF conversion")
+            dot_Tpdf = False
+        else:
+            kernellog.verbose(app,
+                "rsvg-convert(1) not found.\n"
+                "  SVG rendering of convert(1) is done by ImageMagick-native renderer.")
+            if dot_Tpdf:
+                kernellog.verbose(app, "use 'dot -Tpdf' for DOT -> PDF conversion")
+            else:
+                kernellog.verbose(app, "use 'dot -Tsvg' and convert(1) for DOT -> PDF conversion")
 
 
 # integrate conversion tools
@@ -242,7 +295,7 @@ def convert_image(img_node, translator, src_fname=None):
     elif in_ext == '.svg':
 
         if translator.builder.format == 'latex':
-            if convert_cmd is None:
+            if not inkscape_cmd and convert_cmd is None:
                 kernellog.verbose(app,
                                   "no SVG to PDF conversion available / include SVG raw.")
                 img_node.replace_self(file2literal(src_fname))
@@ -266,7 +319,14 @@ def convert_image(img_node, translator, src_fname=None):
 
             if in_ext == '.dot':
                 kernellog.verbose(app, 'convert DOT to: {out}/' + _name)
-                ok = dot2format(app, src_fname, dst_fname)
+                if translator.builder.format == 'latex' and not dot_Tpdf:
+                    svg_fname = path.join(translator.builder.outdir, fname + '.svg')
+                    ok1 = dot2format(app, src_fname, svg_fname)
+                    ok2 = svg2pdf_by_rsvg(app, svg_fname, dst_fname)
+                    ok = ok1 and ok2
+
+                else:
+                    ok = dot2format(app, src_fname, dst_fname)
 
             elif in_ext == '.svg':
                 kernellog.verbose(app, 'convert SVG to: {out}/' + _name)
@@ -303,22 +363,70 @@ def dot2format(app, dot_fname, out_fname):
     return bool(exit_code == 0)
 
 def svg2pdf(app, svg_fname, pdf_fname):
-    """Converts SVG to PDF with ``convert(1)`` command.
+    """Converts SVG to PDF with ``inkscape(1)`` or ``convert(1)`` command.
 
-    Uses ``convert(1)`` from ImageMagick (https://www.imagemagick.org) for
-    conversion.  Returns ``True`` on success and ``False`` if an error occurred.
+    Uses ``inkscape(1)`` from Inkscape (https://inkscape.org/) or ``convert(1)``
+    from ImageMagick (https://www.imagemagick.org) for conversion.
+    Returns ``True`` on success and ``False`` if an error occurred.
 
     * ``svg_fname`` pathname of the input SVG file with extension (``.svg``)
     * ``pdf_name``  pathname of the output PDF file with extension (``.pdf``)
 
     """
     cmd = [convert_cmd, svg_fname, pdf_fname]
-    # use stdout and stderr from parent
-    exit_code = subprocess.call(cmd)
+    cmd_name = 'convert(1)'
+
+    if inkscape_cmd:
+        cmd_name = 'inkscape(1)'
+        if inkscape_ver_one:
+            cmd = [inkscape_cmd, '-o', pdf_fname, svg_fname]
+        else:
+            cmd = [inkscape_cmd, '-z', '--export-pdf=%s' % pdf_fname, svg_fname]
+
+    try:
+        warning_msg = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
+        exit_code = 0
+    except subprocess.CalledProcessError as err:
+        warning_msg = err.output
+        exit_code = err.returncode
+        pass
+
     if exit_code != 0:
         kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd)))
+        if warning_msg:
+            kernellog.warn(app, "Warning msg from %s: %s"
+                           % (cmd_name, str(warning_msg, 'utf-8')))
+    elif warning_msg:
+        kernellog.verbose(app, "Warning msg from %s (likely harmless):\n%s"
+                          % (cmd_name, str(warning_msg, 'utf-8')))
+
     return bool(exit_code == 0)
 
+def svg2pdf_by_rsvg(app, svg_fname, pdf_fname):
+    """Convert SVG to PDF with ``rsvg-convert(1)`` command.
+
+    * ``svg_fname`` pathname of input SVG file, including extension ``.svg``
+    * ``pdf_fname`` pathname of output PDF file, including extension ``.pdf``
+
+    Input SVG file should be the one generated by ``dot2format()``.
+    SVG -> PDF conversion is done by ``rsvg-convert(1)``.
+
+    If ``rsvg-convert(1)`` is unavailable, fall back to ``svg2pdf()``.
+
+    """
+
+    if rsvg_convert_cmd is None:
+        ok = svg2pdf(app, svg_fname, pdf_fname)
+    else:
+        cmd = [rsvg_convert_cmd, '--format=pdf', '-o', pdf_fname, svg_fname]
+        # use stdout and stderr from parent
+        exit_code = subprocess.call(cmd)
+        if exit_code != 0:
+            kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd)))
+        ok = bool(exit_code == 0)
+
+    return ok
+
 
 # image handling
 # ---------------------
diff --git a/Documentation/trace/osnoise-tracer.rst b/Documentation/trace/osnoise-tracer.rst
index b648cb9bf1f0..963def9f97c6 100644
--- a/Documentation/trace/osnoise-tracer.rst
+++ b/Documentation/trace/osnoise-tracer.rst
@@ -51,7 +51,7 @@ For example::
         [root@f32 ~]# cd /sys/kernel/tracing/
         [root@f32 tracing]# echo osnoise > current_tracer
 
-It is possible to follow the trace by reading the trace trace file::
+It is possible to follow the trace by reading the trace file::
 
         [root@f32 tracing]# cat trace
         # tracer: osnoise
@@ -108,7 +108,7 @@ The tracer has a set of options inside the osnoise directory, they are:
    option.
  - tracing_threshold: the minimum delta between two time() reads to be
    considered as noise, in us. When set to 0, the default value will
-   will be used, which is currently 5 us.
+   be used, which is currently 5 us.
 
 Additional Tracing
 ------------------
diff --git a/Documentation/translations/conf.py b/Documentation/translations/conf.py
deleted file mode 100644
index 92cdbba74229..000000000000
--- a/Documentation/translations/conf.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-# SPDX-License-Identifier: GPL-2.0
-
-# -- Additinal options for LaTeX output ----------------------------------
-# font config for ascii-art alignment
-
-latex_elements['preamble']  += '''
-    \\IfFontExistsTF{Noto Sans CJK SC}{
-	% For CJK ascii-art alignment
-	\\setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]
-    }{}
-'''
diff --git a/Documentation/translations/ja_JP/index.rst b/Documentation/translations/ja_JP/index.rst
index 88d4d98eed15..20738c931d02 100644
--- a/Documentation/translations/ja_JP/index.rst
+++ b/Documentation/translations/ja_JP/index.rst
@@ -3,7 +3,7 @@
 	\renewcommand\thesection*
 	\renewcommand\thesubsection*
 	\kerneldocCJKon
-	\kerneldocBeginJP
+	\kerneldocBeginJP{
 
 Japanese translations
 =====================
@@ -15,4 +15,4 @@ Japanese translations
 
 .. raw:: latex
 
-	\kerneldocEndJP
+	}\kerneldocEndJP
diff --git a/Documentation/translations/ko_KR/index.rst b/Documentation/translations/ko_KR/index.rst
index f636b482fb4c..4add6b2fe1f2 100644
--- a/Documentation/translations/ko_KR/index.rst
+++ b/Documentation/translations/ko_KR/index.rst
@@ -3,7 +3,7 @@
 	\renewcommand\thesection*
 	\renewcommand\thesubsection*
 	\kerneldocCJKon
-	\kerneldocBeginKR
+	\kerneldocBeginKR{
 
 한국어 번역
 ===========
@@ -26,5 +26,4 @@
 
 .. raw:: latex
 
-    \normalsize
-    \kerneldocEndKR
+    }\kerneldocEndKR
diff --git a/Documentation/translations/zh_CN/accounting/delay-accounting.rst b/Documentation/translations/zh_CN/accounting/delay-accounting.rst
index 67d5606e5401..f1849411018e 100644
--- a/Documentation/translations/zh_CN/accounting/delay-accounting.rst
+++ b/Documentation/translations/zh_CN/accounting/delay-accounting.rst
@@ -17,6 +17,8 @@ a) 等待一个CPU（任务为可运行）
 b) 完成由该任务发起的块I/O同步请求
 c) 页面交换
 d) 内存回收
+e) 页缓存抖动
+f) 直接规整
 
 并将这些统计信息通过taskstats接口提供给用户空间。
 
@@ -37,10 +39,10 @@ d) 内存回收
 向用户态返回一个通用数据结构，对应每pid或每tgid的统计信息。延时计数功能填写
 该数据结构的特定字段。见
 
-     include/linux/taskstats.h
+     include/uapi/linux/taskstats.h
 
 其描述了延时计数相关字段。系统通常以计数器形式返回 CPU、同步块 I/O、交换、内存
-回收等的累积延时。
+回收、页缓存抖动、直接规整等的累积延时。
 
 取任务某计数器两个连续读数的差值，将得到任务在该时间间隔内等待对应资源的总延时。
 
@@ -72,40 +74,36 @@ kernel.task_delayacct进行开关。注意，只有在启用延时计数后启�
 
 getdelays命令的一般格式::
 
-	getdelays [-t tgid] [-p pid] [-c cmd...]
+	getdelays [-dilv] [-t tgid] [-p pid]
 
 获取pid为10的任务从系统启动后的延时信息::
 
-	# ./getdelays -p 10
+	# ./getdelays -d -p 10
 	（输出信息和下例相似）
 
 获取所有tgid为5的任务从系统启动后的总延时信息::
 
-	# ./getdelays -t 5
-
-
-	CPU	count	real total	virtual total	delay total
-		7876	92005750	100000000	24001500
-	IO	count	delay total
-		0	0
-	SWAP	count	delay total
-		0	0
-	RECLAIM	count	delay total
-		0	0
-
-获取指定简单命令运行时的延时信息::
-
-  # ./getdelays -c ls /
-
-  bin   data1  data3  data5  dev  home  media  opt   root  srv        sys  usr
-  boot  data2  data4  data6  etc  lib   mnt    proc  sbin  subdomain  tmp  var
-
-
-  CPU	count	real total	virtual total	delay total
-	6	4000250		4000000		0
-  IO	count	delay total
-	0	0
-  SWAP	count	delay total
-	0	0
-  RECLAIM	count	delay total
-	0	0
+	# ./getdelays -d -t 5
+	print delayacct stats ON
+	TGID	5
+
+
+	CPU             count     real total  virtual total    delay total  delay average
+	                    8        7000000        6872122        3382277          0.423ms
+	IO              count    delay total  delay average
+	                    0              0              0ms
+	SWAP            count    delay total  delay average
+	                    0              0              0ms
+	RECLAIM         count    delay total  delay average
+	                    0              0              0ms
+	THRASHING       count    delay total  delay average
+	                    0              0              0ms
+	COMPACT         count    delay total  delay average
+	                    0              0              0ms
+
+获取pid为1的IO计数，它只和-p一起使用::
+	# ./getdelays -i -p 1
+	printing IO accounting
+	linuxrc: read=65536, write=0, cancelled_write=0
+
+上面的命令与-v一起使用，可以获取更多调试信息。
diff --git a/Documentation/translations/zh_CN/admin-guide/index.rst b/Documentation/translations/zh_CN/admin-guide/index.rst
index 548e57f4b3f1..be535ffaf4b0 100644
--- a/Documentation/translations/zh_CN/admin-guide/index.rst
+++ b/Documentation/translations/zh_CN/admin-guide/index.rst
@@ -20,15 +20,15 @@ Linux 内核用户和管理员指南
 
 Todolist:
 
-   kernel-parameters
-   devices
-   sysctl/index
+*   kernel-parameters
+*   devices
+*   sysctl/index
 
 本节介绍CPU漏洞及其缓解措施。
 
 Todolist:
 
-   hw-vuln/index
+*   hw-vuln/index
 
 下面的一组文档，针对的是试图跟踪问题和bug的用户。
 
@@ -44,18 +44,18 @@ Todolist:
 
 Todolist:
 
-   reporting-bugs
-   ramoops
-   dynamic-debug-howto
-   kdump/index
-   perf/index
+*   reporting-bugs
+*   ramoops
+*   dynamic-debug-howto
+*   kdump/index
+*   perf/index
 
 这是应用程序开发人员感兴趣的章节的开始。可以在这里找到涵盖内核ABI各个
 方面的文档。
 
 Todolist:
 
-   sysfs-rules
+*   sysfs-rules
 
 本手册的其余部分包括各种指南，介绍如何根据您的喜好配置内核的特定行为。
 
@@ -69,61 +69,61 @@ Todolist:
    lockup-watchdogs
    unicode
    sysrq
+   mm/index
 
 Todolist:
 
-   acpi/index
-   aoe/index
-   auxdisplay/index
-   bcache
-   binderfs
-   binfmt-misc
-   blockdev/index
-   bootconfig
-   braille-console
-   btmrvl
-   cgroup-v1/index
-   cgroup-v2
-   cifs/index
-   dell_rbu
-   device-mapper/index
-   edid
-   efi-stub
-   ext4
-   nfs/index
-   gpio/index
-   highuid
-   hw_random
-   initrd
-   iostats
-   java
-   jfs
-   kernel-per-CPU-kthreads
-   laptops/index
-   lcd-panel-cgram
-   ldm
-   LSM/index
-   md
-   media/index
-   mm/index
-   module-signing
-   mono
-   namespaces/index
-   numastat
-   parport
-   perf-security
-   pm/index
-   pnp
-   rapidio
-   ras
-   rtc
-   serial-console
-   svga
-   thunderbolt
-   ufs
-   vga-softcursor
-   video-output
-   xfs
+*   acpi/index
+*   aoe/index
+*   auxdisplay/index
+*   bcache
+*   binderfs
+*   binfmt-misc
+*   blockdev/index
+*   bootconfig
+*   braille-console
+*   btmrvl
+*   cgroup-v1/index
+*   cgroup-v2
+*   cifs/index
+*   dell_rbu
+*   device-mapper/index
+*   edid
+*   efi-stub
+*   ext4
+*   nfs/index
+*   gpio/index
+*   highuid
+*   hw_random
+*   initrd
+*   iostats
+*   java
+*   jfs
+*   kernel-per-CPU-kthreads
+*   laptops/index
+*   lcd-panel-cgram
+*   ldm
+*   LSM/index
+*   md
+*   media/index
+*   module-signing
+*   mono
+*   namespaces/index
+*   numastat
+*   parport
+*   perf-security
+*   pm/index
+*   pnp
+*   rapidio
+*   ras
+*   rtc
+*   serial-console
+*   svga
+*   thunderbolt
+*   ufs
+*   vga-softcursor
+*   video-output
+*   xfs
 
 .. only::  subproject and html
 
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst
new file mode 100644
index 000000000000..0c8276109fc0
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst
@@ -0,0 +1,28 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/damon/index.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+============
+监测数据访问
+============
+
+:doc:`DAMON </vm/damon/index>` 允许轻量级的数据访问监测。使用DAMON，
+用户可以分析他们系统的内存访问模式，并优化它们。
+
+.. toctree::
+   :maxdepth: 2
+
+   start
+   usage
+   reclaim
+
+
+
+
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst
new file mode 100644
index 000000000000..9e541578f38d
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst
@@ -0,0 +1,232 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/damon/reclaim.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+===============
+基于DAMON的回收
+===============
+
+基于DAMON的回收（DAMON_RECLAIM）是一个静态的内核模块，旨在用于轻度内存压力下的主动和轻
+量级的回收。它的目的不是取代基于LRU列表的页面回收，而是有选择地用于不同程度的内存压力和要
+求。
+
+哪些地方需要主动回收？
+======================
+
+在一般的内存超量使用（over-committed systems，虚拟化相关术语）的系统上，主动回收冷页
+有助于节省内存和减少延迟高峰，这些延迟是由直接回收进程或kswapd的CPU消耗引起的，同时只产
+生最小的性能下降 [1]_ [2]_ 。
+
+基于空闲页报告 [3]_ 的内存过度承诺的虚拟化系统就是很好的例子。在这样的系统中，客户机
+向主机报告他们的空闲内存，而主机则将报告的内存重新分配给其他客户。因此，系统的内存得到了充
+分的利用。然而，客户可能不那么节省内存，主要是因为一些内核子系统和用户空间应用程序被设计为
+使用尽可能多的内存。然后，客户机可能只向主机报告少量的内存是空闲的，导致系统的内存利用率下降。
+在客户中运行主动回收可以缓解这个问题。
+
+它是如何工作的？
+================
+
+DAMON_RECLAIM找到在特定时间内没有被访问的内存区域并分页。为了避免它在分页操作中消耗过多
+的CPU，可以配置一个速度限制。在这个速度限制下，它首先分页出那些没有被访问过的内存区域。系
+统管理员还可以配置在什么情况下这个方案应该自动激活和停用三个内存压力水位。
+
+接口: 模块参数
+==============
+
+要使用这个功能，你首先要确保你的系统运行在一个以 ``CONFIG_DAMON_RECLAIM=y`` 构建的内
+核上。
+
+为了让系统管理员启用或禁用它，并为给定的系统进行调整，DAMON_RECLAIM利用了模块参数。也就
+是说，你可以把 ``damon_reclaim.<parameter>=<value>`` 放在内核启动命令行上，或者把
+适当的值写入 ``/sys/modules/damon_reclaim/parameters/<parameter>`` 文件。
+
+注意，除 ``启用`` 外的参数值只在DAMON_RECLAIM启动时应用。因此，如果你想在运行时应用新
+的参数值，而DAMON_RECLAIM已经被启用，你应该通过 ``启用`` 的参数文件禁用和重新启用它。
+在重新启用之前，应将新的参数值写入适当的参数值中。
+
+下面是每个参数的描述。
+
+enable
+------
+
+启用或禁用DAMON_RECLAIM。
+
+你可以通过把这个参数的值设置为 ``Y`` 来启用DAMON_RCLAIM，把它设置为 ``N`` 可以禁用
+DAMON_RECLAIM。注意，由于基于水位的激活条件，DAMON_RECLAIM不能进行真正的监测和回收。
+这一点请参考下面关于水位参数的描述。
+
+min_age
+-------
+
+识别冷内存区域的时间阈值，单位是微秒。
+
+如果一个内存区域在这个时间或更长的时间内没有被访问，DAMON_RECLAIM会将该区域识别为冷的，
+并回收它。
+
+默认为120秒。
+
+quota_ms
+--------
+
+回收的时间限制，以毫秒为单位。
+
+DAMON_RECLAIM 试图在一个时间窗口（quota_reset_interval_ms）内只使用到这个时间，以
+尝试回收冷页。这可以用来限制DAMON_RECLAIM的CPU消耗。如果该值为零，则该限制被禁用。
+
+默认为10ms。
+
+quota_sz
+--------
+
+回收的内存大小限制，单位为字节。
+
+DAMON_RECLAIM 收取在一个时间窗口（quota_reset_interval_ms）内试图回收的内存量，并
+使其不超过这个限制。这可以用来限制CPU和IO的消耗。如果该值为零，则限制被禁用。
+
+默认情况下是128 MiB。
+
+quota_reset_interval_ms
+-----------------------
+
+时间/大小配额收取重置间隔，单位为毫秒。
+
+时间（quota_ms）和大小（quota_sz）的配额的目标重置间隔。也就是说，DAMON_RECLAIM在
+尝试回收‘不’超过quota_ms毫秒或quota_sz字节的内存。
+
+默认为1秒。
+
+wmarks_interval
+---------------
+
+当DAMON_RECLAIM被启用但由于其水位规则而不活跃时，在检查水位之前的最小等待时间。
+
+wmarks_high
+-----------
+
+高水位的可用内存率（每千字节）。
+
+如果系统的可用内存（以每千字节为单位）高于这个数值，DAMON_RECLAIM就会变得不活跃，所以
+它什么也不做，只是定期检查水位。
+
+wmarks_mid
+----------
+
+中间水位的可用内存率（每千字节）。
+
+如果系统的空闲内存（以每千字节为单位）在这个和低水位线之间，DAMON_RECLAIM就会被激活，
+因此开始监测和回收。
+
+wmarks_low
+----------
+
+低水位的可用内存率（每千字节）。
+
+如果系统的空闲内存（以每千字节为单位）低于这个数值，DAMON_RECLAIM就会变得不活跃，所以
+它除了定期检查水位外什么都不做。在这种情况下，系统会退回到基于LRU列表的页面粒度回收逻辑。
+
+sample_interval
+---------------
+
+监测的采样间隔，单位是微秒。
+
+DAMON用于监测冷内存的采样间隔。更多细节请参考DAMON文档 (:doc:`usage`) 。
+
+aggr_interval
+-------------
+
+监测的聚集间隔，单位是微秒。
+
+DAMON对冷内存监测的聚集间隔。更多细节请参考DAMON文档 (:doc:`usage`)。
+
+min_nr_regions
+--------------
+
+监测区域的最小数量。
+
+DAMON用于冷内存监测的最小监测区域数。这可以用来设置监测质量的下限。但是，设
+置的太高可能会导致监测开销的增加。更多细节请参考DAMON文档 (:doc:`usage`) 。
+
+max_nr_regions
+--------------
+
+监测区域的最大数量。
+
+DAMON用于冷内存监测的最大监测区域数。这可以用来设置监测开销的上限值。但是，
+设置得太低可能会导致监测质量不好。更多细节请参考DAMON文档 (:doc:`usage`) 。
+
+monitor_region_start
+--------------------
+
+目标内存区域的物理地址起点。
+
+DAMON_RECLAIM将对其进行工作的内存区域的起始物理地址。也就是说，DAMON_RECLAIM
+将在这个区域中找到冷的内存区域并进行回收。默认情况下，该区域使用最大系统内存区。
+
+monitor_region_end
+------------------
+
+目标内存区域的结束物理地址。
+
+DAMON_RECLAIM将对其进行工作的内存区域的末端物理地址。也就是说，DAMON_RECLAIM将
+在这个区域内找到冷的内存区域并进行回收。默认情况下，该区域使用最大系统内存区。
+
+kdamond_pid
+-----------
+
+DAMON线程的PID。
+
+如果DAMON_RECLAIM被启用，这将成为工作线程的PID。否则，为-1。
+
+nr_reclaim_tried_regions
+------------------------
+
+试图通过DAMON_RECLAIM回收的内存区域的数量。
+
+bytes_reclaim_tried_regions
+---------------------------
+
+试图通过DAMON_RECLAIM回收的内存区域的总字节数。
+
+nr_reclaimed_regions
+--------------------
+
+通过DAMON_RECLAIM成功回收的内存区域的数量。
+
+bytes_reclaimed_regions
+-----------------------
+
+通过DAMON_RECLAIM成功回收的内存区域的总字节数。
+
+nr_quota_exceeds
+----------------
+
+超过时间/空间配额限制的次数。
+
+例子
+====
+
+下面的运行示例命令使DAMON_RECLAIM找到30秒或更长时间没有访问的内存区域并“回收”？
+为了避免DAMON_RECLAIM在分页操作中消耗过多的CPU时间，回收被限制在每秒1GiB以内。
+它还要求DAMON_RECLAIM在系统的可用内存率超过50%时不做任何事情，但如果它低于40%时
+就开始真正的工作。如果DAMON_RECLAIM没有取得进展，因此空闲内存率低于20%，它会要求
+DAMON_RECLAIM再次什么都不做，这样我们就可以退回到基于LRU列表的页面粒度回收了::
+
+    # cd /sys/modules/damon_reclaim/parameters
+    # echo 30000000 > min_age
+    # echo $((1 * 1024 * 1024 * 1024)) > quota_sz
+    # echo 1000 > quota_reset_interval_ms
+    # echo 500 > wmarks_high
+    # echo 400 > wmarks_mid
+    # echo 200 > wmarks_low
+    # echo Y > enabled
+
+.. [1] https://research.google/pubs/pub48551/
+.. [2] https://lwn.net/Articles/787611/
+.. [3] https://www.kernel.org/doc/html/latest/vm/free_page_reporting.html
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst
new file mode 100644
index 000000000000..67d1b49481dc
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst
@@ -0,0 +1,132 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/damon/start.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+========
+入门指南
+========
+
+本文通过演示DAMON的默认用户空间工具，简要地介绍了如何使用DAMON。请注意，为了简洁
+起见，本文档只描述了它的部分功能。更多细节请参考该工具的使用文档。
+`doc <https://github.com/awslabs/damo/blob/next/USAGE.md>`_ .
+
+
+前提条件
+========
+
+内核
+----
+
+首先，你要确保你当前系统中跑的内核构建时选定了这个功能选项 ``CONFIG_DAMON_*=y``.
+
+
+用户空间工具
+------------
+
+在演示中，我们将使用DAMON的默认用户空间工具，称为DAMON Operator（DAMO）。它可以在
+https://github.com/awslabs/damo找到。下面的例子假设DAMO在你的$PATH上。当然，但
+这并不是强制性的。
+
+因为DAMO使用的是DAMON的debugfs接口(详情请参考 :doc:`usage` 中的使用方法) 你应该
+确保debugfs被挂载。手动挂载它，如下所示::
+
+    # mount -t debugfs none /sys/kernel/debug/
+
+或者在你的 ``/etc/fstab`` 文件中添加以下一行，这样你的系统就可以在启动时自动挂载
+debugfs了::
+
+    debugfs /sys/kernel/debug debugfs defaults 0 0
+
+
+记录数据访问模式
+================
+
+下面的命令记录了一个程序的内存访问模式，并将监测结果保存到文件中。 ::
+
+    $ git clone https://github.com/sjp38/masim
+    $ cd masim; make; ./masim ./configs/zigzag.cfg &
+    $ sudo damo record -o damon.data $(pidof masim)
+
+命令的前两行下载了一个人工内存访问生成器程序并在后台运行。生成器将重复地逐一访问两个
+100 MiB大小的内存区域。你可以用你的真实工作负载来代替它。最后一行要求 ``damo`` 将
+访问模式记录在 ``damon.data`` 文件中。
+
+
+将记录的模式可视化
+==================
+
+你可以在heatmap中直观地看到这种模式，显示哪个内存区域（X轴）何时被访问（Y轴）以及访
+问的频率（数字）。::
+
+    $ sudo damo report heats --heatmap stdout
+    22222222222222222222222222222222222222211111111111111111111111111111111111111100
+    44444444444444444444444444444444444444434444444444444444444444444444444444443200
+    44444444444444444444444444444444444444433444444444444444444444444444444444444200
+    33333333333333333333333333333333333333344555555555555555555555555555555555555200
+    33333333333333333333333333333333333344444444444444444444444444444444444444444200
+    22222222222222222222222222222222222223355555555555555555555555555555555555555200
+    00000000000000000000000000000000000000288888888888888888888888888888888888888400
+    00000000000000000000000000000000000000288888888888888888888888888888888888888400
+    33333333333333333333333333333333333333355555555555555555555555555555555555555200
+    88888888888888888888888888888888888888600000000000000000000000000000000000000000
+    88888888888888888888888888888888888888600000000000000000000000000000000000000000
+    33333333333333333333333333333333333333444444444444444444444444444444444444443200
+    00000000000000000000000000000000000000288888888888888888888888888888888888888400
+    [...]
+    # access_frequency:  0  1  2  3  4  5  6  7  8  9
+    # x-axis: space (139728247021568-139728453431248: 196.848 MiB)
+    # y-axis: time (15256597248362-15326899978162: 1 m 10.303 s)
+    # resolution: 80x40 (2.461 MiB and 1.758 s for each character)
+
+你也可以直观地看到工作集的大小分布，按大小排序。::
+
+    $ sudo damo report wss --range 0 101 10
+    # <percentile> <wss>
+    # target_id     18446632103789443072
+    # avr:  107.708 MiB
+      0             0 B |                                                           |
+     10      95.328 MiB |****************************                               |
+     20      95.332 MiB |****************************                               |
+     30      95.340 MiB |****************************                               |
+     40      95.387 MiB |****************************                               |
+     50      95.387 MiB |****************************                               |
+     60      95.398 MiB |****************************                               |
+     70      95.398 MiB |****************************                               |
+     80      95.504 MiB |****************************                               |
+     90     190.703 MiB |*********************************************************  |
+    100     196.875 MiB |***********************************************************|
+
+在上述命令中使用 ``--sortby`` 选项，可以显示工作集的大小是如何按时间顺序变化的。::
+
+    $ sudo damo report wss --range 0 101 10 --sortby time
+    # <percentile> <wss>
+    # target_id     18446632103789443072
+    # avr:  107.708 MiB
+      0       3.051 MiB |                                                           |
+     10     190.703 MiB |***********************************************************|
+     20      95.336 MiB |*****************************                              |
+     30      95.328 MiB |*****************************                              |
+     40      95.387 MiB |*****************************                              |
+     50      95.332 MiB |*****************************                              |
+     60      95.320 MiB |*****************************                              |
+     70      95.398 MiB |*****************************                              |
+     80      95.398 MiB |*****************************                              |
+     90      95.340 MiB |*****************************                              |
+    100      95.398 MiB |*****************************                              |
+
+
+数据访问模式感知的内存管理
+==========================
+
+以下三个命令使每一个大小>=4K的内存区域在你的工作负载中没有被访问>=60秒，就会被换掉。 ::
+
+    $ echo "#min-size max-size min-acc max-acc min-age max-age action" > test_scheme
+    $ echo "4K        max      0       0       60s     max     pageout" >> test_scheme
+    $ damo schemes -c test_scheme <pid of your workload>
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst
new file mode 100644
index 000000000000..5d7533347216
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst
@@ -0,0 +1,286 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/damon/usage.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+========
+详细用法
+========
+
+DAMON 为不同的用户提供了下面三种接口。
+
+- *DAMON用户空间工具。*
+  `这 <https://github.com/awslabs/damo>`_ 为有这特权的人， 如系统管理员，希望有一个刚好
+  可以工作的人性化界面。
+  使用它，用户可以以人性化的方式使用DAMON的主要功能。不过，它可能不会为特殊情况进行高度调整。
+  它同时支持虚拟和物理地址空间的监测。更多细节，请参考它的 `使用文档
+  <https://github.com/awslabs/damo/blob/next/USAGE.md>`_。
+- *debugfs接口。*
+  :ref:`这 <debugfs_interface>` 是为那些希望更高级的使用DAMON的特权用户空间程序员准备的。
+  使用它，用户可以通过读取和写入特殊的debugfs文件来使用DAMON的主要功能。因此，你可以编写和使
+  用你个性化的DAMON debugfs包装程序，代替你读/写debugfs文件。  `DAMON用户空间工具
+  <https://github.com/awslabs/damo>`_ 就是这种程序的一个例子  它同时支持虚拟和物理地址
+  空间的监测。注意，这个界面只提供简单的监测结果 :ref:`统计 <damos_stats>`。对于详细的监测
+  结果，DAMON提供了一个:ref:`跟踪点 <tracepoint>`。
+
+- *内核空间编程接口。*
+  :doc:`This </vm/damon/api>` 这是为内核空间程序员准备的。使用它，用户可以通过为你编写内
+  核空间的DAMON应用程序，最灵活有效地利用DAMON的每一个功能。你甚至可以为各种地址空间扩展DAMON。
+  详细情况请参考接口 :doc:`文件 </vm/damon/api>`。
+
+
+debugfs接口
+===========
+
+DAMON导出了八个文件, ``attrs``, ``target_ids``, ``init_regions``,
+``schemes``, ``monitor_on``, ``kdamond_pid``, ``mk_contexts`` 和
+``rm_contexts`` under its debugfs directory, ``<debugfs>/damon/``.
+
+
+属性
+----
+
+用户可以通过读取和写入 ``attrs`` 文件获得和设置 ``采样间隔`` 、 ``聚集间隔`` 、 ``区域更新间隔``
+以及监测目标区域的最小/最大数量。要详细了解监测属性，请参考 `:doc:/vm/damon/design` 。例如，
+下面的命令将这些值设置为5ms、100ms、1000ms、10和1000，然后再次检查::
+
+    # cd <debugfs>/damon
+    # echo 5000 100000 1000000 10 1000 > attrs
+    # cat attrs
+    5000 100000 1000000 10 1000
+
+
+目标ID
+------
+
+一些类型的地址空间支持多个监测目标。例如，虚拟内存地址空间的监测可以有多个进程作为监测目标。用户
+可以通过写入目标的相关id值来设置目标，并通过读取 ``target_ids`` 文件来获得当前目标的id。在监
+测虚拟地址空间的情况下，这些值应该是监测目标进程的pid。例如，下面的命令将pid为42和4242的进程设
+为监测目标，并再次检查::
+
+    # cd <debugfs>/damon
+    # echo 42 4242 > target_ids
+    # cat target_ids
+    42 4242
+
+用户还可以通过在文件中写入一个特殊的关键字 "paddr\n" 来监测系统的物理内存地址空间。因为物理地
+址空间监测不支持多个目标，读取文件会显示一个假值，即 ``42`` ，如下图所示::
+
+    # cd <debugfs>/damon
+    # echo paddr > target_ids
+    # cat target_ids
+    42
+
+请注意，设置目标ID并不启动监测。
+
+
+初始监测目标区域
+----------------
+
+在虚拟地址空间监测的情况下，DAMON自动设置和更新监测的目标区域，这样就可以覆盖目标进程的整个
+内存映射。然而，用户可能希望将监测区域限制在特定的地址范围内，如堆、栈或特定的文件映射区域。
+或者，一些用户可以知道他们工作负载的初始访问模式，因此希望为“自适应区域调整”设置最佳初始区域。
+
+相比之下，DAMON在物理内存监测的情况下不会自动设置和更新监测目标区域。因此，用户应该自己设置
+监测目标区域。
+
+在这种情况下，用户可以通过在 ``init_regions`` 文件中写入适当的值，明确地设置他们想要的初
+始监测目标区域。输入的每一行应代表一个区域，形式如下::
+
+    <target idx> <start address> <end address>
+
+目标idx应该是 ``target_ids`` 文件中目标的索引，从 ``0`` 开始，区域应该按照地址顺序传递。
+例如，下面的命令将设置几个地址范围， ``1-100`` 和 ``100-200`` 作为pid 42的初始监测目标
+区域，这是 ``target_ids`` 中的第一个（索引 ``0`` ），另外几个地址范围， ``20-40`` 和
+``50-100`` 作为pid 4242的地址，这是 ``target_ids`` 中的第二个（索引 ``1`` ）::
+
+    # cd <debugfs>/damon
+    # cat target_ids
+    42 4242
+    # echo "0   1       100
+            0   100     200
+            1   20      40
+            1   50      100" > init_regions
+
+请注意，这只是设置了初始的监测目标区域。在虚拟内存监测的情况下，DAMON会在一个 ``区域更新间隔``
+后自动更新区域的边界。因此，在这种情况下，如果用户不希望更新的话，应该把 ``区域的更新间隔`` 设
+置得足够大。
+
+
+方案
+----
+
+对于通常的基于DAMON的数据访问感知的内存管理优化，用户只是希望系统对特定访问模式的内存区域应用内
+存管理操作。DAMON从用户那里接收这种形式化的操作方案，并将这些方案应用到目标进程中。
+
+用户可以通过读取和写入 ``scheme`` debugfs文件来获得和设置这些方案。读取该文件还可以显示每个
+方案的统计数据。在文件中，每一个方案都应该在每一行中以下列形式表示出来::
+
+    <target access pattern> <action> <quota> <watermarks>
+
+你可以通过简单地在文件中写入一个空字符串来禁用方案。
+
+目标访问模式
+~~~~~~~~~~~~
+
+``<目标访问模式>`` 是由三个范围构成的，形式如下::
+
+    min-size max-size min-acc max-acc min-age max-age
+
+具体来说，区域大小的字节数（ `min-size` 和 `max-size` ），访问频率的每聚合区间的监测访问次
+数（ `min-acc` 和 `max-acc` ），区域年龄的聚合区间数（ `min-age` 和 `max-age` ）都被指定。
+请注意，这些范围是封闭区间。
+
+动作
+~~~~
+
+``<action>`` 是一个预定义的内存管理动作的整数，DAMON将应用于具有目标访问模式的区域。支持
+的数字和它们的含义如下::
+
+ - 0: Call ``madvise()`` for the region with ``MADV_WILLNEED``
+ - 1: Call ``madvise()`` for the region with ``MADV_COLD``
+ - 2: Call ``madvise()`` for the region with ``MADV_PAGEOUT``
+ - 3: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``
+ - 4: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``
+ - 5: Do nothing but count the statistics
+
+配额
+~~~~
+
+每个 ``动作`` 的最佳 ``目标访问模式`` 取决于工作负载，所以不容易找到。更糟糕的是，将某个
+动作的方案设置得过于激进会导致严重的开销。为了避免这种开销，用户可以通过下面表格中的 ``<quota>``
+来限制方案的时间和大小配额::
+
+    <ms> <sz> <reset interval> <priority weights>
+
+这使得DAMON在 ``<reset interval>`` 毫秒内，尽量只用 ``<ms>`` 毫秒的时间对 ``目标访
+问模式`` 的内存区域应用动作，并在 ``<reset interval>`` 内只对最多<sz>字节的内存区域应
+用动作。将 ``<ms>`` 和 ``<sz>`` 都设置为零，可以禁用配额限制。
+
+当预计超过配额限制时，DAMON会根据 ``目标访问模式`` 的大小、访问频率和年龄，对发现的内存
+区域进行优先排序。为了实现个性化的优先级，用户可以在 ``<优先级权重>`` 中设置这三个属性的
+权重，具体形式如下::
+
+    <size weight> <access frequency weight> <age weight>
+
+水位
+~~~~
+
+有些方案需要根据系统特定指标的当前值来运行，如自由内存比率。对于这种情况，用户可以为该条
+件指定水位。::
+
+    <metric> <check interval> <high mark> <middle mark> <low mark>
+
+``<metric>`` 是一个预定义的整数，用于要检查的度量。支持的数字和它们的含义如下。
+
+ - 0: 忽视水位
+ - 1: 系统空闲内存率 (千分比)
+
+每隔 ``<检查间隔>`` 微秒检查一次公制的值。
+
+如果该值高于 ``<高标>`` 或低于 ``<低标>`` ，该方案被停用。如果该值低于 ``<中标>`` ，
+该方案将被激活。
+
+统计数据
+~~~~~~~~
+
+它还统计每个方案被尝试应用的区域的总数量和字节数，每个方案被成功应用的区域的两个数量，以
+及超过配额限制的总数量。这些统计数据可用于在线分析或调整方案。
+
+统计数据可以通过读取方案文件来显示。读取该文件将显示你在每一行中输入的每个 ``方案`` ，
+统计的五个数字将被加在每一行的末尾。
+
+例子
+~~~~
+
+下面的命令应用了一个方案：”如果一个大小为[4KiB, 8KiB]的内存区域在[10, 20]的聚合时间
+间隔内显示出每一个聚合时间间隔[0, 5]的访问量，请分页出该区域。对于分页，每秒最多只能使
+用10ms，而且每秒分页不能超过1GiB。在这一限制下，首先分页出具有较长年龄的内存区域。另外，
+每5秒钟检查一次系统的可用内存率，当可用内存率低于50%时开始监测和分页，但如果可用内存率
+大于60%，或低于30%，则停止监测“::
+
+    # cd <debugfs>/damon
+    # scheme="4096 8192  0 5    10 20    2"  # target access pattern and action
+    # scheme+=" 10 $((1024*1024*1024)) 1000" # quotas
+    # scheme+=" 0 0 100"                     # prioritization weights
+    # scheme+=" 1 5000000 600 500 300"       # watermarks
+    # echo "$scheme" > schemes
+
+
+开关
+----
+
+除非你明确地启动监测，否则如上所述的文件设置不会产生效果。你可以通过写入和读取 ``monitor_on``
+文件来启动、停止和检查监测的当前状态。写入 ``on`` 该文件可以启动对有属性的目标的监测。写入
+``off`` 该文件则停止这些目标。如果每个目标进程被终止，DAMON也会停止。下面的示例命令开启、关
+闭和检查DAMON的状态::
+
+    # cd <debugfs>/damon
+    # echo on > monitor_on
+    # echo off > monitor_on
+    # cat monitor_on
+    off
+
+请注意，当监测开启时，你不能写到上述的debugfs文件。如果你在DAMON运行时写到这些文件，将会返
+回一个错误代码，如 ``-EBUSY`` 。
+
+
+监测线程PID
+-----------
+
+DAMON通过一个叫做kdamond的内核线程来进行请求监测。你可以通过读取 ``kdamond_pid`` 文件获
+得该线程的 ``pid`` 。当监测被 ``关闭`` 时，读取该文件不会返回任何信息::
+
+    # cd <debugfs>/damon
+    # cat monitor_on
+    off
+    # cat kdamond_pid
+    none
+    # echo on > monitor_on
+    # cat kdamond_pid
+    18594
+
+
+使用多个监测线程
+----------------
+
+每个监测上下文都会创建一个 ``kdamond`` 线程。你可以使用 ``mk_contexts`` 和 ``rm_contexts``
+文件为多个 ``kdamond`` 需要的用例创建和删除监测上下文。
+
+将新上下文的名称写入 ``mk_contexts`` 文件，在 ``DAMON debugfs`` 目录上创建一个该名称的目录。
+该目录将有该上下文的 ``DAMON debugfs`` 文件::
+
+    # cd <debugfs>/damon
+    # ls foo
+    # ls: cannot access 'foo': No such file or directory
+    # echo foo > mk_contexts
+    # ls foo
+    # attrs  init_regions  kdamond_pid  schemes  target_ids
+
+如果不再需要上下文，你可以通过把上下文的名字放到 ``rm_contexts`` 文件中来删除它和相应的目录::
+
+    # echo foo > rm_contexts
+    # ls foo
+    # ls: cannot access 'foo': No such file or directory
+
+注意， ``mk_contexts`` 、 ``rm_contexts`` 和 ``monitor_on`` 文件只在根目录下。
+
+
+监测结果的监测点
+================
+
+DAMON通过一个tracepoint ``damon:damon_aggregated`` 提供监测结果.  当监测开启时，你可
+以记录追踪点事件，并使用追踪点支持工具如perf显示结果。比如说::
+
+    # echo on > monitor_on
+    # perf record -e damon:damon_aggregated &
+    # sleep 5
+    # kill 9 $(pidof perf)
+    # echo off > monitor_on
+    # perf script
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/index.rst b/Documentation/translations/zh_CN/admin-guide/mm/index.rst
new file mode 100644
index 000000000000..702271c5b683
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/index.rst
@@ -0,0 +1,49 @@
+.. include:: ../../disclaimer-zh_CN.rst
+
+:Original:   Documentation/admin-guide/mm/index.rst
+
+:翻译:
+
+  徐鑫 xu xin <[email protected]>
+
+
+========
+内存管理
+========
+
+Linux内存管理子系统，顾名思义，是负责系统中的内存管理。它包括了虚拟内存与请求
+分页的实现，内核内部结构和用户空间程序的内存分配、将文件映射到进程地址空间以
+及许多其他很酷的事情。
+
+Linux内存管理是一个具有许多可配置设置的复杂系统, 且这些设置中的大多数都可以通
+过 ``/proc`` 文件系统获得，并且可以使用 ``sysctl`` 进行查询和调整。这些API接
+口被描述在Documentation/admin-guide/sysctl/vm.rst文件和 `man 5 proc`_ 中。
+
+.. _man 5 proc: http://man7.org/linux/man-pages/man5/proc.5.html
+
+Linux内存管理有它自己的术语，如果你还不熟悉它，请考虑阅读下面参考：
+:ref:`Documentation/admin-guide/mm/concepts.rst <mm_concepts>`.
+
+在此目录下，我们详细描述了如何与Linux内存管理中的各种机制交互。
+
+.. toctree::
+   :maxdepth: 1
+
+   damon/index
+   ksm
+
+Todolist:
+* concepts
+* cma_debugfs
+* hugetlbpage
+* idle_page_tracking
+* memory-hotplug
+* nommu-mmap
+* numa_memory_policy
+* numaperf
+* pagemap
+* soft-dirty
+* swap_numa
+* transhuge
+* userfaultfd
+* zswap
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/ksm.rst b/Documentation/translations/zh_CN/admin-guide/mm/ksm.rst
new file mode 100644
index 000000000000..4829156ef1ae
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/ksm.rst
@@ -0,0 +1,148 @@
+.. include:: ../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/ksm.rst
+
+:翻译:
+
+  徐鑫 xu xin <[email protected]>
+
+
+============
+内核同页合并
+============
+
+
+概述
+====
+
+KSM是一种能节省内存的数据去重功能，由CONFIG_KSM=y启用，并在2.6.32版本时被添
+加到Linux内核。详见 ``mm/ksm.c`` 的实现，以及http://lwn.net/Articles/306704
+和https://lwn.net/Articles/330589
+
+KSM最初目的是为了与KVM（即著名的内核共享内存）一起使用而开发的，通过共享虚拟机
+之间的公共数据，将更多虚拟机放入物理内存。但它对于任何会生成多个相同数据实例的
+应用程序都是很有用的。
+
+KSM的守护进程ksmd会定期扫描那些已注册的用户内存区域，查找内容相同的页面，这些
+页面可以被单个写保护页面替换（如果进程以后想要更新其内容，将自动复制）。使用：
+引用:`sysfs intraface  <ksm_sysfs>` 接口来配置KSM守护程序在单个过程中所扫描的页
+数以及两个过程之间的间隔时间。
+
+KSM只合并匿名（私有）页面，从不合并页缓存（文件）页面。KSM的合并页面最初只能被
+锁定在内核内存中，但现在可以就像其他用户页面一样被换出（但当它们被交换回来时共
+享会被破坏: ksmd必须重新发现它们的身份并再次合并）。
+
+以madvise控制KSM
+================
+
+KSM仅在特定的地址空间区域时运行，即应用程序通过使用如下所示的madvise(2)系统调
+用来请求某块地址成为可能的合并候选者的地址空间::
+
+    int madvise(addr, length, MADV_MERGEABLE)
+
+应用程序当然也可以通过调用::
+
+    int madvise(addr, length, MADV_UNMERGEABLE)
+
+来取消该请求，并恢复为非共享页面：此时KSM将去除合并在该范围内的任何合并页。注意：
+这个去除合并的调用可能突然需要的内存量超过实际可用的内存量-那么可能会出现EAGAIN
+失败，但更可能会唤醒OOM killer。
+
+如果KSM未被配置到正在运行的内核中，则madvise MADV_MERGEABLE 和 MADV_UNMERGEABLE
+的调用只会以EINVAL 失败。如果正在运行的内核是用CONFIG_KSM=y方式构建的，那么这些
+调用通常会成功：即使KSM守护程序当前没有运行，MADV_MERGEABLE 仍然会在KSM守护程序
+启动时注册范围，即使该范围不能包含KSM实际可以合并的任何页面，即使MADV_UNMERGEABLE
+应用于从未标记为MADV_MERGEABLE的范围。
+
+如果一块内存区域必须被拆分为至少一个新的MADV_MERGEABLE区域或MADV_UNMERGEABLE区域，
+当该进程将超过 ``vm.max_map_count`` 的设定，则madvise可能返回ENOMEM。（请参阅文档
+Documentation/admin-guide/sysctl/vm.rst）。
+
+与其他madvise调用一样，它们在用户地址空间的映射区域上使用：如果指定的范围包含未
+映射的间隙（尽管在中间的映射区域工作），它们将报告ENOMEM，如果没有足够的内存用于
+内部结构，则可能会因EAGAIN而失败。
+
+KSM守护进程sysfs接口
+====================
+
+KSM守护进程可以由``/sys/kernel/mm/ksm/`` 中的sysfs文件控制，所有人都可以读取，但
+只能由root用户写入。各接口解释如下：
+
+
+pages_to_scan
+        ksmd进程进入睡眠前要扫描的页数。
+        例如， ``echo 100 > /sys/kernel/mm/ksm/pages_to_scan``
+
+        默认值：100（该值被选择用于演示目的）
+
+sleep_millisecs
+        ksmd在下次扫描前应休眠多少毫秒
+        例如， ``echo 20 > /sys/kernel/mm/ksm/sleep_millisecs``
+
+        默认值：20（该值被选择用于演示目的）
+
+merge_across_nodes
+        指定是否可以合并来自不同NUMA节点的页面。当设置为0时，ksm仅合并在物理上位
+        于同一NUMA节点的内存区域中的页面。这降低了访问共享页面的延迟。在有明显的
+        NUMA距离上，具有更多节点的系统可能受益于设置该值为0时的更低延迟。而对于
+        需要对内存使用量最小化的较小系统来说，设置该值为1（默认设置）则可能会受
+        益于更大共享页面。在决定使用哪种设置之前，您可能希望比较系统在每种设置下
+        的性能。 ``merge_across_nodes`` 仅当系统中没有ksm共享页面时，才能被更改设
+        置：首先将接口`run` 设置为2从而对页进行去合并，然后在修改
+        ``merge_across_nodes`` 后再将‘run’又设置为1，以根据新设置来重新合并。
+
+        默认值：1（如早期的发布版本一样合并跨站点）
+
+run
+        * 设置为0可停止ksmd运行，但保留合并页面，
+        * 设置为1可运行ksmd，例如， ``echo 1 > /sys/kernel/mm/ksm/run`` ，
+        * 设置为2可停止ksmd运行，并且对所有目前已合并的页进行去合并，但保留可合并
+          区域以供下次运行。
+
+        默认值：0（必须设置为1才能激活KSM，除非禁用了CONFIG_SYSFS）
+
+use_zero_pages
+        指定是否应当特殊处理空页（即那些仅含zero的已分配页）。当该值设置为1时，
+        空页与内核零页合并，而不是像通常情况下那样空页自身彼此合并。这可以根据
+        工作负载的不同，在具有着色零页的架构上可以提高性能。启用此设置时应小心，
+        因为它可能会降低某些工作负载的KSM性能，比如，当待合并的候选页面的校验和
+        与空页面的校验和恰好匹配的时候。此设置可随时更改，仅对那些更改后再合并
+        的页面有效。
+
+        默认值：0（如同早期版本的KSM正常表现）
+
+max_page_sharing
+        单个KSM页面允许的最大共享站点数。这将强制执行重复数据消除限制，以避免涉
+        及遍历共享KSM页面的虚拟映射的虚拟内存操作的高延迟。最小值为2，因为新创
+        建的KSM页面将至少有两个共享者。该值越高，KSM合并内存的速度越快，去重
+        因子也越高，但是对于任何给定的KSM页面，虚拟映射的最坏情况遍历的速度也会
+        越慢。减慢了这种遍历速度就意味着在交换、压缩、NUMA平衡和页面迁移期间，
+        某些虚拟内存操作将有更高的延迟，从而降低这些虚拟内存操作调用者的响应能力。
+        其他任务如果不涉及执行虚拟映射遍历的VM操作，其任务调度延迟不受此参数的影
+        响，因为这些遍历本身是调度友好的。
+
+stable_node_chains_prune_millisecs
+        指定KSM检查特定页面的元数据的频率（即那些达到过时信息数据去重限制标准的
+        页面）单位是毫秒。较小的毫秒值将以更低的延迟来释放KSM元数据，但它们将使
+        ksmd在扫描期间使用更多CPU。如果还没有一个KSM页面达到 ``max_page_sharing``
+        标准，那就没有什么用。
+
+KSM与MADV_MERGEABLE的工作有效性体现于 ``/sys/kernel/mm/ksm/`` 路径下的接口：
+
+pages_shared
+        表示多少共享页正在被使用
+pages_sharing
+        表示还有多少站点正在共享这些共享页，即节省了多少
+pages_unshared
+        表示有多少页是唯一的，但被反复检查以进行合并
+pages_volatile
+        表示有多少页因变化太快而无法放在tree中
+full_scans
+        表示所有可合并区域已扫描多少次
+stable_node_chains
+        达到 ``max_page_sharing`` 限制的KSM页数
+stable_node_dups
+        重复的KSM页数
+
+比值 ``pages_sharing/pages_shared`` 的最大值受限制于 ``max_page_sharing``
+的设定。要想增加该比值，则相应地要增加 ``max_page_sharing`` 的值。
diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst
index d10191c45cf1..26d9913fc8b6 100644
--- a/Documentation/translations/zh_CN/core-api/index.rst
+++ b/Documentation/translations/zh_CN/core-api/index.rst
@@ -42,6 +42,7 @@
    kref
    assoc_array
    xarray
+   rbtree
 
 Todolist:
 
@@ -49,7 +50,6 @@ Todolist:
 
    idr
    circular-buffers
-   rbtree
    generic-radix-tree
    packing
    bus-virt-phys-mapping
diff --git a/Documentation/translations/zh_CN/core-api/rbtree.rst b/Documentation/translations/zh_CN/core-api/rbtree.rst
new file mode 100644
index 000000000000..a3e1555cb974
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/rbtree.rst
@@ -0,0 +1,391 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/rbtree.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <[email protected]>
+
+=========================
+Linux中的红黑树（rbtree）
+=========================
+
+
+:日期: 2007年1月18日
+:作者: Rob Landley <[email protected]>
+
+何为红黑树，它们有什么用？
+--------------------------
+
+红黑树是一种自平衡二叉搜索树，被用来存储可排序的键/值数据对。这与基数树（被用来高效
+存储稀疏数组，因此使用长整型下标来插入/访问/删除结点）和哈希表（没有保持排序因而无法
+容易地按序遍历，同时必须调节其大小和哈希函数，然而红黑树可以优雅地伸缩以便存储任意
+数量的键）不同。
+
+红黑树和AVL树类似，但在插入和删除时提供了更快的实时有界的最坏情况性能（分别最多两次
+旋转和三次旋转，来平衡树），查询时间轻微变慢（但时间复杂度仍然是O(log n)）。
+
+引用Linux每周新闻（Linux Weekly News）：
+
+    内核中有多处红黑树的使用案例。最后期限调度器和完全公平排队（CFQ）I/O调度器利用
+    红黑树跟踪请求；数据包CD/DVD驱动程序也是如此。高精度时钟代码使用一颗红黑树组织
+    未完成的定时器请求。ext3文件系统用红黑树跟踪目录项。虚拟内存区域（VMAs）、epoll
+    文件描述符、密码学密钥和在“分层令牌桶”调度器中的网络数据包都由红黑树跟踪。
+
+本文档涵盖了对Linux红黑树实现的使用方法。更多关于红黑树的性质和实现的信息，参见：
+
+  Linux每周新闻关于红黑树的文章
+    https://lwn.net/Articles/184495/
+
+  维基百科红黑树词条
+    https://en.wikipedia.org/wiki/Red-black_tree
+
+红黑树的Linux实现
+-----------------
+
+Linux的红黑树实现在文件“lib/rbtree.c”中。要使用它，需要“#include <linux/rbtree.h>”。
+
+Linux的红黑树实现对速度进行了优化，因此比传统的实现少一个间接层（有更好的缓存局部性）。
+每个rb_node结构体的实例嵌入在它管理的数据结构中，因此不需要靠指针来分离rb_node和它
+管理的数据结构。用户应该编写他们自己的树搜索和插入函数，来调用已提供的红黑树函数，
+而不是使用一个比较回调函数指针。加锁代码也留给红黑树的用户编写。
+
+创建一颗红黑树
+--------------
+
+红黑树中的数据结点是包含rb_node结构体成员的结构体::
+
+  struct mytype {
+  	struct rb_node node;
+  	char *keystring;
+  };
+
+当处理一个指向内嵌rb_node结构体的指针时，包住rb_node的结构体可用标准的container_of()
+宏访问。此外，个体成员可直接用rb_entry(node, type, member)访问。
+
+每颗红黑树的根是一个rb_root数据结构，它由以下方式初始化为空:
+
+  struct rb_root mytree = RB_ROOT;
+
+在一颗红黑树中搜索值
+--------------------
+
+为你的树写一个搜索函数是相当简单的：从树根开始，比较每个值，然后根据需要继续前往左边或
+右边的分支。
+
+示例::
+
+  struct mytype *my_search(struct rb_root *root, char *string)
+  {
+  	struct rb_node *node = root->rb_node;
+
+  	while (node) {
+  		struct mytype *data = container_of(node, struct mytype, node);
+		int result;
+
+		result = strcmp(string, data->keystring);
+
+		if (result < 0)
+  			node = node->rb_left;
+		else if (result > 0)
+  			node = node->rb_right;
+		else
+  			return data;
+	}
+	return NULL;
+  }
+
+在一颗红黑树中插入数据
+----------------------
+
+在树中插入数据的步骤包括：首先搜索插入新结点的位置，然后插入结点并对树再平衡
+（"recoloring"）。
+
+插入的搜索和上文的搜索不同，它要找到嫁接新结点的位置。新结点也需要一个指向它的父节点
+的链接，以达到再平衡的目的。
+
+示例::
+
+  int my_insert(struct rb_root *root, struct mytype *data)
+  {
+  	struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+  	/* Figure out where to put new node */
+  	while (*new) {
+  		struct mytype *this = container_of(*new, struct mytype, node);
+  		int result = strcmp(data->keystring, this->keystring);
+
+		parent = *new;
+  		if (result < 0)
+  			new = &((*new)->rb_left);
+  		else if (result > 0)
+  			new = &((*new)->rb_right);
+  		else
+  			return FALSE;
+  	}
+
+  	/* Add new node and rebalance tree. */
+  	rb_link_node(&data->node, parent, new);
+  	rb_insert_color(&data->node, root);
+
+	return TRUE;
+  }
+
+在一颗红黑树中删除或替换已经存在的数据
+--------------------------------------
+
+若要从树中删除一个已经存在的结点，调用::
+
+  void rb_erase(struct rb_node *victim, struct rb_root *tree);
+
+示例::
+
+  struct mytype *data = mysearch(&mytree, "walrus");
+
+  if (data) {
+  	rb_erase(&data->node, &mytree);
+  	myfree(data);
+  }
+
+若要用一个新结点替换树中一个已经存在的键值相同的结点，调用::
+
+  void rb_replace_node(struct rb_node *old, struct rb_node *new,
+  			struct rb_root *tree);
+
+通过这种方式替换结点不会对树做重排序：如果新结点的键值和旧结点不同，红黑树可能被
+破坏。
+
+（按排序的顺序）遍历存储在红黑树中的元素
+----------------------------------------
+
+我们提供了四个函数，用于以排序的方式遍历一颗红黑树的内容。这些函数可以在任意红黑树
+上工作，并且不需要被修改或包装（除非加锁的目的）::
+
+  struct rb_node *rb_first(struct rb_root *tree);
+  struct rb_node *rb_last(struct rb_root *tree);
+  struct rb_node *rb_next(struct rb_node *node);
+  struct rb_node *rb_prev(struct rb_node *node);
+
+要开始迭代，需要使用一个指向树根的指针调用rb_first()或rb_last()，它将返回一个指向
+树中第一个或最后一个元素所包含的节点结构的指针。要继续的话，可以在当前结点上调用
+rb_next()或rb_prev()来获取下一个或上一个结点。当没有剩余的结点时，将返回NULL。
+
+迭代器函数返回一个指向被嵌入的rb_node结构体的指针，由此，包住rb_node的结构体可用
+标准的container_of()宏访问。此外，个体成员可直接用rb_entry(node, type, member)
+访问。
+
+示例::
+
+  struct rb_node *node;
+  for (node = rb_first(&mytree); node; node = rb_next(node))
+	printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring);
+
+带缓存的红黑树
+--------------
+
+计算最左边（最小的）结点是二叉搜索树的一个相当常见的任务，例如用于遍历，或用户根据
+他们自己的逻辑依赖一个特定的顺序。为此，用户可以使用'struct rb_root_cached'来优化
+时间复杂度为O(logN)的rb_first()的调用，以简单地获取指针，避免了潜在的昂贵的树迭代。
+维护操作的额外运行时间开销可忽略，不过内存占用较大。
+
+和rb_root结构体类似，带缓存的红黑树由以下方式初始化为空::
+
+  struct rb_root_cached mytree = RB_ROOT_CACHED;
+
+带缓存的红黑树只是一个常规的rb_root，加上一个额外的指针来缓存最左边的节点。这使得
+rb_root_cached可以存在于rb_root存在的任何地方，并且只需增加几个接口来支持带缓存的
+树::
+
+  struct rb_node *rb_first_cached(struct rb_root_cached *tree);
+  void rb_insert_color_cached(struct rb_node *, struct rb_root_cached *, bool);
+  void rb_erase_cached(struct rb_node *node, struct rb_root_cached *);
+
+操作和删除也有对应的带缓存的树的调用::
+
+  void rb_insert_augmented_cached(struct rb_node *node, struct rb_root_cached *,
+				  bool, struct rb_augment_callbacks *);
+  void rb_erase_augmented_cached(struct rb_node *, struct rb_root_cached *,
+				 struct rb_augment_callbacks *);
+
+
+对增强型红黑树的支持
+--------------------
+
+增强型红黑树是一种在每个结点里存储了“一些”附加数据的红黑树，其中结点N的附加数据
+必须是以N为根的子树中所有结点的内容的函数。它是建立在红黑树基础设施之上的可选特性。
+想要使用这个特性的红黑树用户，插入和删除结点时必须调用增强型接口并提供增强型回调函数。
+
+实现增强型红黑树操作的C文件必须包含<linux/rbtree_augmented.h>而不是<linux/rbtree.h>。
+注意，linux/rbtree_augmented.h暴露了一些红黑树实现的细节而你不应依赖它们，请坚持
+使用文档记录的API，并且不要在头文件中包含<linux/rbtree_augmented.h>，以最小化你的
+用户意外地依赖这些实现细节的可能。
+
+插入时，用户必须更新通往被插入节点的路径上的增强信息，然后像往常一样调用rb_link_node()，
+然后是rb_augment_inserted()而不是平时的rb_insert_color()调用。如果
+rb_augment_inserted()再平衡了红黑树，它将回调至一个用户提供的函数来更新受影响的
+子树上的增强信息。
+
+删除一个结点时，用户必须调用rb_erase_augmented()而不是rb_erase()。
+rb_erase_augmented()回调至一个用户提供的函数来更新受影响的子树上的增强信息。
+
+在两种情况下，回调都是通过rb_augment_callbacks结构体提供的。必须定义3个回调：
+
+- 一个传播回调，它更新一个给定结点和它的祖先们的增强数据，直到一个给定的停止点
+  （如果是NULL，将更新一路更新到树根）。
+
+- 一个复制回调，它将一颗给定子树的增强数据复制到一个新指定的子树树根。
+
+- 一个树旋转回调，它将一颗给定的子树的增强值复制到新指定的子树树根上，并重新计算
+  先前的子树树根的增强值。
+
+rb_erase_augmented()编译后的代码可能会内联传播、复制回调，这将导致函数体积更大，
+因此每个增强型红黑树的用户应该只有一个rb_erase_augmented()的调用点，以限制编译后
+的代码大小。
+
+
+使用示例
+^^^^^^^^
+
+区间树是增强型红黑树的一个例子。参考Cormen，Leiserson，Rivest和Stein写的
+《算法导论》。区间树的更多细节：
+
+经典的红黑树只有一个键，它不能直接用来存储像[lo:hi]这样的区间范围，也不能快速查找
+与新的lo:hi重叠的部分，或者查找是否有与新的lo:hi完全匹配的部分。
+
+然而，红黑树可以被增强，以一种结构化的方式来存储这种区间范围，从而使高效的查找和
+精确匹配成为可能。
+
+这个存储在每个节点中的“额外信息”是其所有后代结点中的最大hi（max_hi）值。这个信息
+可以保持在每个结点上，只需查看一下该结点和它的直系子结点们。这将被用于时间复杂度
+为O(log n)的最低匹配查找（所有可能的匹配中最低的起始地址），就像这样::
+
+  struct interval_tree_node *
+  interval_tree_first_match(struct rb_root *root,
+			    unsigned long start, unsigned long last)
+  {
+	struct interval_tree_node *node;
+
+	if (!root->rb_node)
+		return NULL;
+	node = rb_entry(root->rb_node, struct interval_tree_node, rb);
+
+	while (true) {
+		if (node->rb.rb_left) {
+			struct interval_tree_node *left =
+				rb_entry(node->rb.rb_left,
+					 struct interval_tree_node, rb);
+			if (left->__subtree_last >= start) {
+				/*
+				 * Some nodes in left subtree satisfy Cond2.
+				 * Iterate to find the leftmost such node N.
+				 * If it also satisfies Cond1, that's the match
+				 * we are looking for. Otherwise, there is no
+				 * matching interval as nodes to the right of N
+				 * can't satisfy Cond1 either.
+				 */
+				node = left;
+				continue;
+			}
+		}
+		if (node->start <= last) {		/* Cond1 */
+			if (node->last >= start)	/* Cond2 */
+				return node;	/* node is leftmost match */
+			if (node->rb.rb_right) {
+				node = rb_entry(node->rb.rb_right,
+					struct interval_tree_node, rb);
+				if (node->__subtree_last >= start)
+					continue;
+			}
+		}
+		return NULL;	/* No match */
+	}
+  }
+
+插入/删除是通过以下增强型回调来定义的::
+
+  static inline unsigned long
+  compute_subtree_last(struct interval_tree_node *node)
+  {
+	unsigned long max = node->last, subtree_last;
+	if (node->rb.rb_left) {
+		subtree_last = rb_entry(node->rb.rb_left,
+			struct interval_tree_node, rb)->__subtree_last;
+		if (max < subtree_last)
+			max = subtree_last;
+	}
+	if (node->rb.rb_right) {
+		subtree_last = rb_entry(node->rb.rb_right,
+			struct interval_tree_node, rb)->__subtree_last;
+		if (max < subtree_last)
+			max = subtree_last;
+	}
+	return max;
+  }
+
+  static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
+  {
+	while (rb != stop) {
+		struct interval_tree_node *node =
+			rb_entry(rb, struct interval_tree_node, rb);
+		unsigned long subtree_last = compute_subtree_last(node);
+		if (node->__subtree_last == subtree_last)
+			break;
+		node->__subtree_last = subtree_last;
+		rb = rb_parent(&node->rb);
+	}
+  }
+
+  static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
+  {
+	struct interval_tree_node *old =
+		rb_entry(rb_old, struct interval_tree_node, rb);
+	struct interval_tree_node *new =
+		rb_entry(rb_new, struct interval_tree_node, rb);
+
+	new->__subtree_last = old->__subtree_last;
+  }
+
+  static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
+  {
+	struct interval_tree_node *old =
+		rb_entry(rb_old, struct interval_tree_node, rb);
+	struct interval_tree_node *new =
+		rb_entry(rb_new, struct interval_tree_node, rb);
+
+	new->__subtree_last = old->__subtree_last;
+	old->__subtree_last = compute_subtree_last(old);
+  }
+
+  static const struct rb_augment_callbacks augment_callbacks = {
+	augment_propagate, augment_copy, augment_rotate
+  };
+
+  void interval_tree_insert(struct interval_tree_node *node,
+			    struct rb_root *root)
+  {
+	struct rb_node **link = &root->rb_node, *rb_parent = NULL;
+	unsigned long start = node->start, last = node->last;
+	struct interval_tree_node *parent;
+
+	while (*link) {
+		rb_parent = *link;
+		parent = rb_entry(rb_parent, struct interval_tree_node, rb);
+		if (parent->__subtree_last < last)
+			parent->__subtree_last = last;
+		if (start < parent->start)
+			link = &parent->rb.rb_left;
+		else
+			link = &parent->rb.rb_right;
+	}
+
+	node->__subtree_last = last;
+	rb_link_node(&node->rb, rb_parent, link);
+	rb_insert_augmented(&node->rb, root, &augment_callbacks);
+  }
+
+  void interval_tree_remove(struct interval_tree_node *node,
+			    struct rb_root *root)
+  {
+	rb_erase_augmented(&node->rb, root, &augment_callbacks);
+  }
diff --git a/Documentation/translations/zh_CN/devicetree/index.rst b/Documentation/translations/zh_CN/devicetree/index.rst
new file mode 100644
index 000000000000..23d0b6fccd58
--- /dev/null
+++ b/Documentation/translations/zh_CN/devicetree/index.rst
@@ -0,0 +1,50 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/Devicetree/index.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+
+=============================
+Open Firmware 和 Devicetree
+=============================
+
+该文档是整个设备树文档的总目录，标题中多是业内默认的术语，初见就翻译成中文，
+晦涩难懂，因此尽量保留，后面翻译其子文档时，可能会根据语境，灵活地翻译为中文。
+
+内核Devicetree的使用
+=======================
+.. toctree::
+   :maxdepth: 1
+
+   usage-model
+   of_unittest
+
+Todolist:
+
+*   kernel-api
+
+Devicetree Overlays
+===================
+.. toctree::
+   :maxdepth: 1
+
+Todolist:
+
+*   changesets
+*   dynamic-resolution-notes
+*   overlay-notes
+
+Devicetree Bindings
+===================
+.. toctree::
+   :maxdepth: 1
+
+Todolist:
+
+*   bindings/index
diff --git a/Documentation/translations/zh_CN/devicetree/of_unittest.rst b/Documentation/translations/zh_CN/devicetree/of_unittest.rst
new file mode 100644
index 000000000000..abd94e771ef8
--- /dev/null
+++ b/Documentation/translations/zh_CN/devicetree/of_unittest.rst
@@ -0,0 +1,189 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/Devicetree/of_unittest.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+=================================
+Open Firmware Devicetree 单元测试
+=================================
+
+作者: Gaurav Minocha <[email protected]>
+
+1. 概述
+=======
+
+本文档解释了执行 OF 单元测试所需的测试数据是如何动态地附加到实时树上的，与机器的架构无关。
+
+建议在继续读下去之前，先阅读以下文件。
+
+(1) Documentation/devicetree/usage-model.rst
+(2) http://www.devicetree.org/Device_Tree_Usage
+
+OF Selftest被设计用来测试提供给设备驱动开发者的接口（include/linux/of.h），以从未扁平
+化的设备树数据结构中获取设备信息等。这个接口被大多数设备驱动在各种使用情况下使用。
+
+
+2. 测试数据
+===========
+
+设备树源文件（drivers/of/unittest-data/testcases.dts）包含执行drivers/of/unittest.c
+中自动化单元测试所需的测试数据。目前，以下设备树源包含文件（.dtsi）被包含在testcases.dt中::
+
+    drivers/of/unittest-data/tests-interrupts.dtsi
+    drivers/of/unittest-data/tests-platform.dtsi
+    drivers/of/unittest-data/tests-phandle.dtsi
+    drivers/of/unittest-data/tests-match.dtsi
+
+当内核在启用OF_SELFTEST的情况下被构建时，那么下面的make规则::
+
+    $(obj)/%.dtb: $(src)/%.dts FORCE
+	    $(call if_changed_dep, dtc)
+
+用于将DT源文件（testcases.dts）编译成二进制blob（testcases.dtb），也被称为扁平化的DT。
+
+之后，使用以下规则将上述二进制blob包装成一个汇编文件（testcases.dtb.S）::
+
+    $(obj)/%.dtb.S: $(obj)/%.dtb
+	    $(call cmd, dt_S_dtb)
+
+汇编文件被编译成一个对象文件（testcases.dtb.o），并被链接到内核镜像中。
+
+
+2.1. 添加测试数据
+-----------------
+
+未扁平化的设备树结构体:
+
+未扁平化的设备树由连接的设备节点组成，其树状结构形式如下所述::
+
+    // following struct members are used to construct the tree
+    struct device_node {
+	...
+	struct  device_node *parent;
+	struct  device_node *child;
+	struct  device_node *sibling;
+	...
+    };
+
+图1描述了一个机器的未扁平化设备树的通用结构，只考虑了子节点和同级指针。存在另一个指针，
+``*parent`` ，用于反向遍历该树。因此，在一个特定的层次上，子节点和所有的兄弟姐妹节点将
+有一个指向共同节点的父指针（例如，child1、sibling2、sibling3、sibling4的父指针指向
+根节点）::
+
+    root ('/')
+    |
+    child1 -> sibling2 -> sibling3 -> sibling4 -> null
+    |         |           |           |
+    |         |           |          null
+    |         |           |
+    |         |        child31 -> sibling32 -> null
+    |         |           |          |
+    |         |          null       null
+    |         |
+    |      child21 -> sibling22 -> sibling23 -> null
+    |         |          |            |
+    |        null       null         null
+    |
+    child11 -> sibling12 -> sibling13 -> sibling14 -> null
+    |           |           |            |
+    |           |           |           null
+    |           |           |
+    null        null       child131 -> null
+			    |
+			    null
+
+Figure 1: 未扁平化的设备树的通用结构
+
+
+在执行OF单元测试之前，需要将测试数据附加到机器的设备树上（如果存在）。因此，当调用
+selftest_data_add()时，首先会读取通过以下内核符号链接到内核镜像中的扁平化设备树
+数据::
+
+    __dtb_testcases_begin - address marking the start of test data blob
+    __dtb_testcases_end   - address marking the end of test data blob
+
+其次，它调用of_fdt_unflatten_tree()来解除扁平化的blob。最后，如果机器的设备树
+（即实时树）是存在的，那么它将未扁平化的测试数据树附加到实时树上，否则它将自己作为
+实时设备树附加。
+
+attach_node_and_children()使用of_attach_node()将节点附加到实时树上，如下所
+述。为了解释这一点，图2中描述的测试数据树被附加到图1中描述的实时树上::
+
+    root ('/')
+	|
+    testcase-data
+	|
+    test-child0 -> test-sibling1 -> test-sibling2 -> test-sibling3 -> null
+	|               |                |                |
+    test-child01      null             null             null
+
+
+Figure 2: 将测试数据树附在实时树上的例子。
+
+根据上面的方案，实时树已经存在，所以不需要附加根('/')节点。所有其他节点都是通过在
+每个节点上调用of_attach_node()来附加的。
+
+在函数of_attach_node()中，新的节点被附在实时树中给定的父节点的子节点上。但是，如
+果父节点已经有了一个孩子，那么新节点就会取代当前的孩子，并将其变成其兄弟姐妹。因此，
+当测试案例的数据节点被连接到上面的实时树（图1）时，最终的结构如图3所示::
+
+    root ('/')
+    |
+    testcase-data -> child1 -> sibling2 -> sibling3 -> sibling4 -> null
+    |               |          |           |           |
+    (...)             |          |           |          null
+		    |          |         child31 -> sibling32 -> null
+		    |          |           |           |
+		    |          |          null        null
+		    |          |
+		    |        child21 -> sibling22 -> sibling23 -> null
+		    |          |           |            |
+		    |         null        null         null
+		    |
+		    child11 -> sibling12 -> sibling13 -> sibling14 -> null
+		    |          |            |            |
+		    null       null          |           null
+					    |
+					    child131 -> null
+					    |
+					    null
+    -----------------------------------------------------------------------
+
+    root ('/')
+    |
+    testcase-data -> child1 -> sibling2 -> sibling3 -> sibling4 -> null
+    |               |          |           |           |
+    |             (...)      (...)       (...)        null
+    |
+    test-sibling3 -> test-sibling2 -> test-sibling1 -> test-child0 -> null
+    |                |                   |                |
+    null             null                null         test-child01
+
+
+Figure 3: 附加测试案例数据后的实时设备树结构。
+
+
+聪明的读者会注意到，与先前的结构相比，test-child0节点成为最后一个兄弟姐妹（图2）。
+在连接了第一个test-child0节点之后，又连接了test-sibling1节点，该节点推动子节点
+（即test-child0）成为兄弟姐妹，并使自己成为子节点，如上所述。
+
+如果发现一个重复的节点（即如果一个具有相同full_name属性的节点已经存在于实时树中），
+那么该节点不会被附加，而是通过调用函数update_node_properties()将其属性更新到活
+树的节点中。
+
+
+2.2. 删除测试数据
+-----------------
+
+一旦测试用例执行完，selftest_data_remove被调用，以移除最初连接的设备节点（首先是
+叶子节点被分离，然后向上移动父节点被移除，最后是整个树）。selftest_data_remove()
+调用detach_node_and_children()，使用of_detach_node()将节点从实时设备树上分离。
+
+为了分离一个节点，of_detach_node()要么将给定节点的父节点的子节点指针更新为其同级节
+点，要么根据情况将前一个同级节点附在给定节点的同级节点上。就这样吧。 :)
diff --git a/Documentation/translations/zh_CN/devicetree/usage-model.rst b/Documentation/translations/zh_CN/devicetree/usage-model.rst
new file mode 100644
index 000000000000..318a3c6a0114
--- /dev/null
+++ b/Documentation/translations/zh_CN/devicetree/usage-model.rst
@@ -0,0 +1,330 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/Devicetree/usage-model.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+
+===================
+Linux 和 Devicetree
+===================
+
+Linux对设备树数据的使用模型
+
+:作者: Grant Likely <[email protected]>
+
+这篇文章描述了Linux如何使用设备树。关于设备树数据格式的概述可以在
+devicetree.org\ [1]_ 的设备树使用页面上找到。
+
+.. [1] https://www.devicetree.org/specifications/
+
+"Open Firmware Device Tree"，或简称为Devicetree（DT），是一种用于描述硬
+件的数据结构和语言。更确切地说，它是一种操作系统可读的硬件描述，这样操作系统就不
+需要对机器的细节进行硬编码。
+
+从结构上看，DT是一棵树，或者说是带有命名节点的无环图，节点可以有任意数量的命名
+属性来封装任意的数据。还存在一种机制，可以在自然的树状结构之外创建从一个节点到
+另一个节点的任意链接。
+
+从概念上讲，一套通用的使用惯例，称为 "bindings"（后文译为绑定），被定义为数据
+应该如何出现在树中，以描述典型的硬件特性，包括数据总线、中断线、GPIO连接和外围
+设备。
+
+尽可能使用现有的绑定来描述硬件，以最大限度地利用现有的支持代码，但由于属性和节
+点名称是简单的文本字符串，通过定义新的节点和属性来扩展现有的绑定或创建新的绑定
+很容易。然而，要警惕的是，在创建一个新的绑定之前，最好先对已经存在的东西做一些
+功课。目前有两种不同的、不兼容的i2c总线的绑定，这是因为在创建新的绑定时没有事先
+调查i2c设备在现有系统中是如何被枚举的。
+
+1. 历史
+-------
+DT最初是由Open Firmware创建的，作为将数据从Open Firmware传递给客户程序
+（如传递给操作系统）的通信方法的一部分。操作系统使用设备树在运行时探测硬件的拓
+扑结构，从而在没有硬编码信息的情况下支持大多数可用的硬件（假设所有设备的驱动程
+序都可用）。
+
+由于Open Firmware通常在PowerPC和SPARC平台上使用，长期以来，对这些架构的
+Linux支持一直使用设备树。
+
+2005年，当PowerPC Linux开始大规模清理并合并32位和64位支持时，决定在所有
+Powerpc平台上要求DT支持，无论它们是否使用Open Firmware。为了做到这一点，
+我们创建了一个叫做扁平化设备树（FDT）的DT表示法，它可以作为一个二进制的blob
+传递给内核，而不需要真正的Open Firmware实现。U-Boot、kexec和其他引导程序
+被修改，以支持传递设备树二进制（dtb）和在引导时修改dtb。DT也被添加到PowerPC
+引导包装器（arch/powerpc/boot/\*）中，这样dtb就可以被包裹在内核镜像中，以
+支持引导现有的非DT察觉的固件。
+
+一段时间后，FDT基础架构被普及到了所有的架构中。在写这篇文章的时候，6个主线架
+构（arm、microblaze、mips、powerpc、sparc和x86）和1个非主线架构（ios）
+有某种程度的DT支持。
+
+1. 数据模型
+-----------
+如果你还没有读过设备树用法\ [1]_页，那么现在就去读吧。没关系，我等着....
+
+2.1 高层次视角
+--------------
+最重要的是要明白，DT只是一个描述硬件的数据结构。它没有什么神奇之处，也不会神
+奇地让所有的硬件配置问题消失。它所做的是提供一种语言，将硬件配置与Linux内核
+（或任何其他操作系统）中的板卡和设备驱动支持解耦。使用它可以使板卡和设备支持
+变成数据驱动；根据传递到内核的数据做出设置决定，而不是根据每台机器的硬编码选
+择。
+
+理想情况下，数据驱动的平台设置应该导致更少的代码重复，并使其更容易用一个内核
+镜像支持各种硬件。
+
+Linux使用DT数据有三个主要目的:
+
+1) 平台识别。
+2) 运行时配置，以及
+3) 设备数量。
+
+2.2 平台识别
+------------
+首先，内核将使用DT中的数据来识别特定的机器。在一个理想的世界里，具体的平台对
+内核来说并不重要，因为所有的平台细节都会被设备树以一致和可靠的方式完美描述。
+但是，硬件并不完美，所以内核必须在早期启动时识别机器，以便有机会运行特定于机
+器的修复程序。
+
+在大多数情况下，机器的身份是不相关的，而内核将根据机器的核心CPU或SoC来选择
+设置代码。例如，在ARM上，arch/arm/kernel/setup.c中的setup_arch()将调
+用arch/arm/kernel/devtree.c中的setup_machine_fdt()，它通过
+machine_desc表搜索并选择与设备树数据最匹配的machine_desc。它通过查看根
+设备树节点中的'compatible'属性，并将其与struct machine_desc中的
+dt_compat列表（如果你好奇，该列表定义在arch/arm/include/asm/mach/arch.h
+中）进行比较，从而确定最佳匹配。
+
+“compatible” 属性包含一个排序的字符串列表，以机器的确切名称开始，后面是
+一个可选的与之兼容的板子列表，从最兼容到最不兼容排序。例如，TI BeagleBoard
+和它的后继者BeagleBoard xM板的根兼容属性可能看起来分别为::
+
+	compatible = "ti,omap3-beagleboard", "ti,omap3450", "ti,omap3";
+	compatible = "ti,omap3-beagleboard-xm", "ti,omap3450", "ti,omap3";
+
+其中 "ti,map3-beagleboard-xm "指定了确切的型号，它还声称它与OMAP 3450 SoC
+以及一般的OMP3系列SoC兼容。你会注意到，该列表从最具体的（确切的板子）到最
+不具体的（SoC系列）进行排序。
+
+聪明的读者可能会指出，Beagle xM也可以声称与原Beagle板兼容。然而，我们应
+该当心在板级上这样做，因为通常情况下，即使在同一产品系列中，每块板都有很高
+的变化，而且当一块板声称与另一块板兼容时，很难确定到底是什么意思。对于高层
+来说，最好是谨慎行事，不要声称一块板子与另一块板子兼容。值得注意的例外是，
+当一块板子是另一块板子的载体时，例如CPU模块连接到一个载体板上。
+
+关于兼容值还有一个注意事项。在兼容属性中使用的任何字符串都必须有文件说明它
+表示什么。在Documentation/devicetree/bindings中添加兼容字符串的文档。
+
+同样在ARM上，对于每个machine_desc，内核会查看是否有任何dt_compat列表条
+目出现在兼容属性中。如果有，那么该机器_desc就是驱动该机器的候选者。在搜索
+了整个machine_descs表之后，setup_machine_fdt()根据每个machine_desc
+在兼容属性中匹配的条目，返回 “最兼容” 的machine_desc。如果没有找到匹配
+的machine_desc，那么它将返回NULL。
+
+这个方案背后的原因是观察到，在大多数情况下，如果它们都使用相同的SoC或相同
+系列的SoC，一个机器_desc可以支持大量的电路板。然而，不可避免地会有一些例
+外情况，即特定的板子需要特殊的设置代码，这在一般情况下是没有用的。特殊情况
+可以通过在通用设置代码中明确检查有问题的板子来处理，但如果超过几个情况下，
+这样做很快就会变得很难看和/或无法维护。
+
+相反，兼容列表允许通用机器_desc通过在dt_compat列表中指定“不太兼容”的值
+来提供对广泛的通用板的支持。在上面的例子中，通用板支持可以声称与“ti,ompa3”
+或“ti,ompa3450”兼容。如果在最初的beagleboard上发现了一个bug，需要在
+早期启动时使用特殊的变通代码，那么可以添加一个新的machine_desc，实现变通，
+并且只在“ti,omap3-beagleboard”上匹配。
+
+PowerPC使用了一个稍微不同的方案，它从每个机器_desc中调用.probe()钩子，
+并使用第一个返回TRUE的钩子。然而，这种方法没有考虑到兼容列表的优先级，对于
+新的架构支持可能应该避免。
+
+2.3 运行时配置
+--------------
+在大多数情况下，DT是将数据从固件传递给内核的唯一方法，所以也被用来传递运行
+时和配置数据，如内核参数字符串和initrd镜像的位置。
+
+这些数据大部分都包含在/chosen节点中，当启动Linux时，它看起来就像这样::
+
+	chosen {
+		bootargs = "console=ttyS0,115200 loglevel=8";
+		initrd-start = <0xc8000000>;
+		initrd-end = <0xc8200000>;
+	};
+
+bootargs属性包含内核参数，initrd-\*属性定义initrd blob的地址和大小。注
+意initrd-end是initrd映像后的第一个地址，所以这与结构体资源的通常语义不一
+致。选择的节点也可以选择包含任意数量的额外属性，用于平台特定的配置数据。
+
+在早期启动过程中，架构设置代码通过不同的辅助回调函数多次调用
+of_scan_flat_dt()来解析设备树数据，然后进行分页设置。of_scan_flat_dt()
+代码扫描设备树，并使用辅助函数来提取早期启动期间所需的信息。通常情况下，
+early_init_dt_scan_chosen()辅助函数用于解析所选节点，包括内核参数，
+early_init_dt_scan_root()用于初始化DT地址空间模型，early_init_dt_scan_memory()
+用于确定可用RAM的大小和位置。
+
+在ARM上，函数setup_machine_fdt()负责在选择支持板子的正确machine_desc
+后，对设备树进行早期扫描。
+
+2.4 设备数量
+------------
+在电路板被识别后，在早期配置数据被解析后，内核初始化可以以正常方式进行。在
+这个过程中的某个时刻，unflatten_device_tree()被调用以将数据转换成更有
+效的运行时表示。这也是调用机器特定设置钩子的时候，比如ARM上的machine_desc
+.init_early()、.init_irq()和.init_machine()钩子。本节的其余部分使用
+了ARM实现的例子，但所有架构在使用DT时都会做几乎相同的事情。
+
+从名称上可以猜到，.init_early()用于在启动过程早期需要执行的任何机器特定设
+置，而.init_irq()则用于设置中断处理。使用DT并不会实质性地改变这两个函数的
+行为。如果提供了DT，那么.init_early()和.init_irq()都能调用任何一个DT查
+询函数（of_* in include/linux/of*.h），以获得关于平台的额外数据。
+
+DT上下文中最有趣的钩子是.init_machine()，它主要负责将平台的数据填充到
+Linux设备模型中。历史上，这在嵌入式平台上是通过在板卡support .c文件中定
+义一组静态时钟结构、platform_devices和其他数据，并在.init_machine()中
+大量注册来实现的。当使用DT时，就不用为每个平台的静态设备进行硬编码，可以通过
+解析DT获得设备列表，并动态分配设备结构体。
+
+最简单的情况是，.init_machine()只负责注册一个platform_devices。
+platform_device是Linux使用的一个概念，用于不能被硬件检测到的内存或I/O映
+射的设备，以及“复合”或 “虚拟”设备（后面会详细介绍）。虽然DT没有“平台设备”的
+术语，但平台设备大致对应于树根的设备节点和简单内存映射总线节点的子节点。
+
+现在是举例说明的好时机。下面是NVIDIA Tegra板的设备树的一部分::
+
+  /{
+	compatible = "nvidia,harmony", "nvidia,tegra20";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&intc>;
+
+	chosen { };
+	aliases { };
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x40000000>;
+	};
+
+	soc {
+		compatible = "nvidia,tegra20-soc", "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+
+		intc: interrupt-controller@50041000 {
+			compatible = "nvidia,tegra20-gic";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			reg = <0x50041000 0x1000>, < 0x50040100 0x0100 >;
+		};
+
+		serial@70006300 {
+			compatible = "nvidia,tegra20-uart";
+			reg = <0x70006300 0x100>;
+			interrupts = <122>;
+		};
+
+		i2s1: i2s@70002800 {
+			compatible = "nvidia,tegra20-i2s";
+			reg = <0x70002800 0x100>;
+			interrupts = <77>;
+			codec = <&wm8903>;
+		};
+
+		i2c@7000c000 {
+			compatible = "nvidia,tegra20-i2c";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x7000c000 0x100>;
+			interrupts = <70>;
+
+			wm8903: codec@1a {
+				compatible = "wlf,wm8903";
+				reg = <0x1a>;
+				interrupts = <347>;
+			};
+		};
+	};
+
+	sound {
+		compatible = "nvidia,harmony-sound";
+		i2s-controller = <&i2s1>;
+		i2s-codec = <&wm8903>;
+	};
+  };
+
+在.init_machine()时，Tegra板支持代码将需要查看这个DT，并决定为哪些节点
+创建platform_devices。然而，看一下这个树，并不能立即看出每个节点代表什么
+类型的设备，甚至不能看出一个节点是否代表一个设备。/chosen、/aliases和
+/memory节点是信息节点，并不描述设备（尽管可以说内存可以被认为是一个设备）。
+/soc节点的子节点是内存映射的设备，但是codec@1a是一个i2c设备，而sound节
+点代表的不是一个设备，而是其他设备是如何连接在一起以创建音频子系统的。我知
+道每个设备是什么，因为我熟悉电路板的设计，但是内核怎么知道每个节点该怎么做？
+
+诀窍在于，内核从树的根部开始，寻找具有“兼容”属性的节点。首先，一般认为任何
+具有“兼容”属性的节点都代表某种设备；其次，可以认为树根的任何节点要么直接连
+接到处理器总线上，要么是无法用其他方式描述的杂项系统设备。对于这些节点中的
+每一个，Linux都会分配和注册一个platform_device，它又可能被绑定到一个
+platform_driver。
+
+为什么为这些节点使用platform_device是一个安全的假设？嗯，就Linux对设备
+的建模方式而言，几乎所有的总线类型都假定其设备是总线控制器的孩子。例如，每
+个i2c_client是i2c_master的一个子节点。每个spi_device都是SPI总线的一
+个子节点。类似的还有USB、PCI、MDIO等。同样的层次结构也出现在DT中，I2C设
+备节点只作为I2C总线节点的子节点出现。同理，SPI、MDIO、USB等等。唯一不需
+要特定类型的父设备的设备是platform_devices（和amba_devices，但后面会
+详细介绍），它们将愉快地运行在Linux/sys/devices树的底部。因此，如果一个
+DT节点位于树的根部，那么它真的可能最好注册为platform_device。
+
+Linux板支持代码调用of_platform_populate(NULL, NULL, NULL, NULL)来
+启动树根的设备发现。参数都是NULL，因为当从树的根部开始时，不需要提供一个起
+始节点（第一个NULL），一个父结构设备（最后一个NULL），而且我们没有使用匹配
+表（尚未）。对于只需要注册设备的板子，除了of_platform_populate()的调用，
+.init_machine()可以完全为空。
+
+在Tegra的例子中，这说明了/soc和/sound节点，但是SoC节点的子节点呢？它们
+不应该也被注册为平台设备吗？对于Linux DT支持，一般的行为是子设备在驱动
+.probe()时被父设备驱动注册。因此，一个i2c总线设备驱动程序将为每个子节点
+注册一个i2c_client，一个SPI总线驱动程序将注册其spi_device子节点，其他
+总线类型也是如此。根据该模型，可以编写一个与SoC节点绑定的驱动程序，并简单
+地为其每个子节点注册platform_device。板卡支持代码将分配和注册一个SoC设
+备，一个（理论上的）SoC设备驱动程序可以绑定到SoC设备，并在其.probe()钩
+中为/soc/interruptcontroller、/soc/serial、/soc/i2s和/soc/i2c注
+册platform_devices。很简单，对吗？
+
+实际上，事实证明，将一些platform_device的子设备注册为更多的platform_device
+是一种常见的模式，设备树支持代码反映了这一点，并使上述例子更简单。
+of_platform_populate()的第二个参数是一个of_device_id表，任何与该表
+中的条目相匹配的节点也将获得其子节点的注册。在Tegra的例子中，代码可以是
+这样的::
+
+  static void __init harmony_init_machine(void)
+  {
+	/* ... */
+	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+  }
+
+“simple-bus”在Devicetree规范中被定义为一个属性，意味着一个简单的内存映射
+的总线，所以of_platform_populate()代码可以被写成只是假设简单总线兼容的节
+点将总是被遍历。然而，我们把它作为一个参数传入，以便电路板支持代码可以随时覆
+盖默认行为。
+
+[需要添加关于添加i2c/spi/etc子设备的讨论] 。
+
+附录A：AMBA设备
+---------------
+
+ARM Primecell是连接到ARM AMBA总线的某种设备，它包括对硬件检测和电源管理
+的一些支持。在Linux中，amba_device和amba_bus_type结构体被用来表示
+Primecell设备。然而，棘手的一点是，AMBA总线上的所有设备并非都是Primecell，
+而且对于Linux来说，典型的情况是amba_device和platform_device实例都是同
+一总线段的同义词。
+
+当使用DT时，这给of_platform_populate()带来了问题，因为它必须决定是否将
+每个节点注册为platform_device或amba_device。不幸的是，这使设备创建模型
+变得有点复杂，但解决方案原来并不是太具有侵略性。如果一个节点与“arm,amba-primecell”
+兼容，那么of_platform_populate()将把它注册为amba_device而不是
+platform_device。
diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index 46e14ec9963d..88d8df957a78 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -5,7 +5,7 @@
 	\renewcommand\thesection*
 	\renewcommand\thesubsection*
 	\kerneldocCJKon
-	\kerneldocBeginSC
+	\kerneldocBeginSC{
 
 .. _linux_doc_zh:
 
@@ -56,10 +56,14 @@ TODOList:
 
 下列文档描述了内核需要的平台固件相关信息。
 
+.. toctree::
+   :maxdepth: 2
+
+   devicetree/index
+
 TODOList:
 
 * firmware-guide/index
-* devicetree/index
 
 应用程序开发人员文档
 --------------------
@@ -104,14 +108,17 @@ TODOList:
    :maxdepth: 2
 
    core-api/index
+   accounting/index
    cpu-freq/index
    iio/index
+   infiniband/index
+   power/index
+   virt/index
    sound/index
    filesystems/index
-   virt/index
-   infiniband/index
-   accounting/index
    scheduler/index
+   vm/index
+   peci/index
 
 TODOList:
 
@@ -129,7 +136,6 @@ TODOList:
 * netlabel/index
 * networking/index
 * pcmcia/index
-* power/index
 * target/index
 * timers/index
 * spi/index
@@ -140,7 +146,6 @@ TODOList:
 * gpu/index
 * security/index
 * crypto/index
-* vm/index
 * bpf/index
 * usb/index
 * PCI/index
@@ -198,4 +203,4 @@ TODOList:
 
 .. raw:: latex
 
-	\kerneldocEndSC
+	}\kerneldocEndSC
diff --git a/Documentation/translations/zh_CN/peci/index.rst b/Documentation/translations/zh_CN/peci/index.rst
new file mode 100644
index 000000000000..4f6694c828fa
--- /dev/null
+++ b/Documentation/translations/zh_CN/peci/index.rst
@@ -0,0 +1,26 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/peci/index.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+=================
+Linux PECI 子系统
+=================
+
+.. toctree::
+
+
+   peci
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/translations/zh_CN/peci/peci.rst b/Documentation/translations/zh_CN/peci/peci.rst
new file mode 100644
index 000000000000..a3b4f99b994c
--- /dev/null
+++ b/Documentation/translations/zh_CN/peci/peci.rst
@@ -0,0 +1,54 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/peci/peci.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+====
+概述
+====
+
+平台环境控制接口（PECI）是英特尔处理器和管理控制器（如底板管理控制器，BMC）
+之间的一个通信接口。PECI提供的服务允许管理控制器通过访问各种寄存器来配置、监
+控和调试平台。它定义了一个专门的命令协议，管理控制器作为PECI的发起者，处理器
+作为PECI的响应者。PECI可以用于基于单处理器和多处理器的系统中。
+
+注意：英特尔PECI规范没有作为专门的文件发布，而是作为英特尔CPU的外部设计规范
+（EDS）的一部分。外部设计规范通常是不公开的。
+
+PECI 线
+---------
+
+PECI线接口使用单线进行自锁和数据传输。它不需要任何额外的控制线--物理层是一个
+自锁的单线总线信号，每一个比特都从接近零伏的空闲状态开始驱动、上升边缘。驱动高
+电平信号的持续时间可以确定位值是逻辑 “0” 还是逻辑 “1”。PECI线还包括与每个信
+息建立的可变数据速率。
+
+对于PECI线，每个处理器包将在一个定义的范围内利用唯一的、固定的地址，该地址应
+该与处理器插座ID有固定的关系--如果其中一个处理器被移除，它不会影响其余处理器
+的地址。
+
+PECI子系统代码内嵌文档
+------------------------
+
+该API在以下内核代码中:
+
+include/linux/peci.h
+
+drivers/peci/internal.h
+
+drivers/peci/core.c
+
+drivers/peci/request.c
+
+PECI CPU 驱动 API
+-------------------
+
+该API在以下内核代码中:
+
+drivers/peci/cpu.c
diff --git a/Documentation/translations/zh_CN/power/energy-model.rst b/Documentation/translations/zh_CN/power/energy-model.rst
new file mode 100644
index 000000000000..c7da1b6aefee
--- /dev/null
+++ b/Documentation/translations/zh_CN/power/energy-model.rst
@@ -0,0 +1,190 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/power/energy-model.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <[email protected]>
+
+============
+设备能量模型
+============
+
+1. 概述
+-------
+
+能量模型（EM）框架是一种驱动程序与内核子系统之间的接口。其中驱动程序了解不同
+性能层级的设备所消耗的功率，而内核子系统愿意使用该信息做出能量感知决策。
+
+设备所消耗的功率的信息来源在不同的平台上可能有很大的不同。这些功率成本在某些
+情况下可以使用设备树数据来估算。在其它情况下，固件会更清楚。或者，用户空间可能
+是最清楚的。以此类推。为了避免每一个客户端子系统对每一种可能的信息源自己重新
+实现支持，EM框架作为一个抽象层介入，它在内核中对功率成本表的格式进行标准化，
+因此能够避免多余的工作。
+
+功率值可以用毫瓦或“抽象刻度”表示。多个子系统可能使用EM，由系统集成商来检查
+功率值刻度类型的要求是否满足。可以在能量感知调度器的文档中找到一个例子
+Documentation/scheduler/sched-energy.rst。对于一些子系统，比如热能或
+powercap，用“抽象刻度”描述功率值可能会导致问题。这些子系统对过去使用的功率的
+估算值更感兴趣，因此可能需要真实的毫瓦。这些要求的一个例子可以在智能功率分配
+Documentation/driver-api/thermal/power_allocator.rst文档中找到。
+
+内核子系统可能（基于EM内部标志位）实现了对EM注册设备是否具有不一致刻度的自动
+检查。要记住的重要事情是，当功率值以“抽象刻度”表示时，从中推导以毫焦耳为单位
+的真实能量消耗是不可能的。
+
+下图描述了一个驱动的例子（这里是针对Arm的，但该方法适用于任何体系结构），它
+向EM框架提供了功率成本，感兴趣的客户端可从中读取数据::
+
+       +---------------+  +-----------------+  +---------------+
+       | Thermal (IPA) |  | Scheduler (EAS) |  |     Other     |
+       +---------------+  +-----------------+  +---------------+
+               |                   | em_cpu_energy()   |
+               |                   | em_cpu_get()      |
+               +---------+         |         +---------+
+                         |         |         |
+                         v         v         v
+                        +---------------------+
+                        |    Energy Model     |
+                        |     Framework       |
+                        +---------------------+
+                           ^       ^       ^
+                           |       |       | em_dev_register_perf_domain()
+                +----------+       |       +---------+
+                |                  |                 |
+        +---------------+  +---------------+  +--------------+
+        |  cpufreq-dt   |  |   arm_scmi    |  |    Other     |
+        +---------------+  +---------------+  +--------------+
+                ^                  ^                 ^
+                |                  |                 |
+        +--------------+   +---------------+  +--------------+
+        | Device Tree  |   |   Firmware    |  |      ?       |
+        +--------------+   +---------------+  +--------------+
+
+对于CPU设备，EM框架管理着系统中每个“性能域”的功率成本表。一个性能域是一组
+性能一起伸缩的CPU。性能域通常与CPUFreq策略具有1对1映射。一个性能域中的
+所有CPU要求具有相同的微架构。不同性能域中的CPU可以有不同的微架构。
+
+
+2. 核心API
+----------
+
+2.1 配置选项
+^^^^^^^^^^^^
+
+必须使能CONFIG_ENERGY_MODEL才能使用EM框架。
+
+
+2.2 性能域的注册
+^^^^^^^^^^^^^^^^
+
+“高级”EM的注册
+~~~~~~~~~~~~~~~~
+
+“高级”EM因它允许驱动提供更精确的功率模型而得名。它并不受限于框架中的一些已
+实现的数学公式（就像“简单”EM那样）。它可以更好地反映每个性能状态的实际功率
+测量。因此，在EM静态功率（漏电流功率）是重要的情况下，应该首选这种注册方式。
+
+驱动程序应通过以下API将性能域注册到EM框架中::
+
+  int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
+		struct em_data_callback *cb, cpumask_t *cpus, bool milliwatts);
+
+驱动程序必须提供一个回调函数，为每个性能状态返回<频率,功率>元组。驱动程序
+提供的回调函数可以自由地从任何相关位置（DT、固件......）以及以任何被认为是
+必要的方式获取数据。只有对于CPU设备，驱动程序必须使用cpumask指定性能域的CPU。
+对于CPU以外的其他设备，最后一个参数必须被设置为NULL。
+
+最后一个参数“milliwatts”（毫瓦）设置成正确的值是很重要的，使用EM的内核
+子系统可能会依赖这个标志来检查所有的EM设备是否使用相同的刻度。如果有不同的
+刻度，这些子系统可能决定：返回警告/错误，停止工作或崩溃（panic）。
+
+关于实现这个回调函数的驱动程序的例子，参见第3节。或者在第2.4节阅读这个API
+的更多文档。
+
+
+“简单”EM的注册
+~~~~~~~~~~~~~~~~
+
+“简单”EM是用框架的辅助函数cpufreq_register_em_with_opp()注册的。它实现了
+一个和以下数学公式紧密相关的功率模型::
+
+	Power = C * V^2 * f
+
+使用这种方法注册的EM可能无法正确反映真实设备的物理特性，例如当静态功率
+（漏电流功率）很重要时。
+
+
+2.3 访问性能域
+^^^^^^^^^^^^^^
+
+有两个API函数提供对能量模型的访问。em_cpu_get()以CPU id为参数，em_pd_get()
+以设备指针为参数。使用哪个接口取决于子系统，但对于CPU设备来说，这两个函数都返
+回相同的性能域。
+
+对CPU的能量模型感兴趣的子系统可以通过em_cpu_get() API检索它。在创建性能域时
+分配一次能量模型表，它保存在内存中不被修改。
+
+一个性能域所消耗的能量可以使用em_cpu_energy() API来估算。该估算假定CPU设备
+使用的CPUfreq监管器是schedutil。当前该计算不能提供给其它类型的设备。
+
+关于上述API的更多细节可以在 ``<linux/energy_model.h>`` 或第2.4节中找到。
+
+
+2.4 API的细节描述
+^^^^^^^^^^^^^^^^^
+参见 include/linux/energy_model.h 和 kernel/power/energy_model.c 的kernel doc。
+
+3. 驱动示例
+-----------
+
+CPUFreq框架支持专用的回调函数，用于为指定的CPU（们）注册EM：
+cpufreq_driver::register_em()。这个回调必须为每个特定的驱动程序正确实现，
+因为框架会在设置过程中适时地调用它。本节提供了一个简单的例子，展示CPUFreq驱动
+在能量模型框架中使用（假的）“foo”协议注册性能域。该驱动实现了一个est_power()
+函数提供给EM框架::
+
+  -> drivers/cpufreq/foo_cpufreq.c
+
+  01	static int est_power(unsigned long *mW, unsigned long *KHz,
+  02			struct device *dev)
+  03	{
+  04		long freq, power;
+  05
+  06		/* 使用“foo”协议设置频率上限 */
+  07		freq = foo_get_freq_ceil(dev, *KHz);
+  08		if (freq < 0);
+  09			return freq;
+  10
+  11		/* 估算相关频率下设备的功率成本 */
+  12		power = foo_estimate_power(dev, freq);
+  13		if (power < 0);
+  14			return power;
+  15
+  16		/* 将这些值返回给EM框架 */
+  17		*mW = power;
+  18		*KHz = freq;
+  19
+  20		return 0;
+  21	}
+  22
+  23	static void foo_cpufreq_register_em(struct cpufreq_policy *policy)
+  24	{
+  25		struct em_data_callback em_cb = EM_DATA_CB(est_power);
+  26		struct device *cpu_dev;
+  27		int nr_opp;
+  28
+  29		cpu_dev = get_cpu_device(cpumask_first(policy->cpus));
+  30
+  31     	/* 查找该策略支持的OPP数量 */
+  32     	nr_opp = foo_get_nr_opp(policy);
+  33
+  34     	/* 并注册新的性能域 */
+  35     	em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus,
+  36					    true);
+  37	}
+  38
+  39	static struct cpufreq_driver foo_cpufreq_driver = {
+  40		.register_em = foo_cpufreq_register_em,
+  41	};
diff --git a/Documentation/translations/zh_CN/power/index.rst b/Documentation/translations/zh_CN/power/index.rst
new file mode 100644
index 000000000000..bc54983ba515
--- /dev/null
+++ b/Documentation/translations/zh_CN/power/index.rst
@@ -0,0 +1,56 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/power/index.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <[email protected]>
+
+========
+电源管理
+========
+
+.. toctree::
+    :maxdepth: 1
+
+    energy-model
+    opp
+
+TODOList:
+
+    * apm-acpi
+    * basic-pm-debugging
+    * charger-manager
+    * drivers-testing
+    * freezing-of-tasks
+    * pci
+    * pm_qos_interface
+    * power_supply_class
+    * runtime_pm
+    * s2ram
+    * suspend-and-cpuhotplug
+    * suspend-and-interrupts
+    * swsusp-and-swap-files
+    * swsusp-dmcrypt
+    * swsusp
+    * video
+    * tricks
+
+    * userland-swsusp
+
+    * powercap/powercap
+    * powercap/dtpm
+
+    * regulator/consumer
+    * regulator/design
+    * regulator/machine
+    * regulator/overview
+    * regulator/regulator
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/translations/zh_CN/power/opp.rst b/Documentation/translations/zh_CN/power/opp.rst
new file mode 100644
index 000000000000..8d6e3f6f6202
--- /dev/null
+++ b/Documentation/translations/zh_CN/power/opp.rst
@@ -0,0 +1,341 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/power/opp.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <[email protected]>
+
+======================
+操作性能值（OPP）库
+======================
+
+(C) 2009-2010 Nishanth Menon <[email protected]>, 德州仪器公司
+
+.. 目录
+
+  1. 简介
+  2. OPP链表初始注册
+  3. OPP搜索函数
+  4. OPP可用性控制函数
+  5. OPP数据检索函数
+  6. 数据结构
+
+1. 简介
+=======
+
+1.1 何为操作性能值（OPP）？
+------------------------------
+
+当今复杂的单片系统（SoC）由多个子模块组成，这些子模块会联合工作。在一个执行不同用例
+的操作系统中，并不是SoC中的所有模块都需要一直以最高频率工作。为了促成这一点，SoC中
+的子模块被分组为不同域，允许一些域以较低的电压和频率运行，而其它域则以较高的“电压/
+频率对”运行。
+
+设备按域支持的由频率电压对组成的离散的元组的集合，被称为操作性能值（组），或OPPs。
+
+举例来说：
+
+让我们考虑一个支持下述频率、电压值的内存保护单元（MPU）设备：
+{300MHz，最低电压为1V}, {800MHz，最低电压为1.2V}, {1GHz，最低电压为1.3V}
+
+我们能将它们表示为3个OPP，如下述{Hz, uV}元组（译注：频率的单位是赫兹，电压的单位是
+微伏）。
+
+- {300000000, 1000000}
+- {800000000, 1200000}
+- {1000000000, 1300000}
+
+1.2 操作性能值库
+----------------
+
+OPP库提供了一组辅助函数来组织和查询OPP信息。该库位于drivers/opp/目录下，其头文件
+位于include/linux/pm_opp.h中。OPP库可以通过开启CONFIG_PM_OPP来启用。某些SoC，
+如德州仪器的OMAP框架允许在不需要cpufreq的情况下可选地在某一OPP下启动。
+
+OPP库的典型用法如下::
+
+ （用户）        -> 注册一个默认的OPP集合               -> （库）
+ （SoC框架）     -> 在必要的情况下，对某些OPP进行修改     -> OPP layer
+                -> 搜索/检索信息的查询                 ->
+
+OPP层期望每个域由一个唯一的设备指针来表示。SoC框架在OPP层为每个设备注册了一组初始
+OPP。这个链表的长度被期望是一个最优化的小数字，通常每个设备大约5个。初始链表包含了
+一个OPP集合，这个集合被期望能在系统中安全使能。
+
+关于OPP可用性的说明
+^^^^^^^^^^^^^^^^^^^
+
+随着系统的运行，SoC框架可能会基于各种外部因素选择让某些OPP在每个设备上可用或不可用，
+示例：温度管理或其它异常场景中，SoC框架可能会选择禁用一个较高频率的OPP以安全地继续
+运行，直到该OPP被重新启用（如果可能）。
+
+OPP库在它的实现中达成了这个概念。以下操作函数只能对可用的OPP使用：
+dev_pm_opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage,
+dev_pm_opp_get_freq, dev_pm_opp_get_opp_count。
+
+dev_pm_opp_find_freq_exact是用来查找OPP指针的，该指针可被用在dev_pm_opp_enable/
+disable函数，使一个OPP在被需要时变为可用。
+
+警告：如果对一个设备调用dev_pm_opp_enable/disable函数，OPP库的用户应该使用
+dev_pm_opp_get_opp_count来刷新OPP的可用性计数。触发这些的具体机制，或者对有依赖的
+子系统（比如cpufreq）的通知机制，都是由使用OPP库的SoC特定框架酌情处理的。在这些操作
+中，同样需要注意刷新cpufreq表。
+
+2. OPP链表初始注册
+==================
+SoC的实现会迭代调用dev_pm_opp_add函数来增加每个设备的OPP。预期SoC框架将以最优的
+方式注册OPP条目 - 典型的数字范围小于5。通过注册OPP生成的OPP链表，在整个设备运行过程
+中由OPP库维护。SoC框架随后可以使用dev_pm_opp_enable / disable函数动态地
+控制OPP的可用性。
+
+dev_pm_opp_add
+	为设备指针所指向的特定域添加一个新的OPP。OPP是用频率和电压定义的。一旦完成
+	添加，OPP被认为是可用的，可以用dev_pm_opp_enable/disable函数来控制其可用性。
+	OPP库内部用dev_pm_opp结构体存储并管理这些信息。这个函数可以被SoC框架根据SoC
+	的使用环境的需求来定义一个最优链表。
+
+	警告：
+		不要在中断上下文使用这个函数。
+
+	示例::
+
+	 soc_pm_init()
+	 {
+		/* 做一些事情 */
+		r = dev_pm_opp_add(mpu_dev, 1000000, 900000);
+		if (!r) {
+			pr_err("%s: unable to register mpu opp(%d)\n", r);
+			goto no_cpufreq;
+		}
+		/* 做一些和cpufreq相关的事情 */
+	 no_cpufreq:
+		/* 做剩余的事情 */
+	 }
+
+3. OPP搜索函数
+==============
+cpufreq等高层框架对频率进行操作，为了将频率映射到相应的OPP，OPP库提供了便利的函数
+来搜索OPP库内部管理的OPP链表。这些搜索函数如果找到匹配的OPP，将返回指向该OPP的指针，
+否则返回错误。这些错误预计由标准的错误检查，如IS_ERR()来处理，并由调用者采取适当的
+行动。
+
+这些函数的调用者应在使用完OPP后调用dev_pm_opp_put()。否则，OPP的内存将永远不会
+被释放，并导致内存泄露。
+
+dev_pm_opp_find_freq_exact
+	根据 *精确的* 频率和可用性来搜索OPP。这个函数对默认不可用的OPP特别有用。
+	例子：在SoC框架检测到更高频率可用的情况下，它可以使用这个函数在调用
+	dev_pm_opp_enable之前找到OPP::
+
+	 opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
+	 dev_pm_opp_put(opp);
+	 /* 不要操作指针.. 只是做有效性检查.. */
+	 if (IS_ERR(opp)) {
+		pr_err("frequency not disabled!\n");
+		/* 触发合适的操作.. */
+	 } else {
+		dev_pm_opp_enable(dev,1000000000);
+	 }
+
+	注意：
+	  这是唯一一个可以搜索不可用OPP的函数。
+
+dev_pm_opp_find_freq_floor
+	搜索一个 *最多* 提供指定频率的可用OPP。这个函数在搜索较小的匹配或按频率
+	递减的顺序操作OPP信息时很有用。
+	例子：要找的一个设备的最高OPP::
+
+	 freq = ULONG_MAX;
+	 opp = dev_pm_opp_find_freq_floor(dev, &freq);
+	 dev_pm_opp_put(opp);
+
+dev_pm_opp_find_freq_ceil
+	搜索一个 *最少* 提供指定频率的可用OPP。这个函数在搜索较大的匹配或按频率
+	递增的顺序操作OPP信息时很有用。
+	例1：找到一个设备最小的OPP::
+
+	 freq = 0;
+	 opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+	 dev_pm_opp_put(opp);
+
+	例: 一个SoC的cpufreq_driver->target的简易实现::
+
+	 soc_cpufreq_target(..)
+	 {
+		/* 做策略检查等操作 */
+		/* 找到和请求最接近的频率 */
+		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+		dev_pm_opp_put(opp);
+		if (!IS_ERR(opp))
+			soc_switch_to_freq_voltage(freq);
+		else
+			/* 当不能满足请求时，要做的事 */
+		/* 做其它事 */
+	 }
+
+4. OPP可用性控制函数
+====================
+在OPP库中注册的默认OPP链表也许无法满足所有可能的场景。OPP库提供了一套函数来修改
+OPP链表中的某个OPP的可用性。这使得SoC框架能够精细地动态控制哪一组OPP是可用于操作
+的。设计这些函数的目的是在诸如考虑温度时 *暂时地* 删除某个OPP（例如，在温度下降
+之前不要使用某OPP）。
+
+警告：
+	不要在中断上下文使用这些函数。
+
+dev_pm_opp_enable
+	使一个OPP可用于操作。
+	例子：假设1GHz的OPP只有在SoC温度低于某个阈值时才可用。SoC框架的实现可能
+	会选择做以下事情::
+
+	 if (cur_temp < temp_low_thresh) {
+		/* 若1GHz未使能，则使能 */
+		opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
+		dev_pm_opp_put(opp);
+		/* 仅仅是错误检查 */
+		if (!IS_ERR(opp))
+			ret = dev_pm_opp_enable(dev, 1000000000);
+		else
+			goto try_something_else;
+	 }
+
+dev_pm_opp_disable
+	使一个OPP不可用于操作。
+	例子：假设1GHz的OPP只有在SoC温度高于某个阈值时才可用。SoC框架的实现可能
+	会选择做以下事情::
+
+	 if (cur_temp > temp_high_thresh) {
+		/* 若1GHz已使能，则关闭 */
+		opp = dev_pm_opp_find_freq_exact(dev, 1000000000, true);
+		dev_pm_opp_put(opp);
+		/* 仅仅是错误检查 */
+		if (!IS_ERR(opp))
+			ret = dev_pm_opp_disable(dev, 1000000000);
+		else
+			goto try_something_else;
+	 }
+
+5. OPP数据检索函数
+==================
+由于OPP库对OPP信息进行了抽象化处理，因此需要一组函数来从dev_pm_opp结构体中提取
+信息。一旦使用搜索函数检索到一个OPP指针，以下函数就可以被SoC框架用来检索OPP层
+内部描述的信息。
+
+dev_pm_opp_get_voltage
+	检索OPP指针描述的电压。
+	例子: 当cpufreq切换到到不同频率时，SoC框架需要用稳压器框架将OPP描述
+	的电压设置到提供电压的电源管理芯片中::
+
+	 soc_switch_to_freq_voltage(freq)
+	 {
+		/* 做一些事情 */
+		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+		v = dev_pm_opp_get_voltage(opp);
+		dev_pm_opp_put(opp);
+		if (v)
+			regulator_set_voltage(.., v);
+		/* 做其它事 */
+	 }
+
+dev_pm_opp_get_freq
+	检索OPP指针描述的频率。
+	例子：比方说，SoC框架使用了几个辅助函数，通过这些函数，我们可以将OPP
+	指针传入，而不是传入额外的参数，用来处理一系列数据参数::
+
+	 soc_cpufreq_target(..)
+	 {
+		/* 做一些事情.. */
+		 max_freq = ULONG_MAX;
+		 max_opp = dev_pm_opp_find_freq_floor(dev,&max_freq);
+		 requested_opp = dev_pm_opp_find_freq_ceil(dev,&freq);
+		 if (!IS_ERR(max_opp) && !IS_ERR(requested_opp))
+			r = soc_test_validity(max_opp, requested_opp);
+		 dev_pm_opp_put(max_opp);
+		 dev_pm_opp_put(requested_opp);
+		/* 做其它事 */
+	 }
+	 soc_test_validity(..)
+	 {
+		 if(dev_pm_opp_get_voltage(max_opp) < dev_pm_opp_get_voltage(requested_opp))
+			 return -EINVAL;
+		 if(dev_pm_opp_get_freq(max_opp) < dev_pm_opp_get_freq(requested_opp))
+			 return -EINVAL;
+		/* 做一些事情.. */
+	 }
+
+dev_pm_opp_get_opp_count
+	检索某个设备可用的OPP数量。
+	例子：假设SoC中的一个协处理器需要知道某个表中的可用频率，主处理器可以
+	按如下方式发出通知::
+
+	 soc_notify_coproc_available_frequencies()
+	 {
+		/* 做一些事情 */
+		num_available = dev_pm_opp_get_opp_count(dev);
+		speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL);
+		/* 按升序填充表 */
+		freq = 0;
+		while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) {
+			speeds[i] = freq;
+			freq++;
+			i++;
+			dev_pm_opp_put(opp);
+		}
+
+		soc_notify_coproc(AVAILABLE_FREQs, speeds, num_available);
+		/* 做其它事 */
+	 }
+
+6. 数据结构
+===========
+通常，一个SoC包含多个可变电压域。每个域由一个设备指针描述。和OPP之间的关系可以
+按以下方式描述::
+
+  SoC
+   |- device 1
+   |	|- opp 1 (availability, freq, voltage)
+   |	|- opp 2 ..
+   ...	...
+   |	`- opp n ..
+   |- device 2
+   ...
+   `- device m
+
+OPP库维护着一个内部链表，SoC框架使用上文描述的各个函数来填充和访问。然而，描述
+真实OPP和域的结构体是OPP库自身的内部组成，以允许合适的抽象在不同系统中得到复用。
+
+struct dev_pm_opp
+	OPP库的内部数据结构，用于表示一个OPP。除了频率、电压、可用性信息外，
+	它还包含OPP库运行所需的内部统计信息。指向这个结构体的指针被提供给
+	用户（比如SoC框架）使用，在与OPP层的交互中作为OPP的标识符。
+
+	警告：
+	  结构体dev_pm_opp的指针不应该由用户解析或修改。一个实例的默认值由
+	  dev_pm_opp_add填充，但OPP的可用性由dev_pm_opp_enable/disable函数
+	  修改。
+
+struct device
+	这用于向OPP层标识一个域。设备的性质和它的实现是由OPP库的用户决定的，
+	如SoC框架。
+
+总体来说，以一个简化的视角看，对数据结构的操作可以描述为下面各图::
+
+  初始化 / 修改:
+              +-----+        /- dev_pm_opp_enable
+  dev_pm_opp_add --> | opp | <-------
+    |         +-----+        \- dev_pm_opp_disable
+    \-------> domain_info(device)
+
+  搜索函数:
+               /-- dev_pm_opp_find_freq_ceil  ---\   +-----+
+  domain_info<---- dev_pm_opp_find_freq_exact -----> | opp |
+               \-- dev_pm_opp_find_freq_floor ---/   +-----+
+
+  检索函数:
+  +-----+     /- dev_pm_opp_get_voltage
+  | opp | <---
+  +-----+     \- dev_pm_opp_get_freq
+
+  domain_info <- dev_pm_opp_get_opp_count
diff --git a/Documentation/translations/zh_CN/riscv/index.rst b/Documentation/translations/zh_CN/riscv/index.rst
index bbf5d7b3777a..614cde0c0997 100644
--- a/Documentation/translations/zh_CN/riscv/index.rst
+++ b/Documentation/translations/zh_CN/riscv/index.rst
@@ -18,6 +18,7 @@ RISC-V 体系结构
     :maxdepth: 1
 
     boot-image-header
+    vm-layout
     pmu
     patch-acceptance
 
diff --git a/Documentation/translations/zh_CN/riscv/vm-layout.rst b/Documentation/translations/zh_CN/riscv/vm-layout.rst
new file mode 100644
index 000000000000..585cb89317a3
--- /dev/null
+++ b/Documentation/translations/zh_CN/riscv/vm-layout.rst
@@ -0,0 +1,67 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/riscv/vm-layout.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+============================
+RISC-V Linux上的虚拟内存布局
+============================
+
+:作者: Alexandre Ghiti <[email protected]>
+:日期: 12 February 2021
+
+这份文件描述了RISC-V Linux内核使用的虚拟内存布局。
+
+32位 RISC-V Linux 内核
+======================
+
+RISC-V Linux Kernel SV32
+------------------------
+
+TODO
+
+64位 RISC-V Linux 内核
+======================
+
+RISC-V特权架构文档指出，64位地址 "必须使第63-48位值都等于第47位，否则将发生缺页异常。"：这将虚
+拟地址空间分成两半，中间有一个非常大的洞，下半部分是用户空间所在的地方，上半部分是RISC-V Linux
+内核所在的地方。
+
+RISC-V Linux Kernel SV39
+------------------------
+
+::
+
+  ========================================================================================================================
+      开始地址       |   偏移      |     结束地址      |  大小    | 虚拟内存区域描述
+  ========================================================================================================================
+                    |            |                  |         |
+   0000000000000000 |    0       | 0000003fffffffff |  256 GB | 用户空间虚拟内存，每个内存管理器不同
+  __________________|____________|__________________|_________|___________________________________________________________
+                    |            |                  |         |
+   0000004000000000 | +256    GB | ffffffbfffffffff | ~16M TB | ... 巨大的、几乎64位宽的直到内核映射的-256GB地方
+                    |            |                  |         |     开始偏移的非经典虚拟内存地址空洞。
+                    |            |                  |         |
+  __________________|____________|__________________|_________|___________________________________________________________
+                                                              |
+                                                              | 内核空间的虚拟内存，在所有进程之间共享:
+  ____________________________________________________________|___________________________________________________________
+                    |            |                  |         |
+   ffffffc6fee00000 | -228    GB | ffffffc6feffffff |    2 MB | fixmap
+   ffffffc6ff000000 | -228    GB | ffffffc6ffffffff |   16 MB | PCI io
+   ffffffc700000000 | -228    GB | ffffffc7ffffffff |    4 GB | vmemmap
+   ffffffc800000000 | -224    GB | ffffffd7ffffffff |   64 GB | vmalloc/ioremap space
+   ffffffd800000000 | -160    GB | fffffff6ffffffff |  124 GB | 直接映射所有物理内存
+   fffffff700000000 |  -36    GB | fffffffeffffffff |   32 GB | kasan
+  __________________|____________|__________________|_________|____________________________________________________________
+                                                              |
+                                                              |
+  ____________________________________________________________|____________________________________________________________
+                    |            |                  |         |
+   ffffffff00000000 |   -4    GB | ffffffff7fffffff |    2 GB | modules, BPF
+   ffffffff80000000 |   -2    GB | ffffffffffffffff |    2 GB | kernel
+  __________________|____________|__________________|_________|____________________________________________________________
diff --git a/Documentation/translations/zh_CN/scheduler/index.rst b/Documentation/translations/zh_CN/scheduler/index.rst
index f8f8f35d53c7..12bf3bd02ccf 100644
--- a/Documentation/translations/zh_CN/scheduler/index.rst
+++ b/Documentation/translations/zh_CN/scheduler/index.rst
@@ -5,6 +5,7 @@
 :翻译:
 
  司延腾 Yanteng Si <[email protected]>
+ 唐艺舟 Tang Yizhou <[email protected]>
 
 :校译:
 
@@ -23,16 +24,14 @@ Linux调度器
     sched-design-CFS
     sched-domains
     sched-capacity
-
+    sched-energy
+    sched-nice-design
+    sched-stats
 
 TODOList:
 
-    sched-bwc
     sched-deadline
-    sched-energy
-    sched-nice-design
     sched-rt-group
-    sched-stats
 
     text_files
 
diff --git a/Documentation/translations/zh_CN/scheduler/sched-energy.rst b/Documentation/translations/zh_CN/scheduler/sched-energy.rst
new file mode 100644
index 000000000000..fdbf6cfeea93
--- /dev/null
+++ b/Documentation/translations/zh_CN/scheduler/sched-energy.rst
@@ -0,0 +1,351 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/scheduler/sched-energy.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <[email protected]>
+
+============
+能量感知调度
+============
+
+1. 简介
+-------
+
+能量感知调度（EAS）使调度器有能力预测其决策对CPU所消耗的能量的影响。EAS依靠
+一个能量模型（EM）来为每个任务选择一个节能的CPU，同时最小化对吞吐率的影响。
+本文档致力于介绍介绍EAS是如何工作的，它背后的主要设计决策是什么，以及使其运行
+所需的条件细节。
+
+在进一步阅读之前，请注意，在撰写本文时::
+
+   /!\ EAS不支持对称CPU拓扑的平台 /!\
+
+EAS只在异构CPU拓扑结构（如Arm大小核，big.LITTLE）上运行。因为在这种情况下，
+通过调度来节约能量的潜力是最大的。
+
+EAS实际使用的EM不是由调度器维护的，而是一个专门的框架。关于这个框架的细节和
+它提供的内容，请参考其文档（见Documentation/power/energy-model.rst）。
+
+
+2. 背景和术语
+-------------
+
+从一开始就说清楚定义:
+ - 能量 = [焦耳] （比如供电设备上的电池提供的资源）
+ - 功率 = 能量/时间 = [焦耳/秒] = [瓦特]
+
+ EAS的目标是最小化能量消耗，同时仍能将工作完成。也就是说，我们要最大化::
+
+	性能 [指令数/秒]
+	----------------
+	   功率 [瓦特]
+
+它等效于最小化::
+
+	能量 [焦耳]
+	-----------
+	   指令数
+
+同时仍然获得“良好”的性能。当前调度器只考虑性能目标，因此该式子本质上是一个
+可选的优化目标，它同时考虑了两个目标：能量效率和性能。
+
+引入EM的想法是为了让调度器评估其决策的影响，而不是盲目地应用可能仅在部分
+平台有正面效果的节能技术。同时，EM必须尽可能的简单，以最小化调度器的时延
+影响。
+
+简而言之，EAS改变了CFS任务分配给CPU的方式。当调度器决定一个任务应该在哪里
+运行时（在唤醒期间），EM被用来在不损害系统吞吐率的情况下，从几个较好的候选
+CPU中挑选一个经预测能量消耗最优的CPU。EAS的预测依赖于对平台拓扑结构特定元素
+的了解，包括CPU的“算力”，以及它们各自的能量成本。
+
+
+3. 拓扑信息
+-----------
+
+EAS（以及调度器的剩余部分）使用“算力”的概念来区分不同计算吞吐率的CPU。一个
+CPU的“算力”代表了它在最高频率下运行时能完成的工作量，且这个值是相对系统中
+算力最大的CPU而言的。算力值被归一化为1024以内，并且可与由实体负载跟踪
+（PELT）机制算出的利用率信号做对比。由于有算力值和利用率值，EAS能够估计一个
+任务/CPU有多大/有多忙，并在评估性能与能量时将其考虑在内。CPU算力由特定体系
+结构实现的arch_scale_cpu_capacity()回调函数提供。
+
+EAS使用的其余平台信息是直接从能量模型（EM）框架中读取的。一个平台的EM是一张
+表，表中每项代表系统中一个“性能域”的功率成本。（若要了解更多关于性能域的细节，
+见Documentation/power/energy-model.rst）
+
+当调度域被建立或重新建立时，调度器管理对拓扑代码中EM对象的引用。对于每个根域
+（rd），调度器维护一个与当前rd->span相交的所有性能域的单向链表。链表中的每个
+节点都包含一个指向EM框架所提供的结构体em_perf_domain的指针。
+
+链表被附加在根域上，以应对独占的cpuset的配置。由于独占的cpuset的边界不一定与
+性能域的边界一致，不同根域的链表可能包含重复的元素。
+
+示例1
+    让我们考虑一个有12个CPU的平台，分成3个性能域，（pd0，pd4和pd8），按以下
+    方式组织::
+
+	          CPUs:   0 1 2 3 4 5 6 7 8 9 10 11
+	          PDs:   |--pd0--|--pd4--|---pd8---|
+	          RDs:   |----rd1----|-----rd2-----|
+
+    现在，考虑用户空间决定将系统分成两个独占的cpusets，因此创建了两个独立的根域，
+    每个根域包含6个CPU。这两个根域在上图中被表示为rd1和rd2。由于pd4与rd1和rd2
+    都有交集，它将同时出现于附加在这两个根域的“->pd”链表中:
+
+       * rd1->pd: pd0 -> pd4
+       * rd2->pd: pd4 -> pd8
+
+    请注意，调度器将为pd4创建两个重复的链表节点（每个链表中各一个）。然而这
+    两个节点持有指向同一个EM框架的共享数据结构的指针。
+
+由于对这些链表的访问可能与热插拔及其它事件并发发生，因此它们受RCU锁保护，就像
+被调度器操控的拓扑结构体中剩下字段一样。
+
+EAS同样维护了一个静态键（sched_energy_present），当至少有一个根域满足EAS
+启动的所有条件时，这个键就会被启动。在第6节中总结了这些条件。
+
+
+4. 能量感知任务放置
+-------------------
+
+EAS覆盖了CFS的任务唤醒平衡代码。在唤醒平衡时，它使用平台的EM和PELT信号来选择节能
+的目标CPU。当EAS被启用时，select_task_rq_fair()调用find_energy_efficient_cpu()
+来做任务放置决定。这个函数寻找在每个性能域中寻找具有最高剩余算力（CPU算力 - CPU
+利用率）的CPU，因为它能让我们保持最低的频率。然后，该函数检查将任务放在新CPU相较
+依然放在之前活动的prev_cpu是否可以节省能量。
+
+如果唤醒的任务被迁移，find_energy_efficient_cpu()使用compute_energy()来估算
+系统将消耗多少能量。compute_energy()检查各CPU当前的利用率情况，并尝试调整来
+“模拟”任务迁移。EM框架提供了API em_pd_energy()计算每个性能域在给定的利用率条件
+下的预期能量消耗。
+
+下面详细介绍一个优化能量消耗的任务放置决策的例子。
+
+示例2
+    让我们考虑一个有两个独立性能域的（伪）平台，每个性能域含有2个CPU。CPU0和CPU1
+    是小核，CPU2和CPU3是大核。
+
+    调度器必须决定将任务P放在哪个CPU上，这个任务的util_avg = 200（平均利用率），
+    prev_cpu = 0（上一次运行在CPU0）。
+
+    目前CPU的利用率情况如下图所示。CPU 0-3的util_avg分别为400、100、600和500。
+    每个性能域有三个操作性能值（OPP）。与每个OPP相关的CPU算力和功率成本列在能量
+    模型表中。P的util_avg在图中显示为"PP"::
+
+     CPU util.
+      1024                 - - - - - - -              Energy Model
+                                               +-----------+-------------+
+                                               |  Little   |     Big     |
+       768                 =============       +-----+-----+------+------+
+                                               | Cap | Pwr | Cap  | Pwr  |
+                                               +-----+-----+------+------+
+       512  ===========    - ##- - - - -       | 170 | 50  | 512  | 400  |
+                             ##     ##         | 341 | 150 | 768  | 800  |
+       341  -PP - - - -      ##     ##         | 512 | 300 | 1024 | 1700 |
+             PP              ##     ##         +-----+-----+------+------+
+       170  -## - - - -      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+      Current OPP: =====       Other OPP: - - -     util_avg (100 each): ##
+
+
+    find_energy_efficient_cpu()将首先在两个性能域中寻找具有最大剩余算力的CPU。
+    在这个例子中是CPU1和CPU3。然后，它将估算，当P被放在它们中的任意一个时，系统的
+    能耗，并检查这样做是否会比把P放在CPU0上节省一些能量。EAS假定OPPs遵循利用率
+    （这与CPUFreq监管器schedutil的行为一致。关于这个问题的更多细节，见第6节）。
+
+    **情况1. P被迁移到CPU1**::
+
+      1024                 - - - - - - -
+
+                                            Energy calculation:
+       768                 =============     * CPU0: 200 / 341 * 150 = 88
+                                             * CPU1: 300 / 341 * 150 = 131
+                                             * CPU2: 600 / 768 * 800 = 625
+       512  - - - - - -    - ##- - - - -     * CPU3: 500 / 768 * 800 = 520
+                             ##     ##          => total_energy = 1364
+       341  ===========      ##     ##
+                    PP       ##     ##
+       170  -## - - PP-      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+
+    **情况2. P被迁移到CPU3**::
+
+      1024                 - - - - - - -
+
+                                            Energy calculation:
+       768                 =============     * CPU0: 200 / 341 * 150 = 88
+                                             * CPU1: 100 / 341 * 150 = 43
+                                    PP       * CPU2: 600 / 768 * 800 = 625
+       512  - - - - - -    - ##- - -PP -     * CPU3: 700 / 768 * 800 = 729
+                             ##     ##          => total_energy = 1485
+       341  ===========      ##     ##
+                             ##     ##
+       170  -## - - - -      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+    **情况3. P依旧留在prev_cpu/CPU0**::
+
+      1024                 - - - - - - -
+
+                                            Energy calculation:
+       768                 =============     * CPU0: 400 / 512 * 300 = 234
+                                             * CPU1: 100 / 512 * 300 = 58
+                                             * CPU2: 600 / 768 * 800 = 625
+       512  ===========    - ##- - - - -     * CPU3: 500 / 768 * 800 = 520
+                             ##     ##          => total_energy = 1437
+       341  -PP - - - -      ##     ##
+             PP              ##     ##
+       170  -## - - - -      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+    从这些计算结果来看，情况1的总能量最低。所以从节约能量的角度看，CPU1是最佳候选
+    者。
+
+大核通常比小核更耗电，因此主要在任务不适合在小核运行时使用。然而，小核并不总是比
+大核节能。举例来说，对于某些系统，小核的高OPP可能比大核的低OPP能量消耗更高。因此，
+如果小核在某一特定时间点刚好有足够的利用率，在此刻被唤醒的小任务放在大核执行可能
+会更节能，尽管它在小核上运行也是合适的。
+
+即使在大核所有OPP都不如小核OPP节能的情况下，在某些特定条件下，令小任务运行在大核
+上依然可能节能。事实上，将一个任务放在一个小核上可能导致整个性能域的OPP提高，这将
+增加已经在该性能域运行的任务的能量成本。如果唤醒的任务被放在一个大核上，它的执行
+成本可能比放在小核上更高，但这不会影响小核上的其它任务，这些任务将继续以较低的OPP
+运行。因此，当考虑CPU消耗的总能量时，在大核上运行一个任务的额外成本可能小于为所有
+其它运行在小核的任务提高OPP的成本。
+
+上面的例子几乎不可能以一种通用的方式得到正确的结果；同时，对于所有平台，在不知道
+系统所有CPU每个不同OPP的运行成本时，也无法得到正确的结果。得益于基于EM的设计，
+EAS应该能够正确处理这些问题而不会引发太多麻烦。然而，为了确保对高利用率场景的
+吞吐率造成的影响最小化，EAS同样实现了另外一种叫“过度利用率”的机制。
+
+
+5. 过度利用率
+-------------
+
+从一般的角度来看，EAS能提供最大帮助的是那些涉及低、中CPU利用率的使用场景。每当CPU
+密集型的长任务运行，它们将需要所有的可用CPU算力，调度器将没有什么办法来节省能量同时
+又不损害吞吐率。为了避免EAS损害性能，一旦CPU被使用的算力超过80%，它将被标记为“过度
+利用”。只要根域中没有CPU是过度利用状态，负载均衡被禁用，而EAS将覆盖唤醒平衡代码。
+EAS很可能将负载放置在系统中能量效率最高的CPU而不是其它CPU上，只要不损害吞吐率。
+因此，负载均衡器被禁用以防止它打破EAS发现的节能任务放置。当系统未处于过度利用状态时，
+这样做是安全的，因为低于80%的临界点意味着：
+
+    a. 所有的CPU都有一些空闲时间，所以EAS使用的利用率信号很可能准确地代表各种任务
+       的“大小”。
+    b. 所有任务，不管它们的nice值是多大，都应该被提供了足够多的CPU算力。
+    c. 既然有多余的算力，那么所有的任务都必须定期阻塞/休眠，在唤醒时进行平衡就足够
+       了。
+
+只要一个CPU利用率超过80%的临界点，上述三个假设中至少有一个是不正确的。在这种情况下，
+整个根域的“过度利用”标志被设置，EAS被禁用，负载均衡器被重新启用。通过这样做，调度器
+又回到了在CPU密集的条件下基于负载的算法做负载均衡。这更好地尊重了任务的nice值。
+
+由于过度利用率的概念在很大程度上依赖于检测系统中是否有一些空闲时间，所以必须考虑
+（比CFS）更高优先级的调度类（以及中断）“窃取”的CPU算力。像这样，对过度使用率的检测
+不仅要考虑CFS任务使用的算力，还需要考虑其它调度类和中断。
+
+
+6. EAS的依赖和要求
+------------------
+
+能量感知调度依赖系统的CPU具有特定的硬件属性，以及内核中的其它特性被启用。本节列出
+了这些依赖，并对如何满足这些依赖提供了提示。
+
+
+6.1 - 非对称CPU拓扑
+^^^^^^^^^^^^^^^^^^^
+
+
+如简介所提，目前只有非对称CPU拓扑结构的平台支持EAS。通过在运行时查询
+SD_ASYM_CPUCAPACITY_FULL标志位是否在创建调度域时已设置来检查这一要求是否满足。
+
+参阅Documentation/scheduler/sched-capacity.rst以了解在sched_domain层次结构
+中设置此标志位所需满足的要求。
+
+请注意，EAS并非从根本上与SMP不兼容，但在SMP平台上还没有观察到明显的节能。这一
+限制可以在将来进行修改，如果被证明不是这样的话。
+
+
+6.2 - 当前的能量模型
+^^^^^^^^^^^^^^^^^^^^
+
+EAS使用一个平台的EM来估算调度决策对能量的影响。因此，你的平台必须向EM框架提供
+能量成本表，以启动EAS。要做到这一点，请参阅文档
+Documentation/power/energy-model.rst中的独立EM框架部分。
+
+另请注意，调度域需要在EM注册后重建，以便启动EAS。
+
+EAS使用EM对能量使用率进行预测决策，因此它在检查任务放置的可能选项时更加注重
+差异。对于EAS来说，EM的功率值是以毫瓦还是以“抽象刻度”为单位表示并不重要。
+
+
+
+6.3 - 能量模型复杂度
+^^^^^^^^^^^^^^^^^^^^
+
+任务唤醒路径是时延敏感的。当一个平台的EM太复杂（太多CPU，太多性能域，太多状态
+等），在唤醒路径中使用它的成本就会升高到不可接受。能量感知唤醒算法的复杂度为：
+
+	C = Nd * (Nc + Ns)
+
+其中：Nd是性能域的数量；Nc是CPU的数量；Ns是OPP的总数（例如：对于两个性能域，
+每个域有4个OPP，则Ns = 8）。
+
+当调度域建立时，复杂性检查是在根域上进行的。如果一个根域的复杂度C恰好高于完全
+主观设定的EM_MAX_COMPLEXITY阈值（在本文写作时，是2048），则EAS不会在此根域
+启动。
+
+如果你的平台的能量模型的复杂度太高，EAS无法在这个根域上使用，但你真的想用，
+那么你就只剩下两个可能的选择：
+
+    1. 将你的系统拆分成分离的、较小的、使用独占cpuset的根域，并在每个根域局部
+       启用EAS。这个方案的好处是开箱即用，但缺点是无法在根域之间实现负载均衡，
+       这可能会导致总体系统负载不均衡。
+    2. 提交补丁以降低EAS唤醒算法的复杂度，从而使其能够在合理的时间内处理更大
+       的EM。
+
+
+6.4 - Schedutil监管器
+^^^^^^^^^^^^^^^^^^^^^
+
+EAS试图预测CPU在不久的将来会在哪个OPP下运行，以估算它们的能量消耗。为了做到
+这一点，它假定CPU的OPP跟随CPU利用率变化而变化。
+
+尽管在实践中很难对这一假设的准确性提供硬性保证（因为，举例来说硬件可能不会做
+它被要求做的事情），相对于其他CPUFreq监管器，schedutil至少_请求_使用利用率
+信号计算的频率。因此，与EAS一起使用的唯一合理的监管器是schedutil，因为它是
+唯一一个在频率请求和能量预测之间提供某种程度的一致性的监管器。
+
+不支持将EAS与schedutil之外的任何其它监管器一起使用。
+
+
+6.5 刻度不变性使用率信号
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+为了对不同的CPU和所有的性能状态做出准确的预测，EAS需要频率不变的和CPU不变的
+PELT信号。这些信号可以通过每个体系结构定义的arch_scale{cpu,freq}_capacity()
+回调函数获取。
+
+不支持在没有实现这两个回调函数的平台上使用EAS。
+
+
+6.6 多线程（SMT）
+^^^^^^^^^^^^^^^^^
+
+当前实现的EAS是不感知SMT的，因此无法利用多线程硬件节约能量。EAS认为线程是独立的
+CPU，这实际上对性能和能量消耗都是不利的。
+
+不支持在SMT上使用EAS。
diff --git a/Documentation/translations/zh_CN/scheduler/sched-nice-design.rst b/Documentation/translations/zh_CN/scheduler/sched-nice-design.rst
new file mode 100644
index 000000000000..9107f0c0b979
--- /dev/null
+++ b/Documentation/translations/zh_CN/scheduler/sched-nice-design.rst
@@ -0,0 +1,99 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/scheduler/sched-nice-design.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <[email protected]>
+
+=====================
+调度器nice值设计
+=====================
+
+本文档解释了新的Linux调度器中修改和精简后的nice级别的实现思路。
+
+Linux的nice级别总是非常脆弱，人们持续不断地缠着我们，让nice +19的任务占用
+更少的CPU时间。
+
+不幸的是，在旧的调度器中，这不是那么容易实现的（否则我们早就做到了），因为对
+nice级别的支持在历史上是与时间片长度耦合的，而时间片单位是由HZ滴答驱动的，
+所以最小的时间片是1/HZ。
+
+在O(1)调度器中（2003年），我们改变了负的nice级别，使它们比2.4内核更强
+（人们对这一变化很满意），而且我们还故意校正了线性时间片准则，使得nice +19
+的级别 _正好_ 是1 jiffy。为了让大家更好地理解它，时间片的图会是这样的（质量
+不佳的ASCII艺术提醒！）::
+
+
+                   A
+             \     | [timeslice length]
+              \    |
+               \   |
+                \  |
+                 \ |
+                  \|___100msecs
+                   |^ . _
+                   |      ^ . _
+                   |            ^ . _
+ -*----------------------------------*-----> [nice level]
+ -20               |                +19
+                   |
+                   |
+
+因此，如果有人真的想renice任务，相较线性规则，+19会给出更大的效果（改变
+ABI来扩展优先级的解决方案在早期就被放弃了）。
+
+这种方法在一定程度上奏效了一段时间，但后来HZ=1000时，它导致1 jiffy为
+1ms，这意味着0.1%的CPU使用率，我们认为这有点过度。过度 _不是_ 因为它表示
+的CPU使用率过小，而是因为它引发了过于频繁（每毫秒1次）的重新调度（因此会
+破坏缓存，等等。请记住，硬件更弱、cache更小是很久以前的事了，当时人们在
+nice +19级别运行数量颇多的应用程序）。
+
+因此，对于HZ=1000，我们将nice +19改为5毫秒，因为这感觉像是正确的最小
+粒度——这相当于5%的CPU利用率。但nice +19的根本的HZ敏感属性依然保持不变，
+我们没有收到过关于nice +19在CPU利用率方面太 _弱_ 的任何抱怨，我们只收到
+过它（依然）太 _强_ 的抱怨 :-)。
+
+总结一下：我们一直想让nice各级别一致性更强，但在HZ和jiffies的限制下，以及
+nice级别与时间片、调度粒度耦合是令人讨厌的设计，这一目标并不真正可行。
+
+第二个关于Linux nice级别支持的抱怨是（不那么频繁，但仍然定期发生），它
+在原点周围的不对称性（你可以在上面的图片中看到），或者更准确地说：事实上
+nice级别的行为取决于 _绝对的_ nice级别，而nice应用程序接口本身从根本上
+说是“相对”的：
+
+   int nice(int inc);
+
+   asmlinkage long sys_nice(int increment)
+
+（第一个是glibc的应用程序接口，第二个是syscall的应用程序接口）
+注意，“inc”是相对当前nice级别而言的，类似bash的“nice”命令等工具是这个
+相对性应用程序接口的镜像。
+
+在旧的调度器中，举例来说，如果你以nice +1启动一个任务，并以nice +2启动
+另一个任务，这两个任务的CPU分配将取决于父外壳程序的nice级别——如果它是
+nice -10，那么CPU的分配不同于+5或+10。
+
+第三个关于Linux nice级别支持的抱怨是，负数nice级别“不够有力”，以很多人
+不得不诉诸于实时调度优先级来运行音频（和其它多媒体）应用程序，比如
+SCHED_FIFO。但这也造成了其它问题：SCHED_FIFO未被证明是免于饥饿的，一个
+有问题的SCHED_FIFO应用程序也会锁住运行良好的系统。
+
+v2.6.23版内核的新调度器解决了这三种类型的抱怨：
+
+为了解决第一个抱怨（nice级别不够“有力”），调度器与“时间片”、HZ的概念
+解耦（调度粒度被处理成一个和nice级别独立的概念），因此有可能实现更好、
+更一致的nice +19支持：在新的调度器中，nice +19的任务得到一个HZ无关的
+1.5%CPU使用率，而不是旧版调度器中3%-5%-9%的可变范围。
+
+为了解决第二个抱怨（nice各级别不一致），新调度器令调用nice(1)对各任务的
+CPU利用率有相同的影响，无论其绝对nice级别如何。所以在新调度器上，运行一个
+nice +10和一个nice +11的任务会与运行一个nice -5和一个nice -4的任务的
+CPU利用率分割是相同的（一个会得到55%的CPU，另一个会得到45%）。这是为什么
+nice级别被改为“乘法”（或指数）——这样的话，不管你从哪个级别开始，“相对”
+结果将总是一样的。
+
+第三个抱怨（负数nice级别不够“有力”，并迫使音频应用程序在更危险的
+SCHED_FIFO调度策略下运行）几乎被新的调度器自动解决了：更强的负数级别
+具有重新校正nice级别动态范围的自动化副作用。
diff --git a/Documentation/translations/zh_CN/scheduler/sched-stats.rst b/Documentation/translations/zh_CN/scheduler/sched-stats.rst
new file mode 100644
index 000000000000..1c68c3d1c283
--- /dev/null
+++ b/Documentation/translations/zh_CN/scheduler/sched-stats.rst
@@ -0,0 +1,156 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/scheduler/sched-stats.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <[email protected]>
+
+==============
+调度器统计数据
+==============
+
+第15版schedstats去掉了sched_yield的一些计数器：yld_exp_empty，yld_act_empty
+和yld_both_empty。在其它方面和第14版完全相同。
+
+第14版schedstats包括对sched_domains（译注：调度域）的支持，该特性进入内核
+主线2.6.20，不过这一版schedstats与2.6.13-2.6.19内核的版本12的统计数据是完全
+相同的（内核未发布第13版）。有些计数器按每个运行队列统计是更有意义的，其它则
+按每个调度域统计是更有意义的。注意，调度域（以及它们的附属信息）仅在开启
+CONFIG_SMP的机器上是相关的和可用的。
+
+在第14版schedstat中，每个被列出的CPU至少会有一级域统计数据，且很可能有一个
+以上的域。在这个实现中，域没有特别的名字，但是编号最高的域通常在机器上所有的
+CPU上仲裁平衡，而domain0是最紧密聚焦的域，有时仅在一对CPU之间进行平衡。此时，
+没有任何体系结构需要3层以上的域。域统计数据中的第一个字段是一个位图，表明哪些
+CPU受该域的影响。
+
+这些字段是计数器，而且只能递增。使用这些字段的程序将需要从基线观测开始，然后在
+后续每一个观测中计算出计数器的变化。一个能以这种方式处理其中很多字段的perl脚本
+可见
+
+    http://eaglet.pdxhosts.com/rick/linux/schedstat/
+
+请注意，任何这样的脚本都必须是特定于版本的，改变版本的主要原因是输出格式的变化。
+对于那些希望编写自己的脚本的人，可以参考这里描述的各个字段。
+
+CPU统计数据
+-----------
+cpu<N> 1 2 3 4 5 6 7 8 9
+
+第一个字段是sched_yield()的统计数据：
+
+     1) sched_yield()被调用了#次
+
+接下来的三个是schedule()的统计数据：
+
+     2) 这个字段是一个过时的数组过期计数，在O(1)调度器中使用。为了ABI兼容性，
+	我们保留了它，但它总是被设置为0。
+     3) schedule()被调用了#次
+     4) 调用schedule()导致处理器变为空闲了#次
+
+接下来的两个是try_to_wake_up()的统计数据：
+
+     5) try_to_wake_up()被调用了#次
+     6) 调用try_to_wake_up()导致本地CPU被唤醒了#次
+
+接下来的三个统计数据描述了调度延迟：
+
+     7) 本处理器运行任务的总时间，单位是jiffies
+     8) 本处理器任务等待运行的时间，单位是jiffies
+     9) 本CPU运行了#个时间片
+
+域统计数据
+----------
+
+对于每个被描述的CPU，和它相关的每一个调度域均会产生下面一行数据（注意，如果
+CONFIG_SMP没有被定义，那么*没有*调度域被使用，这些行不会出现在输出中）。
+
+domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
+
+第一个字段是一个位掩码，表明该域在操作哪些CPU。
+
+接下来的24个字段是load_balance()函数的各个统计数据，按空闲类型分组（空闲，
+繁忙，新空闲）：
+
+
+    1)  当CPU空闲时，load_balance()在这个调度域中被调用了#次
+    2)  当CPU空闲时，load_balance()在这个调度域中被调用，但是发现负载无需
+        均衡#次
+    3)  当CPU空闲时，load_balance()在这个调度域中被调用，试图迁移1个或更多
+        任务且失败了#次
+    4)  当CPU空闲时，load_balance()在这个调度域中被调用，发现不均衡（如果有）
+        #次
+    5)  当CPU空闲时，pull_task()在这个调度域中被调用#次
+    6)  当CPU空闲时，尽管目标任务是热缓存状态，pull_task()依然被调用#次
+    7)  当CPU空闲时，load_balance()在这个调度域中被调用，未能找到更繁忙的
+        队列#次
+    8)  当CPU空闲时，在调度域中找到了更繁忙的队列，但未找到更繁忙的调度组
+        #次
+    9)  当CPU繁忙时，load_balance()在这个调度域中被调用了#次
+    10) 当CPU繁忙时，load_balance()在这个调度域中被调用，但是发现负载无需
+        均衡#次
+    11) 当CPU繁忙时，load_balance()在这个调度域中被调用，试图迁移1个或更多
+        任务且失败了#次
+    12) 当CPU繁忙时，load_balance()在这个调度域中被调用，发现不均衡（如果有）
+        #次
+    13) 当CPU繁忙时，pull_task()在这个调度域中被调用#次
+    14) 当CPU繁忙时，尽管目标任务是热缓存状态，pull_task()依然被调用#次
+    15) 当CPU繁忙时，load_balance()在这个调度域中被调用，未能找到更繁忙的
+        队列#次
+    16) 当CPU繁忙时，在调度域中找到了更繁忙的队列，但未找到更繁忙的调度组
+        #次
+    17) 当CPU新空闲时，load_balance()在这个调度域中被调用了#次
+    18) 当CPU新空闲时，load_balance()在这个调度域中被调用，但是发现负载无需
+        均衡#次
+    19) 当CPU新空闲时，load_balance()在这个调度域中被调用，试图迁移1个或更多
+        任务且失败了#次
+    20) 当CPU新空闲时，load_balance()在这个调度域中被调用，发现不均衡（如果有）
+        #次
+    21) 当CPU新空闲时，pull_task()在这个调度域中被调用#次
+    22) 当CPU新空闲时，尽管目标任务是热缓存状态，pull_task()依然被调用#次
+    23) 当CPU新空闲时，load_balance()在这个调度域中被调用，未能找到更繁忙的
+        队列#次
+    24) 当CPU新空闲时，在调度域中找到了更繁忙的队列，但未找到更繁忙的调度组
+        #次
+
+接下来的3个字段是active_load_balance()函数的各个统计数据：
+
+    25) active_load_balance()被调用了#次
+    26) active_load_balance()被调用，试图迁移1个或更多任务且失败了#次
+    27) active_load_balance()被调用，成功迁移了#次任务
+
+接下来的3个字段是sched_balance_exec()函数的各个统计数据：
+
+    28) sbe_cnt不再被使用
+    29) sbe_balanced不再被使用
+    30) sbe_pushed不再被使用
+
+接下来的3个字段是sched_balance_fork()函数的各个统计数据：
+
+    31) sbf_cnt不再被使用
+    32) sbf_balanced不再被使用
+    33) sbf_pushed不再被使用
+
+接下来的3个字段是try_to_wake_up()函数的各个统计数据：
+
+    34) 在这个调度域中调用try_to_wake_up()唤醒任务时，任务在调度域中一个
+        和上次运行不同的新CPU上运行了#次
+    35) 在这个调度域中调用try_to_wake_up()唤醒任务时，任务被迁移到发生唤醒
+        的CPU次数为#，因为该任务在原CPU是冷缓存状态
+    36) 在这个调度域中调用try_to_wake_up()唤醒任务时，引发被动负载均衡#次
+
+/proc/<pid>/schedstat
+---------------------
+schedstats还添加了一个新的/proc/<pid>/schedstat文件，来提供一些进程级的
+相同信息。这个文件中，有三个字段与该进程相关：
+
+     1) 在CPU上运行花费的时间
+     2) 在运行队列上等待的时间
+     3) 在CPU上运行了#个时间片
+
+可以很容易地编写一个程序，利用这些额外的字段来报告一个特定的进程或一组进程在
+调度器策略下的表现如何。这样的程序的一个简单版本可在下面的链接找到
+
+    http://eaglet.pdxhosts.com/rick/linux/schedstat/v12/latency.c
diff --git a/Documentation/translations/zh_CN/vm/active_mm.rst b/Documentation/translations/zh_CN/vm/active_mm.rst
new file mode 100644
index 000000000000..366609ea4f37
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/active_mm.rst
@@ -0,0 +1,85 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/active_mm.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+
+=========
+Active MM
+=========
+
+这是一封linux之父回复开发者的一封邮件，所以翻译时我尽量保持邮件格式的完整。
+
+::
+
+ List:       linux-kernel
+ Subject:    Re: active_mm
+ From:       Linus Torvalds <torvalds () transmeta ! com>
+ Date:       1999-07-30 21:36:24
+
+ 因为我并不经常写解释，所以已经抄送到linux-kernel邮件列表，而当我做这些，
+ 且更多的人在阅读它们时，我觉得棒极了。
+
+ 1999年7月30日 星期五， David Mosberger 写道：
+ >
+ > 是否有一个简短的描述，说明task_struct中的
+ >  "mm" 和 "active_mm"应该如何使用？ (如果
+ > 这个问题在邮件列表中讨论过，我表示歉意--我刚
+ > 刚度假回来，有一段时间没能关注linux-kernel了）。
+
+ 基本上，新的设定是：
+
+  - 我们有“真实地址空间”和“匿名地址空间”。区别在于，匿名地址空间根本不关心用
+    户级页表，所以当我们做上下文切换到匿名地址空间时，我们只是让以前的地址空间
+    处于活动状态。
+
+    一个“匿名地址空间”的明显用途是任何不需要任何用户映射的线程--所有的内核线
+    程基本上都属于这一类，但即使是“真正的”线程也可以暂时说在一定时间内它们不
+    会对用户空间感兴趣，调度器不妨试着避免在切换VM状态上浪费时间。目前只有老
+    式的bdflush sync能做到这一点。
+
+  - “tsk->mm” 指向 “真实地址空间”。对于一个匿名进程来说，tsk->mm将是NULL，
+    其逻辑原因是匿名进程实际上根本就 “没有” 真正的地址空间。
+
+  - 然而，我们显然需要跟踪我们为这样的匿名用户“偷用”了哪个地址空间。为此，我们
+    有 “tsk->active_mm”，它显示了当前活动的地址空间是什么。
+
+    规则是，对于一个有真实地址空间的进程（即tsk->mm是 non-NULL），active_mm
+    显然必须与真实的mm相同。
+
+    对于一个匿名进程，tsk->mm == NULL，而tsk->active_mm是匿名进程运行时
+    “借用”的mm。当匿名进程被调度走时，借用的地址空间被返回并清除。
+
+ 为了支持所有这些，“struct mm_struct”现在有两个计数器：一个是 “mm_users”
+ 计数器，即有多少 “真正的地址空间用户”，另一个是 “mm_count”计数器，即 “lazy”
+ 用户（即匿名用户）的数量，如果有任何真正的用户，则加1。
+
+ 通常情况下，至少有一个真正的用户，但也可能是真正的用户在另一个CPU上退出，而
+ 一个lazy的用户仍在活动，所以你实际上得到的情况是，你有一个地址空间 **只**
+ 被lazy的用户使用。这通常是一个短暂的生命周期状态，因为一旦这个线程被安排给一
+ 个真正的线程，这个 “僵尸” mm就会被释放，因为 “mm_count”变成了零。
+
+ 另外，一个新的规则是，**没有人** 再把 “init_mm” 作为一个真正的MM了。
+ “init_mm”应该被认为只是一个 “没有其他上下文时的lazy上下文”，事实上，它主
+ 要是在启动时使用，当时还没有真正的VM被创建。因此，用来检查的代码
+
+   if (current->mm == &init_mm)
+
+ 一般来说，应该用
+
+   if (!current->mm)
+
+ 取代上面的写法（这更有意义--测试基本上是 “我们是否有一个用户环境”，并且通常
+ 由缺页异常处理程序和类似的东西来完成）。
+
+ 总之，我刚才在ftp.kernel.org上放了一个pre-patch-2.3.13-1，因为它稍微改
+ 变了接口以适配alpha（谁会想到呢，但alpha体系结构上下文切换代码实际上最终是
+ 最丑陋的之一--不像其他架构的MM和寄存器状态是分开的，alpha的PALcode将两者
+ 连接起来，你需要同时切换两者）。
+
+ (文档来源 http://marc.info/?l=linux-kernel&m=93337278602211&w=2)
diff --git a/Documentation/translations/zh_CN/vm/balance.rst b/Documentation/translations/zh_CN/vm/balance.rst
new file mode 100644
index 000000000000..e98a47ef24a8
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/balance.rst
@@ -0,0 +1,81 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/balance.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+
+========
+内存平衡
+========
+
+2000年1月开始，作者：Kanoj Sarcar <[email protected]>
+
+对于 !__GFP_HIGH 和 !__GFP_KSWAPD_RECLAIM 以及非 __GFP_IO 的分配，需要进行
+内存平衡。
+
+调用者避免回收的第一个原因是调用者由于持有自旋锁或处于中断环境中而无法睡眠。第二个
+原因可能是，调用者愿意在不产生页面回收开销的情况下分配失败。这可能发生在有0阶回退
+选项的机会主义高阶分配请求中。在这种情况下，调用者可能也希望避免唤醒kswapd。
+
+__GFP_IO分配请求是为了防止文件系统死锁。
+
+在没有非睡眠分配请求的情况下，做平衡似乎是有害的。页面回收可以被懒散地启动，也就是
+说，只有在需要的时候（也就是区域的空闲内存为0），而不是让它成为一个主动的过程。
+
+也就是说，内核应该尝试从直接映射池中满足对直接映射页的请求，而不是回退到dma池中，
+这样就可以保持dma池为dma请求（不管是不是原子的）所填充。类似的争论也适用于高内存
+和直接映射的页面。相反，如果有很多空闲的dma页，最好是通过从dma池中分配一个来满足
+常规的内存请求，而不是产生常规区域平衡的开销。
+
+在2.2中，只有当空闲页总数低于总内存的1/64时，才会启动内存平衡/页面回收。如果dma
+和常规内存的比例合适，即使dma区完全空了，也很可能不会进行平衡。2.2已经在不同内存
+大小的生产机器上运行，即使有这个问题存在，似乎也做得不错。在2.3中，由于HIGHMEM的
+存在，这个问题变得更加严重。
+
+在2.3中，区域平衡可以用两种方式之一来完成：根据区域的大小（可能是低级区域的大小），
+我们可以在初始化阶段决定在平衡任何区域时应该争取多少空闲页。好的方面是，在平衡的时
+候，我们不需要看低级区的大小，坏的方面是，我们可能会因为忽略低级区可能较低的使用率
+而做过于频繁的平衡。另外，只要对分配程序稍作修改，就有可能将memclass()宏简化为一
+个简单的等式。
+
+另一个可能的解决方案是，我们只在一个区 **和** 其所有低级区的空闲内存低于该区及其
+低级区总内存的1/64时进行平衡。这就解决了2.2的平衡问题，并尽可能地保持了与2.2行为
+的接近。另外，平衡算法在各种架构上的工作方式也是一样的，这些架构有不同数量和类型的
+内存区。如果我们想变得更花哨一点，我们可以在未来为不同区域的自由页面分配不同的权重。
+
+请注意，如果普通区的大小与dma区相比是巨大的，那么在决定是否平衡普通区的时候，考虑
+空闲的dma页就变得不那么重要了。那么第一个解决方案就变得更有吸引力。
+
+所附的补丁实现了第二个解决方案。它还 “修复”了两个问题：首先，在低内存条件下，kswapd
+被唤醒，就像2.2中的非睡眠分配。第二，HIGHMEM区也被平衡了，以便给replace_with_highmem()
+一个争取获得HIGHMEM页的机会，同时确保HIGHMEM分配不会落回普通区。这也确保了HIGHMEM
+页不会被泄露（例如，在一个HIGHMEM页在交换缓存中但没有被任何人使用的情况下）。
+
+kswapd还需要知道它应该平衡哪些区。kswapd主要是在无法进行平衡的情况下需要的，可能
+是因为所有的分配请求都来自中断上下文，而所有的进程上下文都在睡眠。对于2.3，
+kswapd并不真正需要平衡高内存区，因为中断上下文并不请求高内存页。kswapd看zone
+结构体中的zone_wake_kswapd字段来决定一个区是否需要平衡。
+
+如果从进程内存和shm中偷取页面可以减轻该页面节点中任何区的内存压力，而该区的内存压力
+已经低于其水位，则会进行偷取。
+
+watemark[WMARK_MIN/WMARK_LOW/WMARK_HIGH]/low_on_memory/zone_wake_kswapd：
+这些是每个区的字段，用于确定一个区何时需要平衡。当页面数低于水位[WMARK_MIN]时，
+hysteric 的字段low_on_memory被设置。这个字段会一直被设置，直到空闲页数变成水位
+[WMARK_HIGH]。当low_on_memory被设置时，页面分配请求将尝试释放该区域的一些页面（如果
+请求中设置了GFP_WAIT）。与此相反的是，决定唤醒kswapd以释放一些区的页。这个决定不是基于
+hysteresis 的，而是当空闲页的数量低于watermark[WMARK_LOW]时就会进行；在这种情况下，
+zone_wake_kswapd也被设置。
+
+
+我所听到的（超棒的）想法：
+
+1. 动态经历应该影响平衡：可以跟踪一个区的失败请求的数量，并反馈到平衡方案中（[email protected]）。
+
+2. 实现一个类似于replace_with_highmem()的replace_with_regular()，以保留dma页面。
+   ([email protected])
diff --git a/Documentation/translations/zh_CN/vm/damon/api.rst b/Documentation/translations/zh_CN/vm/damon/api.rst
new file mode 100644
index 000000000000..21143eea4ebe
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/damon/api.rst
@@ -0,0 +1,32 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Original: Documentation/vm/damon/api.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+
+=======
+API参考
+=======
+
+内核空间的程序可以使用下面的API来使用DAMON的每个功能。你所需要做的就是引用 ``damon.h`` ，
+它位于源代码树的include/linux/。
+
+结构体
+======
+
+该API在以下内核代码中:
+
+include/linux/damon.h
+
+
+函数
+====
+
+该API在以下内核代码中:
+
+mm/damon/core.c
diff --git a/Documentation/translations/zh_CN/vm/damon/design.rst b/Documentation/translations/zh_CN/vm/damon/design.rst
new file mode 100644
index 000000000000..05f66c02740a
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/damon/design.rst
@@ -0,0 +1,139 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Original: Documentation/vm/damon/design.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+
+====
+设计
+====
+
+可配置的层
+==========
+
+DAMON提供了数据访问监控功能，同时使其准确性和开销可控。基本的访问监控需要依赖于目标地址空间
+并为之优化的基元。另一方面，作为DAMON的核心，准确性和开销的权衡机制是在纯逻辑空间中。DAMON
+将这两部分分离在不同的层中，并定义了它的接口，以允许各种低层次的基元实现与核心逻辑的配置。
+
+由于这种分离的设计和可配置的接口，用户可以通过配置核心逻辑和适当的低级基元实现来扩展DAMON的
+任何地址空间。如果没有提供合适的，用户可以自己实现基元。
+
+例如，物理内存、虚拟内存、交换空间、那些特定的进程、NUMA节点、文件和支持的内存设备将被支持。
+另外，如果某些架构或设备支持特殊的优化访问检查基元，这些基元将很容易被配置。
+
+
+特定地址空间基元的参考实现
+==========================
+
+基本访问监测的低级基元被定义为两部分。:
+
+1. 确定地址空间的监测目标地址范围
+2. 目标空间中特定地址范围的访问检查。
+
+DAMON目前为物理和虚拟地址空间提供了基元的实现。下面两个小节描述了这些工作的方式。
+
+
+基于VMA的目标地址范围构造
+-------------------------
+
+这仅仅是针对虚拟地址空间基元的实现。对于物理地址空间，只是要求用户手动设置监控目标地址范围。
+
+在进程的超级巨大的虚拟地址空间中，只有小部分被映射到物理内存并被访问。因此，跟踪未映射的地
+址区域只是一种浪费。然而，由于DAMON可以使用自适应区域调整机制来处理一定程度的噪声，所以严
+格来说，跟踪每一个映射并不是必须的，但在某些情况下甚至会产生很高的开销。也就是说，监测目标
+内部过于巨大的未映射区域应该被移除，以不占用自适应机制的时间。
+
+出于这个原因，这个实现将复杂的映射转换为三个不同的区域，覆盖地址空间的每个映射区域。这三个
+区域之间的两个空隙是给定地址空间中两个最大的未映射区域。这两个最大的未映射区域是堆和最上面
+的mmap()区域之间的间隙，以及在大多数情况下最下面的mmap()区域和堆之间的间隙。因为这些间隙
+在通常的地址空间中是异常巨大的，排除这些间隙就足以做出合理的权衡。下面详细说明了这一点::
+
+    <heap>
+    <BIG UNMAPPED REGION 1>
+    <uppermost mmap()-ed region>
+    (small mmap()-ed regions and munmap()-ed regions)
+    <lowermost mmap()-ed region>
+    <BIG UNMAPPED REGION 2>
+    <stack>
+
+
+基于PTE访问位的访问检查
+-----------------------
+
+物理和虚拟地址空间的实现都使用PTE Accessed-bit进行基本访问检查。唯一的区别在于从地址中
+找到相关的PTE访问位的方式。虚拟地址的实现是为该地址的目标任务查找页表，而物理地址的实现则
+是查找与该地址有映射关系的每一个页表。通过这种方式，实现者找到并清除下一个采样目标地址的位，
+并检查该位是否在一个采样周期后再次设置。这可能会干扰其他使用访问位的内核子系统，即空闲页跟
+踪和回收逻辑。为了避免这种干扰，DAMON使其与空闲页面跟踪相互排斥，并使用 ``PG_idle`` 和
+``PG_young`` 页面标志来解决与回收逻辑的冲突，就像空闲页面跟踪那样。
+
+
+独立于地址空间的核心机制
+========================
+
+下面四个部分分别描述了DAMON的核心机制和五个监测属性，即 ``采样间隔`` 、 ``聚集间隔`` 、
+``区域更新间隔`` 、 ``最小区域数`` 和 ``最大区域数`` 。
+
+
+访问频率监测
+------------
+
+DAMON的输出显示了在给定的时间内哪些页面的访问频率是多少。访问频率的分辨率是通过设置
+``采样间隔`` 和 ``聚集间隔`` 来控制的。详细地说，DAMON检查每个 ``采样间隔`` 对每
+个页面的访问，并将结果汇总。换句话说，计算每个页面的访问次数。在每个 ``聚合间隔`` 过
+去后，DAMON调用先前由用户注册的回调函数，以便用户可以阅读聚合的结果，然后再清除这些结
+果。这可以用以下简单的伪代码来描述::
+
+    while monitoring_on:
+        for page in monitoring_target:
+            if accessed(page):
+                nr_accesses[page] += 1
+        if time() % aggregation_interval == 0:
+            for callback in user_registered_callbacks:
+                callback(monitoring_target, nr_accesses)
+            for page in monitoring_target:
+                nr_accesses[page] = 0
+        sleep(sampling interval)
+
+这种机制的监测开销将随着目标工作负载规模的增长而任意增加。
+
+
+基于区域的抽样调查
+------------------
+
+为了避免开销的无限制增加，DAMON将假定具有相同访问频率的相邻页面归入一个区域。只要保持
+这个假设（一个区域内的页面具有相同的访问频率），该区域内就只需要检查一个页面。因此，对
+于每个 ``采样间隔`` ，DAMON在每个区域中随机挑选一个页面，等待一个 ``采样间隔`` ，检
+查该页面是否同时被访问，如果被访问则增加该区域的访问频率。因此，监测开销是可以通过设置
+区域的数量来控制的。DAMON允许用户设置最小和最大的区域数量来进行权衡。
+
+然而，如果假设没有得到保证，这个方案就不能保持输出的质量。
+
+
+适应性区域调整
+--------------
+
+即使最初的监测目标区域被很好地构建以满足假设（同一区域内的页面具有相似的访问频率），数
+据访问模式也会被动态地改变。这将导致监测质量下降。为了尽可能地保持假设，DAMON根据每个
+区域的访问频率自适应地进行合并和拆分。
+
+对于每个 ``聚集区间`` ，它比较相邻区域的访问频率，如果频率差异较小，就合并这些区域。
+然后，在它报告并清除每个区域的聚合接入频率后，如果区域总数不超过用户指定的最大区域数，
+它将每个区域拆分为两个或三个区域。
+
+通过这种方式，DAMON提供了其最佳的质量和最小的开销，同时保持了用户为其权衡设定的界限。
+
+
+动态目标空间更新处理
+--------------------
+
+监测目标地址范围可以动态改变。例如，虚拟内存可以动态地被映射和解映射。物理内存可以被
+热插拔。
+
+由于在某些情况下变化可能相当频繁，DAMON检查动态内存映射的变化，并仅在用户指定的时间
+间隔（ ``区域更新间隔`` ）内将其应用于抽象的目标区域。
diff --git a/Documentation/translations/zh_CN/vm/damon/faq.rst b/Documentation/translations/zh_CN/vm/damon/faq.rst
new file mode 100644
index 000000000000..07b4ac19407d
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/damon/faq.rst
@@ -0,0 +1,48 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Original: Documentation/vm/damon/faq.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+
+========
+常见问题
+========
+
+为什么是一个新的子系统，而不是扩展perf或其他用户空间工具？
+==========================================================
+
+首先，因为它需要尽可能的轻量级，以便可以在线使用，所以应该避免任何不必要的开销，如内核-用户
+空间的上下文切换成本。第二，DAMON的目标是被包括内核在内的其他程序所使用。因此，对特定工具
+（如perf）的依赖性是不可取的。这就是DAMON在内核空间实现的两个最大的原因。
+
+
+“闲置页面跟踪” 或 “perf mem” 可以替代DAMON吗？
+==============================================
+
+闲置页跟踪是物理地址空间访问检查的一个低层次的原始方法。“perf mem”也是类似的，尽管它可以
+使用采样来减少开销。另一方面，DAMON是一个更高层次的框架，用于监控各种地址空间。它专注于内
+存管理优化，并提供复杂的精度/开销处理机制。因此，“空闲页面跟踪” 和 “perf mem” 可以提供
+DAMON输出的一个子集，但不能替代DAMON。
+
+
+DAMON是否只支持虚拟内存？
+=========================
+
+不，DAMON的核心是独立于地址空间的。用户可以在DAMON核心上实现和配置特定地址空间的低级原始
+部分，包括监测目标区域的构造和实际的访问检查。通过这种方式，DAMON用户可以用任何访问检查技
+术来监测任何地址空间。
+
+尽管如此，DAMON默认为虚拟内存和物理内存提供了基于vma/rmap跟踪和PTE访问位检查的地址空间
+相关功能的实现，以供参考和方便使用。
+
+
+我可以简单地监测页面的粒度吗？
+==============================
+
+是的，你可以通过设置 ``min_nr_regions`` 属性高于工作集大小除以页面大小的值来实现。
+因为监视目标区域的大小被强制为 ``>=page size`` ，所以区域分割不会产生任何影响。
diff --git a/Documentation/translations/zh_CN/vm/damon/index.rst b/Documentation/translations/zh_CN/vm/damon/index.rst
new file mode 100644
index 000000000000..84d36d90c9b0
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/damon/index.rst
@@ -0,0 +1,33 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Original: Documentation/vm/damon/index.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+
+==========================
+DAMON:数据访问监视器
+==========================
+
+DAMON是Linux内核的一个数据访问监控框架子系统。DAMON的核心机制使其成为
+（该核心机制详见(Documentation/translations/zh_CN/vm/damon/design.rst)）
+
+ - *准确度* （监测输出对DRAM级别的内存管理足够有用；但可能不适合CPU Cache级别），
+ - *轻量级* （监控开销低到可以在线应用），以及
+ - *可扩展* （无论目标工作负载的大小，开销的上限值都在恒定范围内）。
+
+因此，利用这个框架，内核的内存管理机制可以做出高级决策。会导致高数据访问监控开销的实
+验性内存管理优化工作可以再次进行。同时，在用户空间，有一些特殊工作负载的用户可以编写
+个性化的应用程序，以便更好地了解和优化他们的工作负载和系统。
+
+.. toctree::
+   :maxdepth: 2
+
+   faq
+   design
+   api
+
diff --git a/Documentation/translations/zh_CN/vm/free_page_reporting.rst b/Documentation/translations/zh_CN/vm/free_page_reporting.rst
new file mode 100644
index 000000000000..31d6c34b956b
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/free_page_reporting.rst
@@ -0,0 +1,38 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/_free_page_reporting.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+==========
+空闲页报告
+==========
+
+空闲页报告是一个API，设备可以通过它来注册接收系统当前未使用的页面列表。这在虚拟
+化的情况下是很有用的，客户机能够使用这些数据来通知管理器它不再使用内存中的某些页
+面。
+
+对于驱动，通常是气球驱动要使用这个功能，它将分配和初始化一个page_reporting_dev_info
+结构体。它要填充的结构体中的字段是用于处理散点列表的 "report" 函数指针。它还必
+须保证每次调用该函数时能处理至少相当于PAGE_REPORTING_CAPACITY的散点列表条目。
+假设没有其他页面报告设备已经注册， 对page_reporting_register的调用将向报告框
+架注册页面报告接口。
+
+一旦注册，页面报告API将开始向驱动报告成批的页面。API将在接口被注册后2秒开始报告
+页面，并在任何足够高的页面被释放之后2秒继续报告。
+
+报告的页面将被存储在传递给报告函数的散列表中，最后一个条目的结束位被设置在条目
+nent-1中。 当页面被报告函数处理时，分配器将无法访问它们。一旦报告函数完成，这些
+页将被返回到它们所获得的自由区域。
+
+在移除使用空闲页报告的驱动之前，有必要调用page_reporting_unregister，以移除
+目前被空闲页报告使用的page_reporting_dev_info结构体。这样做将阻止进一步的报
+告通过该接口发出。如果另一个驱动或同一驱动被注册，它就有可能恢复前一个驱动在报告
+空闲页方面的工作。
+
+
+Alexander Duyck, 2019年12月04日
diff --git a/Documentation/translations/zh_CN/vm/highmem.rst b/Documentation/translations/zh_CN/vm/highmem.rst
new file mode 100644
index 000000000000..018838e58c3e
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/highmem.rst
@@ -0,0 +1,128 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/highmem.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+==========
+高内存处理
+==========
+
+作者: Peter Zijlstra <[email protected]>
+
+.. contents:: :local:
+
+高内存是什么？
+==============
+
+当物理内存的大小接近或超过虚拟内存的最大大小时，就会使用高内存（highmem）。在这一点上，内
+核不可能在任何时候都保持所有可用的物理内存的映射。这意味着内核需要开始使用它想访问的物理内
+存的临时映射。
+
+没有被永久映射覆盖的那部分（物理）内存就是我们所说的 "高内存"。对于这个边界的确切位置，有
+各种架构上的限制。
+
+例如，在i386架构中，我们选择将内核映射到每个进程的虚拟空间，这样我们就不必为内核的进入/退
+出付出全部的TLB作废代价。这意味着可用的虚拟内存空间（i386上为4GiB）必须在用户和内核空间之
+间进行划分。
+
+使用这种方法的架构的传统分配方式是3:1，3GiB用于用户空间，顶部的1GiB用于内核空间。::
+
+		+--------+ 0xffffffff
+		| Kernel |
+		+--------+ 0xc0000000
+		|        |
+		| User   |
+		|        |
+		+--------+ 0x00000000
+
+这意味着内核在任何时候最多可以映射1GiB的物理内存，但是由于我们需要虚拟地址空间来做其他事
+情--包括访问其余物理内存的临时映射--实际的直接映射通常会更少（通常在~896MiB左右）。
+
+其他有mm上下文标签的TLB的架构可以有独立的内核和用户映射。然而，一些硬件（如一些ARM）在使
+用mm上下文标签时，其虚拟空间有限。
+
+
+临时虚拟映射
+============
+
+内核包含几种创建临时映射的方法。:
+
+* vmap().  这可以用来将多个物理页长期映射到一个连续的虚拟空间。它需要synchronization
+  来解除映射。
+
+* kmap().  这允许对单个页面进行短期映射。它需要synchronization，但在一定程度上被摊销。
+  当以嵌套方式使用时，它也很容易出现死锁，因此不建议在新代码中使用它。
+
+* kmap_atomic().  这允许对单个页面进行非常短的时间映射。由于映射被限制在发布它的CPU上，
+  它表现得很好，但发布任务因此被要求留在该CPU上直到它完成，以免其他任务取代它的映射。
+
+  kmap_atomic() 也可以由中断上下文使用，因为它不睡眠，而且调用者可能在调用kunmap_atomic()
+  之后才睡眠。
+
+  可以假设k[un]map_atomic()不会失败。
+
+
+使用kmap_atomic
+===============
+
+何时何地使用 kmap_atomic() 是很直接的。当代码想要访问一个可能从高内存（见__GFP_HIGHMEM）
+分配的页面的内容时，例如在页缓存中的页面，就会使用它。该API有两个函数，它们的使用方式与
+下面类似::
+
+	/* 找到感兴趣的页面。 */
+	struct page *page = find_get_page(mapping, offset);
+
+	/* 获得对该页内容的访问权。 */
+	void *vaddr = kmap_atomic(page);
+
+	/* 对该页的内容做一些处理。 */
+	memset(vaddr, 0, PAGE_SIZE);
+
+	/* 解除该页面的映射。 */
+	kunmap_atomic(vaddr);
+
+注意，kunmap_atomic()调用的是kmap_atomic()调用的结果而不是参数。
+
+如果你需要映射两个页面，因为你想从一个页面复制到另一个页面，你需要保持kmap_atomic调用严
+格嵌套，如::
+
+	vaddr1 = kmap_atomic(page1);
+	vaddr2 = kmap_atomic(page2);
+
+	memcpy(vaddr1, vaddr2, PAGE_SIZE);
+
+	kunmap_atomic(vaddr2);
+	kunmap_atomic(vaddr1);
+
+
+临时映射的成本
+==============
+
+创建临时映射的代价可能相当高。体系架构必须操作内核的页表、数据TLB和/或MMU的寄存器。
+
+如果CONFIG_HIGHMEM没有被设置，那么内核会尝试用一点计算来创建映射，将页面结构地址转换成
+指向页面内容的指针，而不是去捣鼓映射。在这种情况下，解映射操作可能是一个空操作。
+
+如果CONFIG_MMU没有被设置，那么就不可能有临时映射和高内存。在这种情况下，也将使用计算方法。
+
+
+i386 PAE
+========
+
+在某些情况下，i386 架构将允许你在 32 位机器上安装多达 64GiB 的内存。但这有一些后果:
+
+* Linux需要为系统中的每个页面建立一个页帧结构，而且页帧需要驻在永久映射中，这意味着：
+
+* 你最多可以有896M/sizeof(struct page)页帧；由于页结构体是32字节的，所以最终会有
+  112G的页；然而，内核需要在内存中存储更多的页帧......
+
+* PAE使你的页表变大--这使系统变慢，因为更多的数据需要在TLB填充等方面被访问。一个好处
+  是，PAE有更多的PTE位，可以提供像NX和PAT这样的高级功能。
+
+一般的建议是，你不要在32位机器上使用超过8GiB的空间--尽管更多的空间可能对你和你的工作
+量有用，但你几乎是靠你自己--不要指望内核开发者真的会很关心事情的进展情况。
diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst
new file mode 100644
index 000000000000..a1d2f0356cc1
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/index.rst
@@ -0,0 +1,53 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/index.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+:校译:
+
+=================
+Linux内存管理文档
+=================
+
+这是一个关于Linux内存管理（mm）子系统内部的文档集，其中有不同层次的细节，包括注释
+和邮件列表的回复，用于阐述数据结构和算法的基本情况。如果你正在寻找关于简单分配内存的建
+议，请参阅(Documentation/translations/zh_CN/core-api/memory-allocation.rst)。
+对于控制和调整指南，请参阅(Documentation/admin-guide/mm/index)。
+TODO:待引用文档集被翻译完毕后请及时修改此处）
+
+.. toctree::
+   :maxdepth: 1
+
+   active_mm
+   balance
+   damon/index
+   free_page_reporting
+   highmem
+   ksm
+
+TODOLIST:
+* arch_pgtable_helpers
+* free_page_reporting
+* frontswap
+* hmm
+* hwpoison
+* hugetlbfs_reserv
+* memory-model
+* mmu_notifier
+* numa
+* overcommit-accounting
+* page_migration
+* page_frags
+* page_owner
+* page_table_check
+* remap_file_pages
+* slub
+* split_page_table_lock
+* transhuge
+* unevictable-lru
+* vmalloced-kernel-stacks
+* z3fold
+* zsmalloc
diff --git a/Documentation/translations/zh_CN/vm/ksm.rst b/Documentation/translations/zh_CN/vm/ksm.rst
new file mode 100644
index 000000000000..83b0c73984da
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/ksm.rst
@@ -0,0 +1,70 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/ksm.rst
+
+:翻译:
+
+   徐鑫 xu xin <[email protected]>
+
+============
+内核同页合并
+============
+
+KSM 是一种节省内存的数据去重功能，由CONFIG_KSM=y启用，并在2.6.32版本时被添加
+到Linux内核。详见 ``mm/ksm.c`` 的实现，以及http://lwn.net/Articles/306704和
+https://lwn.net/Articles/330589
+
+KSM的用户空间的接口在Documentation/translations/zh_CN/admin-guide/mm/ksm.rst
+文档中有描述。
+
+设计
+====
+
+概述
+----
+
+概述内容请见mm/ksm.c文档中的“DOC: Overview”
+
+逆映射
+------
+KSM维护着稳定树中的KSM页的逆映射信息。
+
+当KSM页面的共享数小于 ``max_page_sharing`` 的虚拟内存区域(VMAs)时，则代表了
+KSM页的稳定树其中的节点指向了一个rmap_item结构体类型的列表。同时，这个KSM页
+的 ``page->mapping`` 指向了该稳定树节点。
+
+如果共享数超过了阈值，KSM将给稳定树添加第二个维度。稳定树就变成链接一个或多
+个稳定树"副本"的"链"。每个副本都保留KSM页的逆映射信息，其中 ``page->mapping``
+指向该"副本"。
+
+每个链以及链接到该链中的所有"副本"强制不变的是，它们代表了相同的写保护内存
+内容，尽管任中一个"副本"是由同一片内存区的不同的KSM复制页所指向的。
+
+这样一来，相比与无限的逆映射链表，稳定树的查找计算复杂性不受影响。但在稳定树
+本身中不能有重复的KSM页面内容仍然是强制要求。
+
+由 ``max_page_sharing`` 强制决定的数据去重限制是必要的，以此来避免虚拟内存
+rmap链表变得过大。rmap的遍历具有O(N)的复杂度，其中N是共享页面的rmap_项（即
+虚拟映射）的数量，而这个共享页面的节点数量又被 ``max_page_sharing`` 所限制。
+因此，这有效地将线性O(N)计算复杂度从rmap遍历中分散到不同的KSM页面上。ksmd进
+程在稳定节点"链"上的遍历也是O(N)，但这个N是稳定树"副本"的数量，而不是rmap项
+的数量，因此它对ksmd性能没有显著影响。实际上，最佳稳定树"副本"的候选节点将
+保留在"副本"列表的开头。
+
+``max_page_sharing`` 的值设置得高了会促使更快的内存合并（因为将有更少的稳定
+树副本排队进入稳定节点chain->hlist）和更高的数据去重系数，但代价是在交换、压
+缩、NUMA平衡和页面迁移过程中可能导致KSM页的最大rmap遍历速度较慢。
+
+``stable_node_dups/stable_node_chains`` 的比值还受 ``max_page_sharing`` 调控
+的影响，高比值可能意味着稳定节点dup中存在碎片，这可以通过在ksmd中引入碎片算
+法来解决，该算法将rmap项从一个稳定节点dup重定位到另一个稳定节点dup，以便释放
+那些仅包含极少rmap项的稳定节点"dup"，但这可能会增加ksmd进程的CPU使用率，并可
+能会减慢应用程序在KSM页面上的只读计算。
+
+KSM会定期扫描稳定节点"链"中链接的所有稳定树"副本"，以便删减过时了的稳定节点。
+这种扫描的频率由 ``stable_node_chains_prune_millisecs`` 这个sysfs 接口定义。
+
+参考
+====
+内核代码请见mm/ksm.c。
+涉及的函数(mm_slot  ksm_scan  stable_node  rmap_item)。
diff --git a/Documentation/translations/zh_TW/index.rst b/Documentation/translations/zh_TW/index.rst
index f56f78ba7860..e1ce9d8c06f8 100644
--- a/Documentation/translations/zh_TW/index.rst
+++ b/Documentation/translations/zh_TW/index.rst
@@ -5,7 +5,7 @@
 	\renewcommand\thesection*
 	\renewcommand\thesubsection*
 	\kerneldocCJKon
-	\kerneldocBeginTC
+	\kerneldocBeginTC{
 
 .. _linux_doc_zh_tw:
 
@@ -174,4 +174,4 @@ TODOList:
 
 .. raw:: latex
 
-	\kerneldocEndTC
+	}\kerneldocEndTC
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index e6fce2cbd99e..dfbc27d17ff7 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -256,7 +256,7 @@ Code  Seq#    Include File                                           Comments
 'l'   00-3F  linux/tcfs_fs.h                                         transparent cryptographic file system
                                                                      <http://web.archive.org/web/%2A/http://mikonos.dia.unisa.it/tcfs>
 'l'   40-7F  linux/udf_fs_i.h                                        in development:
-                                                                     <http://sourceforge.net/projects/linux-udf/>
+                                                                     <https://github.com/pali/udftools>
 'm'   00-09  linux/mmtimer.h                                         conflict!
 'm'   all    linux/mtio.h                                            conflict!
 'm'   all    linux/soundcard.h                                       conflict!
diff --git a/Documentation/virt/uml/user_mode_linux_howto_v2.rst b/Documentation/virt/uml/user_mode_linux_howto_v2.rst
index 2cafd3c3c6cb..d5ad96c795f6 100644
--- a/Documentation/virt/uml/user_mode_linux_howto_v2.rst
+++ b/Documentation/virt/uml/user_mode_linux_howto_v2.rst
@@ -664,7 +664,11 @@ one is input, the second one output.
 * The fd channel - use file descriptor numbers for input/output. Example:
   ``con1=fd:0,fd:1.``
 
-* The port channel - listen on TCP port number. Example: ``con1=port:4321``
+* The port channel - start a telnet server on TCP port number. Example:
+  ``con1=port:4321``.  The host must have /usr/sbin/in.telnetd (usually part of
+  a telnetd package) and the port-helper from the UML utilities (see the
+  information for the xterm channel below).  UML will not boot until a client
+  connects.
 
 * The pty and pts channels - use system pty/pts.
 
diff --git a/Documentation/vm/page_owner.rst b/Documentation/vm/page_owner.rst
index 9837fc8147dd..905555e3e483 100644
--- a/Documentation/vm/page_owner.rst
+++ b/Documentation/vm/page_owner.rst
@@ -26,9 +26,9 @@ fragmentation statistics can be obtained through gfp flag information of
 each page. It is already implemented and activated if page owner is
 enabled. Other usages are more than welcome.
 
-page owner is disabled in default. So, if you'd like to use it, you need
-to add "page_owner=on" into your boot cmdline. If the kernel is built
-with page owner and page owner is disabled in runtime due to no enabling
+page owner is disabled by default. So, if you'd like to use it, you need
+to add "page_owner=on" to your boot cmdline. If the kernel is built
+with page owner and page owner is disabled in runtime due to not enabling
 boot option, runtime overhead is marginal. If disabled in runtime, it
 doesn't require memory to store owner information, so there is no runtime
 memory overhead. And, page owner inserts just two unlikely branches into
@@ -85,7 +85,7 @@ Usage
 	cat /sys/kernel/debug/page_owner > page_owner_full.txt
 	./page_owner_sort page_owner_full.txt sorted_page_owner.txt
 
-   The general output of ``page_owner_full.txt`` is as follows:
+   The general output of ``page_owner_full.txt`` is as follows::
 
 	Page allocated via order XXX, ...
 	PFN XXX ...
@@ -100,7 +100,7 @@ Usage
    and pages of buf, and finally sorts them according to the times.
 
    See the result about who allocated each page
-   in the ``sorted_page_owner.txt``. General output:
+   in the ``sorted_page_owner.txt``. General output::
 
 	XXX times, XXX pages:
 	Page allocated via order XXX, ...
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index f498f1d36cd3..982c8af853b9 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -21,6 +21,7 @@ x86-specific Documentation
    tlb
    mtrr
    pat
+   intel-hfi
    intel-iommu
    intel_txt
    amd-memory-encryption
diff --git a/Documentation/x86/intel-hfi.rst b/Documentation/x86/intel-hfi.rst
new file mode 100644
index 000000000000..49dea58ea4fb
--- /dev/null
+++ b/Documentation/x86/intel-hfi.rst
@@ -0,0 +1,72 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================================
+Hardware-Feedback Interface for scheduling on Intel Hardware
+============================================================
+
+Overview
+--------
+
+Intel has described the Hardware Feedback Interface (HFI) in the Intel 64 and
+IA-32 Architectures Software Developer's Manual (Intel SDM) Volume 3 Section
+14.6 [1]_.
+
+The HFI gives the operating system a performance and energy efficiency
+capability data for each CPU in the system. Linux can use the information from
+the HFI to influence task placement decisions.
+
+The Hardware Feedback Interface
+-------------------------------
+
+The Hardware Feedback Interface provides to the operating system information
+about the performance and energy efficiency of each CPU in the system. Each
+capability is given as a unit-less quantity in the range [0-255]. Higher values
+indicate higher capability. Energy efficiency and performance are reported in
+separate capabilities. Even though on some systems these two metrics may be
+related, they are specified as independent capabilities in the Intel SDM.
+
+These capabilities may change at runtime as a result of changes in the
+operating conditions of the system or the action of external factors. The rate
+at which these capabilities are updated is specific to each processor model. On
+some models, capabilities are set at boot time and never change. On others,
+capabilities may change every tens of milliseconds. For instance, a remote
+mechanism may be used to lower Thermal Design Power. Such change can be
+reflected in the HFI. Likewise, if the system needs to be throttled due to
+excessive heat, the HFI may reflect reduced performance on specific CPUs.
+
+The kernel or a userspace policy daemon can use these capabilities to modify
+task placement decisions. For instance, if either the performance or energy
+capabilities of a given logical processor becomes zero, it is an indication that
+the hardware recommends to the operating system to not schedule any tasks on
+that processor for performance or energy efficiency reasons, respectively.
+
+Implementation details for Linux
+--------------------------------
+
+The infrastructure to handle thermal event interrupts has two parts. In the
+Local Vector Table of a CPU's local APIC, there exists a register for the
+Thermal Monitor Register. This register controls how interrupts are delivered
+to a CPU when the thermal monitor generates and interrupt. Further details
+can be found in the Intel SDM Vol. 3 Section 10.5 [1]_.
+
+The thermal monitor may generate interrupts per CPU or per package. The HFI
+generates package-level interrupts. This monitor is configured and initialized
+via a set of machine-specific registers. Specifically, the HFI interrupt and
+status are controlled via designated bits in the IA32_PACKAGE_THERM_INTERRUPT
+and IA32_PACKAGE_THERM_STATUS registers, respectively. There exists one HFI
+table per package. Further details can be found in the Intel SDM Vol. 3
+Section 14.9 [1]_.
+
+The hardware issues an HFI interrupt after updating the HFI table and is ready
+for the operating system to consume it. CPUs receive such interrupt via the
+thermal entry in the Local APIC's Local Vector Table.
+
+When servicing such interrupt, the HFI driver parses the updated table and
+relays the update to userspace using the thermal notification framework. Given
+that there may be many HFI updates every second, the updates relayed to
+userspace are throttled at a rate of CONFIG_HZ jiffies.
+
+References
+----------
+
+.. [1] https://www.intel.com/sdm
diff --git a/Documentation/x86/sva.rst b/Documentation/x86/sva.rst
index 076efd51ef1f..2e9b8b0f9a0f 100644
--- a/Documentation/x86/sva.rst
+++ b/Documentation/x86/sva.rst
@@ -104,18 +104,47 @@ The MSR must be configured on each logical CPU before any application
 thread can interact with a device. Threads that belong to the same
 process share the same page tables, thus the same MSR value.
 
-PASID is cleared when a process is created. The PASID allocation and MSR
-programming may occur long after a process and its threads have been created.
-One thread must call iommu_sva_bind_device() to allocate the PASID for the
-process. If a thread uses ENQCMD without the MSR first being populated, a #GP
-will be raised. The kernel will update the PASID MSR with the PASID for all
-threads in the process. A single process PASID can be used simultaneously
-with multiple devices since they all share the same address space.
-
-One thread can call iommu_sva_unbind_device() to free the allocated PASID.
-The kernel will clear the PASID MSR for all threads belonging to the process.
-
-New threads inherit the MSR value from the parent.
+PASID Life Cycle Management
+===========================
+
+PASID is initialized as INVALID_IOASID (-1) when a process is created.
+
+Only processes that access SVA-capable devices need to have a PASID
+allocated. This allocation happens when a process opens/binds an SVA-capable
+device but finds no PASID for this process. Subsequent binds of the same, or
+other devices will share the same PASID.
+
+Although the PASID is allocated to the process by opening a device,
+it is not active in any of the threads of that process. It's loaded to the
+IA32_PASID MSR lazily when a thread tries to submit a work descriptor
+to a device using the ENQCMD.
+
+That first access will trigger a #GP fault because the IA32_PASID MSR
+has not been initialized with the PASID value assigned to the process
+when the device was opened. The Linux #GP handler notes that a PASID has
+been allocated for the process, and so initializes the IA32_PASID MSR
+and returns so that the ENQCMD instruction is re-executed.
+
+On fork(2) or exec(2) the PASID is removed from the process as it no
+longer has the same address space that it had when the device was opened.
+
+On clone(2) the new task shares the same address space, so will be
+able to use the PASID allocated to the process. The IA32_PASID is not
+preemptively initialized as the PASID value might not be allocated yet or
+the kernel does not know whether this thread is going to access the device
+and the cleared IA32_PASID MSR reduces context switch overhead by xstate
+init optimization. Since #GP faults have to be handled on any threads that
+were created before the PASID was assigned to the mm of the process, newly
+created threads might as well be treated in a consistent way.
+
+Due to complexity of freeing the PASID and clearing all IA32_PASID MSRs in
+all threads in unbind, free the PASID lazily only on mm exit.
+
+If a process does a close(2) of the device file descriptor and munmap(2)
+of the device MMIO portal, then the driver will unbind the device. The
+PASID is still marked VALID in the PASID_MSR for any threads in the
+process that accessed the device. But this is harmless as without the
+MMIO portal they cannot submit new work to the device.
 
 Relationships
 =============
diff --git a/MAINTAINERS b/MAINTAINERS
index 05fd080b82f3..f39be89f9128 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1002,6 +1002,7 @@ L:	[email protected]
 S:	Supported
 F:	Documentation/admin-guide/pm/amd-pstate.rst
 F:	drivers/cpufreq/amd-pstate*
+F:	tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
 
 AMD PTDMA DRIVER
 M:	Sanjay R Mehta <[email protected]>
@@ -1769,7 +1770,7 @@ T:	git https://github.com/AsahiLinux/linux.git
 F:	Documentation/devicetree/bindings/arm/apple.yaml
 F:	Documentation/devicetree/bindings/arm/apple/*
 F:	Documentation/devicetree/bindings/i2c/apple,i2c.yaml
-F:	Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
+F:	Documentation/devicetree/bindings/interrupt-controller/apple,*
 F:	Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml
 F:	Documentation/devicetree/bindings/pci/apple,pcie.yaml
 F:	Documentation/devicetree/bindings/pinctrl/apple,pinctrl.yaml
@@ -2572,7 +2573,7 @@ F:	sound/soc/rockchip/
 N:	rockchip
 
 ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES
-M:	Krzysztof Kozlowski <[email protected]>
+M:	Krzysztof Kozlowski <[email protected]>
 R:	Alim Akhtar <[email protected]>
 L:	[email protected] (moderated for non-subscribers)
 L:	[email protected]
@@ -2739,7 +2740,7 @@ N:	stm32
 N:	stm
 
 ARM/Synaptics SoC support
-M:	Jisheng Zhang <[email protected]>
+M:	Jisheng Zhang <[email protected]>
 M:	Sebastian Hesselbarth <[email protected]>
 L:	[email protected] (moderated for non-subscribers)
 S:	Maintained
@@ -3905,7 +3906,7 @@ M:	Scott Branden <[email protected]>
 M:	[email protected]
 L:	[email protected] (moderated for non-subscribers)
 S:	Maintained
-T:	git git://github.com/broadcom/cygnus-linux.git
+T:	git git://github.com/broadcom/stblinux.git
 F:	arch/arm64/boot/dts/broadcom/northstar2/*
 F:	arch/arm64/boot/dts/broadcom/stingray/*
 F:	drivers/clk/bcm/clk-ns*
@@ -6307,8 +6308,8 @@ T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	drivers/gpu/drm/vboxvideo/
 
 DRM DRIVER FOR VMWARE VIRTUAL GPU
-M:	"VMware Graphics" <[email protected]>
 M:	Zack Rusin <[email protected]>
+R:	VMware Graphics Reviewers <[email protected]>
 L:	[email protected]
 S:	Supported
 T:	git git://anongit.freedesktop.org/drm/drm-misc
@@ -8643,7 +8644,7 @@ S:	Maintained
 F:	drivers/gpio/gpio-hisi.c
 
 HISILICON HIGH PERFORMANCE RSA ENGINE DRIVER (HPRE)
-M:	Zaibo Xu <[email protected]>
+M:	Longfang Liu <[email protected]>
 L:	[email protected]
 S:	Maintained
 F:	Documentation/ABI/testing/debugfs-hisi-hpre
@@ -8723,8 +8724,8 @@ F:	Documentation/devicetree/bindings/scsi/hisilicon-sas.txt
 F:	drivers/scsi/hisi_sas/
 
 HISILICON SECURITY ENGINE V2 DRIVER (SEC2)
-M:	Zaibo Xu <[email protected]>
 M:	Kai Ye <[email protected]>
+M:	Longfang Liu <[email protected]>
 L:	[email protected]
 S:	Maintained
 F:	Documentation/ABI/testing/debugfs-hisi-sec
@@ -8755,7 +8756,7 @@ F:	Documentation/devicetree/bindings/mfd/hisilicon,hi6421-spmi-pmic.yaml
 F:	drivers/mfd/hi6421-spmi-pmic.c
 
 HISILICON TRUE RANDOM NUMBER GENERATOR V2 SUPPORT
-M:	Zaibo Xu <[email protected]>
+M:	Weili Qian <[email protected]>
 S:	Maintained
 F:	drivers/crypto/hisilicon/trng/trng.c
 
@@ -10455,6 +10456,8 @@ KERNEL REGRESSIONS
 M:	Thorsten Leemhuis <[email protected]>
 L:	[email protected]
 S:	Supported
+F:	Documentation/admin-guide/reporting-regressions.rst
+F:	Documentation/process/handling-regressions.rst
 
 KERNEL SELFTEST FRAMEWORK
 M:	Shuah Khan <[email protected]>
@@ -11675,7 +11678,7 @@ F:	drivers/iio/proximity/mb1232.c
 
 MAXIM MAX17040 FAMILY FUEL GAUGE DRIVERS
 R:	Iskren Chernev <[email protected]>
-R:	Krzysztof Kozlowski <[email protected]>
+R:	Krzysztof Kozlowski <[email protected]>
 R:	Marek Szyprowski <[email protected]>
 R:	Matheus Castello <[email protected]>
 L:	[email protected]
@@ -11685,7 +11688,7 @@ F:	drivers/power/supply/max17040_battery.c
 
 MAXIM MAX17042 FAMILY FUEL GAUGE DRIVERS
 R:	Hans de Goede <[email protected]>
-R:	Krzysztof Kozlowski <[email protected]>
+R:	Krzysztof Kozlowski <[email protected]>
 R:	Marek Szyprowski <[email protected]>
 R:	Sebastian Krzyszkowiak <[email protected]>
 R:	Purism Kernel Team <[email protected]>
@@ -11730,7 +11733,7 @@ F:	Documentation/devicetree/bindings/power/supply/maxim,max77976.yaml
 F:	drivers/power/supply/max77976_charger.c
 
 MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
-M:	Krzysztof Kozlowski <[email protected]>
+M:	Krzysztof Kozlowski <[email protected]>
 M:	Bartlomiej Zolnierkiewicz <[email protected]>
 L:	[email protected]
 S:	Supported
@@ -11739,7 +11742,7 @@ F:	drivers/power/supply/max77693_charger.c
 
 MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
 M:	Chanwoo Choi <[email protected]>
-M:	Krzysztof Kozlowski <[email protected]>
+M:	Krzysztof Kozlowski <[email protected]>
 M:	Bartlomiej Zolnierkiewicz <[email protected]>
 L:	[email protected]
 S:	Supported
@@ -12428,7 +12431,7 @@ F:	include/linux/memblock.h
 F:	mm/memblock.c
 
 MEMORY CONTROLLER DRIVERS
-M:	Krzysztof Kozlowski <[email protected]>
+M:	Krzysztof Kozlowski <[email protected]>
 L:	[email protected]
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git
@@ -13376,6 +13379,7 @@ F:	net/core/drop_monitor.c
 NETWORKING DRIVERS
 M:	"David S. Miller" <[email protected]>
 M:	Jakub Kicinski <[email protected]>
+M:	Paolo Abeni <[email protected]>
 L:	[email protected]
 S:	Maintained
 Q:	https://patchwork.kernel.org/project/netdevbpf/list/
@@ -13422,6 +13426,7 @@ F:	tools/testing/selftests/drivers/net/dsa/
 NETWORKING [GENERAL]
 M:	"David S. Miller" <[email protected]>
 M:	Jakub Kicinski <[email protected]>
+M:	Paolo Abeni <[email protected]>
 L:	[email protected]
 S:	Maintained
 Q:	https://patchwork.kernel.org/project/netdevbpf/list/
@@ -13565,7 +13570,7 @@ F:	include/uapi/linux/nexthop.h
 F:	net/ipv4/nexthop.c
 
 NFC SUBSYSTEM
-M:	Krzysztof Kozlowski <[email protected]>
+M:	Krzysztof Kozlowski <[email protected]>
 L:	[email protected] (subscribers-only)
 L:	[email protected]
 S:	Maintained
@@ -13879,7 +13884,7 @@ F:	Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml
 F:	drivers/regulator/pf8x00-regulator.c
 
 NXP PTN5150A CC LOGIC AND EXTCON DRIVER
-M:	Krzysztof Kozlowski <[email protected]>
+M:	Krzysztof Kozlowski <[email protected]>
 L:	[email protected]
 S:	Maintained
 F:	Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml
@@ -14603,8 +14608,9 @@ F:	include/uapi/linux/ppdev.h
 
 PARAVIRT_OPS INTERFACE
 M:	Juergen Gross <[email protected]>
-M:	Deep Shah <[email protected]>
-M:	"VMware, Inc." <[email protected]>
+M:	Srivatsa S. Bhat (VMware) <[email protected]>
+R:	Alexey Makhalov <[email protected]>
+R:	VMware PV-Drivers Reviewers <[email protected]>
 L:	[email protected]
 L:	[email protected]
 S:	Supported
@@ -15305,7 +15311,7 @@ F:	drivers/pinctrl/renesas/
 
 PIN CONTROLLER - SAMSUNG
 M:	Tomasz Figa <[email protected]>
-M:	Krzysztof Kozlowski <[email protected]>
+M:	Krzysztof Kozlowski <[email protected]>
 M:	Sylwester Nawrocki <[email protected]>
 R:	Alim Akhtar <[email protected]>
 L:	[email protected] (moderated for non-subscribers)
@@ -16207,6 +16213,7 @@ M:	Jason A. Donenfeld <[email protected]>
 T:	git https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git
 S:	Maintained
 F:	drivers/char/random.c
+F:	drivers/virt/vmgenid.c
 
 RAPIDIO SUBSYSTEM
 M:	Matt Porter <[email protected]>
@@ -16321,6 +16328,8 @@ F:	tools/testing/selftests/resctrl/
 
 READ-COPY UPDATE (RCU)
 M:	"Paul E. McKenney" <[email protected]>
+M:	Frederic Weisbecker <[email protected]> (kernel/rcu/tree_nocb.h)
+M:	Neeraj Upadhyay <[email protected]> (kernel/rcu/tasks.h)
 M:	Josh Triplett <[email protected]>
 R:	Steven Rostedt <[email protected]>
 R:	Mathieu Desnoyers <[email protected]>
@@ -16947,7 +16956,7 @@ W:	http://www.ibm.com/developerworks/linux/linux390/
 F:	drivers/s390/scsi/zfcp_*
 
 S3C ADC BATTERY DRIVER
-M:	Krzysztof Kozlowski <[email protected]>
+M:	Krzysztof Kozlowski <[email protected]>
 L:	[email protected]
 S:	Odd Fixes
 F:	drivers/power/supply/s3c_adc_battery.c
@@ -16992,7 +17001,7 @@ F:	Documentation/admin-guide/LSM/SafeSetID.rst
 F:	security/safesetid/
 
 SAMSUNG AUDIO (ASoC) DRIVERS
-M:	Krzysztof Kozlowski <[email protected]>
+M:	Krzysztof Kozlowski <[email protected]>
 M:	Sylwester Nawrocki <[email protected]>
 L:	[email protected] (moderated for non-subscribers)
 S:	Supported
@@ -17000,7 +17009,7 @@ F:	Documentation/devicetree/bindings/sound/samsung*
 F:	sound/soc/samsung/
 
 SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER
-M:	Krzysztof Kozlowski <[email protected]>
+M:	Krzysztof Kozlowski <[email protected]>
 L:	[email protected]
 L:	[email protected]
 S:	Maintained
@@ -17035,7 +17044,7 @@ S:	Maintained
 F:	drivers/platform/x86/samsung-laptop.c
 
 SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
-M:	Krzysztof Kozlowski <[email protected]>
+M:	Krzysztof Kozlowski <[email protected]>
 M:	Bartlomiej Zolnierkiewicz <[email protected]>
 L:	[email protected]
 L:	[email protected]
@@ -17061,7 +17070,7 @@ F:	drivers/media/platform/s3c-camif/
 F:	include/media/drv-intf/s3c_camif.h
 
 SAMSUNG S3FWRN5 NFC DRIVER
-M:	Krzysztof Kozlowski <[email protected]>
+M:	Krzysztof Kozlowski <[email protected]>
 M:	Krzysztof Opasiak <[email protected]>
 L:	[email protected] (subscribers-only)
 S:	Maintained
@@ -17083,7 +17092,7 @@ S:	Supported
 F:	drivers/media/i2c/s5k5baf.c
 
 SAMSUNG S5P Security SubSystem (SSS) DRIVER
-M:	Krzysztof Kozlowski <[email protected]>
+M:	Krzysztof Kozlowski <[email protected]>
 M:	Vladimir Zapolskiy <[email protected]>
 L:	[email protected]
 L:	[email protected]
@@ -17118,7 +17127,7 @@ F:	include/linux/clk/samsung.h
 F:	include/linux/platform_data/clk-s3c2410.h
 
 SAMSUNG SPI DRIVERS
-M:	Krzysztof Kozlowski <[email protected]>
+M:	Krzysztof Kozlowski <[email protected]>
 M:	Andi Shyti <[email protected]>
 L:	[email protected]
 L:	[email protected]
@@ -17135,11 +17144,12 @@ S:	Supported
 F:	drivers/net/ethernet/samsung/sxgbe/
 
 SAMSUNG THERMAL DRIVER
-M:	Bartlomiej Zolnierkiewicz <[email protected]>
+M:	Bartlomiej Zolnierkiewicz <[email protected]>
+M:	Krzysztof Kozlowski <[email protected]>
 L:	[email protected]
 L:	[email protected]
-S:	Supported
-T:	git https://github.com/lmajewski/linux-samsung-thermal.git
+S:	Maintained
+F:	Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml
 F:	drivers/thermal/samsung/
 
 SAMSUNG USB2 PHY DRIVER
@@ -20638,30 +20648,33 @@ F:	tools/testing/vsock/
 
 VMWARE BALLOON DRIVER
 M:	Nadav Amit <[email protected]>
-M:	"VMware, Inc." <[email protected]>
+R:	VMware PV-Drivers Reviewers <[email protected]>
 L:	[email protected]
 S:	Maintained
 F:	drivers/misc/vmw_balloon.c
 
 VMWARE HYPERVISOR INTERFACE
-M:	Deep Shah <[email protected]>
-M:	"VMware, Inc." <[email protected]>
+M:	Srivatsa S. Bhat (VMware) <[email protected]>
+M:	Alexey Makhalov <[email protected]>
+R:	VMware PV-Drivers Reviewers <[email protected]>
 L:	[email protected]
+L:	[email protected]
 S:	Supported
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vmware
 F:	arch/x86/include/asm/vmware.h
 F:	arch/x86/kernel/cpu/vmware.c
 
 VMWARE PVRDMA DRIVER
 M:	Bryan Tan <[email protected]>
 M:	Vishnu Dasa <[email protected]>
-M:	VMware PV-Drivers <[email protected]>
+R:	VMware PV-Drivers Reviewers <[email protected]>
 L:	[email protected]
 S:	Maintained
 F:	drivers/infiniband/hw/vmw_pvrdma/
 
 VMware PVSCSI driver
 M:	Vishal Bhakta <[email protected]>
-M:	VMware PV-Drivers <[email protected]>
+R:	VMware PV-Drivers Reviewers <[email protected]>
 L:	[email protected]
 S:	Maintained
 F:	drivers/scsi/vmw_pvscsi.c
@@ -20669,7 +20682,7 @@ F:	drivers/scsi/vmw_pvscsi.h
 
 VMWARE VIRTUAL PTP CLOCK DRIVER
 M:	Vivek Thampi <[email protected]>
-M:	"VMware, Inc." <[email protected]>
+R:	VMware PV-Drivers Reviewers <[email protected]>
 L:	[email protected]
 S:	Supported
 F:	drivers/ptp/ptp_vmw.c
@@ -20677,14 +20690,15 @@ F:	drivers/ptp/ptp_vmw.c
 VMWARE VMCI DRIVER
 M:	Jorgen Hansen <[email protected]>
 M:	Vishnu Dasa <[email protected]>
+R:	VMware PV-Drivers Reviewers <[email protected]>
 L:	[email protected]
-L:	[email protected] (private)
 S:	Maintained
 F:	drivers/misc/vmw_vmci/
 
 VMWARE VMMOUSE SUBDRIVER
-M:	"VMware Graphics" <[email protected]>
-M:	"VMware, Inc." <[email protected]>
+M:	Zack Rusin <[email protected]>
+R:	VMware Graphics Reviewers <[email protected]>
+R:	VMware PV-Drivers Reviewers <[email protected]>
 L:	[email protected]
 S:	Maintained
 F:	drivers/input/mouse/vmmouse.c
@@ -20692,7 +20706,7 @@ F:	drivers/input/mouse/vmmouse.h
 
 VMWARE VMXNET3 ETHERNET DRIVER
 M:	Ronak Doshi <[email protected]>
-M:	[email protected]
+R:	VMware PV-Drivers Reviewers <[email protected]>
 L:	[email protected]
 S:	Maintained
 F:	drivers/net/vmxnet3/
@@ -21288,6 +21302,11 @@ T:	git https://github.com/Xilinx/linux-xlnx.git
 F:	Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml
 F:	drivers/phy/xilinx/phy-zynqmp.c
 
+XILINX ZYNQMP SHA3 DRIVER
+M:	Harsha <[email protected]>
+S:	Maintained
+F:	drivers/crypto/xilinx/zynqmp-sha.c
+
 XILINX EVENT MANAGEMENT DRIVER
 M:	Abhyuday Godhasara <[email protected]>
 S:	Maintained
diff --git a/Makefile b/Makefile
index 87f672473523..7214f075e1f0 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 17
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Superb Owl
 
 # *DOCUMENTATION*
diff --git a/arch/alpha/include/asm/xor.h b/arch/alpha/include/asm/xor.h
index 5aeb4fb3cb7c..e0de0c233ab9 100644
--- a/arch/alpha/include/asm/xor.h
+++ b/arch/alpha/include/asm/xor.h
@@ -5,24 +5,43 @@
  * Optimized RAID-5 checksumming functions for alpha EV5 and EV6
  */
 
-extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *);
-extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *,
-		        unsigned long *);
-extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *,
-		        unsigned long *, unsigned long *);
-extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *,
-		        unsigned long *, unsigned long *, unsigned long *);
+extern void
+xor_alpha_2(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2);
+extern void
+xor_alpha_3(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2,
+	    const unsigned long * __restrict p3);
+extern void
+xor_alpha_4(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2,
+	    const unsigned long * __restrict p3,
+	    const unsigned long * __restrict p4);
+extern void
+xor_alpha_5(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2,
+	    const unsigned long * __restrict p3,
+	    const unsigned long * __restrict p4,
+	    const unsigned long * __restrict p5);
 
-extern void xor_alpha_prefetch_2(unsigned long, unsigned long *,
-				 unsigned long *);
-extern void xor_alpha_prefetch_3(unsigned long, unsigned long *,
-				 unsigned long *, unsigned long *);
-extern void xor_alpha_prefetch_4(unsigned long, unsigned long *,
-				 unsigned long *, unsigned long *,
-				 unsigned long *);
-extern void xor_alpha_prefetch_5(unsigned long, unsigned long *,
-				 unsigned long *, unsigned long *,
-				 unsigned long *, unsigned long *);
+extern void
+xor_alpha_prefetch_2(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2);
+extern void
+xor_alpha_prefetch_3(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3);
+extern void
+xor_alpha_prefetch_4(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4);
+extern void
+xor_alpha_prefetch_5(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4,
+		     const unsigned long * __restrict p5);
 
 asm("								\n\
 	.text							\n\
diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
index 6dde51c2aed3..e4775bbceecc 100644
--- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
+++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
@@ -118,7 +118,7 @@
 	};
 
 	pinctrl_fwqspid_default: fwqspid_default {
-		function = "FWQSPID";
+		function = "FWSPID";
 		groups = "FWQSPID";
 	};
 
diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi
index dff18fc9a906..21294f775a20 100644
--- a/arch/arm/boot/dts/bcm2711.dtsi
+++ b/arch/arm/boot/dts/bcm2711.dtsi
@@ -290,6 +290,7 @@
 
 		hvs: hvs@7e400000 {
 			compatible = "brcm,bcm2711-hvs";
+			reg = <0x7e400000 0x8000>;
 			interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
 		};
 
diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi
index 956a26d52a4c..0a11bacffc1f 100644
--- a/arch/arm/boot/dts/dra7-l4.dtsi
+++ b/arch/arm/boot/dts/dra7-l4.dtsi
@@ -3482,8 +3482,7 @@
 				ti,timer-pwm;
 			};
 		};
-
-		target-module@2c000 {			/* 0x4882c000, ap 17 02.0 */
+		timer15_target: target-module@2c000 {	/* 0x4882c000, ap 17 02.0 */
 			compatible = "ti,sysc-omap4-timer", "ti,sysc";
 			reg = <0x2c000 0x4>,
 			      <0x2c010 0x4>;
@@ -3511,7 +3510,7 @@
 			};
 		};
 
-		target-module@2e000 {			/* 0x4882e000, ap 19 14.0 */
+		timer16_target: target-module@2e000 {	/* 0x4882e000, ap 19 14.0 */
 			compatible = "ti,sysc-omap4-timer", "ti,sysc";
 			reg = <0x2e000 0x4>,
 			      <0x2e010 0x4>;
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 42bff117656c..97ce0c4f1df7 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -1339,20 +1339,20 @@
 };
 
 /* Local timers, see ARM architected timer wrap erratum i940 */
-&timer3_target {
+&timer15_target {
 	ti,no-reset-on-init;
 	ti,no-idle;
 	timer@0 {
-		assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER3_CLKCTRL 24>;
+		assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>;
 		assigned-clock-parents = <&timer_sys_clk_div>;
 	};
 };
 
-&timer4_target {
+&timer16_target {
 	ti,no-reset-on-init;
 	ti,no-idle;
 	timer@0 {
-		assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 24>;
+		assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>;
 		assigned-clock-parents = <&timer_sys_clk_div>;
 	};
 };
diff --git a/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts b/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts
index d35fb79d2f51..4db43324dafa 100644
--- a/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts
+++ b/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts
@@ -5,7 +5,13 @@
 
 / {
 	/* Version of Nyan Big with 1080p panel */
-	panel {
-		compatible = "auo,b133htn01";
+	host1x@50000000 {
+		dpaux@545c0000 {
+			aux-bus {
+				panel: panel {
+					compatible = "auo,b133htn01";
+				};
+			};
+		};
 	};
 };
diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S
index 7d0cc7f226a5..7b61032f29fa 100644
--- a/arch/arm/crypto/aes-neonbs-core.S
+++ b/arch/arm/crypto/aes-neonbs-core.S
@@ -758,29 +758,24 @@ ENTRY(aesbs_cbc_decrypt)
 ENDPROC(aesbs_cbc_decrypt)
 
 	.macro		next_ctr, q
-	vmov.32		\q\()h[1], r10
+	vmov		\q\()h, r9, r10
 	adds		r10, r10, #1
-	vmov.32		\q\()h[0], r9
 	adcs		r9, r9, #0
-	vmov.32		\q\()l[1], r8
+	vmov		\q\()l, r7, r8
 	adcs		r8, r8, #0
-	vmov.32		\q\()l[0], r7
 	adc		r7, r7, #0
 	vrev32.8	\q, \q
 	.endm
 
 	/*
 	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
-	 *		     int rounds, int blocks, u8 ctr[], u8 final[])
+	 *		     int rounds, int bytes, u8 ctr[])
 	 */
 ENTRY(aesbs_ctr_encrypt)
 	mov		ip, sp
 	push		{r4-r10, lr}
 
-	ldm		ip, {r5-r7}		// load args 4-6
-	teq		r7, #0
-	addne		r5, r5, #1		// one extra block if final != 0
-
+	ldm		ip, {r5, r6}		// load args 4-5
 	vld1.8		{q0}, [r6]		// load counter
 	vrev32.8	q1, q0
 	vmov		r9, r10, d3
@@ -792,20 +787,19 @@ ENTRY(aesbs_ctr_encrypt)
 	adc		r7, r7, #0
 
 99:	vmov		q1, q0
+	sub		lr, r5, #1
 	vmov		q2, q0
+	adr		ip, 0f
 	vmov		q3, q0
+	and		lr, lr, #112
 	vmov		q4, q0
+	cmp		r5, #112
 	vmov		q5, q0
+	sub		ip, ip, lr, lsl #1
 	vmov		q6, q0
+	add		ip, ip, lr, lsr #2
 	vmov		q7, q0
-
-	adr		ip, 0f
-	sub		lr, r5, #1
-	and		lr, lr, #7
-	cmp		r5, #8
-	sub		ip, ip, lr, lsl #5
-	sub		ip, ip, lr, lsl #2
-	movlt		pc, ip			// computed goto if blocks < 8
+	movle		pc, ip			// computed goto if bytes < 112
 
 	next_ctr	q1
 	next_ctr	q2
@@ -820,12 +814,14 @@ ENTRY(aesbs_ctr_encrypt)
 	bl		aesbs_encrypt8
 
 	adr		ip, 1f
-	and		lr, r5, #7
-	cmp		r5, #8
-	movgt		r4, #0
-	ldrle		r4, [sp, #40]		// load final in the last round
-	sub		ip, ip, lr, lsl #2
-	movlt		pc, ip			// computed goto if blocks < 8
+	sub		lr, r5, #1
+	cmp		r5, #128
+	bic		lr, lr, #15
+	ands		r4, r5, #15		// preserves C flag
+	teqcs		r5, r5			// set Z flag if not last iteration
+	sub		ip, ip, lr, lsr #2
+	rsb		r4, r4, #16
+	movcc		pc, ip			// computed goto if bytes < 128
 
 	vld1.8		{q8}, [r1]!
 	vld1.8		{q9}, [r1]!
@@ -834,46 +830,70 @@ ENTRY(aesbs_ctr_encrypt)
 	vld1.8		{q12}, [r1]!
 	vld1.8		{q13}, [r1]!
 	vld1.8		{q14}, [r1]!
-	teq		r4, #0			// skip last block if 'final'
-1:	bne		2f
+1:	subne		r1, r1, r4
 	vld1.8		{q15}, [r1]!
 
-2:	adr		ip, 3f
-	cmp		r5, #8
-	sub		ip, ip, lr, lsl #3
-	movlt		pc, ip			// computed goto if blocks < 8
+	add		ip, ip, #2f - 1b
 
 	veor		q0, q0, q8
-	vst1.8		{q0}, [r0]!
 	veor		q1, q1, q9
-	vst1.8		{q1}, [r0]!
 	veor		q4, q4, q10
-	vst1.8		{q4}, [r0]!
 	veor		q6, q6, q11
-	vst1.8		{q6}, [r0]!
 	veor		q3, q3, q12
-	vst1.8		{q3}, [r0]!
 	veor		q7, q7, q13
-	vst1.8		{q7}, [r0]!
 	veor		q2, q2, q14
+	bne		3f
+	veor		q5, q5, q15
+
+	movcc		pc, ip			// computed goto if bytes < 128
+
+	vst1.8		{q0}, [r0]!
+	vst1.8		{q1}, [r0]!
+	vst1.8		{q4}, [r0]!
+	vst1.8		{q6}, [r0]!
+	vst1.8		{q3}, [r0]!
+	vst1.8		{q7}, [r0]!
 	vst1.8		{q2}, [r0]!
-	teq		r4, #0			// skip last block if 'final'
-	W(bne)		5f
-3:	veor		q5, q5, q15
+2:	subne		r0, r0, r4
 	vst1.8		{q5}, [r0]!
 
-4:	next_ctr	q0
+	next_ctr	q0
 
-	subs		r5, r5, #8
+	subs		r5, r5, #128
 	bgt		99b
 
 	vst1.8		{q0}, [r6]
 	pop		{r4-r10, pc}
 
-5:	vst1.8		{q5}, [r4]
-	b		4b
+3:	adr		lr, .Lpermute_table + 16
+	cmp		r5, #16			// Z flag remains cleared
+	sub		lr, lr, r4
+	vld1.8		{q8-q9}, [lr]
+	vtbl.8		d16, {q5}, d16
+	vtbl.8		d17, {q5}, d17
+	veor		q5, q8, q15
+	bcc		4f			// have to reload prev if R5 < 16
+	vtbx.8		d10, {q2}, d18
+	vtbx.8		d11, {q2}, d19
+	mov		pc, ip			// branch back to VST sequence
+
+4:	sub		r0, r0, r4
+	vshr.s8		q9, q9, #7		// create mask for VBIF
+	vld1.8		{q8}, [r0]		// reload
+	vbif		q5, q8, q9
+	vst1.8		{q5}, [r0]
+	pop		{r4-r10, pc}
 ENDPROC(aesbs_ctr_encrypt)
 
+	.align		6
+.Lpermute_table:
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+	.byte		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
 	.macro		next_tweak, out, in, const, tmp
 	vshr.s64	\tmp, \in, #63
 	vand		\tmp, \tmp, \const
@@ -888,6 +908,7 @@ ENDPROC(aesbs_ctr_encrypt)
 	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 	 *		     int blocks, u8 iv[], int reorder_last_tweak)
 	 */
+	.align		6
 __xts_prepare8:
 	vld1.8		{q14}, [r7]		// load iv
 	vmov.i32	d30, #0x87		// compose tweak mask vector
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index 5c6cd3c63cbc..f00f042ef357 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -37,7 +37,7 @@ asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
 				  int rounds, int blocks, u8 iv[]);
 
 asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
-				  int rounds, int blocks, u8 ctr[], u8 final[]);
+				  int rounds, int blocks, u8 ctr[]);
 
 asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
 				  int rounds, int blocks, u8 iv[], int);
@@ -243,32 +243,25 @@ static int ctr_encrypt(struct skcipher_request *req)
 	err = skcipher_walk_virt(&walk, req, false);
 
 	while (walk.nbytes > 0) {
-		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
-		u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
+		const u8 *src = walk.src.virt.addr;
+		u8 *dst = walk.dst.virt.addr;
+		int bytes = walk.nbytes;
 
-		if (walk.nbytes < walk.total) {
-			blocks = round_down(blocks,
-					    walk.stride / AES_BLOCK_SIZE);
-			final = NULL;
-		}
+		if (unlikely(bytes < AES_BLOCK_SIZE))
+			src = dst = memcpy(buf + sizeof(buf) - bytes,
+					   src, bytes);
+		else if (walk.nbytes < walk.total)
+			bytes &= ~(8 * AES_BLOCK_SIZE - 1);
 
 		kernel_neon_begin();
-		aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				  ctx->rk, ctx->rounds, blocks, walk.iv, final);
+		aesbs_ctr_encrypt(dst, src, ctx->rk, ctx->rounds, bytes, walk.iv);
 		kernel_neon_end();
 
-		if (final) {
-			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
-			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+		if (unlikely(bytes < AES_BLOCK_SIZE))
+			memcpy(walk.dst.virt.addr,
+			       buf + sizeof(buf) - bytes, bytes);
 
-			crypto_xor_cpy(dst, src, final,
-				       walk.total % AES_BLOCK_SIZE);
-
-			err = skcipher_walk_done(&walk, 0);
-			break;
-		}
-		err = skcipher_walk_done(&walk,
-					 walk.nbytes - blocks * AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes - bytes);
 	}
 
 	return err;
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 6fe67963ba5a..aee73ef5b3dc 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -107,6 +107,16 @@
 	.endm
 #endif
 
+#if __LINUX_ARM_ARCH__ < 7
+	.macro	dsb, args
+	mcr	p15, 0, r0, c7, c10, 4
+	.endm
+
+	.macro	isb, args
+	mcr	p15, 0, r0, c7, c5, 4
+	.endm
+#endif
+
 	.macro asm_trace_hardirqs_off, save=1
 #if defined(CONFIG_TRACE_IRQFLAGS)
 	.if \save
diff --git a/arch/arm/include/asm/spectre.h b/arch/arm/include/asm/spectre.h
new file mode 100644
index 000000000000..85f9e538fb32
--- /dev/null
+++ b/arch/arm/include/asm/spectre.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_SPECTRE_H
+#define __ASM_SPECTRE_H
+
+enum {
+	SPECTRE_UNAFFECTED,
+	SPECTRE_MITIGATED,
+	SPECTRE_VULNERABLE,
+};
+
+enum {
+	__SPECTRE_V2_METHOD_BPIALL,
+	__SPECTRE_V2_METHOD_ICIALLU,
+	__SPECTRE_V2_METHOD_SMC,
+	__SPECTRE_V2_METHOD_HVC,
+	__SPECTRE_V2_METHOD_LOOP8,
+};
+
+enum {
+	SPECTRE_V2_METHOD_BPIALL = BIT(__SPECTRE_V2_METHOD_BPIALL),
+	SPECTRE_V2_METHOD_ICIALLU = BIT(__SPECTRE_V2_METHOD_ICIALLU),
+	SPECTRE_V2_METHOD_SMC = BIT(__SPECTRE_V2_METHOD_SMC),
+	SPECTRE_V2_METHOD_HVC = BIT(__SPECTRE_V2_METHOD_HVC),
+	SPECTRE_V2_METHOD_LOOP8 = BIT(__SPECTRE_V2_METHOD_LOOP8),
+};
+
+#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
+void spectre_v2_update_state(unsigned int state, unsigned int methods);
+#else
+static inline void spectre_v2_update_state(unsigned int state,
+					   unsigned int methods)
+{}
+#endif
+
+int spectre_bhb_update_vectors(unsigned int method);
+
+#endif
diff --git a/arch/arm/include/asm/vmlinux.lds.h b/arch/arm/include/asm/vmlinux.lds.h
index 4a91428c324d..fad45c884e98 100644
--- a/arch/arm/include/asm/vmlinux.lds.h
+++ b/arch/arm/include/asm/vmlinux.lds.h
@@ -26,6 +26,19 @@
 #define ARM_MMU_DISCARD(x)	x
 #endif
 
+/*
+ * ld.lld does not support NOCROSSREFS:
+ * https://github.com/ClangBuiltLinux/linux/issues/1609
+ */
+#ifdef CONFIG_LD_IS_LLD
+#define NOCROSSREFS
+#endif
+
+/* Set start/end symbol names to the LMA for the section */
+#define ARM_LMA(sym, section)						\
+	sym##_start = LOADADDR(section);				\
+	sym##_end = LOADADDR(section) + SIZEOF(section)
+
 #define PROC_INFO							\
 		. = ALIGN(4);						\
 		__proc_info_begin = .;					\
@@ -110,19 +123,31 @@
  * only thing that matters is their relative offsets
  */
 #define ARM_VECTORS							\
-	__vectors_start = .;						\
-	.vectors 0xffff0000 : AT(__vectors_start) {			\
-		*(.vectors)						\
+	__vectors_lma = .;						\
+	OVERLAY 0xffff0000 : NOCROSSREFS AT(__vectors_lma) {		\
+		.vectors {						\
+			*(.vectors)					\
+		}							\
+		.vectors.bhb.loop8 {					\
+			*(.vectors.bhb.loop8)				\
+		}							\
+		.vectors.bhb.bpiall {					\
+			*(.vectors.bhb.bpiall)				\
+		}							\
 	}								\
-	. = __vectors_start + SIZEOF(.vectors);				\
-	__vectors_end = .;						\
+	ARM_LMA(__vectors, .vectors);					\
+	ARM_LMA(__vectors_bhb_loop8, .vectors.bhb.loop8);		\
+	ARM_LMA(__vectors_bhb_bpiall, .vectors.bhb.bpiall);		\
+	. = __vectors_lma + SIZEOF(.vectors) +				\
+		SIZEOF(.vectors.bhb.loop8) +				\
+		SIZEOF(.vectors.bhb.bpiall);				\
 									\
-	__stubs_start = .;						\
-	.stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) {		\
+	__stubs_lma = .;						\
+	.stubs ADDR(.vectors) + 0x1000 : AT(__stubs_lma) {		\
 		*(.stubs)						\
 	}								\
-	. = __stubs_start + SIZEOF(.stubs);				\
-	__stubs_end = .;						\
+	ARM_LMA(__stubs, .stubs);					\
+	. = __stubs_lma + SIZEOF(.stubs);				\
 									\
 	PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors));
 
diff --git a/arch/arm/include/asm/xor.h b/arch/arm/include/asm/xor.h
index aefddec79286..669cad5194d3 100644
--- a/arch/arm/include/asm/xor.h
+++ b/arch/arm/include/asm/xor.h
@@ -44,7 +44,8 @@
 		: "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4))
 
 static void
-xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_arm4regs_2(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2)
 {
 	unsigned int lines = bytes / sizeof(unsigned long) / 4;
 	register unsigned int a1 __asm__("r4");
@@ -64,8 +65,9 @@ xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3)
+xor_arm4regs_3(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3)
 {
 	unsigned int lines = bytes / sizeof(unsigned long) / 4;
 	register unsigned int a1 __asm__("r4");
@@ -86,8 +88,10 @@ xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3, unsigned long *p4)
+xor_arm4regs_4(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4)
 {
 	unsigned int lines = bytes / sizeof(unsigned long) / 2;
 	register unsigned int a1 __asm__("r8");
@@ -105,8 +109,11 @@ xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_arm4regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_arm4regs_5(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4,
+	       const unsigned long * __restrict p5)
 {
 	unsigned int lines = bytes / sizeof(unsigned long) / 2;
 	register unsigned int a1 __asm__("r8");
@@ -146,7 +153,8 @@ static struct xor_block_template xor_block_arm4regs = {
 extern struct xor_block_template const xor_block_neon_inner;
 
 static void
-xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2)
 {
 	if (in_interrupt()) {
 		xor_arm4regs_2(bytes, p1, p2);
@@ -158,8 +166,9 @@ xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3)
+xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2,
+	   const unsigned long * __restrict p3)
 {
 	if (in_interrupt()) {
 		xor_arm4regs_3(bytes, p1, p2, p3);
@@ -171,8 +180,10 @@ xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3, unsigned long *p4)
+xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2,
+	   const unsigned long * __restrict p3,
+	   const unsigned long * __restrict p4)
 {
 	if (in_interrupt()) {
 		xor_arm4regs_4(bytes, p1, p2, p3, p4);
@@ -184,8 +195,11 @@ xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2,
+	   const unsigned long * __restrict p3,
+	   const unsigned long * __restrict p4,
+	   const unsigned long * __restrict p5)
 {
 	if (in_interrupt()) {
 		xor_arm4regs_5(bytes, p1, p2, p3, p4, p5);
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index ae295a3bcfef..6ef3b535b7bf 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -106,4 +106,6 @@ endif
 
 obj-$(CONFIG_HAVE_ARM_SMCCC)	+= smccc-call.o
 
+obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += spectre.o
+
 extra-y := $(head-y) vmlinux.lds
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 5cd057859fe9..ee3f7a599181 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -1002,12 +1002,11 @@ vector_\name:
 	sub	lr, lr, #\correction
 	.endif
 
-	@
-	@ Save r0, lr_<exception> (parent PC) and spsr_<exception>
-	@ (parent CPSR)
-	@
+	@ Save r0, lr_<exception> (parent PC)
 	stmia	sp, {r0, lr}		@ save r0, lr
-	mrs	lr, spsr
+
+	@ Save spsr_<exception> (parent CPSR)
+2:	mrs	lr, spsr
 	str	lr, [sp, #8]		@ save spsr
 
 	@
@@ -1028,6 +1027,44 @@ vector_\name:
 	movs	pc, lr			@ branch to handler in SVC mode
 ENDPROC(vector_\name)
 
+#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+	.subsection 1
+	.align 5
+vector_bhb_loop8_\name:
+	.if \correction
+	sub	lr, lr, #\correction
+	.endif
+
+	@ Save r0, lr_<exception> (parent PC)
+	stmia	sp, {r0, lr}
+
+	@ bhb workaround
+	mov	r0, #8
+3:	b	. + 4
+	subs	r0, r0, #1
+	bne	3b
+	dsb
+	isb
+	b	2b
+ENDPROC(vector_bhb_loop8_\name)
+
+vector_bhb_bpiall_\name:
+	.if \correction
+	sub	lr, lr, #\correction
+	.endif
+
+	@ Save r0, lr_<exception> (parent PC)
+	stmia	sp, {r0, lr}
+
+	@ bhb workaround
+	mcr	p15, 0, r0, c7, c5, 6	@ BPIALL
+	@ isb not needed due to "movs pc, lr" in the vector stub
+	@ which gives a "context synchronisation".
+	b	2b
+ENDPROC(vector_bhb_bpiall_\name)
+	.previous
+#endif
+
 	.align	2
 	@ handler addresses follow this label
 1:
@@ -1036,6 +1073,10 @@ ENDPROC(vector_\name)
 	.section .stubs, "ax", %progbits
 	@ This must be the first word
 	.word	vector_swi
+#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+	.word	vector_bhb_loop8_swi
+	.word	vector_bhb_bpiall_swi
+#endif
 
 vector_rst:
  ARM(	swi	SYS_ERROR0	)
@@ -1150,8 +1191,10 @@ vector_addrexcptn:
  * FIQ "NMI" handler
  *-----------------------------------------------------------------------------
  * Handle a FIQ using the SVC stack allowing FIQ act like NMI on x86
- * systems.
+ * systems. This must be the last vector stub, so lets place it in its own
+ * subsection.
  */
+	.subsection 2
 	vector_stub	fiq, FIQ_MODE, 4
 
 	.long	__fiq_usr			@  0  (USR_26 / USR_32)
@@ -1184,6 +1227,30 @@ vector_addrexcptn:
 	W(b)	vector_irq
 	W(b)	vector_fiq
 
+#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+	.section .vectors.bhb.loop8, "ax", %progbits
+.L__vectors_bhb_loop8_start:
+	W(b)	vector_rst
+	W(b)	vector_bhb_loop8_und
+	W(ldr)	pc, .L__vectors_bhb_loop8_start + 0x1004
+	W(b)	vector_bhb_loop8_pabt
+	W(b)	vector_bhb_loop8_dabt
+	W(b)	vector_addrexcptn
+	W(b)	vector_bhb_loop8_irq
+	W(b)	vector_bhb_loop8_fiq
+
+	.section .vectors.bhb.bpiall, "ax", %progbits
+.L__vectors_bhb_bpiall_start:
+	W(b)	vector_rst
+	W(b)	vector_bhb_bpiall_und
+	W(ldr)	pc, .L__vectors_bhb_bpiall_start + 0x1008
+	W(b)	vector_bhb_bpiall_pabt
+	W(b)	vector_bhb_bpiall_dabt
+	W(b)	vector_addrexcptn
+	W(b)	vector_bhb_bpiall_irq
+	W(b)	vector_bhb_bpiall_fiq
+#endif
+
 	.data
 	.align	2
 
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index ac86c34682bb..dbc1913ee30b 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -154,12 +154,36 @@ ENDPROC(ret_from_fork)
  */
 
 	.align	5
+#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+ENTRY(vector_bhb_loop8_swi)
+	sub	sp, sp, #PT_REGS_SIZE
+	stmia	sp, {r0 - r12}
+	mov	r8, #8
+1:	b	2f
+2:	subs	r8, r8, #1
+	bne	1b
+	dsb
+	isb
+	b	3f
+ENDPROC(vector_bhb_loop8_swi)
+
+	.align	5
+ENTRY(vector_bhb_bpiall_swi)
+	sub	sp, sp, #PT_REGS_SIZE
+	stmia	sp, {r0 - r12}
+	mcr	p15, 0, r8, c7, c5, 6	@ BPIALL
+	isb
+	b	3f
+ENDPROC(vector_bhb_bpiall_swi)
+#endif
+	.align	5
 ENTRY(vector_swi)
 #ifdef CONFIG_CPU_V7M
 	v7m_exception_entry
 #else
 	sub	sp, sp, #PT_REGS_SIZE
 	stmia	sp, {r0 - r12}			@ Calling r0 - r12
+3:
  ARM(	add	r8, sp, #S_PC		)
  ARM(	stmdb	r8, {sp, lr}^		)	@ Calling sp, lr
  THUMB(	mov	r8, sp			)
diff --git a/arch/arm/kernel/spectre.c b/arch/arm/kernel/spectre.c
new file mode 100644
index 000000000000..0dcefc36fb7a
--- /dev/null
+++ b/arch/arm/kernel/spectre.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/bpf.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+
+#include <asm/spectre.h>
+
+static bool _unprivileged_ebpf_enabled(void)
+{
+#ifdef CONFIG_BPF_SYSCALL
+	return !sysctl_unprivileged_bpf_disabled;
+#else
+	return false;
+#endif
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+static unsigned int spectre_v2_state;
+static unsigned int spectre_v2_methods;
+
+void spectre_v2_update_state(unsigned int state, unsigned int method)
+{
+	if (state > spectre_v2_state)
+		spectre_v2_state = state;
+	spectre_v2_methods |= method;
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	const char *method;
+
+	if (spectre_v2_state == SPECTRE_UNAFFECTED)
+		return sprintf(buf, "%s\n", "Not affected");
+
+	if (spectre_v2_state != SPECTRE_MITIGATED)
+		return sprintf(buf, "%s\n", "Vulnerable");
+
+	if (_unprivileged_ebpf_enabled())
+		return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n");
+
+	switch (spectre_v2_methods) {
+	case SPECTRE_V2_METHOD_BPIALL:
+		method = "Branch predictor hardening";
+		break;
+
+	case SPECTRE_V2_METHOD_ICIALLU:
+		method = "I-cache invalidation";
+		break;
+
+	case SPECTRE_V2_METHOD_SMC:
+	case SPECTRE_V2_METHOD_HVC:
+		method = "Firmware call";
+		break;
+
+	case SPECTRE_V2_METHOD_LOOP8:
+		method = "History overwrite";
+		break;
+
+	default:
+		method = "Multiple mitigations";
+		break;
+	}
+
+	return sprintf(buf, "Mitigation: %s\n", method);
+}
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index da04ed85855a..cae4a748811f 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -30,6 +30,7 @@
 #include <linux/atomic.h>
 #include <asm/cacheflush.h>
 #include <asm/exception.h>
+#include <asm/spectre.h>
 #include <asm/unistd.h>
 #include <asm/traps.h>
 #include <asm/ptrace.h>
@@ -789,10 +790,59 @@ static inline void __init kuser_init(void *vectors)
 }
 #endif
 
+#ifndef CONFIG_CPU_V7M
+static void copy_from_lma(void *vma, void *lma_start, void *lma_end)
+{
+	memcpy(vma, lma_start, lma_end - lma_start);
+}
+
+static void flush_vectors(void *vma, size_t offset, size_t size)
+{
+	unsigned long start = (unsigned long)vma + offset;
+	unsigned long end = start + size;
+
+	flush_icache_range(start, end);
+}
+
+#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+int spectre_bhb_update_vectors(unsigned int method)
+{
+	extern char __vectors_bhb_bpiall_start[], __vectors_bhb_bpiall_end[];
+	extern char __vectors_bhb_loop8_start[], __vectors_bhb_loop8_end[];
+	void *vec_start, *vec_end;
+
+	if (system_state >= SYSTEM_FREEING_INITMEM) {
+		pr_err("CPU%u: Spectre BHB workaround too late - system vulnerable\n",
+		       smp_processor_id());
+		return SPECTRE_VULNERABLE;
+	}
+
+	switch (method) {
+	case SPECTRE_V2_METHOD_LOOP8:
+		vec_start = __vectors_bhb_loop8_start;
+		vec_end = __vectors_bhb_loop8_end;
+		break;
+
+	case SPECTRE_V2_METHOD_BPIALL:
+		vec_start = __vectors_bhb_bpiall_start;
+		vec_end = __vectors_bhb_bpiall_end;
+		break;
+
+	default:
+		pr_err("CPU%u: unknown Spectre BHB state %d\n",
+		       smp_processor_id(), method);
+		return SPECTRE_VULNERABLE;
+	}
+
+	copy_from_lma(vectors_page, vec_start, vec_end);
+	flush_vectors(vectors_page, 0, vec_end - vec_start);
+
+	return SPECTRE_MITIGATED;
+}
+#endif
+
 void __init early_trap_init(void *vectors_base)
 {
-#ifndef CONFIG_CPU_V7M
-	unsigned long vectors = (unsigned long)vectors_base;
 	extern char __stubs_start[], __stubs_end[];
 	extern char __vectors_start[], __vectors_end[];
 	unsigned i;
@@ -813,17 +863,20 @@ void __init early_trap_init(void *vectors_base)
 	 * into the vector page, mapped at 0xffff0000, and ensure these
 	 * are visible to the instruction stream.
 	 */
-	memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start);
-	memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start);
+	copy_from_lma(vectors_base, __vectors_start, __vectors_end);
+	copy_from_lma(vectors_base + 0x1000, __stubs_start, __stubs_end);
 
 	kuser_init(vectors_base);
 
-	flush_icache_range(vectors, vectors + PAGE_SIZE * 2);
+	flush_vectors(vectors_base, 0, PAGE_SIZE * 2);
+}
 #else /* ifndef CONFIG_CPU_V7M */
+void __init early_trap_init(void *vectors_base)
+{
 	/*
 	 * on V7-M there is no need to copy the vector table to a dedicated
 	 * memory area. The address is configurable and so a table in the kernel
 	 * image can be used.
 	 */
-#endif
 }
+#endif
diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c
index b99dd8e1c93f..522510baed49 100644
--- a/arch/arm/lib/xor-neon.c
+++ b/arch/arm/lib/xor-neon.c
@@ -17,17 +17,11 @@ MODULE_LICENSE("GPL");
 /*
  * Pull in the reference implementations while instructing GCC (through
  * -ftree-vectorize) to attempt to exploit implicit parallelism and emit
- * NEON instructions.
+ * NEON instructions. Clang does this by default at O2 so no pragma is
+ * needed.
  */
-#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+#ifdef CONFIG_CC_IS_GCC
 #pragma GCC optimize "tree-vectorize"
-#else
-/*
- * While older versions of GCC do not generate incorrect code, they fail to
- * recognize the parallel nature of these functions, and emit plain ARM code,
- * which is known to be slower than the optimized ARM code in asm-arm/xor.h.
- */
-#warning This code requires at least version 4.6 of GCC
 #endif
 
 #pragma GCC diagnostic ignored "-Wunused-variable"
diff --git a/arch/arm/mach-mstar/Kconfig b/arch/arm/mach-mstar/Kconfig
index cd300eeedc20..0bf4d312bcfd 100644
--- a/arch/arm/mach-mstar/Kconfig
+++ b/arch/arm/mach-mstar/Kconfig
@@ -3,6 +3,7 @@ menuconfig ARCH_MSTARV7
 	depends on ARCH_MULTI_V7
 	select ARM_GIC
 	select ARM_HEAVY_MB
+	select HAVE_ARM_ARCH_TIMER
 	select MST_IRQ
 	select MSTAR_MSC313_MPLL
 	help
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 58afba346729..9724c16e9076 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -830,6 +830,7 @@ config CPU_BPREDICT_DISABLE
 
 config CPU_SPECTRE
 	bool
+	select GENERIC_CPU_VULNERABILITIES
 
 config HARDEN_BRANCH_PREDICTOR
 	bool "Harden the branch predictor against aliasing attacks" if EXPERT
@@ -850,6 +851,16 @@ config HARDEN_BRANCH_PREDICTOR
 
 	   If unsure, say Y.
 
+config HARDEN_BRANCH_HISTORY
+	bool "Harden Spectre style attacks against branch history" if EXPERT
+	depends on CPU_SPECTRE
+	default y
+	help
+	  Speculation attacks against some high-performance processors can
+	  make use of branch history to influence future speculation. When
+	  taking an exception, a sequence of branches overwrites the branch
+	  history, or branch history is invalidated.
+
 config TLS_REG_EMUL
 	bool
 	select NEED_KUSER_HELPERS
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 6d0cb0f7bc54..fe249ea91908 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -164,47 +164,6 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align)
 	return phys;
 }
 
-static void __init arm_initrd_init(void)
-{
-#ifdef CONFIG_BLK_DEV_INITRD
-	phys_addr_t start;
-	unsigned long size;
-
-	initrd_start = initrd_end = 0;
-
-	if (!phys_initrd_size)
-		return;
-
-	/*
-	 * Round the memory region to page boundaries as per free_initrd_mem()
-	 * This allows us to detect whether the pages overlapping the initrd
-	 * are in use, but more importantly, reserves the entire set of pages
-	 * as we don't want these pages allocated for other purposes.
-	 */
-	start = round_down(phys_initrd_start, PAGE_SIZE);
-	size = phys_initrd_size + (phys_initrd_start - start);
-	size = round_up(size, PAGE_SIZE);
-
-	if (!memblock_is_region_memory(start, size)) {
-		pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n",
-		       (u64)start, size);
-		return;
-	}
-
-	if (memblock_is_region_reserved(start, size)) {
-		pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n",
-		       (u64)start, size);
-		return;
-	}
-
-	memblock_reserve(start, size);
-
-	/* Now convert initrd to virtual addresses */
-	initrd_start = __phys_to_virt(phys_initrd_start);
-	initrd_end = initrd_start + phys_initrd_size;
-#endif
-}
-
 #ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
 void check_cpu_icache_size(int cpuid)
 {
@@ -226,7 +185,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc)
 	/* Register the kernel text, kernel data and initrd with memblock. */
 	memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START);
 
-	arm_initrd_init();
+	reserve_initrd_mem();
 
 	arm_mm_memblock_reserve();
 
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index 114c05ab4dd9..06dbfb968182 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -6,8 +6,35 @@
 #include <asm/cp15.h>
 #include <asm/cputype.h>
 #include <asm/proc-fns.h>
+#include <asm/spectre.h>
 #include <asm/system_misc.h>
 
+#ifdef CONFIG_ARM_PSCI
+static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+			     ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+
+	switch ((int)res.a0) {
+	case SMCCC_RET_SUCCESS:
+		return SPECTRE_MITIGATED;
+
+	case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED:
+		return SPECTRE_UNAFFECTED;
+
+	default:
+		return SPECTRE_VULNERABLE;
+	}
+}
+#else
+static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void)
+{
+	return SPECTRE_VULNERABLE;
+}
+#endif
+
 #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
 DEFINE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn);
 
@@ -36,13 +63,61 @@ static void __maybe_unused call_hvc_arch_workaround_1(void)
 	arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
 }
 
-static void cpu_v7_spectre_init(void)
+static unsigned int spectre_v2_install_workaround(unsigned int method)
 {
 	const char *spectre_v2_method = NULL;
 	int cpu = smp_processor_id();
 
 	if (per_cpu(harden_branch_predictor_fn, cpu))
-		return;
+		return SPECTRE_MITIGATED;
+
+	switch (method) {
+	case SPECTRE_V2_METHOD_BPIALL:
+		per_cpu(harden_branch_predictor_fn, cpu) =
+			harden_branch_predictor_bpiall;
+		spectre_v2_method = "BPIALL";
+		break;
+
+	case SPECTRE_V2_METHOD_ICIALLU:
+		per_cpu(harden_branch_predictor_fn, cpu) =
+			harden_branch_predictor_iciallu;
+		spectre_v2_method = "ICIALLU";
+		break;
+
+	case SPECTRE_V2_METHOD_HVC:
+		per_cpu(harden_branch_predictor_fn, cpu) =
+			call_hvc_arch_workaround_1;
+		cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
+		spectre_v2_method = "hypervisor";
+		break;
+
+	case SPECTRE_V2_METHOD_SMC:
+		per_cpu(harden_branch_predictor_fn, cpu) =
+			call_smc_arch_workaround_1;
+		cpu_do_switch_mm = cpu_v7_smc_switch_mm;
+		spectre_v2_method = "firmware";
+		break;
+	}
+
+	if (spectre_v2_method)
+		pr_info("CPU%u: Spectre v2: using %s workaround\n",
+			smp_processor_id(), spectre_v2_method);
+
+	return SPECTRE_MITIGATED;
+}
+#else
+static unsigned int spectre_v2_install_workaround(unsigned int method)
+{
+	pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n",
+		smp_processor_id());
+
+	return SPECTRE_VULNERABLE;
+}
+#endif
+
+static void cpu_v7_spectre_v2_init(void)
+{
+	unsigned int state, method = 0;
 
 	switch (read_cpuid_part()) {
 	case ARM_CPU_PART_CORTEX_A8:
@@ -51,69 +126,133 @@ static void cpu_v7_spectre_init(void)
 	case ARM_CPU_PART_CORTEX_A17:
 	case ARM_CPU_PART_CORTEX_A73:
 	case ARM_CPU_PART_CORTEX_A75:
-		per_cpu(harden_branch_predictor_fn, cpu) =
-			harden_branch_predictor_bpiall;
-		spectre_v2_method = "BPIALL";
+		state = SPECTRE_MITIGATED;
+		method = SPECTRE_V2_METHOD_BPIALL;
 		break;
 
 	case ARM_CPU_PART_CORTEX_A15:
 	case ARM_CPU_PART_BRAHMA_B15:
-		per_cpu(harden_branch_predictor_fn, cpu) =
-			harden_branch_predictor_iciallu;
-		spectre_v2_method = "ICIALLU";
+		state = SPECTRE_MITIGATED;
+		method = SPECTRE_V2_METHOD_ICIALLU;
 		break;
 
-#ifdef CONFIG_ARM_PSCI
 	case ARM_CPU_PART_BRAHMA_B53:
 		/* Requires no workaround */
+		state = SPECTRE_UNAFFECTED;
 		break;
+
 	default:
 		/* Other ARM CPUs require no workaround */
-		if (read_cpuid_implementor() == ARM_CPU_IMP_ARM)
+		if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) {
+			state = SPECTRE_UNAFFECTED;
 			break;
+		}
+
 		fallthrough;
-		/* Cortex A57/A72 require firmware workaround */
-	case ARM_CPU_PART_CORTEX_A57:
-	case ARM_CPU_PART_CORTEX_A72: {
-		struct arm_smccc_res res;
 
-		arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-				     ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-		if ((int)res.a0 != 0)
-			return;
+	/* Cortex A57/A72 require firmware workaround */
+	case ARM_CPU_PART_CORTEX_A57:
+	case ARM_CPU_PART_CORTEX_A72:
+		state = spectre_v2_get_cpu_fw_mitigation_state();
+		if (state != SPECTRE_MITIGATED)
+			break;
 
 		switch (arm_smccc_1_1_get_conduit()) {
 		case SMCCC_CONDUIT_HVC:
-			per_cpu(harden_branch_predictor_fn, cpu) =
-				call_hvc_arch_workaround_1;
-			cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
-			spectre_v2_method = "hypervisor";
+			method = SPECTRE_V2_METHOD_HVC;
 			break;
 
 		case SMCCC_CONDUIT_SMC:
-			per_cpu(harden_branch_predictor_fn, cpu) =
-				call_smc_arch_workaround_1;
-			cpu_do_switch_mm = cpu_v7_smc_switch_mm;
-			spectre_v2_method = "firmware";
+			method = SPECTRE_V2_METHOD_SMC;
 			break;
 
 		default:
+			state = SPECTRE_VULNERABLE;
 			break;
 		}
 	}
-#endif
+
+	if (state == SPECTRE_MITIGATED)
+		state = spectre_v2_install_workaround(method);
+
+	spectre_v2_update_state(state, method);
+}
+
+#ifdef CONFIG_HARDEN_BRANCH_HISTORY
+static int spectre_bhb_method;
+
+static const char *spectre_bhb_method_name(int method)
+{
+	switch (method) {
+	case SPECTRE_V2_METHOD_LOOP8:
+		return "loop";
+
+	case SPECTRE_V2_METHOD_BPIALL:
+		return "BPIALL";
+
+	default:
+		return "unknown";
 	}
+}
 
-	if (spectre_v2_method)
-		pr_info("CPU%u: Spectre v2: using %s workaround\n",
-			smp_processor_id(), spectre_v2_method);
+static int spectre_bhb_install_workaround(int method)
+{
+	if (spectre_bhb_method != method) {
+		if (spectre_bhb_method) {
+			pr_err("CPU%u: Spectre BHB: method disagreement, system vulnerable\n",
+			       smp_processor_id());
+
+			return SPECTRE_VULNERABLE;
+		}
+
+		if (spectre_bhb_update_vectors(method) == SPECTRE_VULNERABLE)
+			return SPECTRE_VULNERABLE;
+
+		spectre_bhb_method = method;
+	}
+
+	pr_info("CPU%u: Spectre BHB: using %s workaround\n",
+		smp_processor_id(), spectre_bhb_method_name(method));
+
+	return SPECTRE_MITIGATED;
 }
 #else
-static void cpu_v7_spectre_init(void)
+static int spectre_bhb_install_workaround(int method)
 {
+	return SPECTRE_VULNERABLE;
 }
 #endif
 
+static void cpu_v7_spectre_bhb_init(void)
+{
+	unsigned int state, method = 0;
+
+	switch (read_cpuid_part()) {
+	case ARM_CPU_PART_CORTEX_A15:
+	case ARM_CPU_PART_BRAHMA_B15:
+	case ARM_CPU_PART_CORTEX_A57:
+	case ARM_CPU_PART_CORTEX_A72:
+		state = SPECTRE_MITIGATED;
+		method = SPECTRE_V2_METHOD_LOOP8;
+		break;
+
+	case ARM_CPU_PART_CORTEX_A73:
+	case ARM_CPU_PART_CORTEX_A75:
+		state = SPECTRE_MITIGATED;
+		method = SPECTRE_V2_METHOD_BPIALL;
+		break;
+
+	default:
+		state = SPECTRE_UNAFFECTED;
+		break;
+	}
+
+	if (state == SPECTRE_MITIGATED)
+		state = spectre_bhb_install_workaround(method);
+
+	spectre_v2_update_state(state, method);
+}
+
 static __maybe_unused bool cpu_v7_check_auxcr_set(bool *warned,
 						  u32 mask, const char *msg)
 {
@@ -142,16 +281,17 @@ static bool check_spectre_auxcr(bool *warned, u32 bit)
 void cpu_v7_ca8_ibe(void)
 {
 	if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(6)))
-		cpu_v7_spectre_init();
+		cpu_v7_spectre_v2_init();
 }
 
 void cpu_v7_ca15_ibe(void)
 {
 	if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0)))
-		cpu_v7_spectre_init();
+		cpu_v7_spectre_v2_init();
 }
 
 void cpu_v7_bugs_init(void)
 {
-	cpu_v7_spectre_init();
+	cpu_v7_spectre_v2_init();
+	cpu_v7_spectre_bhb_init();
 }
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index 7c9e395b77f7..ec52b776f926 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -18,7 +18,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO32
 
 ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8
 ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
-	    -z max-page-size=4096 -nostdlib -shared $(ldflags-y) \
+	    -z max-page-size=4096 -shared $(ldflags-y) \
 	    --hash-style=sysv --build-id=sha1 \
 	    -T
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 09b885cc4db5..a555f409ba95 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -10,6 +10,7 @@ config ARM64
 	select ACPI_SPCR_TABLE if ACPI
 	select ACPI_PPTT if ACPI
 	select ARCH_HAS_DEBUG_WX
+	select ARCH_BINFMT_ELF_EXTRA_PHDRS
 	select ARCH_BINFMT_ELF_STATE
 	select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
 	select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
@@ -891,13 +892,17 @@ config CAVIUM_ERRATUM_23144
 	  If unsure, say Y.
 
 config CAVIUM_ERRATUM_23154
-	bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed"
+	bool "Cavium errata 23154 and 38545: GICv3 lacks HW synchronisation"
 	default y
 	help
-	  The gicv3 of ThunderX requires a modified version for
+	  The ThunderX GICv3 implementation requires a modified version for
 	  reading the IAR status to ensure data synchronization
 	  (access to icc_iar1_el1 is not sync'ed before and after).
 
+	  It also suffers from erratum 38545 (also present on Marvell's
+	  OcteonTX and OcteonTX2), resulting in deactivated interrupts being
+	  spuriously presented to the CPU interface.
+
 	  If unsure, say Y.
 
 config CAVIUM_ERRATUM_27456
@@ -1252,9 +1257,6 @@ config HW_PERF_EVENTS
 	def_bool y
 	depends on ARM_PMU
 
-config ARCH_HAS_FILTER_PGPROT
-	def_bool y
-
 # Supported by clang >= 7.0
 config CC_HAVE_SHADOW_CALL_STACK
 	def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
@@ -1383,6 +1385,15 @@ config UNMAP_KERNEL_AT_EL0
 
 	  If unsure, say Y.
 
+config MITIGATE_SPECTRE_BRANCH_HISTORY
+	bool "Mitigate Spectre style attacks against branch history" if EXPERT
+	default y
+	help
+	  Speculation attacks against some high-performance processors can
+	  make use of branch history to influence future speculation.
+	  When taking an exception from user-space, a sequence of branches
+	  or a firmware call overwrites the branch history.
+
 config RODATA_FULL_DEFAULT_ENABLED
 	bool "Apply r/o permissions of VM areas also to their linear aliases"
 	default y
diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi
index 19afbc91020a..9f8f4145db88 100644
--- a/arch/arm64/boot/dts/apple/t8103.dtsi
+++ b/arch/arm64/boot/dts/apple/t8103.dtsi
@@ -97,6 +97,18 @@
 			     <AIC_FIQ AIC_TMR_HV_VIRT IRQ_TYPE_LEVEL_HIGH>;
 	};
 
+	pmu-e {
+		compatible = "apple,icestorm-pmu";
+		interrupt-parent = <&aic>;
+		interrupts = <AIC_FIQ AIC_CPU_PMU_E IRQ_TYPE_LEVEL_HIGH>;
+	};
+
+	pmu-p {
+		compatible = "apple,firestorm-pmu";
+		interrupt-parent = <&aic>;
+		interrupts = <AIC_FIQ AIC_CPU_PMU_P IRQ_TYPE_LEVEL_HIGH>;
+	};
+
 	clkref: clock-ref {
 		compatible = "fixed-clock";
 		#clock-cells = <0>;
@@ -213,6 +225,18 @@
 			interrupt-controller;
 			reg = <0x2 0x3b100000 0x0 0x8000>;
 			power-domains = <&ps_aic>;
+
+			affinities {
+				e-core-pmu-affinity {
+					apple,fiq-index = <AIC_CPU_PMU_E>;
+					cpus = <&cpu0 &cpu1 &cpu2 &cpu3>;
+				};
+
+				p-core-pmu-affinity {
+					apple,fiq-index = <AIC_CPU_PMU_P>;
+					cpus = <&cpu4 &cpu5 &cpu6 &cpu7>;
+				};
+			};
 		};
 
 		pmgr: power-management@23b700000 {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
index 3ed1f2c51cad..18e529118476 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
@@ -253,18 +253,18 @@
 				interrupt-controller;
 				reg = <0x14 4>;
 				interrupt-map =
-					<0 0 &gic 0 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
-					<1 0 &gic 0 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
-					<2 0 &gic 0 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
-					<3 0 &gic 0 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
-					<4 0 &gic 0 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
-					<5 0 &gic 0 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
-					<6 0 &gic 0 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
-					<7 0 &gic 0 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
-					<8 0 &gic 0 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
-					<9 0 &gic 0 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
-					<10 0 &gic 0 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
-					<11 0 &gic 0 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+					<0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+					<1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+					<2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+					<3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+					<4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+					<5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
+					<6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
+					<7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
+					<8 0 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
+					<9 0 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
+					<10 0 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
+					<11 0 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
 				interrupt-map-mask = <0xffffffff 0x0>;
 			};
 		};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
index 3cb9c21d2775..1282b61da8a5 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
@@ -293,18 +293,18 @@
 				interrupt-controller;
 				reg = <0x14 4>;
 				interrupt-map =
-					<0 0 &gic 0 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
-					<1 0 &gic 0 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
-					<2 0 &gic 0 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
-					<3 0 &gic 0 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
-					<4 0 &gic 0 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
-					<5 0 &gic 0 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
-					<6 0 &gic 0 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
-					<7 0 &gic 0 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
-					<8 0 &gic 0 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
-					<9 0 &gic 0 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
-					<10 0 &gic 0 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
-					<11 0 &gic 0 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+					<0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+					<1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+					<2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+					<3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+					<4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+					<5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
+					<6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
+					<7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
+					<8 0 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
+					<9 0 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
+					<10 0 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
+					<11 0 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
 				interrupt-map-mask = <0xffffffff 0x0>;
 			};
 		};
diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
index 7032505f5ef3..3c611cb4f5fe 100644
--- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
@@ -680,18 +680,18 @@
 				interrupt-controller;
 				reg = <0x14 4>;
 				interrupt-map =
-					<0 0 &gic 0 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
-					<1 0 &gic 0 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
-					<2 0 &gic 0 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
-					<3 0 &gic 0 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
-					<4 0 &gic 0 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
-					<5 0 &gic 0 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
-					<6 0 &gic 0 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
-					<7 0 &gic 0 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
-					<8 0 &gic 0 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
-					<9 0 &gic 0 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
-					<10 0 &gic 0 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
-					<11 0 &gic 0 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+					<0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+					<1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+					<2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+					<3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+					<4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+					<5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
+					<6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
+					<7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
+					<8 0 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
+					<9 0 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
+					<10 0 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
+					<11 0 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
 				interrupt-map-mask = <0xffffffff 0x0>;
 			};
 		};
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
index 04da07ae4420..1cee26479bfe 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
+++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
@@ -18,6 +18,7 @@
 
 	aliases {
 		spi0 = &spi0;
+		ethernet0 = &eth0;
 		ethernet1 = &eth1;
 		mmc0 = &sdhci0;
 		mmc1 = &sdhci1;
@@ -138,7 +139,9 @@
 	/*
 	 * U-Boot port for Turris Mox has a bug which always expects that "ranges" DT property
 	 * contains exactly 2 ranges with 3 (child) address cells, 2 (parent) address cells and
-	 * 2 size cells and also expects that the second range starts at 16 MB offset. If these
+	 * 2 size cells and also expects that the second range starts at 16 MB offset. Also it
+	 * expects that first range uses same address for PCI (child) and CPU (parent) cells (so
+	 * no remapping) and that this address is the lowest from all specified ranges. If these
 	 * conditions are not met then U-Boot crashes during loading kernel DTB file. PCIe address
 	 * space is 128 MB long, so the best split between MEM and IO is to use fixed 16 MB window
 	 * for IO and the rest 112 MB (64+32+16) for MEM, despite that maximal IO size is just 64 kB.
@@ -147,6 +150,9 @@
 	 * https://source.denx.de/u-boot/u-boot/-/commit/cb2ddb291ee6fcbddd6d8f4ff49089dfe580f5d7
 	 * https://source.denx.de/u-boot/u-boot/-/commit/c64ac3b3185aeb3846297ad7391fc6df8ecd73bf
 	 * https://source.denx.de/u-boot/u-boot/-/commit/4a82fca8e330157081fc132a591ebd99ba02ee33
+	 * Bug related to requirement of same child and parent addresses for first range is fixed
+	 * in U-Boot version 2022.04 by following commit:
+	 * https://source.denx.de/u-boot/u-boot/-/commit/1fd54253bca7d43d046bba4853fe5fafd034bc17
 	 */
 	#address-cells = <3>;
 	#size-cells = <2>;
diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
index 673f4906eef9..fb78ef613b29 100644
--- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
@@ -499,7 +499,7 @@
 			 * (totaling 127 MiB) for MEM.
 			 */
 			ranges = <0x82000000 0 0xe8000000   0 0xe8000000   0 0x07f00000   /* Port 0 MEM */
-				  0x81000000 0 0xefff0000   0 0xefff0000   0 0x00010000>; /* Port 0 IO */
+				  0x81000000 0 0x00000000   0 0xefff0000   0 0x00010000>; /* Port 0 IO */
 			interrupt-map-mask = <0 0 0 7>;
 			interrupt-map = <0 0 0 1 &pcie_intc 0>,
 					<0 0 0 2 &pcie_intc 1>,
diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
index 2d48c3715fc6..aaa00da5351d 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -1584,7 +1584,7 @@
 			#iommu-cells = <1>;
 
 			nvidia,memory-controller = <&mc>;
-			status = "okay";
+			status = "disabled";
 		};
 
 		smmu: iommu@12000000 {
diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
index 58845a14805f..e2b9ec134cb1 100644
--- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
+++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
@@ -807,3 +807,8 @@
 
 	qcom,snoc-host-cap-8bit-quirk;
 };
+
+&crypto {
+	/* FIXME: qce_start triggers an SError */
+	status= "disable";
+};
diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi
index 53b39e718fb6..4b19744bcfb3 100644
--- a/arch/arm64/boot/dts/qcom/sm8350.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi
@@ -35,6 +35,24 @@
 			clock-frequency = <32000>;
 			#clock-cells = <0>;
 		};
+
+		ufs_phy_rx_symbol_0_clk: ufs-phy-rx-symbol-0 {
+			compatible = "fixed-clock";
+			clock-frequency = <1000>;
+			#clock-cells = <0>;
+		};
+
+		ufs_phy_rx_symbol_1_clk: ufs-phy-rx-symbol-1 {
+			compatible = "fixed-clock";
+			clock-frequency = <1000>;
+			#clock-cells = <0>;
+		};
+
+		ufs_phy_tx_symbol_0_clk: ufs-phy-tx-symbol-0 {
+			compatible = "fixed-clock";
+			clock-frequency = <1000>;
+			#clock-cells = <0>;
+		};
 	};
 
 	cpus {
@@ -603,9 +621,9 @@
 				 <0>,
 				 <0>,
 				 <0>,
-				 <0>,
-				 <0>,
-				 <0>,
+				 <&ufs_phy_rx_symbol_0_clk>,
+				 <&ufs_phy_rx_symbol_1_clk>,
+				 <&ufs_phy_tx_symbol_0_clk>,
 				 <0>,
 				 <0>;
 		};
@@ -1923,8 +1941,8 @@
 				<75000000 300000000>,
 				<0 0>,
 				<0 0>,
-				<75000000 300000000>,
-				<75000000 300000000>;
+				<0 0>,
+				<0 0>;
 			status = "disabled";
 		};
 
diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi
index 10c25ad2d0c7..02b97e838c47 100644
--- a/arch/arm64/boot/dts/qcom/sm8450.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi
@@ -726,7 +726,7 @@
 			compatible = "qcom,sm8450-smmu-500", "arm,mmu-500";
 			reg = <0 0x15000000 0 0x100000>;
 			#iommu-cells = <2>;
-			#global-interrupts = <2>;
+			#global-interrupts = <1>;
 			interrupts =    <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>,
 					<GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>,
 					<GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>,
@@ -813,6 +813,7 @@
 					<GIC_SPI 412 IRQ_TYPE_LEVEL_HIGH>,
 					<GIC_SPI 421 IRQ_TYPE_LEVEL_HIGH>,
 					<GIC_SPI 707 IRQ_TYPE_LEVEL_HIGH>,
+					<GIC_SPI 423 IRQ_TYPE_LEVEL_HIGH>,
 					<GIC_SPI 424 IRQ_TYPE_LEVEL_HIGH>,
 					<GIC_SPI 425 IRQ_TYPE_LEVEL_HIGH>,
 					<GIC_SPI 690 IRQ_TYPE_LEVEL_HIGH>,
@@ -1072,9 +1073,10 @@
 				 <&gcc GCC_USB30_PRIM_MASTER_CLK>,
 				 <&gcc GCC_AGGRE_USB3_PRIM_AXI_CLK>,
 				 <&gcc GCC_USB30_PRIM_MOCK_UTMI_CLK>,
-				 <&gcc GCC_USB30_PRIM_SLEEP_CLK>;
+				 <&gcc GCC_USB30_PRIM_SLEEP_CLK>,
+				 <&gcc GCC_USB3_0_CLKREF_EN>;
 			clock-names = "cfg_noc", "core", "iface", "mock_utmi",
-				      "sleep";
+				      "sleep", "xo";
 
 			assigned-clocks = <&gcc GCC_USB30_PRIM_MOCK_UTMI_CLK>,
 					  <&gcc GCC_USB30_PRIM_MASTER_CLK>;
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index addfa413650b..2a965aa0188d 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -45,7 +45,7 @@ config CRYPTO_SM3_ARM64_CE
 	tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
-	select CRYPTO_SM3
+	select CRYPTO_LIB_SM3
 
 config CRYPTO_SM4_ARM64_CE
 	tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 30b7cc6a7079..561dd2332571 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -24,7 +24,6 @@
 #ifdef USE_V8_CRYPTO_EXTENSIONS
 #define MODE			"ce"
 #define PRIO			300
-#define STRIDE			5
 #define aes_expandkey		ce_aes_expandkey
 #define aes_ecb_encrypt		ce_aes_ecb_encrypt
 #define aes_ecb_decrypt		ce_aes_ecb_decrypt
@@ -42,7 +41,6 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
 #else
 #define MODE			"neon"
 #define PRIO			200
-#define STRIDE			4
 #define aes_ecb_encrypt		neon_aes_ecb_encrypt
 #define aes_ecb_decrypt		neon_aes_ecb_decrypt
 #define aes_cbc_encrypt		neon_aes_cbc_encrypt
@@ -89,7 +87,7 @@ asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int bytes, u8 const iv[]);
 
 asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
-				int rounds, int bytes, u8 ctr[], u8 finalbuf[]);
+				int rounds, int bytes, u8 ctr[]);
 
 asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
 				int rounds, int bytes, u32 const rk2[], u8 iv[],
@@ -458,26 +456,21 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req)
 		unsigned int nbytes = walk.nbytes;
 		u8 *dst = walk.dst.virt.addr;
 		u8 buf[AES_BLOCK_SIZE];
-		unsigned int tail;
 
 		if (unlikely(nbytes < AES_BLOCK_SIZE))
-			src = memcpy(buf, src, nbytes);
+			src = dst = memcpy(buf + sizeof(buf) - nbytes,
+					   src, nbytes);
 		else if (nbytes < walk.total)
 			nbytes &= ~(AES_BLOCK_SIZE - 1);
 
 		kernel_neon_begin();
 		aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
-				walk.iv, buf);
+				walk.iv);
 		kernel_neon_end();
 
-		tail = nbytes % (STRIDE * AES_BLOCK_SIZE);
-		if (tail > 0 && tail < AES_BLOCK_SIZE)
-			/*
-			 * The final partial block could not be returned using
-			 * an overlapping store, so it was passed via buf[]
-			 * instead.
-			 */
-			memcpy(dst + nbytes - tail, buf, tail);
+		if (unlikely(nbytes < AES_BLOCK_SIZE))
+			memcpy(walk.dst.virt.addr,
+			       buf + sizeof(buf) - nbytes, nbytes);
 
 		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
@@ -983,6 +976,7 @@ module_cpu_feature_match(AES, aes_init);
 module_init(aes_init);
 EXPORT_SYMBOL(neon_aes_ecb_encrypt);
 EXPORT_SYMBOL(neon_aes_cbc_encrypt);
+EXPORT_SYMBOL(neon_aes_ctr_encrypt);
 EXPORT_SYMBOL(neon_aes_xts_encrypt);
 EXPORT_SYMBOL(neon_aes_xts_decrypt);
 #endif
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index ff01f0167ba2..dc35eb0245c5 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -321,7 +321,7 @@ AES_FUNC_END(aes_cbc_cts_decrypt)
 
 	/*
 	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int bytes, u8 ctr[], u8 finalbuf[])
+	 *		   int bytes, u8 ctr[])
 	 */
 
 AES_FUNC_START(aes_ctr_encrypt)
@@ -414,8 +414,8 @@ ST5(	st1		{v4.16b}, [x0], #16		)
 .Lctrtail:
 	/* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */
 	mov		x16, #16
-	ands		x13, x4, #0xf
-	csel		x13, x13, x16, ne
+	ands		x6, x4, #0xf
+	csel		x13, x6, x16, ne
 
 ST5(	cmp		w4, #64 - (MAX_STRIDE << 4)	)
 ST5(	csel		x14, x16, xzr, gt		)
@@ -424,10 +424,10 @@ ST5(	csel		x14, x16, xzr, gt		)
 	cmp		w4, #32 - (MAX_STRIDE << 4)
 	csel		x16, x16, xzr, gt
 	cmp		w4, #16 - (MAX_STRIDE << 4)
-	ble		.Lctrtail1x
 
 	adr_l		x12, .Lcts_permute_table
 	add		x12, x12, x13
+	ble		.Lctrtail1x
 
 ST5(	ld1		{v5.16b}, [x1], x14		)
 	ld1		{v6.16b}, [x1], x15
@@ -462,11 +462,19 @@ ST5(	st1		{v5.16b}, [x0], x14		)
 	b		.Lctrout
 
 .Lctrtail1x:
-	csel		x0, x0, x6, eq		// use finalbuf if less than a full block
+	sub		x7, x6, #16
+	csel		x6, x6, x7, eq
+	add		x1, x1, x6
+	add		x0, x0, x6
 	ld1		{v5.16b}, [x1]
+	ld1		{v6.16b}, [x0]
 ST5(	mov		v3.16b, v4.16b			)
 	encrypt_block	v3, w3, x2, x8, w7
+	ld1		{v10.16b-v11.16b}, [x12]
+	tbl		v3.16b, {v3.16b}, v10.16b
+	sshr		v11.16b, v11.16b, #7
 	eor		v5.16b, v5.16b, v3.16b
+	bif		v5.16b, v6.16b, v11.16b
 	st1		{v5.16b}, [x0]
 	b		.Lctrout
 AES_FUNC_END(aes_ctr_encrypt)
diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index a3405b8c344b..d427f4556b6e 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -735,119 +735,67 @@ SYM_FUNC_END(aesbs_cbc_decrypt)
 	 *		     int blocks, u8 iv[])
 	 */
 SYM_FUNC_START_LOCAL(__xts_crypt8)
-	mov		x6, #1
-	lsl		x6, x6, x23
-	subs		w23, w23, #8
-	csel		x23, x23, xzr, pl
-	csel		x6, x6, xzr, mi
+	movi		v18.2s, #0x1
+	movi		v19.2s, #0x87
+	uzp1		v18.4s, v18.4s, v19.4s
+
+	ld1		{v0.16b-v3.16b}, [x1], #64
+	ld1		{v4.16b-v7.16b}, [x1], #64
+
+	next_tweak	v26, v25, v18, v19
+	next_tweak	v27, v26, v18, v19
+	next_tweak	v28, v27, v18, v19
+	next_tweak	v29, v28, v18, v19
+	next_tweak	v30, v29, v18, v19
+	next_tweak	v31, v30, v18, v19
+	next_tweak	v16, v31, v18, v19
+	next_tweak	v17, v16, v18, v19
 
-	ld1		{v0.16b}, [x20], #16
-	next_tweak	v26, v25, v30, v31
 	eor		v0.16b, v0.16b, v25.16b
-	tbnz		x6, #1, 0f
-
-	ld1		{v1.16b}, [x20], #16
-	next_tweak	v27, v26, v30, v31
 	eor		v1.16b, v1.16b, v26.16b
-	tbnz		x6, #2, 0f
-
-	ld1		{v2.16b}, [x20], #16
-	next_tweak	v28, v27, v30, v31
 	eor		v2.16b, v2.16b, v27.16b
-	tbnz		x6, #3, 0f
-
-	ld1		{v3.16b}, [x20], #16
-	next_tweak	v29, v28, v30, v31
 	eor		v3.16b, v3.16b, v28.16b
-	tbnz		x6, #4, 0f
-
-	ld1		{v4.16b}, [x20], #16
-	str		q29, [sp, #.Lframe_local_offset]
 	eor		v4.16b, v4.16b, v29.16b
-	next_tweak	v29, v29, v30, v31
-	tbnz		x6, #5, 0f
-
-	ld1		{v5.16b}, [x20], #16
-	str		q29, [sp, #.Lframe_local_offset + 16]
-	eor		v5.16b, v5.16b, v29.16b
-	next_tweak	v29, v29, v30, v31
-	tbnz		x6, #6, 0f
-
-	ld1		{v6.16b}, [x20], #16
-	str		q29, [sp, #.Lframe_local_offset + 32]
-	eor		v6.16b, v6.16b, v29.16b
-	next_tweak	v29, v29, v30, v31
-	tbnz		x6, #7, 0f
+	eor		v5.16b, v5.16b, v30.16b
+	eor		v6.16b, v6.16b, v31.16b
+	eor		v7.16b, v7.16b, v16.16b
 
-	ld1		{v7.16b}, [x20], #16
-	str		q29, [sp, #.Lframe_local_offset + 48]
-	eor		v7.16b, v7.16b, v29.16b
-	next_tweak	v29, v29, v30, v31
+	stp		q16, q17, [sp, #16]
 
-0:	mov		bskey, x21
-	mov		rounds, x22
+	mov		bskey, x2
+	mov		rounds, x3
 	br		x16
 SYM_FUNC_END(__xts_crypt8)
 
 	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
-	frame_push	6, 64
-
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x5
+	stp		x29, x30, [sp, #-48]!
+	mov		x29, sp
 
-	movi		v30.2s, #0x1
-	movi		v25.2s, #0x87
-	uzp1		v30.4s, v30.4s, v25.4s
-	ld1		{v25.16b}, [x24]
+	ld1		{v25.16b}, [x5]
 
-99:	adr		x16, \do8
+0:	adr		x16, \do8
 	bl		__xts_crypt8
 
-	ldp		q16, q17, [sp, #.Lframe_local_offset]
-	ldp		q18, q19, [sp, #.Lframe_local_offset + 32]
+	eor		v16.16b, \o0\().16b, v25.16b
+	eor		v17.16b, \o1\().16b, v26.16b
+	eor		v18.16b, \o2\().16b, v27.16b
+	eor		v19.16b, \o3\().16b, v28.16b
 
-	eor		\o0\().16b, \o0\().16b, v25.16b
-	eor		\o1\().16b, \o1\().16b, v26.16b
-	eor		\o2\().16b, \o2\().16b, v27.16b
-	eor		\o3\().16b, \o3\().16b, v28.16b
+	ldp		q24, q25, [sp, #16]
 
-	st1		{\o0\().16b}, [x19], #16
-	mov		v25.16b, v26.16b
-	tbnz		x6, #1, 1f
-	st1		{\o1\().16b}, [x19], #16
-	mov		v25.16b, v27.16b
-	tbnz		x6, #2, 1f
-	st1		{\o2\().16b}, [x19], #16
-	mov		v25.16b, v28.16b
-	tbnz		x6, #3, 1f
-	st1		{\o3\().16b}, [x19], #16
-	mov		v25.16b, v29.16b
-	tbnz		x6, #4, 1f
+	eor		v20.16b, \o4\().16b, v29.16b
+	eor		v21.16b, \o5\().16b, v30.16b
+	eor		v22.16b, \o6\().16b, v31.16b
+	eor		v23.16b, \o7\().16b, v24.16b
 
-	eor		\o4\().16b, \o4\().16b, v16.16b
-	eor		\o5\().16b, \o5\().16b, v17.16b
-	eor		\o6\().16b, \o6\().16b, v18.16b
-	eor		\o7\().16b, \o7\().16b, v19.16b
+	st1		{v16.16b-v19.16b}, [x0], #64
+	st1		{v20.16b-v23.16b}, [x0], #64
 
-	st1		{\o4\().16b}, [x19], #16
-	tbnz		x6, #5, 1f
-	st1		{\o5\().16b}, [x19], #16
-	tbnz		x6, #6, 1f
-	st1		{\o6\().16b}, [x19], #16
-	tbnz		x6, #7, 1f
-	st1		{\o7\().16b}, [x19], #16
+	subs		x4, x4, #8
+	b.gt		0b
 
-	cbz		x23, 1f
-	st1		{v25.16b}, [x24]
-
-	b		99b
-
-1:	st1		{v25.16b}, [x24]
-	frame_pop
+	st1		{v25.16b}, [x5]
+	ldp		x29, x30, [sp], #48
 	ret
 	.endm
 
@@ -869,133 +817,51 @@ SYM_FUNC_END(aesbs_xts_decrypt)
 
 	/*
 	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
-	 *		     int rounds, int blocks, u8 iv[], u8 final[])
+	 *		     int rounds, int blocks, u8 iv[])
 	 */
 SYM_FUNC_START(aesbs_ctr_encrypt)
-	frame_push	8
-
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x5
-	mov		x25, x6
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
-	cmp		x25, #0
-	cset		x26, ne
-	add		x23, x23, x26		// do one extra block if final
-
-	ldp		x7, x8, [x24]
-	ld1		{v0.16b}, [x24]
+	ldp		x7, x8, [x5]
+	ld1		{v0.16b}, [x5]
 CPU_LE(	rev		x7, x7		)
 CPU_LE(	rev		x8, x8		)
 	adds		x8, x8, #1
 	adc		x7, x7, xzr
 
-99:	mov		x9, #1
-	lsl		x9, x9, x23
-	subs		w23, w23, #8
-	csel		x23, x23, xzr, pl
-	csel		x9, x9, xzr, le
-
-	tbnz		x9, #1, 0f
-	next_ctr	v1
-	tbnz		x9, #2, 0f
+0:	next_ctr	v1
 	next_ctr	v2
-	tbnz		x9, #3, 0f
 	next_ctr	v3
-	tbnz		x9, #4, 0f
 	next_ctr	v4
-	tbnz		x9, #5, 0f
 	next_ctr	v5
-	tbnz		x9, #6, 0f
 	next_ctr	v6
-	tbnz		x9, #7, 0f
 	next_ctr	v7
 
-0:	mov		bskey, x21
-	mov		rounds, x22
+	mov		bskey, x2
+	mov		rounds, x3
 	bl		aesbs_encrypt8
 
-	lsr		x9, x9, x26		// disregard the extra block
-	tbnz		x9, #0, 0f
-
-	ld1		{v8.16b}, [x20], #16
-	eor		v0.16b, v0.16b, v8.16b
-	st1		{v0.16b}, [x19], #16
-	tbnz		x9, #1, 1f
+	ld1		{ v8.16b-v11.16b}, [x1], #64
+	ld1		{v12.16b-v15.16b}, [x1], #64
 
-	ld1		{v9.16b}, [x20], #16
-	eor		v1.16b, v1.16b, v9.16b
-	st1		{v1.16b}, [x19], #16
-	tbnz		x9, #2, 2f
+	eor		v8.16b, v0.16b, v8.16b
+	eor		v9.16b, v1.16b, v9.16b
+	eor		v10.16b, v4.16b, v10.16b
+	eor		v11.16b, v6.16b, v11.16b
+	eor		v12.16b, v3.16b, v12.16b
+	eor		v13.16b, v7.16b, v13.16b
+	eor		v14.16b, v2.16b, v14.16b
+	eor		v15.16b, v5.16b, v15.16b
 
-	ld1		{v10.16b}, [x20], #16
-	eor		v4.16b, v4.16b, v10.16b
-	st1		{v4.16b}, [x19], #16
-	tbnz		x9, #3, 3f
+	st1		{ v8.16b-v11.16b}, [x0], #64
+	st1		{v12.16b-v15.16b}, [x0], #64
 
-	ld1		{v11.16b}, [x20], #16
-	eor		v6.16b, v6.16b, v11.16b
-	st1		{v6.16b}, [x19], #16
-	tbnz		x9, #4, 4f
-
-	ld1		{v12.16b}, [x20], #16
-	eor		v3.16b, v3.16b, v12.16b
-	st1		{v3.16b}, [x19], #16
-	tbnz		x9, #5, 5f
-
-	ld1		{v13.16b}, [x20], #16
-	eor		v7.16b, v7.16b, v13.16b
-	st1		{v7.16b}, [x19], #16
-	tbnz		x9, #6, 6f
+	next_ctr	v0
+	subs		x4, x4, #8
+	b.gt		0b
 
-	ld1		{v14.16b}, [x20], #16
-	eor		v2.16b, v2.16b, v14.16b
-	st1		{v2.16b}, [x19], #16
-	tbnz		x9, #7, 7f
-
-	ld1		{v15.16b}, [x20], #16
-	eor		v5.16b, v5.16b, v15.16b
-	st1		{v5.16b}, [x19], #16
-
-8:	next_ctr	v0
-	st1		{v0.16b}, [x24]
-	cbz		x23, .Lctr_done
-
-	b		99b
-
-.Lctr_done:
-	frame_pop
+	st1		{v0.16b}, [x5]
+	ldp		x29, x30, [sp], #16
 	ret
-
-	/*
-	 * If we are handling the tail of the input (x6 != NULL), return the
-	 * final keystream block back to the caller.
-	 */
-0:	cbz		x25, 8b
-	st1		{v0.16b}, [x25]
-	b		8b
-1:	cbz		x25, 8b
-	st1		{v1.16b}, [x25]
-	b		8b
-2:	cbz		x25, 8b
-	st1		{v4.16b}, [x25]
-	b		8b
-3:	cbz		x25, 8b
-	st1		{v6.16b}, [x25]
-	b		8b
-4:	cbz		x25, 8b
-	st1		{v3.16b}, [x25]
-	b		8b
-5:	cbz		x25, 8b
-	st1		{v7.16b}, [x25]
-	b		8b
-6:	cbz		x25, 8b
-	st1		{v2.16b}, [x25]
-	b		8b
-7:	cbz		x25, 8b
-	st1		{v5.16b}, [x25]
-	b		8b
 SYM_FUNC_END(aesbs_ctr_encrypt)
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index 8df6ad8cb09d..bac4cabef607 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -34,7 +34,7 @@ asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
 				  int rounds, int blocks, u8 iv[]);
 
 asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
-				  int rounds, int blocks, u8 iv[], u8 final[]);
+				  int rounds, int blocks, u8 iv[]);
 
 asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
 				  int rounds, int blocks, u8 iv[]);
@@ -46,6 +46,8 @@ asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				     int rounds, int blocks);
 asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				     int rounds, int blocks, u8 iv[]);
+asmlinkage void neon_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
+				     int rounds, int bytes, u8 ctr[]);
 asmlinkage void neon_aes_xts_encrypt(u8 out[], u8 const in[],
 				     u32 const rk1[], int rounds, int bytes,
 				     u32 const rk2[], u8 iv[], int first);
@@ -58,7 +60,7 @@ struct aesbs_ctx {
 	int	rounds;
 } __aligned(AES_BLOCK_SIZE);
 
-struct aesbs_cbc_ctx {
+struct aesbs_cbc_ctr_ctx {
 	struct aesbs_ctx	key;
 	u32			enc[AES_MAX_KEYLENGTH_U32];
 };
@@ -128,10 +130,10 @@ static int ecb_decrypt(struct skcipher_request *req)
 	return __ecb_crypt(req, aesbs_ecb_decrypt);
 }
 
-static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 			    unsigned int key_len)
 {
-	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_aes_ctx rk;
 	int err;
 
@@ -154,7 +156,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 static int cbc_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
 	int err;
 
@@ -177,7 +179,7 @@ static int cbc_encrypt(struct skcipher_request *req)
 static int cbc_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
 	int err;
 
@@ -205,40 +207,32 @@ static int cbc_decrypt(struct skcipher_request *req)
 static int ctr_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
-	u8 buf[AES_BLOCK_SIZE];
 	int err;
 
 	err = skcipher_walk_virt(&walk, req, false);
 
 	while (walk.nbytes > 0) {
-		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
-		u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
-
-		if (walk.nbytes < walk.total) {
-			blocks = round_down(blocks,
-					    walk.stride / AES_BLOCK_SIZE);
-			final = NULL;
-		}
+		int blocks = (walk.nbytes / AES_BLOCK_SIZE) & ~7;
+		int nbytes = walk.nbytes % (8 * AES_BLOCK_SIZE);
+		const u8 *src = walk.src.virt.addr;
+		u8 *dst = walk.dst.virt.addr;
 
 		kernel_neon_begin();
-		aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				  ctx->rk, ctx->rounds, blocks, walk.iv, final);
-		kernel_neon_end();
-
-		if (final) {
-			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
-			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
-
-			crypto_xor_cpy(dst, src, final,
-				       walk.total % AES_BLOCK_SIZE);
-
-			err = skcipher_walk_done(&walk, 0);
-			break;
+		if (blocks >= 8) {
+			aesbs_ctr_encrypt(dst, src, ctx->key.rk, ctx->key.rounds,
+					  blocks, walk.iv);
+			dst += blocks * AES_BLOCK_SIZE;
+			src += blocks * AES_BLOCK_SIZE;
 		}
-		err = skcipher_walk_done(&walk,
-					 walk.nbytes - blocks * AES_BLOCK_SIZE);
+		if (nbytes && walk.nbytes == walk.total) {
+			neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds,
+					     nbytes, walk.iv);
+			nbytes = 0;
+		}
+		kernel_neon_end();
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	return err;
 }
@@ -308,23 +302,18 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
 		return err;
 
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
-		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
-
-		if (walk.nbytes < walk.total || walk.nbytes % AES_BLOCK_SIZE)
-			blocks = round_down(blocks,
-					    walk.stride / AES_BLOCK_SIZE);
-
+		int blocks = (walk.nbytes / AES_BLOCK_SIZE) & ~7;
 		out = walk.dst.virt.addr;
 		in = walk.src.virt.addr;
 		nbytes = walk.nbytes;
 
 		kernel_neon_begin();
-		if (likely(blocks > 6)) { /* plain NEON is faster otherwise */
-			if (first)
+		if (blocks >= 8) {
+			if (first == 1)
 				neon_aes_ecb_encrypt(walk.iv, walk.iv,
 						     ctx->twkey,
 						     ctx->key.rounds, 1);
-			first = 0;
+			first = 2;
 
 			fn(out, in, ctx->key.rk, ctx->key.rounds, blocks,
 			   walk.iv);
@@ -333,10 +322,17 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
 			in += blocks * AES_BLOCK_SIZE;
 			nbytes -= blocks * AES_BLOCK_SIZE;
 		}
-
-		if (walk.nbytes == walk.total && nbytes > 0)
-			goto xts_tail;
-
+		if (walk.nbytes == walk.total && nbytes > 0) {
+			if (encrypt)
+				neon_aes_xts_encrypt(out, in, ctx->cts.key_enc,
+						     ctx->key.rounds, nbytes,
+						     ctx->twkey, walk.iv, first);
+			else
+				neon_aes_xts_decrypt(out, in, ctx->cts.key_dec,
+						     ctx->key.rounds, nbytes,
+						     ctx->twkey, walk.iv, first);
+			nbytes = first = 0;
+		}
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, nbytes);
 	}
@@ -361,13 +357,12 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
 	nbytes = walk.nbytes;
 
 	kernel_neon_begin();
-xts_tail:
 	if (encrypt)
 		neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds,
-				     nbytes, ctx->twkey, walk.iv, first ?: 2);
+				     nbytes, ctx->twkey, walk.iv, first);
 	else
 		neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds,
-				     nbytes, ctx->twkey, walk.iv, first ?: 2);
+				     nbytes, ctx->twkey, walk.iv, first);
 	kernel_neon_end();
 
 	return skcipher_walk_done(&walk, 0);
@@ -402,14 +397,14 @@ static struct skcipher_alg aes_algs[] = { {
 	.base.cra_driver_name	= "cbc-aes-neonbs",
 	.base.cra_priority	= 250,
 	.base.cra_blocksize	= AES_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct aesbs_cbc_ctx),
+	.base.cra_ctxsize	= sizeof(struct aesbs_cbc_ctr_ctx),
 	.base.cra_module	= THIS_MODULE,
 
 	.min_keysize		= AES_MIN_KEY_SIZE,
 	.max_keysize		= AES_MAX_KEY_SIZE,
 	.walksize		= 8 * AES_BLOCK_SIZE,
 	.ivsize			= AES_BLOCK_SIZE,
-	.setkey			= aesbs_cbc_setkey,
+	.setkey			= aesbs_cbc_ctr_setkey,
 	.encrypt		= cbc_encrypt,
 	.decrypt		= cbc_decrypt,
 }, {
@@ -417,7 +412,7 @@ static struct skcipher_alg aes_algs[] = { {
 	.base.cra_driver_name	= "ctr-aes-neonbs",
 	.base.cra_priority	= 250,
 	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct aesbs_ctx),
+	.base.cra_ctxsize	= sizeof(struct aesbs_cbc_ctr_ctx),
 	.base.cra_module	= THIS_MODULE,
 
 	.min_keysize		= AES_MIN_KEY_SIZE,
@@ -425,7 +420,7 @@ static struct skcipher_alg aes_algs[] = { {
 	.chunksize		= AES_BLOCK_SIZE,
 	.walksize		= 8 * AES_BLOCK_SIZE,
 	.ivsize			= AES_BLOCK_SIZE,
-	.setkey			= aesbs_setkey,
+	.setkey			= aesbs_cbc_ctr_setkey,
 	.encrypt		= ctr_encrypt,
 	.decrypt		= ctr_encrypt,
 }, {
diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c
index 8c65cecf560a..250e1377c481 100644
--- a/arch/arm64/crypto/sha3-ce-glue.c
+++ b/arch/arm64/crypto/sha3-ce-glue.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+// SPDX-License-Identifier: GPL-2.0
 /*
  * sha3-ce-glue.c - core SHA-3 transform using v8.2 Crypto Extensions
  *
diff --git a/arch/arm64/crypto/sha512-armv8.pl b/arch/arm64/crypto/sha512-armv8.pl
index 2d8655d5b1af..35ec9ae99fe1 100644
--- a/arch/arm64/crypto/sha512-armv8.pl
+++ b/arch/arm64/crypto/sha512-armv8.pl
@@ -43,7 +43,7 @@
 #	on Cortex-A53 (or by 4 cycles per round).
 # (***)	Super-impressive coefficients over gcc-generated code are
 #	indication of some compiler "pathology", most notably code
-#	generated with -mgeneral-regs-only is significanty faster
+#	generated with -mgeneral-regs-only is significantly faster
 #	and the gap is only 40-90%.
 #
 # October 2016.
diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c
index e62a094a9d52..94cb7580deb7 100644
--- a/arch/arm64/crypto/sha512-ce-glue.c
+++ b/arch/arm64/crypto/sha512-ce-glue.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+// SPDX-License-Identifier: GPL-2.0
 /*
  * sha512-ce-glue.c - SHA-384/SHA-512 using ARMv8 Crypto Extensions
  *
diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c
index d71faca322f2..ee98954ae8ca 100644
--- a/arch/arm64/crypto/sm3-ce-glue.c
+++ b/arch/arm64/crypto/sm3-ce-glue.c
@@ -26,8 +26,10 @@ asmlinkage void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
 static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
 			 unsigned int len)
 {
-	if (!crypto_simd_usable())
-		return crypto_sm3_update(desc, data, len);
+	if (!crypto_simd_usable()) {
+		sm3_update(shash_desc_ctx(desc), data, len);
+		return 0;
+	}
 
 	kernel_neon_begin();
 	sm3_base_do_update(desc, data, len, sm3_ce_transform);
@@ -38,8 +40,10 @@ static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
 
 static int sm3_ce_final(struct shash_desc *desc, u8 *out)
 {
-	if (!crypto_simd_usable())
-		return crypto_sm3_finup(desc, NULL, 0, out);
+	if (!crypto_simd_usable()) {
+		sm3_final(shash_desc_ctx(desc), out);
+		return 0;
+	}
 
 	kernel_neon_begin();
 	sm3_base_do_finalize(desc, sm3_ce_transform);
@@ -51,14 +55,22 @@ static int sm3_ce_final(struct shash_desc *desc, u8 *out)
 static int sm3_ce_finup(struct shash_desc *desc, const u8 *data,
 			unsigned int len, u8 *out)
 {
-	if (!crypto_simd_usable())
-		return crypto_sm3_finup(desc, data, len, out);
+	if (!crypto_simd_usable()) {
+		struct sm3_state *sctx = shash_desc_ctx(desc);
+
+		if (len)
+			sm3_update(sctx, data, len);
+		sm3_final(sctx, out);
+		return 0;
+	}
 
 	kernel_neon_begin();
-	sm3_base_do_update(desc, data, len, sm3_ce_transform);
+	if (len)
+		sm3_base_do_update(desc, data, len, sm3_ce_transform);
+	sm3_base_do_finalize(desc, sm3_ce_transform);
 	kernel_neon_end();
 
-	return sm3_ce_final(desc, out);
+	return sm3_base_finish(desc, out);
 }
 
 static struct shash_alg sm3_alg = {
diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h
new file mode 100644
index 000000000000..99483b19b99f
--- /dev/null
+++ b/arch/arm64/include/asm/apple_m1_pmu.h
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef __ASM_APPLE_M1_PMU_h
+#define __ASM_APPLE_M1_PMU_h
+
+#include <linux/bits.h>
+#include <asm/sysreg.h>
+
+/* Counters */
+#define SYS_IMP_APL_PMC0_EL1	sys_reg(3, 2, 15, 0, 0)
+#define SYS_IMP_APL_PMC1_EL1	sys_reg(3, 2, 15, 1, 0)
+#define SYS_IMP_APL_PMC2_EL1	sys_reg(3, 2, 15, 2, 0)
+#define SYS_IMP_APL_PMC3_EL1	sys_reg(3, 2, 15, 3, 0)
+#define SYS_IMP_APL_PMC4_EL1	sys_reg(3, 2, 15, 4, 0)
+#define SYS_IMP_APL_PMC5_EL1	sys_reg(3, 2, 15, 5, 0)
+#define SYS_IMP_APL_PMC6_EL1	sys_reg(3, 2, 15, 6, 0)
+#define SYS_IMP_APL_PMC7_EL1	sys_reg(3, 2, 15, 7, 0)
+#define SYS_IMP_APL_PMC8_EL1	sys_reg(3, 2, 15, 9, 0)
+#define SYS_IMP_APL_PMC9_EL1	sys_reg(3, 2, 15, 10, 0)
+
+/* Core PMC control register */
+#define SYS_IMP_APL_PMCR0_EL1	sys_reg(3, 1, 15, 0, 0)
+#define PMCR0_CNT_ENABLE_0_7	GENMASK(7, 0)
+#define PMCR0_IMODE		GENMASK(10, 8)
+#define PMCR0_IMODE_OFF		0
+#define PMCR0_IMODE_PMI		1
+#define PMCR0_IMODE_AIC		2
+#define PMCR0_IMODE_HALT	3
+#define PMCR0_IMODE_FIQ		4
+#define PMCR0_IACT		BIT(11)
+#define PMCR0_PMI_ENABLE_0_7	GENMASK(19, 12)
+#define PMCR0_STOP_CNT_ON_PMI	BIT(20)
+#define PMCR0_CNT_GLOB_L2C_EVT	BIT(21)
+#define PMCR0_DEFER_PMI_TO_ERET	BIT(22)
+#define PMCR0_ALLOW_CNT_EN_EL0	BIT(30)
+#define PMCR0_CNT_ENABLE_8_9	GENMASK(33, 32)
+#define PMCR0_PMI_ENABLE_8_9	GENMASK(45, 44)
+
+#define SYS_IMP_APL_PMCR1_EL1	sys_reg(3, 1, 15, 1, 0)
+#define PMCR1_COUNT_A64_EL0_0_7	GENMASK(15, 8)
+#define PMCR1_COUNT_A64_EL1_0_7	GENMASK(23, 16)
+#define PMCR1_COUNT_A64_EL0_8_9	GENMASK(41, 40)
+#define PMCR1_COUNT_A64_EL1_8_9	GENMASK(49, 48)
+
+#define SYS_IMP_APL_PMCR2_EL1	sys_reg(3, 1, 15, 2, 0)
+#define SYS_IMP_APL_PMCR3_EL1	sys_reg(3, 1, 15, 3, 0)
+#define SYS_IMP_APL_PMCR4_EL1	sys_reg(3, 1, 15, 4, 0)
+
+#define SYS_IMP_APL_PMESR0_EL1	sys_reg(3, 1, 15, 5, 0)
+#define PMESR0_EVT_CNT_2	GENMASK(7, 0)
+#define PMESR0_EVT_CNT_3	GENMASK(15, 8)
+#define PMESR0_EVT_CNT_4	GENMASK(23, 16)
+#define PMESR0_EVT_CNT_5	GENMASK(31, 24)
+
+#define SYS_IMP_APL_PMESR1_EL1	sys_reg(3, 1, 15, 6, 0)
+#define PMESR1_EVT_CNT_6	GENMASK(7, 0)
+#define PMESR1_EVT_CNT_7	GENMASK(15, 8)
+#define PMESR1_EVT_CNT_8	GENMASK(23, 16)
+#define PMESR1_EVT_CNT_9	GENMASK(31, 24)
+
+#define SYS_IMP_APL_PMSR_EL1	sys_reg(3, 1, 15, 13, 0)
+#define PMSR_OVERFLOW		GENMASK(9, 0)
+
+#endif /* __ASM_APPLE_M1_PMU_h */
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 4ad22c3135db..8bd5afc7b692 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -53,17 +53,36 @@ static inline u64 gic_read_iar_common(void)
  * The gicv3 of ThunderX requires a modified version for reading the
  * IAR status to ensure data synchronization (access to icc_iar1_el1
  * is not sync'ed before and after).
+ *
+ * Erratum 38545
+ *
+ * When a IAR register read races with a GIC interrupt RELEASE event,
+ * GIC-CPU interface could wrongly return a valid INTID to the CPU
+ * for an interrupt that is already released(non activated) instead of 0x3ff.
+ *
+ * To workaround this, return a valid interrupt ID only if there is a change
+ * in the active priority list after the IAR read.
+ *
+ * Common function used for both the workarounds since,
+ * 1. On Thunderx 88xx 1.x both erratas are applicable.
+ * 2. Having extra nops doesn't add any side effects for Silicons where
+ *    erratum 23154 is not applicable.
  */
 static inline u64 gic_read_iar_cavium_thunderx(void)
 {
-	u64 irqstat;
+	u64 irqstat, apr;
 
+	apr = read_sysreg_s(SYS_ICC_AP1R0_EL1);
 	nops(8);
 	irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
 	nops(4);
 	mb();
 
-	return irqstat;
+	/* Max priority groups implemented is only 32 */
+	if (likely(apr != read_sysreg_s(SYS_ICC_AP1R0_EL1)))
+		return irqstat;
+
+	return 0x3ff;
 }
 
 static inline void gic_write_ctlr(u32 val)
diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
index 09e43272ccb0..d1bb5e71df25 100644
--- a/arch/arm64/include/asm/archrandom.h
+++ b/arch/arm64/include/asm/archrandom.h
@@ -42,13 +42,47 @@ static inline bool __arm64_rndr(unsigned long *v)
 	return ok;
 }
 
+static inline bool __arm64_rndrrs(unsigned long *v)
+{
+	bool ok;
+
+	/*
+	 * Reads of RNDRRS set PSTATE.NZCV to 0b0000 on success,
+	 * and set PSTATE.NZCV to 0b0100 otherwise.
+	 */
+	asm volatile(
+		__mrs_s("%0", SYS_RNDRRS_EL0) "\n"
+	"	cset %w1, ne\n"
+	: "=r" (*v), "=r" (ok)
+	:
+	: "cc");
+
+	return ok;
+}
+
 static inline bool __must_check arch_get_random_long(unsigned long *v)
 {
+	/*
+	 * Only support the generic interface after we have detected
+	 * the system wide capability, avoiding complexity with the
+	 * cpufeature code and with potential scheduling between CPUs
+	 * with and without the feature.
+	 */
+	if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v))
+		return true;
 	return false;
 }
 
 static inline bool __must_check arch_get_random_int(unsigned int *v)
 {
+	if (cpus_have_const_cap(ARM64_HAS_RNG)) {
+		unsigned long val;
+
+		if (__arm64_rndr(&val)) {
+			*v = val;
+			return true;
+		}
+	}
 	return false;
 }
 
@@ -71,12 +105,11 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
 	}
 
 	/*
-	 * Only support the generic interface after we have detected
-	 * the system wide capability, avoiding complexity with the
-	 * cpufeature code and with potential scheduling between CPUs
-	 * with and without the feature.
+	 * RNDRRS is not backed by an entropy source but by a DRBG that is
+	 * reseeded after each invocation. This is not a 100% fit but good
+	 * enough to implement this API if no other entropy source exists.
 	 */
-	if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v))
+	if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndrrs(v))
 		return true;
 
 	return false;
@@ -96,7 +129,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
 	}
 
 	if (cpus_have_const_cap(ARM64_HAS_RNG)) {
-		if (__arm64_rndr(&val)) {
+		if (__arm64_rndrrs(&val)) {
 			*v = val;
 			return true;
 		}
diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h
index f1bba5fc61c4..ead62f7dd269 100644
--- a/arch/arm64/include/asm/asm_pointer_auth.h
+++ b/arch/arm64/include/asm/asm_pointer_auth.h
@@ -60,6 +60,9 @@ alternative_else_nop_endif
 	.macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3
 	mrs	\tmp1, id_aa64isar1_el1
 	ubfx	\tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8
+	mrs_s	\tmp2, SYS_ID_AA64ISAR2_EL1
+	ubfx	\tmp2, \tmp2, #ID_AA64ISAR2_APA3_SHIFT, #4
+	orr	\tmp1, \tmp1, \tmp2
 	cbz	\tmp1, .Lno_addr_auth\@
 	mov_q	\tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
 			SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index e8bd0af0141c..8c5a61aeaf8e 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -109,6 +109,13 @@
 	.endm
 
 /*
+ * Clear Branch History instruction
+ */
+	.macro clearbhb
+	hint	#22
+	.endm
+
+/*
  * Speculation barrier
  */
 	.macro	sb
@@ -535,11 +542,6 @@ alternative_endif
 #define EXPORT_SYMBOL_NOKASAN(name)	EXPORT_SYMBOL(name)
 #endif
 
-#ifdef CONFIG_KASAN_HW_TAGS
-#define EXPORT_SYMBOL_NOHWKASAN(name)
-#else
-#define EXPORT_SYMBOL_NOHWKASAN(name)	EXPORT_SYMBOL_NOKASAN(name)
-#endif
 	/*
 	 * Emit a 64-bit absolute little endian symbol reference in a way that
 	 * ensures that it will be resolved at build time, even when building a
@@ -850,4 +852,50 @@ alternative_endif
 
 #endif /* GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT */
 
+	.macro __mitigate_spectre_bhb_loop      tmp
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+alternative_cb  spectre_bhb_patch_loop_iter
+	mov	\tmp, #32		// Patched to correct the immediate
+alternative_cb_end
+.Lspectre_bhb_loop\@:
+	b	. + 4
+	subs	\tmp, \tmp, #1
+	b.ne	.Lspectre_bhb_loop\@
+	sb
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+	.endm
+
+	.macro mitigate_spectre_bhb_loop	tmp
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+alternative_cb	spectre_bhb_patch_loop_mitigation_enable
+	b	.L_spectre_bhb_loop_done\@	// Patched to NOP
+alternative_cb_end
+	__mitigate_spectre_bhb_loop	\tmp
+.L_spectre_bhb_loop_done\@:
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+	.endm
+
+	/* Save/restores x0-x3 to the stack */
+	.macro __mitigate_spectre_bhb_fw
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+	stp	x0, x1, [sp, #-16]!
+	stp	x2, x3, [sp, #-16]!
+	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_3
+alternative_cb	smccc_patch_fw_mitigation_conduit
+	nop					// Patched to SMC/HVC #0
+alternative_cb_end
+	ldp	x2, x3, [sp], #16
+	ldp	x0, x1, [sp], #16
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+	.endm
+
+	.macro mitigate_spectre_bhb_clear_insn
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+alternative_cb	spectre_bhb_patch_clearbhb
+	/* Patched to NOP when not supported */
+	clearbhb
+	isb
+alternative_cb_end
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+	.endm
 #endif	/* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index ef6be92b1921..c62e7e5e2f0c 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -356,6 +356,7 @@ struct arm64_cpu_capabilities {
 		struct {	/* Feature register checking */
 			u32 sys_reg;
 			u8 field_pos;
+			u8 field_width;
 			u8 min_field_value;
 			u8 hwcap_type;
 			bool sign;
@@ -576,6 +577,8 @@ static inline u64 arm64_ftr_reg_user_value(const struct arm64_ftr_reg *reg)
 static inline int __attribute_const__
 cpuid_feature_extract_field_width(u64 features, int field, int width, bool sign)
 {
+	if (WARN_ON_ONCE(!width))
+		width = 4;
 	return (sign) ?
 		cpuid_feature_extract_signed_field_width(features, field, width) :
 		cpuid_feature_extract_unsigned_field_width(features, field, width);
@@ -637,6 +640,35 @@ static inline bool cpu_supports_mixed_endian_el0(void)
 	return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
 }
 
+
+static inline bool supports_csv2p3(int scope)
+{
+	u64 pfr0;
+	u8 csv2_val;
+
+	if (scope == SCOPE_LOCAL_CPU)
+		pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1);
+	else
+		pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+	csv2_val = cpuid_feature_extract_unsigned_field(pfr0,
+							ID_AA64PFR0_CSV2_SHIFT);
+	return csv2_val == 3;
+}
+
+static inline bool supports_clearbhb(int scope)
+{
+	u64 isar2;
+
+	if (scope == SCOPE_LOCAL_CPU)
+		isar2 = read_sysreg_s(SYS_ID_AA64ISAR2_EL1);
+	else
+		isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
+
+	return cpuid_feature_extract_unsigned_field(isar2,
+						    ID_AA64ISAR2_CLEARBHB_SHIFT);
+}
+
 const struct cpumask *system_32bit_el0_cpumask(void);
 DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
 
@@ -854,6 +886,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
 extern struct arm64_ftr_override id_aa64mmfr1_override;
 extern struct arm64_ftr_override id_aa64pfr1_override;
 extern struct arm64_ftr_override id_aa64isar1_override;
+extern struct arm64_ftr_override id_aa64isar2_override;
 
 u32 get_kvm_ipa_limit(void);
 void dump_cpu_features(void);
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 999b9149f856..232b439cbaf3 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -73,10 +73,14 @@
 #define ARM_CPU_PART_CORTEX_A76		0xD0B
 #define ARM_CPU_PART_NEOVERSE_N1	0xD0C
 #define ARM_CPU_PART_CORTEX_A77		0xD0D
+#define ARM_CPU_PART_NEOVERSE_V1	0xD40
+#define ARM_CPU_PART_CORTEX_A78		0xD41
+#define ARM_CPU_PART_CORTEX_X1		0xD44
 #define ARM_CPU_PART_CORTEX_A510	0xD46
 #define ARM_CPU_PART_CORTEX_A710	0xD47
 #define ARM_CPU_PART_CORTEX_X2		0xD48
 #define ARM_CPU_PART_NEOVERSE_N2	0xD49
+#define ARM_CPU_PART_CORTEX_A78C	0xD4B
 
 #define APM_CPU_PART_POTENZA		0x000
 
@@ -84,6 +88,13 @@
 #define CAVIUM_CPU_PART_THUNDERX_81XX	0x0A2
 #define CAVIUM_CPU_PART_THUNDERX_83XX	0x0A3
 #define CAVIUM_CPU_PART_THUNDERX2	0x0AF
+/* OcteonTx2 series */
+#define CAVIUM_CPU_PART_OCTX2_98XX	0x0B1
+#define CAVIUM_CPU_PART_OCTX2_96XX	0x0B2
+#define CAVIUM_CPU_PART_OCTX2_95XX	0x0B3
+#define CAVIUM_CPU_PART_OCTX2_95XXN	0x0B4
+#define CAVIUM_CPU_PART_OCTX2_95XXMM	0x0B5
+#define CAVIUM_CPU_PART_OCTX2_95XXO	0x0B6
 
 #define BRCM_CPU_PART_BRAHMA_B53	0x100
 #define BRCM_CPU_PART_VULCAN		0x516
@@ -117,13 +128,23 @@
 #define MIDR_CORTEX_A76	MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
 #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
 #define MIDR_CORTEX_A77	MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
+#define MIDR_NEOVERSE_V1	MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
+#define MIDR_CORTEX_A78	MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
+#define MIDR_CORTEX_X1	MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
 #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
 #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
 #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
 #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
+#define MIDR_CORTEX_A78C	MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C)
 #define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
+#define MIDR_OCTX2_98XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_98XX)
+#define MIDR_OCTX2_96XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_96XX)
+#define MIDR_OCTX2_95XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XX)
+#define MIDR_OCTX2_95XXN MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXN)
+#define MIDR_OCTX2_95XXMM MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXMM)
+#define MIDR_OCTX2_95XXO MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXO)
 #define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2)
 #define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53)
 #define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN)
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 657c921fd784..00c291067e57 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -34,18 +34,6 @@
  */
 #define BREAK_INSTR_SIZE		AARCH64_INSN_SIZE
 
-/*
- * BRK instruction encoding
- * The #imm16 value should be placed at bits[20:5] within BRK ins
- */
-#define AARCH64_BREAK_MON	0xd4200000
-
-/*
- * BRK instruction for provoking a fault on purpose
- * Unlike kgdb, #imm16 value with unallocated handler is used for faulting.
- */
-#define AARCH64_BREAK_FAULT	(AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5))
-
 #define AARCH64_BREAK_KGDB_DYN_DBG	\
 	(AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5))
 
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 4335800201c9..daff882883f9 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -62,9 +62,11 @@ enum fixed_addresses {
 #endif /* CONFIG_ACPI_APEI_GHES */
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+	FIX_ENTRY_TRAMP_TEXT3,
+	FIX_ENTRY_TRAMP_TEXT2,
+	FIX_ENTRY_TRAMP_TEXT1,
 	FIX_ENTRY_TRAMP_DATA,
-	FIX_ENTRY_TRAMP_TEXT,
-#define TRAMP_VALIAS		(__fix_to_virt(FIX_ENTRY_TRAMP_TEXT))
+#define TRAMP_VALIAS		(__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1))
 #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
 	__end_of_permanent_fixed_addresses,
 
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index f68fbb207473..8db5ec0089db 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -108,6 +108,7 @@
 #define KERNEL_HWCAP_ECV		__khwcap2_feature(ECV)
 #define KERNEL_HWCAP_AFP		__khwcap2_feature(AFP)
 #define KERNEL_HWCAP_RPRES		__khwcap2_feature(RPRES)
+#define KERNEL_HWCAP_MTE3		__khwcap2_feature(MTE3)
 
 /*
  * This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/insn-def.h b/arch/arm64/include/asm/insn-def.h
index 2c075f615c6a..1a7d0d483698 100644
--- a/arch/arm64/include/asm/insn-def.h
+++ b/arch/arm64/include/asm/insn-def.h
@@ -3,7 +3,21 @@
 #ifndef __ASM_INSN_DEF_H
 #define __ASM_INSN_DEF_H
 
+#include <asm/brk-imm.h>
+
 /* A64 instructions are always 32 bits. */
 #define	AARCH64_INSN_SIZE		4
 
+/*
+ * BRK instruction encoding
+ * The #imm16 value should be placed at bits[20:5] within BRK ins
+ */
+#define AARCH64_BREAK_MON	0xd4200000
+
+/*
+ * BRK instruction for provoking a fault on purpose
+ * Unlike kgdb, #imm16 value with unallocated handler is used for faulting.
+ */
+#define AARCH64_BREAK_FAULT	(AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5))
+
 #endif /* __ASM_INSN_DEF_H */
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 6b776c8667b2..1e5760d567ae 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -65,6 +65,7 @@ enum aarch64_insn_hint_cr_op {
 	AARCH64_INSN_HINT_PSB  = 0x11 << 5,
 	AARCH64_INSN_HINT_TSB  = 0x12 << 5,
 	AARCH64_INSN_HINT_CSDB = 0x14 << 5,
+	AARCH64_INSN_HINT_CLEARBHB = 0x16 << 5,
 
 	AARCH64_INSN_HINT_BTI   = 0x20 << 5,
 	AARCH64_INSN_HINT_BTIC  = 0x22 << 5,
@@ -205,7 +206,9 @@ enum aarch64_insn_ldst_type {
 	AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
 	AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
 	AARCH64_INSN_LDST_LOAD_EX,
+	AARCH64_INSN_LDST_LOAD_ACQ_EX,
 	AARCH64_INSN_LDST_STORE_EX,
+	AARCH64_INSN_LDST_STORE_REL_EX,
 };
 
 enum aarch64_insn_adsb_type {
@@ -280,6 +283,36 @@ enum aarch64_insn_adr_type {
 	AARCH64_INSN_ADR_TYPE_ADR,
 };
 
+enum aarch64_insn_mem_atomic_op {
+	AARCH64_INSN_MEM_ATOMIC_ADD,
+	AARCH64_INSN_MEM_ATOMIC_CLR,
+	AARCH64_INSN_MEM_ATOMIC_EOR,
+	AARCH64_INSN_MEM_ATOMIC_SET,
+	AARCH64_INSN_MEM_ATOMIC_SWP,
+};
+
+enum aarch64_insn_mem_order_type {
+	AARCH64_INSN_MEM_ORDER_NONE,
+	AARCH64_INSN_MEM_ORDER_ACQ,
+	AARCH64_INSN_MEM_ORDER_REL,
+	AARCH64_INSN_MEM_ORDER_ACQREL,
+};
+
+enum aarch64_insn_mb_type {
+	AARCH64_INSN_MB_SY,
+	AARCH64_INSN_MB_ST,
+	AARCH64_INSN_MB_LD,
+	AARCH64_INSN_MB_ISH,
+	AARCH64_INSN_MB_ISHST,
+	AARCH64_INSN_MB_ISHLD,
+	AARCH64_INSN_MB_NSH,
+	AARCH64_INSN_MB_NSHST,
+	AARCH64_INSN_MB_NSHLD,
+	AARCH64_INSN_MB_OSH,
+	AARCH64_INSN_MB_OSHST,
+	AARCH64_INSN_MB_OSHLD,
+};
+
 #define	__AARCH64_INSN_FUNCS(abbr, mask, val)				\
 static __always_inline bool aarch64_insn_is_##abbr(u32 code)		\
 {									\
@@ -303,6 +336,11 @@ __AARCH64_INSN_FUNCS(store_post,	0x3FE00C00, 0x38000400)
 __AARCH64_INSN_FUNCS(load_post,	0x3FE00C00, 0x38400400)
 __AARCH64_INSN_FUNCS(str_reg,	0x3FE0EC00, 0x38206800)
 __AARCH64_INSN_FUNCS(ldadd,	0x3F20FC00, 0x38200000)
+__AARCH64_INSN_FUNCS(ldclr,	0x3F20FC00, 0x38201000)
+__AARCH64_INSN_FUNCS(ldeor,	0x3F20FC00, 0x38202000)
+__AARCH64_INSN_FUNCS(ldset,	0x3F20FC00, 0x38203000)
+__AARCH64_INSN_FUNCS(swp,	0x3F20FC00, 0x38208000)
+__AARCH64_INSN_FUNCS(cas,	0x3FA07C00, 0x08A07C00)
 __AARCH64_INSN_FUNCS(ldr_reg,	0x3FE0EC00, 0x38606800)
 __AARCH64_INSN_FUNCS(ldr_lit,	0xBF000000, 0x18000000)
 __AARCH64_INSN_FUNCS(ldrsw_lit,	0xFF000000, 0x98000000)
@@ -474,13 +512,6 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
 				   enum aarch64_insn_register state,
 				   enum aarch64_insn_size_type size,
 				   enum aarch64_insn_ldst_type type);
-u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
-			   enum aarch64_insn_register address,
-			   enum aarch64_insn_register value,
-			   enum aarch64_insn_size_type size);
-u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
-			   enum aarch64_insn_register value,
-			   enum aarch64_insn_size_type size);
 u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
 				 enum aarch64_insn_register src,
 				 int imm, enum aarch64_insn_variant variant,
@@ -541,6 +572,42 @@ u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
 			      enum aarch64_insn_prfm_type type,
 			      enum aarch64_insn_prfm_target target,
 			      enum aarch64_insn_prfm_policy policy);
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
+				  enum aarch64_insn_register address,
+				  enum aarch64_insn_register value,
+				  enum aarch64_insn_size_type size,
+				  enum aarch64_insn_mem_atomic_op op,
+				  enum aarch64_insn_mem_order_type order);
+u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
+			 enum aarch64_insn_register address,
+			 enum aarch64_insn_register value,
+			 enum aarch64_insn_size_type size,
+			 enum aarch64_insn_mem_order_type order);
+#else
+static inline
+u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
+				  enum aarch64_insn_register address,
+				  enum aarch64_insn_register value,
+				  enum aarch64_insn_size_type size,
+				  enum aarch64_insn_mem_atomic_op op,
+				  enum aarch64_insn_mem_order_type order)
+{
+	return AARCH64_BREAK_FAULT;
+}
+
+static inline
+u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
+			 enum aarch64_insn_register address,
+			 enum aarch64_insn_register value,
+			 enum aarch64_insn_size_type size,
+			 enum aarch64_insn_mem_order_type order)
+{
+	return AARCH64_BREAK_FAULT;
+}
+#endif
+u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
+
 s32 aarch64_get_branch_offset(u32 insn);
 u32 aarch64_set_branch_offset(u32 insn, s32 offset);
 
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 01d47c5886dc..1767ded83888 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -355,8 +355,8 @@
 	ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
 	ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
 
-#define CPACR_EL1_FPEN		(3 << 20)
 #define CPACR_EL1_TTA		(1 << 28)
-#define CPACR_EL1_DEFAULT	(CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN)
+#define CPACR_EL1_DEFAULT	(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\
+				 CPACR_EL1_ZEN_EL1EN)
 
 #endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5bc01e62c08a..031e3a2537fc 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -714,6 +714,11 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
 	ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr();
 }
 
+static inline bool kvm_system_needs_idmapped_vectors(void)
+{
+	return cpus_have_const_cap(ARM64_SPECTRE_V3A);
+}
+
 void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
 
 static inline void kvm_arch_hardware_unsetup(void) {}
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 462882f356c7..aa7fa2a08f06 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -118,6 +118,7 @@ extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index b77e9b3f5371..43f8c25b3fda 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -39,28 +39,4 @@
 	SYM_START(name, SYM_L_WEAK, SYM_A_NONE)		\
 	bti c ;
 
-/*
- * Annotate a function as position independent, i.e., safe to be called before
- * the kernel virtual mapping is activated.
- */
-#define SYM_FUNC_START_PI(x)			\
-		SYM_FUNC_START_ALIAS(__pi_##x);	\
-		SYM_FUNC_START(x)
-
-#define SYM_FUNC_START_WEAK_PI(x)		\
-		SYM_FUNC_START_ALIAS(__pi_##x);	\
-		SYM_FUNC_START_WEAK(x)
-
-#define SYM_FUNC_START_WEAK_ALIAS_PI(x)		\
-		SYM_FUNC_START_ALIAS(__pi_##x);	\
-		SYM_START(x, SYM_L_WEAK, SYM_A_ALIGN)
-
-#define SYM_FUNC_END_PI(x)			\
-		SYM_FUNC_END(x);		\
-		SYM_FUNC_END_ALIAS(__pi_##x)
-
-#define SYM_FUNC_END_ALIAS_PI(x)		\
-		SYM_FUNC_END_ALIAS(x);		\
-		SYM_FUNC_END_ALIAS(__pi_##x)
-
 #endif
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index 5d10051c3e62..29c85810ae69 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -17,12 +17,10 @@
 #include <asm/cpucaps.h>
 
 extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
-extern struct static_key_false arm64_const_caps_ready;
 
-static inline bool system_uses_lse_atomics(void)
+static __always_inline bool system_uses_lse_atomics(void)
 {
-	return (static_branch_likely(&arm64_const_caps_ready)) &&
-		static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
+	return static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
 }
 
 #define __lse_ll_sc_body(op, ...)					\
diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h
index a11ccadd47d2..094701ec5500 100644
--- a/arch/arm64/include/asm/module.lds.h
+++ b/arch/arm64/include/asm/module.lds.h
@@ -1,8 +1,8 @@
 SECTIONS {
 #ifdef CONFIG_ARM64_MODULE_PLTS
-	.plt 0 (NOLOAD) : { BYTE(0) }
-	.init.plt 0 (NOLOAD) : { BYTE(0) }
-	.text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) }
+	.plt 0 : { BYTE(0) }
+	.init.plt 0 : { BYTE(0) }
+	.text.ftrace_trampoline 0 : { BYTE(0) }
 #endif
 
 #ifdef CONFIG_KASAN_SW_TAGS
diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h
index 626d359b396e..14ee86b019c2 100644
--- a/arch/arm64/include/asm/mte-def.h
+++ b/arch/arm64/include/asm/mte-def.h
@@ -11,6 +11,7 @@
 #define MTE_TAG_SHIFT		56
 #define MTE_TAG_SIZE		4
 #define MTE_TAG_MASK		GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
+#define MTE_PAGE_TAG_STORAGE	(MTE_GRANULES_PER_PAGE * MTE_TAG_SIZE / 8)
 
 #define __MTE_PREAMBLE		ARM64_ASM_PREAMBLE ".arch_extension memtag\n"
 
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index e4704a403237..a857bcacf0fe 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -5,6 +5,7 @@
 #ifndef __ASM_MTE_KASAN_H
 #define __ASM_MTE_KASAN_H
 
+#include <asm/compiler.h>
 #include <asm/mte-def.h>
 
 #ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 075539f5f1c8..adcb937342f1 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -11,7 +11,9 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/bitfield.h>
+#include <linux/kasan-enabled.h>
 #include <linux/page-flags.h>
+#include <linux/sched.h>
 #include <linux/types.h>
 
 #include <asm/pgtable-types.h>
@@ -86,6 +88,26 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child,
 
 #endif /* CONFIG_ARM64_MTE */
 
+static inline void mte_disable_tco_entry(struct task_struct *task)
+{
+	if (!system_supports_mte())
+		return;
+
+	/*
+	 * Re-enable tag checking (TCO set on exception entry). This is only
+	 * necessary if MTE is enabled in either the kernel or the userspace
+	 * task in synchronous or asymmetric mode (SCTLR_EL1.TCF0 bit 0 is set
+	 * for both). With MTE disabled in the kernel and disabled or
+	 * asynchronous in userspace, tag check faults (including in uaccesses)
+	 * are not reported, therefore there is no need to re-enable checking.
+	 * This is beneficial on microarchitectures where re-enabling TCO is
+	 * expensive.
+	 */
+	if (kasan_hw_tags_enabled() ||
+	    (task->thread.sctlr_user & (1UL << SCTLR_EL1_TCF0_SHIFT)))
+		asm volatile(SET_PSTATE_TCO(0));
+}
+
 #ifdef CONFIG_KASAN_HW_TAGS
 /* Whether the MTE asynchronous mode is enabled. */
 DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 4ef6f19331f9..3eaf462f5752 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -15,70 +15,70 @@
 /*
  * Common architectural and microarchitectural event numbers.
  */
-#define ARMV8_PMUV3_PERFCTR_SW_INCR				0x00
-#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL			0x01
-#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL			0x02
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL			0x03
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE				0x04
-#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL			0x05
-#define ARMV8_PMUV3_PERFCTR_LD_RETIRED				0x06
-#define ARMV8_PMUV3_PERFCTR_ST_RETIRED				0x07
-#define ARMV8_PMUV3_PERFCTR_INST_RETIRED			0x08
-#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN				0x09
-#define ARMV8_PMUV3_PERFCTR_EXC_RETURN				0x0A
-#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED			0x0B
-#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED			0x0C
-#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED			0x0D
-#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED			0x0E
-#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED		0x0F
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED				0x10
-#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES				0x11
-#define ARMV8_PMUV3_PERFCTR_BR_PRED				0x12
-#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS				0x13
-#define ARMV8_PMUV3_PERFCTR_L1I_CACHE				0x14
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB			0x15
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE				0x16
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL			0x17
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB			0x18
-#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS				0x19
-#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR			0x1A
-#define ARMV8_PMUV3_PERFCTR_INST_SPEC				0x1B
-#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED			0x1C
-#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES				0x1D
-#define ARMV8_PMUV3_PERFCTR_CHAIN				0x1E
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE			0x1F
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE			0x20
-#define ARMV8_PMUV3_PERFCTR_BR_RETIRED				0x21
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED			0x22
-#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND			0x23
-#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND			0x24
-#define ARMV8_PMUV3_PERFCTR_L1D_TLB				0x25
-#define ARMV8_PMUV3_PERFCTR_L1I_TLB				0x26
-#define ARMV8_PMUV3_PERFCTR_L2I_CACHE				0x27
-#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL			0x28
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE			0x29
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL			0x2A
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE				0x2B
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB			0x2C
-#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL			0x2D
-#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL			0x2E
-#define ARMV8_PMUV3_PERFCTR_L2D_TLB				0x2F
-#define ARMV8_PMUV3_PERFCTR_L2I_TLB				0x30
-#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS			0x31
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE				0x32
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS			0x33
-#define ARMV8_PMUV3_PERFCTR_DTLB_WALK				0x34
-#define ARMV8_PMUV3_PERFCTR_ITLB_WALK				0x35
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD				0x36
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD			0x37
-#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD			0x38
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD			0x39
-#define ARMV8_PMUV3_PERFCTR_OP_RETIRED				0x3A
-#define ARMV8_PMUV3_PERFCTR_OP_SPEC				0x3B
-#define ARMV8_PMUV3_PERFCTR_STALL				0x3C
-#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND			0x3D
-#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND			0x3E
-#define ARMV8_PMUV3_PERFCTR_STALL_SLOT				0x3F
+#define ARMV8_PMUV3_PERFCTR_SW_INCR				0x0000
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL			0x0001
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL			0x0002
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL			0x0003
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE				0x0004
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL			0x0005
+#define ARMV8_PMUV3_PERFCTR_LD_RETIRED				0x0006
+#define ARMV8_PMUV3_PERFCTR_ST_RETIRED				0x0007
+#define ARMV8_PMUV3_PERFCTR_INST_RETIRED			0x0008
+#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN				0x0009
+#define ARMV8_PMUV3_PERFCTR_EXC_RETURN				0x000A
+#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED			0x000B
+#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED			0x000C
+#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED			0x000D
+#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED			0x000E
+#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED		0x000F
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED				0x0010
+#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES				0x0011
+#define ARMV8_PMUV3_PERFCTR_BR_PRED				0x0012
+#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS				0x0013
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE				0x0014
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB			0x0015
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE				0x0016
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL			0x0017
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB			0x0018
+#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS				0x0019
+#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR			0x001A
+#define ARMV8_PMUV3_PERFCTR_INST_SPEC				0x001B
+#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED			0x001C
+#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES				0x001D
+#define ARMV8_PMUV3_PERFCTR_CHAIN				0x001E
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE			0x001F
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE			0x0020
+#define ARMV8_PMUV3_PERFCTR_BR_RETIRED				0x0021
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED			0x0022
+#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND			0x0023
+#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND			0x0024
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB				0x0025
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB				0x0026
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE				0x0027
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL			0x0028
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE			0x0029
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL			0x002A
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE				0x002B
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB			0x002C
+#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL			0x002D
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL			0x002E
+#define ARMV8_PMUV3_PERFCTR_L2D_TLB				0x002F
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB				0x0030
+#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS			0x0031
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE				0x0032
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS			0x0033
+#define ARMV8_PMUV3_PERFCTR_DTLB_WALK				0x0034
+#define ARMV8_PMUV3_PERFCTR_ITLB_WALK				0x0035
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD				0x0036
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD			0x0037
+#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD			0x0038
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD			0x0039
+#define ARMV8_PMUV3_PERFCTR_OP_RETIRED				0x003A
+#define ARMV8_PMUV3_PERFCTR_OP_SPEC				0x003B
+#define ARMV8_PMUV3_PERFCTR_STALL				0x003C
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND			0x003D
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND			0x003E
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT				0x003F
 
 /* Statistical profiling extension microarchitectural events */
 #define	ARMV8_SPE_PERFCTR_SAMPLE_POP				0x4000
@@ -96,6 +96,20 @@
 #define	ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS			0x400A
 #define	ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD			0x400B
 
+/* Trace buffer events */
+#define ARMV8_PMUV3_PERFCTR_TRB_WRAP				0x400C
+#define ARMV8_PMUV3_PERFCTR_TRB_TRIG				0x400E
+
+/* Trace unit events */
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT0				0x4010
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT1				0x4011
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT2				0x4012
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT3				0x4013
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4			0x4018
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5			0x4019
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6			0x401A
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7			0x401B
+
 /* additional latency from alignment events */
 #define	ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT			0x4020
 #define	ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT			0x4021
@@ -107,91 +121,91 @@
 #define	ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR			0x4026
 
 /* ARMv8 recommended implementation defined event types */
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD			0x40
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR			0x41
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD		0x42
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR		0x43
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER		0x44
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER		0x45
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM		0x46
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN			0x47
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL			0x48
-
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD			0x4C
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR			0x4D
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD				0x4E
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR				0x4F
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD			0x50
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR			0x51
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD		0x52
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR		0x53
-
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM		0x56
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN			0x57
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL			0x58
-
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD			0x5C
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR			0x5D
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD				0x5E
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR				0x5F
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD			0x60
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR			0x61
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED			0x62
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED		0x63
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL			0x64
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH			0x65
-#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD			0x66
-#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR			0x67
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC			0x68
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC			0x69
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC		0x6A
-
-#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC				0x6C
-#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC			0x6D
-#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC			0x6E
-#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC				0x6F
-#define ARMV8_IMPDEF_PERFCTR_LD_SPEC				0x70
-#define ARMV8_IMPDEF_PERFCTR_ST_SPEC				0x71
-#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC				0x72
-#define ARMV8_IMPDEF_PERFCTR_DP_SPEC				0x73
-#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC				0x74
-#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC				0x75
-#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC			0x76
-#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC			0x77
-#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC			0x78
-#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC			0x79
-#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC			0x7A
-
-#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC				0x7C
-#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC				0x7D
-#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC				0x7E
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF				0x81
-#define ARMV8_IMPDEF_PERFCTR_EXC_SVC				0x82
-#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT				0x83
-#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT				0x84
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ				0x86
-#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ				0x87
-#define ARMV8_IMPDEF_PERFCTR_EXC_SMC				0x88
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_HVC				0x8A
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT			0x8B
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT			0x8C
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER			0x8D
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ			0x8E
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ			0x8F
-#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC				0x90
-#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC				0x91
-
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD			0xA0
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR			0xA1
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD		0xA2
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR		0xA3
-
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM		0xA6
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN			0xA7
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL			0xA8
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD			0x0040
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR			0x0041
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD		0x0042
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR		0x0043
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER		0x0044
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER		0x0045
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM		0x0046
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN			0x0047
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL			0x0048
+
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD			0x004C
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR			0x004D
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD				0x004E
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR				0x004F
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD			0x0050
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR			0x0051
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD		0x0052
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR		0x0053
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM		0x0056
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN			0x0057
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL			0x0058
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD			0x005C
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR			0x005D
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD				0x005E
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR				0x005F
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD			0x0060
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR			0x0061
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED			0x0062
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED		0x0063
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL			0x0064
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH			0x0065
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD			0x0066
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR			0x0067
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC			0x0068
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC			0x0069
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC		0x006A
+
+#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC				0x006C
+#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC			0x006D
+#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC			0x006E
+#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC				0x006F
+#define ARMV8_IMPDEF_PERFCTR_LD_SPEC				0x0070
+#define ARMV8_IMPDEF_PERFCTR_ST_SPEC				0x0071
+#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC				0x0072
+#define ARMV8_IMPDEF_PERFCTR_DP_SPEC				0x0073
+#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC				0x0074
+#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC				0x0075
+#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC			0x0076
+#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC			0x0077
+#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC			0x0078
+#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC			0x0079
+#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC			0x007A
+
+#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC				0x007C
+#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC				0x007D
+#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC				0x007E
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF				0x0081
+#define ARMV8_IMPDEF_PERFCTR_EXC_SVC				0x0082
+#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT				0x0083
+#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT				0x0084
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ				0x0086
+#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ				0x0087
+#define ARMV8_IMPDEF_PERFCTR_EXC_SMC				0x0088
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_HVC				0x008A
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT			0x008B
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT			0x008C
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER			0x008D
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ			0x008E
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ			0x008F
+#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC				0x0090
+#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC				0x0091
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD			0x00A0
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR			0x00A1
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD		0x00A2
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR		0x00A3
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM		0x00A6
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN			0x00A7
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL			0x00A8
 
 /*
  * Per-CPU PMCR: config reg
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 40085e53f573..66671ff05183 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -273,6 +273,8 @@
 #define TCR_NFD1		(UL(1) << 54)
 #define TCR_E0PD0		(UL(1) << 55)
 #define TCR_E0PD1		(UL(1) << 56)
+#define TCR_TCMA0		(UL(1) << 57)
+#define TCR_TCMA1		(UL(1) << 58)
 
 /*
  * TTBR.
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 7032f04c8ac6..b1e1b74d993c 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -92,7 +92,7 @@ extern bool arm64_use_ng_mappings;
 #define __P001  PAGE_READONLY
 #define __P010  PAGE_READONLY
 #define __P011  PAGE_READONLY
-#define __P100  PAGE_EXECONLY
+#define __P100  PAGE_READONLY_EXEC	/* PAGE_EXECONLY if Enhanced PAN */
 #define __P101  PAGE_READONLY_EXEC
 #define __P110  PAGE_READONLY_EXEC
 #define __P111  PAGE_READONLY_EXEC
@@ -101,7 +101,7 @@ extern bool arm64_use_ng_mappings;
 #define __S001  PAGE_READONLY
 #define __S010  PAGE_SHARED
 #define __S011  PAGE_SHARED
-#define __S100  PAGE_EXECONLY
+#define __S100  PAGE_READONLY_EXEC	/* PAGE_EXECONLY if Enhanced PAN */
 #define __S101  PAGE_READONLY_EXEC
 #define __S110  PAGE_SHARED_EXEC
 #define __S111  PAGE_SHARED_EXEC
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index c4ba047a82d2..94e147e5456c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1017,17 +1017,6 @@ static inline bool arch_wants_old_prefaulted_pte(void)
 }
 #define arch_wants_old_prefaulted_pte	arch_wants_old_prefaulted_pte
 
-static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
-{
-	if (cpus_have_const_cap(ARM64_HAS_EPAN))
-		return prot;
-
-	if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY))
-		return prot;
-
-	return PAGE_READONLY_EXEC;
-}
-
 static inline bool pud_sect_supported(void)
 {
 	return PAGE_SIZE == SZ_4K;
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 6f41b65f9962..73e38d9a540c 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -21,6 +21,7 @@
 
 #define MTE_CTRL_TCF_SYNC		(1UL << 16)
 #define MTE_CTRL_TCF_ASYNC		(1UL << 17)
+#define MTE_CTRL_TCF_ASYMM		(1UL << 18)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h
index 1bce62fa908a..56f7b1d4d54b 100644
--- a/arch/arm64/include/asm/rwonce.h
+++ b/arch/arm64/include/asm/rwonce.h
@@ -5,7 +5,7 @@
 #ifndef __ASM_RWONCE_H
 #define __ASM_RWONCE_H
 
-#ifdef CONFIG_LTO
+#if defined(CONFIG_LTO) && !defined(__ASSEMBLY__)
 
 #include <linux/compiler_types.h>
 #include <asm/alternative-macros.h>
@@ -66,7 +66,7 @@
 })
 
 #endif	/* !BUILD_VDSO */
-#endif	/* CONFIG_LTO */
+#endif	/* CONFIG_LTO && !__ASSEMBLY__ */
 
 #include <asm-generic/rwonce.h>
 
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 152cb35bf9df..40971ac1303f 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -23,4 +23,9 @@ extern char __mmuoff_data_start[], __mmuoff_data_end[];
 extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
 extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[];
 
+static inline size_t entry_tramp_text_size(void)
+{
+	return __entry_tramp_text_end - __entry_tramp_text_start;
+}
+
 #endif /* __ASM_SECTIONS_H */
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index f62ca39da6c5..aa3d3607d5c8 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -67,7 +67,8 @@ struct bp_hardening_data {
 
 DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
 
-static inline void arm64_apply_bp_hardening(void)
+/* Called during entry so must be __always_inline */
+static __always_inline void arm64_apply_bp_hardening(void)
 {
 	struct bp_hardening_data *d;
 
@@ -93,5 +94,9 @@ void spectre_v4_enable_task_mitigation(struct task_struct *tsk);
 
 enum mitigation_state arm64_get_meltdown_state(void);
 
+enum mitigation_state arm64_get_spectre_bhb_state(void);
+bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope);
+u8 spectre_bhb_loop_affected(int scope);
+void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
 #endif	/* __ASSEMBLY__ */
 #endif	/* __ASM_SPECTRE_H */
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index 95f7686b728d..3a3264ff47b9 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h
@@ -12,13 +12,11 @@ extern char *strrchr(const char *, int c);
 #define __HAVE_ARCH_STRCHR
 extern char *strchr(const char *, int c);
 
-#ifndef CONFIG_KASAN_HW_TAGS
 #define __HAVE_ARCH_STRCMP
 extern int strcmp(const char *, const char *);
 
 #define __HAVE_ARCH_STRNCMP
 extern int strncmp(const char *, const char *, __kernel_size_t);
-#endif
 
 #define __HAVE_ARCH_STRLEN
 extern __kernel_size_t strlen(const char *);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 898bee0004ae..c7ca3a105528 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -773,6 +773,9 @@
 #define ID_AA64ISAR1_GPI_IMP_DEF		0x1
 
 /* id_aa64isar2 */
+#define ID_AA64ISAR2_CLEARBHB_SHIFT	28
+#define ID_AA64ISAR2_APA3_SHIFT		12
+#define ID_AA64ISAR2_GPA3_SHIFT		8
 #define ID_AA64ISAR2_RPRES_SHIFT	4
 #define ID_AA64ISAR2_WFXT_SHIFT		0
 
@@ -786,6 +789,16 @@
 #define ID_AA64ISAR2_WFXT_NI		0x0
 #define ID_AA64ISAR2_WFXT_SUPPORTED	0x2
 
+#define ID_AA64ISAR2_APA3_NI			0x0
+#define ID_AA64ISAR2_APA3_ARCHITECTED		0x1
+#define ID_AA64ISAR2_APA3_ARCH_EPAC		0x2
+#define ID_AA64ISAR2_APA3_ARCH_EPAC2		0x3
+#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC	0x4
+#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC_CMB	0x5
+
+#define ID_AA64ISAR2_GPA3_NI			0x0
+#define ID_AA64ISAR2_GPA3_ARCHITECTED		0x1
+
 /* id_aa64pfr0 */
 #define ID_AA64PFR0_CSV3_SHIFT		60
 #define ID_AA64PFR0_CSV2_SHIFT		56
@@ -904,6 +917,7 @@
 #endif
 
 /* id_aa64mmfr1 */
+#define ID_AA64MMFR1_ECBHB_SHIFT	60
 #define ID_AA64MMFR1_AFP_SHIFT		44
 #define ID_AA64MMFR1_ETS_SHIFT		36
 #define ID_AA64MMFR1_TWED_SHIFT		32
@@ -1097,13 +1111,11 @@
 #define ZCR_ELx_LEN_SIZE	9
 #define ZCR_ELx_LEN_MASK	0x1ff
 
+#define CPACR_EL1_FPEN_EL1EN	(BIT(20)) /* enable EL1 access */
+#define CPACR_EL1_FPEN_EL0EN	(BIT(21)) /* enable EL0 access, if EL1EN set */
+
 #define CPACR_EL1_ZEN_EL1EN	(BIT(16)) /* enable EL1 access */
 #define CPACR_EL1_ZEN_EL0EN	(BIT(17)) /* enable EL0 access, if EL1EN set */
-#define CPACR_EL1_ZEN		(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
-
-/* TCR EL1 Bit Definitions */
-#define SYS_TCR_EL1_TCMA1	(BIT(58))
-#define SYS_TCR_EL1_TCMA0	(BIT(57))
 
 /* GCR_EL1 Definitions */
 #define SYS_GCR_EL1_RRND	(BIT(16))
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index f386b90a79c8..9fab663dd2de 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -24,6 +24,10 @@ void update_freq_counters_refs(void);
 #define arch_scale_freq_capacity topology_get_freq_scale
 #define arch_scale_freq_invariant topology_scale_freq_invariant
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+#define arch_init_invariance_cppc topology_init_cpu_capacity_cppc
+#endif
+
 /* Replace task scheduler's default cpu-invariant accounting */
 #define arch_scale_cpu_capacity topology_get_cpu_scale
 
diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h
new file mode 100644
index 000000000000..bc9a2145f419
--- /dev/null
+++ b/arch/arm64/include/asm/vectors.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 ARM Ltd.
+ */
+#ifndef __ASM_VECTORS_H
+#define __ASM_VECTORS_H
+
+#include <linux/bug.h>
+#include <linux/percpu.h>
+
+#include <asm/fixmap.h>
+
+extern char vectors[];
+extern char tramp_vectors[];
+extern char __bp_harden_el1_vectors[];
+
+/*
+ * Note: the order of this enum corresponds to two arrays in entry.S:
+ * tramp_vecs and __bp_harden_el1_vectors. By default the canonical
+ * 'full fat' vectors are used directly.
+ */
+enum arm64_bp_harden_el1_vectors {
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+	/*
+	 * Perform the BHB loop mitigation, before branching to the canonical
+	 * vectors.
+	 */
+	EL1_VECTOR_BHB_LOOP,
+
+	/*
+	 * Make the SMC call for firmware mitigation, before branching to the
+	 * canonical vectors.
+	 */
+	EL1_VECTOR_BHB_FW,
+
+	/*
+	 * Use the ClearBHB instruction, before branching to the canonical
+	 * vectors.
+	 */
+	EL1_VECTOR_BHB_CLEAR_INSN,
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+
+	/*
+	 * Remap the kernel before branching to the canonical vectors.
+	 */
+	EL1_VECTOR_KPTI,
+};
+
+#ifndef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+#define EL1_VECTOR_BHB_LOOP		-1
+#define EL1_VECTOR_BHB_FW		-1
+#define EL1_VECTOR_BHB_CLEAR_INSN	-1
+#endif /* !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+
+/* The vectors to use on return from EL0. e.g. to remap the kernel */
+DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector);
+
+#ifndef CONFIG_UNMAP_KERNEL_AT_EL0
+#define TRAMP_VALIAS	0ul
+#endif
+
+static inline const char *
+arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot)
+{
+	if (arm64_kernel_unmapped_at_el0())
+		return (char *)(TRAMP_VALIAS + SZ_2K * slot);
+
+	WARN_ON_ONCE(slot == EL1_VECTOR_KPTI);
+
+	return __bp_harden_el1_vectors + SZ_2K * slot;
+}
+
+#endif /* __ASM_VECTORS_H */
diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h
index 947f6a4f1aa0..befcd8a7abc9 100644
--- a/arch/arm64/include/asm/xor.h
+++ b/arch/arm64/include/asm/xor.h
@@ -16,7 +16,8 @@
 extern struct xor_block_template const xor_block_inner_neon;
 
 static void
-xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2)
 {
 	kernel_neon_begin();
 	xor_block_inner_neon.do_2(bytes, p1, p2);
@@ -24,8 +25,9 @@ xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3)
+xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2,
+	   const unsigned long * __restrict p3)
 {
 	kernel_neon_begin();
 	xor_block_inner_neon.do_3(bytes, p1, p2, p3);
@@ -33,8 +35,10 @@ xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3, unsigned long *p4)
+xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2,
+	   const unsigned long * __restrict p3,
+	   const unsigned long * __restrict p4)
 {
 	kernel_neon_begin();
 	xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4);
@@ -42,8 +46,11 @@ xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2,
+	   const unsigned long * __restrict p3,
+	   const unsigned long * __restrict p4,
+	   const unsigned long * __restrict p5)
 {
 	kernel_neon_begin();
 	xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5);
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index f03731847d9d..99cb5d383048 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -78,5 +78,6 @@
 #define HWCAP2_ECV		(1 << 19)
 #define HWCAP2_AFP		(1 << 20)
 #define HWCAP2_RPRES		(1 << 21)
+#define HWCAP2_MTE3		(1 << 22)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index b3edde68bc3e..323e251ed37b 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -281,6 +281,11 @@ struct kvm_arm_copy_mte_tags {
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED	3
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED     	(1U << 4)
 
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3	KVM_REG_ARM_FW_REG(3)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL		0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL		1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED	2
+
 /* SVE registers */
 #define KVM_REG_ARM64_SVE		(0x15 << KVM_REG_ARM_COPROC_SHIFT)
 
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 88b3e2a21408..986837d7ec82 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
 obj-$(CONFIG_PARAVIRT)			+= paravirt.o
 obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o
 obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
+obj-$(CONFIG_ELF_CORE)			+= elfcore.o
 obj-$(CONFIG_KEXEC_CORE)		+= machine_kexec.o relocate_kernel.o	\
 					   cpu-reset.o
 obj-$(CONFIG_KEXEC_FILE)		+= machine_kexec_file.o kexec_image.o
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index b217941713a8..4c9b5b4b7a0b 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -214,6 +214,21 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
 };
 #endif
 
+#ifdef CONFIG_CAVIUM_ERRATUM_23154
+const struct midr_range cavium_erratum_23154_cpus[] = {
+	MIDR_ALL_VERSIONS(MIDR_THUNDERX),
+	MIDR_ALL_VERSIONS(MIDR_THUNDERX_81XX),
+	MIDR_ALL_VERSIONS(MIDR_THUNDERX_83XX),
+	MIDR_ALL_VERSIONS(MIDR_OCTX2_98XX),
+	MIDR_ALL_VERSIONS(MIDR_OCTX2_96XX),
+	MIDR_ALL_VERSIONS(MIDR_OCTX2_95XX),
+	MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXN),
+	MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXMM),
+	MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXO),
+	{},
+};
+#endif
+
 #ifdef CONFIG_CAVIUM_ERRATUM_27456
 const struct midr_range cavium_erratum_27456_cpus[] = {
 	/* Cavium ThunderX, T88 pass 1.x - 2.1 */
@@ -425,10 +440,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 #endif
 #ifdef CONFIG_CAVIUM_ERRATUM_23154
 	{
-	/* Cavium ThunderX, pass 1.x */
-		.desc = "Cavium erratum 23154",
+		.desc = "Cavium errata 23154 and 38545",
 		.capability = ARM64_WORKAROUND_CAVIUM_23154,
-		ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1),
+		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+		ERRATA_MIDR_RANGE_LIST(cavium_erratum_23154_cpus),
 	},
 #endif
 #ifdef CONFIG_CAVIUM_ERRATUM_27456
@@ -502,6 +517,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		.matches = has_spectre_v4,
 		.cpu_enable = spectre_v4_enable_mitigation,
 	},
+	{
+		.desc = "Spectre-BHB",
+		.capability = ARM64_SPECTRE_BHB,
+		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+		.matches = is_spectre_bhb_affected,
+		.cpu_enable = spectre_bhb_enable_mitigation,
+	},
 #ifdef CONFIG_ARM64_ERRATUM_1418040
 	{
 		.desc = "ARM erratum 1418040",
@@ -604,7 +626,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	{
 		.desc = "ARM erratum 2077057",
 		.capability = ARM64_WORKAROUND_2077057,
-		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
 		ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2),
 	},
 #endif
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e5f23dab1c8d..d72c4b4d389c 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -73,6 +73,8 @@
 #include <linux/mm.h>
 #include <linux/cpu.h>
 #include <linux/kasan.h>
+#include <linux/percpu.h>
+
 #include <asm/cpu.h>
 #include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
@@ -85,6 +87,7 @@
 #include <asm/smp.h>
 #include <asm/sysreg.h>
 #include <asm/traps.h>
+#include <asm/vectors.h>
 #include <asm/virt.h>
 
 /* Kernel representation of AT_HWCAP and AT_HWCAP2 */
@@ -110,6 +113,8 @@ DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE);
 bool arm64_use_ng_mappings = false;
 EXPORT_SYMBOL(arm64_use_ng_mappings);
 
+DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors;
+
 /*
  * Permit PER_LINUX32 and execve() of 32-bit binaries even if not all CPUs
  * support it?
@@ -226,6 +231,11 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
+		       FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_APA3_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_GPA3_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
@@ -596,6 +606,7 @@ static const struct arm64_ftr_bits ftr_raz[] = {
 struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
 struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
 struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
+struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
 
 static const struct __ftr_reg_entry {
 	u32			sys_id;
@@ -644,6 +655,8 @@ static const struct __ftr_reg_entry {
 	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1,
 			       &id_aa64isar1_override),
 	ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2),
+	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2,
+			       &id_aa64isar2_override),
 
 	/* Op1 = 0, CRn = 0, CRm = 7 */
 	ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
@@ -1307,7 +1320,9 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
 static bool
 feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
 {
-	int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign);
+	int val = cpuid_feature_extract_field_width(reg, entry->field_pos,
+						    entry->field_width,
+						    entry->sign);
 
 	return val >= entry->min_field_value;
 }
@@ -1590,6 +1605,12 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
 
 	int cpu = smp_processor_id();
 
+	if (__this_cpu_read(this_cpu_vector) == vectors) {
+		const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI);
+
+		__this_cpu_write(this_cpu_vector, v);
+	}
+
 	/*
 	 * We don't need to rewrite the page-tables if either we've done
 	 * it already or we have KASLR enabled and therefore have not
@@ -1775,14 +1796,6 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused)
 		write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
 }
 
-static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
-{
-	u64 val = read_sysreg_s(SYS_CLIDR_EL1);
-
-	/* Check that CLIDR_EL1.LOU{U,IS} are both 0 */
-	WARN_ON(CLIDR_LOUU(val) || CLIDR_LOUIS(val));
-}
-
 #ifdef CONFIG_ARM64_PAN
 static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
 {
@@ -1829,21 +1842,27 @@ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry,
 	/* Now check for the secondary CPUs with SCOPE_LOCAL_CPU scope */
 	sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg),
 					      entry->field_pos, entry->sign);
-	return sec_val == boot_val;
+	return (sec_val >= entry->min_field_value) && (sec_val == boot_val);
 }
 
 static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry,
 				     int scope)
 {
-	return has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH], scope) ||
-	       has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
+	bool api = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
+	bool apa = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope);
+	bool apa3 = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope);
+
+	return apa || apa3 || api;
 }
 
 static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
 			     int __unused)
 {
-	return __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH) ||
-	       __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF);
+	bool gpi = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF);
+	bool gpa = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5);
+	bool gpa3 = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3);
+
+	return gpa || gpa3 || gpi;
 }
 #endif /* CONFIG_ARM64_PTR_AUTH */
 
@@ -1945,6 +1964,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_useable_gicv3_cpuif,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.field_pos = ID_AA64PFR0_GIC_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
 	},
@@ -1955,6 +1975,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64MMFR0_EL1,
 		.field_pos = ID_AA64MMFR0_ECV_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
 	},
@@ -1966,6 +1987,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64MMFR1_EL1,
 		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
 		.cpu_enable = cpu_enable_pan,
@@ -1979,6 +2001,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64MMFR1_EL1,
 		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 3,
 	},
@@ -1991,6 +2014,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64ISAR0_EL1,
 		.field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 2,
 	},
@@ -2015,6 +2039,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_EL0_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
 	},
 #ifdef CONFIG_KVM
@@ -2026,6 +2051,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_EL1_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
 	},
 	{
@@ -2046,6 +2072,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		 */
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.field_pos = ID_AA64PFR0_CSV3_SHIFT,
+		.field_width = 4,
 		.min_field_value = 1,
 		.matches = unmap_kernel_at_el0,
 		.cpu_enable = kpti_install_ng_mappings,
@@ -2065,6 +2092,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.field_pos = ID_AA64ISAR1_DPB_SHIFT,
+		.field_width = 4,
 		.min_field_value = 1,
 	},
 	{
@@ -2075,6 +2103,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_DPB_SHIFT,
+		.field_width = 4,
 		.min_field_value = 2,
 	},
 #endif
@@ -2086,6 +2115,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_SVE_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR0_SVE,
 		.matches = has_cpuid_feature,
 		.cpu_enable = sve_kernel_enable,
@@ -2100,6 +2130,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_RAS_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR0_RAS_V1,
 		.cpu_enable = cpu_clear_disr,
 	},
@@ -2118,6 +2149,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_AMU_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR0_AMU,
 		.cpu_enable = cpu_amu_enable,
 	},
@@ -2142,9 +2174,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64MMFR2_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64MMFR2_FWB_SHIFT,
+		.field_width = 4,
 		.min_field_value = 1,
 		.matches = has_cpuid_feature,
-		.cpu_enable = cpu_has_fwb,
 	},
 	{
 		.desc = "ARMv8.4 Translation Table Level",
@@ -2153,6 +2185,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64MMFR2_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64MMFR2_TTL_SHIFT,
+		.field_width = 4,
 		.min_field_value = 1,
 		.matches = has_cpuid_feature,
 	},
@@ -2163,6 +2196,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64ISAR0_EL1,
 		.field_pos = ID_AA64ISAR0_TLB_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = ID_AA64ISAR0_TLB_RANGE,
 	},
@@ -2181,6 +2215,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64MMFR1_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64MMFR1_HADBS_SHIFT,
+		.field_width = 4,
 		.min_field_value = 2,
 		.matches = has_hw_dbm,
 		.cpu_enable = cpu_enable_hw_dbm,
@@ -2193,6 +2228,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64ISAR0_EL1,
 		.field_pos = ID_AA64ISAR0_CRC32_SHIFT,
+		.field_width = 4,
 		.min_field_value = 1,
 	},
 	{
@@ -2202,6 +2238,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64PFR1_EL1,
 		.field_pos = ID_AA64PFR1_SSBS_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY,
 	},
@@ -2214,6 +2251,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64MMFR2_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64MMFR2_CNP_SHIFT,
+		.field_width = 4,
 		.min_field_value = 1,
 		.cpu_enable = cpu_enable_cnp,
 	},
@@ -2225,27 +2263,41 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.field_pos = ID_AA64ISAR1_SB_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
 	},
 #ifdef CONFIG_ARM64_PTR_AUTH
 	{
-		.desc = "Address authentication (architected algorithm)",
-		.capability = ARM64_HAS_ADDRESS_AUTH_ARCH,
+		.desc = "Address authentication (architected QARMA5 algorithm)",
+		.capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5,
 		.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_APA_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64ISAR1_APA_ARCHITECTED,
 		.matches = has_address_auth_cpucap,
 	},
 	{
+		.desc = "Address authentication (architected QARMA3 algorithm)",
+		.capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3,
+		.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+		.sys_reg = SYS_ID_AA64ISAR2_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64ISAR2_APA3_SHIFT,
+		.field_width = 4,
+		.min_field_value = ID_AA64ISAR2_APA3_ARCHITECTED,
+		.matches = has_address_auth_cpucap,
+	},
+	{
 		.desc = "Address authentication (IMP DEF algorithm)",
 		.capability = ARM64_HAS_ADDRESS_AUTH_IMP_DEF,
 		.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_API_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64ISAR1_API_IMP_DEF,
 		.matches = has_address_auth_cpucap,
 	},
@@ -2255,22 +2307,35 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_address_auth_metacap,
 	},
 	{
-		.desc = "Generic authentication (architected algorithm)",
-		.capability = ARM64_HAS_GENERIC_AUTH_ARCH,
+		.desc = "Generic authentication (architected QARMA5 algorithm)",
+		.capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5,
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_GPA_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED,
 		.matches = has_cpuid_feature,
 	},
 	{
+		.desc = "Generic authentication (architected QARMA3 algorithm)",
+		.capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.sys_reg = SYS_ID_AA64ISAR2_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64ISAR2_GPA3_SHIFT,
+		.field_width = 4,
+		.min_field_value = ID_AA64ISAR2_GPA3_ARCHITECTED,
+		.matches = has_cpuid_feature,
+	},
+	{
 		.desc = "Generic authentication (IMP DEF algorithm)",
 		.capability = ARM64_HAS_GENERIC_AUTH_IMP_DEF,
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_GPI_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64ISAR1_GPI_IMP_DEF,
 		.matches = has_cpuid_feature,
 	},
@@ -2291,6 +2356,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = can_use_gic_priorities,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.field_pos = ID_AA64PFR0_GIC_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
 	},
@@ -2302,6 +2368,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.sys_reg = SYS_ID_AA64MMFR2_EL1,
 		.sign = FTR_UNSIGNED,
+		.field_width = 4,
 		.field_pos = ID_AA64MMFR2_E0PD_SHIFT,
 		.matches = has_cpuid_feature,
 		.min_field_value = 1,
@@ -2316,6 +2383,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64ISAR0_EL1,
 		.field_pos = ID_AA64ISAR0_RNDR_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
 	},
@@ -2333,6 +2401,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.cpu_enable = bti_enable,
 		.sys_reg = SYS_ID_AA64PFR1_EL1,
 		.field_pos = ID_AA64PFR1_BT_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR1_BT_BTI,
 		.sign = FTR_UNSIGNED,
 	},
@@ -2345,6 +2414,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64PFR1_EL1,
 		.field_pos = ID_AA64PFR1_MTE_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR1_MTE,
 		.sign = FTR_UNSIGNED,
 		.cpu_enable = cpu_enable_mte,
@@ -2356,6 +2426,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64PFR1_EL1,
 		.field_pos = ID_AA64PFR1_MTE_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR1_MTE_ASYMM,
 		.sign = FTR_UNSIGNED,
 	},
@@ -2367,16 +2438,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_LRCPC_SHIFT,
+		.field_width = 4,
 		.matches = has_cpuid_feature,
 		.min_field_value = 1,
 	},
 	{},
 };
 
-#define HWCAP_CPUID_MATCH(reg, field, s, min_value)				\
+#define HWCAP_CPUID_MATCH(reg, field, width, s, min_value)			\
 		.matches = has_cpuid_feature,					\
 		.sys_reg = reg,							\
 		.field_pos = field,						\
+		.field_width = width,						\
 		.sign = s,							\
 		.min_field_value = min_value,
 
@@ -2386,10 +2459,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.hwcap_type = cap_type,						\
 		.hwcap = cap,							\
 
-#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap)			\
+#define HWCAP_CAP(reg, field, width, s, min_value, cap_type, cap)		\
 	{									\
 		__HWCAP_CAP(#cap, cap_type, cap)				\
-		HWCAP_CPUID_MATCH(reg, field, s, min_value)			\
+		HWCAP_CPUID_MATCH(reg, field, width, s, min_value) 		\
 	}
 
 #define HWCAP_MULTI_CAP(list, cap_type, cap)					\
@@ -2409,11 +2482,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
 	{
 		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT,
-				  FTR_UNSIGNED, ID_AA64ISAR1_APA_ARCHITECTED)
+				  4, FTR_UNSIGNED,
+				  ID_AA64ISAR1_APA_ARCHITECTED)
+	},
+	{
+		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_SHIFT,
+				  4, FTR_UNSIGNED, ID_AA64ISAR2_APA3_ARCHITECTED)
 	},
 	{
 		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT,
-				  FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF)
+				  4, FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF)
 	},
 	{},
 };
@@ -2421,77 +2499,82 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
 static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
 	{
 		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT,
-				  FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED)
+				  4, FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED)
+	},
+	{
+		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_GPA3_SHIFT,
+				  4, FTR_UNSIGNED, ID_AA64ISAR2_GPA3_ARCHITECTED)
 	},
 	{
 		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT,
-				  FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF)
+				  4, FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF)
 	},
 	{},
 };
 #endif
 
 static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM),
-	HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM),
+	HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
 #ifdef CONFIG_ARM64_SVE
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
 #endif
-	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
+	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
 #ifdef CONFIG_ARM64_BTI
-	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI),
+	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI),
 #endif
 #ifdef CONFIG_ARM64_PTR_AUTH
 	HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA),
 	HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG),
 #endif
 #ifdef CONFIG_ARM64_MTE
-	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE),
+	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE),
+	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE_ASYMM, CAP_HWCAP, KERNEL_HWCAP_MTE3),
 #endif /* CONFIG_ARM64_MTE */
-	HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
-	HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
-	HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
+	HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
+	HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
+	HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
 	{},
 };
 
@@ -2520,15 +2603,15 @@ static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope)
 static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = {
 #ifdef CONFIG_COMPAT
 	HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON),
-	HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4),
+	HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4),
 	/* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */
-	HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP),
-	HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3),
-	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
-	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
-	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
-	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
-	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+	HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP),
+	HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
 #endif
 	{},
 };
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index 03991eeff643..3006f4324808 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -54,6 +54,9 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu)
 	struct acpi_lpi_state *lpi;
 	struct acpi_processor *pr = per_cpu(processors, cpu);
 
+	if (unlikely(!pr || !pr->flags.has_lpi))
+		return -EINVAL;
+
 	/*
 	 * If the PSCI cpu_suspend function hook has not been initialized
 	 * idle states must not be enabled, so bail out
@@ -61,9 +64,6 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu)
 	if (!psci_ops.cpu_suspend)
 		return -EOPNOTSUPP;
 
-	if (unlikely(!pr || !pr->flags.has_lpi))
-		return -EINVAL;
-
 	count = pr->power.count - 1;
 	if (count <= 0)
 		return -ENODEV;
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 591c18a889a5..330b92ea863a 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -97,6 +97,7 @@ static const char *const hwcap_str[] = {
 	[KERNEL_HWCAP_ECV]		= "ecv",
 	[KERNEL_HWCAP_AFP]		= "afp",
 	[KERNEL_HWCAP_RPRES]		= "rpres",
+	[KERNEL_HWCAP_MTE3]		= "mte3",
 };
 
 #ifdef CONFIG_COMPAT
diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c
index 314391a156ee..2b65aae332ce 100644
--- a/arch/arm64/kernel/crash_core.c
+++ b/arch/arm64/kernel/crash_core.c
@@ -20,6 +20,12 @@ void arch_crash_save_vmcoreinfo(void)
 {
 	VMCOREINFO_NUMBER(VA_BITS);
 	/* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
+	vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR);
+	vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END);
+	vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START);
+	vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END);
+	vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START);
+	vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END);
 	vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n",
 						kimage_voffset);
 	vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
new file mode 100644
index 000000000000..3ed39c61a510
--- /dev/null
+++ b/arch/arm64/kernel/elfcore.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/coredump.h>
+#include <linux/elfcore.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+#include <asm/cpufeature.h>
+#include <asm/mte.h>
+
+#ifndef VMA_ITERATOR
+#define VMA_ITERATOR(name, mm, addr)	\
+	struct mm_struct *name = mm
+#define for_each_vma(vmi, vma)		\
+	for (vma = vmi->mmap; vma; vma = vma->vm_next)
+#endif
+
+#define for_each_mte_vma(vmi, vma)					\
+	if (system_supports_mte())					\
+		for_each_vma(vmi, vma)					\
+			if (vma->vm_flags & VM_MTE)
+
+static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_DONTDUMP)
+		return 0;
+
+	return vma_pages(vma) * MTE_PAGE_TAG_STORAGE;
+}
+
+/* Derived from dump_user_range(); start/end must be page-aligned */
+static int mte_dump_tag_range(struct coredump_params *cprm,
+			      unsigned long start, unsigned long end)
+{
+	unsigned long addr;
+
+	for (addr = start; addr < end; addr += PAGE_SIZE) {
+		char tags[MTE_PAGE_TAG_STORAGE];
+		struct page *page = get_dump_page(addr);
+
+		/*
+		 * get_dump_page() returns NULL when encountering an empty
+		 * page table entry that would otherwise have been filled with
+		 * the zero page. Skip the equivalent tag dump which would
+		 * have been all zeros.
+		 */
+		if (!page) {
+			dump_skip(cprm, MTE_PAGE_TAG_STORAGE);
+			continue;
+		}
+
+		/*
+		 * Pages mapped in user space as !pte_access_permitted() (e.g.
+		 * PROT_EXEC only) may not have the PG_mte_tagged flag set.
+		 */
+		if (!test_bit(PG_mte_tagged, &page->flags)) {
+			put_page(page);
+			dump_skip(cprm, MTE_PAGE_TAG_STORAGE);
+			continue;
+		}
+
+		mte_save_page_tags(page_address(page), tags);
+		put_page(page);
+		if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE))
+			return 0;
+	}
+
+	return 1;
+}
+
+Elf_Half elf_core_extra_phdrs(void)
+{
+	struct vm_area_struct *vma;
+	int vma_count = 0;
+	VMA_ITERATOR(vmi, current->mm, 0);
+
+	for_each_mte_vma(vmi, vma)
+		vma_count++;
+
+	return vma_count;
+}
+
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
+{
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, current->mm, 0);
+
+	for_each_mte_vma(vmi, vma) {
+		struct elf_phdr phdr;
+
+		phdr.p_type = PT_ARM_MEMTAG_MTE;
+		phdr.p_offset = offset;
+		phdr.p_vaddr = vma->vm_start;
+		phdr.p_paddr = 0;
+		phdr.p_filesz = mte_vma_tag_dump_size(vma);
+		phdr.p_memsz = vma->vm_end - vma->vm_start;
+		offset += phdr.p_filesz;
+		phdr.p_flags = 0;
+		phdr.p_align = 0;
+
+		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
+			return 0;
+	}
+
+	return 1;
+}
+
+size_t elf_core_extra_data_size(void)
+{
+	struct vm_area_struct *vma;
+	size_t data_size = 0;
+	VMA_ITERATOR(vmi, current->mm, 0);
+
+	for_each_mte_vma(vmi, vma)
+		data_size += mte_vma_tag_dump_size(vma);
+
+	return data_size;
+}
+
+int elf_core_write_extra_data(struct coredump_params *cprm)
+{
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, current->mm, 0);
+
+	for_each_mte_vma(vmi, vma) {
+		if (vma->vm_flags & VM_DONTDUMP)
+			continue;
+
+		if (!mte_dump_tag_range(cprm, vma->vm_start, vma->vm_end))
+			return 0;
+	}
+
+	return 1;
+}
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index ef7fcefb96bd..7093b578e325 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -6,6 +6,7 @@
  */
 
 #include <linux/context_tracking.h>
+#include <linux/kasan.h>
 #include <linux/linkage.h>
 #include <linux/lockdep.h>
 #include <linux/ptrace.h>
@@ -56,6 +57,7 @@ static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
 {
 	__enter_from_kernel_mode(regs);
 	mte_check_tfsr_entry();
+	mte_disable_tco_entry(current);
 }
 
 /*
@@ -103,6 +105,7 @@ static __always_inline void __enter_from_user_mode(void)
 	CT_WARN_ON(ct_state() != CONTEXT_USER);
 	user_exit_irqoff();
 	trace_hardirqs_off_finish();
+	mte_disable_tco_entry(current);
 }
 
 static __always_inline void enter_from_user_mode(struct pt_regs *regs)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 772ec2ecf488..ede028dee81b 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -37,18 +37,21 @@
 
 	.macro kernel_ventry, el:req, ht:req, regsize:req, label:req
 	.align 7
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+.Lventry_start\@:
 	.if	\el == 0
-alternative_if ARM64_UNMAP_KERNEL_AT_EL0
+	/*
+	 * This must be the first instruction of the EL0 vector entries. It is
+	 * skipped by the trampoline vectors, to trigger the cleanup.
+	 */
+	b	.Lskip_tramp_vectors_cleanup\@
 	.if	\regsize == 64
 	mrs	x30, tpidrro_el0
 	msr	tpidrro_el0, xzr
 	.else
 	mov	x30, xzr
 	.endif
-alternative_else_nop_endif
+.Lskip_tramp_vectors_cleanup\@:
 	.endif
-#endif
 
 	sub	sp, sp, #PT_REGS_SIZE
 #ifdef CONFIG_VMAP_STACK
@@ -95,11 +98,15 @@ alternative_else_nop_endif
 	mrs	x0, tpidrro_el0
 #endif
 	b	el\el\ht\()_\regsize\()_\label
+.org .Lventry_start\@ + 128	// Did we overflow the ventry slot?
 	.endm
 
-	.macro tramp_alias, dst, sym
+	.macro tramp_alias, dst, sym, tmp
 	mov_q	\dst, TRAMP_VALIAS
-	add	\dst, \dst, #(\sym - .entry.tramp.text)
+	adr_l	\tmp, \sym
+	add	\dst, \dst, \tmp
+	adr_l	\tmp, .entry.tramp.text
+	sub	\dst, \dst, \tmp
 	.endm
 
 	/*
@@ -116,7 +123,7 @@ alternative_cb_end
 	tbnz	\tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@
 	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_2
 	mov	w1, #\state
-alternative_cb	spectre_v4_patch_fw_mitigation_conduit
+alternative_cb	smccc_patch_fw_mitigation_conduit
 	nop					// Patched to SMC/HVC #0
 alternative_cb_end
 .L__asm_ssbd_skip\@:
@@ -300,6 +307,7 @@ alternative_else_nop_endif
 	str	w21, [sp, #S_SYSCALLNO]
 	.endif
 
+#ifdef CONFIG_ARM64_PSEUDO_NMI
 	/* Save pmr */
 alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 	mrs_s	x20, SYS_ICC_PMR_EL1
@@ -307,12 +315,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 	mov	x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET
 	msr_s	SYS_ICC_PMR_EL1, x20
 alternative_else_nop_endif
-
-	/* Re-enable tag checking (TCO set on exception entry) */
-#ifdef CONFIG_ARM64_MTE
-alternative_if ARM64_MTE
-	SET_PSTATE_TCO(0)
-alternative_else_nop_endif
 #endif
 
 	/*
@@ -330,6 +332,7 @@ alternative_else_nop_endif
 	disable_daif
 	.endif
 
+#ifdef CONFIG_ARM64_PSEUDO_NMI
 	/* Restore pmr */
 alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 	ldr	x20, [sp, #S_PMR_SAVE]
@@ -339,6 +342,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 	dsb	sy				// Ensure priority change is seen by redistributor
 .L__skip_pmr_sync\@:
 alternative_else_nop_endif
+#endif
 
 	ldp	x21, x22, [sp, #S_PC]		// load ELR, SPSR
 
@@ -413,21 +417,26 @@ alternative_else_nop_endif
 	ldp	x24, x25, [sp, #16 * 12]
 	ldp	x26, x27, [sp, #16 * 13]
 	ldp	x28, x29, [sp, #16 * 14]
-	ldr	lr, [sp, #S_LR]
-	add	sp, sp, #PT_REGS_SIZE		// restore sp
 
 	.if	\el == 0
-alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
+alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
+	ldr	lr, [sp, #S_LR]
+	add	sp, sp, #PT_REGS_SIZE		// restore sp
+	eret
+alternative_else_nop_endif
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 	bne	4f
-	msr	far_el1, x30
-	tramp_alias	x30, tramp_exit_native
+	msr	far_el1, x29
+	tramp_alias	x30, tramp_exit_native, x29
 	br	x30
 4:
-	tramp_alias	x30, tramp_exit_compat
+	tramp_alias	x30, tramp_exit_compat, x29
 	br	x30
 #endif
 	.else
+	ldr	lr, [sp, #S_LR]
+	add	sp, sp, #PT_REGS_SIZE		// restore sp
+
 	/* Ensure any device/NC reads complete */
 	alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412
 
@@ -594,12 +603,6 @@ SYM_CODE_END(ret_to_user)
 
 	.popsection				// .entry.text
 
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-/*
- * Exception vectors trampoline.
- */
-	.pushsection ".entry.tramp.text", "ax"
-
 	// Move from tramp_pg_dir to swapper_pg_dir
 	.macro tramp_map_kernel, tmp
 	mrs	\tmp, ttbr1_el1
@@ -633,12 +636,47 @@ alternative_else_nop_endif
 	 */
 	.endm
 
-	.macro tramp_ventry, regsize = 64
+	.macro tramp_data_page	dst
+	adr_l	\dst, .entry.tramp.text
+	sub	\dst, \dst, PAGE_SIZE
+	.endm
+
+	.macro tramp_data_read_var	dst, var
+#ifdef CONFIG_RANDOMIZE_BASE
+	tramp_data_page		\dst
+	add	\dst, \dst, #:lo12:__entry_tramp_data_\var
+	ldr	\dst, [\dst]
+#else
+	ldr	\dst, =\var
+#endif
+	.endm
+
+#define BHB_MITIGATION_NONE	0
+#define BHB_MITIGATION_LOOP	1
+#define BHB_MITIGATION_FW	2
+#define BHB_MITIGATION_INSN	3
+
+	.macro tramp_ventry, vector_start, regsize, kpti, bhb
 	.align	7
 1:
 	.if	\regsize == 64
 	msr	tpidrro_el0, x30	// Restored in kernel_ventry
 	.endif
+
+	.if	\bhb == BHB_MITIGATION_LOOP
+	/*
+	 * This sequence must appear before the first indirect branch. i.e. the
+	 * ret out of tramp_ventry. It appears here because x30 is free.
+	 */
+	__mitigate_spectre_bhb_loop	x30
+	.endif // \bhb == BHB_MITIGATION_LOOP
+
+	.if	\bhb == BHB_MITIGATION_INSN
+	clearbhb
+	isb
+	.endif // \bhb == BHB_MITIGATION_INSN
+
+	.if	\kpti == 1
 	/*
 	 * Defend against branch aliasing attacks by pushing a dummy
 	 * entry onto the return stack and using a RET instruction to
@@ -648,46 +686,75 @@ alternative_else_nop_endif
 	b	.
 2:
 	tramp_map_kernel	x30
-#ifdef CONFIG_RANDOMIZE_BASE
-	adr	x30, tramp_vectors + PAGE_SIZE
 alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003
-	ldr	x30, [x30]
-#else
-	ldr	x30, =vectors
-#endif
+	tramp_data_read_var	x30, vectors
 alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM
-	prfm	plil1strm, [x30, #(1b - tramp_vectors)]
+	prfm	plil1strm, [x30, #(1b - \vector_start)]
 alternative_else_nop_endif
+
 	msr	vbar_el1, x30
-	add	x30, x30, #(1b - tramp_vectors)
 	isb
+	.else
+	ldr	x30, =vectors
+	.endif // \kpti == 1
+
+	.if	\bhb == BHB_MITIGATION_FW
+	/*
+	 * The firmware sequence must appear before the first indirect branch.
+	 * i.e. the ret out of tramp_ventry. But it also needs the stack to be
+	 * mapped to save/restore the registers the SMC clobbers.
+	 */
+	__mitigate_spectre_bhb_fw
+	.endif // \bhb == BHB_MITIGATION_FW
+
+	add	x30, x30, #(1b - \vector_start + 4)
 	ret
+.org 1b + 128	// Did we overflow the ventry slot?
 	.endm
 
 	.macro tramp_exit, regsize = 64
-	adr	x30, tramp_vectors
+	tramp_data_read_var	x30, this_cpu_vector
+	get_this_cpu_offset x29
+	ldr	x30, [x30, x29]
+
 	msr	vbar_el1, x30
-	tramp_unmap_kernel	x30
+	ldr	lr, [sp, #S_LR]
+	tramp_unmap_kernel	x29
 	.if	\regsize == 64
-	mrs	x30, far_el1
+	mrs	x29, far_el1
 	.endif
+	add	sp, sp, #PT_REGS_SIZE		// restore sp
 	eret
 	sb
 	.endm
 
-	.align	11
-SYM_CODE_START_NOALIGN(tramp_vectors)
+	.macro	generate_tramp_vector,	kpti, bhb
+.Lvector_start\@:
 	.space	0x400
 
-	tramp_ventry
-	tramp_ventry
-	tramp_ventry
-	tramp_ventry
+	.rept	4
+	tramp_ventry	.Lvector_start\@, 64, \kpti, \bhb
+	.endr
+	.rept	4
+	tramp_ventry	.Lvector_start\@, 32, \kpti, \bhb
+	.endr
+	.endm
 
-	tramp_ventry	32
-	tramp_ventry	32
-	tramp_ventry	32
-	tramp_ventry	32
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+/*
+ * Exception vectors trampoline.
+ * The order must match __bp_harden_el1_vectors and the
+ * arm64_bp_harden_el1_vectors enum.
+ */
+	.pushsection ".entry.tramp.text", "ax"
+	.align	11
+SYM_CODE_START_NOALIGN(tramp_vectors)
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+	generate_tramp_vector	kpti=1, bhb=BHB_MITIGATION_LOOP
+	generate_tramp_vector	kpti=1, bhb=BHB_MITIGATION_FW
+	generate_tramp_vector	kpti=1, bhb=BHB_MITIGATION_INSN
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+	generate_tramp_vector	kpti=1, bhb=BHB_MITIGATION_NONE
 SYM_CODE_END(tramp_vectors)
 
 SYM_CODE_START(tramp_exit_native)
@@ -704,13 +771,57 @@ SYM_CODE_END(tramp_exit_compat)
 	.pushsection ".rodata", "a"
 	.align PAGE_SHIFT
 SYM_DATA_START(__entry_tramp_data_start)
+__entry_tramp_data_vectors:
 	.quad	vectors
+#ifdef CONFIG_ARM_SDE_INTERFACE
+__entry_tramp_data___sdei_asm_handler:
+	.quad	__sdei_asm_handler
+#endif /* CONFIG_ARM_SDE_INTERFACE */
+__entry_tramp_data_this_cpu_vector:
+	.quad	this_cpu_vector
 SYM_DATA_END(__entry_tramp_data_start)
 	.popsection				// .rodata
 #endif /* CONFIG_RANDOMIZE_BASE */
 #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
 
 /*
+ * Exception vectors for spectre mitigations on entry from EL1 when
+ * kpti is not in use.
+ */
+	.macro generate_el1_vector, bhb
+.Lvector_start\@:
+	kernel_ventry	1, t, 64, sync		// Synchronous EL1t
+	kernel_ventry	1, t, 64, irq		// IRQ EL1t
+	kernel_ventry	1, t, 64, fiq		// FIQ EL1h
+	kernel_ventry	1, t, 64, error		// Error EL1t
+
+	kernel_ventry	1, h, 64, sync		// Synchronous EL1h
+	kernel_ventry	1, h, 64, irq		// IRQ EL1h
+	kernel_ventry	1, h, 64, fiq		// FIQ EL1h
+	kernel_ventry	1, h, 64, error		// Error EL1h
+
+	.rept	4
+	tramp_ventry	.Lvector_start\@, 64, 0, \bhb
+	.endr
+	.rept 4
+	tramp_ventry	.Lvector_start\@, 32, 0, \bhb
+	.endr
+	.endm
+
+/* The order must match tramp_vecs and the arm64_bp_harden_el1_vectors enum. */
+	.pushsection ".entry.text", "ax"
+	.align	11
+SYM_CODE_START(__bp_harden_el1_vectors)
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+	generate_el1_vector	bhb=BHB_MITIGATION_LOOP
+	generate_el1_vector	bhb=BHB_MITIGATION_FW
+	generate_el1_vector	bhb=BHB_MITIGATION_INSN
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+SYM_CODE_END(__bp_harden_el1_vectors)
+	.popsection
+
+
+/*
  * Register switch for AArch64. The callee-saved registers need to be saved
  * and restored. On entry:
  *   x0 = previous task_struct (must be preserved across the switch)
@@ -835,14 +946,7 @@ SYM_CODE_START(__sdei_asm_entry_trampoline)
 	 * Remember whether to unmap the kernel on exit.
 	 */
 1:	str	x4, [x1, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)]
-
-#ifdef CONFIG_RANDOMIZE_BASE
-	adr	x4, tramp_vectors + PAGE_SIZE
-	add	x4, x4, #:lo12:__sdei_asm_trampoline_next_handler
-	ldr	x4, [x4]
-#else
-	ldr	x4, =__sdei_asm_handler
-#endif
+	tramp_data_read_var     x4, __sdei_asm_handler
 	br	x4
 SYM_CODE_END(__sdei_asm_entry_trampoline)
 NOKPROBE(__sdei_asm_entry_trampoline)
@@ -865,13 +969,6 @@ SYM_CODE_END(__sdei_asm_exit_trampoline)
 NOKPROBE(__sdei_asm_exit_trampoline)
 	.ltorg
 .popsection		// .entry.tramp.text
-#ifdef CONFIG_RANDOMIZE_BASE
-.pushsection ".rodata", "a"
-SYM_DATA_START(__sdei_asm_trampoline_next_handler)
-	.quad	__sdei_asm_handler
-SYM_DATA_END(__sdei_asm_trampoline_next_handler)
-.popsection		// .rodata
-#endif /* CONFIG_RANDOMIZE_BASE */
 #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
 
 /*
@@ -981,7 +1078,7 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
 alternative_else_nop_endif
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-	tramp_alias	dst=x5, sym=__sdei_asm_exit_trampoline
+	tramp_alias	dst=x5, sym=__sdei_asm_exit_trampoline, tmp=x3
 	br	x5
 #endif
 SYM_CODE_END(__sdei_asm_handler)
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index d8e606fe3c21..8a2ceb591686 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -17,7 +17,7 @@
 #define FTR_DESC_NAME_LEN	20
 #define FTR_DESC_FIELD_LEN	10
 #define FTR_ALIAS_NAME_LEN	30
-#define FTR_ALIAS_OPTION_LEN	80
+#define FTR_ALIAS_OPTION_LEN	116
 
 struct ftr_set_desc {
 	char 				name[FTR_DESC_NAME_LEN];
@@ -71,6 +71,16 @@ static const struct ftr_set_desc isar1 __initconst = {
 	},
 };
 
+static const struct ftr_set_desc isar2 __initconst = {
+	.name		= "id_aa64isar2",
+	.override	= &id_aa64isar2_override,
+	.fields		= {
+	        { "gpa3", ID_AA64ISAR2_GPA3_SHIFT },
+	        { "apa3", ID_AA64ISAR2_APA3_SHIFT },
+		{}
+	},
+};
+
 extern struct arm64_ftr_override kaslr_feature_override;
 
 static const struct ftr_set_desc kaslr __initconst = {
@@ -88,6 +98,7 @@ static const struct ftr_set_desc * const regs[] __initconst = {
 	&mmfr1,
 	&pfr1,
 	&isar1,
+	&isar2,
 	&kaslr,
 };
 
@@ -100,7 +111,8 @@ static const struct {
 	{ "arm64.nobti",		"id_aa64pfr1.bt=0" },
 	{ "arm64.nopauth",
 	  "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
-	  "id_aa64isar1.api=0 id_aa64isar1.apa=0"	   },
+	  "id_aa64isar1.api=0 id_aa64isar1.apa=0 "
+	  "id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0"	   },
 	{ "arm64.nomte",		"id_aa64pfr1.mte=0" },
 	{ "nokaslr",			"kaslr.disabled=1" },
 };
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 7eaf1f7c4168..55a1ced8eb77 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -66,6 +66,10 @@ KVM_NVHE_ALIAS(kvm_patch_vector_branch);
 KVM_NVHE_ALIAS(kvm_update_va_mask);
 KVM_NVHE_ALIAS(kvm_get_kimage_voffset);
 KVM_NVHE_ALIAS(kvm_compute_final_ctr_el0);
+KVM_NVHE_ALIAS(spectre_bhb_patch_loop_iter);
+KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable);
+KVM_NVHE_ALIAS(spectre_bhb_patch_wa3);
+KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb);
 
 /* Global kernel state accessed by nVHE hyp code. */
 KVM_NVHE_ALIAS(kvm_vgic_global_state);
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index f418ebc65f95..78b3e0f8e997 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -186,6 +186,11 @@ void mte_check_tfsr_el1(void)
 }
 #endif
 
+/*
+ * This is where we actually resolve the system and process MTE mode
+ * configuration into an actual value in SCTLR_EL1 that affects
+ * userspace.
+ */
 static void mte_update_sctlr_user(struct task_struct *task)
 {
 	/*
@@ -199,9 +204,20 @@ static void mte_update_sctlr_user(struct task_struct *task)
 	unsigned long pref, resolved_mte_tcf;
 
 	pref = __this_cpu_read(mte_tcf_preferred);
+	/*
+	 * If there is no overlap between the system preferred and
+	 * program requested values go with what was requested.
+	 */
 	resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
 	sctlr &= ~SCTLR_EL1_TCF0_MASK;
-	if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
+	/*
+	 * Pick an actual setting. The order in which we check for
+	 * set bits and map into register values determines our
+	 * default order.
+	 */
+	if (resolved_mte_tcf & MTE_CTRL_TCF_ASYMM)
+		sctlr |= SCTLR_EL1_TCF0_ASYMM;
+	else if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
 		sctlr |= SCTLR_EL1_TCF0_ASYNC;
 	else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
 		sctlr |= SCTLR_EL1_TCF0_SYNC;
@@ -253,6 +269,9 @@ void mte_thread_switch(struct task_struct *next)
 	mte_update_sctlr_user(next);
 	mte_update_gcr_excl(next);
 
+	/* TCO may not have been disabled on exception entry for the current task. */
+	mte_disable_tco_entry(next);
+
 	/*
 	 * Check if an async tag exception occurred at EL1.
 	 *
@@ -293,6 +312,17 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 	if (arg & PR_MTE_TCF_SYNC)
 		mte_ctrl |= MTE_CTRL_TCF_SYNC;
 
+	/*
+	 * If the system supports it and both sync and async modes are
+	 * specified then implicitly enable asymmetric mode.
+	 * Userspace could see a mix of both sync and async anyway due
+	 * to differing or changing defaults on CPUs.
+	 */
+	if (cpus_have_cap(ARM64_MTE_ASYMM) &&
+	    (arg & PR_MTE_TCF_ASYNC) &&
+	    (arg & PR_MTE_TCF_SYNC))
+		mte_ctrl |= MTE_CTRL_TCF_ASYMM;
+
 	task->thread.mte_ctrl = mte_ctrl;
 	if (task == current) {
 		preempt_disable();
@@ -467,6 +497,8 @@ static ssize_t mte_tcf_preferred_show(struct device *dev,
 		return sysfs_emit(buf, "async\n");
 	case MTE_CTRL_TCF_SYNC:
 		return sysfs_emit(buf, "sync\n");
+	case MTE_CTRL_TCF_ASYMM:
+		return sysfs_emit(buf, "asymm\n");
 	default:
 		return sysfs_emit(buf, "???\n");
 	}
@@ -482,6 +514,8 @@ static ssize_t mte_tcf_preferred_store(struct device *dev,
 		tcf = MTE_CTRL_TCF_ASYNC;
 	else if (sysfs_streq(buf, "sync"))
 		tcf = MTE_CTRL_TCF_SYNC;
+	else if (cpus_have_cap(ARM64_MTE_ASYMM) && sysfs_streq(buf, "asymm"))
+		tcf = MTE_CTRL_TCF_ASYMM;
 	else
 		return -EINVAL;
 
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index cab678ed6618..cb69ff1e6138 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -242,6 +242,16 @@ static struct attribute *armv8_pmuv3_event_attrs[] = {
 	ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD),
 	ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS),
 	ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD),
+	ARMV8_EVENT_ATTR(trb_wrap, ARMV8_PMUV3_PERFCTR_TRB_WRAP),
+	ARMV8_EVENT_ATTR(trb_trig, ARMV8_PMUV3_PERFCTR_TRB_TRIG),
+	ARMV8_EVENT_ATTR(trcextout0, ARMV8_PMUV3_PERFCTR_TRCEXTOUT0),
+	ARMV8_EVENT_ATTR(trcextout1, ARMV8_PMUV3_PERFCTR_TRCEXTOUT1),
+	ARMV8_EVENT_ATTR(trcextout2, ARMV8_PMUV3_PERFCTR_TRCEXTOUT2),
+	ARMV8_EVENT_ATTR(trcextout3, ARMV8_PMUV3_PERFCTR_TRCEXTOUT3),
+	ARMV8_EVENT_ATTR(cti_trigout4, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4),
+	ARMV8_EVENT_ATTR(cti_trigout5, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5),
+	ARMV8_EVENT_ATTR(cti_trigout6, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6),
+	ARMV8_EVENT_ATTR(cti_trigout7, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7),
 	ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT),
 	ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT),
 	ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT),
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 5369e649fa79..7fa97df55e3a 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -635,7 +635,8 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
 		return -EINVAL;
 
 	if (system_supports_mte())
-		valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK;
+		valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \
+			| PR_MTE_TAG_MASK;
 
 	if (arg & ~valid_mask)
 		return -EINVAL;
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index 902e4084c477..5777929d35bf 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -18,15 +18,18 @@
  */
 
 #include <linux/arm-smccc.h>
+#include <linux/bpf.h>
 #include <linux/cpu.h>
 #include <linux/device.h>
 #include <linux/nospec.h>
 #include <linux/prctl.h>
 #include <linux/sched/task_stack.h>
 
+#include <asm/debug-monitors.h>
 #include <asm/insn.h>
 #include <asm/spectre.h>
 #include <asm/traps.h>
+#include <asm/vectors.h>
 #include <asm/virt.h>
 
 /*
@@ -96,14 +99,51 @@ static bool spectre_v2_mitigations_off(void)
 	return ret;
 }
 
+static const char *get_bhb_affected_string(enum mitigation_state bhb_state)
+{
+	switch (bhb_state) {
+	case SPECTRE_UNAFFECTED:
+		return "";
+	default:
+	case SPECTRE_VULNERABLE:
+		return ", but not BHB";
+	case SPECTRE_MITIGATED:
+		return ", BHB";
+	}
+}
+
+static bool _unprivileged_ebpf_enabled(void)
+{
+#ifdef CONFIG_BPF_SYSCALL
+	return !sysctl_unprivileged_bpf_disabled;
+#else
+	return false;
+#endif
+}
+
 ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
 			    char *buf)
 {
+	enum mitigation_state bhb_state = arm64_get_spectre_bhb_state();
+	const char *bhb_str = get_bhb_affected_string(bhb_state);
+	const char *v2_str = "Branch predictor hardening";
+
 	switch (spectre_v2_state) {
 	case SPECTRE_UNAFFECTED:
-		return sprintf(buf, "Not affected\n");
+		if (bhb_state == SPECTRE_UNAFFECTED)
+			return sprintf(buf, "Not affected\n");
+
+		/*
+		 * Platforms affected by Spectre-BHB can't report
+		 * "Not affected" for Spectre-v2.
+		 */
+		v2_str = "CSV2";
+		fallthrough;
 	case SPECTRE_MITIGATED:
-		return sprintf(buf, "Mitigation: Branch predictor hardening\n");
+		if (bhb_state == SPECTRE_MITIGATED && _unprivileged_ebpf_enabled())
+			return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n");
+
+		return sprintf(buf, "Mitigation: %s%s\n", v2_str, bhb_str);
 	case SPECTRE_VULNERABLE:
 		fallthrough;
 	default:
@@ -193,17 +233,20 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn)
 	__this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT);
 }
 
-static void call_smc_arch_workaround_1(void)
+/* Called during entry so must be noinstr */
+static noinstr void call_smc_arch_workaround_1(void)
 {
 	arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
 }
 
-static void call_hvc_arch_workaround_1(void)
+/* Called during entry so must be noinstr */
+static noinstr void call_hvc_arch_workaround_1(void)
 {
 	arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
 }
 
-static void qcom_link_stack_sanitisation(void)
+/* Called during entry so must be noinstr */
+static noinstr void qcom_link_stack_sanitisation(void)
 {
 	u64 tmp;
 
@@ -554,9 +597,9 @@ void __init spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt,
  * Patch a NOP in the Spectre-v4 mitigation code with an SMC/HVC instruction
  * to call into firmware to adjust the mitigation state.
  */
-void __init spectre_v4_patch_fw_mitigation_conduit(struct alt_instr *alt,
-						   __le32 *origptr,
-						   __le32 *updptr, int nr_inst)
+void __init smccc_patch_fw_mitigation_conduit(struct alt_instr *alt,
+					       __le32 *origptr,
+					       __le32 *updptr, int nr_inst)
 {
 	u32 insn;
 
@@ -770,3 +813,344 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
 		return -ENODEV;
 	}
 }
+
+/*
+ * Spectre BHB.
+ *
+ * A CPU is either:
+ * - Mitigated by a branchy loop a CPU specific number of times, and listed
+ *   in our "loop mitigated list".
+ * - Mitigated in software by the firmware Spectre v2 call.
+ * - Has the ClearBHB instruction to perform the mitigation.
+ * - Has the 'Exception Clears Branch History Buffer' (ECBHB) feature, so no
+ *   software mitigation in the vectors is needed.
+ * - Has CSV2.3, so is unaffected.
+ */
+static enum mitigation_state spectre_bhb_state;
+
+enum mitigation_state arm64_get_spectre_bhb_state(void)
+{
+	return spectre_bhb_state;
+}
+
+enum bhb_mitigation_bits {
+	BHB_LOOP,
+	BHB_FW,
+	BHB_HW,
+	BHB_INSN,
+};
+static unsigned long system_bhb_mitigations;
+
+/*
+ * This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any
+ * SCOPE_SYSTEM call will give the right answer.
+ */
+u8 spectre_bhb_loop_affected(int scope)
+{
+	u8 k = 0;
+	static u8 max_bhb_k;
+
+	if (scope == SCOPE_LOCAL_CPU) {
+		static const struct midr_range spectre_bhb_k32_list[] = {
+			MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
+			MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
+			MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
+			MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+			MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
+			MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+			MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+			{},
+		};
+		static const struct midr_range spectre_bhb_k24_list[] = {
+			MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
+			MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
+			MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+			{},
+		};
+		static const struct midr_range spectre_bhb_k8_list[] = {
+			MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+			MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+			{},
+		};
+
+		if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list))
+			k = 32;
+		else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list))
+			k = 24;
+		else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list))
+			k =  8;
+
+		max_bhb_k = max(max_bhb_k, k);
+	} else {
+		k = max_bhb_k;
+	}
+
+	return k;
+}
+
+static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void)
+{
+	int ret;
+	struct arm_smccc_res res;
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+			     ARM_SMCCC_ARCH_WORKAROUND_3, &res);
+
+	ret = res.a0;
+	switch (ret) {
+	case SMCCC_RET_SUCCESS:
+		return SPECTRE_MITIGATED;
+	case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED:
+		return SPECTRE_UNAFFECTED;
+	default:
+		fallthrough;
+	case SMCCC_RET_NOT_SUPPORTED:
+		return SPECTRE_VULNERABLE;
+	}
+}
+
+static bool is_spectre_bhb_fw_affected(int scope)
+{
+	static bool system_affected;
+	enum mitigation_state fw_state;
+	bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE;
+	static const struct midr_range spectre_bhb_firmware_mitigated_list[] = {
+		MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
+		MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
+		{},
+	};
+	bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(),
+					 spectre_bhb_firmware_mitigated_list);
+
+	if (scope != SCOPE_LOCAL_CPU)
+		return system_affected;
+
+	fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
+	if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) {
+		system_affected = true;
+		return true;
+	}
+
+	return false;
+}
+
+static bool supports_ecbhb(int scope)
+{
+	u64 mmfr1;
+
+	if (scope == SCOPE_LOCAL_CPU)
+		mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1);
+	else
+		mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+
+	return cpuid_feature_extract_unsigned_field(mmfr1,
+						    ID_AA64MMFR1_ECBHB_SHIFT);
+}
+
+bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
+			     int scope)
+{
+	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+	if (supports_csv2p3(scope))
+		return false;
+
+	if (supports_clearbhb(scope))
+		return true;
+
+	if (spectre_bhb_loop_affected(scope))
+		return true;
+
+	if (is_spectre_bhb_fw_affected(scope))
+		return true;
+
+	return false;
+}
+
+static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
+{
+	const char *v = arm64_get_bp_hardening_vector(slot);
+
+	if (slot < 0)
+		return;
+
+	__this_cpu_write(this_cpu_vector, v);
+
+	/*
+	 * When KPTI is in use, the vectors are switched when exiting to
+	 * user-space.
+	 */
+	if (arm64_kernel_unmapped_at_el0())
+		return;
+
+	write_sysreg(v, vbar_el1);
+	isb();
+}
+
+void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
+{
+	bp_hardening_cb_t cpu_cb;
+	enum mitigation_state fw_state, state = SPECTRE_VULNERABLE;
+	struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
+
+	if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU))
+		return;
+
+	if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) {
+		/* No point mitigating Spectre-BHB alone. */
+	} else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) {
+		pr_info_once("spectre-bhb mitigation disabled by compile time option\n");
+	} else if (cpu_mitigations_off()) {
+		pr_info_once("spectre-bhb mitigation disabled by command line option\n");
+	} else if (supports_ecbhb(SCOPE_LOCAL_CPU)) {
+		state = SPECTRE_MITIGATED;
+		set_bit(BHB_HW, &system_bhb_mitigations);
+	} else if (supports_clearbhb(SCOPE_LOCAL_CPU)) {
+		/*
+		 * Ensure KVM uses the indirect vector which will have ClearBHB
+		 * added.
+		 */
+		if (!data->slot)
+			data->slot = HYP_VECTOR_INDIRECT;
+
+		this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN);
+		state = SPECTRE_MITIGATED;
+		set_bit(BHB_INSN, &system_bhb_mitigations);
+	} else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) {
+		/*
+		 * Ensure KVM uses the indirect vector which will have the
+		 * branchy-loop added. A57/A72-r0 will already have selected
+		 * the spectre-indirect vector, which is sufficient for BHB
+		 * too.
+		 */
+		if (!data->slot)
+			data->slot = HYP_VECTOR_INDIRECT;
+
+		this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP);
+		state = SPECTRE_MITIGATED;
+		set_bit(BHB_LOOP, &system_bhb_mitigations);
+	} else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) {
+		fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
+		if (fw_state == SPECTRE_MITIGATED) {
+			/*
+			 * Ensure KVM uses one of the spectre bp_hardening
+			 * vectors. The indirect vector doesn't include the EL3
+			 * call, so needs upgrading to
+			 * HYP_VECTOR_SPECTRE_INDIRECT.
+			 */
+			if (!data->slot || data->slot == HYP_VECTOR_INDIRECT)
+				data->slot += 1;
+
+			this_cpu_set_vectors(EL1_VECTOR_BHB_FW);
+
+			/*
+			 * The WA3 call in the vectors supersedes the WA1 call
+			 * made during context-switch. Uninstall any firmware
+			 * bp_hardening callback.
+			 */
+			cpu_cb = spectre_v2_get_sw_mitigation_cb();
+			if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb)
+				__this_cpu_write(bp_hardening_data.fn, NULL);
+
+			state = SPECTRE_MITIGATED;
+			set_bit(BHB_FW, &system_bhb_mitigations);
+		}
+	}
+
+	update_mitigation_state(&spectre_bhb_state, state);
+}
+
+/* Patched to NOP when enabled */
+void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt,
+						     __le32 *origptr,
+						      __le32 *updptr, int nr_inst)
+{
+	BUG_ON(nr_inst != 1);
+
+	if (test_bit(BHB_LOOP, &system_bhb_mitigations))
+		*updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
+}
+
+/* Patched to NOP when enabled */
+void noinstr spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt,
+						   __le32 *origptr,
+						   __le32 *updptr, int nr_inst)
+{
+	BUG_ON(nr_inst != 1);
+
+	if (test_bit(BHB_FW, &system_bhb_mitigations))
+		*updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
+}
+
+/* Patched to correct the immediate */
+void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
+				   __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+	u8 rd;
+	u32 insn;
+	u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM);
+
+	BUG_ON(nr_inst != 1); /* MOV -> MOV */
+
+	if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY))
+		return;
+
+	insn = le32_to_cpu(*origptr);
+	rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
+	insn = aarch64_insn_gen_movewide(rd, loop_count, 0,
+					 AARCH64_INSN_VARIANT_64BIT,
+					 AARCH64_INSN_MOVEWIDE_ZERO);
+	*updptr++ = cpu_to_le32(insn);
+}
+
+/* Patched to mov WA3 when supported */
+void noinstr spectre_bhb_patch_wa3(struct alt_instr *alt,
+				   __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+	u8 rd;
+	u32 insn;
+
+	BUG_ON(nr_inst != 1); /* MOV -> MOV */
+
+	if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) ||
+	    !test_bit(BHB_FW, &system_bhb_mitigations))
+		return;
+
+	insn = le32_to_cpu(*origptr);
+	rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
+
+	insn = aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_ORR,
+						  AARCH64_INSN_VARIANT_32BIT,
+						  AARCH64_INSN_REG_ZR, rd,
+						  ARM_SMCCC_ARCH_WORKAROUND_3);
+	if (WARN_ON_ONCE(insn == AARCH64_BREAK_FAULT))
+		return;
+
+	*updptr++ = cpu_to_le32(insn);
+}
+
+/* Patched to NOP when not supported */
+void __init spectre_bhb_patch_clearbhb(struct alt_instr *alt,
+				   __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+	BUG_ON(nr_inst != 2);
+
+	if (test_bit(BHB_INSN, &system_bhb_mitigations))
+		return;
+
+	*updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
+	*updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
+}
+
+#ifdef CONFIG_BPF_SYSCALL
+#define EBPF_WARN "Unprivileged eBPF is enabled, data leaks possible via Spectre v2 BHB attacks!\n"
+void unpriv_ebpf_notify(int new_state)
+{
+	if (spectre_v2_state == SPECTRE_VULNERABLE ||
+	    spectre_bhb_state != SPECTRE_MITIGATED)
+		return;
+
+	if (!new_state)
+		pr_err("WARNING: %s", EBPF_WARN);
+}
+#endif
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index d8aaf4b6f432..50fe8eaf7df0 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -11,7 +11,6 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/signal.h>
-#include <linux/personality.h>
 #include <linux/freezer.h>
 #include <linux/stddef.h>
 #include <linux/uaccess.h>
@@ -577,10 +576,12 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
 {
 	int err;
 
-	err = sigframe_alloc(user, &user->fpsimd_offset,
-			     sizeof(struct fpsimd_context));
-	if (err)
-		return err;
+	if (system_supports_fpsimd()) {
+		err = sigframe_alloc(user, &user->fpsimd_offset,
+				     sizeof(struct fpsimd_context));
+		if (err)
+			return err;
+	}
 
 	/* fault information, if valid */
 	if (add_all || current->thread.fault_code) {
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index db5159a3055f..12c6864e51e1 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -9,7 +9,6 @@
 
 #include <linux/compat.h>
 #include <linux/cpufeature.h>
-#include <linux/personality.h>
 #include <linux/sched.h>
 #include <linux/sched/signal.h>
 #include <linux/slab.h>
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 70fc42470f13..bb878f52ca0a 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -9,7 +9,6 @@
 #include <linux/bug.h>
 #include <linux/context_tracking.h>
 #include <linux/signal.h>
-#include <linux/personality.h>
 #include <linux/kallsyms.h>
 #include <linux/kprobes.h>
 #include <linux/spinlock.h>
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 50bab186c49b..edaf0faf766f 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -341,7 +341,7 @@ ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
 	<= SZ_4K, "Hibernate exit text too big or misaligned")
 #endif
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
+ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE,
 	"Entry trampoline text too big")
 #endif
 #ifdef CONFIG_KVM
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index ecc5958e27fe..946e401ef6b0 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1491,10 +1491,7 @@ static int kvm_init_vector_slots(void)
 	base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
 	kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT);
 
-	if (!cpus_have_const_cap(ARM64_SPECTRE_V3A))
-		return 0;
-
-	if (!has_vhe()) {
+	if (kvm_system_needs_idmapped_vectors() && !has_vhe()) {
 		err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs),
 					       __BP_HARDEN_HYP_VECS_SZ, &base);
 		if (err)
@@ -1870,6 +1867,7 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits)
 	kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
 	kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);
 	kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
+	kvm_nvhe_sym(id_aa64isar2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
 	kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
 	kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
 	kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index b6b6801d96d5..7839d075729b 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -62,6 +62,10 @@ el1_sync:				// Guest trapped into EL2
 	/* ARM_SMCCC_ARCH_WORKAROUND_2 handling */
 	eor	w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \
 			  ARM_SMCCC_ARCH_WORKAROUND_2)
+	cbz	w1, wa_epilogue
+
+	eor	w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_2 ^ \
+			  ARM_SMCCC_ARCH_WORKAROUND_3)
 	cbnz	w1, el1_trap
 
 wa_epilogue:
@@ -192,7 +196,10 @@ SYM_CODE_END(__kvm_hyp_vector)
 	sub	sp, sp, #(8 * 4)
 	stp	x2, x3, [sp, #(8 * 0)]
 	stp	x0, x1, [sp, #(8 * 2)]
+	alternative_cb spectre_bhb_patch_wa3
+	/* Patched to mov WA3 when supported */
 	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_1
+	alternative_cb_end
 	smc	#0
 	ldp	x2, x3, [sp, #(8 * 0)]
 	add	sp, sp, #(8 * 2)
@@ -205,6 +212,8 @@ SYM_CODE_END(__kvm_hyp_vector)
 	spectrev2_smccc_wa1_smc
 	.else
 	stp	x0, x1, [sp, #-16]!
+	mitigate_spectre_bhb_loop	x0
+	mitigate_spectre_bhb_clear_insn
 	.endif
 	.if \indirect != 0
 	alternative_cb  kvm_patch_vector_branch
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 701cfb964905..6379a1e3e6e5 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -174,9 +174,9 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
 
 	/* Valid trap.  Switch the context: */
 	if (has_vhe()) {
-		reg = CPACR_EL1_FPEN;
+		reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
 		if (sve_guest)
-			reg |= CPACR_EL1_ZEN;
+			reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
 
 		sysreg_clear_set(cpacr_el1, 0, reg);
 	} else {
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
index eea1f6a53723..5ad626527d41 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -192,6 +192,11 @@
 	ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \
 	)
 
+#define PVM_ID_AA64ISAR2_ALLOW (\
+	ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3) | \
+	ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) \
+	)
+
 u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
 bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
 bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S
index 958734f4d6b0..0c367eb5f4e2 100644
--- a/arch/arm64/kvm/hyp/nvhe/cache.S
+++ b/arch/arm64/kvm/hyp/nvhe/cache.S
@@ -7,7 +7,8 @@
 #include <asm/assembler.h>
 #include <asm/alternative.h>
 
-SYM_FUNC_START_PI(dcache_clean_inval_poc)
+SYM_FUNC_START(__pi_dcache_clean_inval_poc)
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
-SYM_FUNC_END_PI(dcache_clean_inval_poc)
+SYM_FUNC_END(__pi_dcache_clean_inval_poc)
+SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc)
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 526a7d6fa86f..cdbe8e246418 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -148,8 +148,10 @@ int hyp_map_vectors(void)
 	phys_addr_t phys;
 	void *bp_base;
 
-	if (!cpus_have_const_cap(ARM64_SPECTRE_V3A))
+	if (!kvm_system_needs_idmapped_vectors()) {
+		__hyp_bp_vect_base = __bp_harden_hyp_vecs;
 		return 0;
+	}
 
 	phys = __hyp_pa(__bp_harden_hyp_vecs);
 	bp_base = (void *)__pkvm_create_private_mapping(phys,
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 792cf6e6ac92..33f5181af330 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -22,6 +22,7 @@ u64 id_aa64pfr0_el1_sys_val;
 u64 id_aa64pfr1_el1_sys_val;
 u64 id_aa64isar0_el1_sys_val;
 u64 id_aa64isar1_el1_sys_val;
+u64 id_aa64isar2_el1_sys_val;
 u64 id_aa64mmfr0_el1_sys_val;
 u64 id_aa64mmfr1_el1_sys_val;
 u64 id_aa64mmfr2_el1_sys_val;
@@ -183,6 +184,17 @@ static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu)
 	return id_aa64isar1_el1_sys_val & allow_mask;
 }
 
+static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu)
+{
+	u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW;
+
+	if (!vcpu_has_ptrauth(vcpu))
+		allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) |
+				ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3));
+
+	return id_aa64isar2_el1_sys_val & allow_mask;
+}
+
 static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu)
 {
 	u64 set_mask;
@@ -225,6 +237,8 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
 		return get_pvm_id_aa64isar0(vcpu);
 	case SYS_ID_AA64ISAR1_EL1:
 		return get_pvm_id_aa64isar1(vcpu);
+	case SYS_ID_AA64ISAR2_EL1:
+		return get_pvm_id_aa64isar2(vcpu);
 	case SYS_ID_AA64MMFR0_EL1:
 		return get_pvm_id_aa64mmfr0(vcpu);
 	case SYS_ID_AA64MMFR1_EL1:
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 11d053fdd604..262dfe03134d 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -10,6 +10,7 @@
 #include <linux/kvm_host.h>
 #include <linux/types.h>
 #include <linux/jump_label.h>
+#include <linux/percpu.h>
 #include <uapi/linux/psci.h>
 
 #include <kvm/arm_psci.h>
@@ -24,6 +25,8 @@
 #include <asm/fpsimd.h>
 #include <asm/debug-monitors.h>
 #include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/vectors.h>
 
 /* VHE specific context */
 DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
@@ -38,7 +41,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 
 	val = read_sysreg(cpacr_el1);
 	val |= CPACR_EL1_TTA;
-	val &= ~CPACR_EL1_ZEN;
+	val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
 
 	/*
 	 * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
@@ -53,9 +56,9 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 
 	if (update_fp_enabled(vcpu)) {
 		if (vcpu_has_sve(vcpu))
-			val |= CPACR_EL1_ZEN;
+			val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
 	} else {
-		val &= ~CPACR_EL1_FPEN;
+		val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
 		__activate_traps_fpsimd32(vcpu);
 	}
 
@@ -67,7 +70,7 @@ NOKPROBE_SYMBOL(__activate_traps);
 
 static void __deactivate_traps(struct kvm_vcpu *vcpu)
 {
-	extern char vectors[];	/* kernel exception vectors */
+	const char *host_vectors = vectors;
 
 	___deactivate_traps(vcpu);
 
@@ -81,7 +84,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
 	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
 
 	write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
-	write_sysreg(vectors, vbar_el1);
+
+	if (!arm64_kernel_unmapped_at_el0())
+		host_vectors = __this_cpu_read(this_cpu_vector);
+	write_sysreg(host_vectors, vbar_el1);
 }
 NOKPROBE_SYMBOL(__deactivate_traps);
 
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 30da78f72b3b..202b8c455724 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -107,6 +107,18 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 				break;
 			}
 			break;
+		case ARM_SMCCC_ARCH_WORKAROUND_3:
+			switch (arm64_get_spectre_bhb_state()) {
+			case SPECTRE_VULNERABLE:
+				break;
+			case SPECTRE_MITIGATED:
+				val[0] = SMCCC_RET_SUCCESS;
+				break;
+			case SPECTRE_UNAFFECTED:
+				val[0] = SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED;
+				break;
+			}
+			break;
 		case ARM_SMCCC_HV_PV_TIME_FEATURES:
 			val[0] = SMCCC_RET_SUCCESS;
 			break;
diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
index 2ce60fecd861..5918095c90a5 100644
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -405,7 +405,7 @@ int kvm_psci_call(struct kvm_vcpu *vcpu)
 
 int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
 {
-	return 3;		/* PSCI version and two workaround registers */
+	return 4;		/* PSCI version and three workaround registers */
 }
 
 int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
@@ -419,6 +419,9 @@ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
 		return -EFAULT;
 
+	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, uindices++))
+		return -EFAULT;
+
 	return 0;
 }
 
@@ -458,6 +461,17 @@ static int get_kernel_wa_level(u64 regid)
 		case SPECTRE_VULNERABLE:
 			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
 		}
+		break;
+	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
+		switch (arm64_get_spectre_bhb_state()) {
+		case SPECTRE_VULNERABLE:
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
+		case SPECTRE_MITIGATED:
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL;
+		case SPECTRE_UNAFFECTED:
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED;
+		}
+		return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
 	}
 
 	return -EINVAL;
@@ -474,6 +488,7 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 		break;
 	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
 	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
 		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
 		break;
 	default:
@@ -519,6 +534,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	}
 
 	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
 		if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
 			return -EINVAL;
 
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 4dc2fba316ff..baa65292bbc2 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1097,6 +1097,11 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
 				 ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
 				 ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI));
 		break;
+	case SYS_ID_AA64ISAR2_EL1:
+		if (!vcpu_has_ptrauth(vcpu))
+			val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) |
+				 ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3));
+		break;
 	case SYS_ID_AA64DFR0_EL1:
 		/* Limit debug to ARMv8.0 */
 		val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER);
diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S
index 1fd5d790ab80..ebde40e7fa2b 100644
--- a/arch/arm64/lib/clear_page.S
+++ b/arch/arm64/lib/clear_page.S
@@ -14,7 +14,7 @@
  * Parameters:
  *	x0 - dest
  */
-SYM_FUNC_START_PI(clear_page)
+SYM_FUNC_START(__pi_clear_page)
 	mrs	x1, dczid_el0
 	tbnz	x1, #4, 2f	/* Branch if DC ZVA is prohibited */
 	and	w1, w1, #0xf
@@ -35,5 +35,6 @@ SYM_FUNC_START_PI(clear_page)
 	tst	x0, #(PAGE_SIZE - 1)
 	b.ne	2b
 	ret
-SYM_FUNC_END_PI(clear_page)
+SYM_FUNC_END(__pi_clear_page)
+SYM_FUNC_ALIAS(clear_page, __pi_clear_page)
 EXPORT_SYMBOL(clear_page)
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 29144f4cd449..c336d2ffdec5 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -17,7 +17,7 @@
  *	x0 - dest
  *	x1 - src
  */
-SYM_FUNC_START_PI(copy_page)
+SYM_FUNC_START(__pi_copy_page)
 alternative_if ARM64_HAS_NO_HW_PREFETCH
 	// Prefetch three cache lines ahead.
 	prfm	pldl1strm, [x1, #128]
@@ -75,5 +75,6 @@ alternative_else_nop_endif
 	stnp	x16, x17, [x0, #112 - 256]
 
 	ret
-SYM_FUNC_END_PI(copy_page)
+SYM_FUNC_END(__pi_copy_page)
+SYM_FUNC_ALIAS(copy_page, __pi_copy_page)
 EXPORT_SYMBOL(copy_page)
diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S
index 0f9e10ecda23..8340dccff46f 100644
--- a/arch/arm64/lib/crc32.S
+++ b/arch/arm64/lib/crc32.S
@@ -11,7 +11,44 @@
 
 	.arch		armv8-a+crc
 
-	.macro		__crc32, c
+	.macro		byteorder, reg, be
+	.if		\be
+CPU_LE( rev		\reg, \reg	)
+	.else
+CPU_BE( rev		\reg, \reg	)
+	.endif
+	.endm
+
+	.macro		byteorder16, reg, be
+	.if		\be
+CPU_LE( rev16		\reg, \reg	)
+	.else
+CPU_BE( rev16		\reg, \reg	)
+	.endif
+	.endm
+
+	.macro		bitorder, reg, be
+	.if		\be
+	rbit		\reg, \reg
+	.endif
+	.endm
+
+	.macro		bitorder16, reg, be
+	.if		\be
+	rbit		\reg, \reg
+	lsr		\reg, \reg, #16
+	.endif
+	.endm
+
+	.macro		bitorder8, reg, be
+	.if		\be
+	rbit		\reg, \reg
+	lsr		\reg, \reg, #24
+	.endif
+	.endm
+
+	.macro		__crc32, c, be=0
+	bitorder	w0, \be
 	cmp		x2, #16
 	b.lt		8f			// less than 16 bytes
 
@@ -24,10 +61,14 @@
 	add		x8, x8, x1
 	add		x1, x1, x7
 	ldp		x5, x6, [x8]
-CPU_BE(	rev		x3, x3		)
-CPU_BE(	rev		x4, x4		)
-CPU_BE(	rev		x5, x5		)
-CPU_BE(	rev		x6, x6		)
+	byteorder	x3, \be
+	byteorder	x4, \be
+	byteorder	x5, \be
+	byteorder	x6, \be
+	bitorder	x3, \be
+	bitorder	x4, \be
+	bitorder	x5, \be
+	bitorder	x6, \be
 
 	tst		x7, #8
 	crc32\c\()x	w8, w0, x3
@@ -55,33 +96,43 @@ CPU_BE(	rev		x6, x6		)
 32:	ldp		x3, x4, [x1], #32
 	sub		x2, x2, #32
 	ldp		x5, x6, [x1, #-16]
-CPU_BE(	rev		x3, x3		)
-CPU_BE(	rev		x4, x4		)
-CPU_BE(	rev		x5, x5		)
-CPU_BE(	rev		x6, x6		)
+	byteorder	x3, \be
+	byteorder	x4, \be
+	byteorder	x5, \be
+	byteorder	x6, \be
+	bitorder	x3, \be
+	bitorder	x4, \be
+	bitorder	x5, \be
+	bitorder	x6, \be
 	crc32\c\()x	w0, w0, x3
 	crc32\c\()x	w0, w0, x4
 	crc32\c\()x	w0, w0, x5
 	crc32\c\()x	w0, w0, x6
 	cbnz		x2, 32b
-0:	ret
+0:	bitorder	w0, \be
+	ret
 
 8:	tbz		x2, #3, 4f
 	ldr		x3, [x1], #8
-CPU_BE(	rev		x3, x3		)
+	byteorder	x3, \be
+	bitorder	x3, \be
 	crc32\c\()x	w0, w0, x3
 4:	tbz		x2, #2, 2f
 	ldr		w3, [x1], #4
-CPU_BE(	rev		w3, w3		)
+	byteorder	w3, \be
+	bitorder	w3, \be
 	crc32\c\()w	w0, w0, w3
 2:	tbz		x2, #1, 1f
 	ldrh		w3, [x1], #2
-CPU_BE(	rev16		w3, w3		)
+	byteorder16	w3, \be
+	bitorder16	w3, \be
 	crc32\c\()h	w0, w0, w3
 1:	tbz		x2, #0, 0f
 	ldrb		w3, [x1]
+	bitorder8	w3, \be
 	crc32\c\()b	w0, w0, w3
-0:	ret
+0:	bitorder	w0, \be
+	ret
 	.endm
 
 	.align		5
@@ -99,3 +150,11 @@ alternative_if_not ARM64_HAS_CRC32
 alternative_else_nop_endif
 	__crc32		c
 SYM_FUNC_END(__crc32c_le)
+
+	.align		5
+SYM_FUNC_START(crc32_be)
+alternative_if_not ARM64_HAS_CRC32
+	b		crc32_be_base
+alternative_else_nop_endif
+	__crc32		be=1
+SYM_FUNC_END(crc32_be)
diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index fccfe363e567..5e90887deec4 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -578,10 +578,16 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
 
 	switch (type) {
 	case AARCH64_INSN_LDST_LOAD_EX:
+	case AARCH64_INSN_LDST_LOAD_ACQ_EX:
 		insn = aarch64_insn_get_load_ex_value();
+		if (type == AARCH64_INSN_LDST_LOAD_ACQ_EX)
+			insn |= BIT(15);
 		break;
 	case AARCH64_INSN_LDST_STORE_EX:
+	case AARCH64_INSN_LDST_STORE_REL_EX:
 		insn = aarch64_insn_get_store_ex_value();
+		if (type == AARCH64_INSN_LDST_STORE_REL_EX)
+			insn |= BIT(15);
 		break;
 	default:
 		pr_err("%s: unknown load/store exclusive encoding %d\n", __func__, type);
@@ -603,12 +609,65 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
 					    state);
 }
 
-u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
-			   enum aarch64_insn_register address,
-			   enum aarch64_insn_register value,
-			   enum aarch64_insn_size_type size)
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+static u32 aarch64_insn_encode_ldst_order(enum aarch64_insn_mem_order_type type,
+					  u32 insn)
 {
-	u32 insn = aarch64_insn_get_ldadd_value();
+	u32 order;
+
+	switch (type) {
+	case AARCH64_INSN_MEM_ORDER_NONE:
+		order = 0;
+		break;
+	case AARCH64_INSN_MEM_ORDER_ACQ:
+		order = 2;
+		break;
+	case AARCH64_INSN_MEM_ORDER_REL:
+		order = 1;
+		break;
+	case AARCH64_INSN_MEM_ORDER_ACQREL:
+		order = 3;
+		break;
+	default:
+		pr_err("%s: unknown mem order %d\n", __func__, type);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn &= ~GENMASK(23, 22);
+	insn |= order << 22;
+
+	return insn;
+}
+
+u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
+				  enum aarch64_insn_register address,
+				  enum aarch64_insn_register value,
+				  enum aarch64_insn_size_type size,
+				  enum aarch64_insn_mem_atomic_op op,
+				  enum aarch64_insn_mem_order_type order)
+{
+	u32 insn;
+
+	switch (op) {
+	case AARCH64_INSN_MEM_ATOMIC_ADD:
+		insn = aarch64_insn_get_ldadd_value();
+		break;
+	case AARCH64_INSN_MEM_ATOMIC_CLR:
+		insn = aarch64_insn_get_ldclr_value();
+		break;
+	case AARCH64_INSN_MEM_ATOMIC_EOR:
+		insn = aarch64_insn_get_ldeor_value();
+		break;
+	case AARCH64_INSN_MEM_ATOMIC_SET:
+		insn = aarch64_insn_get_ldset_value();
+		break;
+	case AARCH64_INSN_MEM_ATOMIC_SWP:
+		insn = aarch64_insn_get_swp_value();
+		break;
+	default:
+		pr_err("%s: unimplemented mem atomic op %d\n", __func__, op);
+		return AARCH64_BREAK_FAULT;
+	}
 
 	switch (size) {
 	case AARCH64_INSN_SIZE_32:
@@ -621,6 +680,8 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
 
 	insn = aarch64_insn_encode_ldst_size(size, insn);
 
+	insn = aarch64_insn_encode_ldst_order(order, insn);
+
 	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
 					    result);
 
@@ -631,17 +692,68 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
 					    value);
 }
 
-u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
-			   enum aarch64_insn_register value,
-			   enum aarch64_insn_size_type size)
+static u32 aarch64_insn_encode_cas_order(enum aarch64_insn_mem_order_type type,
+					 u32 insn)
 {
-	/*
-	 * STADD is simply encoded as an alias for LDADD with XZR as
-	 * the destination register.
-	 */
-	return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address,
-				      value, size);
+	u32 order;
+
+	switch (type) {
+	case AARCH64_INSN_MEM_ORDER_NONE:
+		order = 0;
+		break;
+	case AARCH64_INSN_MEM_ORDER_ACQ:
+		order = BIT(22);
+		break;
+	case AARCH64_INSN_MEM_ORDER_REL:
+		order = BIT(15);
+		break;
+	case AARCH64_INSN_MEM_ORDER_ACQREL:
+		order = BIT(15) | BIT(22);
+		break;
+	default:
+		pr_err("%s: unknown mem order %d\n", __func__, type);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn &= ~(BIT(15) | BIT(22));
+	insn |= order;
+
+	return insn;
+}
+
+u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
+			 enum aarch64_insn_register address,
+			 enum aarch64_insn_register value,
+			 enum aarch64_insn_size_type size,
+			 enum aarch64_insn_mem_order_type order)
+{
+	u32 insn;
+
+	switch (size) {
+	case AARCH64_INSN_SIZE_32:
+	case AARCH64_INSN_SIZE_64:
+		break;
+	default:
+		pr_err("%s: unimplemented size encoding %d\n", __func__, size);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn = aarch64_insn_get_cas_value();
+
+	insn = aarch64_insn_encode_ldst_size(size, insn);
+
+	insn = aarch64_insn_encode_cas_order(order, insn);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
+					    result);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+					    address);
+
+	return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
+					    value);
 }
+#endif
 
 static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type,
 					enum aarch64_insn_prfm_target target,
@@ -1379,7 +1491,7 @@ static u32 aarch64_encode_immediate(u64 imm,
 		 * Compute the rotation to get a continuous set of
 		 * ones, with the first bit set at position 0
 		 */
-		ror = fls(~imm);
+		ror = fls64(~imm);
 	}
 
 	/*
@@ -1456,3 +1568,48 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
 	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
 	return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm);
 }
+
+u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
+{
+	u32 opt;
+	u32 insn;
+
+	switch (type) {
+	case AARCH64_INSN_MB_SY:
+		opt = 0xf;
+		break;
+	case AARCH64_INSN_MB_ST:
+		opt = 0xe;
+		break;
+	case AARCH64_INSN_MB_LD:
+		opt = 0xd;
+		break;
+	case AARCH64_INSN_MB_ISH:
+		opt = 0xb;
+		break;
+	case AARCH64_INSN_MB_ISHST:
+		opt = 0xa;
+		break;
+	case AARCH64_INSN_MB_ISHLD:
+		opt = 0x9;
+		break;
+	case AARCH64_INSN_MB_NSH:
+		opt = 0x7;
+		break;
+	case AARCH64_INSN_MB_NSHST:
+		opt = 0x6;
+		break;
+	case AARCH64_INSN_MB_NSHLD:
+		opt = 0x5;
+		break;
+	default:
+		pr_err("%s: unknown dmb type %d\n", __func__, type);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn = aarch64_insn_get_dmb_value();
+	insn &= ~GENMASK(11, 8);
+	insn |= (opt << 8);
+
+	return insn;
+}
diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S
index 7c2276fdab54..37a9f2a4f7f4 100644
--- a/arch/arm64/lib/memchr.S
+++ b/arch/arm64/lib/memchr.S
@@ -38,7 +38,7 @@
 
 	.p2align 4
 	nop
-SYM_FUNC_START_WEAK_PI(memchr)
+SYM_FUNC_START(__pi_memchr)
 	and	chrin, chrin, #0xff
 	lsr	wordcnt, cntin, #3
 	cbz	wordcnt, L(byte_loop)
@@ -71,5 +71,6 @@ CPU_LE(	rev	tmp, tmp)
 L(not_found):
 	mov	result, #0
 	ret
-SYM_FUNC_END_PI(memchr)
+SYM_FUNC_END(__pi_memchr)
+SYM_FUNC_ALIAS_WEAK(memchr, __pi_memchr)
 EXPORT_SYMBOL_NOKASAN(memchr)
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
index 7d956384222f..a5ccf2c55f91 100644
--- a/arch/arm64/lib/memcmp.S
+++ b/arch/arm64/lib/memcmp.S
@@ -32,7 +32,7 @@
 #define tmp1		x7
 #define tmp2		x8
 
-SYM_FUNC_START_WEAK_PI(memcmp)
+SYM_FUNC_START(__pi_memcmp)
 	subs	limit, limit, 8
 	b.lo	L(less8)
 
@@ -134,6 +134,6 @@ L(byte_loop):
 	b.eq	L(byte_loop)
 	sub	result, data1w, data2w
 	ret
-
-SYM_FUNC_END_PI(memcmp)
+SYM_FUNC_END(__pi_memcmp)
+SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp)
 EXPORT_SYMBOL_NOKASAN(memcmp)
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index b82fd64ee1e1..4ab48d49c451 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -57,10 +57,7 @@
    The loop tail is handled by always copying 64 bytes from the end.
 */
 
-SYM_FUNC_START_ALIAS(__memmove)
-SYM_FUNC_START_WEAK_ALIAS_PI(memmove)
-SYM_FUNC_START_ALIAS(__memcpy)
-SYM_FUNC_START_WEAK_PI(memcpy)
+SYM_FUNC_START(__pi_memcpy)
 	add	srcend, src, count
 	add	dstend, dstin, count
 	cmp	count, 128
@@ -241,12 +238,16 @@ L(copy64_from_start):
 	stp	B_l, B_h, [dstin, 16]
 	stp	C_l, C_h, [dstin]
 	ret
+SYM_FUNC_END(__pi_memcpy)
 
-SYM_FUNC_END_PI(memcpy)
-EXPORT_SYMBOL(memcpy)
-SYM_FUNC_END_ALIAS(__memcpy)
+SYM_FUNC_ALIAS(__memcpy, __pi_memcpy)
 EXPORT_SYMBOL(__memcpy)
-SYM_FUNC_END_ALIAS_PI(memmove)
-EXPORT_SYMBOL(memmove)
-SYM_FUNC_END_ALIAS(__memmove)
+SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
+EXPORT_SYMBOL(memcpy)
+
+SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy)
+
+SYM_FUNC_ALIAS(__memmove, __pi_memmove)
 EXPORT_SYMBOL(__memmove)
+SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
+EXPORT_SYMBOL(memmove)
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index a9c1c9a01ea9..a5aebe82ad73 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -42,8 +42,7 @@ dst		.req	x8
 tmp3w		.req	w9
 tmp3		.req	x9
 
-SYM_FUNC_START_ALIAS(__memset)
-SYM_FUNC_START_WEAK_PI(memset)
+SYM_FUNC_START(__pi_memset)
 	mov	dst, dstin	/* Preserve return value.  */
 	and	A_lw, val, #255
 	orr	A_lw, A_lw, A_lw, lsl #8
@@ -202,7 +201,10 @@ SYM_FUNC_START_WEAK_PI(memset)
 	ands	count, count, zva_bits_x
 	b.ne	.Ltail_maybe_long
 	ret
-SYM_FUNC_END_PI(memset)
-EXPORT_SYMBOL(memset)
-SYM_FUNC_END_ALIAS(__memset)
+SYM_FUNC_END(__pi_memset)
+
+SYM_FUNC_ALIAS(__memset, __pi_memset)
 EXPORT_SYMBOL(__memset)
+
+SYM_FUNC_ALIAS_WEAK(memset, __pi_memset)
+EXPORT_SYMBOL(memset)
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index f531dcb95174..8590af3c98c0 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -134,7 +134,7 @@ SYM_FUNC_END(mte_copy_tags_to_user)
 /*
  * Save the tags in a page
  *   x0 - page address
- *   x1 - tag storage
+ *   x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes
  */
 SYM_FUNC_START(mte_save_page_tags)
 	multitag_transfer_size x7, x5
@@ -158,7 +158,7 @@ SYM_FUNC_END(mte_save_page_tags)
 /*
  * Restore the tags in a page
  *   x0 - page address
- *   x1 - tag storage
+ *   x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes
  */
 SYM_FUNC_START(mte_restore_page_tags)
 	multitag_transfer_size x7, x5
diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S
index 1f47eae3b0d6..94ee67a6b212 100644
--- a/arch/arm64/lib/strchr.S
+++ b/arch/arm64/lib/strchr.S
@@ -18,7 +18,7 @@
  * Returns:
  *	x0 - address of first occurrence of 'c' or 0
  */
-SYM_FUNC_START_WEAK(strchr)
+SYM_FUNC_START(__pi_strchr)
 	and	w1, w1, #0xff
 1:	ldrb	w2, [x0], #1
 	cmp	w2, w1
@@ -28,5 +28,7 @@ SYM_FUNC_START_WEAK(strchr)
 	cmp	w2, w1
 	csel	x0, x0, xzr, eq
 	ret
-SYM_FUNC_END(strchr)
+SYM_FUNC_END(__pi_strchr)
+
+SYM_FUNC_ALIAS_WEAK(strchr, __pi_strchr)
 EXPORT_SYMBOL_NOKASAN(strchr)
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index 83bcad72ec97..9b89b4533607 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -1,9 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2012-2021, Arm Limited.
+ * Copyright (c) 2012-2022, Arm Limited.
  *
  * Adapted from the original at:
- * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/strcmp.S
+ * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S
  */
 
 #include <linux/linkage.h>
@@ -11,166 +11,180 @@
 
 /* Assumptions:
  *
- * ARMv8-a, AArch64
+ * ARMv8-a, AArch64.
+ * MTE compatible.
  */
 
 #define L(label) .L ## label
 
 #define REP8_01 0x0101010101010101
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
 
-/* Parameters and result.  */
 #define src1		x0
 #define src2		x1
 #define result		x0
 
-/* Internal variables.  */
 #define data1		x2
 #define data1w		w2
 #define data2		x3
 #define data2w		w3
 #define has_nul		x4
 #define diff		x5
+#define off1		x5
 #define syndrome	x6
-#define tmp1		x7
-#define tmp2		x8
-#define tmp3		x9
-#define zeroones	x10
-#define pos		x11
-
-	/* Start of performance-critical section  -- one 64B cache line.  */
-	.align 6
-SYM_FUNC_START_WEAK_PI(strcmp)
-	eor	tmp1, src1, src2
-	mov	zeroones, #REP8_01
-	tst	tmp1, #7
+#define tmp		x6
+#define data3		x7
+#define zeroones	x8
+#define shift		x9
+#define off2		x10
+
+/* On big-endian early bytes are at MSB and on little-endian LSB.
+   LS_FW means shifting towards early bytes.  */
+#ifdef __AARCH64EB__
+# define LS_FW lsl
+#else
+# define LS_FW lsr
+#endif
+
+/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+   can be done in parallel across the entire word.
+   Since carry propagation makes 0x1 bytes before a NUL byte appear
+   NUL too in big-endian, byte-reverse the data before the NUL check.  */
+
+
+SYM_FUNC_START(__pi_strcmp)
+	sub	off2, src2, src1
+	mov	zeroones, REP8_01
+	and	tmp, src1, 7
+	tst	off2, 7
 	b.ne	L(misaligned8)
-	ands	tmp1, src1, #7
-	b.ne	L(mutual_align)
-	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
-	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
-	   can be done in parallel across the entire word.  */
+	cbnz	tmp, L(mutual_align)
+
+	.p2align 4
+
 L(loop_aligned):
-	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
+	ldr	data2, [src1, off2]
+	ldr	data1, [src1], 8
 L(start_realigned):
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
+#ifdef __AARCH64EB__
+	rev	tmp, data1
+	sub	has_nul, tmp, zeroones
+	orr	tmp, tmp, REP8_7f
+#else
+	sub	has_nul, data1, zeroones
+	orr	tmp, data1, REP8_7f
+#endif
+	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
+	ccmp	data1, data2, 0, eq
+	b.eq	L(loop_aligned)
+#ifdef __AARCH64EB__
+	rev	has_nul, has_nul
+#endif
+	eor	diff, data1, data2
 	orr	syndrome, diff, has_nul
-	cbz	syndrome, L(loop_aligned)
-	/* End of performance-critical section  -- one 64B cache line.  */
-
 L(end):
-#ifndef	__AARCH64EB__
+#ifndef __AARCH64EB__
 	rev	syndrome, syndrome
 	rev	data1, data1
-	/* The MS-non-zero bit of the syndrome marks either the first bit
-	   that is different, or the top bit of the first zero byte.
-	   Shifting left now will bring the critical information into the
-	   top bits.  */
-	clz	pos, syndrome
 	rev	data2, data2
-	lsl	data1, data1, pos
-	lsl	data2, data2, pos
-	/* But we need to zero-extend (char is unsigned) the value and then
-	   perform a signed 32-bit subtraction.  */
-	lsr	data1, data1, #56
-	sub	result, data1, data2, lsr #56
-	ret
-#else
-	/* For big-endian we cannot use the trick with the syndrome value
-	   as carry-propagation can corrupt the upper bits if the trailing
-	   bytes in the string contain 0x01.  */
-	/* However, if there is no NUL byte in the dword, we can generate
-	   the result directly.  We can't just subtract the bytes as the
-	   MSB might be significant.  */
-	cbnz	has_nul, 1f
-	cmp	data1, data2
-	cset	result, ne
-	cneg	result, result, lo
-	ret
-1:
-	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
-	rev	tmp3, data1
-	sub	tmp1, tmp3, zeroones
-	orr	tmp2, tmp3, #REP8_7f
-	bic	has_nul, tmp1, tmp2
-	rev	has_nul, has_nul
-	orr	syndrome, diff, has_nul
-	clz	pos, syndrome
-	/* The MS-non-zero bit of the syndrome marks either the first bit
-	   that is different, or the top bit of the first zero byte.
+#endif
+	clz	shift, syndrome
+	/* The most-significant-non-zero bit of the syndrome marks either the
+	   first bit that is different, or the top bit of the first zero byte.
 	   Shifting left now will bring the critical information into the
 	   top bits.  */
-	lsl	data1, data1, pos
-	lsl	data2, data2, pos
+	lsl	data1, data1, shift
+	lsl	data2, data2, shift
 	/* But we need to zero-extend (char is unsigned) the value and then
 	   perform a signed 32-bit subtraction.  */
-	lsr	data1, data1, #56
-	sub	result, data1, data2, lsr #56
+	lsr	data1, data1, 56
+	sub	result, data1, data2, lsr 56
 	ret
-#endif
+
+	.p2align 4
 
 L(mutual_align):
 	/* Sources are mutually aligned, but are not currently at an
 	   alignment boundary.  Round down the addresses and then mask off
-	   the bytes that preceed the start point.  */
-	bic	src1, src1, #7
-	bic	src2, src2, #7
-	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
-	ldr	data1, [src1], #8
-	neg	tmp1, tmp1		/* Bits to alignment -64.  */
-	ldr	data2, [src2], #8
-	mov	tmp2, #~0
-#ifdef __AARCH64EB__
-	/* Big-endian.  Early bytes are at MSB.  */
-	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
-#else
-	/* Little-endian.  Early bytes are at LSB.  */
-	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
-#endif
-	orr	data1, data1, tmp2
-	orr	data2, data2, tmp2
+	   the bytes that precede the start point.  */
+	bic	src1, src1, 7
+	ldr	data2, [src1, off2]
+	ldr	data1, [src1], 8
+	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
+	mov	tmp, -1
+	LS_FW	tmp, tmp, shift
+	orr	data1, data1, tmp
+	orr	data2, data2, tmp
 	b	L(start_realigned)
 
 L(misaligned8):
 	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
-	   checking to make sure that we don't access beyond page boundary in
-	   SRC2.  */
-	tst	src1, #7
-	b.eq	L(loop_misaligned)
+	   checking to make sure that we don't access beyond the end of SRC2.  */
+	cbz	tmp, L(src1_aligned)
 L(do_misaligned):
-	ldrb	data1w, [src1], #1
-	ldrb	data2w, [src2], #1
-	cmp	data1w, #1
-	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
+	ldrb	data1w, [src1], 1
+	ldrb	data2w, [src2], 1
+	cmp	data1w, 0
+	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
 	b.ne	L(done)
-	tst	src1, #7
+	tst	src1, 7
 	b.ne	L(do_misaligned)
 
-L(loop_misaligned):
-	/* Test if we are within the last dword of the end of a 4K page.  If
-	   yes then jump back to the misaligned loop to copy a byte at a time.  */
-	and	tmp1, src2, #0xff8
-	eor	tmp1, tmp1, #0xff8
-	cbz	tmp1, L(do_misaligned)
-	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
-
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
+L(src1_aligned):
+	neg	shift, src2, lsl 3
+	bic	src2, src2, 7
+	ldr	data3, [src2], 8
+#ifdef __AARCH64EB__
+	rev	data3, data3
+#endif
+	lsr	tmp, zeroones, shift
+	orr	data3, data3, tmp
+	sub	has_nul, data3, zeroones
+	orr	tmp, data3, REP8_7f
+	bics	has_nul, has_nul, tmp
+	b.ne	L(tail)
+
+	sub	off1, src2, src1
+
+	.p2align 4
+
+L(loop_unaligned):
+	ldr	data3, [src1, off1]
+	ldr	data2, [src1, off2]
+#ifdef __AARCH64EB__
+	rev	data3, data3
+#endif
+	sub	has_nul, data3, zeroones
+	orr	tmp, data3, REP8_7f
+	ldr	data1, [src1], 8
+	bics	has_nul, has_nul, tmp
+	ccmp	data1, data2, 0, eq
+	b.eq	L(loop_unaligned)
+
+	lsl	tmp, has_nul, shift
+#ifdef __AARCH64EB__
+	rev	tmp, tmp
+#endif
+	eor	diff, data1, data2
+	orr	syndrome, diff, tmp
+	cbnz	syndrome, L(end)
+L(tail):
+	ldr	data1, [src1]
+	neg	shift, shift
+	lsr	data2, data3, shift
+	lsr	has_nul, has_nul, shift
+#ifdef __AARCH64EB__
+	rev     data2, data2
+	rev	has_nul, has_nul
+#endif
+	eor	diff, data1, data2
 	orr	syndrome, diff, has_nul
-	cbz	syndrome, L(loop_misaligned)
 	b	L(end)
 
 L(done):
 	sub	result, data1, data2
 	ret
-
-SYM_FUNC_END_PI(strcmp)
-EXPORT_SYMBOL_NOHWKASAN(strcmp)
+SYM_FUNC_END(__pi_strcmp)
+SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
+EXPORT_SYMBOL_NOKASAN(strcmp)
diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S
index 1648790e91b3..4919fe81ae54 100644
--- a/arch/arm64/lib/strlen.S
+++ b/arch/arm64/lib/strlen.S
@@ -79,7 +79,7 @@
 	   whether the first fetch, which may be misaligned, crosses a page
 	   boundary.  */
 
-SYM_FUNC_START_WEAK_PI(strlen)
+SYM_FUNC_START(__pi_strlen)
 	and	tmp1, srcin, MIN_PAGE_SIZE - 1
 	mov	zeroones, REP8_01
 	cmp	tmp1, MIN_PAGE_SIZE - 16
@@ -208,6 +208,6 @@ L(page_cross):
 	csel	data1, data1, tmp4, eq
 	csel	data2, data2, tmp2, eq
 	b	L(page_cross_entry)
-
-SYM_FUNC_END_PI(strlen)
+SYM_FUNC_END(__pi_strlen)
+SYM_FUNC_ALIAS_WEAK(strlen, __pi_strlen)
 EXPORT_SYMBOL_NOKASAN(strlen)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index e42bcfcd37e6..fe7bbc0b42a7 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -1,9 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2013-2021, Arm Limited.
+ * Copyright (c) 2013-2022, Arm Limited.
  *
  * Adapted from the original at:
- * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/strncmp.S
+ * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strncmp.S
  */
 
 #include <linux/linkage.h>
@@ -11,14 +11,14 @@
 
 /* Assumptions:
  *
- * ARMv8-a, AArch64
+ * ARMv8-a, AArch64.
+ * MTE compatible.
  */
 
 #define L(label) .L ## label
 
 #define REP8_01 0x0101010101010101
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
 
 /* Parameters and result.  */
 #define src1		x0
@@ -39,12 +39,26 @@
 #define tmp3		x10
 #define zeroones	x11
 #define pos		x12
-#define limit_wd	x13
-#define mask		x14
-#define endloop		x15
+#define mask		x13
+#define endloop		x14
 #define count		mask
+#define offset		pos
+#define neg_offset	x15
 
-SYM_FUNC_START_WEAK_PI(strncmp)
+/* Define endian dependent shift operations.
+   On big-endian early bytes are at MSB and on little-endian LSB.
+   LS_FW means shifting towards early bytes.
+   LS_BK means shifting towards later bytes.
+   */
+#ifdef __AARCH64EB__
+#define LS_FW lsl
+#define LS_BK lsr
+#else
+#define LS_FW lsr
+#define LS_BK lsl
+#endif
+
+SYM_FUNC_START(__pi_strncmp)
 	cbz	limit, L(ret0)
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
@@ -52,9 +66,6 @@ SYM_FUNC_START_WEAK_PI(strncmp)
 	and	count, src1, #7
 	b.ne	L(misaligned8)
 	cbnz	count, L(mutual_align)
-	/* Calculate the number of full and partial words -1.  */
-	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
-	lsr	limit_wd, limit_wd, #3	/* Convert to Dwords.  */
 
 	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
 	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
@@ -64,56 +75,52 @@ L(loop_aligned):
 	ldr	data1, [src1], #8
 	ldr	data2, [src2], #8
 L(start_realigned):
-	subs	limit_wd, limit_wd, #1
+	subs	limit, limit, #8
 	sub	tmp1, data1, zeroones
 	orr	tmp2, data1, #REP8_7f
 	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	csinv	endloop, diff, xzr, pl	/* Last Dword or differences.  */
+	csinv	endloop, diff, xzr, hi	/* Last Dword or differences.  */
 	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
 	ccmp	endloop, #0, #0, eq
 	b.eq	L(loop_aligned)
 	/* End of main loop */
 
-	/* Not reached the limit, must have found the end or a diff.  */
-	tbz	limit_wd, #63, L(not_limit)
-
-	/* Limit % 8 == 0 => all bytes significant.  */
-	ands	limit, limit, #7
-	b.eq	L(not_limit)
-
-	lsl	limit, limit, #3	/* Bits -> bytes.  */
-	mov	mask, #~0
-#ifdef __AARCH64EB__
-	lsr	mask, mask, limit
-#else
-	lsl	mask, mask, limit
-#endif
-	bic	data1, data1, mask
-	bic	data2, data2, mask
-
-	/* Make sure that the NUL byte is marked in the syndrome.  */
-	orr	has_nul, has_nul, mask
-
-L(not_limit):
+L(full_check):
+#ifndef __AARCH64EB__
 	orr	syndrome, diff, has_nul
-
-#ifndef	__AARCH64EB__
+	add	limit, limit, 8	/* Rewind limit to before last subs. */
+L(syndrome_check):
+	/* Limit was reached. Check if the NUL byte or the difference
+	   is before the limit. */
 	rev	syndrome, syndrome
 	rev	data1, data1
-	/* The MS-non-zero bit of the syndrome marks either the first bit
-	   that is different, or the top bit of the first zero byte.
-	   Shifting left now will bring the critical information into the
-	   top bits.  */
 	clz	pos, syndrome
 	rev	data2, data2
 	lsl	data1, data1, pos
+	cmp	limit, pos, lsr #3
 	lsl	data2, data2, pos
 	/* But we need to zero-extend (char is unsigned) the value and then
 	   perform a signed 32-bit subtraction.  */
 	lsr	data1, data1, #56
 	sub	result, data1, data2, lsr #56
+	csel result, result, xzr, hi
 	ret
 #else
+	/* Not reached the limit, must have found the end or a diff.  */
+	tbz	limit, #63, L(not_limit)
+	add	tmp1, limit, 8
+	cbz	limit, L(not_limit)
+
+	lsl	limit, tmp1, #3	/* Bits -> bytes.  */
+	mov	mask, #~0
+	lsr	mask, mask, limit
+	bic	data1, data1, mask
+	bic	data2, data2, mask
+
+	/* Make sure that the NUL byte is marked in the syndrome.  */
+	orr	has_nul, has_nul, mask
+
+L(not_limit):
 	/* For big-endian we cannot use the trick with the syndrome value
 	   as carry-propagation can corrupt the upper bits if the trailing
 	   bytes in the string contain 0x01.  */
@@ -134,10 +141,11 @@ L(not_limit):
 	rev	has_nul, has_nul
 	orr	syndrome, diff, has_nul
 	clz	pos, syndrome
-	/* The MS-non-zero bit of the syndrome marks either the first bit
-	   that is different, or the top bit of the first zero byte.
+	/* The most-significant-non-zero bit of the syndrome marks either the
+	   first bit that is different, or the top bit of the first zero byte.
 	   Shifting left now will bring the critical information into the
 	   top bits.  */
+L(end_quick):
 	lsl	data1, data1, pos
 	lsl	data2, data2, pos
 	/* But we need to zero-extend (char is unsigned) the value and then
@@ -159,22 +167,12 @@ L(mutual_align):
 	neg	tmp3, count, lsl #3	/* 64 - bits(bytes beyond align). */
 	ldr	data2, [src2], #8
 	mov	tmp2, #~0
-	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
-#ifdef __AARCH64EB__
-	/* Big-endian.  Early bytes are at MSB.  */
-	lsl	tmp2, tmp2, tmp3	/* Shift (count & 63).  */
-#else
-	/* Little-endian.  Early bytes are at LSB.  */
-	lsr	tmp2, tmp2, tmp3	/* Shift (count & 63).  */
-#endif
-	and	tmp3, limit_wd, #7
-	lsr	limit_wd, limit_wd, #3
-	/* Adjust the limit. Only low 3 bits used, so overflow irrelevant.  */
-	add	limit, limit, count
-	add	tmp3, tmp3, count
+	LS_FW	tmp2, tmp2, tmp3	/* Shift (count & 63).  */
+	/* Adjust the limit and ensure it doesn't overflow.  */
+	adds	limit, limit, count
+	csinv	limit, limit, xzr, lo
 	orr	data1, data1, tmp2
 	orr	data2, data2, tmp2
-	add	limit_wd, limit_wd, tmp3, lsr #3
 	b	L(start_realigned)
 
 	.p2align 4
@@ -197,13 +195,11 @@ L(done):
 	/* Align the SRC1 to a dword by doing a bytewise compare and then do
 	   the dword loop.  */
 L(try_misaligned_words):
-	lsr	limit_wd, limit, #3
-	cbz	count, L(do_misaligned)
+	cbz	count, L(src1_aligned)
 
 	neg	count, count
 	and	count, count, #7
 	sub	limit, limit, count
-	lsr	limit_wd, limit, #3
 
 L(page_end_loop):
 	ldrb	data1w, [src1], #1
@@ -214,48 +210,101 @@ L(page_end_loop):
 	subs	count, count, #1
 	b.hi	L(page_end_loop)
 
-L(do_misaligned):
-	/* Prepare ourselves for the next page crossing.  Unlike the aligned
-	   loop, we fetch 1 less dword because we risk crossing bounds on
-	   SRC2.  */
-	mov	count, #8
-	subs	limit_wd, limit_wd, #1
-	b.lo	L(done_loop)
-L(loop_misaligned):
-	and	tmp2, src2, #0xff8
-	eor	tmp2, tmp2, #0xff8
-	cbz	tmp2, L(page_end_loop)
+	/* The following diagram explains the comparison of misaligned strings.
+	   The bytes are shown in natural order. For little-endian, it is
+	   reversed in the registers. The "x" bytes are before the string.
+	   The "|" separates data that is loaded at one time.
+	   src1     | a a a a a a a a | b b b c c c c c | . . .
+	   src2     | x x x x x a a a   a a a a a b b b | c c c c c . . .
+
+	   After shifting in each step, the data looks like this:
+	                STEP_A              STEP_B              STEP_C
+	   data1    a a a a a a a a     b b b c c c c c     b b b c c c c c
+	   data2    a a a a a a a a     b b b 0 0 0 0 0     0 0 0 c c c c c
 
+	   The bytes with "0" are eliminated from the syndrome via mask.
+
+	   Align SRC2 down to 16 bytes. This way we can read 16 bytes at a
+	   time from SRC2. The comparison happens in 3 steps. After each step
+	   the loop can exit, or read from SRC1 or SRC2. */
+L(src1_aligned):
+	/* Calculate offset from 8 byte alignment to string start in bits. No
+	   need to mask offset since shifts are ignoring upper bits. */
+	lsl	offset, src2, #3
+	bic	src2, src2, #0xf
+	mov	mask, -1
+	neg	neg_offset, offset
 	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
-	ccmp	diff, #0, #0, eq
-	b.ne	L(not_limit)
-	subs	limit_wd, limit_wd, #1
-	b.pl	L(loop_misaligned)
+	ldp	tmp1, tmp2, [src2], #16
+	LS_BK	mask, mask, neg_offset
+	and	neg_offset, neg_offset, #63	/* Need actual value for cmp later. */
+	/* Skip the first compare if data in tmp1 is irrelevant. */
+	tbnz	offset, 6, L(misaligned_mid_loop)
 
-L(done_loop):
-	/* We found a difference or a NULL before the limit was reached.  */
-	and	limit, limit, #7
-	cbz	limit, L(not_limit)
-	/* Read the last word.  */
-	sub	src1, src1, 8
-	sub	src2, src2, 8
-	ldr	data1, [src1, limit]
-	ldr	data2, [src2, limit]
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
+L(loop_misaligned):
+	/* STEP_A: Compare full 8 bytes when there is enough data from SRC2.*/
+	LS_FW	data2, tmp1, offset
+	LS_BK	tmp1, tmp2, neg_offset
+	subs	limit, limit, #8
+	orr	data2, data2, tmp1	/* 8 bytes from SRC2 combined from two regs.*/
+	sub	has_nul, data1, zeroones
 	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
-	ccmp	diff, #0, #0, eq
-	b.ne	L(not_limit)
+	orr	tmp3, data1, #REP8_7f
+	csinv	endloop, diff, xzr, hi	/* If limit, set to all ones. */
+	bic	has_nul, has_nul, tmp3	/* Non-zero if NUL byte found in SRC1. */
+	orr	tmp3, endloop, has_nul
+	cbnz	tmp3, L(full_check)
+
+	ldr	data1, [src1], #8
+L(misaligned_mid_loop):
+	/* STEP_B: Compare first part of data1 to second part of tmp2. */
+	LS_FW	data2, tmp2, offset
+#ifdef __AARCH64EB__
+	/* For big-endian we do a byte reverse to avoid carry-propagation
+	problem described above. This way we can reuse the has_nul in the
+	next step and also use syndrome value trick at the end. */
+	rev	tmp3, data1
+	#define data1_fixed tmp3
+#else
+	#define data1_fixed data1
+#endif
+	sub	has_nul, data1_fixed, zeroones
+	orr	tmp3, data1_fixed, #REP8_7f
+	eor	diff, data2, data1	/* Non-zero if differences found.  */
+	bic	has_nul, has_nul, tmp3	/* Non-zero if NUL terminator.  */
+#ifdef __AARCH64EB__
+	rev	has_nul, has_nul
+#endif
+	cmp	limit, neg_offset, lsr #3
+	orr	syndrome, diff, has_nul
+	bic	syndrome, syndrome, mask	/* Ignore later bytes. */
+	csinv	tmp3, syndrome, xzr, hi	/* If limit, set to all ones. */
+	cbnz	tmp3, L(syndrome_check)
+
+	/* STEP_C: Compare second part of data1 to first part of tmp1. */
+	ldp	tmp1, tmp2, [src2], #16
+	cmp	limit, #8
+	LS_BK	data2, tmp1, neg_offset
+	eor	diff, data2, data1	/* Non-zero if differences found.  */
+	orr	syndrome, diff, has_nul
+	and	syndrome, syndrome, mask	/* Ignore earlier bytes. */
+	csinv	tmp3, syndrome, xzr, hi	/* If limit, set to all ones. */
+	cbnz	tmp3, L(syndrome_check)
+
+	ldr	data1, [src1], #8
+	sub	limit, limit, #8
+	b	L(loop_misaligned)
+
+#ifdef	__AARCH64EB__
+L(syndrome_check):
+	clz	pos, syndrome
+	cmp	pos, limit, lsl #3
+	b.lo	L(end_quick)
+#endif
 
 L(ret0):
 	mov	result, #0
 	ret
-
-SYM_FUNC_END_PI(strncmp)
-EXPORT_SYMBOL_NOHWKASAN(strncmp)
+SYM_FUNC_END(__pi_strncmp)
+SYM_FUNC_ALIAS_WEAK(strncmp, __pi_strncmp)
+EXPORT_SYMBOL_NOKASAN(strncmp)
diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S
index b72913a99038..d5ac0e10a01d 100644
--- a/arch/arm64/lib/strnlen.S
+++ b/arch/arm64/lib/strnlen.S
@@ -47,7 +47,7 @@ limit_wd	.req	x14
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 #define REP8_80 0x8080808080808080
 
-SYM_FUNC_START_WEAK_PI(strnlen)
+SYM_FUNC_START(__pi_strnlen)
 	cbz	limit, .Lhit_limit
 	mov	zeroones, #REP8_01
 	bic	src, srcin, #15
@@ -156,5 +156,7 @@ CPU_LE( lsr	tmp2, tmp2, tmp4 )	/* Shift (tmp1 & 63).  */
 .Lhit_limit:
 	mov	len, limit
 	ret
-SYM_FUNC_END_PI(strnlen)
+SYM_FUNC_END(__pi_strnlen)
+
+SYM_FUNC_ALIAS_WEAK(strnlen, __pi_strnlen)
 EXPORT_SYMBOL_NOKASAN(strnlen)
diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S
index 13132d1ed6d1..a5123cf0ce12 100644
--- a/arch/arm64/lib/strrchr.S
+++ b/arch/arm64/lib/strrchr.S
@@ -18,7 +18,7 @@
  * Returns:
  *	x0 - address of last occurrence of 'c' or 0
  */
-SYM_FUNC_START_WEAK_PI(strrchr)
+SYM_FUNC_START(__pi_strrchr)
 	mov	x3, #0
 	and	w1, w1, #0xff
 1:	ldrb	w2, [x0], #1
@@ -29,5 +29,6 @@ SYM_FUNC_START_WEAK_PI(strrchr)
 	b	1b
 2:	mov	x0, x3
 	ret
-SYM_FUNC_END_PI(strrchr)
+SYM_FUNC_END(__pi_strrchr)
+SYM_FUNC_ALIAS_WEAK(strrchr, __pi_strrchr)
 EXPORT_SYMBOL_NOKASAN(strrchr)
diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c
index d189cf4e70ea..96b171995d19 100644
--- a/arch/arm64/lib/xor-neon.c
+++ b/arch/arm64/lib/xor-neon.c
@@ -10,8 +10,8 @@
 #include <linux/module.h>
 #include <asm/neon-intrinsics.h>
 
-void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1,
-	unsigned long *p2)
+void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
 	uint64_t *dp2 = (uint64_t *)p2;
@@ -37,8 +37,9 @@ void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1,
 	} while (--lines > 0);
 }
 
-void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1,
-	unsigned long *p2, unsigned long *p3)
+void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
 	uint64_t *dp2 = (uint64_t *)p2;
@@ -72,8 +73,10 @@ void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1,
 	} while (--lines > 0);
 }
 
-void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1,
-	unsigned long *p2, unsigned long *p3, unsigned long *p4)
+void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3,
+	const unsigned long * __restrict p4)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
 	uint64_t *dp2 = (uint64_t *)p2;
@@ -115,9 +118,11 @@ void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1,
 	} while (--lines > 0);
 }
 
-void xor_arm64_neon_5(unsigned long bytes, unsigned long *p1,
-	unsigned long *p2, unsigned long *p3,
-	unsigned long *p4, unsigned long *p5)
+void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3,
+	const unsigned long * __restrict p4,
+	const unsigned long * __restrict p5)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
 	uint64_t *dp2 = (uint64_t *)p2;
@@ -186,8 +191,10 @@ static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
 	return res;
 }
 
-static void xor_arm64_eor3_3(unsigned long bytes, unsigned long *p1,
-			     unsigned long *p2, unsigned long *p3)
+static void xor_arm64_eor3_3(unsigned long bytes,
+	unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
 	uint64_t *dp2 = (uint64_t *)p2;
@@ -219,9 +226,11 @@ static void xor_arm64_eor3_3(unsigned long bytes, unsigned long *p1,
 	} while (--lines > 0);
 }
 
-static void xor_arm64_eor3_4(unsigned long bytes, unsigned long *p1,
-			     unsigned long *p2, unsigned long *p3,
-			     unsigned long *p4)
+static void xor_arm64_eor3_4(unsigned long bytes,
+	unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3,
+	const unsigned long * __restrict p4)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
 	uint64_t *dp2 = (uint64_t *)p2;
@@ -261,9 +270,12 @@ static void xor_arm64_eor3_4(unsigned long bytes, unsigned long *p1,
 	} while (--lines > 0);
 }
 
-static void xor_arm64_eor3_5(unsigned long bytes, unsigned long *p1,
-			     unsigned long *p2, unsigned long *p3,
-			     unsigned long *p4, unsigned long *p5)
+static void xor_arm64_eor3_5(unsigned long bytes,
+	unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3,
+	const unsigned long * __restrict p4,
+	const unsigned long * __restrict p5)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
 	uint64_t *dp2 = (uint64_t *)p2;
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 7d0563db4201..0ea6cc25dc66 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -107,10 +107,11 @@ SYM_FUNC_END(icache_inval_pou)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-SYM_FUNC_START_PI(dcache_clean_inval_poc)
+SYM_FUNC_START(__pi_dcache_clean_inval_poc)
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
-SYM_FUNC_END_PI(dcache_clean_inval_poc)
+SYM_FUNC_END(__pi_dcache_clean_inval_poc)
+SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc)
 
 /*
  *	dcache_clean_pou(start, end)
@@ -140,7 +141,7 @@ SYM_FUNC_END(dcache_clean_pou)
  *	- start   - kernel start address of region
  *	- end     - kernel end address of region
  */
-SYM_FUNC_START_PI(dcache_inval_poc)
+SYM_FUNC_START(__pi_dcache_inval_poc)
 	dcache_line_size x2, x3
 	sub	x3, x2, #1
 	tst	x1, x3				// end cache line aligned?
@@ -158,7 +159,8 @@ SYM_FUNC_START_PI(dcache_inval_poc)
 	b.lo	2b
 	dsb	sy
 	ret
-SYM_FUNC_END_PI(dcache_inval_poc)
+SYM_FUNC_END(__pi_dcache_inval_poc)
+SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc)
 
 /*
  *	dcache_clean_poc(start, end)
@@ -169,10 +171,11 @@ SYM_FUNC_END_PI(dcache_inval_poc)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-SYM_FUNC_START_PI(dcache_clean_poc)
+SYM_FUNC_START(__pi_dcache_clean_poc)
 	dcache_by_line_op cvac, sy, x0, x1, x2, x3
 	ret
-SYM_FUNC_END_PI(dcache_clean_poc)
+SYM_FUNC_END(__pi_dcache_clean_poc)
+SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc)
 
 /*
  *	dcache_clean_pop(start, end)
@@ -183,13 +186,14 @@ SYM_FUNC_END_PI(dcache_clean_poc)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-SYM_FUNC_START_PI(dcache_clean_pop)
+SYM_FUNC_START(__pi_dcache_clean_pop)
 	alternative_if_not ARM64_HAS_DCPOP
 	b	dcache_clean_poc
 	alternative_else_nop_endif
 	dcache_by_line_op cvap, sy, x0, x1, x2, x3
 	ret
-SYM_FUNC_END_PI(dcache_clean_pop)
+SYM_FUNC_END(__pi_dcache_clean_pop)
+SYM_FUNC_ALIAS(dcache_clean_pop, __pi_dcache_clean_pop)
 
 /*
  *	__dma_flush_area(start, size)
@@ -199,11 +203,12 @@ SYM_FUNC_END_PI(dcache_clean_pop)
  *	- start   - virtual start address of region
  *	- size    - size in question
  */
-SYM_FUNC_START_PI(__dma_flush_area)
+SYM_FUNC_START(__pi___dma_flush_area)
 	add	x1, x0, x1
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
-SYM_FUNC_END_PI(__dma_flush_area)
+SYM_FUNC_END(__pi___dma_flush_area)
+SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area)
 
 /*
  *	__dma_map_area(start, size, dir)
@@ -211,12 +216,13 @@ SYM_FUNC_END_PI(__dma_flush_area)
  *	- size	- size of region
  *	- dir	- DMA direction
  */
-SYM_FUNC_START_PI(__dma_map_area)
+SYM_FUNC_START(__pi___dma_map_area)
 	add	x1, x0, x1
 	cmp	w2, #DMA_FROM_DEVICE
 	b.eq	__pi_dcache_inval_poc
 	b	__pi_dcache_clean_poc
-SYM_FUNC_END_PI(__dma_map_area)
+SYM_FUNC_END(__pi___dma_map_area)
+SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area)
 
 /*
  *	__dma_unmap_area(start, size, dir)
@@ -224,9 +230,10 @@ SYM_FUNC_END_PI(__dma_map_area)
  *	- size	- size of region
  *	- dir	- DMA direction
  */
-SYM_FUNC_START_PI(__dma_unmap_area)
+SYM_FUNC_START(__pi___dma_unmap_area)
 	add	x1, x0, x1
 	cmp	w2, #DMA_TO_DEVICE
 	b.ne	__pi_dcache_inval_poc
 	ret
-SYM_FUNC_END_PI(__dma_unmap_area)
+SYM_FUNC_END(__pi___dma_unmap_area)
+SYM_FUNC_ALIAS(__dma_unmap_area, __pi___dma_unmap_area)
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 2aaf950b906c..a06c6ac770d4 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -52,6 +52,13 @@ void __sync_icache_dcache(pte_t pte)
 {
 	struct page *page = pte_page(pte);
 
+	/*
+	 * HugeTLB pages are always fully mapped, so only setting head page's
+	 * PG_dcache_clean flag is enough.
+	 */
+	if (PageHuge(page))
+		page = compound_head(page);
+
 	if (!test_bit(PG_dcache_clean, &page->flags)) {
 		sync_icache_aliases((unsigned long)page_address(page),
 				    (unsigned long)page_address(page) +
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index ffb9c229610a..a33aba91ad89 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -56,25 +56,34 @@ void __init arm64_hugetlb_cma_reserve(void)
 }
 #endif /* CONFIG_CMA */
 
-#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
-bool arch_hugetlb_migration_supported(struct hstate *h)
+static bool __hugetlb_valid_size(unsigned long size)
 {
-	size_t pagesize = huge_page_size(h);
-
-	switch (pagesize) {
+	switch (size) {
 #ifndef __PAGETABLE_PMD_FOLDED
 	case PUD_SIZE:
 		return pud_sect_supported();
 #endif
-	case PMD_SIZE:
 	case CONT_PMD_SIZE:
+	case PMD_SIZE:
 	case CONT_PTE_SIZE:
 		return true;
 	}
-	pr_warn("%s: unrecognized huge page size 0x%lx\n",
-			__func__, pagesize);
+
 	return false;
 }
+
+#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
+bool arch_hugetlb_migration_supported(struct hstate *h)
+{
+	size_t pagesize = huge_page_size(h);
+
+	if (!__hugetlb_valid_size(pagesize)) {
+		pr_warn("%s: unrecognized huge page size 0x%lx\n",
+			__func__, pagesize);
+		return false;
+	}
+	return true;
+}
 #endif
 
 int pmd_huge(pmd_t pmd)
@@ -506,16 +515,5 @@ arch_initcall(hugetlbpage_init);
 
 bool __init arch_hugetlb_valid_size(unsigned long size)
 {
-	switch (size) {
-#ifndef __PAGETABLE_PMD_FOLDED
-	case PUD_SIZE:
-		return pud_sect_supported();
-#endif
-	case CONT_PMD_SIZE:
-	case PMD_SIZE:
-	case CONT_PTE_SIZE:
-		return true;
-	}
-
-	return false;
+	return __hugetlb_valid_size(size);
 }
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index db63cc885771..9e26ec80d317 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -61,8 +61,34 @@ EXPORT_SYMBOL(memstart_addr);
  * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
  * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
  * otherwise it is empty.
+ *
+ * Memory reservation for crash kernel either done early or deferred
+ * depending on DMA memory zones configs (ZONE_DMA) --
+ *
+ * In absence of ZONE_DMA configs arm64_dma_phys_limit initialized
+ * here instead of max_zone_phys().  This lets early reservation of
+ * crash kernel memory which has a dependency on arm64_dma_phys_limit.
+ * Reserving memory early for crash kernel allows linear creation of block
+ * mappings (greater than page-granularity) for all the memory bank rangs.
+ * In this scheme a comparatively quicker boot is observed.
+ *
+ * If ZONE_DMA configs are defined, crash kernel memory reservation
+ * is delayed until DMA zone memory range size initilazation performed in
+ * zone_sizes_init().  The defer is necessary to steer clear of DMA zone
+ * memory range to avoid overlap allocation.  So crash kernel memory boundaries
+ * are not known when mapping all bank memory ranges, which otherwise means
+ * not possible to exclude crash kernel range from creating block mappings
+ * so page-granularity mappings are created for the entire memory range.
+ * Hence a slightly slower boot is observed.
+ *
+ * Note: Page-granularity mapppings are necessary for crash kernel memory
+ * range for shrinking its size via /sys/kernel/kexec_crash_size interface.
  */
-phys_addr_t arm64_dma_phys_limit __ro_after_init;
+#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
+phys_addr_t __ro_after_init arm64_dma_phys_limit;
+#else
+phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1;
+#endif
 
 #ifdef CONFIG_KEXEC_CORE
 /*
@@ -153,8 +179,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 	if (!arm64_dma_phys_limit)
 		arm64_dma_phys_limit = dma32_phys_limit;
 #endif
-	if (!arm64_dma_phys_limit)
-		arm64_dma_phys_limit = PHYS_MASK + 1;
 	max_zone_pfns[ZONE_NORMAL] = max;
 
 	free_area_init(max_zone_pfns);
@@ -315,6 +339,9 @@ void __init arm64_memblock_init(void)
 
 	early_init_fdt_scan_reserved_mem();
 
+	if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32))
+		reserve_crashkernel();
+
 	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
 }
 
@@ -361,7 +388,8 @@ void __init bootmem_init(void)
 	 * request_standard_resources() depends on crashkernel's memory being
 	 * reserved, so do it here.
 	 */
-	reserve_crashkernel();
+	if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32))
+		reserve_crashkernel();
 
 	memblock_dump_all();
 }
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index a38f54cd638c..77ada00280d9 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -7,8 +7,10 @@
 
 #include <linux/io.h>
 #include <linux/memblock.h>
+#include <linux/mm.h>
 #include <linux/types.h>
 
+#include <asm/cpufeature.h>
 #include <asm/page.h>
 
 /*
@@ -38,3 +40,18 @@ int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
 {
 	return !(((pfn << PAGE_SHIFT) + size) & ~PHYS_MASK);
 }
+
+static int __init adjust_protection_map(void)
+{
+	/*
+	 * With Enhanced PAN we can honour the execute-only permissions as
+	 * there is no PAN override with such mappings.
+	 */
+	if (cpus_have_const_cap(ARM64_HAS_EPAN)) {
+		protection_map[VM_EXEC] = PAGE_EXECONLY;
+		protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY;
+	}
+
+	return 0;
+}
+arch_initcall(adjust_protection_map);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index acfae9b41cc8..0b7d25887ec3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -63,6 +63,7 @@ static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
 static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
 
 static DEFINE_SPINLOCK(swapper_pgdir_lock);
+static DEFINE_MUTEX(fixmap_lock);
 
 void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
 {
@@ -294,18 +295,6 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
 	} while (addr = next, addr != end);
 }
 
-static inline bool use_1G_block(unsigned long addr, unsigned long next,
-			unsigned long phys)
-{
-	if (PAGE_SHIFT != 12)
-		return false;
-
-	if (((addr | next | phys) & ~PUD_MASK) != 0)
-		return false;
-
-	return true;
-}
-
 static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
 			   phys_addr_t phys, pgprot_t prot,
 			   phys_addr_t (*pgtable_alloc)(int),
@@ -329,6 +318,12 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
 	}
 	BUG_ON(p4d_bad(p4d));
 
+	/*
+	 * No need for locking during early boot. And it doesn't work as
+	 * expected with KASLR enabled.
+	 */
+	if (system_state != SYSTEM_BOOTING)
+		mutex_lock(&fixmap_lock);
 	pudp = pud_set_fixmap_offset(p4dp, addr);
 	do {
 		pud_t old_pud = READ_ONCE(*pudp);
@@ -338,7 +333,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
 		/*
 		 * For 4K granule only, attempt to put down a 1GB block
 		 */
-		if (use_1G_block(addr, next, phys) &&
+		if (pud_sect_supported() &&
+		   ((addr | next | phys) & ~PUD_MASK) == 0 &&
 		    (flags & NO_BLOCK_MAPPINGS) == 0) {
 			pud_set_huge(pudp, phys, prot);
 
@@ -359,6 +355,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
 	} while (pudp++, addr = next, addr != end);
 
 	pud_clear_fixmap();
+	if (system_state != SYSTEM_BOOTING)
+		mutex_unlock(&fixmap_lock);
 }
 
 static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
@@ -517,7 +515,7 @@ static void __init map_mem(pgd_t *pgdp)
 	 */
 	BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
 
-	if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE))
+	if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE))
 		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
 	/*
@@ -528,6 +526,17 @@ static void __init map_mem(pgd_t *pgdp)
 	 */
 	memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
 
+#ifdef CONFIG_KEXEC_CORE
+	if (crash_mem_map) {
+		if (IS_ENABLED(CONFIG_ZONE_DMA) ||
+		    IS_ENABLED(CONFIG_ZONE_DMA32))
+			flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+		else if (crashk_res.end)
+			memblock_mark_nomap(crashk_res.start,
+			    resource_size(&crashk_res));
+	}
+#endif
+
 	/* map all the memory banks */
 	for_each_mem_range(i, &start, &end) {
 		if (start >= end)
@@ -554,6 +563,25 @@ static void __init map_mem(pgd_t *pgdp)
 	__map_memblock(pgdp, kernel_start, kernel_end,
 		       PAGE_KERNEL, NO_CONT_MAPPINGS);
 	memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
+
+	/*
+	 * Use page-level mappings here so that we can shrink the region
+	 * in page granularity and put back unused memory to buddy system
+	 * through /sys/kernel/kexec_crash_size interface.
+	 */
+#ifdef CONFIG_KEXEC_CORE
+	if (crash_mem_map &&
+	    !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) {
+		if (crashk_res.end) {
+			__map_memblock(pgdp, crashk_res.start,
+				       crashk_res.end + 1,
+				       PAGE_KERNEL,
+				       NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
+			memblock_clear_nomap(crashk_res.start,
+					     resource_size(&crashk_res));
+		}
+	}
+#endif
 }
 
 void mark_rodata_ro(void)
@@ -617,6 +645,8 @@ early_param("rodata", parse_rodata);
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 static int __init map_entry_trampoline(void)
 {
+	int i;
+
 	pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
 	phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
 
@@ -625,11 +655,15 @@ static int __init map_entry_trampoline(void)
 
 	/* Map only the text into the trampoline page table */
 	memset(tramp_pg_dir, 0, PGD_SIZE);
-	__create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE,
-			     prot, __pgd_pgtable_alloc, 0);
+	__create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS,
+			     entry_tramp_text_size(), prot,
+			     __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS);
 
 	/* Map both the text and data into the kernel page table */
-	__set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot);
+	for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++)
+		__set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i,
+			     pa_start + i * PAGE_SIZE, prot);
+
 	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
 		extern char __entry_tramp_data_start[];
 
diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
index 7c4ef56265ee..a9e50e930484 100644
--- a/arch/arm64/mm/mteswap.c
+++ b/arch/arm64/mm/mteswap.c
@@ -12,7 +12,7 @@ static DEFINE_XARRAY(mte_pages);
 void *mte_allocate_tag_storage(void)
 {
 	/* tags granule is 16 bytes, 2 tags stored per byte */
-	return kmalloc(PAGE_SIZE / 16 / 2, GFP_KERNEL);
+	return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
 }
 
 void mte_free_tag_storage(char *storage)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index d35c90d2e47a..50bbed947bec 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -46,7 +46,7 @@
 #endif
 
 #ifdef CONFIG_KASAN_HW_TAGS
-#define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
+#define TCR_MTE_FLAGS TCR_TCMA1 | TCR_TBI1 | TCR_TBID1
 #else
 /*
  * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index cc0cf0f5c7c3..9d9250c7cc72 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -89,9 +89,16 @@
 #define A64_STXR(sf, Rt, Rn, Rs) \
 	A64_LSX(sf, Rt, Rn, Rs, STORE_EX)
 
-/* LSE atomics */
+/*
+ * LSE atomics
+ *
+ * STADD is simply encoded as an alias for LDADD with XZR as
+ * the destination register.
+ */
 #define A64_STADD(sf, Rn, Rs) \
-	aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf))
+	aarch64_insn_gen_atomic_ld_op(A64_ZR, Rn, Rs, \
+		A64_SIZE(sf), AARCH64_INSN_MEM_ATOMIC_ADD, \
+		AARCH64_INSN_MEM_ORDER_NONE)
 
 /* Add/subtract (immediate) */
 #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \
diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile
index 932b4fe5c768..cf1307188150 100644
--- a/arch/arm64/tools/Makefile
+++ b/arch/arm64/tools/Makefile
@@ -5,18 +5,14 @@ kapi := $(gen)/asm
 
 kapi-hdrs-y := $(kapi)/cpucaps.h
 
-targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y))
+targets += $(addprefix ../../../, $(kapi-hdrs-y))
 
 PHONY += kapi
 
-kapi:   $(kapi-hdrs-y) $(gen-y)
-
-# Create output directory if not already present
-_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
+kapi:   $(kapi-hdrs-y)
 
 quiet_cmd_gen_cpucaps = GEN     $@
-      cmd_gen_cpucaps = mkdir -p $(dir $@) && \
-                     $(AWK) -f $(filter-out $(PHONY),$^) > $@
+      cmd_gen_cpucaps = mkdir -p $(dir $@); $(AWK) -f $(real-prereqs) > $@
 
 $(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE
 	$(call if_changed,gen_cpucaps)
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 9c65b1e25a96..3ed418f70e3b 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -7,7 +7,8 @@ BTI
 HAS_32BIT_EL0_DO_NOT_USE
 HAS_32BIT_EL1
 HAS_ADDRESS_AUTH
-HAS_ADDRESS_AUTH_ARCH
+HAS_ADDRESS_AUTH_ARCH_QARMA3
+HAS_ADDRESS_AUTH_ARCH_QARMA5
 HAS_ADDRESS_AUTH_IMP_DEF
 HAS_AMU_EXTN
 HAS_ARMv8_4_TTL
@@ -21,7 +22,8 @@ HAS_E0PD
 HAS_ECV
 HAS_EPAN
 HAS_GENERIC_AUTH
-HAS_GENERIC_AUTH_ARCH
+HAS_GENERIC_AUTH_ARCH_QARMA3
+HAS_GENERIC_AUTH_ARCH_QARMA5
 HAS_GENERIC_AUTH_IMP_DEF
 HAS_IRQ_PRIO_MASKING
 HAS_LDAPR
@@ -44,6 +46,7 @@ MTE_ASYMM
 SPECTRE_V2
 SPECTRE_V3A
 SPECTRE_V4
+SPECTRE_BHB
 SSBS
 SVE
 UNMAP_KERNEL_AT_EL0
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index a7e01573abd8..e003b2473c64 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -8,6 +8,7 @@ menu "Processor type and features"
 
 config IA64
 	bool
+	select ARCH_BINFMT_ELF_EXTRA_PHDRS
 	select ARCH_HAS_DMA_MARK_CLEAN
 	select ARCH_HAS_STRNCPY_FROM_USER
 	select ARCH_HAS_STRNLEN_USER
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index 51d20cb37706..1684716f0820 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -55,15 +55,15 @@ struct thread_info {
 #ifndef ASM_OFFSETS_C
 /* how to get the thread information struct from C */
 #define current_thread_info()	((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
-#define alloc_thread_stack_node(tsk, node)	\
+#define arch_alloc_thread_stack_node(tsk, node)	\
 		((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE))
 #define task_thread_info(tsk)	((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
 #else
 #define current_thread_info()	((struct thread_info *) 0)
-#define alloc_thread_stack_node(tsk, node)	((unsigned long *) 0)
+#define arch_alloc_thread_stack_node(tsk, node)	((unsigned long *) 0)
 #define task_thread_info(tsk)	((struct thread_info *) 0)
 #endif
-#define free_thread_stack(tsk)	/* nothing */
+#define arch_free_thread_stack(tsk)	/* nothing */
 #define task_stack_page(tsk)	((void *)(tsk))
 
 #define __HAVE_THREAD_FUNCTIONS
diff --git a/arch/ia64/include/asm/xor.h b/arch/ia64/include/asm/xor.h
index 673051bf9d7d..6785f70d3208 100644
--- a/arch/ia64/include/asm/xor.h
+++ b/arch/ia64/include/asm/xor.h
@@ -4,13 +4,20 @@
  */
 
 
-extern void xor_ia64_2(unsigned long, unsigned long *, unsigned long *);
-extern void xor_ia64_3(unsigned long, unsigned long *, unsigned long *,
-		       unsigned long *);
-extern void xor_ia64_4(unsigned long, unsigned long *, unsigned long *,
-		       unsigned long *, unsigned long *);
-extern void xor_ia64_5(unsigned long, unsigned long *, unsigned long *,
-		       unsigned long *, unsigned long *, unsigned long *);
+extern void xor_ia64_2(unsigned long bytes, unsigned long * __restrict p1,
+		       const unsigned long * __restrict p2);
+extern void xor_ia64_3(unsigned long bytes, unsigned long * __restrict p1,
+		       const unsigned long * __restrict p2,
+		       const unsigned long * __restrict p3);
+extern void xor_ia64_4(unsigned long bytes, unsigned long * __restrict p1,
+		       const unsigned long * __restrict p2,
+		       const unsigned long * __restrict p3,
+		       const unsigned long * __restrict p4);
+extern void xor_ia64_5(unsigned long bytes, unsigned long * __restrict p1,
+		       const unsigned long * __restrict p2,
+		       const unsigned long * __restrict p3,
+		       const unsigned long * __restrict p4,
+		       const unsigned long * __restrict p5);
 
 static struct xor_block_template xor_block_ia64 = {
 	.name =	"ia64",
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index be2dfab48fd4..3137b45750df 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -37,6 +37,7 @@
 #include <asm/irq.h>
 #include <asm/machdep.h>
 #include <asm/io.h>
+#include <asm/config.h>
 
 static unsigned long amiga_model;
 
diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c
index 581a5f68d102..42a8b8e2b664 100644
--- a/arch/m68k/apollo/config.c
+++ b/arch/m68k/apollo/config.c
@@ -16,6 +16,7 @@
 #include <asm/apollohw.h>
 #include <asm/irq.h>
 #include <asm/machdep.h>
+#include <asm/config.h>
 
 u_long sio01_physaddr;
 u_long sio23_physaddr;
diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c
index 261a0f57cc9a..38a7c0578105 100644
--- a/arch/m68k/atari/config.c
+++ b/arch/m68k/atari/config.c
@@ -46,6 +46,7 @@
 #include <asm/machdep.h>
 #include <asm/hwtest.h>
 #include <asm/io.h>
+#include <asm/config.h>
 
 u_long atari_mch_cookie;
 EXPORT_SYMBOL(atari_mch_cookie);
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 0c6feafbbd11..9b060d466e03 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -36,6 +36,7 @@
 #include <asm/traps.h>
 #include <asm/machdep.h>
 #include <asm/bvme6000hw.h>
+#include <asm/config.h>
 
 static void bvme6000_get_model(char *model);
 extern void bvme6000_sched_init(void);
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index bc9952f8be66..49f301c57df5 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -104,7 +104,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -204,7 +203,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -229,7 +227,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -435,6 +432,7 @@ CONFIG_FB_AMIGA_ECS=y
 CONFIG_FB_AMIGA_AGA=y
 CONFIG_FB_FM2=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
 CONFIG_DMASOUND_PAULA=m
@@ -643,7 +641,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index a77269c6e5ba..405997b61447 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -100,7 +100,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -200,7 +199,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -225,7 +223,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -393,6 +390,7 @@ CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
 CONFIG_FB=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_VGA16 is not set
 # CONFIG_LOGO_LINUX_CLUT224 is not set
@@ -599,7 +597,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 7a74efa6b9a1..eb342a33b73e 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -107,7 +107,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -207,7 +206,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -232,7 +230,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -621,7 +618,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index a5323bf2eb33..e6de6b4dff86 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -97,7 +97,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -197,7 +196,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -222,7 +220,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -592,7 +589,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 5e80aa0869d5..048d9b114eb4 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -99,7 +99,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -199,7 +198,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -224,7 +222,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -395,6 +392,7 @@ CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
 CONFIG_FB=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_LOGO_LINUX_VGA16 is not set
@@ -601,7 +599,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index e84326a3f62d..4e5b32ba00df 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -98,7 +98,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -198,7 +197,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -223,7 +221,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -623,7 +620,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 337552f43339..7df61e743591 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -118,7 +118,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -218,7 +217,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -243,7 +241,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -497,6 +494,7 @@ CONFIG_FB_ATARI=y
 CONFIG_FB_VALKYRIE=y
 CONFIG_FB_MAC=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
 CONFIG_DMASOUND_ATARI=m
@@ -708,7 +706,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 7b688f7d272a..80922fe80d9d 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -96,7 +96,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -196,7 +195,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -221,7 +219,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -591,7 +588,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 7c2cb31d63dd..530c4cf7c59b 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -97,7 +97,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -197,7 +196,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -222,7 +220,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -592,7 +589,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index ca43897af26d..d3f371e490ec 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -98,7 +98,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -198,7 +197,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -223,7 +221,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -610,7 +607,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index e3d515f37144..db6769790bdb 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -94,7 +94,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -194,7 +193,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -219,7 +217,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -388,9 +385,6 @@ CONFIG_NTP_PPS=y
 CONFIG_PPS_CLIENT_LDISC=m
 CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
-CONFIG_FB=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_LOGO=y
 CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
@@ -593,7 +587,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index d601606c969b..e9c362683666 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -94,7 +94,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -194,7 +193,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -219,7 +217,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -387,9 +384,6 @@ CONFIG_NTP_PPS=y
 CONFIG_PPS_CLIENT_LDISC=m
 CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
-CONFIG_FB=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_LOGO=y
 CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
@@ -593,7 +587,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c
index ce1eb3d3d55d..2c92843397c3 100644
--- a/arch/m68k/hp300/config.c
+++ b/arch/m68k/hp300/config.c
@@ -22,6 +22,7 @@
 #include <asm/blinken.h>
 #include <asm/io.h>                               /* readb() and writeb() */
 #include <asm/hp300hw.h>
+#include <asm/config.h>
 
 #include "time.h"
 
diff --git a/arch/m68k/include/asm/config.h b/arch/m68k/include/asm/config.h
new file mode 100644
index 000000000000..e73ffa23c4f5
--- /dev/null
+++ b/arch/m68k/include/asm/config.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * This file contains prototypes provided by each m68k machine
+ * to parse bootinfo data structures and to configure the machine
+ */
+
+#ifndef _M68K_CONFIG_H
+#define _M68K_CONFIG_H
+
+extern int amiga_parse_bootinfo(const struct bi_record *record);
+extern int apollo_parse_bootinfo(const struct bi_record *record);
+extern int atari_parse_bootinfo(const struct bi_record *record);
+extern int bvme6000_parse_bootinfo(const struct bi_record *record);
+extern int hp300_parse_bootinfo(const struct bi_record *record);
+extern int mac_parse_bootinfo(const struct bi_record *record);
+extern int mvme147_parse_bootinfo(const struct bi_record *record);
+extern int mvme16x_parse_bootinfo(const struct bi_record *record);
+extern int q40_parse_bootinfo(const struct bi_record *record);
+
+extern void config_amiga(void);
+extern void config_apollo(void);
+extern void config_atari(void);
+extern void config_bvme6000(void);
+extern void config_hp300(void);
+extern void config_mac(void);
+extern void config_mvme147(void);
+extern void config_mvme16x(void);
+extern void config_q40(void);
+extern void config_sun3(void);
+extern void config_sun3x(void);
+
+#endif /* _M68K_CONFIG_H */
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 49e573b94326..8228275aae3e 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -47,6 +47,7 @@
 #endif
 #include <asm/macintosh.h>
 #include <asm/natfeat.h>
+#include <asm/config.h>
 
 #if !FPSTATESIZE || !NR_IRQS
 #warning No CPU/platform type selected, your kernel will not work!
@@ -113,28 +114,6 @@ EXPORT_SYMBOL(isa_type);
 EXPORT_SYMBOL(isa_sex);
 #endif
 
-extern int amiga_parse_bootinfo(const struct bi_record *);
-extern int atari_parse_bootinfo(const struct bi_record *);
-extern int mac_parse_bootinfo(const struct bi_record *);
-extern int q40_parse_bootinfo(const struct bi_record *);
-extern int bvme6000_parse_bootinfo(const struct bi_record *);
-extern int mvme16x_parse_bootinfo(const struct bi_record *);
-extern int mvme147_parse_bootinfo(const struct bi_record *);
-extern int hp300_parse_bootinfo(const struct bi_record *);
-extern int apollo_parse_bootinfo(const struct bi_record *);
-
-extern void config_amiga(void);
-extern void config_atari(void);
-extern void config_mac(void);
-extern void config_sun3(void);
-extern void config_apollo(void);
-extern void config_mvme147(void);
-extern void config_mvme16x(void);
-extern void config_bvme6000(void);
-extern void config_hp300(void);
-extern void config_q40(void);
-extern void config_sun3x(void);
-
 #define MASK_256K 0xfffc0000
 
 extern void paging_init(void);
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 5d16f9b47aa9..65d124ec80bb 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -47,6 +47,7 @@
 #include <asm/mac_via.h>
 #include <asm/mac_oss.h>
 #include <asm/mac_psc.h>
+#include <asm/config.h>
 
 /* Mac bootinfo struct */
 struct mac_booter_data mac_bi_data;
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 1493cf5eac1e..71aa9f6315dc 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -93,8 +93,6 @@ retry:
 	vma = find_vma(mm, address);
 	if (!vma)
 		goto map_err;
-	if (vma->vm_flags & VM_IO)
-		goto acc_err;
 	if (vma->vm_start <= address)
 		goto good_area;
 	if (!(vma->vm_flags & VM_GROWSDOWN))
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index dfd6202fd403..b96ea7c76a19 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -34,6 +34,7 @@
 #include <asm/traps.h>
 #include <asm/machdep.h>
 #include <asm/mvme147hw.h>
+#include <asm/config.h>
 
 
 static void mvme147_get_model(char *model);
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index b4422c2dfbbf..88cbdc10925b 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -37,6 +37,7 @@
 #include <asm/traps.h>
 #include <asm/machdep.h>
 #include <asm/mvme16xhw.h>
+#include <asm/config.h>
 
 extern t_bdid mvme_bdid;
 
diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c
index 5caf1e5be1c2..9237243077ce 100644
--- a/arch/m68k/q40/config.c
+++ b/arch/m68k/q40/config.c
@@ -34,6 +34,7 @@
 #include <asm/traps.h>
 #include <asm/machdep.h>
 #include <asm/q40_master.h>
+#include <asm/config.h>
 
 extern void q40_init_IRQ(void);
 static void q40_get_model(char *model);
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 59798e43cdb0..da568e981604 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -45,6 +45,8 @@ config MICROBLAZE
 	select SET_FS
 	select ZONE_DMA
 	select TRACE_IRQFLAGS_SUPPORT
+	select GENERIC_IRQ_MULTI_HANDLER
+	select HANDLE_DOMAIN_IRQ
 
 # Endianness selection
 choice
diff --git a/arch/microblaze/include/asm/irq.h b/arch/microblaze/include/asm/irq.h
index 0a28e80bbab0..cb6ab55d1d01 100644
--- a/arch/microblaze/include/asm/irq.h
+++ b/arch/microblaze/include/asm/irq.h
@@ -11,7 +11,4 @@
 struct pt_regs;
 extern void do_IRQ(struct pt_regs *regs);
 
-/* should be defined in each interrupt controller driver */
-extern unsigned int xintc_get_irq(void);
-
 #endif /* _ASM_MICROBLAZE_IRQ_H */
diff --git a/arch/microblaze/kernel/irq.c b/arch/microblaze/kernel/irq.c
index 903dad822fad..1f8cb4c4f74f 100644
--- a/arch/microblaze/kernel/irq.c
+++ b/arch/microblaze/kernel/irq.c
@@ -20,27 +20,13 @@
 #include <linux/irqchip.h>
 #include <linux/of_irq.h>
 
-static u32 concurrent_irq;
-
 void __irq_entry do_IRQ(struct pt_regs *regs)
 {
-	unsigned int irq;
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	trace_hardirqs_off();
 
 	irq_enter();
-	irq = xintc_get_irq();
-next_irq:
-	BUG_ON(!irq);
-	generic_handle_irq(irq);
-
-	irq = xintc_get_irq();
-	if (irq != -1U) {
-		pr_debug("next irq: %d\n", irq);
-		++concurrent_irq;
-		goto next_irq;
-	}
-
+	handle_arch_irq(regs);
 	irq_exit();
 	set_irq_regs(old_regs);
 	trace_hardirqs_on();
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 43c1c880def6..864b4756dcdf 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -10,10 +10,12 @@ config PARISC
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_NO_SG_CHAIN
 	select ARCH_SUPPORTS_HUGETLBFS if PA20
 	select ARCH_SUPPORTS_MEMORY_FAILURE
 	select ARCH_STACKWALK
+	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select HAVE_RELIABLE_STACKTRACE
 	select DMA_OPS
 	select RTC_CLASS
@@ -259,18 +261,6 @@ config PARISC_PAGE_SIZE_64KB
 
 endchoice
 
-config PARISC_SELF_EXTRACT
-	bool "Build kernel as self-extracting executable"
-	default y
-	help
-	  Say Y if you want to build the parisc kernel as a kind of
-	  self-extracting executable.
-
-	  If you say N here, the kernel will be compressed with gzip
-	  which can be loaded by the palo bootloader directly too.
-
-	  If you don't know what to do here, say Y.
-
 config SMP
 	bool "Symmetric multi-processing support"
 	help
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 82d77f4b0d08..2a9387a93592 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -15,12 +15,8 @@
 # Mike Shaver, Helge Deller and Martin K. Petersen
 #
 
-ifdef CONFIG_PARISC_SELF_EXTRACT
 boot := arch/parisc/boot
 KBUILD_IMAGE := $(boot)/bzImage
-else
-KBUILD_IMAGE := vmlinuz
-endif
 
 NM		= sh $(srctree)/arch/parisc/nm
 CHECKFLAGS	+= -D__hppa__=1
@@ -44,6 +40,16 @@ endif
 
 export LD_BFD
 
+# Set default 32 bits cross compilers for vdso
+CC_ARCHES_32 = hppa hppa2.0 hppa1.1
+CC_SUFFIXES  = linux linux-gnu unknown-linux-gnu
+CROSS32_COMPILE := $(call cc-cross-prefix, \
+	$(foreach a,$(CC_ARCHES_32), \
+	$(foreach s,$(CC_SUFFIXES),$(a)-$(s)-)))
+CROSS32CC := $(CROSS32_COMPILE)gcc
+export CROSS32CC
+
+# Set default cross compiler for kernel build
 ifdef cross_compiling
 	ifeq ($(CROSS_COMPILE),)
 		CC_SUFFIXES = linux linux-gnu unknown-linux-gnu
@@ -155,14 +161,29 @@ Image: vmlinux
 bzImage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
-ifdef CONFIG_PARISC_SELF_EXTRACT
 vmlinuz: bzImage
 	$(OBJCOPY) $(boot)/bzImage $@
-else
-vmlinuz: vmlinux
-	@$(KGZIP) -cf -9 $< > $@
+
+ifeq ($(KBUILD_EXTMOD),)
+# We need to generate vdso-offsets.h before compiling certain files in kernel/.
+# In order to do that, we should use the archprepare target, but we can't since
+# asm-offsets.h is included in some files used to generate vdso-offsets.h, and
+# asm-offsets.h is built in prepare0, for which archprepare is a dependency.
+# Therefore we need to generate the header after prepare0 has been made, hence
+# this hack.
+prepare: vdso_prepare
+vdso_prepare: prepare0
+	$(if $(CONFIG_64BIT),$(Q)$(MAKE) \
+		$(build)=arch/parisc/kernel/vdso64 include/generated/vdso64-offsets.h)
+	$(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso32 include/generated/vdso32-offsets.h
 endif
 
+PHONY += vdso_install
+
+vdso_install:
+	$(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso $@
+	$(if $(CONFIG_COMPAT_VDSO), \
+		$(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso32 $@)
 install:
 	$(CONFIG_SHELL) $(srctree)/arch/parisc/install.sh \
 			$(KERNELRELEASE) vmlinux System.map "$(INSTALL_PATH)"
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index 6369082c6c74..ea0cb318b13d 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -47,6 +47,12 @@
 #define PRIV_USER	3
 #define PRIV_KERNEL	0
 
+/* Space register used inside kernel */
+#define SR_KERNEL	0
+#define SR_TEMP1	1
+#define SR_TEMP2	2
+#define SR_USER		3
+
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_64BIT
diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h
index d53e9e27dba0..5032e758594e 100644
--- a/arch/parisc/include/asm/cache.h
+++ b/arch/parisc/include/asm/cache.h
@@ -39,16 +39,13 @@ extern int icache_stride;
 extern struct pdc_cache_info cache_info;
 void parisc_setup_cache_timing(void);
 
-#define pdtlb(addr)	asm volatile("pdtlb 0(%%sr1,%0)" \
+#define pdtlb(sr, addr)	asm volatile("pdtlb 0(%%sr%0,%1)" \
 			ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \
-			: : "r" (addr) : "memory")
-#define pitlb(addr)	asm volatile("pitlb 0(%%sr1,%0)" \
+			: : "i"(sr), "r" (addr) : "memory")
+#define pitlb(sr, addr)	asm volatile("pitlb 0(%%sr%0,%1)" \
 			ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \
 			ALTERNATIVE(ALT_COND_NO_SPLIT_TLB, INSN_NOP) \
-			: : "r" (addr) : "memory")
-#define pdtlb_kernel(addr)  asm volatile("pdtlb 0(%0)"   \
-			ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \
-			: : "r" (addr) : "memory")
+			: : "i"(sr), "r" (addr) : "memory")
 
 #define asm_io_fdc(addr) asm volatile("fdc %%r0(%0)" \
 			ALTERNATIVE(ALT_COND_NO_DCACHE, INSN_NOP) \
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 859b8a34adcf..e8b4a03343d3 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -9,16 +9,11 @@
 /* The usual comment is "Caches aren't brain-dead on the <architecture>".
  * Unfortunately, that doesn't apply to PA-RISC. */
 
-/* Internal implementation */
-void flush_data_cache_local(void *);  /* flushes local data-cache only */
-void flush_instruction_cache_local(void *); /* flushes local code-cache only */
-#ifdef CONFIG_SMP
-void flush_data_cache(void); /* flushes data-cache only (all processors) */
-void flush_instruction_cache(void); /* flushes i-cache only (all processors) */
-#else
-#define flush_data_cache() flush_data_cache_local(NULL)
-#define flush_instruction_cache() flush_instruction_cache_local(NULL)
-#endif
+#include <linux/jump_label.h>
+
+DECLARE_STATIC_KEY_TRUE(parisc_has_cache);
+DECLARE_STATIC_KEY_TRUE(parisc_has_dcache);
+DECLARE_STATIC_KEY_TRUE(parisc_has_icache);
 
 #define flush_cache_dup_mm(mm) flush_cache_mm(mm)
 
diff --git a/arch/parisc/include/asm/current.h b/arch/parisc/include/asm/current.h
index 568b739e42af..dc7aea07c3f3 100644
--- a/arch/parisc/include/asm/current.h
+++ b/arch/parisc/include/asm/current.h
@@ -2,14 +2,16 @@
 #ifndef _ASM_PARISC_CURRENT_H
 #define _ASM_PARISC_CURRENT_H
 
-#include <asm/special_insns.h>
-
 #ifndef __ASSEMBLY__
 struct task_struct;
 
 static __always_inline struct task_struct *get_current(void)
 {
-	return (struct task_struct *) mfctl(30);
+	struct task_struct *ts;
+
+	/* do not use mfctl() macro as it is marked volatile */
+	asm( "mfctl %%cr30,%0" : "=r" (ts) );
+	return ts;
 }
 
 #define current get_current()
diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
index 3bd465a27791..cc426d365892 100644
--- a/arch/parisc/include/asm/elf.h
+++ b/arch/parisc/include/asm/elf.h
@@ -359,4 +359,19 @@ struct mm_struct;
 extern unsigned long arch_randomize_brk(struct mm_struct *);
 #define arch_randomize_brk arch_randomize_brk
 
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+					int executable_stack);
+#define VDSO_AUX_ENT(a, b) NEW_AUX_ENT(a, b)
+#define VDSO_CURRENT_BASE current->mm->context.vdso_base
+
+#define ARCH_DLINFO						\
+do {								\
+	if (VDSO_CURRENT_BASE) {				\
+		NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE);\
+	}							\
+} while (0)
+
 #endif
diff --git a/arch/parisc/include/asm/kprobes.h b/arch/parisc/include/asm/kprobes.h
index 904034da4974..0a175ac87698 100644
--- a/arch/parisc/include/asm/kprobes.h
+++ b/arch/parisc/include/asm/kprobes.h
@@ -18,8 +18,9 @@
 #include <linux/notifier.h>
 
 #define PARISC_KPROBES_BREAK_INSN	0x3ff801f
+#define PARISC_KPROBES_BREAK_INSN2	0x3ff801e
 #define  __ARCH_WANT_KPROBES_INSN_SLOT
-#define MAX_INSN_SIZE 1
+#define MAX_INSN_SIZE 2
 
 typedef u32 kprobe_opcode_t;
 struct kprobe;
@@ -29,7 +30,7 @@ void arch_remove_kprobe(struct kprobe *p);
 #define flush_insn_slot(p) \
 	flush_icache_range((unsigned long)&(p)->ainsn.insn[0], \
 			   (unsigned long)&(p)->ainsn.insn[0] + \
-			   sizeof(kprobe_opcode_t))
+			   MAX_INSN_SIZE*sizeof(kprobe_opcode_t))
 
 #define kretprobe_blacklist_size    0
 
diff --git a/arch/parisc/include/asm/mmu.h b/arch/parisc/include/asm/mmu.h
index 3fb70a601d5c..44fd062b62ed 100644
--- a/arch/parisc/include/asm/mmu.h
+++ b/arch/parisc/include/asm/mmu.h
@@ -2,7 +2,9 @@
 #ifndef _PARISC_MMU_H_
 #define _PARISC_MMU_H_
 
-/* On parisc, we store the space id here */
-typedef unsigned long mm_context_t;
+typedef struct {
+	unsigned long space_id;
+	unsigned long vdso_base;
+} mm_context_t;
 
 #endif /* _PARISC_MMU_H_ */
diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h
index 726257648d9f..c9187fe836a3 100644
--- a/arch/parisc/include/asm/mmu_context.h
+++ b/arch/parisc/include/asm/mmu_context.h
@@ -20,7 +20,7 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
 	BUG_ON(atomic_read(&mm->mm_users) != 1);
 
-	mm->context = alloc_sid();
+	mm->context.space_id = alloc_sid();
 	return 0;
 }
 
@@ -28,22 +28,22 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 static inline void
 destroy_context(struct mm_struct *mm)
 {
-	free_sid(mm->context);
-	mm->context = 0;
+	free_sid(mm->context.space_id);
+	mm->context.space_id = 0;
 }
 
 static inline unsigned long __space_to_prot(mm_context_t context)
 {
 #if SPACEID_SHIFT == 0
-	return context << 1;
+	return context.space_id << 1;
 #else
-	return context >> (SPACEID_SHIFT - 1);
+	return context.space_id >> (SPACEID_SHIFT - 1);
 #endif
 }
 
 static inline void load_context(mm_context_t context)
 {
-	mtsp(context, 3);
+	mtsp(context.space_id, SR_USER);
 	mtctl(__space_to_prot(context), 8);
 }
 
@@ -89,8 +89,8 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
 
 	BUG_ON(next == &init_mm); /* Should never happen */
 
-	if (next->context == 0)
-	    next->context = alloc_sid();
+	if (next->context.space_id == 0)
+		next->context.space_id = alloc_sid();
 
 	switch_mm(prev,next,current);
 }
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 3e7cf882639f..7dff736936d0 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -70,9 +70,9 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 	unsigned long flags;
 
 	purge_tlb_start(flags);
-	mtsp(mm->context, 1);
-	pdtlb(addr);
-	pitlb(addr);
+	mtsp(mm->context.space_id, SR_TEMP1);
+	pdtlb(SR_TEMP1, addr);
+	pitlb(SR_TEMP1, addr);
 	purge_tlb_end(flags);
 }
 
@@ -219,9 +219,10 @@ extern void __update_cache(pte_t pte);
 #define _PAGE_PRESENT  (1 << xlate_pabit(_PAGE_PRESENT_BIT))
 #define _PAGE_HUGE     (1 << xlate_pabit(_PAGE_HPAGE_BIT))
 #define _PAGE_USER     (1 << xlate_pabit(_PAGE_USER_BIT))
+#define _PAGE_SPECIAL  (_PAGE_DMB)
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)
-#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL)
 #define _PAGE_KERNEL_RO	(_PAGE_PRESENT | _PAGE_READ | _PAGE_DIRTY | _PAGE_ACCESSED)
 #define _PAGE_KERNEL_EXEC	(_PAGE_KERNEL_RO | _PAGE_EXEC)
 #define _PAGE_KERNEL_RWX	(_PAGE_KERNEL_EXEC | _PAGE_WRITE)
@@ -348,6 +349,7 @@ static inline void pud_clear(pud_t *pud) {
 static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_WRITE; }
+static inline int pte_special(pte_t pte)	{ return pte_val(pte) & _PAGE_SPECIAL; }
 
 static inline pte_t pte_mkclean(pte_t pte)	{ pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkold(pte_t pte)	{ pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
@@ -355,6 +357,7 @@ static inline pte_t pte_wrprotect(pte_t pte)	{ pte_val(pte) &= ~_PAGE_WRITE; ret
 static inline pte_t pte_mkdirty(pte_t pte)	{ pte_val(pte) |= _PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkyoung(pte_t pte)	{ pte_val(pte) |= _PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mkwrite(pte_t pte)	{ pte_val(pte) |= _PAGE_WRITE; return pte; }
+static inline pte_t pte_mkspecial(pte_t pte)	{ pte_val(pte) |= _PAGE_SPECIAL; return pte; }
 
 /*
  * Huge pte definitions.
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index 3a3d05438408..006364212795 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -236,7 +236,7 @@ on downward growing arches, it looks like this:
 
 #define start_thread(regs, new_pc, new_sp) do {		\
 	elf_addr_t *sp = (elf_addr_t *)new_sp;		\
-	__u32 spaceid = (__u32)current->mm->context;	\
+	__u32 spaceid = (__u32)current->mm->context.space_id;	\
 	elf_addr_t pc = (elf_addr_t)new_pc | 3;		\
 	elf_caddr_t *argv = (elf_caddr_t *)bprm->exec + 1;	\
 							\
diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h
index 2b3010ade00e..bb7fb4153327 100644
--- a/arch/parisc/include/asm/rt_sigframe.h
+++ b/arch/parisc/include/asm/rt_sigframe.h
@@ -2,16 +2,8 @@
 #ifndef _ASM_PARISC_RT_SIGFRAME_H
 #define _ASM_PARISC_RT_SIGFRAME_H
 
-#define SIGRETURN_TRAMP 4
-#define SIGRESTARTBLOCK_TRAMP 5 
-#define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP)
-
 struct rt_sigframe {
-	/* XXX: Must match trampoline size in arch/parisc/kernel/signal.c 
-	        Secondary to that it must protect the ERESTART_RESTARTBLOCK
-		trampoline we left on the stack (we were bad and didn't 
-		change sp so we could run really fast.) */
-	unsigned int tramp[TRAMP_SIZE];
+	unsigned int tramp[2]; /* holds original return address */
 	struct siginfo info;
 	struct ucontext uc;
 };
diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h
index 16ee41e77174..41b3ddbd344c 100644
--- a/arch/parisc/include/asm/special_insns.h
+++ b/arch/parisc/include/asm/special_insns.h
@@ -55,8 +55,8 @@ static inline void set_eiem(unsigned long val)
 #define mfsp(reg)	({		\
 	unsigned long cr;		\
 	__asm__ __volatile__(		\
-		"mfsp " #reg ",%0" :	\
-		 "=r" (cr)		\
+		"mfsp %%sr%1,%0"	\
+		: "=r" (cr) : "i"(reg)	\
 	);				\
 	cr;				\
 })
diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
index c5ded01d45be..5ffd7c17f593 100644
--- a/arch/parisc/include/asm/tlbflush.h
+++ b/arch/parisc/include/asm/tlbflush.h
@@ -17,7 +17,7 @@ int __flush_tlb_range(unsigned long sid,
 	unsigned long start, unsigned long end);
 
 #define flush_tlb_range(vma, start, end) \
-	__flush_tlb_range((vma)->vm_mm->context, start, end)
+	__flush_tlb_range((vma)->vm_mm->context.space_id, start, end)
 
 #define flush_tlb_kernel_range(start, end) \
 	__flush_tlb_range(0, start, end)
diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h
index 34619f010c63..0ccdb738a9a3 100644
--- a/arch/parisc/include/asm/traps.h
+++ b/arch/parisc/include/asm/traps.h
@@ -18,6 +18,7 @@ unsigned long parisc_acctyp(unsigned long code, unsigned int inst);
 const char *trap_name(unsigned long code);
 void do_page_fault(struct pt_regs *regs, unsigned long code,
 		unsigned long address);
+int handle_nadtlb_fault(struct pt_regs *regs);
 #endif
 
 #endif
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 123d5f16cd9d..b3eb4541e441 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -79,18 +79,18 @@ struct exception_table_entry {
 
 #define __get_user(val, ptr)				\
 ({							\
-	__get_user_internal("%%sr3,", val, ptr);	\
+	__get_user_internal(SR_USER, val, ptr);	\
 })
 
 #define __get_user_asm(sr, val, ldx, ptr)		\
 {							\
 	register long __gu_val;				\
 							\
-	__asm__("1: " ldx " 0(" sr "%2),%0\n"		\
+	__asm__("1: " ldx " 0(%%sr%2,%3),%0\n"		\
 		"9:\n"					\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)	\
 		: "=r"(__gu_val), "+r"(__gu_err)        \
-		: "r"(ptr));				\
+		: "i"(sr), "r"(ptr));			\
 							\
 	(val) = (__force __typeof__(*(ptr))) __gu_val;	\
 }
@@ -100,7 +100,7 @@ struct exception_table_entry {
 {							\
 	type __z;					\
 	long __err;					\
-	__err = __get_user_internal("%%sr0,", __z, (type *)(src)); \
+	__err = __get_user_internal(SR_KERNEL, __z, (type *)(src)); \
 	if (unlikely(__err))				\
 		goto err_label;				\
 	else						\
@@ -118,13 +118,13 @@ struct exception_table_entry {
 	} __gu_tmp;					\
 							\
 	__asm__("   copy %%r0,%R0\n"			\
-		"1: ldw 0(" sr "%2),%0\n"		\
-		"2: ldw 4(" sr "%2),%R0\n"		\
+		"1: ldw 0(%%sr%2,%3),%0\n"		\
+		"2: ldw 4(%%sr%2,%3),%R0\n"		\
 		"9:\n"					\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)	\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b)	\
 		: "=&r"(__gu_tmp.l), "+r"(__gu_err)	\
-		: "r"(ptr));				\
+		: "i"(sr), "r"(ptr));			\
 							\
 	(val) = __gu_tmp.t;				\
 }
@@ -151,14 +151,14 @@ struct exception_table_entry {
 ({								\
 	__typeof__(&*(ptr)) __ptr = ptr;			\
 	__typeof__(*(__ptr)) __x = (__typeof__(*(__ptr)))(x);	\
-	__put_user_internal("%%sr3,", __x, __ptr);		\
+	__put_user_internal(SR_USER, __x, __ptr);		\
 })
 
 #define __put_kernel_nofault(dst, src, type, err_label)		\
 {								\
 	type __z = *(type *)(src);				\
 	long __err;						\
-	__err = __put_user_internal("%%sr0,", __z, (type *)(dst)); \
+	__err = __put_user_internal(SR_KERNEL, __z, (type *)(dst)); \
 	if (unlikely(__err))					\
 		goto err_label;					\
 }
@@ -178,24 +178,24 @@ struct exception_table_entry {
 
 #define __put_user_asm(sr, stx, x, ptr)				\
 	__asm__ __volatile__ (					\
-		"1: " stx " %2,0(" sr "%1)\n"			\
+		"1: " stx " %1,0(%%sr%2,%3)\n"			\
 		"9:\n"						\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)		\
 		: "+r"(__pu_err)				\
-		: "r"(ptr), "r"(x))
+		: "r"(x), "i"(sr), "r"(ptr))
 
 
 #if !defined(CONFIG_64BIT)
 
 #define __put_user_asm64(sr, __val, ptr) do {			\
 	__asm__ __volatile__ (					\
-		"1: stw %2,0(" sr "%1)\n"			\
-		"2: stw %R2,4(" sr "%1)\n"			\
+		"1: stw %1,0(%%sr%2,%3)\n"			\
+		"2: stw %R1,4(%%sr%2,%3)\n"			\
 		"9:\n"						\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)		\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b)		\
 		: "+r"(__pu_err)				\
-		: "r"(ptr), "r"(__val));			\
+		: "r"(__val), "i"(sr), "r"(ptr));		\
 } while (0)
 
 #endif /* !defined(CONFIG_64BIT) */
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index cd438e4150f6..7708a5806f09 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -63,10 +63,6 @@
 		);							\
 		__sys_res = (long)__res;				\
 	}								\
-	if ( (unsigned long)__sys_res >= (unsigned long)-4095 ){	\
-		errno = -__sys_res;		        		\
-		__sys_res = -1;						\
-	}								\
 	__sys_res;							\
 })
 
diff --git a/arch/parisc/include/asm/vdso.h b/arch/parisc/include/asm/vdso.h
new file mode 100644
index 000000000000..ef8206193f82
--- /dev/null
+++ b/arch/parisc/include/asm/vdso.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PARISC_VDSO_H__
+#define __PARISC_VDSO_H__
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_64BIT
+#include <generated/vdso64-offsets.h>
+#endif
+#include <generated/vdso32-offsets.h>
+
+#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name))
+#define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name))
+
+extern struct vdso_data *vdso_data;
+
+#endif /* __ASSEMBLY __ */
+
+/* Default link addresses for the vDSOs */
+#define VDSO_LBASE	0
+
+#define VDSO_VERSION_STRING	LINUX_5.18
+
+#endif /* __PARISC_VDSO_H__ */
diff --git a/arch/parisc/include/uapi/asm/auxvec.h b/arch/parisc/include/uapi/asm/auxvec.h
new file mode 100644
index 000000000000..90d2aa699cf3
--- /dev/null
+++ b/arch/parisc/include/uapi/asm/auxvec.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PARISC_AUXVEC_H
+#define _UAPI_PARISC_AUXVEC_H
+
+/* The vDSO location. */
+#define AT_SYSINFO_EHDR		33
+
+#endif /* _UAPI_PARISC_AUXVEC_H */
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index 8fb819bbbb17..d579243edc2f 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -39,3 +39,8 @@ obj-$(CONFIG_KGDB)			+= kgdb.o
 obj-$(CONFIG_KPROBES)			+= kprobes.o
 obj-$(CONFIG_KEXEC_CORE)		+= kexec.o relocate_kernel.o
 obj-$(CONFIG_KEXEC_FILE)		+= kexec_file.o
+
+# vdso
+obj-y			+= vdso.o
+obj-$(CONFIG_64BIT)	+= vdso64/
+obj-y			+= vdso32/
diff --git a/arch/parisc/kernel/alternative.c b/arch/parisc/kernel/alternative.c
index fa28c4c9f972..daa1e9047275 100644
--- a/arch/parisc/kernel/alternative.c
+++ b/arch/parisc/kernel/alternative.c
@@ -8,6 +8,7 @@
 #include <asm/processor.h>
 #include <asm/sections.h>
 #include <asm/alternative.h>
+#include <asm/cacheflush.h>
 
 #include <linux/module.h>
 
@@ -107,5 +108,14 @@ void __init apply_alternatives_all(void)
 	apply_alternatives((struct alt_instr *) &__alt_instructions,
 		(struct alt_instr *) &__alt_instructions_end, NULL);
 
+	if (cache_info.dc_size == 0 && cache_info.ic_size == 0) {
+		pr_info("alternatives: optimizing cache-flushes.\n");
+		static_branch_disable(&parisc_has_cache);
+	}
+	if (cache_info.dc_size == 0)
+		static_branch_disable(&parisc_has_dcache);
+	if (cache_info.ic_size == 0)
+		static_branch_disable(&parisc_has_icache);
+
 	set_kernel_text_rw(0);
 }
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 2a83ef36d216..2673d57eeb00 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -26,7 +26,11 @@
 #include <asm/ptrace.h>
 #include <asm/processor.h>
 #include <asm/pdc.h>
+#include <uapi/asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/rt_sigframe.h>
 #include <linux/uaccess.h>
+#include "signal32.h"
 
 /* Add FRAME_SIZE to the size x and align it to y. All definitions
  * that use align_frame will include space for a frame.
@@ -218,6 +222,11 @@ int main(void)
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_PRE_COUNT, offsetof(struct task_struct, thread_info.preempt_count));
 	BLANK();
+	DEFINE(ASM_SIGFRAME_SIZE, PARISC_RT_SIGFRAME_SIZE);
+	DEFINE(SIGFRAME_CONTEXT_REGS, offsetof(struct rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE);
+	DEFINE(ASM_SIGFRAME_SIZE32, PARISC_RT_SIGFRAME_SIZE32);
+	DEFINE(SIGFRAME_CONTEXT_REGS32, offsetof(struct compat_rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE32);
+	BLANK();
 	DEFINE(ICACHE_BASE, offsetof(struct pdc_cache_info, ic_base));
 	DEFINE(ICACHE_STRIDE, offsetof(struct pdc_cache_info, ic_stride));
 	DEFINE(ICACHE_COUNT, offsetof(struct pdc_cache_info, ic_count));
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 94150b91c96f..456e879d34a8 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -38,6 +38,9 @@ EXPORT_SYMBOL(flush_dcache_page_asm);
 void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
 void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr);
 
+/* Internal implementation in arch/parisc/kernel/pacache.S */
+void flush_data_cache_local(void *);  /* flushes local data-cache only */
+void flush_instruction_cache_local(void); /* flushes local code-cache only */
 
 /* On some machines (i.e., ones with the Merced bus), there can be
  * only a single PxTLB broadcast at a time; this must be guaranteed
@@ -58,26 +61,35 @@ struct pdc_cache_info cache_info __ro_after_init;
 static struct pdc_btlb_info btlb_info __ro_after_init;
 #endif
 
-#ifdef CONFIG_SMP
-void
-flush_data_cache(void)
+DEFINE_STATIC_KEY_TRUE(parisc_has_cache);
+DEFINE_STATIC_KEY_TRUE(parisc_has_dcache);
+DEFINE_STATIC_KEY_TRUE(parisc_has_icache);
+
+static void cache_flush_local_cpu(void *dummy)
 {
-	on_each_cpu(flush_data_cache_local, NULL, 1);
+	if (static_branch_likely(&parisc_has_icache))
+		flush_instruction_cache_local();
+	if (static_branch_likely(&parisc_has_dcache))
+		flush_data_cache_local(NULL);
 }
-void 
-flush_instruction_cache(void)
+
+void flush_cache_all_local(void)
 {
-	on_each_cpu(flush_instruction_cache_local, NULL, 1);
+	cache_flush_local_cpu(NULL);
 }
-#endif
 
-void
-flush_cache_all_local(void)
+void flush_cache_all(void)
 {
-	flush_instruction_cache_local(NULL);
-	flush_data_cache_local(NULL);
+	if (static_branch_likely(&parisc_has_cache))
+		on_each_cpu(cache_flush_local_cpu, NULL, 1);
 }
-EXPORT_SYMBOL(flush_cache_all_local);
+
+static inline void flush_data_cache(void)
+{
+	if (static_branch_likely(&parisc_has_dcache))
+		on_each_cpu(flush_data_cache_local, NULL, 1);
+}
+
 
 /* Virtual address of pfn.  */
 #define pfn_va(pfn)	__va(PFN_PHYS(pfn))
@@ -303,6 +315,8 @@ static inline void
 __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
 		   unsigned long physaddr)
 {
+	if (!static_branch_likely(&parisc_has_cache))
+		return;
 	preempt_disable();
 	flush_dcache_page_asm(physaddr, vmaddr);
 	if (vma->vm_flags & VM_EXEC)
@@ -314,6 +328,8 @@ static inline void
 __purge_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
 		   unsigned long physaddr)
 {
+	if (!static_branch_likely(&parisc_has_cache))
+		return;
 	preempt_disable();
 	purge_dcache_page_asm(physaddr, vmaddr);
 	if (vma->vm_flags & VM_EXEC)
@@ -375,7 +391,6 @@ EXPORT_SYMBOL(flush_dcache_page);
 
 /* Defined in arch/parisc/kernel/pacache.S */
 EXPORT_SYMBOL(flush_kernel_dcache_range_asm);
-EXPORT_SYMBOL(flush_data_cache_local);
 EXPORT_SYMBOL(flush_kernel_icache_range_asm);
 
 #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */
@@ -388,7 +403,7 @@ void __init parisc_setup_cache_timing(void)
 {
 	unsigned long rangetime, alltime;
 	unsigned long size;
-	unsigned long threshold;
+	unsigned long threshold, threshold2;
 
 	alltime = mfctl(16);
 	flush_data_cache();
@@ -403,8 +418,20 @@ void __init parisc_setup_cache_timing(void)
 		alltime, size, rangetime);
 
 	threshold = L1_CACHE_ALIGN(size * alltime / rangetime);
-	if (threshold > cache_info.dc_size)
-		threshold = cache_info.dc_size;
+
+	/*
+	 * The threshold computed above isn't very reliable since the
+	 * flush times depend greatly on the percentage of dirty lines
+	 * in the flush range. Further, the whole cache time doesn't
+	 * include the time to refill lines that aren't in the mm/vma
+	 * being flushed. By timing glibc build and checks on mako cpus,
+	 * the following formula seems to work reasonably well. The
+	 * value from the timing calculation is too small, and increases
+	 * build and check times by almost a factor two.
+	 */
+	threshold2 = cache_info.dc_size * num_online_cpus();
+	if (threshold2 > threshold)
+		threshold = threshold2;
 	if (threshold)
 		parisc_cache_flush_threshold = threshold;
 	printk(KERN_INFO "Cache flush threshold set to %lu KiB\n",
@@ -457,7 +484,7 @@ void flush_kernel_dcache_page_addr(void *addr)
 
 	flush_kernel_dcache_page_asm(addr);
 	purge_tlb_start(flags);
-	pdtlb_kernel(addr);
+	pdtlb(SR_KERNEL, addr);
 	purge_tlb_end(flags);
 }
 EXPORT_SYMBOL(flush_kernel_dcache_page_addr);
@@ -496,25 +523,15 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
 	   but cause a purge request to be broadcast to other TLBs.  */
 	while (start < end) {
 		purge_tlb_start(flags);
-		mtsp(sid, 1);
-		pdtlb(start);
-		pitlb(start);
+		mtsp(sid, SR_TEMP1);
+		pdtlb(SR_TEMP1, start);
+		pitlb(SR_TEMP1, start);
 		purge_tlb_end(flags);
 		start += PAGE_SIZE;
 	}
 	return 0;
 }
 
-static void cacheflush_h_tmp_function(void *dummy)
-{
-	flush_cache_all_local();
-}
-
-void flush_cache_all(void)
-{
-	on_each_cpu(cacheflush_h_tmp_function, NULL, 1);
-}
-
 static inline unsigned long mm_total_size(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
@@ -558,15 +575,6 @@ static void flush_cache_pages(struct vm_area_struct *vma, struct mm_struct *mm,
 	}
 }
 
-static void flush_user_cache_tlb(struct vm_area_struct *vma,
-				 unsigned long start, unsigned long end)
-{
-	flush_user_dcache_range_asm(start, end);
-	if (vma->vm_flags & VM_EXEC)
-		flush_user_icache_range_asm(start, end);
-	flush_tlb_range(vma, start, end);
-}
-
 void flush_cache_mm(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
@@ -575,23 +583,14 @@ void flush_cache_mm(struct mm_struct *mm)
 	   rp3440, etc.  So, avoid it if the mm isn't too big.  */
 	if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
 	    mm_total_size(mm) >= parisc_cache_flush_threshold) {
-		if (mm->context)
+		if (mm->context.space_id)
 			flush_tlb_all();
 		flush_cache_all();
 		return;
 	}
 
-	preempt_disable();
-	if (mm->context == mfsp(3)) {
-		for (vma = mm->mmap; vma; vma = vma->vm_next)
-			flush_user_cache_tlb(vma, vma->vm_start, vma->vm_end);
-		preempt_enable();
-		return;
-	}
-
 	for (vma = mm->mmap; vma; vma = vma->vm_next)
 		flush_cache_pages(vma, mm, vma->vm_start, vma->vm_end);
-	preempt_enable();
 }
 
 void flush_cache_range(struct vm_area_struct *vma,
@@ -599,29 +598,21 @@ void flush_cache_range(struct vm_area_struct *vma,
 {
 	if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
 	    end - start >= parisc_cache_flush_threshold) {
-		if (vma->vm_mm->context)
+		if (vma->vm_mm->context.space_id)
 			flush_tlb_range(vma, start, end);
 		flush_cache_all();
 		return;
 	}
 
-	preempt_disable();
-	if (vma->vm_mm->context == mfsp(3)) {
-		flush_user_cache_tlb(vma, start, end);
-		preempt_enable();
-		return;
-	}
-
-	flush_cache_pages(vma, vma->vm_mm, vma->vm_start, vma->vm_end);
-	preempt_enable();
+	flush_cache_pages(vma, vma->vm_mm, start, end);
 }
 
 void
 flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
 {
 	if (pfn_valid(pfn)) {
-		if (likely(vma->vm_mm->context)) {
-			flush_tlb_page(vma, vmaddr);
+		flush_tlb_page(vma, vmaddr);
+		if (likely(vma->vm_mm->context.space_id)) {
 			__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
 		} else {
 			__purge_cache_page(vma, vmaddr, PFN_PHYS(pfn));
@@ -633,6 +624,7 @@ void flush_kernel_vmap_range(void *vaddr, int size)
 {
 	unsigned long start = (unsigned long)vaddr;
 	unsigned long end = start + size;
+	unsigned long flags, physaddr;
 
 	if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
 	    (unsigned long)size >= parisc_cache_flush_threshold) {
@@ -641,8 +633,14 @@ void flush_kernel_vmap_range(void *vaddr, int size)
 		return;
 	}
 
-	flush_kernel_dcache_range_asm(start, end);
-	flush_tlb_kernel_range(start, end);
+	while (start < end) {
+		physaddr = lpa(start);
+		purge_tlb_start(flags);
+		pdtlb(SR_KERNEL, start);
+		purge_tlb_end(flags);
+		flush_dcache_page_asm(physaddr, start);
+		start += PAGE_SIZE;
+	}
 }
 EXPORT_SYMBOL(flush_kernel_vmap_range);
 
@@ -650,6 +648,7 @@ void invalidate_kernel_vmap_range(void *vaddr, int size)
 {
 	unsigned long start = (unsigned long)vaddr;
 	unsigned long end = start + size;
+	unsigned long flags, physaddr;
 
 	if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
 	    (unsigned long)size >= parisc_cache_flush_threshold) {
@@ -658,7 +657,13 @@ void invalidate_kernel_vmap_range(void *vaddr, int size)
 		return;
 	}
 
-	purge_kernel_dcache_range_asm(start, end);
-	flush_tlb_kernel_range(start, end);
+	while (start < end) {
+		physaddr = lpa(start);
+		purge_tlb_start(flags);
+		pdtlb(SR_KERNEL, start);
+		purge_tlb_end(flags);
+		purge_dcache_page_asm(physaddr, start);
+		start += PAGE_SIZE;
+	}
 }
 EXPORT_SYMBOL(invalidate_kernel_vmap_range);
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 6e9cdb269862..ecf50159359e 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -1288,74 +1288,12 @@ nadtlb_check_alias_20:
 nadtlb_emulate:
 
 	/*
-	 * Non access misses can be caused by fdc,fic,pdc,lpa,probe and
-	 * probei instructions. We don't want to fault for these
-	 * instructions (not only does it not make sense, it can cause
-	 * deadlocks, since some flushes are done with the mmap
-	 * semaphore held). If the translation doesn't exist, we can't
-	 * insert a translation, so have to emulate the side effects
-	 * of the instruction. Since we don't insert a translation
-	 * we can get a lot of faults during a flush loop, so it makes
-	 * sense to try to do it here with minimum overhead. We only
-	 * emulate fdc,fic,pdc,probew,prober instructions whose base 
-	 * and index registers are not shadowed. We defer everything 
-	 * else to the "slow" path.
+	 * Non-access misses can be caused by fdc,fic,pdc,lpa,probe and
+	 * probei instructions. The kernel no longer faults doing flushes.
+	 * Use of lpa and probe instructions is rare. Given the issue
+	 * with shadow registers, we defer everything to the "slow" path.
 	 */
-
-	mfctl           %cr19,%r9 /* Get iir */
-
-	/* PA 2.0 Arch Ref. Book pg 382 has a good description of the insn bits.
-	   Checks for fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw */
-
-	/* Checks for fdc,fdce,pdc,"fic,4f" only */
-	ldi             0x280,%r16
-	and             %r9,%r16,%r17
-	cmpb,<>,n       %r16,%r17,nadtlb_probe_check
-	bb,>=,n         %r9,26,nadtlb_nullify  /* m bit not set, just nullify */
-	BL		get_register,%r25
-	extrw,u         %r9,15,5,%r8           /* Get index register # */
-	cmpib,COND(=),n        -1,%r1,nadtlb_fault    /* have to use slow path */
-	copy            %r1,%r24
-	BL		get_register,%r25
-	extrw,u         %r9,10,5,%r8           /* Get base register # */
-	cmpib,COND(=),n        -1,%r1,nadtlb_fault    /* have to use slow path */
-	BL		set_register,%r25
-	add,l           %r1,%r24,%r1           /* doesn't affect c/b bits */
-
-nadtlb_nullify:
-	mfctl           %ipsw,%r8
-	ldil            L%PSW_N,%r9
-	or              %r8,%r9,%r8            /* Set PSW_N */
-	mtctl           %r8,%ipsw
-
-	rfir
-	nop
-
-	/* 
-		When there is no translation for the probe address then we
-		must nullify the insn and return zero in the target register.
-		This will indicate to the calling code that it does not have 
-		write/read privileges to this address.
-
-		This should technically work for prober and probew in PA 1.1,
-		and also probe,r and probe,w in PA 2.0
-
-		WARNING: USE ONLY NON-SHADOW REGISTERS WITH PROBE INSN!
-		THE SLOW-PATH EMULATION HAS NOT BEEN WRITTEN YET.
-
-	*/
-nadtlb_probe_check:
-	ldi             0x80,%r16
-	and             %r9,%r16,%r17
-	cmpb,<>,n       %r16,%r17,nadtlb_fault /* Must be probe,[rw]*/
-	BL              get_register,%r25      /* Find the target register */
-	extrw,u         %r9,31,5,%r8           /* Get target register */
-	cmpib,COND(=),n        -1,%r1,nadtlb_fault    /* have to use slow path */
-	BL		set_register,%r25
-	copy            %r0,%r1                /* Write zero to target register */
-	b nadtlb_nullify                       /* Nullify return insn */
-	nop
-
+	b,n		nadtlb_fault
 
 #ifdef CONFIG_64BIT
 itlb_miss_20w:
diff --git a/arch/parisc/kernel/kprobes.c b/arch/parisc/kernel/kprobes.c
index e2bdb5a5f93e..3343d2fb7889 100644
--- a/arch/parisc/kernel/kprobes.c
+++ b/arch/parisc/kernel/kprobes.c
@@ -5,6 +5,7 @@
  * PA-RISC kprobes implementation
  *
  * Copyright (c) 2019 Sven Schnelle <[email protected]>
+ * Copyright (c) 2022 Helge Deller <[email protected]>
  */
 
 #include <linux/types.h>
@@ -25,9 +26,14 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	if (!p->ainsn.insn)
 		return -ENOMEM;
 
-	memcpy(p->ainsn.insn, p->addr,
-		MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	/*
+	 * Set up new instructions. Second break instruction will
+	 * trigger call of parisc_kprobe_ss_handler().
+	 */
 	p->opcode = *p->addr;
+	p->ainsn.insn[0] = p->opcode;
+	p->ainsn.insn[1] = PARISC_KPROBES_BREAK_INSN2;
+
 	flush_insn_slot(p);
 	return 0;
 }
@@ -73,9 +79,7 @@ static void __kprobes setup_singlestep(struct kprobe *p,
 {
 	kcb->iaoq[0] = regs->iaoq[0];
 	kcb->iaoq[1] = regs->iaoq[1];
-	regs->iaoq[0] = (unsigned long)p->ainsn.insn;
-	mtctl(0, 0);
-	regs->gr[0] |= PSW_R;
+	instruction_pointer_set(regs, (unsigned long)p->ainsn.insn);
 }
 
 int __kprobes parisc_kprobe_break_handler(struct pt_regs *regs)
@@ -165,9 +169,8 @@ int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs)
 		regs->iaoq[0] = kcb->iaoq[1];
 		break;
 	default:
-		regs->iaoq[1] = kcb->iaoq[0];
-		regs->iaoq[1] += (regs->iaoq[1] - regs->iaoq[0]) + 4;
 		regs->iaoq[0] = kcb->iaoq[1];
+		regs->iaoq[1] = regs->iaoq[0] + 4;
 		break;
 	}
 	kcb->kprobe_status = KPROBE_HIT_SSDONE;
@@ -191,14 +194,17 @@ static struct kprobe trampoline_p = {
 static int __kprobes trampoline_probe_handler(struct kprobe *p,
 					      struct pt_regs *regs)
 {
-	unsigned long orig_ret_address;
-
-	orig_ret_address = __kretprobe_trampoline_handler(regs, NULL);
-	instruction_pointer_set(regs, orig_ret_address);
+	__kretprobe_trampoline_handler(regs, NULL);
 
 	return 1;
 }
 
+void arch_kretprobe_fixup_return(struct pt_regs *regs,
+				 kprobe_opcode_t *correct_ret_addr)
+{
+	regs->gr[2] = (unsigned long)correct_ret_addr;
+}
+
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 36a57aa38e87..160996f2198e 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -91,7 +91,7 @@ static inline int map_pte_uncached(pte_t * pte,
 			printk(KERN_ERR "map_pte_uncached: page already exists\n");
 		purge_tlb_start(flags);
 		set_pte(pte, __mk_pte(*paddr_ptr, PAGE_KERNEL_UNC));
-		pdtlb_kernel(orig_vaddr);
+		pdtlb(SR_KERNEL, orig_vaddr);
 		purge_tlb_end(flags);
 		vaddr += PAGE_SIZE;
 		orig_vaddr += PAGE_SIZE;
@@ -175,7 +175,7 @@ static inline void unmap_uncached_pte(pmd_t * pmd, unsigned long vaddr,
 
 		pte_clear(&init_mm, vaddr, pte);
 		purge_tlb_start(flags);
-		pdtlb_kernel(orig_vaddr);
+		pdtlb(SR_KERNEL, orig_vaddr);
 		purge_tlb_end(flags);
 		vaddr += PAGE_SIZE;
 		orig_vaddr += PAGE_SIZE;
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index 46b1050640b8..24443908f905 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -1,16 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- *  linux/arch/parisc/kernel/signal.c: Architecture-specific signal
- *  handling support.
+ *  PA-RISC architecture-specific signal handling support.
  *
  *  Copyright (C) 2000 David Huggins-Daines <[email protected]>
  *  Copyright (C) 2000 Linuxcare, Inc.
+ *  Copyright (C) 2000-2022 Helge Deller <[email protected]>
+ *  Copyright (C) 2022 John David Anglin <[email protected]>
  *
  *  Based on the ia64, i386, and alpha versions.
- *
- *  Like the IA-64, we are a recent enough port (we are *starting*
- *  with glibc2.2) that we do not need to support the old non-realtime
- *  Linux signals.  Therefore we don't.
  */
 
 #include <linux/sched.h>
@@ -32,6 +29,7 @@
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/asm-offsets.h>
+#include <asm/vdso.h>
 
 #ifdef CONFIG_COMPAT
 #include "signal32.h"
@@ -59,14 +57,6 @@
  * Do a signal return - restore sigcontext.
  */
 
-/* Trampoline for calling rt_sigreturn() */
-#define INSN_LDI_R25_0	 0x34190000 /* ldi  0,%r25 (in_syscall=0) */
-#define INSN_LDI_R25_1	 0x34190002 /* ldi  1,%r25 (in_syscall=1) */
-#define INSN_LDI_R20	 0x3414015a /* ldi  __NR_rt_sigreturn,%r20 */
-#define INSN_BLE_SR2_R0  0xe4008200 /* be,l 0x100(%sr2,%r0),%sr0,%r31 */
-/* For debugging */
-#define INSN_DIE_HORRIBLY 0x68000ccc /* stw %r0,0x666(%sr0,%r0) */
-
 static long
 restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
 {
@@ -77,9 +67,9 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
 	err |= __copy_from_user(regs->iaoq, sc->sc_iaoq, sizeof(regs->iaoq));
 	err |= __copy_from_user(regs->iasq, sc->sc_iasq, sizeof(regs->iasq));
 	err |= __get_user(regs->sar, &sc->sc_sar);
-	DBG(2,"restore_sigcontext: iaoq is %#lx / %#lx\n",
-			regs->iaoq[0],regs->iaoq[1]);
-	DBG(2,"restore_sigcontext: r28 is %ld\n", regs->gr[28]);
+	DBG(2, "%s: iaoq is %#lx / %#lx\n",
+			__func__, regs->iaoq[0], regs->iaoq[1]);
+	DBG(2, "%s: r28 is %ld\n", __func__, regs->gr[28]);
 	return err;
 }
 
@@ -102,7 +92,7 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
 	/* Unwind the user stack to get the rt_sigframe structure. */
 	frame = (struct rt_sigframe __user *)
 		(usp - sigframe_size);
-	DBG(2,"sys_rt_sigreturn: frame is %p\n", frame);
+	DBG(2, "%s: frame is %p pid %d\n", __func__, frame, task_pid_nr(current));
 
 	regs->orig_r28 = 1; /* no restarts for sigreturn */
 
@@ -110,7 +100,6 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
 	compat_frame = (struct compat_rt_sigframe __user *)frame;
 	
 	if (is_compat_task()) {
-		DBG(2,"sys_rt_sigreturn: ELF32 process.\n");
 		if (get_compat_sigset(&set, &compat_frame->uc.uc_sigmask))
 			goto give_sigsegv;
 	} else
@@ -125,25 +114,25 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
 	/* Good thing we saved the old gr[30], eh? */
 #ifdef CONFIG_64BIT
 	if (is_compat_task()) {
-		DBG(1,"sys_rt_sigreturn: compat_frame->uc.uc_mcontext 0x%p\n",
-				&compat_frame->uc.uc_mcontext);
+		DBG(1, "%s: compat_frame->uc.uc_mcontext 0x%p\n",
+				__func__, &compat_frame->uc.uc_mcontext);
 // FIXME: Load upper half from register file
 		if (restore_sigcontext32(&compat_frame->uc.uc_mcontext, 
 					&compat_frame->regs, regs))
 			goto give_sigsegv;
-		DBG(1,"sys_rt_sigreturn: usp %#08lx stack 0x%p\n", 
-				usp, &compat_frame->uc.uc_stack);
+		DBG(1, "%s: usp %#08lx stack 0x%p\n",
+				__func__, usp, &compat_frame->uc.uc_stack);
 		if (compat_restore_altstack(&compat_frame->uc.uc_stack))
 			goto give_sigsegv;
 	} else
 #endif
 	{
-		DBG(1,"sys_rt_sigreturn: frame->uc.uc_mcontext 0x%p\n",
-				&frame->uc.uc_mcontext);
+		DBG(1, "%s: frame->uc.uc_mcontext 0x%p\n",
+				__func__, &frame->uc.uc_mcontext);
 		if (restore_sigcontext(&frame->uc.uc_mcontext, regs))
 			goto give_sigsegv;
-		DBG(1,"sys_rt_sigreturn: usp %#08lx stack 0x%p\n", 
-				usp, &frame->uc.uc_stack);
+		DBG(1, "%s: usp %#08lx stack 0x%p\n",
+				__func__, usp, &frame->uc.uc_stack);
 		if (restore_altstack(&frame->uc.uc_stack))
 			goto give_sigsegv;
 	}
@@ -155,14 +144,11 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
 	 */
 	if (in_syscall)
 		regs->gr[31] = regs->iaoq[0];
-#if DEBUG_SIG
-	DBG(1,"sys_rt_sigreturn: returning to %#lx, DUMPING REGS:\n", regs->iaoq[0]);
-	show_regs(regs);
-#endif
+
 	return;
 
 give_sigsegv:
-	DBG(1,"sys_rt_sigreturn: Sending SIGSEGV\n");
+	DBG(1, "%s: Sending SIGSEGV\n", __func__);
 	force_sig(SIGSEGV);
 	return;
 }
@@ -177,15 +163,15 @@ get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size)
 	/*FIXME: ELF32 vs. ELF64 has different frame_size, but since we
 	  don't use the parameter it doesn't matter */
 
-	DBG(1,"get_sigframe: ka = %#lx, sp = %#lx, frame_size = %#lx\n",
-			(unsigned long)ka, sp, frame_size);
+	DBG(1, "%s: ka = %#lx, sp = %#lx, frame_size = %zu\n",
+			__func__, (unsigned long)ka, sp, frame_size);
 	
 	/* Align alternate stack and reserve 64 bytes for the signal
 	   handler's frame marker.  */
 	if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && ! sas_ss_flags(sp))
 		sp = (current->sas_ss_sp + 0x7f) & ~0x3f; /* Stacks grow up! */
 
-	DBG(1,"get_sigframe: Returning sp = %#lx\n", (unsigned long)sp);
+	DBG(1, "%s: Returning sp = %#lx\n", __func__, (unsigned long)sp);
 	return (void __user *) sp; /* Stacks grow up.  Fun. */
 }
 
@@ -205,20 +191,20 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, int in_sysc
 		err |= __put_user(regs->gr[31]+4, &sc->sc_iaoq[1]);
 		err |= __put_user(regs->sr[3], &sc->sc_iasq[0]);
 		err |= __put_user(regs->sr[3], &sc->sc_iasq[1]);
-		DBG(1,"setup_sigcontext: iaoq %#lx / %#lx (in syscall)\n",
-			regs->gr[31], regs->gr[31]+4);
+		DBG(1, "%s: iaoq %#lx / %#lx (in syscall)\n",
+			__func__, regs->gr[31], regs->gr[31]+4);
 	} else {
 		err |= __copy_to_user(sc->sc_iaoq, regs->iaoq, sizeof(regs->iaoq));
 		err |= __copy_to_user(sc->sc_iasq, regs->iasq, sizeof(regs->iasq));
-		DBG(1,"setup_sigcontext: iaoq %#lx / %#lx (not in syscall)\n", 
-			regs->iaoq[0], regs->iaoq[1]);
+		DBG(1, "%s: iaoq %#lx / %#lx (not in syscall)\n",
+			__func__, regs->iaoq[0], regs->iaoq[1]);
 	}
 
 	err |= __put_user(flags, &sc->sc_flags);
 	err |= __copy_to_user(sc->sc_gr, regs->gr, sizeof(regs->gr));
 	err |= __copy_to_user(sc->sc_fr, regs->fr, sizeof(regs->fr));
 	err |= __put_user(regs->sar, &sc->sc_sar);
-	DBG(1,"setup_sigcontext: r28 is %ld\n", regs->gr[28]);
+	DBG(1, "%s: r28 is %ld\n", __func__, regs->gr[28]);
 
 	return err;
 }
@@ -230,7 +216,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 	struct rt_sigframe __user *frame;
 	unsigned long rp, usp;
 	unsigned long haddr, sigframe_size;
-	unsigned long start, end;
+	unsigned long start;
 	int err = 0;
 #ifdef CONFIG_64BIT
 	struct compat_rt_sigframe __user * compat_frame;
@@ -247,8 +233,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 #endif
 	frame = get_sigframe(&ksig->ka, usp, sigframe_size);
 
-	DBG(1,"SETUP_RT_FRAME: START\n");
-	DBG(1,"setup_rt_frame: frame %p info %p\n", frame, ksig->info);
+	DBG(1, "%s: frame %p info %p\n", __func__, frame, &ksig->info);
 
 	start = (unsigned long) frame;
 	if (start >= user_addr_max() - sigframe_size)
@@ -259,11 +244,12 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 	compat_frame = (struct compat_rt_sigframe __user *)frame;
 	
 	if (is_compat_task()) {
-		DBG(1,"setup_rt_frame: frame->info = 0x%p\n", &compat_frame->info);
+		DBG(1, "%s: frame->info = 0x%p\n", __func__, &compat_frame->info);
 		err |= copy_siginfo_to_user32(&compat_frame->info, &ksig->info);
 		err |= __compat_save_altstack( &compat_frame->uc.uc_stack, regs->gr[30]);
-		DBG(1,"setup_rt_frame: frame->uc = 0x%p\n", &compat_frame->uc);
-		DBG(1,"setup_rt_frame: frame->uc.uc_mcontext = 0x%p\n", &compat_frame->uc.uc_mcontext);
+		DBG(1, "%s: frame->uc = 0x%p\n", __func__, &compat_frame->uc);
+		DBG(1, "%s: frame->uc.uc_mcontext = 0x%p\n",
+			__func__, &compat_frame->uc.uc_mcontext);
 		err |= setup_sigcontext32(&compat_frame->uc.uc_mcontext, 
 					&compat_frame->regs, regs, in_syscall);
 		err |= put_compat_sigset(&compat_frame->uc.uc_sigmask, set,
@@ -271,11 +257,12 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 	} else
 #endif
 	{	
-		DBG(1,"setup_rt_frame: frame->info = 0x%p\n", &frame->info);
+		DBG(1, "%s: frame->info = 0x%p\n", __func__, &frame->info);
 		err |= copy_siginfo_to_user(&frame->info, &ksig->info);
 		err |= __save_altstack(&frame->uc.uc_stack, regs->gr[30]);
-		DBG(1,"setup_rt_frame: frame->uc = 0x%p\n", &frame->uc);
-		DBG(1,"setup_rt_frame: frame->uc.uc_mcontext = 0x%p\n", &frame->uc.uc_mcontext);
+		DBG(1, "%s: frame->uc = 0x%p\n", __func__, &frame->uc);
+		DBG(1, "%s: frame->uc.uc_mcontext = 0x%p\n",
+			__func__, &frame->uc.uc_mcontext);
 		err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, in_syscall);
 		/* FIXME: Should probably be converted as well for the compat case */
 		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
@@ -284,32 +271,15 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 	if (err)
 		return -EFAULT;
 
-	/* Set up to return from userspace.  If provided, use a stub
-	   already in userspace. The first words of tramp are used to
-	   save the previous sigrestartblock trampoline that might be
-	   on the stack. We start the sigreturn trampoline at 
-	   SIGRESTARTBLOCK_TRAMP+X. */
-	err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0,
-			&frame->tramp[SIGRESTARTBLOCK_TRAMP+0]);
-	err |= __put_user(INSN_LDI_R20, 
-			&frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
-	err |= __put_user(INSN_BLE_SR2_R0, 
-			&frame->tramp[SIGRESTARTBLOCK_TRAMP+2]);
-	err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]);
-
-	start = (unsigned long) &frame->tramp[0];
-	end = (unsigned long) &frame->tramp[TRAMP_SIZE];
-	flush_user_dcache_range_asm(start, end);
-	flush_user_icache_range_asm(start, end);
-
-	/* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP
-	 * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP
-	 * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP
-	 */
-	rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP];
+#ifdef CONFIG_64BIT
+	if (!is_compat_task())
+		rp = VDSO64_SYMBOL(current, sigtramp_rt);
+	else
+#endif
+		rp = VDSO32_SYMBOL(current, sigtramp_rt);
 
-	if (err)
-		return -EFAULT;
+	if (in_syscall)
+		rp += 4*4; /* skip 4 instructions and start at ldi 1,%r25 */
 
 	haddr = A(ksig->ka.sa.sa_handler);
 	/* The sa_handler may be a pointer to a function descriptor */
@@ -340,8 +310,8 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 		
 		haddr = fdesc.addr;
 		regs->gr[19] = fdesc.gp;
-		DBG(1,"setup_rt_frame: 64 bit signal, exe=%#lx, r19=%#lx, in_syscall=%d\n",
-		     haddr, regs->gr[19], in_syscall);
+		DBG(1, "%s: 64 bit signal, exe=%#lx, r19=%#lx, in_syscall=%d\n",
+		     __func__, haddr, regs->gr[19], in_syscall);
 	}
 #endif
 
@@ -351,7 +321,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 		regs->gr[31] = haddr;
 #ifdef CONFIG_64BIT
 		if (!test_thread_flag(TIF_32BIT))
-			sigframe_size |= 1;
+			sigframe_size |= 1; /* XXX ???? */
 #endif
 	} else {
 		unsigned long psw = USER_PSW;
@@ -373,11 +343,11 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 		}
 
 		regs->gr[0] = psw;
-		regs->iaoq[0] = haddr | 3;
+		regs->iaoq[0] = haddr | PRIV_USER;
 		regs->iaoq[1] = regs->iaoq[0] + 4;
 	}
 
-	regs->gr[2]  = rp;                /* userland return pointer */
+	regs->gr[2]  = rp;			/* userland return pointer */
 	regs->gr[26] = ksig->sig;               /* signal number */
 	
 #ifdef CONFIG_64BIT
@@ -391,15 +361,15 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 		regs->gr[24] = A(&frame->uc);   /* ucontext pointer */
 	}
 	
-	DBG(1,"setup_rt_frame: making sigreturn frame: %#lx + %#lx = %#lx\n",
+	DBG(1, "%s: making sigreturn frame: %#lx + %#lx = %#lx\n", __func__,
 	       regs->gr[30], sigframe_size,
 	       regs->gr[30] + sigframe_size);
 	/* Raise the user stack pointer to make a proper call frame. */
 	regs->gr[30] = (A(frame) + sigframe_size);
 
 
-	DBG(1,"setup_rt_frame: sig deliver (%s,%d) frame=0x%p sp=%#lx iaoq=%#lx/%#lx rp=%#lx\n",
-	       current->comm, current->pid, frame, regs->gr[30],
+	DBG(1, "%s: sig deliver (%s,%d) frame=0x%p sp=%#lx iaoq=%#lx/%#lx rp=%#lx\n",
+	       __func__, current->comm, current->pid, frame, regs->gr[30],
 	       regs->iaoq[0], regs->iaoq[1], rp);
 
 	return 0;
@@ -415,8 +385,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs, int in_syscall)
 	int ret;
 	sigset_t *oldset = sigmask_to_save();
 
-	DBG(1,"handle_signal: sig=%ld, ka=%p, info=%p, oldset=%p, regs=%p\n",
-	       ksig->sig, ksig->ka, ksig->info, oldset, regs);
+	DBG(1, "%s: sig=%d, ka=%p, info=%p, oldset=%p, regs=%p\n",
+	       __func__, ksig->sig, &ksig->ka, &ksig->info, oldset, regs);
 	
 	/* Set up the stack frame */
 	ret = setup_rt_frame(ksig, oldset, regs, in_syscall);
@@ -424,8 +394,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs, int in_syscall)
 	signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP) ||
 			  test_thread_flag(TIF_BLOCKSTEP));
 
-	DBG(1,KERN_DEBUG "do_signal: Exit (success), regs->gr[28] = %ld\n",
-		regs->gr[28]);
+	DBG(1, "%s: Exit (success), regs->gr[28] = %ld\n",
+		__func__, regs->gr[28]);
 }
 
 /*
@@ -483,21 +453,27 @@ syscall_restart(struct pt_regs *regs, struct k_sigaction *ka)
 	if (regs->orig_r28)
 		return;
 	regs->orig_r28 = 1; /* no more restarts */
+
+	DBG(1, "%s:  orig_r28 = %ld  pid %d  r20 %ld\n",
+		__func__, regs->orig_r28, task_pid_nr(current), regs->gr[20]);
+
 	/* Check the return code */
 	switch (regs->gr[28]) {
 	case -ERESTART_RESTARTBLOCK:
 	case -ERESTARTNOHAND:
-		DBG(1,"ERESTARTNOHAND: returning -EINTR\n");
+		DBG(1, "%s: ERESTARTNOHAND: returning -EINTR\n", __func__);
 		regs->gr[28] = -EINTR;
 		break;
 	case -ERESTARTSYS:
 		if (!(ka->sa.sa_flags & SA_RESTART)) {
-			DBG(1,"ERESTARTSYS: putting -EINTR\n");
+			DBG(1, "%s: ERESTARTSYS: putting -EINTR pid %d\n",
+				__func__, task_pid_nr(current));
 			regs->gr[28] = -EINTR;
 			break;
 		}
 		fallthrough;
 	case -ERESTARTNOINTR:
+		DBG(1, "%s: %ld\n", __func__, regs->gr[28]);
 		check_syscallno_in_delay_branch(regs);
 		break;
 	}
@@ -509,50 +485,52 @@ insert_restart_trampoline(struct pt_regs *regs)
 	if (regs->orig_r28)
 		return;
 	regs->orig_r28 = 1; /* no more restarts */
-	switch(regs->gr[28]) {
+
+	DBG(2, "%s: gr28 = %ld pid %d\n",
+		__func__, regs->gr[28], task_pid_nr(current));
+
+	switch (regs->gr[28]) {
 	case -ERESTART_RESTARTBLOCK: {
 		/* Restart the system call - no handlers present */
 		unsigned int *usp = (unsigned int *)regs->gr[30];
-		unsigned long start = (unsigned long) &usp[2];
-		unsigned long end  = (unsigned long) &usp[5];
+		unsigned long rp;
 		long err = 0;
 
 		/* check that we don't exceed the stack */
 		if (A(&usp[0]) >= user_addr_max() - 5 * sizeof(int))
 			return;
 
-		/* Setup a trampoline to restart the syscall
-		 * with __NR_restart_syscall
+		/* Call trampoline in vdso to restart the syscall
+		 * with __NR_restart_syscall.
+		 * Original return addresses are on stack like this:
 		 *
 		 *  0: <return address (orig r31)>
 		 *  4: <2nd half for 64-bit>
-		 *  8: ldw 0(%sp), %r31
-		 * 12: be 0x100(%sr2, %r0)
-		 * 16: ldi __NR_restart_syscall, %r20
 		 */
 #ifdef CONFIG_64BIT
-		err |= put_user(regs->gr[31] >> 32, &usp[0]);
-		err |= put_user(regs->gr[31] & 0xffffffff, &usp[1]);
-		err |= put_user(0x0fc010df, &usp[2]);
-#else
-		err |= put_user(regs->gr[31], &usp[0]);
-		err |= put_user(0x0fc0109f, &usp[2]);
+		if (!is_compat_task()) {
+			err |= put_user(regs->gr[31] >> 32, &usp[0]);
+			err |= put_user(regs->gr[31] & 0xffffffff, &usp[1]);
+			rp = VDSO64_SYMBOL(current, restart_syscall);
+		} else
 #endif
-		err |= put_user(0xe0008200, &usp[3]);
-		err |= put_user(0x34140000, &usp[4]);
-
+		{
+			err |= put_user(regs->gr[31], &usp[0]);
+			rp = VDSO32_SYMBOL(current, restart_syscall);
+		}
 		WARN_ON(err);
 
-		/* flush data/instruction cache for new insns */
-		flush_user_dcache_range_asm(start, end);
-		flush_user_icache_range_asm(start, end);
-
-		regs->gr[31] = regs->gr[30] + 8;
+		regs->gr[31] = rp;
+		DBG(1, "%s: ERESTART_RESTARTBLOCK\n", __func__);
 		return;
 	}
+	case -EINTR:
+		/* ok, was handled before and should be returned. */
+		break;
 	case -ERESTARTNOHAND:
 	case -ERESTARTSYS:
 	case -ERESTARTNOINTR:
+		DBG(1, "%s: Type %ld\n", __func__, regs->gr[28]);
 		check_syscallno_in_delay_branch(regs);
 		return;
 	default:
@@ -567,30 +545,35 @@ insert_restart_trampoline(struct pt_regs *regs)
  * registers).  As noted below, the syscall number gets restored for
  * us due to the magic of delayed branching.
  */
-asmlinkage void
-do_signal(struct pt_regs *regs, long in_syscall)
+static void do_signal(struct pt_regs *regs, long in_syscall)
 {
 	struct ksignal ksig;
+	int restart_syscall;
+	bool has_handler;
 
-	DBG(1,"\ndo_signal: regs=0x%p, sr7 %#lx, in_syscall=%d\n",
-	       regs, regs->sr[7], in_syscall);
+	has_handler = get_signal(&ksig);
 
-	if (get_signal(&ksig)) {
-		DBG(3,"do_signal: signr = %d, regs->gr[28] = %ld\n", signr, regs->gr[28]);
+	restart_syscall = 0;
+	if (in_syscall)
+		restart_syscall = 1;
+
+	if (has_handler) {
 		/* Restart a system call if necessary. */
-		if (in_syscall)
+		if (restart_syscall)
 			syscall_restart(regs, &ksig.ka);
 
 		handle_signal(&ksig, regs, in_syscall);
+		DBG(1, "%s: Handled signal pid %d\n",
+			__func__, task_pid_nr(current));
 		return;
 	}
 
-	/* Did we come from a system call? */
-	if (in_syscall)
+	/* Do we need to restart the system call? */
+	if (restart_syscall)
 		insert_restart_trampoline(regs);
 	
-	DBG(1,"do_signal: Exit (not delivered), regs->gr[28] = %ld\n", 
-		regs->gr[28]);
+	DBG(1, "%s: Exit (not delivered), regs->gr[28] = %ld  orig_r28 = %ld  pid %d\n",
+		__func__, regs->gr[28], regs->orig_r28, task_pid_nr(current));
 
 	restore_saved_sigmask();
 }
diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h
index f166250f2d06..c03eb1ed4c53 100644
--- a/arch/parisc/kernel/signal32.h
+++ b/arch/parisc/kernel/signal32.h
@@ -36,21 +36,12 @@ struct compat_regfile {
         compat_int_t rf_sar;
 };
 
-#define COMPAT_SIGRETURN_TRAMP 4
-#define COMPAT_SIGRESTARTBLOCK_TRAMP 5
-#define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \
-				COMPAT_SIGRESTARTBLOCK_TRAMP)
-
 struct compat_rt_sigframe {
-        /* XXX: Must match trampoline size in arch/parisc/kernel/signal.c
-                Secondary to that it must protect the ERESTART_RESTARTBLOCK
-                trampoline we left on the stack (we were bad and didn't
-                change sp so we could run really fast.) */
-        compat_uint_t tramp[COMPAT_TRAMP_SIZE];
-        compat_siginfo_t info;
-        struct compat_ucontext uc;
-        /* Hidden location of truncated registers, *must* be last. */
-        struct compat_regfile regs;
+	unsigned int tramp[2]; /* holds original return address */
+	compat_siginfo_t info;
+	struct compat_ucontext uc;
+	/* Hidden location of truncated registers, *must* be last. */
+	struct compat_regfile regs;
 };
 
 /*
diff --git a/arch/parisc/kernel/topology.c b/arch/parisc/kernel/topology.c
index 0a10e4ddc528..e88a6ce7c96d 100644
--- a/arch/parisc/kernel/topology.c
+++ b/arch/parisc/kernel/topology.c
@@ -101,8 +101,8 @@ void __init store_cpu_topology(unsigned int cpuid)
 
 	update_siblings_masks(cpuid);
 
-	pr_info("CPU%u: thread %d, cpu %d, socket %d\n",
-		cpuid, cpu_topology[cpuid].thread_id,
+	pr_info("CPU%u: cpu core %d of socket %d\n",
+		cpuid,
 		cpu_topology[cpuid].core_id,
 		cpu_topology[cpuid].socket_id);
 }
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index b6fdebddc8e9..a6e61cf2cad0 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -302,7 +302,10 @@ static void handle_break(struct pt_regs *regs)
 		parisc_kprobe_break_handler(regs);
 		return;
 	}
-
+	if (unlikely(iir == PARISC_KPROBES_BREAK_INSN2)) {
+		parisc_kprobe_ss_handler(regs);
+		return;
+	}
 #endif
 
 #ifdef CONFIG_KGDB
@@ -539,11 +542,6 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
 		/* Recovery counter trap */
 		regs->gr[0] &= ~PSW_R;
 
-#ifdef CONFIG_KPROBES
-		if (parisc_kprobe_ss_handler(regs))
-			return;
-#endif
-
 #ifdef CONFIG_KGDB
 		if (kgdb_single_step) {
 			kgdb_handle_exception(0, SIGTRAP, 0, regs);
@@ -662,6 +660,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
 			 by hand. Technically we need to emulate:
 			 fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw
 		*/
+		if (code == 17 && handle_nadtlb_fault(regs))
+			return;
 		fault_address = regs->ior;
 		fault_space = regs->isr;
 		break;
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 286cec4d86d7..ed1e88a74dc4 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -3,14 +3,11 @@
  *    Unaligned memory access handler
  *
  *    Copyright (C) 2001 Randolph Chung <[email protected]>
+ *    Copyright (C) 2022 Helge Deller <[email protected]>
  *    Significantly tweaked by LaMont Jones <[email protected]>
  */
 
-#include <linux/jiffies.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/sched/signal.h>
-#include <linux/sched/debug.h>
 #include <linux/signal.h>
 #include <linux/ratelimit.h>
 #include <linux/uaccess.h>
@@ -25,18 +22,7 @@
 #define DPRINTF(fmt, args...)
 #endif
 
-#ifdef CONFIG_64BIT
-#define RFMT "%016lx"
-#else
-#define RFMT "%08lx"
-#endif
-
-#define FIXUP_BRANCH(lbl) \
-	"\tldil L%%" #lbl ", %%r1\n"			\
-	"\tldo R%%" #lbl "(%%r1), %%r1\n"		\
-	"\tbv,n %%r0(%%r1)\n"
-/* If you use FIXUP_BRANCH, then you must list this clobber */
-#define FIXUP_BRANCH_CLOBBER "r1"
+#define RFMT "%#08lx"
 
 /* 1111 1100 0000 0000 0001 0011 1100 0000 */
 #define OPCODE1(a,b,c)	((a)<<26|(b)<<12|(c)<<6) 
@@ -114,37 +100,30 @@
 #define IM14(i) IM((i),14)
 
 #define ERR_NOTHANDLED	-1
-#define ERR_PAGEFAULT	-2
 
 int unaligned_enabled __read_mostly = 1;
 
 static int emulate_ldh(struct pt_regs *regs, int toreg)
 {
 	unsigned long saddr = regs->ior;
-	unsigned long val = 0;
-	int ret;
+	unsigned long val = 0, temp1;
+	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	DPRINTF("load " RFMT ":" RFMT " to r%d for 2 bytes\n", 
 		regs->isr, regs->ior, toreg);
 
 	__asm__ __volatile__  (
 "	mtsp	%4, %%sr1\n"
-"1:	ldbs	0(%%sr1,%3), %%r20\n"
+"1:	ldbs	0(%%sr1,%3), %2\n"
 "2:	ldbs	1(%%sr1,%3), %0\n"
-"	depw	%%r20, 23, 24, %0\n"
-"	copy	%%r0, %1\n"
+"	depw	%2, 23, 24, %0\n"
 "3:	\n"
-"	.section .fixup,\"ax\"\n"
-"4:	ldi	-2, %1\n"
-	FIXUP_BRANCH(3b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b, 4b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b, 4b)
-	: "=r" (val), "=r" (ret)
-	: "0" (val), "r" (saddr), "r" (regs->isr)
-	: "r20", FIXUP_BRANCH_CLOBBER );
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+	: "+r" (val), "+r" (ret), "=&r" (temp1)
+	: "r" (saddr), "r" (regs->isr) );
 
-	DPRINTF("val = 0x" RFMT "\n", val);
+	DPRINTF("val = " RFMT "\n", val);
 
 	if (toreg)
 		regs->gr[toreg] = val;
@@ -155,34 +134,28 @@ static int emulate_ldh(struct pt_regs *regs, int toreg)
 static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
 {
 	unsigned long saddr = regs->ior;
-	unsigned long val = 0;
-	int ret;
+	unsigned long val = 0, temp1, temp2;
+	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	DPRINTF("load " RFMT ":" RFMT " to r%d for 4 bytes\n", 
 		regs->isr, regs->ior, toreg);
 
 	__asm__ __volatile__  (
-"	zdep	%3,28,2,%%r19\n"		/* r19=(ofs&3)*8 */
-"	mtsp	%4, %%sr1\n"
-"	depw	%%r0,31,2,%3\n"
-"1:	ldw	0(%%sr1,%3),%0\n"
-"2:	ldw	4(%%sr1,%3),%%r20\n"
-"	subi	32,%%r19,%%r19\n"
-"	mtctl	%%r19,11\n"
-"	vshd	%0,%%r20,%0\n"
-"	copy	%%r0, %1\n"
+"	zdep	%4,28,2,%2\n"		/* r19=(ofs&3)*8 */
+"	mtsp	%5, %%sr1\n"
+"	depw	%%r0,31,2,%4\n"
+"1:	ldw	0(%%sr1,%4),%0\n"
+"2:	ldw	4(%%sr1,%4),%3\n"
+"	subi	32,%4,%2\n"
+"	mtctl	%2,11\n"
+"	vshd	%0,%3,%0\n"
 "3:	\n"
-"	.section .fixup,\"ax\"\n"
-"4:	ldi	-2, %1\n"
-	FIXUP_BRANCH(3b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b, 4b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b, 4b)
-	: "=r" (val), "=r" (ret)
-	: "0" (val), "r" (saddr), "r" (regs->isr)
-	: "r19", "r20", FIXUP_BRANCH_CLOBBER );
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+	: "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2)
+	: "r" (saddr), "r" (regs->isr) );
 
-	DPRINTF("val = 0x" RFMT "\n", val);
+	DPRINTF("val = " RFMT "\n", val);
 
 	if (flop)
 		((__u32*)(regs->fr))[toreg] = val;
@@ -195,16 +168,15 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
 {
 	unsigned long saddr = regs->ior;
 	__u64 val = 0;
-	int ret;
+	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n", 
 		regs->isr, regs->ior, toreg);
-#ifdef CONFIG_PA20
 
-#ifndef CONFIG_64BIT
-	if (!flop)
-		return -1;
-#endif
+	if (!IS_ENABLED(CONFIG_64BIT) && !flop)
+		return ERR_NOTHANDLED;
+
+#ifdef CONFIG_64BIT
 	__asm__ __volatile__  (
 "	depd,z	%3,60,3,%%r19\n"		/* r19=(ofs&7)*8 */
 "	mtsp	%4, %%sr1\n"
@@ -214,44 +186,32 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
 "	subi	64,%%r19,%%r19\n"
 "	mtsar	%%r19\n"
 "	shrpd	%0,%%r20,%%sar,%0\n"
-"	copy	%%r0, %1\n"
 "3:	\n"
-"	.section .fixup,\"ax\"\n"
-"4:	ldi	-2, %1\n"
-	FIXUP_BRANCH(3b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b,4b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b,4b)
-	: "=r" (val), "=r" (ret)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+	: "=r" (val), "+r" (ret)
 	: "0" (val), "r" (saddr), "r" (regs->isr)
-	: "r19", "r20", FIXUP_BRANCH_CLOBBER );
+	: "r19", "r20" );
 #else
     {
-	unsigned long valh=0,vall=0;
+	unsigned long shift, temp1;
 	__asm__ __volatile__  (
-"	zdep	%5,29,2,%%r19\n"		/* r19=(ofs&3)*8 */
-"	mtsp	%6, %%sr1\n"
-"	dep	%%r0,31,2,%5\n"
-"1:	ldw	0(%%sr1,%5),%0\n"
-"2:	ldw	4(%%sr1,%5),%1\n"
-"3:	ldw	8(%%sr1,%5),%%r20\n"
-"	subi	32,%%r19,%%r19\n"
-"	mtsar	%%r19\n"
-"	vshd	%0,%1,%0\n"
-"	vshd	%1,%%r20,%1\n"
-"	copy	%%r0, %2\n"
+"	zdep	%2,29,2,%3\n"		/* r19=(ofs&3)*8 */
+"	mtsp	%5, %%sr1\n"
+"	dep	%%r0,31,2,%2\n"
+"1:	ldw	0(%%sr1,%2),%0\n"
+"2:	ldw	4(%%sr1,%2),%R0\n"
+"3:	ldw	8(%%sr1,%2),%4\n"
+"	subi	32,%3,%3\n"
+"	mtsar	%3\n"
+"	vshd	%0,%R0,%0\n"
+"	vshd	%R0,%4,%R0\n"
 "4:	\n"
-"	.section .fixup,\"ax\"\n"
-"5:	ldi	-2, %2\n"
-	FIXUP_BRANCH(4b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b,5b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b,5b)
-	ASM_EXCEPTIONTABLE_ENTRY(3b,5b)
-	: "=r" (valh), "=r" (vall), "=r" (ret)
-	: "0" (valh), "1" (vall), "r" (saddr), "r" (regs->isr)
-	: "r19", "r20", FIXUP_BRANCH_CLOBBER );
-	val=((__u64)valh<<32)|(__u64)vall;
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b)
+	: "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
+	: "r" (regs->isr) );
     }
 #endif
 
@@ -267,31 +227,25 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
 
 static int emulate_sth(struct pt_regs *regs, int frreg)
 {
-	unsigned long val = regs->gr[frreg];
-	int ret;
+	unsigned long val = regs->gr[frreg], temp1;
+	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	if (!frreg)
 		val = 0;
 
-	DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 2 bytes\n", frreg, 
+	DPRINTF("store r%d (" RFMT ") to " RFMT ":" RFMT " for 2 bytes\n", frreg,
 		val, regs->isr, regs->ior);
 
 	__asm__ __volatile__ (
-"	mtsp %3, %%sr1\n"
-"	extrw,u %1, 23, 8, %%r19\n"
-"1:	stb %1, 1(%%sr1, %2)\n"
-"2:	stb %%r19, 0(%%sr1, %2)\n"
-"	copy	%%r0, %0\n"
+"	mtsp %4, %%sr1\n"
+"	extrw,u %2, 23, 8, %1\n"
+"1:	stb %1, 0(%%sr1, %3)\n"
+"2:	stb %2, 1(%%sr1, %3)\n"
 "3:	\n"
-"	.section .fixup,\"ax\"\n"
-"4:	ldi	-2, %0\n"
-	FIXUP_BRANCH(3b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b,4b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b,4b)
-	: "=r" (ret)
-	: "r" (val), "r" (regs->ior), "r" (regs->isr)
-	: "r19", FIXUP_BRANCH_CLOBBER );
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+	: "+r" (ret), "=&r" (temp1)
+	: "r" (val), "r" (regs->ior), "r" (regs->isr) );
 
 	return ret;
 }
@@ -299,7 +253,7 @@ static int emulate_sth(struct pt_regs *regs, int frreg)
 static int emulate_stw(struct pt_regs *regs, int frreg, int flop)
 {
 	unsigned long val;
-	int ret;
+	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	if (flop)
 		val = ((__u32*)(regs->fr))[frreg];
@@ -308,7 +262,7 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop)
 	else
 		val = 0;
 
-	DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 4 bytes\n", frreg, 
+	DPRINTF("store r%d (" RFMT ") to " RFMT ":" RFMT " for 4 bytes\n", frreg,
 		val, regs->isr, regs->ior);
 
 
@@ -328,24 +282,19 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop)
 "	or	%%r1, %%r21, %%r21\n"
 "	stw	%%r20,0(%%sr1,%2)\n"
 "	stw	%%r21,4(%%sr1,%2)\n"
-"	copy	%%r0, %0\n"
 "3:	\n"
-"	.section .fixup,\"ax\"\n"
-"4:	ldi	-2, %0\n"
-	FIXUP_BRANCH(3b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b,4b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b,4b)
-	: "=r" (ret)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+	: "+r" (ret)
 	: "r" (val), "r" (regs->ior), "r" (regs->isr)
-	: "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER );
+	: "r19", "r20", "r21", "r22", "r1" );
 
 	return ret;
 }
 static int emulate_std(struct pt_regs *regs, int frreg, int flop)
 {
 	__u64 val;
-	int ret;
+	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	if (flop)
 		val = regs->fr[frreg];
@@ -357,11 +306,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
 	DPRINTF("store r%d (0x%016llx) to " RFMT ":" RFMT " for 8 bytes\n", frreg, 
 		val,  regs->isr, regs->ior);
 
-#ifdef CONFIG_PA20
-#ifndef CONFIG_64BIT
-	if (!flop)
-		return -1;
-#endif
+	if (!IS_ENABLED(CONFIG_64BIT) && !flop)
+		return ERR_NOTHANDLED;
+
+#ifdef CONFIG_64BIT
 	__asm__ __volatile__ (
 "	mtsp %3, %%sr1\n"
 "	depd,z	%2, 60, 3, %%r19\n"
@@ -378,19 +326,14 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
 "	or	%%r1, %%r21, %%r21\n"
 "3:	std	%%r20,0(%%sr1,%2)\n"
 "4:	std	%%r21,8(%%sr1,%2)\n"
-"	copy	%%r0, %0\n"
 "5:	\n"
-"	.section .fixup,\"ax\"\n"
-"6:	ldi	-2, %0\n"
-	FIXUP_BRANCH(5b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b,6b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b,6b)
-	ASM_EXCEPTIONTABLE_ENTRY(3b,6b)
-	ASM_EXCEPTIONTABLE_ENTRY(4b,6b)
-	: "=r" (ret)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b)
+	: "+r" (ret)
 	: "r" (val), "r" (regs->ior), "r" (regs->isr)
-	: "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER );
+	: "r19", "r20", "r21", "r22", "r1" );
 #else
     {
 	unsigned long valh=(val>>32),vall=(val&0xffffffffl);
@@ -412,20 +355,15 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
 "3:	stw	%1,0(%%sr1,%3)\n"
 "4:	stw	%%r1,4(%%sr1,%3)\n"
 "5:	stw	%2,8(%%sr1,%3)\n"
-"	copy	%%r0, %0\n"
 "6:	\n"
-"	.section .fixup,\"ax\"\n"
-"7:	ldi	-2, %0\n"
-	FIXUP_BRANCH(6b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b,7b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b,7b)
-	ASM_EXCEPTIONTABLE_ENTRY(3b,7b)
-	ASM_EXCEPTIONTABLE_ENTRY(4b,7b)
-	ASM_EXCEPTIONTABLE_ENTRY(5b,7b)
-	: "=r" (ret)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b)
+	: "+r" (ret)
 	: "r" (valh), "r" (vall), "r" (regs->ior), "r" (regs->isr)
-	: "r19", "r20", "r21", "r1", FIXUP_BRANCH_CLOBBER );
+	: "r19", "r20", "r21", "r1" );
     }
 #endif
 
@@ -438,7 +376,6 @@ void handle_unaligned(struct pt_regs *regs)
 	unsigned long newbase = R1(regs->iir)?regs->gr[R1(regs->iir)]:0;
 	int modify = 0;
 	int ret = ERR_NOTHANDLED;
-	register int flop=0;	/* true if this is a flop */
 
 	__inc_irq_stat(irq_unaligned_count);
 
@@ -450,10 +387,10 @@ void handle_unaligned(struct pt_regs *regs)
 
 		if (!(current->thread.flags & PARISC_UAC_NOPRINT) &&
 			__ratelimit(&ratelimit)) {
-			char buf[256];
-			sprintf(buf, "%s(%d): unaligned access to 0x" RFMT " at ip=0x" RFMT "\n",
-				current->comm, task_pid_nr(current), regs->ior, regs->iaoq[0]);
-			printk(KERN_WARNING "%s", buf);
+			printk(KERN_WARNING "%s(%d): unaligned access to " RFMT
+				" at ip " RFMT " (iir " RFMT ")\n",
+				current->comm, task_pid_nr(current), regs->ior,
+				regs->iaoq[0], regs->iir);
 #ifdef DEBUG_UNALIGNED
 			show_regs(regs);
 #endif		
@@ -547,7 +484,7 @@ void handle_unaligned(struct pt_regs *regs)
 		ret = emulate_stw(regs, R2(regs->iir),0);
 		break;
 
-#ifdef CONFIG_PA20
+#ifdef CONFIG_64BIT
 	case OPCODE_LDD_I:
 	case OPCODE_LDDA_I:
 	case OPCODE_LDD_S:
@@ -565,13 +502,11 @@ void handle_unaligned(struct pt_regs *regs)
 	case OPCODE_FLDWS:
 	case OPCODE_FLDWXR:
 	case OPCODE_FLDWSR:
-		flop=1;
 		ret = emulate_ldw(regs,FR3(regs->iir),1);
 		break;
 
 	case OPCODE_FLDDX:
 	case OPCODE_FLDDS:
-		flop=1;
 		ret = emulate_ldd(regs,R3(regs->iir),1);
 		break;
 
@@ -579,13 +514,11 @@ void handle_unaligned(struct pt_regs *regs)
 	case OPCODE_FSTWS:
 	case OPCODE_FSTWXR:
 	case OPCODE_FSTWSR:
-		flop=1;
 		ret = emulate_stw(regs,FR3(regs->iir),1);
 		break;
 
 	case OPCODE_FSTDX:
 	case OPCODE_FSTDS:
-		flop=1;
 		ret = emulate_std(regs,R3(regs->iir),1);
 		break;
 
@@ -599,14 +532,12 @@ void handle_unaligned(struct pt_regs *regs)
 	switch (regs->iir & OPCODE2_MASK)
 	{
 	case OPCODE_FLDD_L:
-		flop=1;
 		ret = emulate_ldd(regs,R2(regs->iir),1);
 		break;
 	case OPCODE_FSTD_L:
-		flop=1;
 		ret = emulate_std(regs, R2(regs->iir),1);
 		break;
-#ifdef CONFIG_PA20
+#ifdef CONFIG_64BIT
 	case OPCODE_LDD_L:
 		ret = emulate_ldd(regs, R2(regs->iir),0);
 		break;
@@ -618,7 +549,6 @@ void handle_unaligned(struct pt_regs *regs)
 	switch (regs->iir & OPCODE3_MASK)
 	{
 	case OPCODE_FLDW_L:
-		flop=1;
 		ret = emulate_ldw(regs, R2(regs->iir), 1);
 		break;
 	case OPCODE_LDW_M:
@@ -626,7 +556,6 @@ void handle_unaligned(struct pt_regs *regs)
 		break;
 
 	case OPCODE_FSTW_L:
-		flop=1;
 		ret = emulate_stw(regs, R2(regs->iir),1);
 		break;
 	case OPCODE_STW_M:
@@ -673,7 +602,7 @@ void handle_unaligned(struct pt_regs *regs)
 		printk(KERN_CRIT "Unaligned handler failed, ret = %d\n", ret);
 		die_if_kernel("Unaligned data reference", regs, 28);
 
-		if (ret == ERR_PAGEFAULT)
+		if (ret == -EFAULT)
 		{
 			force_sig_fault(SIGSEGV, SEGV_MAPERR,
 					(void __user *)regs->ior);
diff --git a/arch/parisc/kernel/vdso.c b/arch/parisc/kernel/vdso.c
new file mode 100644
index 000000000000..63dc44c4c246
--- /dev/null
+++ b/arch/parisc/kernel/vdso.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (c) 2022 Helge Deller <[email protected]>
+ *
+ *  based on arch/s390/kernel/vdso.c which is
+ *  Copyright IBM Corp. 2008
+ *  Author(s): Martin Schwidefsky ([email protected])
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/elf.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/compat.h>
+#include <linux/nsproxy.h>
+#include <linux/time_namespace.h>
+#include <linux/random.h>
+
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/sections.h>
+#include <asm/vdso.h>
+#include <asm/cacheflush.h>
+
+extern char vdso32_start, vdso32_end;
+extern char vdso64_start, vdso64_end;
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		       struct vm_area_struct *vma)
+{
+	current->mm->context.vdso_base = vma->vm_start;
+	return 0;
+}
+
+#ifdef CONFIG_64BIT
+static struct vm_special_mapping vdso64_mapping = {
+	.name = "[vdso]",
+	.mremap = vdso_mremap,
+};
+#endif
+
+static struct vm_special_mapping vdso32_mapping = {
+	.name = "[vdso]",
+	.mremap = vdso_mremap,
+};
+
+/*
+ * This is called from binfmt_elf, we create the special vma for the
+ * vDSO and insert it into the mm struct tree
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm,
+				int executable_stack)
+{
+
+	unsigned long vdso_text_start, vdso_text_len, map_base;
+	struct vm_special_mapping *vdso_mapping;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	int rc;
+
+	if (mmap_write_lock_killable(mm))
+		return -EINTR;
+
+#ifdef CONFIG_64BIT
+	if (!is_compat_task()) {
+		vdso_text_len = &vdso64_end - &vdso64_start;
+		vdso_mapping = &vdso64_mapping;
+	} else
+#endif
+	{
+		vdso_text_len = &vdso32_end - &vdso32_start;
+		vdso_mapping = &vdso32_mapping;
+	}
+
+	map_base = mm->mmap_base;
+	if (current->flags & PF_RANDOMIZE)
+		map_base -= (get_random_int() & 0x1f) * PAGE_SIZE;
+
+	vdso_text_start = get_unmapped_area(NULL, map_base, vdso_text_len, 0, 0);
+
+	/* VM_MAYWRITE for COW so gdb can set breakpoints */
+	vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
+				       VM_READ|VM_EXEC|
+				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+				       vdso_mapping);
+	if (IS_ERR(vma)) {
+		do_munmap(mm, vdso_text_start, PAGE_SIZE, NULL);
+		rc = PTR_ERR(vma);
+	} else {
+		current->mm->context.vdso_base = vdso_text_start;
+		rc = 0;
+	}
+
+	mmap_write_unlock(mm);
+	return rc;
+}
+
+static struct page ** __init vdso_setup_pages(void *start, void *end)
+{
+	int pages = (end - start) >> PAGE_SHIFT;
+	struct page **pagelist;
+	int i;
+
+	pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
+	if (!pagelist)
+		panic("%s: Cannot allocate page list for VDSO", __func__);
+	for (i = 0; i < pages; i++)
+		pagelist[i] = virt_to_page(start + i * PAGE_SIZE);
+	return pagelist;
+}
+
+static int __init vdso_init(void)
+{
+#ifdef CONFIG_64BIT
+	vdso64_mapping.pages = vdso_setup_pages(&vdso64_start, &vdso64_end);
+#endif
+	if (IS_ENABLED(CONFIG_COMPAT) || !IS_ENABLED(CONFIG_64BIT))
+		vdso32_mapping.pages = vdso_setup_pages(&vdso32_start, &vdso32_end);
+	return 0;
+}
+arch_initcall(vdso_init);
diff --git a/arch/parisc/kernel/vdso32/Makefile b/arch/parisc/kernel/vdso32/Makefile
new file mode 100644
index 000000000000..85b1c6d261d1
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/Makefile
@@ -0,0 +1,53 @@
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso32 = note.o sigtramp.o restart_syscall.o
+
+# Build rules
+
+targets := $(obj-vdso32) vdso32.so
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+ccflags-y := -shared -fno-common -fbuiltin -mno-fast-indirect-calls -O2 -mno-long-calls
+#  -march=1.1 -mschedule=7100LC
+ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
+		$(call ld-option, -Wl$(comma)--hash-style=sysv)
+asflags-y := -D__VDSO32__ -s
+
+KBUILD_AFLAGS += -DBUILD_VDSO
+KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
+
+VDSO_LIBGCC := $(shell $(CROSS32CC) -print-libgcc-file-name)
+
+obj-y += vdso32_wrapper.o
+extra-y += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C  #  -U$(ARCH)
+
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so FORCE
+
+# Force dependency (incbin is bad)
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) $(obj-cvdso32) $(VDSO_LIBGCC)
+	$(call if_changed,vdso32ld)
+
+# assembly rules for the .S files
+$(obj-vdso32): %.o: %.S FORCE
+	$(call if_changed_dep,vdso32as)
+
+$(obj-cvdso32): %.o: %.c FORCE
+	$(call if_changed_dep,vdso32cc)
+
+# actual build commands
+quiet_cmd_vdso32ld = VDSO32L $@
+      cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso32as = VDSO32A $@
+      cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdso32cc = VDSO32C $@
+      cmd_vdso32cc = $(CROSS32CC) $(c_flags) -c -fPIC -mno-fast-indirect-calls -o $@ $<
+
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+	cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso32-offsets.h: $(obj)/vdso32.so FORCE
+	$(call if_changed,vdsosym)
diff --git a/arch/parisc/kernel/vdso32/gen_vdso_offsets.sh b/arch/parisc/kernel/vdso32/gen_vdso_offsets.sh
new file mode 100755
index 000000000000..da39d6cff7f1
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/gen_vdso_offsets.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Inspired by arm64 version.
+#
+
+LC_ALL=C
+sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso32_offset_\2\t0x\1/p'
diff --git a/arch/parisc/kernel/vdso32/note.S b/arch/parisc/kernel/vdso32/note.S
new file mode 100644
index 000000000000..bb350918bebd
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/note.S
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+
+#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type)			      \
+	.section name, flags;						      \
+	.balign 4;							      \
+	.long 1f - 0f;		/* name length */			      \
+	.long 3f - 2f;		/* data length */			      \
+	.long type;		/* note type */				      \
+0:	.asciz vendor;		/* vendor name */			      \
+1:	.balign 4;							      \
+2:
+
+#define ASM_ELF_NOTE_END						      \
+3:	.balign 4;		/* pad out section */			      \
+	.previous
+
+	ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0)
+	.long LINUX_VERSION_CODE
+	ASM_ELF_NOTE_END
diff --git a/arch/parisc/kernel/vdso32/restart_syscall.S b/arch/parisc/kernel/vdso32/restart_syscall.S
new file mode 100644
index 000000000000..7e82008d7e40
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/restart_syscall.S
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Syscall restart trampoline for 32 and 64 bits processes.
+ *
+ * Copyright (C) 2018-2022 Helge Deller <[email protected]>
+ * Copyright (C) 2022 John David Anglin <[email protected]>
+ */
+
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+#include <linux/linkage.h>
+
+	.text
+
+ENTRY_CFI(__kernel_restart_syscall)
+	/*
+	 * Setup a trampoline to restart the syscall
+	 * with __NR_restart_syscall
+	 */
+
+	/* load return pointer */
+#if defined(__VDSO64__)
+	ldd	0(%sp), %r31
+#elif defined(__VDSO32__)
+	ldw	0(%sp), %r31
+#endif
+
+	be	0x100(%sr2, %r0)
+	ldi	__NR_restart_syscall, %r20
+
+ENDPROC_CFI(__kernel_restart_syscall)
diff --git a/arch/parisc/kernel/vdso32/sigtramp.S b/arch/parisc/kernel/vdso32/sigtramp.S
new file mode 100644
index 000000000000..192da7077869
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/sigtramp.S
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Signal trampolines for 32 bit processes.
+ *
+ * Copyright (C) 2006 Randolph Chung <[email protected]>
+ * Copyright (C) 2018-2022 Helge Deller <[email protected]>
+ * Copyright (C) 2022 John David Anglin <[email protected]>
+ */
+#include <asm/unistd.h>
+#include <linux/linkage.h>
+#include <generated/asm-offsets.h>
+
+	.text
+
+/* Gdb expects the trampoline is on the stack and the pc is offset from
+   a 64-byte boundary by 0, 4 or 5 instructions. Since the vdso trampoline
+   is not on the stack, we need a new variant with different offsets and
+   data to tell gdb where to find the signal context on the stack.
+
+   Here we put the offset to the context data at the start of the trampoline
+   region and offset the first trampoline by 2 instructions. Please do
+   not change the trampoline as the code in gdb depends on the following
+   instruction sequence exactly.
+ */
+	.align 64
+	.word SIGFRAME_CONTEXT_REGS32
+
+/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
+   the return address to get an address in the middle of the presumed
+   call instruction. Since we don't have a call here, we artifically
+   extend the range covered by the unwind info by adding a nop before
+   the real start.
+ */
+	nop
+
+	.globl __kernel_sigtramp_rt
+	.type __kernel_sigtramp_rt, @function
+__kernel_sigtramp_rt:
+	.proc
+	.callinfo FRAME=ASM_SIGFRAME_SIZE32,CALLS,SAVE_RP
+	.entry
+
+.Lsigrt_start = . - 4
+0:	ldi	0, %r25			/* (in_syscall=0) */
+	ldi  __NR_rt_sigreturn, %r20
+	ble  0x100(%sr2, %r0)
+	nop
+
+1:	ldi	1, %r25			/* (in_syscall=1) */
+	ldi  __NR_rt_sigreturn, %r20
+	ble  0x100(%sr2, %r0)
+	nop
+.Lsigrt_end:
+	.exit
+	.procend
+	.size __kernel_sigtramp_rt,.-__kernel_sigtramp_rt
+
+
+	.section .eh_frame,"a",@progbits
+
+/* This is where the mcontext_t struct can be found on the stack.  */
+#define PTREGS SIGFRAME_CONTEXT_REGS32	/* 32-bit process offset is -672 */
+
+/* Register REGNO can be found at offset OFS of the mcontext_t structure. */
+	.macro rsave regno,ofs
+	.byte 0x05		/* DW_CFA_offset_extended */
+	.uleb128 \regno;	/*   regno */
+	.uleb128 \ofs		/*   factored offset */
+	.endm
+
+.Lcie:
+	.long .Lcie_end - .Lcie_start
+.Lcie_start:
+	.long 0			/* CIE ID */
+	.byte 1			/* Version number */
+	.stringz "zRS"		/* NUL-terminated augmentation string */
+	.uleb128 4		/* Code alignment factor */
+	.sleb128 4		/* Data alignment factor */
+	.byte 89		/* Return address register column, iaoq[0] */
+	.uleb128 1		/* Augmentation value length */
+	.byte 0x1b		/* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */
+	.byte 0x0f		/* DW_CFA_def_cfa_expresion */
+	.uleb128 9f - 1f	/*   length */
+1:
+	.byte 0x8e		/*   DW_OP_breg30 */
+	.sleb128 PTREGS
+9:
+	.balign 4
+.Lcie_end:
+
+	.long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+	.long .Lfde0_start - .Lcie	/* CIE pointer. */
+	.long .Lsigrt_start - .		/* PC start, length */
+	.long .Lsigrt_end - .Lsigrt_start
+	.uleb128 0			/* Augmentation */
+
+	/* General registers */
+	rsave  1,  2
+	rsave  2,  3
+	rsave  3,  4
+	rsave  4,  5
+	rsave  5,  6
+	rsave  6,  7
+	rsave  7,  8
+	rsave  8,  9
+	rsave  9, 10
+	rsave 10, 11
+	rsave 11, 12
+	rsave 12, 13
+	rsave 13, 14
+	rsave 14, 15
+	rsave 15, 16
+	rsave 16, 17
+	rsave 17, 18
+	rsave 18, 19
+	rsave 19, 20
+	rsave 20, 21
+	rsave 21, 22
+	rsave 22, 23
+	rsave 23, 24
+	rsave 24, 25
+	rsave 25, 26
+	rsave 26, 27
+	rsave 27, 28
+	rsave 28, 29
+	rsave 29, 30
+	rsave 30, 31
+	rsave 31, 32
+
+	/* Floating-point registers */
+	rsave 32, 42
+	rsave 33, 43
+	rsave 34, 44
+	rsave 35, 45
+	rsave 36, 46
+	rsave 37, 47
+	rsave 38, 48
+	rsave 39, 49
+	rsave 40, 50
+	rsave 41, 51
+	rsave 42, 52
+	rsave 43, 53
+	rsave 44, 54
+	rsave 45, 55
+	rsave 46, 56
+	rsave 47, 57
+	rsave 48, 58
+	rsave 49, 59
+	rsave 50, 60
+	rsave 51, 61
+	rsave 52, 62
+	rsave 53, 63
+	rsave 54, 64
+	rsave 55, 65
+	rsave 56, 66
+	rsave 57, 67
+	rsave 58, 68
+	rsave 59, 69
+	rsave 60, 70
+	rsave 61, 71
+	rsave 62, 72
+	rsave 63, 73
+	rsave 64, 74
+	rsave 65, 75
+	rsave 66, 76
+	rsave 67, 77
+	rsave 68, 78
+	rsave 69, 79
+	rsave 70, 80
+	rsave 71, 81
+	rsave 72, 82
+	rsave 73, 83
+	rsave 74, 84
+	rsave 75, 85
+	rsave 76, 86
+	rsave 77, 87
+	rsave 78, 88
+	rsave 79, 89
+	rsave 80, 90
+	rsave 81, 91
+	rsave 82, 92
+	rsave 83, 93
+	rsave 84, 94
+	rsave 85, 95
+	rsave 86, 96
+	rsave 87, 97
+
+	/* SAR register */
+	rsave 88, 102
+
+	/* iaoq[0] return address register */
+	rsave 89, 100
+	.balign 4
+.Lfde0_end:
diff --git a/arch/parisc/kernel/vdso32/vdso32.lds.S b/arch/parisc/kernel/vdso32/vdso32.lds.S
new file mode 100644
index 000000000000..d4aff3af5262
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/vdso32.lds.S
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This is the infamous ld script for the 32 bits vdso library
+ */
+#include <asm/vdso.h>
+#include <asm/page.h>
+
+/* Default link addresses for the vDSOs */
+OUTPUT_FORMAT("elf32-hppa-linux")
+OUTPUT_ARCH(hppa)
+ENTRY(_start)
+
+SECTIONS
+{
+  . = VDSO_LBASE + SIZEOF_HEADERS;
+  .hash           : { *(.hash) }			:text
+  .gnu.hash       : { *(.gnu.hash) }
+  .dynsym         : { *(.dynsym) }
+  .dynstr         : { *(.dynstr) }
+  .gnu.version    : { *(.gnu.version) }
+  .gnu.version_d  : { *(.gnu.version_d) }
+  .gnu.version_r  : { *(.gnu.version_r) }
+
+  .note		  : { *(.note.*) } 			:text	:note
+
+  . = ALIGN (16);
+  .text :
+  {
+    *(.text .stub .text.* .gnu.linkonce.t.*)
+  }
+  PROVIDE (__etext = .);
+  PROVIDE (_etext = .);
+  PROVIDE (etext = .);
+
+  /* Other stuff is appended to the text segment: */
+  .rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+  .rodata1		: { *(.rodata1) }
+  .rodata2		: { *(.data.rel.ro) }
+
+  .eh_frame_hdr		: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+  .eh_frame		: { KEEP (*(.eh_frame)) }	:text
+  .gcc_except_table	: { *(.gcc_except_table) }
+  .fixup		: { *(.fixup) }
+
+  .dynamic		: { *(.dynamic) }		:text	:dynamic
+  .plt : { *(.plt) }
+  .got : { *(.got) }
+
+  _end = .;
+  __end = .;
+  PROVIDE (end = .);
+
+
+  /* Stabs debugging sections are here too
+   */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+  .debug 0 : { *(.debug) }
+  .line 0 : { *(.line) }
+
+  .debug_srcinfo 0 : { *(.debug_srcinfo) }
+  .debug_sfnames 0 : { *(.debug_sfnames) }
+
+  .debug_aranges 0 : { *(.debug_aranges) }
+  .debug_pubnames 0 : { *(.debug_pubnames) }
+
+  .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+  .debug_abbrev 0 : { *(.debug_abbrev) }
+  .debug_line 0 : { *(.debug_line) }
+  .debug_frame 0 : { *(.debug_frame) }
+  .debug_str 0 : { *(.debug_str) }
+  .debug_loc 0 : { *(.debug_loc) }
+  .debug_macinfo 0 : { *(.debug_macinfo) }
+
+  .debug_weaknames 0 : { *(.debug_weaknames) }
+  .debug_funcnames 0 : { *(.debug_funcnames) }
+  .debug_typenames 0 : { *(.debug_typenames) }
+  .debug_varnames 0 : { *(.debug_varnames) }
+
+  /DISCARD/ : { *(.note.GNU-stack) }
+  /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) }
+  /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
+
+
+PHDRS
+{
+  text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+  note PT_NOTE FLAGS(4); /* PF_R */
+  dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+  eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+  VDSO_VERSION_STRING {
+    global:
+	__kernel_sigtramp_rt32;
+	__kernel_restart_syscall32;
+    local: *;
+  };
+}
diff --git a/arch/parisc/kernel/vdso32/vdso32_wrapper.S b/arch/parisc/kernel/vdso32/vdso32_wrapper.S
new file mode 100644
index 000000000000..5ac06093e8ec
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/vdso32_wrapper.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+	__PAGE_ALIGNED_DATA
+
+	.globl vdso32_start, vdso32_end
+	.balign PAGE_SIZE
+vdso32_start:
+	.incbin "arch/parisc/kernel/vdso32/vdso32.so"
+	.balign PAGE_SIZE
+vdso32_end:
+
+	.previous
diff --git a/arch/parisc/kernel/vdso64/Makefile b/arch/parisc/kernel/vdso64/Makefile
new file mode 100644
index 000000000000..a30f5ec5eb4b
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/Makefile
@@ -0,0 +1,48 @@
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso64 = note.o sigtramp.o restart_syscall.o
+
+# Build rules
+
+targets := $(obj-vdso64) vdso64.so
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+
+
+ccflags-y := -shared -fno-common -fno-builtin
+ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
+		$(call ld-option, -Wl$(comma)--hash-style=sysv)
+asflags-y := -D__VDSO64__ -s
+
+KBUILD_AFLAGS += -DBUILD_VDSO
+KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
+
+VDSO_LIBGCC := $(shell $(CC) -print-libgcc-file-name)
+
+obj-y += vdso64_wrapper.o
+extra-y += vdso64.lds
+CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+
+$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so FORCE
+
+# Force dependency (incbin is bad)
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64) $(VDSO_LIBGCC)
+	$(call if_changed,vdso64ld)
+
+# assembly rules for the .S files
+$(obj-vdso64): %.o: %.S FORCE
+	$(call if_changed_dep,vdso64as)
+
+# actual build commands
+quiet_cmd_vdso64ld = VDSO64L $@
+      cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso64as = VDSO64A $@
+      cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+	cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso64-offsets.h: $(obj)/vdso64.so FORCE
+	$(call if_changed,vdsosym)
diff --git a/arch/parisc/kernel/vdso64/gen_vdso_offsets.sh b/arch/parisc/kernel/vdso64/gen_vdso_offsets.sh
new file mode 100755
index 000000000000..37f05cb38dad
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/gen_vdso_offsets.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Inspired by arm64 version.
+#
+
+LC_ALL=C
+sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso64_offset_\2\t0x\1/p'
diff --git a/arch/parisc/kernel/vdso64/note.S b/arch/parisc/kernel/vdso64/note.S
new file mode 100644
index 000000000000..bd1fa23597d6
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/note.S
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "../vdso32/note.S"
diff --git a/arch/parisc/kernel/vdso64/restart_syscall.S b/arch/parisc/kernel/vdso64/restart_syscall.S
new file mode 100644
index 000000000000..83004451f6b1
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/restart_syscall.S
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "../vdso32/restart_syscall.S"
diff --git a/arch/parisc/kernel/vdso64/sigtramp.S b/arch/parisc/kernel/vdso64/sigtramp.S
new file mode 100644
index 000000000000..66a6d2b241e1
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/sigtramp.S
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Signal trampolines for 64 bit processes.
+ *
+ * Copyright (C) 2006 Randolph Chung <[email protected]>
+ * Copyright (C) 2018-2022 Helge Deller <[email protected]>
+ * Copyright (C) 2022 John David Anglin <[email protected]>
+ */
+#include <asm/unistd.h>
+#include <linux/linkage.h>
+#include <generated/asm-offsets.h>
+
+	.text
+
+/* Gdb expects the trampoline is on the stack and the pc is offset from
+   a 64-byte boundary by 0, 4 or 5 instructions. Since the vdso trampoline
+   is not on the stack, we need a new variant with different offsets and
+   data to tell gdb where to find the signal context on the stack.
+
+   Here we put the offset to the context data at the start of the trampoline
+   region and offset the first trampoline by 2 instructions. Please do
+   not change the trampoline as the code in gdb depends on the following
+   instruction sequence exactly.
+ */
+	.align 64
+	.word SIGFRAME_CONTEXT_REGS
+
+/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
+   the return address to get an address in the middle of the presumed
+   call instruction. Since we don't have a call here, we artifically
+   extend the range covered by the unwind info by adding a nop before
+   the real start.
+ */
+	nop
+
+	.globl __kernel_sigtramp_rt
+	.type __kernel_sigtramp_rt, @function
+__kernel_sigtramp_rt:
+	.proc
+	.callinfo FRAME=ASM_SIGFRAME_SIZE,CALLS,SAVE_RP
+	.entry
+
+.Lsigrt_start = . - 4
+0:	ldi	0, %r25			/* (in_syscall=0) */
+	ldi  __NR_rt_sigreturn, %r20
+	ble  0x100(%sr2, %r0)
+	nop
+
+1:	ldi	1, %r25			/* (in_syscall=1) */
+	ldi  __NR_rt_sigreturn, %r20
+	ble  0x100(%sr2, %r0)
+	nop
+.Lsigrt_end:
+	.exit
+	.procend
+	.size __kernel_sigtramp_rt,.-__kernel_sigtramp_rt
+
+	.section .eh_frame,"a",@progbits
+
+/* This is where the mcontext_t struct can be found on the stack.  */
+#define PTREGS SIGFRAME_CONTEXT_REGS	/* 64-bit process offset is -720 */
+
+/* Register REGNO can be found at offset OFS of the mcontext_t structure. */
+	.macro rsave regno,ofs
+	.byte 0x05		/* DW_CFA_offset_extended */
+	.uleb128 \regno;	/*   regno */
+	.uleb128 \ofs		/*   factored offset */
+	.endm
+
+.Lcie:
+	.long .Lcie_end - .Lcie_start
+.Lcie_start:
+	.long 0			/* CIE ID */
+	.byte 1			/* Version number */
+	.stringz "zRS"		/* NUL-terminated augmentation string */
+	.uleb128 4		/* Code alignment factor */
+	.sleb128 8		/* Data alignment factor */
+	.byte 61		/* Return address register column, iaoq[0] */
+	.uleb128 1		/* Augmentation value length */
+	.byte 0x1b		/* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */
+	.byte 0x0f		/* DW_CFA_def_cfa_expresion */
+	.uleb128 9f - 1f	/*   length */
+1:
+	.byte 0x8e		/*   DW_OP_breg30 */
+	.sleb128 PTREGS
+9:
+	.balign 8
+.Lcie_end:
+
+	.long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+	.long .Lfde0_start - .Lcie	/* CIE pointer. */
+	.long .Lsigrt_start - .		/* PC start, length */
+	.long .Lsigrt_end - .Lsigrt_start
+	.uleb128 0			/* Augmentation */
+
+	/* General registers */
+	rsave  1,  2
+	rsave  2,  3
+	rsave  3,  4
+	rsave  4,  5
+	rsave  5,  6
+	rsave  6,  7
+	rsave  7,  8
+	rsave  8,  9
+	rsave  9, 10
+	rsave 10, 11
+	rsave 11, 12
+	rsave 12, 13
+	rsave 13, 14
+	rsave 14, 15
+	rsave 15, 16
+	rsave 16, 17
+	rsave 17, 18
+	rsave 18, 19
+	rsave 19, 20
+	rsave 20, 21
+	rsave 21, 22
+	rsave 22, 23
+	rsave 23, 24
+	rsave 24, 25
+	rsave 25, 26
+	rsave 26, 27
+	rsave 27, 28
+	rsave 28, 29
+	rsave 29, 30
+	rsave 30, 31
+	rsave 31, 32
+
+	/* Floating-point registers */
+	rsave 32, 36
+	rsave 33, 37
+	rsave 34, 38
+	rsave 35, 39
+	rsave 36, 40
+	rsave 37, 41
+	rsave 38, 42
+	rsave 39, 43
+	rsave 40, 44
+	rsave 41, 45
+	rsave 42, 46
+	rsave 43, 47
+	rsave 44, 48
+	rsave 45, 49
+	rsave 46, 50
+	rsave 47, 51
+	rsave 48, 52
+	rsave 49, 53
+	rsave 50, 54
+	rsave 51, 55
+	rsave 52, 56
+	rsave 53, 57
+	rsave 54, 58
+	rsave 55, 59
+	rsave 56, 60
+	rsave 57, 61
+	rsave 58, 62
+	rsave 59, 63
+
+	/* SAR register */
+	rsave 60, 67
+
+	/* iaoq[0] return address register */
+	rsave 61, 65
+	.balign 8
+.Lfde0_end:
diff --git a/arch/parisc/kernel/vdso64/vdso64.lds.S b/arch/parisc/kernel/vdso64/vdso64.lds.S
new file mode 100644
index 000000000000..de1fb4b19286
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/vdso64.lds.S
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This is the infamous ld script for the 64 bits vdso library
+ */
+#include <asm/vdso.h>
+
+/* Default link addresses for the vDSOs */
+OUTPUT_FORMAT("elf64-hppa-linux")
+OUTPUT_ARCH(hppa:hppa2.0w)
+ENTRY(_start)
+
+SECTIONS
+{
+  . = VDSO_LBASE + SIZEOF_HEADERS;
+  .hash           : { *(.hash) }			:text
+  .gnu.hash       : { *(.gnu.hash) }
+  .dynsym         : { *(.dynsym) }
+  .dynstr         : { *(.dynstr) }
+  .gnu.version    : { *(.gnu.version) }
+  .gnu.version_d  : { *(.gnu.version_d) }
+  .gnu.version_r  : { *(.gnu.version_r) }
+
+  .note		  : { *(.note.*) } 			:text	:note
+
+  . = ALIGN (16);
+  .text :
+  {
+    *(.text .stub .text.* .gnu.linkonce.t.*)
+  }
+  PROVIDE (__etext = .);
+  PROVIDE (_etext = .);
+  PROVIDE (etext = .);
+
+  /* Other stuff is appended to the text segment: */
+  .rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+  .rodata1		: { *(.rodata1) }
+
+  .eh_frame_hdr		: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+  .eh_frame		: { KEEP (*(.eh_frame)) }	:text
+  .gcc_except_table	: { *(.gcc_except_table) }
+  .fixup		: { *(.fixup) }
+
+  .dynamic		: { *(.dynamic) }		:text	:dynamic
+  .plt : { *(.plt) }
+  .got : { *(.got) }
+
+  _end = .;
+  __end = .;
+  PROVIDE (end = .);
+
+
+  /* Stabs debugging sections are here too
+   */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+  .debug 0 : { *(.debug) }
+  .line 0 : { *(.line) }
+
+  .debug_srcinfo 0 : { *(.debug_srcinfo) }
+  .debug_sfnames 0 : { *(.debug_sfnames) }
+
+  .debug_aranges 0 : { *(.debug_aranges) }
+  .debug_pubnames 0 : { *(.debug_pubnames) }
+
+  .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+  .debug_abbrev 0 : { *(.debug_abbrev) }
+  .debug_line 0 : { *(.debug_line) }
+  .debug_frame 0 : { *(.debug_frame) }
+  .debug_str 0 : { *(.debug_str) }
+  .debug_loc 0 : { *(.debug_loc) }
+  .debug_macinfo 0 : { *(.debug_macinfo) }
+
+  .debug_weaknames 0 : { *(.debug_weaknames) }
+  .debug_funcnames 0 : { *(.debug_funcnames) }
+  .debug_typenames 0 : { *(.debug_typenames) }
+  .debug_varnames 0 : { *(.debug_varnames) }
+
+  /DISCARD/ : { *(.note.GNU-stack) }
+  /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) }
+  /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
+
+
+PHDRS
+{
+  text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+  note PT_NOTE FLAGS(4); /* PF_R */
+  dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+  eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+  VDSO_VERSION_STRING {
+    global:
+	__kernel_sigtramp_rt64;
+	__kernel_restart_syscall64;
+    local: *;
+  };
+}
diff --git a/arch/parisc/kernel/vdso64/vdso64_wrapper.S b/arch/parisc/kernel/vdso64/vdso64_wrapper.S
new file mode 100644
index 000000000000..66f929482d3d
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/vdso64_wrapper.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+	__PAGE_ALIGNED_DATA
+
+	.globl vdso64_start, vdso64_end
+	.balign PAGE_SIZE
+vdso64_start:
+	.incbin "arch/parisc/kernel/vdso64/vdso64.so"
+	.balign PAGE_SIZE
+vdso64_end:
+
+	.previous
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index ea70a0e08321..5fc0c852c84c 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -13,8 +13,8 @@
 #include <linux/compiler.h>
 #include <linux/uaccess.h>
 
-#define get_user_space() (uaccess_kernel() ? 0 : mfsp(3))
-#define get_kernel_space() (0)
+#define get_user_space()	mfsp(SR_USER)
+#define get_kernel_space()	SR_KERNEL
 
 /* Returns 0 for success, otherwise, returns number of bytes not transferred. */
 extern unsigned long pa_memcpy(void *dst, const void *src,
@@ -23,8 +23,8 @@ extern unsigned long pa_memcpy(void *dst, const void *src,
 unsigned long raw_copy_to_user(void __user *dst, const void *src,
 			       unsigned long len)
 {
-	mtsp(get_kernel_space(), 1);
-	mtsp(get_user_space(), 2);
+	mtsp(get_kernel_space(), SR_TEMP1);
+	mtsp(get_user_space(), SR_TEMP2);
 	return pa_memcpy((void __force *)dst, src, len);
 }
 EXPORT_SYMBOL(raw_copy_to_user);
@@ -32,16 +32,16 @@ EXPORT_SYMBOL(raw_copy_to_user);
 unsigned long raw_copy_from_user(void *dst, const void __user *src,
 			       unsigned long len)
 {
-	mtsp(get_user_space(), 1);
-	mtsp(get_kernel_space(), 2);
+	mtsp(get_user_space(), SR_TEMP1);
+	mtsp(get_kernel_space(), SR_TEMP2);
 	return pa_memcpy(dst, (void __force *)src, len);
 }
 EXPORT_SYMBOL(raw_copy_from_user);
 
 void * memcpy(void * dst,const void *src, size_t count)
 {
-	mtsp(get_kernel_space(), 1);
-	mtsp(get_kernel_space(), 2);
+	mtsp(get_kernel_space(), SR_TEMP1);
+	mtsp(get_kernel_space(), SR_TEMP2);
 	pa_memcpy(dst, src, count);
 	return dst;
 }
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index e9eabf8f14d7..f114e102aaf2 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -425,3 +425,92 @@ out_of_memory:
 	}
 	pagefault_out_of_memory();
 }
+
+/* Handle non-access data TLB miss faults.
+ *
+ * For probe instructions, accesses to userspace are considered allowed
+ * if they lie in a valid VMA and the access type matches. We are not
+ * allowed to handle MM faults here so there may be situations where an
+ * actual access would fail even though a probe was successful.
+ */
+int
+handle_nadtlb_fault(struct pt_regs *regs)
+{
+	unsigned long insn = regs->iir;
+	int breg, treg, xreg, val = 0;
+	struct vm_area_struct *vma, *prev_vma;
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	unsigned long address;
+	unsigned long acc_type;
+
+	switch (insn & 0x380) {
+	case 0x280:
+		/* FDC instruction */
+		fallthrough;
+	case 0x380:
+		/* PDC and FIC instructions */
+		if (printk_ratelimit()) {
+			pr_warn("BUG: nullifying cache flush/purge instruction\n");
+			show_regs(regs);
+		}
+		if (insn & 0x20) {
+			/* Base modification */
+			breg = (insn >> 21) & 0x1f;
+			xreg = (insn >> 16) & 0x1f;
+			if (breg && xreg)
+				regs->gr[breg] += regs->gr[xreg];
+		}
+		regs->gr[0] |= PSW_N;
+		return 1;
+
+	case 0x180:
+		/* PROBE instruction */
+		treg = insn & 0x1f;
+		if (regs->isr) {
+			tsk = current;
+			mm = tsk->mm;
+			if (mm) {
+				/* Search for VMA */
+				address = regs->ior;
+				mmap_read_lock(mm);
+				vma = find_vma_prev(mm, address, &prev_vma);
+				mmap_read_unlock(mm);
+
+				/*
+				 * Check if access to the VMA is okay.
+				 * We don't allow for stack expansion.
+				 */
+				acc_type = (insn & 0x40) ? VM_WRITE : VM_READ;
+				if (vma
+				    && address >= vma->vm_start
+				    && (vma->vm_flags & acc_type) == acc_type)
+					val = 1;
+			}
+		}
+		if (treg)
+			regs->gr[treg] = val;
+		regs->gr[0] |= PSW_N;
+		return 1;
+
+	case 0x300:
+		/* LPA instruction */
+		if (insn & 0x20) {
+			/* Base modification */
+			breg = (insn >> 21) & 0x1f;
+			xreg = (insn >> 16) & 0x1f;
+			if (breg && xreg)
+				regs->gr[breg] += regs->gr[xreg];
+		}
+		treg = insn & 0x1f;
+		if (treg)
+			regs->gr[treg] = 0;
+		regs->gr[0] |= PSW_N;
+		return 1;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index 160abcb8e9fa..ea0e487f87b1 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -9,7 +9,7 @@ long soft_nmi_interrupt(struct pt_regs *regs);
 static inline void arch_touch_nmi_watchdog(void) {}
 #endif
 
-#if defined(CONFIG_NMI_IPI) && defined(CONFIG_STACKTRACE)
+#ifdef CONFIG_NMI_IPI
 extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
 					   bool exclude_self);
 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
diff --git a/arch/powerpc/include/asm/xor_altivec.h b/arch/powerpc/include/asm/xor_altivec.h
index 6ca923510b59..294620a25f80 100644
--- a/arch/powerpc/include/asm/xor_altivec.h
+++ b/arch/powerpc/include/asm/xor_altivec.h
@@ -3,17 +3,20 @@
 #define _ASM_POWERPC_XOR_ALTIVEC_H
 
 #ifdef CONFIG_ALTIVEC
-
-void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
-		   unsigned long *v2_in);
-void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
-		   unsigned long *v2_in, unsigned long *v3_in);
-void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
-		   unsigned long *v2_in, unsigned long *v3_in,
-		   unsigned long *v4_in);
-void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
-		   unsigned long *v2_in, unsigned long *v3_in,
-		   unsigned long *v4_in, unsigned long *v5_in);
+void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2);
+void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3);
+void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4);
+void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4,
+		   const unsigned long * __restrict p5);
 
 #endif
 #endif /* _ASM_POWERPC_XOR_ALTIVEC_H */
diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c
index 54e61979e80e..aab49d056d18 100644
--- a/arch/powerpc/lib/xor_vmx.c
+++ b/arch/powerpc/lib/xor_vmx.c
@@ -49,8 +49,9 @@ typedef vector signed char unative_t;
 		V1##_3 = vec_xor(V1##_3, V2##_3);	\
 	} while (0)
 
-void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
-		     unsigned long *v2_in)
+void __xor_altivec_2(unsigned long bytes,
+		     unsigned long * __restrict v1_in,
+		     const unsigned long * __restrict v2_in)
 {
 	DEFINE(v1);
 	DEFINE(v2);
@@ -67,8 +68,10 @@ void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
 	} while (--lines > 0);
 }
 
-void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
-		     unsigned long *v2_in, unsigned long *v3_in)
+void __xor_altivec_3(unsigned long bytes,
+		     unsigned long * __restrict v1_in,
+		     const unsigned long * __restrict v2_in,
+		     const unsigned long * __restrict v3_in)
 {
 	DEFINE(v1);
 	DEFINE(v2);
@@ -89,9 +92,11 @@ void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
 	} while (--lines > 0);
 }
 
-void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
-		     unsigned long *v2_in, unsigned long *v3_in,
-		     unsigned long *v4_in)
+void __xor_altivec_4(unsigned long bytes,
+		     unsigned long * __restrict v1_in,
+		     const unsigned long * __restrict v2_in,
+		     const unsigned long * __restrict v3_in,
+		     const unsigned long * __restrict v4_in)
 {
 	DEFINE(v1);
 	DEFINE(v2);
@@ -116,9 +121,12 @@ void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
 	} while (--lines > 0);
 }
 
-void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
-		     unsigned long *v2_in, unsigned long *v3_in,
-		     unsigned long *v4_in, unsigned long *v5_in)
+void __xor_altivec_5(unsigned long bytes,
+		     unsigned long * __restrict v1_in,
+		     const unsigned long * __restrict v2_in,
+		     const unsigned long * __restrict v3_in,
+		     const unsigned long * __restrict v4_in,
+		     const unsigned long * __restrict v5_in)
 {
 	DEFINE(v1);
 	DEFINE(v2);
diff --git a/arch/powerpc/lib/xor_vmx.h b/arch/powerpc/lib/xor_vmx.h
index 5c2b0839b179..573c41d90dac 100644
--- a/arch/powerpc/lib/xor_vmx.h
+++ b/arch/powerpc/lib/xor_vmx.h
@@ -6,16 +6,17 @@
  * outside of the enable/disable altivec block.
  */
 
-void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
-			     unsigned long *v2_in);
-
-void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
-			     unsigned long *v2_in, unsigned long *v3_in);
-
-void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
-			     unsigned long *v2_in, unsigned long *v3_in,
-			     unsigned long *v4_in);
-
-void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
-			     unsigned long *v2_in, unsigned long *v3_in,
-			     unsigned long *v4_in, unsigned long *v5_in);
+void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2);
+void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3);
+void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4);
+void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4,
+		     const unsigned long * __restrict p5);
diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c
index 80dba916c367..35d917ece4d1 100644
--- a/arch/powerpc/lib/xor_vmx_glue.c
+++ b/arch/powerpc/lib/xor_vmx_glue.c
@@ -12,47 +12,51 @@
 #include <asm/xor_altivec.h>
 #include "xor_vmx.h"
 
-void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
-		   unsigned long *v2_in)
+void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2)
 {
 	preempt_disable();
 	enable_kernel_altivec();
-	__xor_altivec_2(bytes, v1_in, v2_in);
+	__xor_altivec_2(bytes, p1, p2);
 	disable_kernel_altivec();
 	preempt_enable();
 }
 EXPORT_SYMBOL(xor_altivec_2);
 
-void xor_altivec_3(unsigned long bytes,  unsigned long *v1_in,
-		   unsigned long *v2_in, unsigned long *v3_in)
+void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3)
 {
 	preempt_disable();
 	enable_kernel_altivec();
-	__xor_altivec_3(bytes, v1_in, v2_in, v3_in);
+	__xor_altivec_3(bytes, p1, p2, p3);
 	disable_kernel_altivec();
 	preempt_enable();
 }
 EXPORT_SYMBOL(xor_altivec_3);
 
-void xor_altivec_4(unsigned long bytes,  unsigned long *v1_in,
-		   unsigned long *v2_in, unsigned long *v3_in,
-		   unsigned long *v4_in)
+void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4)
 {
 	preempt_disable();
 	enable_kernel_altivec();
-	__xor_altivec_4(bytes, v1_in, v2_in, v3_in, v4_in);
+	__xor_altivec_4(bytes, p1, p2, p3, p4);
 	disable_kernel_altivec();
 	preempt_enable();
 }
 EXPORT_SYMBOL(xor_altivec_4);
 
-void xor_altivec_5(unsigned long bytes,  unsigned long *v1_in,
-		   unsigned long *v2_in, unsigned long *v3_in,
-		   unsigned long *v4_in, unsigned long *v5_in)
+void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4,
+		   const unsigned long * __restrict p5)
 {
 	preempt_disable();
 	enable_kernel_altivec();
-	__xor_altivec_5(bytes, v1_in, v2_in, v3_in, v4_in, v5_in);
+	__xor_altivec_5(bytes, p1, p2, p3, p4, p5);
 	disable_kernel_altivec();
 	preempt_enable();
 }
diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
index b44d6ecdb46e..0aacd7052585 100644
--- a/arch/riscv/Kconfig.erratas
+++ b/arch/riscv/Kconfig.erratas
@@ -2,6 +2,7 @@ menu "CPU errata selection"
 
 config RISCV_ERRATA_ALTERNATIVE
 	bool "RISC-V alternative scheme"
+	depends on !XIP_KERNEL
 	default y
 	help
 	  This Kconfig allows the kernel to automatically patch the
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index 6ec44a22278a..c112ab2a9052 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -14,8 +14,8 @@ config SOC_SIFIVE
 	select CLK_SIFIVE
 	select CLK_SIFIVE_PRCI
 	select SIFIVE_PLIC
-	select RISCV_ERRATA_ALTERNATIVE
-	select ERRATA_SIFIVE
+	select RISCV_ERRATA_ALTERNATIVE if !XIP_KERNEL
+	select ERRATA_SIFIVE if !XIP_KERNEL
 	help
 	  This enables support for SiFive SoC platform hardware.
 
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 68a9e3d1fe16..4a48287513c3 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -13,6 +13,19 @@
 #include <linux/pgtable.h>
 #include <asm/sections.h>
 
+/*
+ * The auipc+jalr instruction pair can reach any PC-relative offset
+ * in the range [-2^31 - 2^11, 2^31 - 2^11)
+ */
+static bool riscv_insn_valid_32bit_offset(ptrdiff_t val)
+{
+#ifdef CONFIG_32BIT
+	return true;
+#else
+	return (-(1L << 31) - (1L << 11)) <= val && val < ((1L << 31) - (1L << 11));
+#endif
+}
+
 static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v)
 {
 	if (v != (u32)v) {
@@ -95,7 +108,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location,
 	ptrdiff_t offset = (void *)v - (void *)location;
 	s32 hi20;
 
-	if (offset != (s32)offset) {
+	if (!riscv_insn_valid_32bit_offset(offset)) {
 		pr_err(
 		  "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
 		  me->name, (long long)v, location);
@@ -197,10 +210,9 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location,
 				       Elf_Addr v)
 {
 	ptrdiff_t offset = (void *)v - (void *)location;
-	s32 fill_v = offset;
 	u32 hi20, lo12;
 
-	if (offset != fill_v) {
+	if (!riscv_insn_valid_32bit_offset(offset)) {
 		/* Only emit the plt entry if offset over 32-bit range */
 		if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) {
 			offset = module_emit_plt_entry(me, v);
@@ -224,10 +236,9 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location,
 				   Elf_Addr v)
 {
 	ptrdiff_t offset = (void *)v - (void *)location;
-	s32 fill_v = offset;
 	u32 hi20, lo12;
 
-	if (offset != fill_v) {
+	if (!riscv_insn_valid_32bit_offset(offset)) {
 		pr_err(
 		  "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
 		  me->name, (long long)v, location);
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
index a963c3d8ad0d..fb924a8041dc 100644
--- a/arch/s390/lib/xor.c
+++ b/arch/s390/lib/xor.c
@@ -11,7 +11,8 @@
 #include <linux/raid/xor.h>
 #include <asm/xor.h>
 
-static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2)
 {
 	asm volatile(
 		"	larl	1,2f\n"
@@ -32,8 +33,9 @@ static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 		: "0", "1", "cc", "memory");
 }
 
-static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		     unsigned long *p3)
+static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3)
 {
 	asm volatile(
 		"	larl	1,2f\n"
@@ -58,8 +60,10 @@ static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 		: : "0", "1", "cc", "memory");
 }
 
-static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		     unsigned long *p3, unsigned long *p4)
+static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4)
 {
 	asm volatile(
 		"	larl	1,2f\n"
@@ -88,8 +92,11 @@ static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 		: : "0", "1", "cc", "memory");
 }
 
-static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		     unsigned long *p3, unsigned long *p4, unsigned long *p5)
+static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4,
+		     const unsigned long * __restrict p5)
 {
 	asm volatile(
 		"	larl	1,2f\n"
diff --git a/arch/sparc/include/asm/xor_32.h b/arch/sparc/include/asm/xor_32.h
index 3e5af37e4b9c..0351813cf3af 100644
--- a/arch/sparc/include/asm/xor_32.h
+++ b/arch/sparc/include/asm/xor_32.h
@@ -13,7 +13,8 @@
  */
 
 static void
-sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+sparc_2(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2)
 {
 	int lines = bytes / (sizeof (long)) / 8;
 
@@ -50,8 +51,9 @@ sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	unsigned long *p3)
+sparc_3(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3)
 {
 	int lines = bytes / (sizeof (long)) / 8;
 
@@ -101,8 +103,10 @@ sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	unsigned long *p3, unsigned long *p4)
+sparc_4(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3,
+	const unsigned long * __restrict p4)
 {
 	int lines = bytes / (sizeof (long)) / 8;
 
@@ -165,8 +169,11 @@ sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-sparc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	unsigned long *p3, unsigned long *p4, unsigned long *p5)
+sparc_5(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3,
+	const unsigned long * __restrict p4,
+	const unsigned long * __restrict p5)
 {
 	int lines = bytes / (sizeof (long)) / 8;
 
diff --git a/arch/sparc/include/asm/xor_64.h b/arch/sparc/include/asm/xor_64.h
index 16169f3edcd5..caaddea8ad79 100644
--- a/arch/sparc/include/asm/xor_64.h
+++ b/arch/sparc/include/asm/xor_64.h
@@ -12,13 +12,20 @@
 
 #include <asm/spitfire.h>
 
-void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
-void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
-	       unsigned long *);
-void xor_vis_4(unsigned long, unsigned long *, unsigned long *,
-	       unsigned long *, unsigned long *);
-void xor_vis_5(unsigned long, unsigned long *, unsigned long *,
-	       unsigned long *, unsigned long *, unsigned long *);
+void xor_vis_2(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2);
+void xor_vis_3(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3);
+void xor_vis_4(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4);
+void xor_vis_5(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4,
+	       const unsigned long * __restrict p5);
 
 /* XXX Ugh, write cheetah versions... -DaveM */
 
@@ -30,13 +37,20 @@ static struct xor_block_template xor_block_VIS = {
         .do_5	= xor_vis_5,
 };
 
-void xor_niagara_2(unsigned long, unsigned long *, unsigned long *);
-void xor_niagara_3(unsigned long, unsigned long *, unsigned long *,
-		   unsigned long *);
-void xor_niagara_4(unsigned long, unsigned long *, unsigned long *,
-		   unsigned long *, unsigned long *);
-void xor_niagara_5(unsigned long, unsigned long *, unsigned long *,
-		   unsigned long *, unsigned long *, unsigned long *);
+void xor_niagara_2(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2);
+void xor_niagara_3(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3);
+void xor_niagara_4(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4);
+void xor_niagara_5(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4,
+		   const unsigned long * __restrict p5);
 
 static struct xor_block_template xor_block_niagara = {
         .name	= "Niagara",
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index f384cb1a4f7a..5a83da703e87 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += coco/
+
 obj-y += entry/
 
 obj-$(CONFIG_PERF_EVENTS) += events/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9f5bd41bf660..a4f6672d0cb5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -120,6 +120,7 @@ config X86
 	select ARCH_WANTS_NO_INSTR
 	select ARCH_WANT_HUGE_PMD_SHARE
 	select ARCH_WANT_LD_ORPHAN_WARN
+	select ARCH_WANTS_RT_DELAYED_SIGNALS
 	select ARCH_WANTS_THP_SWAP		if X86_64
 	select ARCH_HAS_PARANOID_L1D_FLUSH
 	select BUILDTIME_TABLE_SORT
@@ -1638,7 +1639,7 @@ config ARCH_SPARSEMEM_DEFAULT
 
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
-	depends on ARCH_SPARSEMEM_ENABLE
+	depends on ARCH_SPARSEMEM_ENABLE && ARCH_FLATMEM_ENABLE
 
 config ARCH_MEMORY_PROBE
 	bool "Enable sysfs memory/probe interface"
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 659fad53ca82..3b354eb9516d 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -152,14 +152,13 @@ SYM_FUNC_END(startup_32)
 
 #ifdef CONFIG_EFI_STUB
 SYM_FUNC_START(efi32_stub_entry)
-SYM_FUNC_START_ALIAS(efi_stub_entry)
 	add	$0x4, %esp
 	movl	8(%esp), %esi	/* save boot_params pointer */
 	call	efi_main
 	/* efi_main returns the possibly relocated address of startup_32 */
 	jmp	*%eax
 SYM_FUNC_END(efi32_stub_entry)
-SYM_FUNC_END_ALIAS(efi_stub_entry)
+SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry)
 #endif
 
 	.text
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fd9441f40457..dea95301196b 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -535,7 +535,6 @@ SYM_CODE_END(startup_64)
 #ifdef CONFIG_EFI_STUB
 	.org 0x390
 SYM_FUNC_START(efi64_stub_entry)
-SYM_FUNC_START_ALIAS(efi_stub_entry)
 	and	$~0xf, %rsp			/* realign the stack */
 	movq	%rdx, %rbx			/* save boot_params pointer */
 	call	efi_main
@@ -543,7 +542,7 @@ SYM_FUNC_START_ALIAS(efi_stub_entry)
 	leaq	rva(startup_64)(%rax), %rax
 	jmp	*%rax
 SYM_FUNC_END(efi64_stub_entry)
-SYM_FUNC_END_ALIAS(efi_stub_entry)
+SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry)
 #endif
 
 	.text
diff --git a/arch/x86/coco/Makefile b/arch/x86/coco/Makefile
new file mode 100644
index 000000000000..c1ead00017a7
--- /dev/null
+++ b/arch/x86/coco/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS_REMOVE_core.o	= -pg
+KASAN_SANITIZE_core.o	:= n
+CFLAGS_core.o		+= -fno-stack-protector
+
+obj-y += core.o
diff --git a/arch/x86/kernel/cc_platform.c b/arch/x86/coco/core.c
index 6a6ffcd978f6..fc1365dd927e 100644
--- a/arch/x86/kernel/cc_platform.c
+++ b/arch/x86/coco/core.c
@@ -9,18 +9,16 @@
 
 #include <linux/export.h>
 #include <linux/cc_platform.h>
-#include <linux/mem_encrypt.h>
 
-#include <asm/mshyperv.h>
+#include <asm/coco.h>
 #include <asm/processor.h>
 
-static bool __maybe_unused intel_cc_platform_has(enum cc_attr attr)
+static enum cc_vendor vendor __ro_after_init;
+static u64 cc_mask __ro_after_init;
+
+static bool intel_cc_platform_has(enum cc_attr attr)
 {
-#ifdef CONFIG_INTEL_TDX_GUEST
-	return false;
-#else
 	return false;
-#endif
 }
 
 /*
@@ -74,12 +72,46 @@ static bool hyperv_cc_platform_has(enum cc_attr attr)
 
 bool cc_platform_has(enum cc_attr attr)
 {
-	if (sme_me_mask)
+	switch (vendor) {
+	case CC_VENDOR_AMD:
 		return amd_cc_platform_has(attr);
-
-	if (hv_is_isolation_supported())
+	case CC_VENDOR_INTEL:
+		return intel_cc_platform_has(attr);
+	case CC_VENDOR_HYPERV:
 		return hyperv_cc_platform_has(attr);
-
-	return false;
+	default:
+		return false;
+	}
 }
 EXPORT_SYMBOL_GPL(cc_platform_has);
+
+u64 cc_mkenc(u64 val)
+{
+	switch (vendor) {
+	case CC_VENDOR_AMD:
+		return val | cc_mask;
+	default:
+		return val;
+	}
+}
+
+u64 cc_mkdec(u64 val)
+{
+	switch (vendor) {
+	case CC_VENDOR_AMD:
+		return val & ~cc_mask;
+	default:
+		return val;
+	}
+}
+EXPORT_SYMBOL_GPL(cc_mkdec);
+
+__init void cc_set_vendor(enum cc_vendor v)
+{
+	vendor = v;
+}
+
+__init void cc_set_mask(u64 mask)
+{
+	cc_mask = mask;
+}
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index c3af959648e6..2831685adf6f 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -90,6 +90,9 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
 
 obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
 
+obj-$(CONFIG_CRYPTO_SM3_AVX_X86_64) += sm3-avx-x86_64.o
+sm3-avx-x86_64-y := sm3-avx-asm_64.o sm3_avx_glue.o
+
 obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o
 sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o
 
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
index c799838242a6..43852ba6e19c 100644
--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -1,65 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */
 /*
- *	Implement AES CTR mode by8 optimization with AVX instructions. (x86_64)
- *
- * This is AES128/192/256 CTR mode optimization implementation. It requires
- * the support of Intel(R) AESNI and AVX instructions.
- *
- * This work was inspired by the AES CTR mode optimization published
- * in Intel Optimized IPSEC Cryptograhpic library.
- * Additional information on it can be found at:
- *    http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
+ * AES CTR mode by8 optimization with AVX instructions. (x86_64)
  *
  * Copyright(c) 2014 Intel Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
  * Contact Information:
  * James Guilford <[email protected]>
  * Sean Gulley <[email protected]>
  * Chandramouli Narayanan <[email protected]>
+ */
+/*
+ * This is AES128/192/256 CTR mode optimization implementation. It requires
+ * the support of Intel(R) AESNI and AVX instructions.
  *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * This work was inspired by the AES CTR mode optimization published
+ * in Intel Optimized IPSEC Cryptographic library.
+ * Additional information on it can be found at:
+ *    https://github.com/intel/intel-ipsec-mb
  */
 
 #include <linux/linkage.h>
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 363699dd7220..837c1e0aa021 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1751,8 +1751,6 @@ SYM_FUNC_END(aesni_gcm_finalize)
 
 #endif
 
-
-SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128)
 SYM_FUNC_START_LOCAL(_key_expansion_256a)
 	pshufd $0b11111111, %xmm1, %xmm1
 	shufps $0b00010000, %xmm0, %xmm4
@@ -1764,7 +1762,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a)
 	add $0x10, TKEYP
 	RET
 SYM_FUNC_END(_key_expansion_256a)
-SYM_FUNC_END_ALIAS(_key_expansion_128)
+SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a)
 
 SYM_FUNC_START_LOCAL(_key_expansion_192a)
 	pshufd $0b01010101, %xmm1, %xmm1
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index a880e0b1c255..fda6066437aa 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -32,24 +32,12 @@ static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
 	__blowfish_enc_blk(ctx, dst, src, false);
 }
 
-static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
-					const u8 *src)
-{
-	__blowfish_enc_blk(ctx, dst, src, true);
-}
-
 static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
 					 const u8 *src)
 {
 	__blowfish_enc_blk_4way(ctx, dst, src, false);
 }
 
-static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
-				      const u8 *src)
-{
-	__blowfish_enc_blk_4way(ctx, dst, src, true);
-}
-
 static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index 787c234d2469..abb8b1fe123b 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -45,14 +45,6 @@ static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
 	des3_ede_x86_64_crypt_blk(dec_ctx, dst, src);
 }
 
-static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
-					 const u8 *src)
-{
-	u32 *enc_ctx = ctx->enc.expkey;
-
-	des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src);
-}
-
 static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
 					 const u8 *src)
 {
diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S
new file mode 100644
index 000000000000..71e6aae23e17
--- /dev/null
+++ b/arch/x86/crypto/sm3-avx-asm_64.S
@@ -0,0 +1,517 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM3 AVX accelerated transform.
+ * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02
+ *
+ * Copyright (C) 2021 Jussi Kivilinna <[email protected]>
+ * Copyright (C) 2021 Tianjia Zhang <[email protected]>
+ */
+
+/* Based on SM3 AES/BMI2 accelerated work by libgcrypt at:
+ *  https://gnupg.org/software/libgcrypt/index.html
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+#define state_h5 20
+#define state_h6 24
+#define state_h7 28
+
+/* Constants */
+
+/* Round constant macros */
+
+#define K0   2043430169  /* 0x79cc4519 */
+#define K1   -208106958  /* 0xf3988a32 */
+#define K2   -416213915  /* 0xe7311465 */
+#define K3   -832427829  /* 0xce6228cb */
+#define K4  -1664855657  /* 0x9cc45197 */
+#define K5    965255983  /* 0x3988a32f */
+#define K6   1930511966  /* 0x7311465e */
+#define K7   -433943364  /* 0xe6228cbc */
+#define K8   -867886727  /* 0xcc451979 */
+#define K9  -1735773453  /* 0x988a32f3 */
+#define K10   823420391  /* 0x311465e7 */
+#define K11  1646840782  /* 0x6228cbce */
+#define K12 -1001285732  /* 0xc451979c */
+#define K13 -2002571463  /* 0x88a32f39 */
+#define K14   289824371  /* 0x11465e73 */
+#define K15   579648742  /* 0x228cbce6 */
+#define K16 -1651869049  /* 0x9d8a7a87 */
+#define K17   991229199  /* 0x3b14f50f */
+#define K18  1982458398  /* 0x7629ea1e */
+#define K19  -330050500  /* 0xec53d43c */
+#define K20  -660100999  /* 0xd8a7a879 */
+#define K21 -1320201997  /* 0xb14f50f3 */
+#define K22  1654563303  /* 0x629ea1e7 */
+#define K23  -985840690  /* 0xc53d43ce */
+#define K24 -1971681379  /* 0x8a7a879d */
+#define K25   351604539  /* 0x14f50f3b */
+#define K26   703209078  /* 0x29ea1e76 */
+#define K27  1406418156  /* 0x53d43cec */
+#define K28 -1482130984  /* 0xa7a879d8 */
+#define K29  1330705329  /* 0x4f50f3b1 */
+#define K30 -1633556638  /* 0x9ea1e762 */
+#define K31  1027854021  /* 0x3d43cec5 */
+#define K32  2055708042  /* 0x7a879d8a */
+#define K33  -183551212  /* 0xf50f3b14 */
+#define K34  -367102423  /* 0xea1e7629 */
+#define K35  -734204845  /* 0xd43cec53 */
+#define K36 -1468409689  /* 0xa879d8a7 */
+#define K37  1358147919  /* 0x50f3b14f */
+#define K38 -1578671458  /* 0xa1e7629e */
+#define K39  1137624381  /* 0x43cec53d */
+#define K40 -2019718534  /* 0x879d8a7a */
+#define K41   255530229  /* 0x0f3b14f5 */
+#define K42   511060458  /* 0x1e7629ea */
+#define K43  1022120916  /* 0x3cec53d4 */
+#define K44  2044241832  /* 0x79d8a7a8 */
+#define K45  -206483632  /* 0xf3b14f50 */
+#define K46  -412967263  /* 0xe7629ea1 */
+#define K47  -825934525  /* 0xcec53d43 */
+#define K48 -1651869049  /* 0x9d8a7a87 */
+#define K49   991229199  /* 0x3b14f50f */
+#define K50  1982458398  /* 0x7629ea1e */
+#define K51  -330050500  /* 0xec53d43c */
+#define K52  -660100999  /* 0xd8a7a879 */
+#define K53 -1320201997  /* 0xb14f50f3 */
+#define K54  1654563303  /* 0x629ea1e7 */
+#define K55  -985840690  /* 0xc53d43ce */
+#define K56 -1971681379  /* 0x8a7a879d */
+#define K57   351604539  /* 0x14f50f3b */
+#define K58   703209078  /* 0x29ea1e76 */
+#define K59  1406418156  /* 0x53d43cec */
+#define K60 -1482130984  /* 0xa7a879d8 */
+#define K61  1330705329  /* 0x4f50f3b1 */
+#define K62 -1633556638  /* 0x9ea1e762 */
+#define K63  1027854021  /* 0x3d43cec5 */
+
+/* Register macros */
+
+#define RSTATE %rdi
+#define RDATA  %rsi
+#define RNBLKS %rdx
+
+#define t0 %eax
+#define t1 %ebx
+#define t2 %ecx
+
+#define a %r8d
+#define b %r9d
+#define c %r10d
+#define d %r11d
+#define e %r12d
+#define f %r13d
+#define g %r14d
+#define h %r15d
+
+#define W0 %xmm0
+#define W1 %xmm1
+#define W2 %xmm2
+#define W3 %xmm3
+#define W4 %xmm4
+#define W5 %xmm5
+
+#define XTMP0 %xmm6
+#define XTMP1 %xmm7
+#define XTMP2 %xmm8
+#define XTMP3 %xmm9
+#define XTMP4 %xmm10
+#define XTMP5 %xmm11
+#define XTMP6 %xmm12
+
+#define BSWAP_REG %xmm15
+
+/* Stack structure */
+
+#define STACK_W_SIZE        (32 * 2 * 3)
+#define STACK_REG_SAVE_SIZE (64)
+
+#define STACK_W             (0)
+#define STACK_REG_SAVE      (STACK_W + STACK_W_SIZE)
+#define STACK_SIZE          (STACK_REG_SAVE + STACK_REG_SAVE_SIZE)
+
+/* Instruction helpers. */
+
+#define roll2(v, reg)		\
+	roll $(v), reg;
+
+#define roll3mov(v, src, dst)	\
+	movl src, dst;		\
+	roll $(v), dst;
+
+#define roll3(v, src, dst)	\
+	rorxl $(32-(v)), src, dst;
+
+#define addl2(a, out)		\
+	leal (a, out), out;
+
+/* Round function macros. */
+
+#define GG1(x, y, z, o, t)	\
+	movl x, o;		\
+	xorl y, o;		\
+	xorl z, o;
+
+#define FF1(x, y, z, o, t) GG1(x, y, z, o, t)
+
+#define GG2(x, y, z, o, t)	\
+	andnl z, x, o;		\
+	movl y, t;		\
+	andl x, t;		\
+	addl2(t, o);
+
+#define FF2(x, y, z, o, t)	\
+	movl y, o;		\
+	xorl x, o;		\
+	movl y, t;		\
+	andl x, t;		\
+	andl z, o;		\
+	xorl t, o;
+
+#define R(i, a, b, c, d, e, f, g, h, round, widx, wtype)		\
+	/* rol(a, 12) => t0 */						\
+	roll3mov(12, a, t0); /* rorxl here would reduce perf by 6% on zen3 */ \
+	/* rol (t0 + e + t), 7) => t1 */				\
+	leal K##round(t0, e, 1), t1;					\
+	roll2(7, t1);							\
+	/* h + w1 => h */						\
+	addl wtype##_W1_ADDR(round, widx), h;				\
+	/* h + t1 => h */						\
+	addl2(t1, h);							\
+	/* t1 ^ t0 => t0 */						\
+	xorl t1, t0;							\
+	/* w1w2 + d => d */						\
+	addl wtype##_W1W2_ADDR(round, widx), d;				\
+	/* FF##i(a,b,c) => t1 */					\
+	FF##i(a, b, c, t1, t2);						\
+	/* d + t1 => d */						\
+	addl2(t1, d);							\
+	/* GG#i(e,f,g) => t2 */						\
+	GG##i(e, f, g, t2, t1);						\
+	/* h + t2 => h */						\
+	addl2(t2, h);							\
+	/* rol (f, 19) => f */						\
+	roll2(19, f);							\
+	/* d + t0 => d */						\
+	addl2(t0, d);							\
+	/* rol (b, 9) => b */						\
+	roll2(9, b);							\
+	/* P0(h) => h */						\
+	roll3(9, h, t2);						\
+	roll3(17, h, t1);						\
+	xorl t2, h;							\
+	xorl t1, h;
+
+#define R1(a, b, c, d, e, f, g, h, round, widx, wtype) \
+	R(1, a, b, c, d, e, f, g, h, round, widx, wtype)
+
+#define R2(a, b, c, d, e, f, g, h, round, widx, wtype) \
+	R(2, a, b, c, d, e, f, g, h, round, widx, wtype)
+
+/* Input expansion macros. */
+
+/* Byte-swapped input address. */
+#define IW_W_ADDR(round, widx, offs) \
+	(STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4))(%rsp)
+
+/* Expanded input address. */
+#define XW_W_ADDR(round, widx, offs) \
+	(STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))(%rsp)
+
+/* Rounds 1-12, byte-swapped input block addresses. */
+#define IW_W1_ADDR(round, widx)   IW_W_ADDR(round, widx, 0)
+#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 32)
+
+/* Rounds 1-12, expanded input block addresses. */
+#define XW_W1_ADDR(round, widx)   XW_W_ADDR(round, widx, 0)
+#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 32)
+
+/* Input block loading. */
+#define LOAD_W_XMM_1()							\
+	vmovdqu 0*16(RDATA), XTMP0; /* XTMP0: w3, w2, w1, w0 */		\
+	vmovdqu 1*16(RDATA), XTMP1; /* XTMP1: w7, w6, w5, w4 */		\
+	vmovdqu 2*16(RDATA), XTMP2; /* XTMP2: w11, w10, w9, w8 */	\
+	vmovdqu 3*16(RDATA), XTMP3; /* XTMP3: w15, w14, w13, w12 */	\
+	vpshufb BSWAP_REG, XTMP0, XTMP0;				\
+	vpshufb BSWAP_REG, XTMP1, XTMP1;				\
+	vpshufb BSWAP_REG, XTMP2, XTMP2;				\
+	vpshufb BSWAP_REG, XTMP3, XTMP3;				\
+	vpxor XTMP0, XTMP1, XTMP4;					\
+	vpxor XTMP1, XTMP2, XTMP5;					\
+	vpxor XTMP2, XTMP3, XTMP6;					\
+	leaq 64(RDATA), RDATA;						\
+	vmovdqa XTMP0, IW_W1_ADDR(0, 0);				\
+	vmovdqa XTMP4, IW_W1W2_ADDR(0, 0);				\
+	vmovdqa XTMP1, IW_W1_ADDR(4, 0);				\
+	vmovdqa XTMP5, IW_W1W2_ADDR(4, 0);
+
+#define LOAD_W_XMM_2()				\
+	vmovdqa XTMP2, IW_W1_ADDR(8, 0);	\
+	vmovdqa XTMP6, IW_W1W2_ADDR(8, 0);
+
+#define LOAD_W_XMM_3()							\
+	vpshufd $0b00000000, XTMP0, W0; /* W0: xx, w0, xx, xx */	\
+	vpshufd $0b11111001, XTMP0, W1; /* W1: xx, w3, w2, w1 */	\
+	vmovdqa XTMP1, W2;              /* W2: xx, w6, w5, w4 */	\
+	vpalignr $12, XTMP1, XTMP2, W3; /* W3: xx, w9, w8, w7 */	\
+	vpalignr $8, XTMP2, XTMP3, W4;  /* W4: xx, w12, w11, w10 */	\
+	vpshufd $0b11111001, XTMP3, W5; /* W5: xx, w15, w14, w13 */
+
+/* Message scheduling. Note: 3 words per XMM register. */
+#define SCHED_W_0(round, w0, w1, w2, w3, w4, w5)			\
+	/* Load (w[i - 16]) => XTMP0 */					\
+	vpshufd $0b10111111, w0, XTMP0;					\
+	vpalignr $12, XTMP0, w1, XTMP0; /* XTMP0: xx, w2, w1, w0 */	\
+	/* Load (w[i - 13]) => XTMP1 */					\
+	vpshufd $0b10111111, w1, XTMP1;					\
+	vpalignr $12, XTMP1, w2, XTMP1;					\
+	/* w[i - 9] == w3 */						\
+	/* XMM3 ^ XTMP0 => XTMP0 */					\
+	vpxor w3, XTMP0, XTMP0;
+
+#define SCHED_W_1(round, w0, w1, w2, w3, w4, w5)	\
+	/* w[i - 3] == w5 */				\
+	/* rol(XMM5, 15) ^ XTMP0 => XTMP0 */		\
+	vpslld $15, w5, XTMP2;				\
+	vpsrld $(32-15), w5, XTMP3;			\
+	vpxor XTMP2, XTMP3, XTMP3;			\
+	vpxor XTMP3, XTMP0, XTMP0;			\
+	/* rol(XTMP1, 7) => XTMP1 */			\
+	vpslld $7, XTMP1, XTMP5;			\
+	vpsrld $(32-7), XTMP1, XTMP1;			\
+	vpxor XTMP5, XTMP1, XTMP1;			\
+	/* XMM4 ^ XTMP1 => XTMP1 */			\
+	vpxor w4, XTMP1, XTMP1;				\
+	/* w[i - 6] == XMM4 */				\
+	/* P1(XTMP0) ^ XTMP1 => XMM0 */			\
+	vpslld $15, XTMP0, XTMP5;			\
+	vpsrld $(32-15), XTMP0, XTMP6;			\
+	vpslld $23, XTMP0, XTMP2;			\
+	vpsrld $(32-23), XTMP0, XTMP3;			\
+	vpxor XTMP0, XTMP1, XTMP1;			\
+	vpxor XTMP6, XTMP5, XTMP5;			\
+	vpxor XTMP3, XTMP2, XTMP2;			\
+	vpxor XTMP2, XTMP5, XTMP5;			\
+	vpxor XTMP5, XTMP1, w0;
+
+#define SCHED_W_2(round, w0, w1, w2, w3, w4, w5)	\
+	/* W1 in XMM12 */				\
+	vpshufd $0b10111111, w4, XTMP4;			\
+	vpalignr $12, XTMP4, w5, XTMP4;			\
+	vmovdqa XTMP4, XW_W1_ADDR((round), 0);		\
+	/* W1 ^ W2 => XTMP1 */				\
+	vpxor w0, XTMP4, XTMP1;				\
+	vmovdqa XTMP1, XW_W1W2_ADDR((round), 0);
+
+
+.section	.rodata.cst16, "aM", @progbits, 16
+.align 16
+
+.Lbe32mask:
+	.long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+
+.text
+
+/*
+ * Transform nblocks*64 bytes (nblocks*16 32-bit words) at DATA.
+ *
+ * void sm3_transform_avx(struct sm3_state *state,
+ *                        const u8 *data, int nblocks);
+ */
+.align 16
+SYM_FUNC_START(sm3_transform_avx)
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: data (64*nblks bytes)
+	 *	%rdx: nblocks
+	 */
+	vzeroupper;
+
+	pushq %rbp;
+	movq %rsp, %rbp;
+
+	movq %rdx, RNBLKS;
+
+	subq $STACK_SIZE, %rsp;
+	andq $(~63), %rsp;
+
+	movq %rbx, (STACK_REG_SAVE + 0 * 8)(%rsp);
+	movq %r15, (STACK_REG_SAVE + 1 * 8)(%rsp);
+	movq %r14, (STACK_REG_SAVE + 2 * 8)(%rsp);
+	movq %r13, (STACK_REG_SAVE + 3 * 8)(%rsp);
+	movq %r12, (STACK_REG_SAVE + 4 * 8)(%rsp);
+
+	vmovdqa .Lbe32mask (%rip), BSWAP_REG;
+
+	/* Get the values of the chaining variables. */
+	movl state_h0(RSTATE), a;
+	movl state_h1(RSTATE), b;
+	movl state_h2(RSTATE), c;
+	movl state_h3(RSTATE), d;
+	movl state_h4(RSTATE), e;
+	movl state_h5(RSTATE), f;
+	movl state_h6(RSTATE), g;
+	movl state_h7(RSTATE), h;
+
+.align 16
+.Loop:
+	/* Load data part1. */
+	LOAD_W_XMM_1();
+
+	leaq -1(RNBLKS), RNBLKS;
+
+	/* Transform 0-3 + Load data part2. */
+	R1(a, b, c, d, e, f, g, h, 0, 0, IW); LOAD_W_XMM_2();
+	R1(d, a, b, c, h, e, f, g, 1, 1, IW);
+	R1(c, d, a, b, g, h, e, f, 2, 2, IW);
+	R1(b, c, d, a, f, g, h, e, 3, 3, IW); LOAD_W_XMM_3();
+
+	/* Transform 4-7 + Precalc 12-14. */
+	R1(a, b, c, d, e, f, g, h, 4, 0, IW);
+	R1(d, a, b, c, h, e, f, g, 5, 1, IW);
+	R1(c, d, a, b, g, h, e, f, 6, 2, IW); SCHED_W_0(12, W0, W1, W2, W3, W4, W5);
+	R1(b, c, d, a, f, g, h, e, 7, 3, IW); SCHED_W_1(12, W0, W1, W2, W3, W4, W5);
+
+	/* Transform 8-11 + Precalc 12-17. */
+	R1(a, b, c, d, e, f, g, h, 8, 0, IW); SCHED_W_2(12, W0, W1, W2, W3, W4, W5);
+	R1(d, a, b, c, h, e, f, g, 9, 1, IW); SCHED_W_0(15, W1, W2, W3, W4, W5, W0);
+	R1(c, d, a, b, g, h, e, f, 10, 2, IW); SCHED_W_1(15, W1, W2, W3, W4, W5, W0);
+	R1(b, c, d, a, f, g, h, e, 11, 3, IW); SCHED_W_2(15, W1, W2, W3, W4, W5, W0);
+
+	/* Transform 12-14 + Precalc 18-20 */
+	R1(a, b, c, d, e, f, g, h, 12, 0, XW); SCHED_W_0(18, W2, W3, W4, W5, W0, W1);
+	R1(d, a, b, c, h, e, f, g, 13, 1, XW); SCHED_W_1(18, W2, W3, W4, W5, W0, W1);
+	R1(c, d, a, b, g, h, e, f, 14, 2, XW); SCHED_W_2(18, W2, W3, W4, W5, W0, W1);
+
+	/* Transform 15-17 + Precalc 21-23 */
+	R1(b, c, d, a, f, g, h, e, 15, 0, XW); SCHED_W_0(21, W3, W4, W5, W0, W1, W2);
+	R2(a, b, c, d, e, f, g, h, 16, 1, XW); SCHED_W_1(21, W3, W4, W5, W0, W1, W2);
+	R2(d, a, b, c, h, e, f, g, 17, 2, XW); SCHED_W_2(21, W3, W4, W5, W0, W1, W2);
+
+	/* Transform 18-20 + Precalc 24-26 */
+	R2(c, d, a, b, g, h, e, f, 18, 0, XW); SCHED_W_0(24, W4, W5, W0, W1, W2, W3);
+	R2(b, c, d, a, f, g, h, e, 19, 1, XW); SCHED_W_1(24, W4, W5, W0, W1, W2, W3);
+	R2(a, b, c, d, e, f, g, h, 20, 2, XW); SCHED_W_2(24, W4, W5, W0, W1, W2, W3);
+
+	/* Transform 21-23 + Precalc 27-29 */
+	R2(d, a, b, c, h, e, f, g, 21, 0, XW); SCHED_W_0(27, W5, W0, W1, W2, W3, W4);
+	R2(c, d, a, b, g, h, e, f, 22, 1, XW); SCHED_W_1(27, W5, W0, W1, W2, W3, W4);
+	R2(b, c, d, a, f, g, h, e, 23, 2, XW); SCHED_W_2(27, W5, W0, W1, W2, W3, W4);
+
+	/* Transform 24-26 + Precalc 30-32 */
+	R2(a, b, c, d, e, f, g, h, 24, 0, XW); SCHED_W_0(30, W0, W1, W2, W3, W4, W5);
+	R2(d, a, b, c, h, e, f, g, 25, 1, XW); SCHED_W_1(30, W0, W1, W2, W3, W4, W5);
+	R2(c, d, a, b, g, h, e, f, 26, 2, XW); SCHED_W_2(30, W0, W1, W2, W3, W4, W5);
+
+	/* Transform 27-29 + Precalc 33-35 */
+	R2(b, c, d, a, f, g, h, e, 27, 0, XW); SCHED_W_0(33, W1, W2, W3, W4, W5, W0);
+	R2(a, b, c, d, e, f, g, h, 28, 1, XW); SCHED_W_1(33, W1, W2, W3, W4, W5, W0);
+	R2(d, a, b, c, h, e, f, g, 29, 2, XW); SCHED_W_2(33, W1, W2, W3, W4, W5, W0);
+
+	/* Transform 30-32 + Precalc 36-38 */
+	R2(c, d, a, b, g, h, e, f, 30, 0, XW); SCHED_W_0(36, W2, W3, W4, W5, W0, W1);
+	R2(b, c, d, a, f, g, h, e, 31, 1, XW); SCHED_W_1(36, W2, W3, W4, W5, W0, W1);
+	R2(a, b, c, d, e, f, g, h, 32, 2, XW); SCHED_W_2(36, W2, W3, W4, W5, W0, W1);
+
+	/* Transform 33-35 + Precalc 39-41 */
+	R2(d, a, b, c, h, e, f, g, 33, 0, XW); SCHED_W_0(39, W3, W4, W5, W0, W1, W2);
+	R2(c, d, a, b, g, h, e, f, 34, 1, XW); SCHED_W_1(39, W3, W4, W5, W0, W1, W2);
+	R2(b, c, d, a, f, g, h, e, 35, 2, XW); SCHED_W_2(39, W3, W4, W5, W0, W1, W2);
+
+	/* Transform 36-38 + Precalc 42-44 */
+	R2(a, b, c, d, e, f, g, h, 36, 0, XW); SCHED_W_0(42, W4, W5, W0, W1, W2, W3);
+	R2(d, a, b, c, h, e, f, g, 37, 1, XW); SCHED_W_1(42, W4, W5, W0, W1, W2, W3);
+	R2(c, d, a, b, g, h, e, f, 38, 2, XW); SCHED_W_2(42, W4, W5, W0, W1, W2, W3);
+
+	/* Transform 39-41 + Precalc 45-47 */
+	R2(b, c, d, a, f, g, h, e, 39, 0, XW); SCHED_W_0(45, W5, W0, W1, W2, W3, W4);
+	R2(a, b, c, d, e, f, g, h, 40, 1, XW); SCHED_W_1(45, W5, W0, W1, W2, W3, W4);
+	R2(d, a, b, c, h, e, f, g, 41, 2, XW); SCHED_W_2(45, W5, W0, W1, W2, W3, W4);
+
+	/* Transform 42-44 + Precalc 48-50 */
+	R2(c, d, a, b, g, h, e, f, 42, 0, XW); SCHED_W_0(48, W0, W1, W2, W3, W4, W5);
+	R2(b, c, d, a, f, g, h, e, 43, 1, XW); SCHED_W_1(48, W0, W1, W2, W3, W4, W5);
+	R2(a, b, c, d, e, f, g, h, 44, 2, XW); SCHED_W_2(48, W0, W1, W2, W3, W4, W5);
+
+	/* Transform 45-47 + Precalc 51-53 */
+	R2(d, a, b, c, h, e, f, g, 45, 0, XW); SCHED_W_0(51, W1, W2, W3, W4, W5, W0);
+	R2(c, d, a, b, g, h, e, f, 46, 1, XW); SCHED_W_1(51, W1, W2, W3, W4, W5, W0);
+	R2(b, c, d, a, f, g, h, e, 47, 2, XW); SCHED_W_2(51, W1, W2, W3, W4, W5, W0);
+
+	/* Transform 48-50 + Precalc 54-56 */
+	R2(a, b, c, d, e, f, g, h, 48, 0, XW); SCHED_W_0(54, W2, W3, W4, W5, W0, W1);
+	R2(d, a, b, c, h, e, f, g, 49, 1, XW); SCHED_W_1(54, W2, W3, W4, W5, W0, W1);
+	R2(c, d, a, b, g, h, e, f, 50, 2, XW); SCHED_W_2(54, W2, W3, W4, W5, W0, W1);
+
+	/* Transform 51-53 + Precalc 57-59 */
+	R2(b, c, d, a, f, g, h, e, 51, 0, XW); SCHED_W_0(57, W3, W4, W5, W0, W1, W2);
+	R2(a, b, c, d, e, f, g, h, 52, 1, XW); SCHED_W_1(57, W3, W4, W5, W0, W1, W2);
+	R2(d, a, b, c, h, e, f, g, 53, 2, XW); SCHED_W_2(57, W3, W4, W5, W0, W1, W2);
+
+	/* Transform 54-56 + Precalc 60-62 */
+	R2(c, d, a, b, g, h, e, f, 54, 0, XW); SCHED_W_0(60, W4, W5, W0, W1, W2, W3);
+	R2(b, c, d, a, f, g, h, e, 55, 1, XW); SCHED_W_1(60, W4, W5, W0, W1, W2, W3);
+	R2(a, b, c, d, e, f, g, h, 56, 2, XW); SCHED_W_2(60, W4, W5, W0, W1, W2, W3);
+
+	/* Transform 57-59 + Precalc 63 */
+	R2(d, a, b, c, h, e, f, g, 57, 0, XW); SCHED_W_0(63, W5, W0, W1, W2, W3, W4);
+	R2(c, d, a, b, g, h, e, f, 58, 1, XW);
+	R2(b, c, d, a, f, g, h, e, 59, 2, XW); SCHED_W_1(63, W5, W0, W1, W2, W3, W4);
+
+	/* Transform 60-62 + Precalc 63 */
+	R2(a, b, c, d, e, f, g, h, 60, 0, XW);
+	R2(d, a, b, c, h, e, f, g, 61, 1, XW); SCHED_W_2(63, W5, W0, W1, W2, W3, W4);
+	R2(c, d, a, b, g, h, e, f, 62, 2, XW);
+
+	/* Transform 63 */
+	R2(b, c, d, a, f, g, h, e, 63, 0, XW);
+
+	/* Update the chaining variables. */
+	xorl state_h0(RSTATE), a;
+	xorl state_h1(RSTATE), b;
+	xorl state_h2(RSTATE), c;
+	xorl state_h3(RSTATE), d;
+	movl a, state_h0(RSTATE);
+	movl b, state_h1(RSTATE);
+	movl c, state_h2(RSTATE);
+	movl d, state_h3(RSTATE);
+	xorl state_h4(RSTATE), e;
+	xorl state_h5(RSTATE), f;
+	xorl state_h6(RSTATE), g;
+	xorl state_h7(RSTATE), h;
+	movl e, state_h4(RSTATE);
+	movl f, state_h5(RSTATE);
+	movl g, state_h6(RSTATE);
+	movl h, state_h7(RSTATE);
+
+	cmpq $0, RNBLKS;
+	jne .Loop;
+
+	vzeroall;
+
+	movq (STACK_REG_SAVE + 0 * 8)(%rsp), %rbx;
+	movq (STACK_REG_SAVE + 1 * 8)(%rsp), %r15;
+	movq (STACK_REG_SAVE + 2 * 8)(%rsp), %r14;
+	movq (STACK_REG_SAVE + 3 * 8)(%rsp), %r13;
+	movq (STACK_REG_SAVE + 4 * 8)(%rsp), %r12;
+
+	vmovdqa %xmm0, IW_W1_ADDR(0, 0);
+	vmovdqa %xmm0, IW_W1W2_ADDR(0, 0);
+	vmovdqa %xmm0, IW_W1_ADDR(4, 0);
+	vmovdqa %xmm0, IW_W1W2_ADDR(4, 0);
+	vmovdqa %xmm0, IW_W1_ADDR(8, 0);
+	vmovdqa %xmm0, IW_W1W2_ADDR(8, 0);
+
+	movq %rbp, %rsp;
+	popq %rbp;
+	ret;
+SYM_FUNC_END(sm3_transform_avx)
diff --git a/arch/x86/crypto/sm3_avx_glue.c b/arch/x86/crypto/sm3_avx_glue.c
new file mode 100644
index 000000000000..661b6f22ffcd
--- /dev/null
+++ b/arch/x86/crypto/sm3_avx_glue.c
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM3 Secure Hash Algorithm, AVX assembler accelerated.
+ * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02
+ *
+ * Copyright (C) 2021 Tianjia Zhang <[email protected]>
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/sm3.h>
+#include <crypto/sm3_base.h>
+#include <asm/simd.h>
+
+asmlinkage void sm3_transform_avx(struct sm3_state *state,
+			const u8 *data, int nblocks);
+
+static int sm3_avx_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len)
+{
+	struct sm3_state *sctx = shash_desc_ctx(desc);
+
+	if (!crypto_simd_usable() ||
+			(sctx->count % SM3_BLOCK_SIZE) + len < SM3_BLOCK_SIZE) {
+		sm3_update(sctx, data, len);
+		return 0;
+	}
+
+	/*
+	 * Make sure struct sm3_state begins directly with the SM3
+	 * 256-bit internal state, as this is what the asm functions expect.
+	 */
+	BUILD_BUG_ON(offsetof(struct sm3_state, state) != 0);
+
+	kernel_fpu_begin();
+	sm3_base_do_update(desc, data, len, sm3_transform_avx);
+	kernel_fpu_end();
+
+	return 0;
+}
+
+static int sm3_avx_finup(struct shash_desc *desc, const u8 *data,
+		      unsigned int len, u8 *out)
+{
+	if (!crypto_simd_usable()) {
+		struct sm3_state *sctx = shash_desc_ctx(desc);
+
+		if (len)
+			sm3_update(sctx, data, len);
+
+		sm3_final(sctx, out);
+		return 0;
+	}
+
+	kernel_fpu_begin();
+	if (len)
+		sm3_base_do_update(desc, data, len, sm3_transform_avx);
+	sm3_base_do_finalize(desc, sm3_transform_avx);
+	kernel_fpu_end();
+
+	return sm3_base_finish(desc, out);
+}
+
+static int sm3_avx_final(struct shash_desc *desc, u8 *out)
+{
+	if (!crypto_simd_usable()) {
+		sm3_final(shash_desc_ctx(desc), out);
+		return 0;
+	}
+
+	kernel_fpu_begin();
+	sm3_base_do_finalize(desc, sm3_transform_avx);
+	kernel_fpu_end();
+
+	return sm3_base_finish(desc, out);
+}
+
+static struct shash_alg sm3_avx_alg = {
+	.digestsize	=	SM3_DIGEST_SIZE,
+	.init		=	sm3_base_init,
+	.update		=	sm3_avx_update,
+	.final		=	sm3_avx_final,
+	.finup		=	sm3_avx_finup,
+	.descsize	=	sizeof(struct sm3_state),
+	.base		=	{
+		.cra_name	=	"sm3",
+		.cra_driver_name =	"sm3-avx",
+		.cra_priority	=	300,
+		.cra_blocksize	=	SM3_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int __init sm3_avx_mod_init(void)
+{
+	const char *feature_name;
+
+	if (!boot_cpu_has(X86_FEATURE_AVX)) {
+		pr_info("AVX instruction are not detected.\n");
+		return -ENODEV;
+	}
+
+	if (!boot_cpu_has(X86_FEATURE_BMI2)) {
+		pr_info("BMI2 instruction are not detected.\n");
+		return -ENODEV;
+	}
+
+	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+				&feature_name)) {
+		pr_info("CPU feature '%s' is not supported.\n", feature_name);
+		return -ENODEV;
+	}
+
+	return crypto_register_shash(&sm3_avx_alg);
+}
+
+static void __exit sm3_avx_mod_exit(void)
+{
+	crypto_unregister_shash(&sm3_avx_alg);
+}
+
+module_init(sm3_avx_mod_init);
+module_exit(sm3_avx_mod_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tianjia Zhang <[email protected]>");
+MODULE_DESCRIPTION("SM3 Secure Hash Algorithm, AVX assembler accelerated");
+MODULE_ALIAS_CRYPTO("sm3");
+MODULE_ALIAS_CRYPTO("sm3-avx");
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
new file mode 100644
index 000000000000..3d98c3a60d34
--- /dev/null
+++ b/arch/x86/include/asm/coco.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_COCO_H
+#define _ASM_X86_COCO_H
+
+#include <asm/types.h>
+
+enum cc_vendor {
+	CC_VENDOR_NONE,
+	CC_VENDOR_AMD,
+	CC_VENDOR_HYPERV,
+	CC_VENDOR_INTEL,
+};
+
+void cc_set_vendor(enum cc_vendor v);
+void cc_set_mask(u64 mask);
+
+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+u64 cc_mkenc(u64 val);
+u64 cc_mkdec(u64 val);
+#else
+static inline u64 cc_mkenc(u64 val)
+{
+	return val;
+}
+
+static inline u64 cc_mkdec(u64 val)
+{
+	return val;
+}
+#endif
+
+#endif /* _ASM_X86_COCO_H */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 6db4e2932b3d..3edf05e98e58 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -204,7 +204,7 @@
 /* FREE!                                ( 7*32+10) */
 #define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
 #define X86_FEATURE_RETPOLINE		( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_AMD	( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE	( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_CDP_L2		( 7*32+15) /* Code and Data Prioritization L2 */
 #define X86_FEATURE_MSR_SPEC_CTRL	( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
@@ -299,9 +299,6 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
 #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
-#define X86_FEATURE_AMX_BF16		(18*32+22) /* AMX bf16 Support */
-#define X86_FEATURE_AMX_TILE		(18*32+24) /* AMX tile Support */
-#define X86_FEATURE_AMX_INT8		(18*32+25) /* AMX int8 Support */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
@@ -330,6 +327,7 @@
 #define X86_FEATURE_HWP_ACT_WINDOW	(14*32+ 9) /* HWP Activity Window */
 #define X86_FEATURE_HWP_EPP		(14*32+10) /* HWP Energy Perf. Preference */
 #define X86_FEATURE_HWP_PKG_REQ		(14*32+11) /* HWP Package Level Request */
+#define X86_FEATURE_HFI			(14*32+19) /* Hardware Feedback Interface */
 
 /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
 #define X86_FEATURE_NPT			(15*32+ 0) /* Nested Page Table support */
@@ -390,7 +388,10 @@
 #define X86_FEATURE_TSXLDTRK		(18*32+16) /* TSX Suspend Load Address Tracking */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_ARCH_LBR		(18*32+19) /* Intel ARCH LBR */
+#define X86_FEATURE_AMX_BF16		(18*32+22) /* AMX bf16 Support */
 #define X86_FEATURE_AVX512_FP16		(18*32+23) /* AVX512 FP16 */
+#define X86_FEATURE_AMX_TILE		(18*32+24) /* AMX tile Support */
+#define X86_FEATURE_AMX_INT8		(18*32+25) /* AMX int8 Support */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_FLUSH_L1D		(18*32+28) /* Flush L1D cache */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 8f28fafa98b3..1231d63f836d 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -56,8 +56,11 @@
 # define DISABLE_PTI		(1 << (X86_FEATURE_PTI & 31))
 #endif
 
-/* Force disable because it's broken beyond repair */
-#define DISABLE_ENQCMD		(1 << (X86_FEATURE_ENQCMD & 31))
+#ifdef CONFIG_INTEL_IOMMU_SVM
+# define DISABLE_ENQCMD		0
+#else
+# define DISABLE_ENQCMD		(1 << (X86_FEATURE_ENQCMD & 31))
+#endif
 
 #ifdef CONFIG_X86_SGX
 # define DISABLE_SGX	0
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 05a6ab940f45..1b29f58f730f 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -124,7 +124,7 @@ struct insn {
 #define X86_VEX_B(vex)	((vex) & 0x20)	/* VEX3 Byte1 */
 #define X86_VEX_L(vex)	((vex) & 0x04)	/* VEX3 Byte2, VEX2 Byte1 */
 /* VEX bit fields */
-#define X86_EVEX_M(vex)	((vex) & 0x03)		/* EVEX Byte1 */
+#define X86_EVEX_M(vex)	((vex) & 0x07)		/* EVEX Byte1 */
 #define X86_VEX3_M(vex)	((vex) & 0x1f)		/* VEX3 Byte1 */
 #define X86_VEX2_M	1			/* VEX2.M always 1 */
 #define X86_VEX_V(vex)	(((vex) & 0x78) >> 3)	/* VEX3 Byte2, VEX2 Byte1 */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a4a39c3e0f19..0e7f303542bf 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -705,12 +705,14 @@
 
 #define PACKAGE_THERM_STATUS_PROCHOT		(1 << 0)
 #define PACKAGE_THERM_STATUS_POWER_LIMIT	(1 << 10)
+#define PACKAGE_THERM_STATUS_HFI_UPDATED	(1 << 26)
 
 #define MSR_IA32_PACKAGE_THERM_INTERRUPT	0x000001b2
 
 #define PACKAGE_THERM_INT_HIGH_ENABLE		(1 << 0)
 #define PACKAGE_THERM_INT_LOW_ENABLE		(1 << 1)
 #define PACKAGE_THERM_INT_PLN_ENABLE		(1 << 24)
+#define PACKAGE_THERM_INT_HFI_ENABLE		(1 << 25)
 
 /* Thermal Thresholds Support */
 #define THERM_INT_THRESHOLD0_ENABLE    (1 << 15)
@@ -959,4 +961,8 @@
 #define MSR_VM_IGNNE                    0xc0010115
 #define MSR_VM_HSAVE_PA                 0xc0010117
 
+/* Hardware Feedback Interface */
+#define MSR_IA32_HW_FEEDBACK_PTR        0x17d0
+#define MSR_IA32_HW_FEEDBACK_CONFIG     0x17d1
+
 #endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index cc74dc584836..acbaeaf83b61 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -84,7 +84,7 @@
 #ifdef CONFIG_RETPOLINE
 	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
 		      __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
-		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
+		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
 #else
 	jmp	*%\reg
 #endif
@@ -94,7 +94,7 @@
 #ifdef CONFIG_RETPOLINE
 	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
 		      __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
-		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
+		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE
 #else
 	call	*%\reg
 #endif
@@ -146,7 +146,7 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
 	"lfence;\n"						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
-	X86_FEATURE_RETPOLINE_AMD)
+	X86_FEATURE_RETPOLINE_LFENCE)
 
 # define THUNK_TARGET(addr) [thunk_target] "r" (addr)
 
@@ -176,7 +176,7 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
 	"lfence;\n"						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
-	X86_FEATURE_RETPOLINE_AMD)
+	X86_FEATURE_RETPOLINE_LFENCE)
 
 # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
 #endif
@@ -188,9 +188,11 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
 /* The Spectre V2 mitigation variants */
 enum spectre_v2_mitigation {
 	SPECTRE_V2_NONE,
-	SPECTRE_V2_RETPOLINE_GENERIC,
-	SPECTRE_V2_RETPOLINE_AMD,
-	SPECTRE_V2_IBRS_ENHANCED,
+	SPECTRE_V2_RETPOLINE,
+	SPECTRE_V2_LFENCE,
+	SPECTRE_V2_EIBRS,
+	SPECTRE_V2_EIBRS_RETPOLINE,
+	SPECTRE_V2_EIBRS_LFENCE,
 };
 
 /* The indirect branch speculation control variants */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index a69012e1903f..e1591467668e 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -279,7 +279,7 @@ extern void (*paravirt_iret)(void);
 
 #define paravirt_type(op)				\
 	[paravirt_typenum] "i" (PARAVIRT_PATCH(op)),	\
-	[paravirt_opptr] "i" (&(pv_ops.op))
+	[paravirt_opptr] "m" (pv_ops.op)
 #define paravirt_clobber(clobber)		\
 	[paravirt_clobber] "i" (clobber)
 
@@ -316,7 +316,7 @@ int paravirt_disable_iospace(void);
  */
 #define PARAVIRT_CALL					\
 	ANNOTATE_RETPOLINE_SAFE				\
-	"call *%c[paravirt_opptr];"
+	"call *%[paravirt_opptr];"
 
 /*
  * These macros are intended to wrap calls through one of the paravirt
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 8a9432fb3802..62ab07e24aef 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -15,17 +15,12 @@
 		     cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS)))	\
 	 : (prot))
 
-/*
- * Macros to add or remove encryption attribute
- */
-#define pgprot_encrypted(prot)	__pgprot(__sme_set(pgprot_val(prot)))
-#define pgprot_decrypted(prot)	__pgprot(__sme_clr(pgprot_val(prot)))
-
 #ifndef __ASSEMBLY__
 #include <linux/spinlock.h>
 #include <asm/x86_init.h>
 #include <asm/pkru.h>
 #include <asm/fpu/api.h>
+#include <asm/coco.h>
 #include <asm-generic/pgtable_uffd.h>
 #include <linux/page_table_check.h>
 
@@ -38,6 +33,12 @@ void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
 void ptdump_walk_pgd_level_checkwx(void);
 void ptdump_walk_user_pgd_level_checkwx(void);
 
+/*
+ * Macros to add or remove encryption attribute
+ */
+#define pgprot_encrypted(prot)	__pgprot(cc_mkenc(pgprot_val(prot)))
+#define pgprot_decrypted(prot)	__pgprot(cc_mkdec(pgprot_val(prot)))
+
 #ifdef CONFIG_DEBUG_WX
 #define debug_checkwx()		ptdump_walk_pgd_level_checkwx()
 #define debug_checkwx_user()	ptdump_walk_user_pgd_level_checkwx()
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2c5f12ae7d04..a87e7c33d5ac 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -119,6 +119,8 @@ struct cpuinfo_x86 {
 	int			x86_cache_mbm_width_offset;
 	int			x86_power;
 	unsigned long		loops_per_jiffy;
+	/* protected processor identification number */
+	u64			ppin;
 	/* cpuid returned max cores value: */
 	u16			x86_max_cores;
 	u16			apicid;
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index ff0f2d90338a..78ca53512486 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -53,7 +53,6 @@ int set_memory_global(unsigned long addr, int numpages);
 
 int set_pages_array_uc(struct page **pages, int addrinarray);
 int set_pages_array_wc(struct page **pages, int addrinarray);
-int set_pages_array_wt(struct page **pages, int addrinarray);
 int set_pages_array_wb(struct page **pages, int addrinarray);
 
 /*
@@ -84,7 +83,6 @@ int set_pages_rw(struct page *page, int numpages);
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
 bool kernel_page_present(struct page *page);
-void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
 
 extern int kernel_set_to_readonly;
 
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 2f0b6be8eaab..9619385bf749 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -110,6 +110,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu);
 #define topology_logical_die_id(cpu)		(cpu_data(cpu).logical_die_id)
 #define topology_die_id(cpu)			(cpu_data(cpu).cpu_die_id)
 #define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
+#define topology_ppin(cpu)			(cpu_data(cpu).ppin)
 
 extern unsigned int __max_die_per_package;
 
@@ -215,15 +216,26 @@ extern void arch_scale_freq_tick(void);
 #define arch_scale_freq_tick arch_scale_freq_tick
 
 extern void arch_set_max_freq_ratio(bool turbo_disabled);
+void init_freq_invariance(bool secondary, bool cppc_ready);
 #else
 static inline void arch_set_max_freq_ratio(bool turbo_disabled)
 {
 }
+static inline void init_freq_invariance(bool secondary, bool cppc_ready)
+{
+}
 #endif
 
-#if defined(CONFIG_ACPI_CPPC_LIB) && defined(CONFIG_SMP)
+#ifdef CONFIG_ACPI_CPPC_LIB
 void init_freq_invariance_cppc(void);
-#define init_freq_invariance_cppc init_freq_invariance_cppc
+#define arch_init_invariance_cppc init_freq_invariance_cppc
+
+bool amd_set_max_freq_ratio(u64 *ratio);
+#else
+static inline bool amd_set_max_freq_ratio(u64 *ratio)
+{
+	return false;
+}
 #endif
 
 #endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 22b7412c08f6..e9170457697e 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -142,6 +142,21 @@ struct x86_init_acpi {
 };
 
 /**
+ * struct x86_guest - Functions used by misc guest incarnations like SEV, TDX, etc.
+ *
+ * @enc_status_change_prepare	Notify HV before the encryption status of a range is changed
+ * @enc_status_change_finish	Notify HV after the encryption status of a range is changed
+ * @enc_tlb_flush_required	Returns true if a TLB flush is needed before changing page encryption status
+ * @enc_cache_flush_required	Returns true if a cache flush is needed before changing page encryption status
+ */
+struct x86_guest {
+	void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
+	bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
+	bool (*enc_tlb_flush_required)(bool enc);
+	bool (*enc_cache_flush_required)(void);
+};
+
+/**
  * struct x86_init_ops - functions for platform specific setup
  *
  */
@@ -287,6 +302,7 @@ struct x86_platform_ops {
 	struct x86_legacy_features legacy;
 	void (*set_legacy_features)(void);
 	struct x86_hyper_runtime hyper;
+	struct x86_guest guest;
 };
 
 struct x86_apic_ops {
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index 2ee95a7769e6..7b0307acc410 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -57,7 +57,8 @@
 					op(i + 3, 3)
 
 static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_sse_2(unsigned long bytes, unsigned long * __restrict p1,
+	  const unsigned long * __restrict p2)
 {
 	unsigned long lines = bytes >> 8;
 
@@ -108,7 +109,8 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2)
 {
 	unsigned long lines = bytes >> 8;
 
@@ -142,8 +144,9 @@ xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	  unsigned long *p3)
+xor_sse_3(unsigned long bytes, unsigned long * __restrict p1,
+	  const unsigned long * __restrict p2,
+	  const unsigned long * __restrict p3)
 {
 	unsigned long lines = bytes >> 8;
 
@@ -201,8 +204,9 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	       unsigned long *p3)
+xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3)
 {
 	unsigned long lines = bytes >> 8;
 
@@ -238,8 +242,10 @@ xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	  unsigned long *p3, unsigned long *p4)
+xor_sse_4(unsigned long bytes, unsigned long * __restrict p1,
+	  const unsigned long * __restrict p2,
+	  const unsigned long * __restrict p3,
+	  const unsigned long * __restrict p4)
 {
 	unsigned long lines = bytes >> 8;
 
@@ -304,8 +310,10 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	       unsigned long *p3, unsigned long *p4)
+xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4)
 {
 	unsigned long lines = bytes >> 8;
 
@@ -343,8 +351,11 @@ xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_sse_5(unsigned long bytes, unsigned long * __restrict p1,
+	  const unsigned long * __restrict p2,
+	  const unsigned long * __restrict p3,
+	  const unsigned long * __restrict p4,
+	  const unsigned long * __restrict p5)
 {
 	unsigned long lines = bytes >> 8;
 
@@ -416,8 +427,11 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	       unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4,
+	       const unsigned long * __restrict p5)
 {
 	unsigned long lines = bytes >> 8;
 
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 67ceb790e639..7a6b9474591e 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -21,7 +21,8 @@
 #include <asm/fpu/api.h>
 
 static void
-xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2)
 {
 	unsigned long lines = bytes >> 7;
 
@@ -64,8 +65,9 @@ xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	      unsigned long *p3)
+xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2,
+	      const unsigned long * __restrict p3)
 {
 	unsigned long lines = bytes >> 7;
 
@@ -113,8 +115,10 @@ xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	      unsigned long *p3, unsigned long *p4)
+xor_pII_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2,
+	      const unsigned long * __restrict p3,
+	      const unsigned long * __restrict p4)
 {
 	unsigned long lines = bytes >> 7;
 
@@ -168,8 +172,11 @@ xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 
 
 static void
-xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	      unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2,
+	      const unsigned long * __restrict p3,
+	      const unsigned long * __restrict p4,
+	      const unsigned long * __restrict p5)
 {
 	unsigned long lines = bytes >> 7;
 
@@ -248,7 +255,8 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 #undef BLOCK
 
 static void
-xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2)
 {
 	unsigned long lines = bytes >> 6;
 
@@ -295,8 +303,9 @@ xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	     unsigned long *p3)
+xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2,
+	     const unsigned long * __restrict p3)
 {
 	unsigned long lines = bytes >> 6;
 
@@ -352,8 +361,10 @@ xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	     unsigned long *p3, unsigned long *p4)
+xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2,
+	     const unsigned long * __restrict p3,
+	     const unsigned long * __restrict p4)
 {
 	unsigned long lines = bytes >> 6;
 
@@ -418,8 +429,11 @@ xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	     unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2,
+	     const unsigned long * __restrict p3,
+	     const unsigned long * __restrict p4,
+	     const unsigned long * __restrict p5)
 {
 	unsigned long lines = bytes >> 6;
 
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 0c4e5b5e3852..7f81dd5897f4 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -26,7 +26,8 @@
 		BLOCK4(8) \
 		BLOCK4(12)
 
-static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
+static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0,
+		      const unsigned long * __restrict p1)
 {
 	unsigned long lines = bytes >> 9;
 
@@ -52,8 +53,9 @@ do { \
 	kernel_fpu_end();
 }
 
-static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
-	unsigned long *p2)
+static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0,
+		      const unsigned long * __restrict p1,
+		      const unsigned long * __restrict p2)
 {
 	unsigned long lines = bytes >> 9;
 
@@ -82,8 +84,10 @@ do { \
 	kernel_fpu_end();
 }
 
-static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
-	unsigned long *p2, unsigned long *p3)
+static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0,
+		      const unsigned long * __restrict p1,
+		      const unsigned long * __restrict p2,
+		      const unsigned long * __restrict p3)
 {
 	unsigned long lines = bytes >> 9;
 
@@ -115,8 +119,11 @@ do { \
 	kernel_fpu_end();
 }
 
-static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
-	unsigned long *p2, unsigned long *p3, unsigned long *p4)
+static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0,
+	     const unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2,
+	     const unsigned long * __restrict p3,
+	     const unsigned long * __restrict p4)
 {
 	unsigned long lines = bytes >> 9;
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 6aef9ee28a39..6462e3dd98f4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -21,7 +21,6 @@ CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_early_printk.o = -pg
 CFLAGS_REMOVE_head64.o = -pg
 CFLAGS_REMOVE_sev.o = -pg
-CFLAGS_REMOVE_cc_platform.o = -pg
 endif
 
 KASAN_SANITIZE_head$(BITS).o				:= n
@@ -30,7 +29,6 @@ KASAN_SANITIZE_dumpstack_$(BITS).o			:= n
 KASAN_SANITIZE_stacktrace.o				:= n
 KASAN_SANITIZE_paravirt.o				:= n
 KASAN_SANITIZE_sev.o					:= n
-KASAN_SANITIZE_cc_platform.o				:= n
 
 # With some compiler versions the generated code results in boot hangs, caused
 # by several compilation units. To be safe, disable all instrumentation.
@@ -49,7 +47,6 @@ endif
 KCOV_INSTRUMENT		:= n
 
 CFLAGS_head$(BITS).o	+= -fno-stack-protector
-CFLAGS_cc_platform.o	+= -fno-stack-protector
 
 CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace
 
@@ -151,8 +148,6 @@ obj-$(CONFIG_UNWINDER_GUESS)		+= unwind_guess.o
 
 obj-$(CONFIG_AMD_MEM_ENCRYPT)		+= sev.o
 
-obj-$(CONFIG_ARCH_HAS_CC_PLATFORM)	+= cc_platform.o
-
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index cf340d85946a..fc17b3f136fe 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -3,7 +3,7 @@
 obj-$(CONFIG_ACPI)		+= boot.o
 obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup_$(BITS).o
 obj-$(CONFIG_ACPI_APEI)		+= apei.o
-obj-$(CONFIG_ACPI_CPPC_LIB)	+= cppc_msr.o
+obj-$(CONFIG_ACPI_CPPC_LIB)	+= cppc.o
 
 ifneq ($(CONFIG_ACPI_PROCESSOR),)
 obj-y				+= cstate.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 5b6d1a95776f..0d01e7f5078c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1328,6 +1328,17 @@ static int __init disable_acpi_pci(const struct dmi_system_id *d)
 	return 0;
 }
 
+static int __init disable_acpi_xsdt(const struct dmi_system_id *d)
+{
+	if (!acpi_force) {
+		pr_notice("%s detected: force use of acpi=rsdt\n", d->ident);
+		acpi_gbl_do_not_use_xsdt = TRUE;
+	} else {
+		pr_notice("Warning: DMI blacklist says broken, but acpi XSDT forced\n");
+	}
+	return 0;
+}
+
 static int __init dmi_disable_acpi(const struct dmi_system_id *d)
 {
 	if (!acpi_force) {
@@ -1451,6 +1462,19 @@ static const struct dmi_system_id acpi_dmi_table[] __initconst = {
 		     DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
 		     },
 	 },
+	/*
+	 * Boxes that need ACPI XSDT use disabled due to corrupted tables
+	 */
+	{
+	 .callback = disable_acpi_xsdt,
+	 .ident = "Advantech DAC-BJ01",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "NEC"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "Bearlake CRB Board"),
+		     DMI_MATCH(DMI_BIOS_VERSION, "V1.12"),
+		     DMI_MATCH(DMI_BIOS_DATE, "02/01/2011"),
+		     },
+	 },
 	{}
 };
 
diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c
new file mode 100644
index 000000000000..df1644d9b3b6
--- /dev/null
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * cppc.c: CPPC Interface for x86
+ * Copyright (c) 2016, Intel Corporation.
+ */
+
+#include <acpi/cppc_acpi.h>
+#include <asm/msr.h>
+#include <asm/processor.h>
+#include <asm/topology.h>
+
+/* Refer to drivers/acpi/cppc_acpi.c for the description of functions */
+
+bool cpc_ffh_supported(void)
+{
+	return true;
+}
+
+int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val)
+{
+	int err;
+
+	err = rdmsrl_safe_on_cpu(cpunum, reg->address, val);
+	if (!err) {
+		u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
+				       reg->bit_offset);
+
+		*val &= mask;
+		*val >>= reg->bit_offset;
+	}
+	return err;
+}
+
+int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
+{
+	u64 rd_val;
+	int err;
+
+	err = rdmsrl_safe_on_cpu(cpunum, reg->address, &rd_val);
+	if (!err) {
+		u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
+				       reg->bit_offset);
+
+		val <<= reg->bit_offset;
+		val &= mask;
+		rd_val &= ~mask;
+		rd_val |= val;
+		err = wrmsrl_safe_on_cpu(cpunum, reg->address, rd_val);
+	}
+	return err;
+}
+
+bool amd_set_max_freq_ratio(u64 *ratio)
+{
+	struct cppc_perf_caps perf_caps;
+	u64 highest_perf, nominal_perf;
+	u64 perf_ratio;
+	int rc;
+
+	if (!ratio)
+		return false;
+
+	rc = cppc_get_perf_caps(0, &perf_caps);
+	if (rc) {
+		pr_debug("Could not retrieve perf counters (%d)\n", rc);
+		return false;
+	}
+
+	highest_perf = amd_get_highest_perf();
+	nominal_perf = perf_caps.nominal_perf;
+
+	if (!highest_perf || !nominal_perf) {
+		pr_debug("Could not retrieve highest or nominal performance\n");
+		return false;
+	}
+
+	perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
+	/* midpoint between max_boost and max_P */
+	perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
+	if (!perf_ratio) {
+		pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
+		return false;
+	}
+
+	*ratio = perf_ratio;
+	arch_set_max_freq_ratio(false);
+
+	return true;
+}
+
+static DEFINE_MUTEX(freq_invariance_lock);
+
+void init_freq_invariance_cppc(void)
+{
+	static bool secondary;
+
+	mutex_lock(&freq_invariance_lock);
+
+	init_freq_invariance(secondary, true);
+	secondary = true;
+
+	mutex_unlock(&freq_invariance_lock);
+}
diff --git a/arch/x86/kernel/acpi/cppc_msr.c b/arch/x86/kernel/acpi/cppc_msr.c
deleted file mode 100644
index b961de569e7e..000000000000
--- a/arch/x86/kernel/acpi/cppc_msr.c
+++ /dev/null
@@ -1,49 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * cppc_msr.c:  MSR Interface for CPPC
- * Copyright (c) 2016, Intel Corporation.
- */
-
-#include <acpi/cppc_acpi.h>
-#include <asm/msr.h>
-
-/* Refer to drivers/acpi/cppc_acpi.c for the description of functions */
-
-bool cpc_ffh_supported(void)
-{
-	return true;
-}
-
-int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val)
-{
-	int err;
-
-	err = rdmsrl_safe_on_cpu(cpunum, reg->address, val);
-	if (!err) {
-		u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
-				       reg->bit_offset);
-
-		*val &= mask;
-		*val >>= reg->bit_offset;
-	}
-	return err;
-}
-
-int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
-{
-	u64 rd_val;
-	int err;
-
-	err = rdmsrl_safe_on_cpu(cpunum, reg->address, &rd_val);
-	if (!err) {
-		u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
-				       reg->bit_offset);
-
-		val <<= reg->bit_offset;
-		val &= mask;
-		rd_val &= ~mask;
-		rd_val |= val;
-		err = wrmsrl_safe_on_cpu(cpunum, reg->address, rd_val);
-	}
-	return err;
-}
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 1e97f944b47d..3b7f4cdbf2e0 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -15,6 +15,7 @@
 #include <asm/desc.h>
 #include <asm/cacheflush.h>
 #include <asm/realmode.h>
+#include <asm/hypervisor.h>
 
 #include <linux/ftrace.h>
 #include "../../realmode/rm/wakeup.h"
@@ -140,9 +141,9 @@ static int __init acpi_sleep_setup(char *str)
 			acpi_realmode_flags |= 4;
 #ifdef CONFIG_HIBERNATION
 		if (strncmp(str, "s4_hwsig", 8) == 0)
-			acpi_check_s4_hw_signature(1);
+			acpi_check_s4_hw_signature = 1;
 		if (strncmp(str, "s4_nohwsig", 10) == 0)
-			acpi_check_s4_hw_signature(0);
+			acpi_check_s4_hw_signature = 0;
 #endif
 		if (strncmp(str, "nonvs", 5) == 0)
 			acpi_nvs_nosave();
@@ -160,3 +161,21 @@ static int __init acpi_sleep_setup(char *str)
 }
 
 __setup("acpi_sleep=", acpi_sleep_setup);
+
+#if defined(CONFIG_HIBERNATION) && defined(CONFIG_HYPERVISOR_GUEST)
+static int __init init_s4_sigcheck(void)
+{
+	/*
+	 * If running on a hypervisor, honour the ACPI specification
+	 * by default and trigger a clean reboot when the hardware
+	 * signature in FACS is changed after hibernation.
+	 */
+	if (acpi_check_s4_hw_signature == -1 &&
+	    !hypervisor_is_type(X86_HYPER_NATIVE))
+		acpi_check_s4_hw_signature = 1;
+
+	return 0;
+}
+/* This must happen before acpi_init() which is a subsys initcall */
+arch_initcall(init_s4_sigcheck);
+#endif
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5007c3ffe96f..b4470eabf151 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -389,7 +389,7 @@ static int emit_indirect(int op, int reg, u8 *bytes)
  *
  *   CALL *%\reg
  *
- * It also tries to inline spectre_v2=retpoline,amd when size permits.
+ * It also tries to inline spectre_v2=retpoline,lfence when size permits.
  */
 static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
 {
@@ -407,7 +407,7 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
 	BUG_ON(reg == 4);
 
 	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
-	    !cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD))
+	    !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE))
 		return -1;
 
 	op = insn->opcode.bytes[0];
@@ -438,9 +438,9 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
 	}
 
 	/*
-	 * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE.
+	 * For RETPOLINE_LFENCE: prepend the indirect CALL/JMP with an LFENCE.
 	 */
-	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
+	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
 		bytes[i++] = 0x0f;
 		bytes[i++] = 0xae;
 		bytes[i++] = 0xe8; /* LFENCE */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 4edb6f0f628c..0c0b09796ced 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -394,35 +394,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
 	per_cpu(cpu_llc_id, cpu) = c->cpu_die_id = c->phys_proc_id;
 }
 
-static void amd_detect_ppin(struct cpuinfo_x86 *c)
-{
-	unsigned long long val;
-
-	if (!cpu_has(c, X86_FEATURE_AMD_PPIN))
-		return;
-
-	/* When PPIN is defined in CPUID, still need to check PPIN_CTL MSR */
-	if (rdmsrl_safe(MSR_AMD_PPIN_CTL, &val))
-		goto clear_ppin;
-
-	/* PPIN is locked in disabled mode, clear feature bit */
-	if ((val & 3UL) == 1UL)
-		goto clear_ppin;
-
-	/* If PPIN is disabled, try to enable it */
-	if (!(val & 2UL)) {
-		wrmsrl_safe(MSR_AMD_PPIN_CTL,  val | 2UL);
-		rdmsrl_safe(MSR_AMD_PPIN_CTL, &val);
-	}
-
-	/* If PPIN_EN bit is 1, return from here; otherwise fall through */
-	if (val & 2UL)
-		return;
-
-clear_ppin:
-	clear_cpu_cap(c, X86_FEATURE_AMD_PPIN);
-}
-
 u32 amd_get_nodes_per_socket(void)
 {
 	return nodes_per_socket;
@@ -585,6 +556,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
 	 *	      the SME physical address space reduction value.
 	 *	      If BIOS has not enabled SME then don't advertise the
 	 *	      SME feature (set in scattered.c).
+	 *	      If the kernel has not enabled SME via any means then
+	 *	      don't advertise the SME feature.
 	 *   For SEV: If BIOS has not enabled SEV then don't advertise the
 	 *            SEV and SEV_ES feature (set in scattered.c).
 	 *
@@ -607,6 +580,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
 		if (IS_ENABLED(CONFIG_X86_32))
 			goto clear_all;
 
+		if (!sme_me_mask)
+			setup_clear_cpu_cap(X86_FEATURE_SME);
+
 		rdmsrl(MSR_K7_HWCR, msr);
 		if (!(msr & MSR_K7_HWCR_SMMLOCK))
 			goto clear_sev;
@@ -947,7 +923,6 @@ static void init_amd(struct cpuinfo_x86 *c)
 	amd_detect_cmp(c);
 	amd_get_topology(c);
 	srat_detect_node(c);
-	amd_detect_ppin(c);
 
 	init_amd_cacheinfo(c);
 
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 1c1f218a701d..6296e1ebed1d 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -16,6 +16,7 @@
 #include <linux/prctl.h>
 #include <linux/sched/smt.h>
 #include <linux/pgtable.h>
+#include <linux/bpf.h>
 
 #include <asm/spec-ctrl.h>
 #include <asm/cmdline.h>
@@ -650,6 +651,32 @@ static inline const char *spectre_v2_module_string(void)
 static inline const char *spectre_v2_module_string(void) { return ""; }
 #endif
 
+#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n"
+#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n"
+#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n"
+
+#ifdef CONFIG_BPF_SYSCALL
+void unpriv_ebpf_notify(int new_state)
+{
+	if (new_state)
+		return;
+
+	/* Unprivileged eBPF is enabled */
+
+	switch (spectre_v2_enabled) {
+	case SPECTRE_V2_EIBRS:
+		pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
+		break;
+	case SPECTRE_V2_EIBRS_LFENCE:
+		if (sched_smt_active())
+			pr_err(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
+		break;
+	default:
+		break;
+	}
+}
+#endif
+
 static inline bool match_option(const char *arg, int arglen, const char *opt)
 {
 	int len = strlen(opt);
@@ -664,7 +691,10 @@ enum spectre_v2_mitigation_cmd {
 	SPECTRE_V2_CMD_FORCE,
 	SPECTRE_V2_CMD_RETPOLINE,
 	SPECTRE_V2_CMD_RETPOLINE_GENERIC,
-	SPECTRE_V2_CMD_RETPOLINE_AMD,
+	SPECTRE_V2_CMD_RETPOLINE_LFENCE,
+	SPECTRE_V2_CMD_EIBRS,
+	SPECTRE_V2_CMD_EIBRS_RETPOLINE,
+	SPECTRE_V2_CMD_EIBRS_LFENCE,
 };
 
 enum spectre_v2_user_cmd {
@@ -737,6 +767,13 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
 	return SPECTRE_V2_USER_CMD_AUTO;
 }
 
+static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
+{
+	return (mode == SPECTRE_V2_EIBRS ||
+		mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+		mode == SPECTRE_V2_EIBRS_LFENCE);
+}
+
 static void __init
 spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
 {
@@ -804,7 +841,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
 	 */
 	if (!boot_cpu_has(X86_FEATURE_STIBP) ||
 	    !smt_possible ||
-	    spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+	    spectre_v2_in_eibrs_mode(spectre_v2_enabled))
 		return;
 
 	/*
@@ -824,9 +861,11 @@ set_mode:
 
 static const char * const spectre_v2_strings[] = {
 	[SPECTRE_V2_NONE]			= "Vulnerable",
-	[SPECTRE_V2_RETPOLINE_GENERIC]		= "Mitigation: Full generic retpoline",
-	[SPECTRE_V2_RETPOLINE_AMD]		= "Mitigation: Full AMD retpoline",
-	[SPECTRE_V2_IBRS_ENHANCED]		= "Mitigation: Enhanced IBRS",
+	[SPECTRE_V2_RETPOLINE]			= "Mitigation: Retpolines",
+	[SPECTRE_V2_LFENCE]			= "Mitigation: LFENCE",
+	[SPECTRE_V2_EIBRS]			= "Mitigation: Enhanced IBRS",
+	[SPECTRE_V2_EIBRS_LFENCE]		= "Mitigation: Enhanced IBRS + LFENCE",
+	[SPECTRE_V2_EIBRS_RETPOLINE]		= "Mitigation: Enhanced IBRS + Retpolines",
 };
 
 static const struct {
@@ -837,8 +876,12 @@ static const struct {
 	{ "off",		SPECTRE_V2_CMD_NONE,		  false },
 	{ "on",			SPECTRE_V2_CMD_FORCE,		  true  },
 	{ "retpoline",		SPECTRE_V2_CMD_RETPOLINE,	  false },
-	{ "retpoline,amd",	SPECTRE_V2_CMD_RETPOLINE_AMD,	  false },
+	{ "retpoline,amd",	SPECTRE_V2_CMD_RETPOLINE_LFENCE,  false },
+	{ "retpoline,lfence",	SPECTRE_V2_CMD_RETPOLINE_LFENCE,  false },
 	{ "retpoline,generic",	SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
+	{ "eibrs",		SPECTRE_V2_CMD_EIBRS,		  false },
+	{ "eibrs,lfence",	SPECTRE_V2_CMD_EIBRS_LFENCE,	  false },
+	{ "eibrs,retpoline",	SPECTRE_V2_CMD_EIBRS_RETPOLINE,	  false },
 	{ "auto",		SPECTRE_V2_CMD_AUTO,		  false },
 };
 
@@ -875,10 +918,30 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
 	}
 
 	if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
-	     cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
-	     cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
+	     cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE ||
+	     cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC ||
+	     cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
+	     cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
 	    !IS_ENABLED(CONFIG_RETPOLINE)) {
-		pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
+		pr_err("%s selected but not compiled in. Switching to AUTO select\n",
+		       mitigation_options[i].option);
+		return SPECTRE_V2_CMD_AUTO;
+	}
+
+	if ((cmd == SPECTRE_V2_CMD_EIBRS ||
+	     cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
+	     cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
+	    !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
+		pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n",
+		       mitigation_options[i].option);
+		return SPECTRE_V2_CMD_AUTO;
+	}
+
+	if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE ||
+	     cmd == SPECTRE_V2_CMD_EIBRS_LFENCE) &&
+	    !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+		pr_err("%s selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n",
+		       mitigation_options[i].option);
 		return SPECTRE_V2_CMD_AUTO;
 	}
 
@@ -887,6 +950,16 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
 	return cmd;
 }
 
+static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
+{
+	if (!IS_ENABLED(CONFIG_RETPOLINE)) {
+		pr_err("Kernel not compiled with retpoline; no mitigation available!");
+		return SPECTRE_V2_NONE;
+	}
+
+	return SPECTRE_V2_RETPOLINE;
+}
+
 static void __init spectre_v2_select_mitigation(void)
 {
 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -907,49 +980,64 @@ static void __init spectre_v2_select_mitigation(void)
 	case SPECTRE_V2_CMD_FORCE:
 	case SPECTRE_V2_CMD_AUTO:
 		if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
-			mode = SPECTRE_V2_IBRS_ENHANCED;
-			/* Force it so VMEXIT will restore correctly */
-			x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
-			wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
-			goto specv2_set_mode;
+			mode = SPECTRE_V2_EIBRS;
+			break;
 		}
-		if (IS_ENABLED(CONFIG_RETPOLINE))
-			goto retpoline_auto;
+
+		mode = spectre_v2_select_retpoline();
 		break;
-	case SPECTRE_V2_CMD_RETPOLINE_AMD:
-		if (IS_ENABLED(CONFIG_RETPOLINE))
-			goto retpoline_amd;
+
+	case SPECTRE_V2_CMD_RETPOLINE_LFENCE:
+		pr_err(SPECTRE_V2_LFENCE_MSG);
+		mode = SPECTRE_V2_LFENCE;
 		break;
+
 	case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
-		if (IS_ENABLED(CONFIG_RETPOLINE))
-			goto retpoline_generic;
+		mode = SPECTRE_V2_RETPOLINE;
 		break;
+
 	case SPECTRE_V2_CMD_RETPOLINE:
-		if (IS_ENABLED(CONFIG_RETPOLINE))
-			goto retpoline_auto;
+		mode = spectre_v2_select_retpoline();
+		break;
+
+	case SPECTRE_V2_CMD_EIBRS:
+		mode = SPECTRE_V2_EIBRS;
+		break;
+
+	case SPECTRE_V2_CMD_EIBRS_LFENCE:
+		mode = SPECTRE_V2_EIBRS_LFENCE;
+		break;
+
+	case SPECTRE_V2_CMD_EIBRS_RETPOLINE:
+		mode = SPECTRE_V2_EIBRS_RETPOLINE;
 		break;
 	}
-	pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
-	return;
 
-retpoline_auto:
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
-	    boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
-	retpoline_amd:
-		if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
-			pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
-			goto retpoline_generic;
-		}
-		mode = SPECTRE_V2_RETPOLINE_AMD;
-		setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
-		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
-	} else {
-	retpoline_generic:
-		mode = SPECTRE_V2_RETPOLINE_GENERIC;
+	if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+		pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
+
+	if (spectre_v2_in_eibrs_mode(mode)) {
+		/* Force it so VMEXIT will restore correctly */
+		x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+		wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+	}
+
+	switch (mode) {
+	case SPECTRE_V2_NONE:
+	case SPECTRE_V2_EIBRS:
+		break;
+
+	case SPECTRE_V2_LFENCE:
+	case SPECTRE_V2_EIBRS_LFENCE:
+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
+		fallthrough;
+
+	case SPECTRE_V2_RETPOLINE:
+	case SPECTRE_V2_EIBRS_RETPOLINE:
 		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+		break;
 	}
 
-specv2_set_mode:
 	spectre_v2_enabled = mode;
 	pr_info("%s\n", spectre_v2_strings[mode]);
 
@@ -975,7 +1063,7 @@ specv2_set_mode:
 	 * the CPU supports Enhanced IBRS, kernel might un-intentionally not
 	 * enable IBRS around firmware calls.
 	 */
-	if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) {
+	if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) {
 		setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
 		pr_info("Enabling Restricted Speculation for firmware calls\n");
 	}
@@ -1045,6 +1133,10 @@ void cpu_bugs_smt_update(void)
 {
 	mutex_lock(&spec_ctrl_mutex);
 
+	if (sched_smt_active() && unprivileged_ebpf_enabled() &&
+	    spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+		pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
+
 	switch (spectre_v2_user_stibp) {
 	case SPECTRE_V2_USER_NONE:
 		break;
@@ -1684,7 +1776,7 @@ static ssize_t tsx_async_abort_show_state(char *buf)
 
 static char *stibp_state(void)
 {
-	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+	if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
 		return "";
 
 	switch (spectre_v2_user_stibp) {
@@ -1714,6 +1806,27 @@ static char *ibpb_state(void)
 	return "";
 }
 
+static ssize_t spectre_v2_show_state(char *buf)
+{
+	if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+		return sprintf(buf, "Vulnerable: LFENCE\n");
+
+	if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+		return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
+
+	if (sched_smt_active() && unprivileged_ebpf_enabled() &&
+	    spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+		return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
+
+	return sprintf(buf, "%s%s%s%s%s%s\n",
+		       spectre_v2_strings[spectre_v2_enabled],
+		       ibpb_state(),
+		       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+		       stibp_state(),
+		       boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+		       spectre_v2_module_string());
+}
+
 static ssize_t srbds_show_state(char *buf)
 {
 	return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
@@ -1739,12 +1852,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 		return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]);
 
 	case X86_BUG_SPECTRE_V2:
-		return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
-			       ibpb_state(),
-			       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
-			       stibp_state(),
-			       boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
-			       spectre_v2_module_string());
+		return spectre_v2_show_state(buf);
 
 	case X86_BUG_SPEC_STORE_BYPASS:
 		return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7b8382c11788..64deb7727d00 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -88,6 +88,83 @@ EXPORT_SYMBOL_GPL(get_llc_id);
 /* L2 cache ID of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID;
 
+static struct ppin_info {
+	int	feature;
+	int	msr_ppin_ctl;
+	int	msr_ppin;
+} ppin_info[] = {
+	[X86_VENDOR_INTEL] = {
+		.feature = X86_FEATURE_INTEL_PPIN,
+		.msr_ppin_ctl = MSR_PPIN_CTL,
+		.msr_ppin = MSR_PPIN
+	},
+	[X86_VENDOR_AMD] = {
+		.feature = X86_FEATURE_AMD_PPIN,
+		.msr_ppin_ctl = MSR_AMD_PPIN_CTL,
+		.msr_ppin = MSR_AMD_PPIN
+	},
+};
+
+static const struct x86_cpu_id ppin_cpuids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AMD_PPIN, &ppin_info[X86_VENDOR_AMD]),
+	X86_MATCH_FEATURE(X86_FEATURE_INTEL_PPIN, &ppin_info[X86_VENDOR_INTEL]),
+
+	/* Legacy models without CPUID enumeration */
+	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &ppin_info[X86_VENDOR_INTEL]),
+
+	{}
+};
+
+static void ppin_init(struct cpuinfo_x86 *c)
+{
+	const struct x86_cpu_id *id;
+	unsigned long long val;
+	struct ppin_info *info;
+
+	id = x86_match_cpu(ppin_cpuids);
+	if (!id)
+		return;
+
+	/*
+	 * Testing the presence of the MSR is not enough. Need to check
+	 * that the PPIN_CTL allows reading of the PPIN.
+	 */
+	info = (struct ppin_info *)id->driver_data;
+
+	if (rdmsrl_safe(info->msr_ppin_ctl, &val))
+		goto clear_ppin;
+
+	if ((val & 3UL) == 1UL) {
+		/* PPIN locked in disabled mode */
+		goto clear_ppin;
+	}
+
+	/* If PPIN is disabled, try to enable */
+	if (!(val & 2UL)) {
+		wrmsrl_safe(info->msr_ppin_ctl,  val | 2UL);
+		rdmsrl_safe(info->msr_ppin_ctl, &val);
+	}
+
+	/* Is the enable bit set? */
+	if (val & 2UL) {
+		c->ppin = __rdmsr(info->msr_ppin);
+		set_cpu_cap(c, info->feature);
+		return;
+	}
+
+clear_ppin:
+	clear_cpu_cap(c, info->feature);
+}
+
 /* correctly size the local cpu masks */
 void __init setup_cpu_local_masks(void)
 {
@@ -1655,6 +1732,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 			c->x86_capability[i] |= boot_cpu_data.x86_capability[i];
 	}
 
+	ppin_init(c);
+
 	/* Init Machine Check Exception if available. */
 	mcheck_cpu_init(c);
 
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 9f4b508886dd..1940d305db1c 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -993,6 +993,7 @@ static struct attribute *default_attrs[] = {
 	NULL,	/* possibly interrupt_enable if supported, see below */
 	NULL,
 };
+ATTRIBUTE_GROUPS(default);
 
 #define to_block(k)	container_of(k, struct threshold_block, kobj)
 #define to_attr(a)	container_of(a, struct threshold_attr, attr)
@@ -1029,7 +1030,7 @@ static void threshold_block_release(struct kobject *kobj);
 
 static struct kobj_type threshold_ktype = {
 	.sysfs_ops		= &threshold_ops,
-	.default_attrs		= default_attrs,
+	.default_groups		= default_groups,
 	.release		= threshold_block_release,
 };
 
@@ -1101,10 +1102,10 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
 	b->threshold_limit	= THRESHOLD_MAX;
 
 	if (b->interrupt_capable) {
-		threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
+		default_attrs[2] = &interrupt_enable.attr;
 		b->interrupt_enable = 1;
 	} else {
-		threshold_ktype.default_attrs[2] = NULL;
+		default_attrs[2] = NULL;
 	}
 
 	INIT_LIST_HEAD(&b->miscj);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5818b837fd4d..4f1e825033ce 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -138,12 +138,7 @@ void mce_setup(struct mce *m)
 	m->socketid = cpu_data(m->extcpu).phys_proc_id;
 	m->apicid = cpu_data(m->extcpu).initial_apicid;
 	m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
-
-	if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
-		m->ppin = __rdmsr(MSR_PPIN);
-	else if (this_cpu_has(X86_FEATURE_AMD_PPIN))
-		m->ppin = __rdmsr(MSR_AMD_PPIN);
-
+	m->ppin = cpu_data(m->extcpu).ppin;
 	m->microcode = boot_cpu_data.microcode;
 }
 
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index baafbb37be67..95275a5e57e0 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -470,47 +470,6 @@ void intel_clear_lmce(void)
 	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
 }
 
-static void intel_ppin_init(struct cpuinfo_x86 *c)
-{
-	unsigned long long val;
-
-	/*
-	 * Even if testing the presence of the MSR would be enough, we don't
-	 * want to risk the situation where other models reuse this MSR for
-	 * other purposes.
-	 */
-	switch (c->x86_model) {
-	case INTEL_FAM6_IVYBRIDGE_X:
-	case INTEL_FAM6_HASWELL_X:
-	case INTEL_FAM6_BROADWELL_D:
-	case INTEL_FAM6_BROADWELL_X:
-	case INTEL_FAM6_SKYLAKE_X:
-	case INTEL_FAM6_ICELAKE_X:
-	case INTEL_FAM6_ICELAKE_D:
-	case INTEL_FAM6_SAPPHIRERAPIDS_X:
-	case INTEL_FAM6_XEON_PHI_KNL:
-	case INTEL_FAM6_XEON_PHI_KNM:
-
-		if (rdmsrl_safe(MSR_PPIN_CTL, &val))
-			return;
-
-		if ((val & 3UL) == 1UL) {
-			/* PPIN locked in disabled mode */
-			return;
-		}
-
-		/* If PPIN is disabled, try to enable */
-		if (!(val & 2UL)) {
-			wrmsrl_safe(MSR_PPIN_CTL,  val | 2UL);
-			rdmsrl_safe(MSR_PPIN_CTL, &val);
-		}
-
-		/* Is the enable bit set? */
-		if (val & 2UL)
-			set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
-	}
-}
-
 /*
  * Enable additional error logs from the integrated
  * memory controller on processors that support this.
@@ -535,7 +494,6 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
 	intel_init_cmci();
 	intel_init_lmce();
-	intel_ppin_init(c);
 	intel_imc_init(c);
 }
 
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 5a99f993e639..e0a572472052 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -33,6 +33,7 @@
 #include <asm/nmi.h>
 #include <clocksource/hyperv_timer.h>
 #include <asm/numa.h>
+#include <asm/coco.h>
 
 /* Is Linux running as the root partition? */
 bool hv_root_partition;
@@ -344,6 +345,11 @@ static void __init ms_hyperv_init_platform(void)
 		 */
 		swiotlb_force = SWIOTLB_FORCE;
 #endif
+		/* Isolation VMs are unenlightened SEV-based VMs, thus this check: */
+		if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
+			if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE)
+				cc_set_vendor(CC_VENDOR_HYPERV);
+		}
 	}
 
 	if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 21d1f062895a..4143b1e4c5c6 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -26,6 +26,7 @@ struct cpuid_bit {
 static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_APERFMPERF,       CPUID_ECX,  0, 0x00000006, 0 },
 	{ X86_FEATURE_EPB,		CPUID_ECX,  3, 0x00000006, 0 },
+	{ X86_FEATURE_INTEL_PPIN,	CPUID_EBX,  0, 0x00000007, 1 },
 	{ X86_FEATURE_CQM_LLC,		CPUID_EDX,  1, 0x0000000f, 0 },
 	{ X86_FEATURE_CQM_OCCUP_LLC,	CPUID_EDX,  0, 0x0000000f, 1 },
 	{ X86_FEATURE_CQM_MBM_TOTAL,	CPUID_EDX,  1, 0x0000000f, 1 },
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 48afe96ae0f0..7c63a1911fae 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -13,6 +13,30 @@
 #include "sgx.h"
 
 /*
+ * Calculate byte offset of a PCMD struct associated with an enclave page. PCMD's
+ * follow right after the EPC data in the backing storage. In addition to the
+ * visible enclave pages, there's one extra page slot for SECS, before PCMD
+ * structs.
+ */
+static inline pgoff_t sgx_encl_get_backing_page_pcmd_offset(struct sgx_encl *encl,
+							    unsigned long page_index)
+{
+	pgoff_t epc_end_off = encl->size + sizeof(struct sgx_secs);
+
+	return epc_end_off + page_index * sizeof(struct sgx_pcmd);
+}
+
+/*
+ * Free a page from the backing storage in the given page index.
+ */
+static inline void sgx_encl_truncate_backing_page(struct sgx_encl *encl, unsigned long page_index)
+{
+	struct inode *inode = file_inode(encl->backing);
+
+	shmem_truncate_range(inode, PFN_PHYS(page_index), PFN_PHYS(page_index) + PAGE_SIZE - 1);
+}
+
+/*
  * ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC
  * Pages" in the SDM.
  */
@@ -22,9 +46,11 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
 {
 	unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
 	struct sgx_encl *encl = encl_page->encl;
+	pgoff_t page_index, page_pcmd_off;
 	struct sgx_pageinfo pginfo;
 	struct sgx_backing b;
-	pgoff_t page_index;
+	bool pcmd_page_empty;
+	u8 *pcmd_page;
 	int ret;
 
 	if (secs_page)
@@ -32,14 +58,16 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
 	else
 		page_index = PFN_DOWN(encl->size);
 
+	page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
+
 	ret = sgx_encl_get_backing(encl, page_index, &b);
 	if (ret)
 		return ret;
 
 	pginfo.addr = encl_page->desc & PAGE_MASK;
 	pginfo.contents = (unsigned long)kmap_atomic(b.contents);
-	pginfo.metadata = (unsigned long)kmap_atomic(b.pcmd) +
-			  b.pcmd_offset;
+	pcmd_page = kmap_atomic(b.pcmd);
+	pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;
 
 	if (secs_page)
 		pginfo.secs = (u64)sgx_get_epc_virt_addr(secs_page);
@@ -55,11 +83,24 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
 		ret = -EFAULT;
 	}
 
-	kunmap_atomic((void *)(unsigned long)(pginfo.metadata - b.pcmd_offset));
+	memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd));
+
+	/*
+	 * The area for the PCMD in the page was zeroed above.  Check if the
+	 * whole page is now empty meaning that all PCMD's have been zeroed:
+	 */
+	pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);
+
+	kunmap_atomic(pcmd_page);
 	kunmap_atomic((void *)(unsigned long)pginfo.contents);
 
 	sgx_encl_put_backing(&b, false);
 
+	sgx_encl_truncate_backing_page(encl, page_index);
+
+	if (pcmd_page_empty)
+		sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
+
 	return ret;
 }
 
@@ -579,7 +620,7 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
 int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
 			 struct sgx_backing *backing)
 {
-	pgoff_t pcmd_index = PFN_DOWN(encl->size) + 1 + (page_index >> 5);
+	pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
 	struct page *contents;
 	struct page *pcmd;
 
@@ -587,7 +628,7 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
 	if (IS_ERR(contents))
 		return PTR_ERR(contents);
 
-	pcmd = sgx_encl_get_backing_page(encl, pcmd_index);
+	pcmd = sgx_encl_get_backing_page(encl, PFN_DOWN(page_pcmd_off));
 	if (IS_ERR(pcmd)) {
 		put_page(contents);
 		return PTR_ERR(pcmd);
@@ -596,9 +637,7 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
 	backing->page_index = page_index;
 	backing->contents = contents;
 	backing->pcmd = pcmd;
-	backing->pcmd_offset =
-		(page_index & (PAGE_SIZE / sizeof(struct sgx_pcmd) - 1)) *
-		sizeof(struct sgx_pcmd);
+	backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1);
 
 	return 0;
 }
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index bc0657f0deed..f267205f2d5a 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -995,8 +995,10 @@ early_param("memmap", parse_memmap_opt);
  */
 void __init e820__reserve_setup_data(void)
 {
+	struct setup_indirect *indirect;
 	struct setup_data *data;
-	u64 pa_data;
+	u64 pa_data, pa_next;
+	u32 len;
 
 	pa_data = boot_params.hdr.setup_data;
 	if (!pa_data)
@@ -1004,6 +1006,14 @@ void __init e820__reserve_setup_data(void)
 
 	while (pa_data) {
 		data = early_memremap(pa_data, sizeof(*data));
+		if (!data) {
+			pr_warn("e820: failed to memremap setup_data entry\n");
+			return;
+		}
+
+		len = sizeof(*data);
+		pa_next = data->next;
+
 		e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
 
 		/*
@@ -1015,18 +1025,27 @@ void __init e820__reserve_setup_data(void)
 						 sizeof(*data) + data->len,
 						 E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
 
-		if (data->type == SETUP_INDIRECT &&
-		    ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
-			e820__range_update(((struct setup_indirect *)data->data)->addr,
-					   ((struct setup_indirect *)data->data)->len,
-					   E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
-			e820__range_update_kexec(((struct setup_indirect *)data->data)->addr,
-						 ((struct setup_indirect *)data->data)->len,
-						 E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+		if (data->type == SETUP_INDIRECT) {
+			len += data->len;
+			early_memunmap(data, sizeof(*data));
+			data = early_memremap(pa_data, len);
+			if (!data) {
+				pr_warn("e820: failed to memremap indirect setup_data\n");
+				return;
+			}
+
+			indirect = (struct setup_indirect *)data->data;
+
+			if (indirect->type != SETUP_INDIRECT) {
+				e820__range_update(indirect->addr, indirect->len,
+						   E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+				e820__range_update_kexec(indirect->addr, indirect->len,
+							 E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+			}
 		}
 
-		pa_data = data->next;
-		early_memunmap(data, sizeof(*data));
+		pa_data = pa_next;
+		early_memunmap(data, len);
 	}
 
 	e820__update_table(e820_table);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 8dea01ffc5c1..19821f027cb3 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -612,6 +612,13 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags)
 		fpu_inherit_perms(dst_fpu);
 	fpregs_unlock();
 
+	/*
+	 * Children never inherit PASID state.
+	 * Force it to have its init value:
+	 */
+	if (use_xsave())
+		dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID;
+
 	trace_x86_fpu_copy_src(src_fpu);
 	trace_x86_fpu_copy_dst(dst_fpu);
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index de563db9cdcd..4f5ecbbaae77 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -126,7 +126,7 @@ static bool __head check_la57_support(unsigned long physaddr)
 }
 #endif
 
-static unsigned long sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd)
+static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd)
 {
 	unsigned long vaddr, vaddr_end;
 	int i;
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 64b6da95af98..e2e89bebcbc3 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -88,11 +88,13 @@ create_setup_data_node(struct dentry *parent, int no,
 
 static int __init create_setup_data_nodes(struct dentry *parent)
 {
+	struct setup_indirect *indirect;
 	struct setup_data_node *node;
 	struct setup_data *data;
-	int error;
+	u64 pa_data, pa_next;
 	struct dentry *d;
-	u64 pa_data;
+	int error;
+	u32 len;
 	int no = 0;
 
 	d = debugfs_create_dir("setup_data", parent);
@@ -112,12 +114,29 @@ static int __init create_setup_data_nodes(struct dentry *parent)
 			error = -ENOMEM;
 			goto err_dir;
 		}
-
-		if (data->type == SETUP_INDIRECT &&
-		    ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
-			node->paddr = ((struct setup_indirect *)data->data)->addr;
-			node->type  = ((struct setup_indirect *)data->data)->type;
-			node->len   = ((struct setup_indirect *)data->data)->len;
+		pa_next = data->next;
+
+		if (data->type == SETUP_INDIRECT) {
+			len = sizeof(*data) + data->len;
+			memunmap(data);
+			data = memremap(pa_data, len, MEMREMAP_WB);
+			if (!data) {
+				kfree(node);
+				error = -ENOMEM;
+				goto err_dir;
+			}
+
+			indirect = (struct setup_indirect *)data->data;
+
+			if (indirect->type != SETUP_INDIRECT) {
+				node->paddr = indirect->addr;
+				node->type  = indirect->type;
+				node->len   = indirect->len;
+			} else {
+				node->paddr = pa_data;
+				node->type  = data->type;
+				node->len   = data->len;
+			}
 		} else {
 			node->paddr = pa_data;
 			node->type  = data->type;
@@ -125,7 +144,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
 		}
 
 		create_setup_data_node(d, no, node);
-		pa_data = data->next;
+		pa_data = pa_next;
 
 		memunmap(data);
 		no++;
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index d0a19121c6a4..257892fcefa7 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -91,26 +91,41 @@ static int get_setup_data_paddr(int nr, u64 *paddr)
 
 static int __init get_setup_data_size(int nr, size_t *size)
 {
-	int i = 0;
+	u64 pa_data = boot_params.hdr.setup_data, pa_next;
+	struct setup_indirect *indirect;
 	struct setup_data *data;
-	u64 pa_data = boot_params.hdr.setup_data;
+	int i = 0;
+	u32 len;
 
 	while (pa_data) {
 		data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
 		if (!data)
 			return -ENOMEM;
+		pa_next = data->next;
+
 		if (nr == i) {
-			if (data->type == SETUP_INDIRECT &&
-			    ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT)
-				*size = ((struct setup_indirect *)data->data)->len;
-			else
+			if (data->type == SETUP_INDIRECT) {
+				len = sizeof(*data) + data->len;
+				memunmap(data);
+				data = memremap(pa_data, len, MEMREMAP_WB);
+				if (!data)
+					return -ENOMEM;
+
+				indirect = (struct setup_indirect *)data->data;
+
+				if (indirect->type != SETUP_INDIRECT)
+					*size = indirect->len;
+				else
+					*size = data->len;
+			} else {
 				*size = data->len;
+			}
 
 			memunmap(data);
 			return 0;
 		}
 
-		pa_data = data->next;
+		pa_data = pa_next;
 		memunmap(data);
 		i++;
 	}
@@ -120,9 +135,11 @@ static int __init get_setup_data_size(int nr, size_t *size)
 static ssize_t type_show(struct kobject *kobj,
 			 struct kobj_attribute *attr, char *buf)
 {
+	struct setup_indirect *indirect;
+	struct setup_data *data;
 	int nr, ret;
 	u64 paddr;
-	struct setup_data *data;
+	u32 len;
 
 	ret = kobj_to_setup_data_nr(kobj, &nr);
 	if (ret)
@@ -135,10 +152,20 @@ static ssize_t type_show(struct kobject *kobj,
 	if (!data)
 		return -ENOMEM;
 
-	if (data->type == SETUP_INDIRECT)
-		ret = sprintf(buf, "0x%x\n", ((struct setup_indirect *)data->data)->type);
-	else
+	if (data->type == SETUP_INDIRECT) {
+		len = sizeof(*data) + data->len;
+		memunmap(data);
+		data = memremap(paddr, len, MEMREMAP_WB);
+		if (!data)
+			return -ENOMEM;
+
+		indirect = (struct setup_indirect *)data->data;
+
+		ret = sprintf(buf, "0x%x\n", indirect->type);
+	} else {
 		ret = sprintf(buf, "0x%x\n", data->type);
+	}
+
 	memunmap(data);
 	return ret;
 }
@@ -149,9 +176,10 @@ static ssize_t setup_data_data_read(struct file *fp,
 				    char *buf,
 				    loff_t off, size_t count)
 {
+	struct setup_indirect *indirect;
+	struct setup_data *data;
 	int nr, ret = 0;
 	u64 paddr, len;
-	struct setup_data *data;
 	void *p;
 
 	ret = kobj_to_setup_data_nr(kobj, &nr);
@@ -165,10 +193,27 @@ static ssize_t setup_data_data_read(struct file *fp,
 	if (!data)
 		return -ENOMEM;
 
-	if (data->type == SETUP_INDIRECT &&
-	    ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
-		paddr = ((struct setup_indirect *)data->data)->addr;
-		len = ((struct setup_indirect *)data->data)->len;
+	if (data->type == SETUP_INDIRECT) {
+		len = sizeof(*data) + data->len;
+		memunmap(data);
+		data = memremap(paddr, len, MEMREMAP_WB);
+		if (!data)
+			return -ENOMEM;
+
+		indirect = (struct setup_indirect *)data->data;
+
+		if (indirect->type != SETUP_INDIRECT) {
+			paddr = indirect->addr;
+			len = indirect->len;
+		} else {
+			/*
+			 * Even though this is technically undefined, return
+			 * the data as though it is a normal setup_data struct.
+			 * This will at least allow it to be inspected.
+			 */
+			paddr += sizeof(*data);
+			len = data->len;
+		}
 	} else {
 		paddr += sizeof(*data);
 		len = data->len;
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 95fa745e310a..96d7c27b7093 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -273,6 +273,14 @@ int module_finalize(const Elf_Ehdr *hdr,
 			retpolines = s;
 	}
 
+	/*
+	 * See alternative_instructions() for the ordering rules between the
+	 * various patching types.
+	 */
+	if (para) {
+		void *pseg = (void *)para->sh_addr;
+		apply_paravirt(pseg, pseg + para->sh_size);
+	}
 	if (retpolines) {
 		void *rseg = (void *)retpolines->sh_addr;
 		apply_retpolines(rseg, rseg + retpolines->sh_size);
@@ -290,11 +298,6 @@ int module_finalize(const Elf_Ehdr *hdr,
 					    tseg, tseg + text->sh_size);
 	}
 
-	if (para) {
-		void *pseg = (void *)para->sh_addr;
-		apply_paravirt(pseg, pseg + para->sh_size);
-	}
-
 	/* make jump label nops */
 	jump_label_apply_nops(me);
 
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 4bce802d25fb..e73f7df362f5 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -292,7 +292,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
 		 reason, smp_processor_id());
 
-	pr_emerg("Do you have a strange power saving mode enabled?\n");
 	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
 		nmi_panic(regs, "NMI: Not continuing");
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 81d8ef036637..e131d71b3cae 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -765,8 +765,11 @@ void stop_this_cpu(void *dummy)
 	 * without the encryption bit, they don't race each other when flushed
 	 * and potentially end up with the wrong entry being committed to
 	 * memory.
+	 *
+	 * Test the CPUID bit directly because the machine might've cleared
+	 * X86_FEATURE_SME due to cmdline options.
 	 */
-	if (boot_cpu_has(X86_FEATURE_SME))
+	if (cpuid_eax(0x8000001f) & BIT(0))
 		native_wbinvd();
 	for (;;) {
 		/*
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f7a132eb794d..90d7e1788c91 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -369,21 +369,41 @@ static void __init parse_setup_data(void)
 
 static void __init memblock_x86_reserve_range_setup_data(void)
 {
+	struct setup_indirect *indirect;
 	struct setup_data *data;
-	u64 pa_data;
+	u64 pa_data, pa_next;
+	u32 len;
 
 	pa_data = boot_params.hdr.setup_data;
 	while (pa_data) {
 		data = early_memremap(pa_data, sizeof(*data));
+		if (!data) {
+			pr_warn("setup: failed to memremap setup_data entry\n");
+			return;
+		}
+
+		len = sizeof(*data);
+		pa_next = data->next;
+
 		memblock_reserve(pa_data, sizeof(*data) + data->len);
 
-		if (data->type == SETUP_INDIRECT &&
-		    ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT)
-			memblock_reserve(((struct setup_indirect *)data->data)->addr,
-					 ((struct setup_indirect *)data->data)->len);
+		if (data->type == SETUP_INDIRECT) {
+			len += data->len;
+			early_memunmap(data, sizeof(*data));
+			data = early_memremap(pa_data, len);
+			if (!data) {
+				pr_warn("setup: failed to memremap indirect setup_data\n");
+				return;
+			}
 
-		pa_data = data->next;
-		early_memunmap(data, sizeof(*data));
+			indirect = (struct setup_indirect *)data->data;
+
+			if (indirect->type != SETUP_INDIRECT)
+				memblock_reserve(indirect->addr, indirect->len);
+		}
+
+		pa_data = pa_next;
+		early_memunmap(data, len);
 	}
 }
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 617012f4619f..2ef14772dc04 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -83,10 +83,6 @@
 #include <asm/hw_irq.h>
 #include <asm/stackprotector.h>
 
-#ifdef CONFIG_ACPI_CPPC_LIB
-#include <acpi/cppc_acpi.h>
-#endif
-
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
@@ -155,8 +151,6 @@ static inline void smpboot_restore_warm_reset_vector(void)
 	*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
 }
 
-static void init_freq_invariance(bool secondary, bool cppc_ready);
-
 /*
  * Report back to the Boot Processor during boot time or to the caller processor
  * during CPU online.
@@ -2097,48 +2091,6 @@ out:
 	return true;
 }
 
-#ifdef CONFIG_ACPI_CPPC_LIB
-static bool amd_set_max_freq_ratio(void)
-{
-	struct cppc_perf_caps perf_caps;
-	u64 highest_perf, nominal_perf;
-	u64 perf_ratio;
-	int rc;
-
-	rc = cppc_get_perf_caps(0, &perf_caps);
-	if (rc) {
-		pr_debug("Could not retrieve perf counters (%d)\n", rc);
-		return false;
-	}
-
-	highest_perf = amd_get_highest_perf();
-	nominal_perf = perf_caps.nominal_perf;
-
-	if (!highest_perf || !nominal_perf) {
-		pr_debug("Could not retrieve highest or nominal performance\n");
-		return false;
-	}
-
-	perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
-	/* midpoint between max_boost and max_P */
-	perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
-	if (!perf_ratio) {
-		pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
-		return false;
-	}
-
-	arch_turbo_freq_ratio = perf_ratio;
-	arch_set_max_freq_ratio(false);
-
-	return true;
-}
-#else
-static bool amd_set_max_freq_ratio(void)
-{
-	return false;
-}
-#endif
-
 static void init_counter_refs(void)
 {
 	u64 aperf, mperf;
@@ -2167,7 +2119,7 @@ static void register_freq_invariance_syscore_ops(void)
 static inline void register_freq_invariance_syscore_ops(void) {}
 #endif
 
-static void init_freq_invariance(bool secondary, bool cppc_ready)
+void init_freq_invariance(bool secondary, bool cppc_ready)
 {
 	bool ret = false;
 
@@ -2187,7 +2139,7 @@ static void init_freq_invariance(bool secondary, bool cppc_ready)
 		if (!cppc_ready) {
 			return;
 		}
-		ret = amd_set_max_freq_ratio();
+		ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio);
 	}
 
 	if (ret) {
@@ -2200,22 +2152,6 @@ static void init_freq_invariance(bool secondary, bool cppc_ready)
 	}
 }
 
-#ifdef CONFIG_ACPI_CPPC_LIB
-static DEFINE_MUTEX(freq_invariance_lock);
-
-void init_freq_invariance_cppc(void)
-{
-	static bool secondary;
-
-	mutex_lock(&freq_invariance_lock);
-
-	init_freq_invariance(secondary, true);
-	secondary = true;
-
-	mutex_unlock(&freq_invariance_lock);
-}
-#endif
-
 static void disable_freq_invariance_workfn(struct work_struct *work)
 {
 	static_branch_disable(&arch_scale_freq_key);
@@ -2264,8 +2200,4 @@ error:
 	pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
 	schedule_work(&disable_freq_invariance_work);
 }
-#else
-static inline void init_freq_invariance(bool secondary, bool cppc_ready)
-{
-}
 #endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c9d566dcf89a..2e37862e3a8c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -39,6 +39,7 @@
 #include <linux/io.h>
 #include <linux/hardirq.h>
 #include <linux/atomic.h>
+#include <linux/ioasid.h>
 
 #include <asm/stacktrace.h>
 #include <asm/processor.h>
@@ -559,6 +560,57 @@ static bool fixup_iopl_exception(struct pt_regs *regs)
 	return true;
 }
 
+/*
+ * The unprivileged ENQCMD instruction generates #GPs if the
+ * IA32_PASID MSR has not been populated.  If possible, populate
+ * the MSR from a PASID previously allocated to the mm.
+ */
+static bool try_fixup_enqcmd_gp(void)
+{
+#ifdef CONFIG_IOMMU_SVA
+	u32 pasid;
+
+	/*
+	 * MSR_IA32_PASID is managed using XSAVE.  Directly
+	 * writing to the MSR is only possible when fpregs
+	 * are valid and the fpstate is not.  This is
+	 * guaranteed when handling a userspace exception
+	 * in *before* interrupts are re-enabled.
+	 */
+	lockdep_assert_irqs_disabled();
+
+	/*
+	 * Hardware without ENQCMD will not generate
+	 * #GPs that can be fixed up here.
+	 */
+	if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
+		return false;
+
+	pasid = current->mm->pasid;
+
+	/*
+	 * If the mm has not been allocated a
+	 * PASID, the #GP can not be fixed up.
+	 */
+	if (!pasid_valid(pasid))
+		return false;
+
+	/*
+	 * Did this thread already have its PASID activated?
+	 * If so, the #GP must be from something else.
+	 */
+	if (current->pasid_activated)
+		return false;
+
+	wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID);
+	current->pasid_activated = 1;
+
+	return true;
+#else
+	return false;
+#endif
+}
+
 DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 {
 	char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
@@ -567,6 +619,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 	unsigned long gp_addr;
 	int ret;
 
+	if (user_mode(regs) && try_fixup_enqcmd_gp())
+		return;
+
 	cond_local_irq_enable(regs);
 
 	if (static_cpu_has(X86_FEATURE_UMIP)) {
@@ -659,6 +714,7 @@ static bool do_int3(struct pt_regs *regs)
 
 	return res == NOTIFY_STOP;
 }
+NOKPROBE_SYMBOL(do_int3);
 
 static void do_int3_user(struct pt_regs *regs)
 {
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 7d20c1d34a3c..e84ee5cdbd8c 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -129,6 +129,11 @@ struct x86_cpuinit_ops x86_cpuinit = {
 
 static void default_nmi_init(void) { };
 
+static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { }
+static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; }
+static bool enc_tlb_flush_required_noop(bool enc) { return false; }
+static bool enc_cache_flush_required_noop(void) { return false; }
+
 struct x86_platform_ops x86_platform __ro_after_init = {
 	.calibrate_cpu			= native_calibrate_cpu_early,
 	.calibrate_tsc			= native_calibrate_tsc,
@@ -138,9 +143,16 @@ struct x86_platform_ops x86_platform __ro_after_init = {
 	.is_untracked_pat_range		= is_ISA_range,
 	.nmi_init			= default_nmi_init,
 	.get_nmi_reason			= default_get_nmi_reason,
-	.save_sched_clock_state 	= tsc_save_sched_clock_state,
-	.restore_sched_clock_state 	= tsc_restore_sched_clock_state,
+	.save_sched_clock_state		= tsc_save_sched_clock_state,
+	.restore_sched_clock_state	= tsc_restore_sched_clock_state,
 	.hyper.pin_vcpu			= x86_op_int_noop,
+
+	.guest = {
+		.enc_status_change_prepare = enc_status_change_prepare_noop,
+		.enc_status_change_finish  = enc_status_change_finish_noop,
+		.enc_tlb_flush_required	   = enc_tlb_flush_required_noop,
+		.enc_cache_flush_required  = enc_cache_flush_required_noop,
+	},
 };
 
 EXPORT_SYMBOL_GPL(x86_platform);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5719d8cfdbd9..e86d610dc6b7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -429,8 +429,23 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
 	FOP_END
 
 /* Special case for SETcc - 1 instruction per cc */
+
+/*
+ * Depending on .config the SETcc functions look like:
+ *
+ * SETcc %al   [3 bytes]
+ * RET         [1 byte]
+ * INT3        [1 byte; CONFIG_SLS]
+ *
+ * Which gives possible sizes 4 or 5.  When rounded up to the
+ * next power-of-two alignment they become 4 or 8.
+ */
+#define SETCC_LENGTH	(4 + IS_ENABLED(CONFIG_SLS))
+#define SETCC_ALIGN	(4 << IS_ENABLED(CONFIG_SLS))
+static_assert(SETCC_LENGTH <= SETCC_ALIGN);
+
 #define FOP_SETCC(op) \
-	".align 4 \n\t" \
+	".align " __stringify(SETCC_ALIGN) " \n\t" \
 	".type " #op ", @function \n\t" \
 	#op ": \n\t" \
 	#op " %al \n\t" \
@@ -1047,7 +1062,7 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
 static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
 {
 	u8 rc;
-	void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
+	void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf);
 
 	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
 	asm("push %[flags]; popf; " CALL_NOSPEC
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 59cf2343f3d9..d0d7b9bc6cad 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -27,8 +27,7 @@
  * Output:
  * rax original destination
  */
-SYM_FUNC_START_ALIAS(__memcpy)
-SYM_FUNC_START_WEAK(memcpy)
+SYM_FUNC_START(__memcpy)
 	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
 		      "jmp memcpy_erms", X86_FEATURE_ERMS
 
@@ -40,11 +39,12 @@ SYM_FUNC_START_WEAK(memcpy)
 	movl %edx, %ecx
 	rep movsb
 	RET
-SYM_FUNC_END(memcpy)
-SYM_FUNC_END_ALIAS(__memcpy)
-EXPORT_SYMBOL(memcpy)
+SYM_FUNC_END(__memcpy)
 EXPORT_SYMBOL(__memcpy)
 
+SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
+EXPORT_SYMBOL(memcpy)
+
 /*
  * memcpy_erms() - enhanced fast string memcpy. This is faster and
  * simpler than memcpy. Use memcpy_erms when possible.
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 50ea390df712..d83cba364e31 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -24,7 +24,6 @@
  * Output:
  * rax: dest
  */
-SYM_FUNC_START_WEAK(memmove)
 SYM_FUNC_START(__memmove)
 
 	mov %rdi, %rax
@@ -207,6 +206,7 @@ SYM_FUNC_START(__memmove)
 13:
 	RET
 SYM_FUNC_END(__memmove)
-SYM_FUNC_END_ALIAS(memmove)
 EXPORT_SYMBOL(__memmove)
+
+SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
 EXPORT_SYMBOL(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index d624f2bc42f1..fc9ffd3ff3b2 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -17,7 +17,6 @@
  *
  * rax   original destination
  */
-SYM_FUNC_START_WEAK(memset)
 SYM_FUNC_START(__memset)
 	/*
 	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
@@ -42,10 +41,11 @@ SYM_FUNC_START(__memset)
 	movq %r9,%rax
 	RET
 SYM_FUNC_END(__memset)
-SYM_FUNC_END_ALIAS(memset)
-EXPORT_SYMBOL(memset)
 EXPORT_SYMBOL(__memset)
 
+SYM_FUNC_ALIAS_WEAK(memset, __memset)
+EXPORT_SYMBOL(memset)
+
 /*
  * ISO C memset - set a memory block to a byte value. This function uses
  * enhanced rep stosb to override the fast string function.
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 89b3fb244e15..afbdda539b80 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -34,7 +34,7 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
 
 	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
 		      __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
-		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_AMD
+		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE
 
 .endm
 
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index ec31f5b60323..d12d1358f96d 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -690,7 +690,10 @@ AVXcode: 2
 45: vpsrlvd/q Vx,Hx,Wx (66),(v)
 46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
 47: vpsllvd/q Vx,Hx,Wx (66),(v)
-# Skip 0x48-0x4b
+# Skip 0x48
+49: TILERELEASE (v1),(000),(11B) | LDTILECFG Mtc (v1)(000) | STTILECFG Mtc (66),(v1),(000) | TILEZERO Vt (F2),(v1),(11B)
+# Skip 0x4a
+4b: TILELOADD Vt,Wsm (F2),(v1) | TILELOADDT1 Vt,Wsm (66),(v1) | TILESTORED Wsm,Vt (F3),(v)
 4c: vrcp14ps/d Vpd,Wpd (66),(ev)
 4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
 4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
@@ -705,7 +708,10 @@ AVXcode: 2
 59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
 5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
 5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
-# Skip 0x5c-0x61
+5c: TDPBF16PS Vt,Wt,Ht (F3),(v1)
+# Skip 0x5d
+5e: TDPBSSD Vt,Wt,Ht (F2),(v1) | TDPBSUD Vt,Wt,Ht (F3),(v1) | TDPBUSD Vt,Wt,Ht (66),(v1) | TDPBUUD Vt,Wt,Ht (v1)
+# Skip 0x5f-0x61
 62: vpexpandb/w Vx,Wx (66),(ev)
 63: vpcompressb/w Wx,Vx (66),(ev)
 64: vpblendmd/q Vx,Hx,Wx (66),(ev)
@@ -822,9 +828,9 @@ AVXcode: 3
 05: vpermilpd Vx,Wx,Ib (66),(v)
 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
 07:
-08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) | vrndscaleph Vx,Wx,Ib (evo)
 09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
-0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) | vrndscalesh Vx,Hx,Wx,Ib (evo)
 0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
 0c: vblendps Vx,Hx,Wx,Ib (66)
 0d: vblendpd Vx,Hx,Wx,Ib (66)
@@ -846,8 +852,8 @@ AVXcode: 3
 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
 23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
 25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
-26: vgetmantps/d Vx,Wx,Ib (66),(ev)
-27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
+26: vgetmantps/d Vx,Wx,Ib (66),(ev) | vgetmantph Vx,Wx,Ib (ev)
+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) | vgetmantsh Vx,Hx,Wx,Ib (ev)
 30: kshiftrb/w Vk,Uk,Ib (66),(v)
 31: kshiftrd/q Vk,Uk,Ib (66),(v)
 32: kshiftlb/w Vk,Uk,Ib (66),(v)
@@ -871,23 +877,102 @@ AVXcode: 3
 51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
 54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
 55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
-56: vreduceps/d Vx,Wx,Ib (66),(ev)
-57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
+56: vreduceps/d Vx,Wx,Ib (66),(ev) | vreduceph Vx,Wx,Ib (ev)
+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) | vreducesh Vx,Hx,Wx,Ib (ev)
 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
 61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
 63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
-66: vfpclassps/d Vk,Wx,Ib (66),(ev)
-67: vfpclassss/d Vk,Wx,Ib (66),(ev)
+66: vfpclassps/d Vk,Wx,Ib (66),(ev) | vfpclassph Vx,Wx,Ib (ev)
+67: vfpclassss/d Vk,Wx,Ib (66),(ev) | vfpclasssh Vx,Wx,Ib (ev)
 70: vpshldw Vx,Hx,Wx,Ib (66),(ev)
 71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev)
 72: vpshrdw Vx,Hx,Wx,Ib (66),(ev)
 73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev)
+c2: vcmpph Vx,Hx,Wx,Ib (ev) | vcmpsh Vx,Hx,Wx,Ib (F3),(ev)
 cc: sha1rnds4 Vdq,Wdq,Ib
 ce: vgf2p8affineqb Vx,Wx,Ib (66)
 cf: vgf2p8affineinvqb Vx,Wx,Ib (66)
 df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
-f0: RORX Gy,Ey,Ib (F2),(v)
+f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B)
+EndTable
+
+Table: EVEX map 5
+Referrer:
+AVXcode: 5
+10: vmovsh Vx,Hx,Wx (F3),(ev) | vmovsh Vx,Wx (F3),(ev)
+11: vmovsh Wx,Hx,Vx (F3),(ev) | vmovsh Wx,Vx (F3),(ev)
+1d: vcvtps2phx Vx,Wx (66),(ev) | vcvtss2sh Vx,Hx,Wx (ev)
+2a: vcvtsi2sh Vx,Hx,Wx (F3),(ev)
+2c: vcvttsh2si Vx,Wx (F3),(ev)
+2d: vcvtsh2si Vx,Wx (F3),(ev)
+2e: vucomish Vx,Wx (ev)
+2f: vcomish Vx,Wx (ev)
+51: vsqrtph Vx,Wx (ev) | vsqrtsh Vx,Hx,Wx (F3),(ev)
+58: vaddph Vx,Hx,Wx (ev) | vaddsh Vx,Hx,Wx (F3),(ev)
+59: vmulph Vx,Hx,Wx (ev) | vmulsh Vx,Hx,Wx (F3),(ev)
+5a: vcvtpd2ph Vx,Wx (66),(ev) | vcvtph2pd Vx,Wx (ev) | vcvtsd2sh Vx,Hx,Wx (F2),(ev) | vcvtsh2sd Vx,Hx,Wx (F3),(ev)
+5b: vcvtdq2ph Vx,Wx (ev) | vcvtph2dq Vx,Wx (66),(ev) | vcvtqq2ph Vx,Wx (ev) | vcvttph2dq Vx,Wx (F3),(ev)
+5c: vsubph Vx,Hx,Wx (ev) | vsubsh Vx,Hx,Wx (F3),(ev)
+5d: vminph Vx,Hx,Wx (ev) | vminsh Vx,Hx,Wx (F3),(ev)
+5e: vdivph Vx,Hx,Wx (ev) | vdivsh Vx,Hx,Wx (F3),(ev)
+5f: vmaxph Vx,Hx,Wx (ev) | vmaxsh Vx,Hx,Wx (F3),(ev)
+6e: vmovw Vx,Wx (66),(ev)
+78: vcvttph2udq Vx,Wx (ev) | vcvttph2uqq Vx,Wx (66),(ev) | vcvttsh2usi Vx,Wx (F3),(ev)
+79: vcvtph2udq Vx,Wx (ev) | vcvtph2uqq Vx,Wx (66),(ev) | vcvtsh2usi Vx,Wx (F3),(ev)
+7a: vcvttph2qq Vx,Wx (66),(ev) | vcvtudq2ph Vx,Wx (F2),(ev) | vcvtuqq2ph Vx,Wx (F2),(ev)
+7b: vcvtph2qq Vx,Wx (66),(ev) | vcvtusi2sh Vx,Hx,Wx (F3),(ev)
+7c: vcvttph2uw Vx,Wx (ev) | vcvttph2w Vx,Wx (66),(ev)
+7d: vcvtph2uw Vx,Wx (ev) | vcvtph2w Vx,Wx (66),(ev) | vcvtuw2ph Vx,Wx (F2),(ev) | vcvtw2ph Vx,Wx (F3),(ev)
+7e: vmovw Wx,Vx (66),(ev)
+EndTable
+
+Table: EVEX map 6
+Referrer:
+AVXcode: 6
+13: vcvtph2psx Vx,Wx (66),(ev) | vcvtsh2ss Vx,Hx,Wx (ev)
+2c: vscalefph Vx,Hx,Wx (66),(ev)
+2d: vscalefsh Vx,Hx,Wx (66),(ev)
+42: vgetexpph Vx,Wx (66),(ev)
+43: vgetexpsh Vx,Hx,Wx (66),(ev)
+4c: vrcpph Vx,Wx (66),(ev)
+4d: vrcpsh Vx,Hx,Wx (66),(ev)
+4e: vrsqrtph Vx,Wx (66),(ev)
+4f: vrsqrtsh Vx,Hx,Wx (66),(ev)
+56: vfcmaddcph Vx,Hx,Wx (F2),(ev) | vfmaddcph Vx,Hx,Wx (F3),(ev)
+57: vfcmaddcsh Vx,Hx,Wx (F2),(ev) | vfmaddcsh Vx,Hx,Wx (F3),(ev)
+96: vfmaddsub132ph Vx,Hx,Wx (66),(ev)
+97: vfmsubadd132ph Vx,Hx,Wx (66),(ev)
+98: vfmadd132ph Vx,Hx,Wx (66),(ev)
+99: vfmadd132sh Vx,Hx,Wx (66),(ev)
+9a: vfmsub132ph Vx,Hx,Wx (66),(ev)
+9b: vfmsub132sh Vx,Hx,Wx (66),(ev)
+9c: vfnmadd132ph Vx,Hx,Wx (66),(ev)
+9d: vfnmadd132sh Vx,Hx,Wx (66),(ev)
+9e: vfnmsub132ph Vx,Hx,Wx (66),(ev)
+9f: vfnmsub132sh Vx,Hx,Wx (66),(ev)
+a6: vfmaddsub213ph Vx,Hx,Wx (66),(ev)
+a7: vfmsubadd213ph Vx,Hx,Wx (66),(ev)
+a8: vfmadd213ph Vx,Hx,Wx (66),(ev)
+a9: vfmadd213sh Vx,Hx,Wx (66),(ev)
+aa: vfmsub213ph Vx,Hx,Wx (66),(ev)
+ab: vfmsub213sh Vx,Hx,Wx (66),(ev)
+ac: vfnmadd213ph Vx,Hx,Wx (66),(ev)
+ad: vfnmadd213sh Vx,Hx,Wx (66),(ev)
+ae: vfnmsub213ph Vx,Hx,Wx (66),(ev)
+af: vfnmsub213sh Vx,Hx,Wx (66),(ev)
+b6: vfmaddsub231ph Vx,Hx,Wx (66),(ev)
+b7: vfmsubadd231ph Vx,Hx,Wx (66),(ev)
+b8: vfmadd231ph Vx,Hx,Wx (66),(ev)
+b9: vfmadd231sh Vx,Hx,Wx (66),(ev)
+ba: vfmsub231ph Vx,Hx,Wx (66),(ev)
+bb: vfmsub231sh Vx,Hx,Wx (66),(ev)
+bc: vfnmadd231ph Vx,Hx,Wx (66),(ev)
+bd: vfnmadd231sh Vx,Hx,Wx (66),(ev)
+be: vfnmsub231ph Vx,Hx,Wx (66),(ev)
+bf: vfnmsub231sh Vx,Hx,Wx (66),(ev)
+d6: vfcmulcph Vx,Hx,Wx (F2),(ev) | vfmulcph Vx,Hx,Wx (F3),(ev)
+d7: vfcmulcsh Vx,Hx,Wx (F2),(ev) | vfmulcsh Vx,Hx,Wx (F3),(ev)
 EndTable
 
 GrpTable: Grp1
@@ -970,7 +1055,7 @@ GrpTable: Grp7
 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
 3: LIDT Ms
 4: SMSW Mw/Rv
-5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B)
+5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) | CLUI (F3),(110),(11B) | SERIALIZE (000),(11B) | STUI (F3),(111),(11B) | TESTUI (F3)(101)(11B) | UIRET (F3),(100),(11B) | XRESLDTRK (F2),(000),(11B) | XSUSLDTRK (F2),(001),(11B)
 6: LMSW Ew
 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
 EndTable
@@ -987,7 +1072,7 @@ GrpTable: Grp9
 3: xrstors
 4: xsavec
 5: xsaves
-6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | SENDUIPI Gq (F3)
 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
 EndTable
 
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 026031b3b782..17a492c27306 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -615,6 +615,7 @@ static bool memremap_is_efi_data(resource_size_t phys_addr,
 static bool memremap_is_setup_data(resource_size_t phys_addr,
 				   unsigned long size)
 {
+	struct setup_indirect *indirect;
 	struct setup_data *data;
 	u64 paddr, paddr_next;
 
@@ -627,6 +628,10 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
 
 		data = memremap(paddr, sizeof(*data),
 				MEMREMAP_WB | MEMREMAP_DEC);
+		if (!data) {
+			pr_warn("failed to memremap setup_data entry\n");
+			return false;
+		}
 
 		paddr_next = data->next;
 		len = data->len;
@@ -636,10 +641,21 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
 			return true;
 		}
 
-		if (data->type == SETUP_INDIRECT &&
-		    ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
-			paddr = ((struct setup_indirect *)data->data)->addr;
-			len = ((struct setup_indirect *)data->data)->len;
+		if (data->type == SETUP_INDIRECT) {
+			memunmap(data);
+			data = memremap(paddr, sizeof(*data) + len,
+					MEMREMAP_WB | MEMREMAP_DEC);
+			if (!data) {
+				pr_warn("failed to memremap indirect setup_data\n");
+				return false;
+			}
+
+			indirect = (struct setup_indirect *)data->data;
+
+			if (indirect->type != SETUP_INDIRECT) {
+				paddr = indirect->addr;
+				len = indirect->len;
+			}
 		}
 
 		memunmap(data);
@@ -660,22 +676,51 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
 static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
 						unsigned long size)
 {
+	struct setup_indirect *indirect;
 	struct setup_data *data;
 	u64 paddr, paddr_next;
 
 	paddr = boot_params.hdr.setup_data;
 	while (paddr) {
-		unsigned int len;
+		unsigned int len, size;
 
 		if (phys_addr == paddr)
 			return true;
 
 		data = early_memremap_decrypted(paddr, sizeof(*data));
+		if (!data) {
+			pr_warn("failed to early memremap setup_data entry\n");
+			return false;
+		}
+
+		size = sizeof(*data);
 
 		paddr_next = data->next;
 		len = data->len;
 
-		early_memunmap(data, sizeof(*data));
+		if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
+			early_memunmap(data, sizeof(*data));
+			return true;
+		}
+
+		if (data->type == SETUP_INDIRECT) {
+			size += len;
+			early_memunmap(data, sizeof(*data));
+			data = early_memremap_decrypted(paddr, size);
+			if (!data) {
+				pr_warn("failed to early memremap indirect setup_data\n");
+				return false;
+			}
+
+			indirect = (struct setup_indirect *)data->data;
+
+			if (indirect->type != SETUP_INDIRECT) {
+				paddr = indirect->addr;
+				len = indirect->len;
+			}
+		}
+
+		early_memunmap(data, size);
 
 		if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
 			return true;
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 2b2d018ea345..6169053c2854 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -177,25 +177,6 @@ void __init sme_map_bootdata(char *real_mode_data)
 	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
 }
 
-void __init sme_early_init(void)
-{
-	unsigned int i;
-
-	if (!sme_me_mask)
-		return;
-
-	early_pmd_flags = __sme_set(early_pmd_flags);
-
-	__supported_pte_mask = __sme_set(__supported_pte_mask);
-
-	/* Update the protection map with memory encryption mask */
-	for (i = 0; i < ARRAY_SIZE(protection_map); i++)
-		protection_map[i] = pgprot_encrypted(protection_map[i]);
-
-	if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
-		swiotlb_force = SWIOTLB_FORCE;
-}
-
 void __init sev_setup_arch(void)
 {
 	phys_addr_t total_mem = memblock_phys_mem_size();
@@ -256,7 +237,17 @@ static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
 	return pfn;
 }
 
-void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
+static bool amd_enc_tlb_flush_required(bool enc)
+{
+	return true;
+}
+
+static bool amd_enc_cache_flush_required(void)
+{
+	return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT);
+}
+
+static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
 {
 #ifdef CONFIG_PARAVIRT
 	unsigned long sz = npages << PAGE_SHIFT;
@@ -287,6 +278,19 @@ void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
 #endif
 }
 
+static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
+{
+}
+
+/* Return true unconditionally: return value doesn't matter for the SEV side */
+static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
+{
+	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
+		enc_dec_hypercall(vaddr, npages, enc);
+
+	return true;
+}
+
 static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
 {
 	pgprot_t old_prot, new_prot;
@@ -392,7 +396,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
 
 	ret = 0;
 
-	notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
+	early_set_mem_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
 out:
 	__flush_tlb_all();
 	return ret;
@@ -410,7 +414,31 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
 
 void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
 {
-	notify_range_enc_status_changed(vaddr, npages, enc);
+	enc_dec_hypercall(vaddr, npages, enc);
+}
+
+void __init sme_early_init(void)
+{
+	unsigned int i;
+
+	if (!sme_me_mask)
+		return;
+
+	early_pmd_flags = __sme_set(early_pmd_flags);
+
+	__supported_pte_mask = __sme_set(__supported_pte_mask);
+
+	/* Update the protection map with memory encryption mask */
+	for (i = 0; i < ARRAY_SIZE(protection_map); i++)
+		protection_map[i] = pgprot_encrypted(protection_map[i]);
+
+	if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+		swiotlb_force = SWIOTLB_FORCE;
+
+	x86_platform.guest.enc_status_change_prepare = amd_enc_status_change_prepare;
+	x86_platform.guest.enc_status_change_finish  = amd_enc_status_change_finish;
+	x86_platform.guest.enc_tlb_flush_required    = amd_enc_tlb_flush_required;
+	x86_platform.guest.enc_cache_flush_required  = amd_enc_cache_flush_required;
 }
 
 void __init mem_encrypt_free_decrypted_mem(void)
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 3f0abb403340..b43bc24d2bb6 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -44,6 +44,7 @@
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/cmdline.h>
+#include <asm/coco.h>
 
 #include "mm_internal.h"
 
@@ -565,8 +566,7 @@ void __init sme_enable(struct boot_params *bp)
 	} else {
 		/* SEV state cannot be controlled by a command line option */
 		sme_me_mask = me_mask;
-		physical_mask &= ~sme_me_mask;
-		return;
+		goto out;
 	}
 
 	/*
@@ -600,6 +600,10 @@ void __init sme_enable(struct boot_params *bp)
 		sme_me_mask = 0;
 	else
 		sme_me_mask = active_by_default ? me_mask : 0;
-
-	physical_mask &= ~sme_me_mask;
+out:
+	if (sme_me_mask) {
+		physical_mask &= ~sme_me_mask;
+		cc_set_vendor(CC_VENDOR_AMD);
+		cc_set_mask(sme_me_mask);
+	}
 }
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index b4072115c8ef..abf5ed76e4b7 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1989,6 +1989,7 @@ int set_memory_global(unsigned long addr, int numpages)
  */
 static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
 {
+	pgprot_t empty = __pgprot(0);
 	struct cpa_data cpa;
 	int ret;
 
@@ -1999,18 +2000,20 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
 	memset(&cpa, 0, sizeof(cpa));
 	cpa.vaddr = &addr;
 	cpa.numpages = numpages;
-	cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0);
-	cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC);
+	cpa.mask_set = enc ? pgprot_encrypted(empty) : pgprot_decrypted(empty);
+	cpa.mask_clr = enc ? pgprot_decrypted(empty) : pgprot_encrypted(empty);
 	cpa.pgd = init_mm.pgd;
 
 	/* Must avoid aliasing mappings in the highmem code */
 	kmap_flush_unused();
 	vm_unmap_aliases();
 
-	/*
-	 * Before changing the encryption attribute, we need to flush caches.
-	 */
-	cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT));
+	/* Flush the caches as needed before changing the encryption attribute. */
+	if (x86_platform.guest.enc_tlb_flush_required(enc))
+		cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required());
+
+	/* Notify hypervisor that we are about to set/clr encryption attribute. */
+	x86_platform.guest.enc_status_change_prepare(addr, numpages, enc);
 
 	ret = __change_page_attr_set_clr(&cpa, 1);
 
@@ -2023,11 +2026,11 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
 	 */
 	cpa_flush(&cpa, 0);
 
-	/*
-	 * Notify hypervisor that a given memory range is mapped encrypted
-	 * or decrypted.
-	 */
-	notify_range_enc_status_changed(addr, numpages, enc);
+	/* Notify hypervisor that we have successfully set/clr encryption attribute. */
+	if (!ret) {
+		if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc))
+			ret = -EIO;
+	}
 
 	return ret;
 }
@@ -2121,12 +2124,6 @@ int set_pages_array_wc(struct page **pages, int numpages)
 }
 EXPORT_SYMBOL(set_pages_array_wc);
 
-int set_pages_array_wt(struct page **pages, int numpages)
-{
-	return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT);
-}
-EXPORT_SYMBOL_GPL(set_pages_array_wt);
-
 int set_pages_wb(struct page *page, int numpages)
 {
 	unsigned long addr = (unsigned long)page_address(page);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 2b1e266ff95c..0ecb140864b2 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -394,7 +394,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
 	u8 *prog = *pprog;
 
 #ifdef CONFIG_RETPOLINE
-	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
+	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
 		EMIT_LFENCE();
 		EMIT2(0xFF, 0xE0 + reg);
 	} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 40d6a06e41c8..ead7e5b3a975 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -8,6 +8,7 @@ endmenu
 
 config UML_X86
 	def_bool y
+	select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32
 
 config 64BIT
 	bool "64-bit kernel" if "$(SUBARCH)" = "x86"
diff --git a/block/blk-core.c b/block/blk-core.c
index 1039515c99d6..779b4a1f66ac 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -50,6 +50,7 @@
 #include "blk-mq-sched.h"
 #include "blk-pm.h"
 #include "blk-throttle.h"
+#include "blk-rq-qos.h"
 
 struct dentry *blk_debugfs_root;
 
@@ -314,6 +315,9 @@ void blk_cleanup_queue(struct request_queue *q)
 	 */
 	blk_freeze_queue(q);
 
+	/* cleanup rq qos structures for queue without disk */
+	rq_qos_exit(q);
+
 	blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
 
 	blk_sync_queue(q);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d69ca91fbc8b..9a9185a0a2d1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2718,7 +2718,8 @@ static bool blk_mq_attempt_bio_merge(struct request_queue *q,
 
 static struct request *blk_mq_get_new_requests(struct request_queue *q,
 					       struct blk_plug *plug,
-					       struct bio *bio)
+					       struct bio *bio,
+					       unsigned int nsegs)
 {
 	struct blk_mq_alloc_data data = {
 		.q		= q,
@@ -2730,6 +2731,11 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	if (unlikely(bio_queue_enter(bio)))
 		return NULL;
 
+	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
+		goto queue_exit;
+
+	rq_qos_throttle(q, bio);
+
 	if (plug) {
 		data.nr_tags = plug->nr_ios;
 		plug->nr_ios = 1;
@@ -2742,12 +2748,13 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	rq_qos_cleanup(q, bio);
 	if (bio->bi_opf & REQ_NOWAIT)
 		bio_wouldblock_error(bio);
+queue_exit:
 	blk_queue_exit(q);
 	return NULL;
 }
 
 static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
-		struct blk_plug *plug, struct bio *bio)
+		struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
 {
 	struct request *rq;
 
@@ -2757,12 +2764,19 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
 	if (!rq || rq->q != q)
 		return NULL;
 
-	if (blk_mq_get_hctx_type(bio->bi_opf) != rq->mq_hctx->type)
+	if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
+		*bio = NULL;
+		return NULL;
+	}
+
+	rq_qos_throttle(q, *bio);
+
+	if (blk_mq_get_hctx_type((*bio)->bi_opf) != rq->mq_hctx->type)
 		return NULL;
-	if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
+	if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
 		return NULL;
 
-	rq->cmd_flags = bio->bi_opf;
+	rq->cmd_flags = (*bio)->bi_opf;
 	plug->cached_rq = rq_list_next(rq);
 	INIT_LIST_HEAD(&rq->queuelist);
 	return rq;
@@ -2800,14 +2814,11 @@ void blk_mq_submit_bio(struct bio *bio)
 	if (!bio_integrity_prep(bio))
 		return;
 
-	if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
-		return;
-
-	rq_qos_throttle(q, bio);
-
-	rq = blk_mq_get_cached_request(q, plug, bio);
+	rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
 	if (!rq) {
-		rq = blk_mq_get_new_requests(q, plug, bio);
+		if (!bio)
+			return;
+		rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
 		if (unlikely(!rq))
 			return;
 	}
diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 692365dee2bd..05b66ce9d1c9 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -22,6 +22,9 @@ static struct key *builtin_trusted_keys;
 #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
 static struct key *secondary_trusted_keys;
 #endif
+#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
+static struct key *machine_trusted_keys;
+#endif
 #ifdef CONFIG_INTEGRITY_PLATFORM_KEYRING
 static struct key *platform_trusted_keys;
 #endif
@@ -86,11 +89,50 @@ static __init struct key_restriction *get_builtin_and_secondary_restriction(void
 	if (!restriction)
 		panic("Can't allocate secondary trusted keyring restriction\n");
 
-	restriction->check = restrict_link_by_builtin_and_secondary_trusted;
+	if (IS_ENABLED(CONFIG_INTEGRITY_MACHINE_KEYRING))
+		restriction->check = restrict_link_by_builtin_secondary_and_machine;
+	else
+		restriction->check = restrict_link_by_builtin_and_secondary_trusted;
 
 	return restriction;
 }
 #endif
+#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
+void __init set_machine_trusted_keys(struct key *keyring)
+{
+	machine_trusted_keys = keyring;
+
+	if (key_link(secondary_trusted_keys, machine_trusted_keys) < 0)
+		panic("Can't link (machine) trusted keyrings\n");
+}
+
+/**
+ * restrict_link_by_builtin_secondary_and_machine - Restrict keyring addition.
+ * @dest_keyring: Keyring being linked to.
+ * @type: The type of key being added.
+ * @payload: The payload of the new key.
+ * @restrict_key: A ring of keys that can be used to vouch for the new cert.
+ *
+ * Restrict the addition of keys into a keyring based on the key-to-be-added
+ * being vouched for by a key in either the built-in, the secondary, or
+ * the machine keyrings.
+ */
+int restrict_link_by_builtin_secondary_and_machine(
+	struct key *dest_keyring,
+	const struct key_type *type,
+	const union key_payload *payload,
+	struct key *restrict_key)
+{
+	if (machine_trusted_keys && type == &key_type_keyring &&
+	    dest_keyring == secondary_trusted_keys &&
+	    payload == &machine_trusted_keys->payload)
+		/* Allow the machine keyring to be added to the secondary */
+		return 0;
+
+	return restrict_link_by_builtin_and_secondary_trusted(dest_keyring, type,
+							      payload, restrict_key);
+}
+#endif
 
 /*
  * Create the trusted keyrings
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 442765219c37..d6d7e84bb7f8 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -231,6 +231,13 @@ config CRYPTO_DH
 	help
 	  Generic implementation of the Diffie-Hellman algorithm.
 
+config CRYPTO_DH_RFC7919_GROUPS
+	bool "Support for RFC 7919 FFDHE group parameters"
+	depends on CRYPTO_DH
+	select CRYPTO_RNG_DEFAULT
+	help
+	  Provide support for RFC 7919 FFDHE group parameters. If unsure, say N.
+
 config CRYPTO_ECC
 	tristate
 	select CRYPTO_RNG_DEFAULT
@@ -267,7 +274,7 @@ config CRYPTO_ECRDSA
 
 config CRYPTO_SM2
 	tristate "SM2 algorithm"
-	select CRYPTO_SM3
+	select CRYPTO_LIB_SM3
 	select CRYPTO_AKCIPHER
 	select CRYPTO_MANAGER
 	select MPILIB
@@ -425,6 +432,7 @@ config CRYPTO_LRW
 	select CRYPTO_SKCIPHER
 	select CRYPTO_MANAGER
 	select CRYPTO_GF128MUL
+	select CRYPTO_ECB
 	help
 	  LRW: Liskov Rivest Wagner, a tweakable, non malleable, non movable
 	  narrow block cipher mode for dm-crypt.  Use it with cipher
@@ -999,6 +1007,7 @@ config CRYPTO_SHA3
 config CRYPTO_SM3
 	tristate "SM3 digest algorithm"
 	select CRYPTO_HASH
+	select CRYPTO_LIB_SM3
 	help
 	  SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3).
 	  It is part of the Chinese Commercial Cryptography suite.
@@ -1007,6 +1016,19 @@ config CRYPTO_SM3
 	  http://www.oscca.gov.cn/UpFile/20101222141857786.pdf
 	  https://datatracker.ietf.org/doc/html/draft-shen-sm3-hash
 
+config CRYPTO_SM3_AVX_X86_64
+	tristate "SM3 digest algorithm (x86_64/AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_HASH
+	select CRYPTO_LIB_SM3
+	help
+	  SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3).
+	  It is part of the Chinese Commercial Cryptography suite. This is
+	  SM3 optimized implementation using Advanced Vector Extensions (AVX)
+	  when available.
+
+	  If unsure, say N.
+
 config CRYPTO_STREEBOG
 	tristate "Streebog Hash Function"
 	select CRYPTO_HASH
@@ -1847,6 +1869,7 @@ config CRYPTO_JITTERENTROPY
 
 config CRYPTO_KDF800108_CTR
 	tristate
+	select CRYPTO_HMAC
 	select CRYPTO_SHA256
 
 config CRYPTO_USER_API
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 76fdaa16bd4a..d1c99288af3e 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -6,6 +6,7 @@
  */
 
 #include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/fips.h>
@@ -21,6 +22,11 @@
 
 static LIST_HEAD(crypto_template_list);
 
+#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
+DEFINE_PER_CPU(bool, crypto_simd_disabled_for_test);
+EXPORT_PER_CPU_SYMBOL_GPL(crypto_simd_disabled_for_test);
+#endif
+
 static inline void crypto_check_module_sig(struct module *mod)
 {
 	if (fips_enabled && mod && !module_sig_ok(mod))
@@ -322,9 +328,17 @@ void crypto_alg_tested(const char *name, int err)
 found:
 	q->cra_flags |= CRYPTO_ALG_DEAD;
 	alg = test->adult;
-	if (err || list_empty(&alg->cra_list))
+
+	if (list_empty(&alg->cra_list))
 		goto complete;
 
+	if (err == -ECANCELED)
+		alg->cra_flags |= CRYPTO_ALG_FIPS_INTERNAL;
+	else if (err)
+		goto complete;
+	else
+		alg->cra_flags &= ~CRYPTO_ALG_FIPS_INTERNAL;
+
 	alg->cra_flags |= CRYPTO_ALG_TESTED;
 
 	/* Only satisfy larval waiters if we are the best. */
@@ -604,6 +618,7 @@ int crypto_register_instance(struct crypto_template *tmpl,
 {
 	struct crypto_larval *larval;
 	struct crypto_spawn *spawn;
+	u32 fips_internal = 0;
 	int err;
 
 	err = crypto_check_alg(&inst->alg);
@@ -626,11 +641,15 @@ int crypto_register_instance(struct crypto_template *tmpl,
 		spawn->inst = inst;
 		spawn->registered = true;
 
+		fips_internal |= spawn->alg->cra_flags;
+
 		crypto_mod_put(spawn->alg);
 
 		spawn = next;
 	}
 
+	inst->alg.cra_flags |= (fips_internal & CRYPTO_ALG_FIPS_INTERNAL);
+
 	larval = __crypto_register_alg(&inst->alg);
 	if (IS_ERR(larval))
 		goto unlock;
@@ -683,7 +702,8 @@ int crypto_grab_spawn(struct crypto_spawn *spawn, struct crypto_instance *inst,
 	if (IS_ERR(name))
 		return PTR_ERR(name);
 
-	alg = crypto_find_alg(name, spawn->frontend, type, mask);
+	alg = crypto_find_alg(name, spawn->frontend,
+			      type | CRYPTO_ALG_FIPS_INTERNAL, mask);
 	if (IS_ERR(alg))
 		return PTR_ERR(alg);
 
@@ -1002,7 +1022,13 @@ void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
 	}
 
 	while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
-		*(u64 *)dst = *(u64 *)src1 ^  *(u64 *)src2;
+		if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+			u64 l = get_unaligned((u64 *)src1) ^
+				get_unaligned((u64 *)src2);
+			put_unaligned(l, (u64 *)dst);
+		} else {
+			*(u64 *)dst = *(u64 *)src1 ^ *(u64 *)src2;
+		}
 		dst += 8;
 		src1 += 8;
 		src2 += 8;
@@ -1010,7 +1036,13 @@ void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
 	}
 
 	while (len >= 4 && !(relalign & 3)) {
-		*(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2;
+		if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+			u32 l = get_unaligned((u32 *)src1) ^
+				get_unaligned((u32 *)src2);
+			put_unaligned(l, (u32 *)dst);
+		} else {
+			*(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2;
+		}
 		dst += 4;
 		src1 += 4;
 		src2 += 4;
@@ -1018,7 +1050,13 @@ void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
 	}
 
 	while (len >= 2 && !(relalign & 1)) {
-		*(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2;
+		if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+			u16 l = get_unaligned((u16 *)src1) ^
+				get_unaligned((u16 *)src2);
+			put_unaligned(l, (u16 *)dst);
+		} else {
+			*(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2;
+		}
 		dst += 2;
 		src1 += 2;
 		src2 += 2;
diff --git a/crypto/api.c b/crypto/api.c
index 7ddfe946dd56..69508ae9345e 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -223,6 +223,8 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
 	else if (crypto_is_test_larval(larval) &&
 		 !(alg->cra_flags & CRYPTO_ALG_TESTED))
 		alg = ERR_PTR(-EAGAIN);
+	else if (alg->cra_flags & CRYPTO_ALG_FIPS_INTERNAL)
+		alg = ERR_PTR(-EAGAIN);
 	else if (!crypto_mod_get(alg))
 		alg = ERR_PTR(-EAGAIN);
 	crypto_mod_put(&larval->alg);
@@ -233,6 +235,7 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
 static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type,
 					    u32 mask)
 {
+	const u32 fips = CRYPTO_ALG_FIPS_INTERNAL;
 	struct crypto_alg *alg;
 	u32 test = 0;
 
@@ -240,8 +243,20 @@ static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type,
 		test |= CRYPTO_ALG_TESTED;
 
 	down_read(&crypto_alg_sem);
-	alg = __crypto_alg_lookup(name, type | test, mask | test);
-	if (!alg && test) {
+	alg = __crypto_alg_lookup(name, (type | test) & ~fips,
+				  (mask | test) & ~fips);
+	if (alg) {
+		if (((type | mask) ^ fips) & fips)
+			mask |= fips;
+		mask &= fips;
+
+		if (!crypto_is_larval(alg) &&
+		    ((type ^ alg->cra_flags) & mask)) {
+			/* Algorithm is disallowed in FIPS mode. */
+			crypto_mod_put(alg);
+			alg = ERR_PTR(-ENOENT);
+		}
+	} else if (test) {
 		alg = __crypto_alg_lookup(name, type, mask);
 		if (alg && !crypto_is_larval(alg)) {
 			/* Test failed */
diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig
index 1f1f004dc757..460bc5d0a828 100644
--- a/crypto/asymmetric_keys/Kconfig
+++ b/crypto/asymmetric_keys/Kconfig
@@ -22,18 +22,6 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
 	  appropriate hash algorithms (such as SHA-1) must be available.
 	  ENOPKG will be reported if the requisite algorithm is unavailable.
 
-config ASYMMETRIC_TPM_KEY_SUBTYPE
-	tristate "Asymmetric TPM backed private key subtype"
-	depends on TCG_TPM
-	depends on TRUSTED_KEYS
-	select CRYPTO_HMAC
-	select CRYPTO_SHA1
-	select CRYPTO_HASH_INFO
-	help
-	  This option provides support for TPM backed private key type handling.
-	  Operations such as sign, verify, encrypt, decrypt are performed by
-	  the TPM after the private key is loaded.
-
 config X509_CERTIFICATE_PARSER
 	tristate "X.509 certificate parser"
 	depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
@@ -54,15 +42,6 @@ config PKCS8_PRIVATE_KEY_PARSER
 	  private key data and provides the ability to instantiate a crypto key
 	  from that data.
 
-config TPM_KEY_PARSER
-	tristate "TPM private key parser"
-	depends on ASYMMETRIC_TPM_KEY_SUBTYPE
-	select ASN1
-	help
-	  This option provides support for parsing TPM format blobs for
-	  private key data and provides the ability to instantiate a crypto key
-	  from that data.
-
 config PKCS7_MESSAGE_PARSER
 	tristate "PKCS#7 message parser"
 	depends on X509_CERTIFICATE_PARSER
diff --git a/crypto/asymmetric_keys/Makefile b/crypto/asymmetric_keys/Makefile
index 28b91adba2ae..c38424f55b08 100644
--- a/crypto/asymmetric_keys/Makefile
+++ b/crypto/asymmetric_keys/Makefile
@@ -11,7 +11,6 @@ asymmetric_keys-y := \
 	signature.o
 
 obj-$(CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE) += public_key.o
-obj-$(CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE) += asym_tpm.o
 
 #
 # X.509 Certificate handling
@@ -75,14 +74,3 @@ verify_signed_pefile-y := \
 
 $(obj)/mscode_parser.o: $(obj)/mscode.asn1.h $(obj)/mscode.asn1.h
 $(obj)/mscode.asn1.o: $(obj)/mscode.asn1.c $(obj)/mscode.asn1.h
-
-#
-# TPM private key parsing
-#
-obj-$(CONFIG_TPM_KEY_PARSER) += tpm_key_parser.o
-tpm_key_parser-y := \
-	tpm.asn1.o \
-	tpm_parser.o
-
-$(obj)/tpm_parser.o: $(obj)/tpm.asn1.h
-$(obj)/tpm.asn1.o: $(obj)/tpm.asn1.c $(obj)/tpm.asn1.h
diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
deleted file mode 100644
index 0959613560b9..000000000000
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ /dev/null
@@ -1,957 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define pr_fmt(fmt) "ASYM-TPM: "fmt
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/export.h>
-#include <linux/kernel.h>
-#include <linux/seq_file.h>
-#include <linux/scatterlist.h>
-#include <linux/tpm.h>
-#include <linux/tpm_command.h>
-#include <crypto/akcipher.h>
-#include <crypto/hash.h>
-#include <crypto/sha1.h>
-#include <asm/unaligned.h>
-#include <keys/asymmetric-subtype.h>
-#include <keys/trusted_tpm.h>
-#include <crypto/asym_tpm_subtype.h>
-#include <crypto/public_key.h>
-
-#define TPM_ORD_FLUSHSPECIFIC	186
-#define TPM_ORD_LOADKEY2	65
-#define TPM_ORD_UNBIND		30
-#define TPM_ORD_SIGN		60
-
-#define TPM_RT_KEY                      0x00000001
-
-/*
- * Load a TPM key from the blob provided by userspace
- */
-static int tpm_loadkey2(struct tpm_buf *tb,
-			uint32_t keyhandle, unsigned char *keyauth,
-			const unsigned char *keyblob, int keybloblen,
-			uint32_t *newhandle)
-{
-	unsigned char nonceodd[TPM_NONCE_SIZE];
-	unsigned char enonce[TPM_NONCE_SIZE];
-	unsigned char authdata[SHA1_DIGEST_SIZE];
-	uint32_t authhandle = 0;
-	unsigned char cont = 0;
-	uint32_t ordinal;
-	int ret;
-
-	ordinal = htonl(TPM_ORD_LOADKEY2);
-
-	/* session for loading the key */
-	ret = oiap(tb, &authhandle, enonce);
-	if (ret < 0) {
-		pr_info("oiap failed (%d)\n", ret);
-		return ret;
-	}
-
-	/* generate odd nonce */
-	ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
-	if (ret < 0) {
-		pr_info("tpm_get_random failed (%d)\n", ret);
-		return ret;
-	}
-
-	/* calculate authorization HMAC value */
-	ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
-			   nonceodd, cont, sizeof(uint32_t), &ordinal,
-			   keybloblen, keyblob, 0, 0);
-	if (ret < 0)
-		return ret;
-
-	/* build the request buffer */
-	tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_LOADKEY2);
-	tpm_buf_append_u32(tb, keyhandle);
-	tpm_buf_append(tb, keyblob, keybloblen);
-	tpm_buf_append_u32(tb, authhandle);
-	tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
-	tpm_buf_append_u8(tb, cont);
-	tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE);
-
-	ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
-	if (ret < 0) {
-		pr_info("authhmac failed (%d)\n", ret);
-		return ret;
-	}
-
-	ret = TSS_checkhmac1(tb->data, ordinal, nonceodd, keyauth,
-			     SHA1_DIGEST_SIZE, 0, 0);
-	if (ret < 0) {
-		pr_info("TSS_checkhmac1 failed (%d)\n", ret);
-		return ret;
-	}
-
-	*newhandle = LOAD32(tb->data, TPM_DATA_OFFSET);
-	return 0;
-}
-
-/*
- * Execute the FlushSpecific TPM command
- */
-static int tpm_flushspecific(struct tpm_buf *tb, uint32_t handle)
-{
-	tpm_buf_reset(tb, TPM_TAG_RQU_COMMAND, TPM_ORD_FLUSHSPECIFIC);
-	tpm_buf_append_u32(tb, handle);
-	tpm_buf_append_u32(tb, TPM_RT_KEY);
-
-	return trusted_tpm_send(tb->data, MAX_BUF_SIZE);
-}
-
-/*
- * Decrypt a blob provided by userspace using a specific key handle.
- * The handle is a well known handle or previously loaded by e.g. LoadKey2
- */
-static int tpm_unbind(struct tpm_buf *tb,
-			uint32_t keyhandle, unsigned char *keyauth,
-			const unsigned char *blob, uint32_t bloblen,
-			void *out, uint32_t outlen)
-{
-	unsigned char nonceodd[TPM_NONCE_SIZE];
-	unsigned char enonce[TPM_NONCE_SIZE];
-	unsigned char authdata[SHA1_DIGEST_SIZE];
-	uint32_t authhandle = 0;
-	unsigned char cont = 0;
-	uint32_t ordinal;
-	uint32_t datalen;
-	int ret;
-
-	ordinal = htonl(TPM_ORD_UNBIND);
-	datalen = htonl(bloblen);
-
-	/* session for loading the key */
-	ret = oiap(tb, &authhandle, enonce);
-	if (ret < 0) {
-		pr_info("oiap failed (%d)\n", ret);
-		return ret;
-	}
-
-	/* generate odd nonce */
-	ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
-	if (ret < 0) {
-		pr_info("tpm_get_random failed (%d)\n", ret);
-		return ret;
-	}
-
-	/* calculate authorization HMAC value */
-	ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
-			   nonceodd, cont, sizeof(uint32_t), &ordinal,
-			   sizeof(uint32_t), &datalen,
-			   bloblen, blob, 0, 0);
-	if (ret < 0)
-		return ret;
-
-	/* build the request buffer */
-	tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_UNBIND);
-	tpm_buf_append_u32(tb, keyhandle);
-	tpm_buf_append_u32(tb, bloblen);
-	tpm_buf_append(tb, blob, bloblen);
-	tpm_buf_append_u32(tb, authhandle);
-	tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
-	tpm_buf_append_u8(tb, cont);
-	tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE);
-
-	ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
-	if (ret < 0) {
-		pr_info("authhmac failed (%d)\n", ret);
-		return ret;
-	}
-
-	datalen = LOAD32(tb->data, TPM_DATA_OFFSET);
-
-	ret = TSS_checkhmac1(tb->data, ordinal, nonceodd,
-			     keyauth, SHA1_DIGEST_SIZE,
-			     sizeof(uint32_t), TPM_DATA_OFFSET,
-			     datalen, TPM_DATA_OFFSET + sizeof(uint32_t),
-			     0, 0);
-	if (ret < 0) {
-		pr_info("TSS_checkhmac1 failed (%d)\n", ret);
-		return ret;
-	}
-
-	memcpy(out, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t),
-	       min(outlen, datalen));
-
-	return datalen;
-}
-
-/*
- * Sign a blob provided by userspace (that has had the hash function applied)
- * using a specific key handle.  The handle is assumed to have been previously
- * loaded by e.g. LoadKey2.
- *
- * Note that the key signature scheme of the used key should be set to
- * TPM_SS_RSASSAPKCS1v15_DER.  This allows the hashed input to be of any size
- * up to key_length_in_bytes - 11 and not be limited to size 20 like the
- * TPM_SS_RSASSAPKCS1v15_SHA1 signature scheme.
- */
-static int tpm_sign(struct tpm_buf *tb,
-		    uint32_t keyhandle, unsigned char *keyauth,
-		    const unsigned char *blob, uint32_t bloblen,
-		    void *out, uint32_t outlen)
-{
-	unsigned char nonceodd[TPM_NONCE_SIZE];
-	unsigned char enonce[TPM_NONCE_SIZE];
-	unsigned char authdata[SHA1_DIGEST_SIZE];
-	uint32_t authhandle = 0;
-	unsigned char cont = 0;
-	uint32_t ordinal;
-	uint32_t datalen;
-	int ret;
-
-	ordinal = htonl(TPM_ORD_SIGN);
-	datalen = htonl(bloblen);
-
-	/* session for loading the key */
-	ret = oiap(tb, &authhandle, enonce);
-	if (ret < 0) {
-		pr_info("oiap failed (%d)\n", ret);
-		return ret;
-	}
-
-	/* generate odd nonce */
-	ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
-	if (ret < 0) {
-		pr_info("tpm_get_random failed (%d)\n", ret);
-		return ret;
-	}
-
-	/* calculate authorization HMAC value */
-	ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
-			   nonceodd, cont, sizeof(uint32_t), &ordinal,
-			   sizeof(uint32_t), &datalen,
-			   bloblen, blob, 0, 0);
-	if (ret < 0)
-		return ret;
-
-	/* build the request buffer */
-	tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_SIGN);
-	tpm_buf_append_u32(tb, keyhandle);
-	tpm_buf_append_u32(tb, bloblen);
-	tpm_buf_append(tb, blob, bloblen);
-	tpm_buf_append_u32(tb, authhandle);
-	tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
-	tpm_buf_append_u8(tb, cont);
-	tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE);
-
-	ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
-	if (ret < 0) {
-		pr_info("authhmac failed (%d)\n", ret);
-		return ret;
-	}
-
-	datalen = LOAD32(tb->data, TPM_DATA_OFFSET);
-
-	ret = TSS_checkhmac1(tb->data, ordinal, nonceodd,
-			     keyauth, SHA1_DIGEST_SIZE,
-			     sizeof(uint32_t), TPM_DATA_OFFSET,
-			     datalen, TPM_DATA_OFFSET + sizeof(uint32_t),
-			     0, 0);
-	if (ret < 0) {
-		pr_info("TSS_checkhmac1 failed (%d)\n", ret);
-		return ret;
-	}
-
-	memcpy(out, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t),
-	       min(datalen, outlen));
-
-	return datalen;
-}
-
-/* Room to fit two u32 zeros for algo id and parameters length. */
-#define SETKEY_PARAMS_SIZE (sizeof(u32) * 2)
-
-/*
- * Maximum buffer size for the BER/DER encoded public key.  The public key
- * is of the form SEQUENCE { INTEGER n, INTEGER e } where n is a maximum 2048
- * bit key and e is usually 65537
- * The encoding overhead is:
- * - max 4 bytes for SEQUENCE
- *   - max 4 bytes for INTEGER n type/length
- *     - 257 bytes of n
- *   - max 2 bytes for INTEGER e type/length
- *     - 3 bytes of e
- * - 4+4 of zeros for set_pub_key parameters (SETKEY_PARAMS_SIZE)
- */
-#define PUB_KEY_BUF_SIZE (4 + 4 + 257 + 2 + 3 + SETKEY_PARAMS_SIZE)
-
-/*
- * Provide a part of a description of the key for /proc/keys.
- */
-static void asym_tpm_describe(const struct key *asymmetric_key,
-			      struct seq_file *m)
-{
-	struct tpm_key *tk = asymmetric_key->payload.data[asym_crypto];
-
-	if (!tk)
-		return;
-
-	seq_printf(m, "TPM1.2/Blob");
-}
-
-static void asym_tpm_destroy(void *payload0, void *payload3)
-{
-	struct tpm_key *tk = payload0;
-
-	if (!tk)
-		return;
-
-	kfree(tk->blob);
-	tk->blob_len = 0;
-
-	kfree(tk);
-}
-
-/* How many bytes will it take to encode the length */
-static inline uint32_t definite_length(uint32_t len)
-{
-	if (len <= 127)
-		return 1;
-	if (len <= 255)
-		return 2;
-	return 3;
-}
-
-static inline uint8_t *encode_tag_length(uint8_t *buf, uint8_t tag,
-					 uint32_t len)
-{
-	*buf++ = tag;
-
-	if (len <= 127) {
-		buf[0] = len;
-		return buf + 1;
-	}
-
-	if (len <= 255) {
-		buf[0] = 0x81;
-		buf[1] = len;
-		return buf + 2;
-	}
-
-	buf[0] = 0x82;
-	put_unaligned_be16(len, buf + 1);
-	return buf + 3;
-}
-
-static uint32_t derive_pub_key(const void *pub_key, uint32_t len, uint8_t *buf)
-{
-	uint8_t *cur = buf;
-	uint32_t n_len = definite_length(len) + 1 + len + 1;
-	uint32_t e_len = definite_length(3) + 1 + 3;
-	uint8_t e[3] = { 0x01, 0x00, 0x01 };
-
-	/* SEQUENCE */
-	cur = encode_tag_length(cur, 0x30, n_len + e_len);
-	/* INTEGER n */
-	cur = encode_tag_length(cur, 0x02, len + 1);
-	cur[0] = 0x00;
-	memcpy(cur + 1, pub_key, len);
-	cur += len + 1;
-	cur = encode_tag_length(cur, 0x02, sizeof(e));
-	memcpy(cur, e, sizeof(e));
-	cur += sizeof(e);
-	/* Zero parameters to satisfy set_pub_key ABI. */
-	memzero_explicit(cur, SETKEY_PARAMS_SIZE);
-
-	return cur - buf;
-}
-
-/*
- * Determine the crypto algorithm name.
- */
-static int determine_akcipher(const char *encoding, const char *hash_algo,
-			      char alg_name[CRYPTO_MAX_ALG_NAME])
-{
-	if (strcmp(encoding, "pkcs1") == 0) {
-		if (!hash_algo) {
-			strcpy(alg_name, "pkcs1pad(rsa)");
-			return 0;
-		}
-
-		if (snprintf(alg_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(rsa,%s)",
-			     hash_algo) >= CRYPTO_MAX_ALG_NAME)
-			return -EINVAL;
-
-		return 0;
-	}
-
-	if (strcmp(encoding, "raw") == 0) {
-		strcpy(alg_name, "rsa");
-		return 0;
-	}
-
-	return -ENOPKG;
-}
-
-/*
- * Query information about a key.
- */
-static int tpm_key_query(const struct kernel_pkey_params *params,
-			 struct kernel_pkey_query *info)
-{
-	struct tpm_key *tk = params->key->payload.data[asym_crypto];
-	int ret;
-	char alg_name[CRYPTO_MAX_ALG_NAME];
-	struct crypto_akcipher *tfm;
-	uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
-	uint32_t der_pub_key_len;
-	int len;
-
-	/* TPM only works on private keys, public keys still done in software */
-	ret = determine_akcipher(params->encoding, params->hash_algo, alg_name);
-	if (ret < 0)
-		return ret;
-
-	tfm = crypto_alloc_akcipher(alg_name, 0, 0);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
-
-	der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
-					 der_pub_key);
-
-	ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
-	if (ret < 0)
-		goto error_free_tfm;
-
-	len = crypto_akcipher_maxsize(tfm);
-
-	info->key_size = tk->key_len;
-	info->max_data_size = tk->key_len / 8;
-	info->max_sig_size = len;
-	info->max_enc_size = len;
-	info->max_dec_size = tk->key_len / 8;
-
-	info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT |
-			      KEYCTL_SUPPORTS_DECRYPT |
-			      KEYCTL_SUPPORTS_VERIFY |
-			      KEYCTL_SUPPORTS_SIGN;
-
-	ret = 0;
-error_free_tfm:
-	crypto_free_akcipher(tfm);
-	pr_devel("<==%s() = %d\n", __func__, ret);
-	return ret;
-}
-
-/*
- * Encryption operation is performed with the public key.  Hence it is done
- * in software
- */
-static int tpm_key_encrypt(struct tpm_key *tk,
-			   struct kernel_pkey_params *params,
-			   const void *in, void *out)
-{
-	char alg_name[CRYPTO_MAX_ALG_NAME];
-	struct crypto_akcipher *tfm;
-	struct akcipher_request *req;
-	struct crypto_wait cwait;
-	struct scatterlist in_sg, out_sg;
-	uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
-	uint32_t der_pub_key_len;
-	int ret;
-
-	pr_devel("==>%s()\n", __func__);
-
-	ret = determine_akcipher(params->encoding, params->hash_algo, alg_name);
-	if (ret < 0)
-		return ret;
-
-	tfm = crypto_alloc_akcipher(alg_name, 0, 0);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
-
-	der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
-					 der_pub_key);
-
-	ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
-	if (ret < 0)
-		goto error_free_tfm;
-
-	ret = -ENOMEM;
-	req = akcipher_request_alloc(tfm, GFP_KERNEL);
-	if (!req)
-		goto error_free_tfm;
-
-	sg_init_one(&in_sg, in, params->in_len);
-	sg_init_one(&out_sg, out, params->out_len);
-	akcipher_request_set_crypt(req, &in_sg, &out_sg, params->in_len,
-				   params->out_len);
-	crypto_init_wait(&cwait);
-	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
-				      CRYPTO_TFM_REQ_MAY_SLEEP,
-				      crypto_req_done, &cwait);
-
-	ret = crypto_akcipher_encrypt(req);
-	ret = crypto_wait_req(ret, &cwait);
-
-	if (ret == 0)
-		ret = req->dst_len;
-
-	akcipher_request_free(req);
-error_free_tfm:
-	crypto_free_akcipher(tfm);
-	pr_devel("<==%s() = %d\n", __func__, ret);
-	return ret;
-}
-
-/*
- * Decryption operation is performed with the private key in the TPM.
- */
-static int tpm_key_decrypt(struct tpm_key *tk,
-			   struct kernel_pkey_params *params,
-			   const void *in, void *out)
-{
-	struct tpm_buf tb;
-	uint32_t keyhandle;
-	uint8_t srkauth[SHA1_DIGEST_SIZE];
-	uint8_t keyauth[SHA1_DIGEST_SIZE];
-	int r;
-
-	pr_devel("==>%s()\n", __func__);
-
-	if (params->hash_algo)
-		return -ENOPKG;
-
-	if (strcmp(params->encoding, "pkcs1"))
-		return -ENOPKG;
-
-	r = tpm_buf_init(&tb, 0, 0);
-	if (r)
-		return r;
-
-	/* TODO: Handle a non-all zero SRK authorization */
-	memset(srkauth, 0, sizeof(srkauth));
-
-	r = tpm_loadkey2(&tb, SRKHANDLE, srkauth,
-				tk->blob, tk->blob_len, &keyhandle);
-	if (r < 0) {
-		pr_devel("loadkey2 failed (%d)\n", r);
-		goto error;
-	}
-
-	/* TODO: Handle a non-all zero key authorization */
-	memset(keyauth, 0, sizeof(keyauth));
-
-	r = tpm_unbind(&tb, keyhandle, keyauth,
-		       in, params->in_len, out, params->out_len);
-	if (r < 0)
-		pr_devel("tpm_unbind failed (%d)\n", r);
-
-	if (tpm_flushspecific(&tb, keyhandle) < 0)
-		pr_devel("flushspecific failed (%d)\n", r);
-
-error:
-	tpm_buf_destroy(&tb);
-	pr_devel("<==%s() = %d\n", __func__, r);
-	return r;
-}
-
-/*
- * Hash algorithm OIDs plus ASN.1 DER wrappings [RFC4880 sec 5.2.2].
- */
-static const u8 digest_info_md5[] = {
-	0x30, 0x20, 0x30, 0x0c, 0x06, 0x08,
-	0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x02, 0x05, /* OID */
-	0x05, 0x00, 0x04, 0x10
-};
-
-static const u8 digest_info_sha1[] = {
-	0x30, 0x21, 0x30, 0x09, 0x06, 0x05,
-	0x2b, 0x0e, 0x03, 0x02, 0x1a,
-	0x05, 0x00, 0x04, 0x14
-};
-
-static const u8 digest_info_rmd160[] = {
-	0x30, 0x21, 0x30, 0x09, 0x06, 0x05,
-	0x2b, 0x24, 0x03, 0x02, 0x01,
-	0x05, 0x00, 0x04, 0x14
-};
-
-static const u8 digest_info_sha224[] = {
-	0x30, 0x2d, 0x30, 0x0d, 0x06, 0x09,
-	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04,
-	0x05, 0x00, 0x04, 0x1c
-};
-
-static const u8 digest_info_sha256[] = {
-	0x30, 0x31, 0x30, 0x0d, 0x06, 0x09,
-	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01,
-	0x05, 0x00, 0x04, 0x20
-};
-
-static const u8 digest_info_sha384[] = {
-	0x30, 0x41, 0x30, 0x0d, 0x06, 0x09,
-	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02,
-	0x05, 0x00, 0x04, 0x30
-};
-
-static const u8 digest_info_sha512[] = {
-	0x30, 0x51, 0x30, 0x0d, 0x06, 0x09,
-	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03,
-	0x05, 0x00, 0x04, 0x40
-};
-
-static const struct asn1_template {
-	const char	*name;
-	const u8	*data;
-	size_t		size;
-} asn1_templates[] = {
-#define _(X) { #X, digest_info_##X, sizeof(digest_info_##X) }
-	_(md5),
-	_(sha1),
-	_(rmd160),
-	_(sha256),
-	_(sha384),
-	_(sha512),
-	_(sha224),
-	{ NULL }
-#undef _
-};
-
-static const struct asn1_template *lookup_asn1(const char *name)
-{
-	const struct asn1_template *p;
-
-	for (p = asn1_templates; p->name; p++)
-		if (strcmp(name, p->name) == 0)
-			return p;
-	return NULL;
-}
-
-/*
- * Sign operation is performed with the private key in the TPM.
- */
-static int tpm_key_sign(struct tpm_key *tk,
-			struct kernel_pkey_params *params,
-			const void *in, void *out)
-{
-	struct tpm_buf tb;
-	uint32_t keyhandle;
-	uint8_t srkauth[SHA1_DIGEST_SIZE];
-	uint8_t keyauth[SHA1_DIGEST_SIZE];
-	void *asn1_wrapped = NULL;
-	uint32_t in_len = params->in_len;
-	int r;
-
-	pr_devel("==>%s()\n", __func__);
-
-	if (strcmp(params->encoding, "pkcs1"))
-		return -ENOPKG;
-
-	if (params->hash_algo) {
-		const struct asn1_template *asn1 =
-						lookup_asn1(params->hash_algo);
-
-		if (!asn1)
-			return -ENOPKG;
-
-		/* request enough space for the ASN.1 template + input hash */
-		asn1_wrapped = kzalloc(in_len + asn1->size, GFP_KERNEL);
-		if (!asn1_wrapped)
-			return -ENOMEM;
-
-		/* Copy ASN.1 template, then the input */
-		memcpy(asn1_wrapped, asn1->data, asn1->size);
-		memcpy(asn1_wrapped + asn1->size, in, in_len);
-
-		in = asn1_wrapped;
-		in_len += asn1->size;
-	}
-
-	if (in_len > tk->key_len / 8 - 11) {
-		r = -EOVERFLOW;
-		goto error_free_asn1_wrapped;
-	}
-
-	r = tpm_buf_init(&tb, 0, 0);
-	if (r)
-		goto error_free_asn1_wrapped;
-
-	/* TODO: Handle a non-all zero SRK authorization */
-	memset(srkauth, 0, sizeof(srkauth));
-
-	r = tpm_loadkey2(&tb, SRKHANDLE, srkauth,
-			 tk->blob, tk->blob_len, &keyhandle);
-	if (r < 0) {
-		pr_devel("loadkey2 failed (%d)\n", r);
-		goto error_free_tb;
-	}
-
-	/* TODO: Handle a non-all zero key authorization */
-	memset(keyauth, 0, sizeof(keyauth));
-
-	r = tpm_sign(&tb, keyhandle, keyauth, in, in_len, out, params->out_len);
-	if (r < 0)
-		pr_devel("tpm_sign failed (%d)\n", r);
-
-	if (tpm_flushspecific(&tb, keyhandle) < 0)
-		pr_devel("flushspecific failed (%d)\n", r);
-
-error_free_tb:
-	tpm_buf_destroy(&tb);
-error_free_asn1_wrapped:
-	kfree(asn1_wrapped);
-	pr_devel("<==%s() = %d\n", __func__, r);
-	return r;
-}
-
-/*
- * Do encryption, decryption and signing ops.
- */
-static int tpm_key_eds_op(struct kernel_pkey_params *params,
-			  const void *in, void *out)
-{
-	struct tpm_key *tk = params->key->payload.data[asym_crypto];
-	int ret = -EOPNOTSUPP;
-
-	/* Perform the encryption calculation. */
-	switch (params->op) {
-	case kernel_pkey_encrypt:
-		ret = tpm_key_encrypt(tk, params, in, out);
-		break;
-	case kernel_pkey_decrypt:
-		ret = tpm_key_decrypt(tk, params, in, out);
-		break;
-	case kernel_pkey_sign:
-		ret = tpm_key_sign(tk, params, in, out);
-		break;
-	default:
-		BUG();
-	}
-
-	return ret;
-}
-
-/*
- * Verify a signature using a public key.
- */
-static int tpm_key_verify_signature(const struct key *key,
-				    const struct public_key_signature *sig)
-{
-	const struct tpm_key *tk = key->payload.data[asym_crypto];
-	struct crypto_wait cwait;
-	struct crypto_akcipher *tfm;
-	struct akcipher_request *req;
-	struct scatterlist src_sg[2];
-	char alg_name[CRYPTO_MAX_ALG_NAME];
-	uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
-	uint32_t der_pub_key_len;
-	int ret;
-
-	pr_devel("==>%s()\n", __func__);
-
-	BUG_ON(!tk);
-	BUG_ON(!sig);
-	BUG_ON(!sig->s);
-
-	if (!sig->digest)
-		return -ENOPKG;
-
-	ret = determine_akcipher(sig->encoding, sig->hash_algo, alg_name);
-	if (ret < 0)
-		return ret;
-
-	tfm = crypto_alloc_akcipher(alg_name, 0, 0);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
-
-	der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
-					 der_pub_key);
-
-	ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
-	if (ret < 0)
-		goto error_free_tfm;
-
-	ret = -ENOMEM;
-	req = akcipher_request_alloc(tfm, GFP_KERNEL);
-	if (!req)
-		goto error_free_tfm;
-
-	sg_init_table(src_sg, 2);
-	sg_set_buf(&src_sg[0], sig->s, sig->s_size);
-	sg_set_buf(&src_sg[1], sig->digest, sig->digest_size);
-	akcipher_request_set_crypt(req, src_sg, NULL, sig->s_size,
-				   sig->digest_size);
-	crypto_init_wait(&cwait);
-	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
-				      CRYPTO_TFM_REQ_MAY_SLEEP,
-				      crypto_req_done, &cwait);
-	ret = crypto_wait_req(crypto_akcipher_verify(req), &cwait);
-
-	akcipher_request_free(req);
-error_free_tfm:
-	crypto_free_akcipher(tfm);
-	pr_devel("<==%s() = %d\n", __func__, ret);
-	if (WARN_ON_ONCE(ret > 0))
-		ret = -EINVAL;
-	return ret;
-}
-
-/*
- * Parse enough information out of TPM_KEY structure:
- * TPM_STRUCT_VER -> 4 bytes
- * TPM_KEY_USAGE -> 2 bytes
- * TPM_KEY_FLAGS -> 4 bytes
- * TPM_AUTH_DATA_USAGE -> 1 byte
- * TPM_KEY_PARMS -> variable
- * UINT32 PCRInfoSize -> 4 bytes
- * BYTE* -> PCRInfoSize bytes
- * TPM_STORE_PUBKEY
- * UINT32 encDataSize;
- * BYTE* -> encDataSize;
- *
- * TPM_KEY_PARMS:
- * TPM_ALGORITHM_ID -> 4 bytes
- * TPM_ENC_SCHEME -> 2 bytes
- * TPM_SIG_SCHEME -> 2 bytes
- * UINT32 parmSize -> 4 bytes
- * BYTE* -> variable
- */
-static int extract_key_parameters(struct tpm_key *tk)
-{
-	const void *cur = tk->blob;
-	uint32_t len = tk->blob_len;
-	const void *pub_key;
-	uint32_t sz;
-	uint32_t key_len;
-
-	if (len < 11)
-		return -EBADMSG;
-
-	/* Ensure this is a legacy key */
-	if (get_unaligned_be16(cur + 4) != 0x0015)
-		return -EBADMSG;
-
-	/* Skip to TPM_KEY_PARMS */
-	cur += 11;
-	len -= 11;
-
-	if (len < 12)
-		return -EBADMSG;
-
-	/* Make sure this is an RSA key */
-	if (get_unaligned_be32(cur) != 0x00000001)
-		return -EBADMSG;
-
-	/* Make sure this is TPM_ES_RSAESPKCSv15 encoding scheme */
-	if (get_unaligned_be16(cur + 4) != 0x0002)
-		return -EBADMSG;
-
-	/* Make sure this is TPM_SS_RSASSAPKCS1v15_DER signature scheme */
-	if (get_unaligned_be16(cur + 6) != 0x0003)
-		return -EBADMSG;
-
-	sz = get_unaligned_be32(cur + 8);
-	if (len < sz + 12)
-		return -EBADMSG;
-
-	/* Move to TPM_RSA_KEY_PARMS */
-	len -= 12;
-	cur += 12;
-
-	/* Grab the RSA key length */
-	key_len = get_unaligned_be32(cur);
-
-	switch (key_len) {
-	case 512:
-	case 1024:
-	case 1536:
-	case 2048:
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	/* Move just past TPM_KEY_PARMS */
-	cur += sz;
-	len -= sz;
-
-	if (len < 4)
-		return -EBADMSG;
-
-	sz = get_unaligned_be32(cur);
-	if (len < 4 + sz)
-		return -EBADMSG;
-
-	/* Move to TPM_STORE_PUBKEY */
-	cur += 4 + sz;
-	len -= 4 + sz;
-
-	/* Grab the size of the public key, it should jive with the key size */
-	sz = get_unaligned_be32(cur);
-	if (sz > 256)
-		return -EINVAL;
-
-	pub_key = cur + 4;
-
-	tk->key_len = key_len;
-	tk->pub_key = pub_key;
-	tk->pub_key_len = sz;
-
-	return 0;
-}
-
-/* Given the blob, parse it and load it into the TPM */
-struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len)
-{
-	int r;
-	struct tpm_key *tk;
-
-	r = tpm_is_tpm2(NULL);
-	if (r < 0)
-		goto error;
-
-	/* We don't support TPM2 yet */
-	if (r > 0) {
-		r = -ENODEV;
-		goto error;
-	}
-
-	r = -ENOMEM;
-	tk = kzalloc(sizeof(struct tpm_key), GFP_KERNEL);
-	if (!tk)
-		goto error;
-
-	tk->blob = kmemdup(blob, blob_len, GFP_KERNEL);
-	if (!tk->blob)
-		goto error_memdup;
-
-	tk->blob_len = blob_len;
-
-	r = extract_key_parameters(tk);
-	if (r < 0)
-		goto error_extract;
-
-	return tk;
-
-error_extract:
-	kfree(tk->blob);
-	tk->blob_len = 0;
-error_memdup:
-	kfree(tk);
-error:
-	return ERR_PTR(r);
-}
-EXPORT_SYMBOL_GPL(tpm_key_create);
-
-/*
- * TPM-based asymmetric key subtype
- */
-struct asymmetric_key_subtype asym_tpm_subtype = {
-	.owner			= THIS_MODULE,
-	.name			= "asym_tpm",
-	.name_len		= sizeof("asym_tpm") - 1,
-	.describe		= asym_tpm_describe,
-	.destroy		= asym_tpm_destroy,
-	.query			= tpm_key_query,
-	.eds_op			= tpm_key_eds_op,
-	.verify_signature	= tpm_key_verify_signature,
-};
-EXPORT_SYMBOL_GPL(asym_tpm_subtype);
-
-MODULE_DESCRIPTION("TPM based asymmetric key subtype");
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL v2");
diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c
index 0b4d07aa8811..f6321c785714 100644
--- a/crypto/asymmetric_keys/pkcs7_verify.c
+++ b/crypto/asymmetric_keys/pkcs7_verify.c
@@ -174,12 +174,6 @@ static int pkcs7_find_key(struct pkcs7_message *pkcs7,
 		pr_devel("Sig %u: Found cert serial match X.509[%u]\n",
 			 sinfo->index, certix);
 
-		if (strcmp(x509->pub->pkey_algo, sinfo->sig->pkey_algo) != 0) {
-			pr_warn("Sig %u: X.509 algo and PKCS#7 sig algo don't match\n",
-				sinfo->index);
-			continue;
-		}
-
 		sinfo->signer = x509;
 		return 0;
 	}
@@ -226,9 +220,6 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7,
 			return 0;
 		}
 
-		if (x509->unsupported_key)
-			goto unsupported_crypto_in_x509;
-
 		pr_debug("- issuer %s\n", x509->issuer);
 		sig = x509->sig;
 		if (sig->auth_ids[0])
@@ -245,7 +236,7 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7,
 			 * authority.
 			 */
 			if (x509->unsupported_sig)
-				goto unsupported_crypto_in_x509;
+				goto unsupported_sig_in_x509;
 			x509->signer = x509;
 			pr_debug("- self-signed\n");
 			return 0;
@@ -309,7 +300,7 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7,
 		might_sleep();
 	}
 
-unsupported_crypto_in_x509:
+unsupported_sig_in_x509:
 	/* Just prune the certificate chain at this point if we lack some
 	 * crypto module to go further.  Note, however, we don't want to set
 	 * sinfo->unsupported_crypto as the signed info block may still be
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index 4fefb219bfdc..7c9e6be35c30 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -60,39 +60,83 @@ static void public_key_destroy(void *payload0, void *payload3)
 }
 
 /*
- * Determine the crypto algorithm name.
+ * Given a public_key, and an encoding and hash_algo to be used for signing
+ * and/or verification with that key, determine the name of the corresponding
+ * akcipher algorithm.  Also check that encoding and hash_algo are allowed.
  */
-static
-int software_key_determine_akcipher(const char *encoding,
-				    const char *hash_algo,
-				    const struct public_key *pkey,
-				    char alg_name[CRYPTO_MAX_ALG_NAME])
+static int
+software_key_determine_akcipher(const struct public_key *pkey,
+				const char *encoding, const char *hash_algo,
+				char alg_name[CRYPTO_MAX_ALG_NAME])
 {
 	int n;
 
-	if (strcmp(encoding, "pkcs1") == 0) {
-		/* The data wangled by the RSA algorithm is typically padded
-		 * and encoded in some manner, such as EMSA-PKCS1-1_5 [RFC3447
-		 * sec 8.2].
+	if (!encoding)
+		return -EINVAL;
+
+	if (strcmp(pkey->pkey_algo, "rsa") == 0) {
+		/*
+		 * RSA signatures usually use EMSA-PKCS1-1_5 [RFC3447 sec 8.2].
+		 */
+		if (strcmp(encoding, "pkcs1") == 0) {
+			if (!hash_algo)
+				n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
+					     "pkcs1pad(%s)",
+					     pkey->pkey_algo);
+			else
+				n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
+					     "pkcs1pad(%s,%s)",
+					     pkey->pkey_algo, hash_algo);
+			return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0;
+		}
+		if (strcmp(encoding, "raw") != 0)
+			return -EINVAL;
+		/*
+		 * Raw RSA cannot differentiate between different hash
+		 * algorithms.
+		 */
+		if (hash_algo)
+			return -EINVAL;
+	} else if (strncmp(pkey->pkey_algo, "ecdsa", 5) == 0) {
+		if (strcmp(encoding, "x962") != 0)
+			return -EINVAL;
+		/*
+		 * ECDSA signatures are taken over a raw hash, so they don't
+		 * differentiate between different hash algorithms.  That means
+		 * that the verifier should hard-code a specific hash algorithm.
+		 * Unfortunately, in practice ECDSA is used with multiple SHAs,
+		 * so we have to allow all of them and not just one.
 		 */
 		if (!hash_algo)
-			n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
-				     "pkcs1pad(%s)",
-				     pkey->pkey_algo);
-		else
-			n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
-				     "pkcs1pad(%s,%s)",
-				     pkey->pkey_algo, hash_algo);
-		return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0;
-	}
-
-	if (strcmp(encoding, "raw") == 0 ||
-	    strcmp(encoding, "x962") == 0) {
-		strcpy(alg_name, pkey->pkey_algo);
-		return 0;
+			return -EINVAL;
+		if (strcmp(hash_algo, "sha1") != 0 &&
+		    strcmp(hash_algo, "sha224") != 0 &&
+		    strcmp(hash_algo, "sha256") != 0 &&
+		    strcmp(hash_algo, "sha384") != 0 &&
+		    strcmp(hash_algo, "sha512") != 0)
+			return -EINVAL;
+	} else if (strcmp(pkey->pkey_algo, "sm2") == 0) {
+		if (strcmp(encoding, "raw") != 0)
+			return -EINVAL;
+		if (!hash_algo)
+			return -EINVAL;
+		if (strcmp(hash_algo, "sm3") != 0)
+			return -EINVAL;
+	} else if (strcmp(pkey->pkey_algo, "ecrdsa") == 0) {
+		if (strcmp(encoding, "raw") != 0)
+			return -EINVAL;
+		if (!hash_algo)
+			return -EINVAL;
+		if (strcmp(hash_algo, "streebog256") != 0 &&
+		    strcmp(hash_algo, "streebog512") != 0)
+			return -EINVAL;
+	} else {
+		/* Unknown public key algorithm */
+		return -ENOPKG;
 	}
-
-	return -ENOPKG;
+	if (strscpy(alg_name, pkey->pkey_algo, CRYPTO_MAX_ALG_NAME) < 0)
+		return -EINVAL;
+	return 0;
 }
 
 static u8 *pkey_pack_u32(u8 *dst, u32 val)
@@ -113,9 +157,8 @@ static int software_key_query(const struct kernel_pkey_params *params,
 	u8 *key, *ptr;
 	int ret, len;
 
-	ret = software_key_determine_akcipher(params->encoding,
-					      params->hash_algo,
-					      pkey, alg_name);
+	ret = software_key_determine_akcipher(pkey, params->encoding,
+					      params->hash_algo, alg_name);
 	if (ret < 0)
 		return ret;
 
@@ -179,9 +222,8 @@ static int software_key_eds_op(struct kernel_pkey_params *params,
 
 	pr_devel("==>%s()\n", __func__);
 
-	ret = software_key_determine_akcipher(params->encoding,
-					      params->hash_algo,
-					      pkey, alg_name);
+	ret = software_key_determine_akcipher(pkey, params->encoding,
+					      params->hash_algo, alg_name);
 	if (ret < 0)
 		return ret;
 
@@ -325,9 +367,23 @@ int public_key_verify_signature(const struct public_key *pkey,
 	BUG_ON(!sig);
 	BUG_ON(!sig->s);
 
-	ret = software_key_determine_akcipher(sig->encoding,
-					      sig->hash_algo,
-					      pkey, alg_name);
+	/*
+	 * If the signature specifies a public key algorithm, it *must* match
+	 * the key's actual public key algorithm.
+	 *
+	 * Small exception: ECDSA signatures don't specify the curve, but ECDSA
+	 * keys do.  So the strings can mismatch slightly in that case:
+	 * "ecdsa-nist-*" for the key, but "ecdsa" for the signature.
+	 */
+	if (sig->pkey_algo) {
+		if (strcmp(pkey->pkey_algo, sig->pkey_algo) != 0 &&
+		    (strncmp(pkey->pkey_algo, "ecdsa-", 6) != 0 ||
+		     strcmp(sig->pkey_algo, "ecdsa") != 0))
+			return -EKEYREJECTED;
+	}
+
+	ret = software_key_determine_akcipher(pkey, sig->encoding,
+					      sig->hash_algo, alg_name);
 	if (ret < 0)
 		return ret;
 
diff --git a/crypto/asymmetric_keys/signature.c b/crypto/asymmetric_keys/signature.c
index 4aff3eebec17..2deff81f8af5 100644
--- a/crypto/asymmetric_keys/signature.c
+++ b/crypto/asymmetric_keys/signature.c
@@ -35,7 +35,7 @@ void public_key_signature_free(struct public_key_signature *sig)
 EXPORT_SYMBOL_GPL(public_key_signature_free);
 
 /**
- * query_asymmetric_key - Get information about an aymmetric key.
+ * query_asymmetric_key - Get information about an asymmetric key.
  * @params: Various parameters.
  * @info: Where to put the information.
  */
diff --git a/crypto/asymmetric_keys/tpm.asn1 b/crypto/asymmetric_keys/tpm.asn1
deleted file mode 100644
index d7f194232f30..000000000000
--- a/crypto/asymmetric_keys/tpm.asn1
+++ /dev/null
@@ -1,5 +0,0 @@
---
--- Unencryted TPM Blob.  For details of the format, see:
--- http://david.woodhou.se/draft-woodhouse-cert-best-practice.html#I-D.mavrogiannopoulos-tpmuri
---
-PrivateKeyInfo ::= OCTET STRING ({ tpm_note_key })
diff --git a/crypto/asymmetric_keys/tpm_parser.c b/crypto/asymmetric_keys/tpm_parser.c
deleted file mode 100644
index 96405d8dcd98..000000000000
--- a/crypto/asymmetric_keys/tpm_parser.c
+++ /dev/null
@@ -1,102 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define pr_fmt(fmt) "TPM-PARSER: "fmt
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/err.h>
-#include <keys/asymmetric-subtype.h>
-#include <keys/asymmetric-parser.h>
-#include <crypto/asym_tpm_subtype.h>
-#include "tpm.asn1.h"
-
-struct tpm_parse_context {
-	const void	*blob;
-	u32		blob_len;
-};
-
-/*
- * Note the key data of the ASN.1 blob.
- */
-int tpm_note_key(void *context, size_t hdrlen,
-		   unsigned char tag,
-		   const void *value, size_t vlen)
-{
-	struct tpm_parse_context *ctx = context;
-
-	ctx->blob = value;
-	ctx->blob_len = vlen;
-
-	return 0;
-}
-
-/*
- * Parse a TPM-encrypted private key blob.
- */
-static struct tpm_key *tpm_parse(const void *data, size_t datalen)
-{
-	struct tpm_parse_context ctx;
-	long ret;
-
-	memset(&ctx, 0, sizeof(ctx));
-
-	/* Attempt to decode the private key */
-	ret = asn1_ber_decoder(&tpm_decoder, &ctx, data, datalen);
-	if (ret < 0)
-		goto error;
-
-	return tpm_key_create(ctx.blob, ctx.blob_len);
-
-error:
-	return ERR_PTR(ret);
-}
-/*
- * Attempt to parse a data blob for a key as a TPM private key blob.
- */
-static int tpm_key_preparse(struct key_preparsed_payload *prep)
-{
-	struct tpm_key *tk;
-
-	/*
-	 * TPM 1.2 keys are max 2048 bits long, so assume the blob is no
-	 * more than 4x that
-	 */
-	if (prep->datalen > 256 * 4)
-		return -EMSGSIZE;
-
-	tk = tpm_parse(prep->data, prep->datalen);
-
-	if (IS_ERR(tk))
-		return PTR_ERR(tk);
-
-	/* We're pinning the module by being linked against it */
-	__module_get(asym_tpm_subtype.owner);
-	prep->payload.data[asym_subtype] = &asym_tpm_subtype;
-	prep->payload.data[asym_key_ids] = NULL;
-	prep->payload.data[asym_crypto] = tk;
-	prep->payload.data[asym_auth] = NULL;
-	prep->quotalen = 100;
-	return 0;
-}
-
-static struct asymmetric_key_parser tpm_key_parser = {
-	.owner	= THIS_MODULE,
-	.name	= "tpm_parser",
-	.parse	= tpm_key_preparse,
-};
-
-static int __init tpm_key_init(void)
-{
-	return register_asymmetric_key_parser(&tpm_key_parser);
-}
-
-static void __exit tpm_key_exit(void)
-{
-	unregister_asymmetric_key_parser(&tpm_key_parser);
-}
-
-module_init(tpm_key_init);
-module_exit(tpm_key_exit);
-
-MODULE_DESCRIPTION("TPM private key-blob parser");
-MODULE_LICENSE("GPL v2");
diff --git a/crypto/asymmetric_keys/x509.asn1 b/crypto/asymmetric_keys/x509.asn1
index 5c9f4e4a5231..92d59c32f96a 100644
--- a/crypto/asymmetric_keys/x509.asn1
+++ b/crypto/asymmetric_keys/x509.asn1
@@ -7,7 +7,7 @@ Certificate ::= SEQUENCE {
 TBSCertificate ::= SEQUENCE {
 	version           [ 0 ]	Version DEFAULT,
 	serialNumber		CertificateSerialNumber ({ x509_note_serial }),
-	signature		AlgorithmIdentifier ({ x509_note_pkey_algo }),
+	signature		AlgorithmIdentifier ({ x509_note_sig_algo }),
 	issuer			Name ({ x509_note_issuer }),
 	validity		Validity,
 	subject			Name ({ x509_note_subject }),
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index 083405eb80c3..2899ed80bb18 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -19,15 +19,13 @@
 struct x509_parse_context {
 	struct x509_certificate	*cert;		/* Certificate being constructed */
 	unsigned long	data;			/* Start of data */
-	const void	*cert_start;		/* Start of cert content */
 	const void	*key;			/* Key data */
 	size_t		key_size;		/* Size of key data */
 	const void	*params;		/* Key parameters */
 	size_t		params_size;		/* Size of key parameters */
-	enum OID	key_algo;		/* Public key algorithm */
+	enum OID	key_algo;		/* Algorithm used by the cert's key */
 	enum OID	last_oid;		/* Last OID encountered */
-	enum OID	algo_oid;		/* Algorithm OID */
-	unsigned char	nr_mpi;			/* Number of MPIs stored */
+	enum OID	sig_algo;		/* Algorithm used to sign the cert */
 	u8		o_size;			/* Size of organizationName (O) */
 	u8		cn_size;		/* Size of commonName (CN) */
 	u8		email_size;		/* Size of emailAddress */
@@ -187,11 +185,10 @@ int x509_note_tbs_certificate(void *context, size_t hdrlen,
 }
 
 /*
- * Record the public key algorithm
+ * Record the algorithm that was used to sign this certificate.
  */
-int x509_note_pkey_algo(void *context, size_t hdrlen,
-			unsigned char tag,
-			const void *value, size_t vlen)
+int x509_note_sig_algo(void *context, size_t hdrlen, unsigned char tag,
+		       const void *value, size_t vlen)
 {
 	struct x509_parse_context *ctx = context;
 
@@ -263,22 +260,22 @@ int x509_note_pkey_algo(void *context, size_t hdrlen,
 rsa_pkcs1:
 	ctx->cert->sig->pkey_algo = "rsa";
 	ctx->cert->sig->encoding = "pkcs1";
-	ctx->algo_oid = ctx->last_oid;
+	ctx->sig_algo = ctx->last_oid;
 	return 0;
 ecrdsa:
 	ctx->cert->sig->pkey_algo = "ecrdsa";
 	ctx->cert->sig->encoding = "raw";
-	ctx->algo_oid = ctx->last_oid;
+	ctx->sig_algo = ctx->last_oid;
 	return 0;
 sm2:
 	ctx->cert->sig->pkey_algo = "sm2";
 	ctx->cert->sig->encoding = "raw";
-	ctx->algo_oid = ctx->last_oid;
+	ctx->sig_algo = ctx->last_oid;
 	return 0;
 ecdsa:
 	ctx->cert->sig->pkey_algo = "ecdsa";
 	ctx->cert->sig->encoding = "x962";
-	ctx->algo_oid = ctx->last_oid;
+	ctx->sig_algo = ctx->last_oid;
 	return 0;
 }
 
@@ -291,11 +288,16 @@ int x509_note_signature(void *context, size_t hdrlen,
 {
 	struct x509_parse_context *ctx = context;
 
-	pr_debug("Signature type: %u size %zu\n", ctx->last_oid, vlen);
+	pr_debug("Signature: alg=%u, size=%zu\n", ctx->last_oid, vlen);
 
-	if (ctx->last_oid != ctx->algo_oid) {
-		pr_warn("Got cert with pkey (%u) and sig (%u) algorithm OIDs\n",
-			ctx->algo_oid, ctx->last_oid);
+	/*
+	 * In X.509 certificates, the signature's algorithm is stored in two
+	 * places: inside the TBSCertificate (the data that is signed), and
+	 * alongside the signature.  These *must* match.
+	 */
+	if (ctx->last_oid != ctx->sig_algo) {
+		pr_warn("signatureAlgorithm (%u) differs from tbsCertificate.signature (%u)\n",
+			ctx->last_oid, ctx->sig_algo);
 		return -EINVAL;
 	}
 
diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h
index c233f136fb35..97a886cbe01c 100644
--- a/crypto/asymmetric_keys/x509_parser.h
+++ b/crypto/asymmetric_keys/x509_parser.h
@@ -22,7 +22,7 @@ struct x509_certificate {
 	time64_t	valid_to;
 	const void	*tbs;			/* Signed data */
 	unsigned	tbs_size;		/* Size of signed data */
-	unsigned	raw_sig_size;		/* Size of sigature */
+	unsigned	raw_sig_size;		/* Size of signature */
 	const void	*raw_sig;		/* Signature data */
 	const void	*raw_serial;		/* Raw serial number in ASN.1 */
 	unsigned	raw_serial_size;
@@ -36,7 +36,6 @@ struct x509_certificate {
 	bool		seen;			/* Infinite recursion prevention */
 	bool		verified;
 	bool		self_signed;		/* T if self-signed (check unsupported_sig too) */
-	bool		unsupported_key;	/* T if key uses unsupported crypto */
 	bool		unsupported_sig;	/* T if signature uses unsupported crypto */
 	bool		blacklisted;
 };
diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c
index fe14cae115b5..91a4ad50dea2 100644
--- a/crypto/asymmetric_keys/x509_public_key.c
+++ b/crypto/asymmetric_keys/x509_public_key.c
@@ -33,18 +33,6 @@ int x509_get_sig_params(struct x509_certificate *cert)
 	sig->data = cert->tbs;
 	sig->data_size = cert->tbs_size;
 
-	if (!cert->pub->pkey_algo)
-		cert->unsupported_key = true;
-
-	if (!sig->pkey_algo)
-		cert->unsupported_sig = true;
-
-	/* We check the hash if we can - even if we can't then verify it */
-	if (!sig->hash_algo) {
-		cert->unsupported_sig = true;
-		return 0;
-	}
-
 	sig->s = kmemdup(cert->raw_sig, cert->raw_sig_size, GFP_KERNEL);
 	if (!sig->s)
 		return -ENOMEM;
@@ -128,12 +116,6 @@ int x509_check_for_self_signed(struct x509_certificate *cert)
 			goto out;
 	}
 
-	ret = -EKEYREJECTED;
-	if (strcmp(cert->pub->pkey_algo, cert->sig->pkey_algo) != 0 &&
-	    (strncmp(cert->pub->pkey_algo, "ecdsa-", 6) != 0 ||
-	     strcmp(cert->sig->pkey_algo, "ecdsa") != 0))
-		goto out;
-
 	ret = public_key_verify_signature(cert->pub, cert->sig);
 	if (ret < 0) {
 		if (ret == -ENOPKG) {
@@ -173,12 +155,6 @@ static int x509_key_preparse(struct key_preparsed_payload *prep)
 
 	pr_devel("Cert Issuer: %s\n", cert->issuer);
 	pr_devel("Cert Subject: %s\n", cert->subject);
-
-	if (cert->unsupported_key) {
-		ret = -ENOPKG;
-		goto error_free_cert;
-	}
-
 	pr_devel("Cert Key Algo: %s\n", cert->pub->pkey_algo);
 	pr_devel("Cert Valid period: %lld-%lld\n", cert->valid_from, cert->valid_to);
 
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index d8a91521144e..1a3855284091 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -170,8 +170,8 @@ dma_xor_aligned_offsets(struct dma_device *device, unsigned int offset,
  *
  * xor_blocks always uses the dest as a source so the
  * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
- * the calculation.  The assumption with dma eninges is that they only
- * use the destination buffer as a source when it is explicity specified
+ * the calculation.  The assumption with dma engines is that they only
+ * use the destination buffer as a source when it is explicitly specified
  * in the source list.
  *
  * src_list note: if the dest is also a source it must be at index zero.
@@ -261,8 +261,8 @@ EXPORT_SYMBOL_GPL(async_xor_offs);
  *
  * xor_blocks always uses the dest as a source so the
  * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
- * the calculation.  The assumption with dma eninges is that they only
- * use the destination buffer as a source when it is explicity specified
+ * the calculation.  The assumption with dma engines is that they only
+ * use the destination buffer as a source when it is explicitly specified
  * in the source list.
  *
  * src_list note: if the dest is also a source it must be at index zero.
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
index 66db82e5a3b1..c9d218e53bcb 100644
--- a/crypto/async_tx/raid6test.c
+++ b/crypto/async_tx/raid6test.c
@@ -217,7 +217,7 @@ static int raid6_test(void)
 		err += test(12, &tests);
 	}
 
-	/* the 24 disk case is special for ioatdma as it is the boudary point
+	/* the 24 disk case is special for ioatdma as it is the boundary point
 	 * at which it needs to switch from 8-source ops to 16-source
 	 * ops for continuation (assumes DMA_HAS_PQ_CONTINUE is not set)
 	 */
@@ -241,7 +241,7 @@ static void raid6_test_exit(void)
 }
 
 /* when compiled-in wait for drivers to load first (assumes dma drivers
- * are also compliled-in)
+ * are also compiled-in)
  */
 late_initcall(raid6_test);
 module_exit(raid6_test_exit);
diff --git a/crypto/authenc.c b/crypto/authenc.c
index 670bf1a01d00..17f674a7cdff 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -253,7 +253,7 @@ static int crypto_authenc_decrypt_tail(struct aead_request *req,
 		dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen);
 
 	skcipher_request_set_tfm(skreq, ctx->enc);
-	skcipher_request_set_callback(skreq, aead_request_flags(req),
+	skcipher_request_set_callback(skreq, flags,
 				      req->base.complete, req->base.data);
 	skcipher_request_set_crypt(skreq, src, dst,
 				   req->cryptlen - authsize, req->iv);
diff --git a/crypto/cfb.c b/crypto/cfb.c
index 0d664dfb47bc..5c36b7b65e2a 100644
--- a/crypto/cfb.c
+++ b/crypto/cfb.c
@@ -1,4 +1,4 @@
-//SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0
 /*
  * CFB: Cipher FeedBack mode
  *
diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index fb07da9920ee..6056a990c9f2 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -53,6 +53,7 @@ static void crypto_finalize_request(struct crypto_engine *engine,
 				dev_err(engine->dev, "failed to unprepare request\n");
 		}
 	}
+	lockdep_assert_in_softirq();
 	req->complete(req, err);
 
 	kthread_queue_work(engine->kworker, &engine->pump_requests);
diff --git a/crypto/dh.c b/crypto/dh.c
index 27e62a2a8027..4406aeb1ff61 100644
--- a/crypto/dh.c
+++ b/crypto/dh.c
@@ -10,11 +10,11 @@
 #include <crypto/internal/kpp.h>
 #include <crypto/kpp.h>
 #include <crypto/dh.h>
+#include <crypto/rng.h>
 #include <linux/mpi.h>
 
 struct dh_ctx {
 	MPI p;	/* Value is guaranteed to be set. */
-	MPI q;	/* Value is optional. */
 	MPI g;	/* Value is guaranteed to be set. */
 	MPI xa;	/* Value is guaranteed to be set. */
 };
@@ -22,7 +22,6 @@ struct dh_ctx {
 static void dh_clear_ctx(struct dh_ctx *ctx)
 {
 	mpi_free(ctx->p);
-	mpi_free(ctx->q);
 	mpi_free(ctx->g);
 	mpi_free(ctx->xa);
 	memset(ctx, 0, sizeof(*ctx));
@@ -62,12 +61,6 @@ static int dh_set_params(struct dh_ctx *ctx, struct dh *params)
 	if (!ctx->p)
 		return -EINVAL;
 
-	if (params->q && params->q_size) {
-		ctx->q = mpi_read_raw_data(params->q, params->q_size);
-		if (!ctx->q)
-			return -EINVAL;
-	}
-
 	ctx->g = mpi_read_raw_data(params->g, params->g_size);
 	if (!ctx->g)
 		return -EINVAL;
@@ -104,11 +97,12 @@ err_clear_ctx:
 /*
  * SP800-56A public key verification:
  *
- * * If Q is provided as part of the domain paramenters, a full validation
- *   according to SP800-56A section 5.6.2.3.1 is performed.
+ * * For the safe-prime groups in FIPS mode, Q can be computed
+ *   trivially from P and a full validation according to SP800-56A
+ *   section 5.6.2.3.1 is performed.
  *
- * * If Q is not provided, a partial validation according to SP800-56A section
- *   5.6.2.3.2 is performed.
+ * * For all other sets of group parameters, only a partial validation
+ *   according to SP800-56A section 5.6.2.3.2 is performed.
  */
 static int dh_is_pubkey_valid(struct dh_ctx *ctx, MPI y)
 {
@@ -119,21 +113,40 @@ static int dh_is_pubkey_valid(struct dh_ctx *ctx, MPI y)
 	 * Step 1: Verify that 2 <= y <= p - 2.
 	 *
 	 * The upper limit check is actually y < p instead of y < p - 1
-	 * as the mpi_sub_ui function is yet missing.
+	 * in order to save one mpi_sub_ui() invocation here. Note that
+	 * p - 1 is the non-trivial element of the subgroup of order 2 and
+	 * thus, the check on y^q below would fail if y == p - 1.
 	 */
 	if (mpi_cmp_ui(y, 1) < 1 || mpi_cmp(y, ctx->p) >= 0)
 		return -EINVAL;
 
-	/* Step 2: Verify that 1 = y^q mod p */
-	if (ctx->q) {
-		MPI val = mpi_alloc(0);
+	/*
+	 * Step 2: Verify that 1 = y^q mod p
+	 *
+	 * For the safe-prime groups q = (p - 1)/2.
+	 */
+	if (fips_enabled) {
+		MPI val, q;
 		int ret;
 
+		val = mpi_alloc(0);
 		if (!val)
 			return -ENOMEM;
 
-		ret = mpi_powm(val, y, ctx->q, ctx->p);
+		q = mpi_alloc(mpi_get_nlimbs(ctx->p));
+		if (!q) {
+			mpi_free(val);
+			return -ENOMEM;
+		}
+
+		/*
+		 * ->p is odd, so no need to explicitly subtract one
+		 * from it before shifting to the right.
+		 */
+		mpi_rshift(q, ctx->p, 1);
 
+		ret = mpi_powm(val, y, q, ctx->p);
+		mpi_free(q);
 		if (ret) {
 			mpi_free(val);
 			return ret;
@@ -263,13 +276,645 @@ static struct kpp_alg dh = {
 	},
 };
 
+
+struct dh_safe_prime {
+	unsigned int max_strength;
+	unsigned int p_size;
+	const char *p;
+};
+
+static const char safe_prime_g[]  = { 2 };
+
+struct dh_safe_prime_instance_ctx {
+	struct crypto_kpp_spawn dh_spawn;
+	const struct dh_safe_prime *safe_prime;
+};
+
+struct dh_safe_prime_tfm_ctx {
+	struct crypto_kpp *dh_tfm;
+};
+
+static void dh_safe_prime_free_instance(struct kpp_instance *inst)
+{
+	struct dh_safe_prime_instance_ctx *ctx = kpp_instance_ctx(inst);
+
+	crypto_drop_kpp(&ctx->dh_spawn);
+	kfree(inst);
+}
+
+static inline struct dh_safe_prime_instance_ctx *dh_safe_prime_instance_ctx(
+	struct crypto_kpp *tfm)
+{
+	return kpp_instance_ctx(kpp_alg_instance(tfm));
+}
+
+static int dh_safe_prime_init_tfm(struct crypto_kpp *tfm)
+{
+	struct dh_safe_prime_instance_ctx *inst_ctx =
+		dh_safe_prime_instance_ctx(tfm);
+	struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm);
+
+	tfm_ctx->dh_tfm = crypto_spawn_kpp(&inst_ctx->dh_spawn);
+	if (IS_ERR(tfm_ctx->dh_tfm))
+		return PTR_ERR(tfm_ctx->dh_tfm);
+
+	return 0;
+}
+
+static void dh_safe_prime_exit_tfm(struct crypto_kpp *tfm)
+{
+	struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm);
+
+	crypto_free_kpp(tfm_ctx->dh_tfm);
+}
+
+static u64 __add_u64_to_be(__be64 *dst, unsigned int n, u64 val)
+{
+	unsigned int i;
+
+	for (i = n; val && i > 0; --i) {
+		u64 tmp = be64_to_cpu(dst[i - 1]);
+
+		tmp += val;
+		val = tmp >= val ? 0 : 1;
+		dst[i - 1] = cpu_to_be64(tmp);
+	}
+
+	return val;
+}
+
+static void *dh_safe_prime_gen_privkey(const struct dh_safe_prime *safe_prime,
+				       unsigned int *key_size)
+{
+	unsigned int n, oversampling_size;
+	__be64 *key;
+	int err;
+	u64 h, o;
+
+	/*
+	 * Generate a private key following NIST SP800-56Ar3,
+	 * sec. 5.6.1.1.1 and 5.6.1.1.3 resp..
+	 *
+	 * 5.6.1.1.1: choose key length N such that
+	 * 2 * ->max_strength <= N <= log2(q) + 1 = ->p_size * 8 - 1
+	 * with q = (p - 1) / 2 for the safe-prime groups.
+	 * Choose the lower bound's next power of two for N in order to
+	 * avoid excessively large private keys while still
+	 * maintaining some extra reserve beyond the bare minimum in
+	 * most cases. Note that for each entry in safe_prime_groups[],
+	 * the following holds for such N:
+	 * - N >= 256, in particular it is a multiple of 2^6 = 64
+	 *   bits and
+	 * - N < log2(q) + 1, i.e. N respects the upper bound.
+	 */
+	n = roundup_pow_of_two(2 * safe_prime->max_strength);
+	WARN_ON_ONCE(n & ((1u << 6) - 1));
+	n >>= 6; /* Convert N into units of u64. */
+
+	/*
+	 * Reserve one extra u64 to hold the extra random bits
+	 * required as per 5.6.1.1.3.
+	 */
+	oversampling_size = (n + 1) * sizeof(__be64);
+	key = kmalloc(oversampling_size, GFP_KERNEL);
+	if (!key)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * 5.6.1.1.3, step 3 (and implicitly step 4): obtain N + 64
+	 * random bits and interpret them as a big endian integer.
+	 */
+	err = -EFAULT;
+	if (crypto_get_default_rng())
+		goto out_err;
+
+	err = crypto_rng_get_bytes(crypto_default_rng, (u8 *)key,
+				   oversampling_size);
+	crypto_put_default_rng();
+	if (err)
+		goto out_err;
+
+	/*
+	 * 5.6.1.1.3, step 5 is implicit: 2^N < q and thus,
+	 * M = min(2^N, q) = 2^N.
+	 *
+	 * For step 6, calculate
+	 * key = (key[] mod (M - 1)) + 1 = (key[] mod (2^N - 1)) + 1.
+	 *
+	 * In order to avoid expensive divisions, note that
+	 * 2^N mod (2^N - 1) = 1 and thus, for any integer h,
+	 * 2^N * h mod (2^N - 1) = h mod (2^N - 1) always holds.
+	 * The big endian integer key[] composed of n + 1 64bit words
+	 * may be written as key[] = h * 2^N + l, with h = key[0]
+	 * representing the 64 most significant bits and l
+	 * corresponding to the remaining 2^N bits. With the remark
+	 * from above,
+	 * h * 2^N + l mod (2^N - 1) = l + h mod (2^N - 1).
+	 * As both, l and h are less than 2^N, their sum after
+	 * this first reduction is guaranteed to be <= 2^(N + 1) - 2.
+	 * Or equivalently, that their sum can again be written as
+	 * h' * 2^N + l' with h' now either zero or one and if one,
+	 * then l' <= 2^N - 2. Thus, all bits at positions >= N will
+	 * be zero after a second reduction:
+	 * h' * 2^N + l' mod (2^N - 1) = l' + h' mod (2^N - 1).
+	 * At this point, it is still possible that
+	 * l' + h' = 2^N - 1, i.e. that l' + h' mod (2^N - 1)
+	 * is zero. This condition will be detected below by means of
+	 * the final increment overflowing in this case.
+	 */
+	h = be64_to_cpu(key[0]);
+	h = __add_u64_to_be(key + 1, n, h);
+	h = __add_u64_to_be(key + 1, n, h);
+	WARN_ON_ONCE(h);
+
+	/* Increment to obtain the final result. */
+	o = __add_u64_to_be(key + 1, n, 1);
+	/*
+	 * The overflow bit o from the increment is either zero or
+	 * one. If zero, key[1:n] holds the final result in big-endian
+	 * order. If one, key[1:n] is zero now, but needs to be set to
+	 * one, c.f. above.
+	 */
+	if (o)
+		key[n] = cpu_to_be64(1);
+
+	/* n is in units of u64, convert to bytes. */
+	*key_size = n << 3;
+	/* Strip the leading extra __be64, which is (virtually) zero by now. */
+	memmove(key, &key[1], *key_size);
+
+	return key;
+
+out_err:
+	kfree_sensitive(key);
+	return ERR_PTR(err);
+}
+
+static int dh_safe_prime_set_secret(struct crypto_kpp *tfm, const void *buffer,
+				    unsigned int len)
+{
+	struct dh_safe_prime_instance_ctx *inst_ctx =
+		dh_safe_prime_instance_ctx(tfm);
+	struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm);
+	struct dh params = {};
+	void *buf = NULL, *key = NULL;
+	unsigned int buf_size;
+	int err;
+
+	if (buffer) {
+		err = __crypto_dh_decode_key(buffer, len, &params);
+		if (err)
+			return err;
+		if (params.p_size || params.g_size)
+			return -EINVAL;
+	}
+
+	params.p = inst_ctx->safe_prime->p;
+	params.p_size = inst_ctx->safe_prime->p_size;
+	params.g = safe_prime_g;
+	params.g_size = sizeof(safe_prime_g);
+
+	if (!params.key_size) {
+		key = dh_safe_prime_gen_privkey(inst_ctx->safe_prime,
+						&params.key_size);
+		if (IS_ERR(key))
+			return PTR_ERR(key);
+		params.key = key;
+	}
+
+	buf_size = crypto_dh_key_len(&params);
+	buf = kmalloc(buf_size, GFP_KERNEL);
+	if (!buf) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = crypto_dh_encode_key(buf, buf_size, &params);
+	if (err)
+		goto out;
+
+	err = crypto_kpp_set_secret(tfm_ctx->dh_tfm, buf, buf_size);
+out:
+	kfree_sensitive(buf);
+	kfree_sensitive(key);
+	return err;
+}
+
+static void dh_safe_prime_complete_req(struct crypto_async_request *dh_req,
+				       int err)
+{
+	struct kpp_request *req = dh_req->data;
+
+	kpp_request_complete(req, err);
+}
+
+static struct kpp_request *dh_safe_prime_prepare_dh_req(struct kpp_request *req)
+{
+	struct dh_safe_prime_tfm_ctx *tfm_ctx =
+		kpp_tfm_ctx(crypto_kpp_reqtfm(req));
+	struct kpp_request *dh_req = kpp_request_ctx(req);
+
+	kpp_request_set_tfm(dh_req, tfm_ctx->dh_tfm);
+	kpp_request_set_callback(dh_req, req->base.flags,
+				 dh_safe_prime_complete_req, req);
+
+	kpp_request_set_input(dh_req, req->src, req->src_len);
+	kpp_request_set_output(dh_req, req->dst, req->dst_len);
+
+	return dh_req;
+}
+
+static int dh_safe_prime_generate_public_key(struct kpp_request *req)
+{
+	struct kpp_request *dh_req = dh_safe_prime_prepare_dh_req(req);
+
+	return crypto_kpp_generate_public_key(dh_req);
+}
+
+static int dh_safe_prime_compute_shared_secret(struct kpp_request *req)
+{
+	struct kpp_request *dh_req = dh_safe_prime_prepare_dh_req(req);
+
+	return crypto_kpp_compute_shared_secret(dh_req);
+}
+
+static unsigned int dh_safe_prime_max_size(struct crypto_kpp *tfm)
+{
+	struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm);
+
+	return crypto_kpp_maxsize(tfm_ctx->dh_tfm);
+}
+
+static int __maybe_unused __dh_safe_prime_create(
+	struct crypto_template *tmpl, struct rtattr **tb,
+	const struct dh_safe_prime *safe_prime)
+{
+	struct kpp_instance *inst;
+	struct dh_safe_prime_instance_ctx *ctx;
+	const char *dh_name;
+	struct kpp_alg *dh_alg;
+	u32 mask;
+	int err;
+
+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_KPP, &mask);
+	if (err)
+		return err;
+
+	dh_name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(dh_name))
+		return PTR_ERR(dh_name);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	ctx = kpp_instance_ctx(inst);
+
+	err = crypto_grab_kpp(&ctx->dh_spawn, kpp_crypto_instance(inst),
+			      dh_name, 0, mask);
+	if (err)
+		goto err_free_inst;
+
+	err = -EINVAL;
+	dh_alg = crypto_spawn_kpp_alg(&ctx->dh_spawn);
+	if (strcmp(dh_alg->base.cra_name, "dh"))
+		goto err_free_inst;
+
+	ctx->safe_prime = safe_prime;
+
+	err = crypto_inst_setname(kpp_crypto_instance(inst),
+				  tmpl->name, &dh_alg->base);
+	if (err)
+		goto err_free_inst;
+
+	inst->alg.set_secret = dh_safe_prime_set_secret;
+	inst->alg.generate_public_key = dh_safe_prime_generate_public_key;
+	inst->alg.compute_shared_secret = dh_safe_prime_compute_shared_secret;
+	inst->alg.max_size = dh_safe_prime_max_size;
+	inst->alg.init = dh_safe_prime_init_tfm;
+	inst->alg.exit = dh_safe_prime_exit_tfm;
+	inst->alg.reqsize = sizeof(struct kpp_request) + dh_alg->reqsize;
+	inst->alg.base.cra_priority = dh_alg->base.cra_priority;
+	inst->alg.base.cra_module = THIS_MODULE;
+	inst->alg.base.cra_ctxsize = sizeof(struct dh_safe_prime_tfm_ctx);
+
+	inst->free = dh_safe_prime_free_instance;
+
+	err = kpp_register_instance(tmpl, inst);
+	if (err)
+		goto err_free_inst;
+
+	return 0;
+
+err_free_inst:
+	dh_safe_prime_free_instance(inst);
+
+	return err;
+}
+
+#ifdef CONFIG_CRYPTO_DH_RFC7919_GROUPS
+
+static const struct dh_safe_prime ffdhe2048_prime = {
+	.max_strength = 112,
+	.p_size = 256,
+	.p =
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a"
+	"\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95"
+	"\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9"
+	"\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a"
+	"\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0"
+	"\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35"
+	"\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72"
+	"\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a"
+	"\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb"
+	"\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4"
+	"\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70"
+	"\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61"
+	"\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83"
+	"\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05"
+	"\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa"
+	"\x88\x6b\x42\x38\x61\x28\x5c\x97\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+
+static const struct dh_safe_prime ffdhe3072_prime = {
+	.max_strength = 128,
+	.p_size = 384,
+	.p =
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a"
+	"\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95"
+	"\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9"
+	"\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a"
+	"\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0"
+	"\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35"
+	"\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72"
+	"\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a"
+	"\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb"
+	"\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4"
+	"\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70"
+	"\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61"
+	"\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83"
+	"\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05"
+	"\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa"
+	"\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b"
+	"\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07"
+	"\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c"
+	"\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44"
+	"\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff"
+	"\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d"
+	"\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e"
+	"\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c"
+	"\x25\xe4\x1d\x2b\x66\xc6\x2e\x37\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+
+static const struct dh_safe_prime ffdhe4096_prime = {
+	.max_strength = 152,
+	.p_size = 512,
+	.p =
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a"
+	"\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95"
+	"\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9"
+	"\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a"
+	"\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0"
+	"\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35"
+	"\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72"
+	"\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a"
+	"\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb"
+	"\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4"
+	"\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70"
+	"\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61"
+	"\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83"
+	"\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05"
+	"\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa"
+	"\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b"
+	"\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07"
+	"\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c"
+	"\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44"
+	"\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff"
+	"\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d"
+	"\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e"
+	"\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c"
+	"\x25\xe4\x1d\x2b\x66\x9e\x1e\xf1\x6e\x6f\x52\xc3\x16\x4d\xf4\xfb"
+	"\x79\x30\xe9\xe4\xe5\x88\x57\xb6\xac\x7d\x5f\x42\xd6\x9f\x6d\x18"
+	"\x77\x63\xcf\x1d\x55\x03\x40\x04\x87\xf5\x5b\xa5\x7e\x31\xcc\x7a"
+	"\x71\x35\xc8\x86\xef\xb4\x31\x8a\xed\x6a\x1e\x01\x2d\x9e\x68\x32"
+	"\xa9\x07\x60\x0a\x91\x81\x30\xc4\x6d\xc7\x78\xf9\x71\xad\x00\x38"
+	"\x09\x29\x99\xa3\x33\xcb\x8b\x7a\x1a\x1d\xb9\x3d\x71\x40\x00\x3c"
+	"\x2a\x4e\xce\xa9\xf9\x8d\x0a\xcc\x0a\x82\x91\xcd\xce\xc9\x7d\xcf"
+	"\x8e\xc9\xb5\x5a\x7f\x88\xa4\x6b\x4d\xb5\xa8\x51\xf4\x41\x82\xe1"
+	"\xc6\x8a\x00\x7e\x5e\x65\x5f\x6a\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+
+static const struct dh_safe_prime ffdhe6144_prime = {
+	.max_strength = 176,
+	.p_size = 768,
+	.p =
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a"
+	"\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95"
+	"\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9"
+	"\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a"
+	"\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0"
+	"\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35"
+	"\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72"
+	"\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a"
+	"\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb"
+	"\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4"
+	"\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70"
+	"\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61"
+	"\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83"
+	"\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05"
+	"\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa"
+	"\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b"
+	"\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07"
+	"\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c"
+	"\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44"
+	"\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff"
+	"\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d"
+	"\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e"
+	"\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c"
+	"\x25\xe4\x1d\x2b\x66\x9e\x1e\xf1\x6e\x6f\x52\xc3\x16\x4d\xf4\xfb"
+	"\x79\x30\xe9\xe4\xe5\x88\x57\xb6\xac\x7d\x5f\x42\xd6\x9f\x6d\x18"
+	"\x77\x63\xcf\x1d\x55\x03\x40\x04\x87\xf5\x5b\xa5\x7e\x31\xcc\x7a"
+	"\x71\x35\xc8\x86\xef\xb4\x31\x8a\xed\x6a\x1e\x01\x2d\x9e\x68\x32"
+	"\xa9\x07\x60\x0a\x91\x81\x30\xc4\x6d\xc7\x78\xf9\x71\xad\x00\x38"
+	"\x09\x29\x99\xa3\x33\xcb\x8b\x7a\x1a\x1d\xb9\x3d\x71\x40\x00\x3c"
+	"\x2a\x4e\xce\xa9\xf9\x8d\x0a\xcc\x0a\x82\x91\xcd\xce\xc9\x7d\xcf"
+	"\x8e\xc9\xb5\x5a\x7f\x88\xa4\x6b\x4d\xb5\xa8\x51\xf4\x41\x82\xe1"
+	"\xc6\x8a\x00\x7e\x5e\x0d\xd9\x02\x0b\xfd\x64\xb6\x45\x03\x6c\x7a"
+	"\x4e\x67\x7d\x2c\x38\x53\x2a\x3a\x23\xba\x44\x42\xca\xf5\x3e\xa6"
+	"\x3b\xb4\x54\x32\x9b\x76\x24\xc8\x91\x7b\xdd\x64\xb1\xc0\xfd\x4c"
+	"\xb3\x8e\x8c\x33\x4c\x70\x1c\x3a\xcd\xad\x06\x57\xfc\xcf\xec\x71"
+	"\x9b\x1f\x5c\x3e\x4e\x46\x04\x1f\x38\x81\x47\xfb\x4c\xfd\xb4\x77"
+	"\xa5\x24\x71\xf7\xa9\xa9\x69\x10\xb8\x55\x32\x2e\xdb\x63\x40\xd8"
+	"\xa0\x0e\xf0\x92\x35\x05\x11\xe3\x0a\xbe\xc1\xff\xf9\xe3\xa2\x6e"
+	"\x7f\xb2\x9f\x8c\x18\x30\x23\xc3\x58\x7e\x38\xda\x00\x77\xd9\xb4"
+	"\x76\x3e\x4e\x4b\x94\xb2\xbb\xc1\x94\xc6\x65\x1e\x77\xca\xf9\x92"
+	"\xee\xaa\xc0\x23\x2a\x28\x1b\xf6\xb3\xa7\x39\xc1\x22\x61\x16\x82"
+	"\x0a\xe8\xdb\x58\x47\xa6\x7c\xbe\xf9\xc9\x09\x1b\x46\x2d\x53\x8c"
+	"\xd7\x2b\x03\x74\x6a\xe7\x7f\x5e\x62\x29\x2c\x31\x15\x62\xa8\x46"
+	"\x50\x5d\xc8\x2d\xb8\x54\x33\x8a\xe4\x9f\x52\x35\xc9\x5b\x91\x17"
+	"\x8c\xcf\x2d\xd5\xca\xce\xf4\x03\xec\x9d\x18\x10\xc6\x27\x2b\x04"
+	"\x5b\x3b\x71\xf9\xdc\x6b\x80\xd6\x3f\xdd\x4a\x8e\x9a\xdb\x1e\x69"
+	"\x62\xa6\x95\x26\xd4\x31\x61\xc1\xa4\x1d\x57\x0d\x79\x38\xda\xd4"
+	"\xa4\x0e\x32\x9c\xd0\xe4\x0e\x65\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+
+static const struct dh_safe_prime ffdhe8192_prime = {
+	.max_strength = 200,
+	.p_size = 1024,
+	.p =
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a"
+	"\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95"
+	"\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9"
+	"\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a"
+	"\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0"
+	"\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35"
+	"\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72"
+	"\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a"
+	"\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb"
+	"\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4"
+	"\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70"
+	"\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61"
+	"\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83"
+	"\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05"
+	"\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa"
+	"\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b"
+	"\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07"
+	"\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c"
+	"\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44"
+	"\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff"
+	"\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d"
+	"\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e"
+	"\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c"
+	"\x25\xe4\x1d\x2b\x66\x9e\x1e\xf1\x6e\x6f\x52\xc3\x16\x4d\xf4\xfb"
+	"\x79\x30\xe9\xe4\xe5\x88\x57\xb6\xac\x7d\x5f\x42\xd6\x9f\x6d\x18"
+	"\x77\x63\xcf\x1d\x55\x03\x40\x04\x87\xf5\x5b\xa5\x7e\x31\xcc\x7a"
+	"\x71\x35\xc8\x86\xef\xb4\x31\x8a\xed\x6a\x1e\x01\x2d\x9e\x68\x32"
+	"\xa9\x07\x60\x0a\x91\x81\x30\xc4\x6d\xc7\x78\xf9\x71\xad\x00\x38"
+	"\x09\x29\x99\xa3\x33\xcb\x8b\x7a\x1a\x1d\xb9\x3d\x71\x40\x00\x3c"
+	"\x2a\x4e\xce\xa9\xf9\x8d\x0a\xcc\x0a\x82\x91\xcd\xce\xc9\x7d\xcf"
+	"\x8e\xc9\xb5\x5a\x7f\x88\xa4\x6b\x4d\xb5\xa8\x51\xf4\x41\x82\xe1"
+	"\xc6\x8a\x00\x7e\x5e\x0d\xd9\x02\x0b\xfd\x64\xb6\x45\x03\x6c\x7a"
+	"\x4e\x67\x7d\x2c\x38\x53\x2a\x3a\x23\xba\x44\x42\xca\xf5\x3e\xa6"
+	"\x3b\xb4\x54\x32\x9b\x76\x24\xc8\x91\x7b\xdd\x64\xb1\xc0\xfd\x4c"
+	"\xb3\x8e\x8c\x33\x4c\x70\x1c\x3a\xcd\xad\x06\x57\xfc\xcf\xec\x71"
+	"\x9b\x1f\x5c\x3e\x4e\x46\x04\x1f\x38\x81\x47\xfb\x4c\xfd\xb4\x77"
+	"\xa5\x24\x71\xf7\xa9\xa9\x69\x10\xb8\x55\x32\x2e\xdb\x63\x40\xd8"
+	"\xa0\x0e\xf0\x92\x35\x05\x11\xe3\x0a\xbe\xc1\xff\xf9\xe3\xa2\x6e"
+	"\x7f\xb2\x9f\x8c\x18\x30\x23\xc3\x58\x7e\x38\xda\x00\x77\xd9\xb4"
+	"\x76\x3e\x4e\x4b\x94\xb2\xbb\xc1\x94\xc6\x65\x1e\x77\xca\xf9\x92"
+	"\xee\xaa\xc0\x23\x2a\x28\x1b\xf6\xb3\xa7\x39\xc1\x22\x61\x16\x82"
+	"\x0a\xe8\xdb\x58\x47\xa6\x7c\xbe\xf9\xc9\x09\x1b\x46\x2d\x53\x8c"
+	"\xd7\x2b\x03\x74\x6a\xe7\x7f\x5e\x62\x29\x2c\x31\x15\x62\xa8\x46"
+	"\x50\x5d\xc8\x2d\xb8\x54\x33\x8a\xe4\x9f\x52\x35\xc9\x5b\x91\x17"
+	"\x8c\xcf\x2d\xd5\xca\xce\xf4\x03\xec\x9d\x18\x10\xc6\x27\x2b\x04"
+	"\x5b\x3b\x71\xf9\xdc\x6b\x80\xd6\x3f\xdd\x4a\x8e\x9a\xdb\x1e\x69"
+	"\x62\xa6\x95\x26\xd4\x31\x61\xc1\xa4\x1d\x57\x0d\x79\x38\xda\xd4"
+	"\xa4\x0e\x32\x9c\xcf\xf4\x6a\xaa\x36\xad\x00\x4c\xf6\x00\xc8\x38"
+	"\x1e\x42\x5a\x31\xd9\x51\xae\x64\xfd\xb2\x3f\xce\xc9\x50\x9d\x43"
+	"\x68\x7f\xeb\x69\xed\xd1\xcc\x5e\x0b\x8c\xc3\xbd\xf6\x4b\x10\xef"
+	"\x86\xb6\x31\x42\xa3\xab\x88\x29\x55\x5b\x2f\x74\x7c\x93\x26\x65"
+	"\xcb\x2c\x0f\x1c\xc0\x1b\xd7\x02\x29\x38\x88\x39\xd2\xaf\x05\xe4"
+	"\x54\x50\x4a\xc7\x8b\x75\x82\x82\x28\x46\xc0\xba\x35\xc3\x5f\x5c"
+	"\x59\x16\x0c\xc0\x46\xfd\x82\x51\x54\x1f\xc6\x8c\x9c\x86\xb0\x22"
+	"\xbb\x70\x99\x87\x6a\x46\x0e\x74\x51\xa8\xa9\x31\x09\x70\x3f\xee"
+	"\x1c\x21\x7e\x6c\x38\x26\xe5\x2c\x51\xaa\x69\x1e\x0e\x42\x3c\xfc"
+	"\x99\xe9\xe3\x16\x50\xc1\x21\x7b\x62\x48\x16\xcd\xad\x9a\x95\xf9"
+	"\xd5\xb8\x01\x94\x88\xd9\xc0\xa0\xa1\xfe\x30\x75\xa5\x77\xe2\x31"
+	"\x83\xf8\x1d\x4a\x3f\x2f\xa4\x57\x1e\xfc\x8c\xe0\xba\x8a\x4f\xe8"
+	"\xb6\x85\x5d\xfe\x72\xb0\xa6\x6e\xde\xd2\xfb\xab\xfb\xe5\x8a\x30"
+	"\xfa\xfa\xbe\x1c\x5d\x71\xa8\x7e\x2f\x74\x1e\xf8\xc1\xfe\x86\xfe"
+	"\xa6\xbb\xfd\xe5\x30\x67\x7f\x0d\x97\xd1\x1d\x49\xf7\xa8\x44\x3d"
+	"\x08\x22\xe5\x06\xa9\xf4\x61\x4e\x01\x1e\x2a\x94\x83\x8f\xf8\x8c"
+	"\xd6\x8c\x8b\xb7\xc5\xc6\x42\x4c\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+
+static int dh_ffdhe2048_create(struct crypto_template *tmpl,
+			       struct rtattr **tb)
+{
+	return  __dh_safe_prime_create(tmpl, tb, &ffdhe2048_prime);
+}
+
+static int dh_ffdhe3072_create(struct crypto_template *tmpl,
+			       struct rtattr **tb)
+{
+	return  __dh_safe_prime_create(tmpl, tb, &ffdhe3072_prime);
+}
+
+static int dh_ffdhe4096_create(struct crypto_template *tmpl,
+			       struct rtattr **tb)
+{
+	return  __dh_safe_prime_create(tmpl, tb, &ffdhe4096_prime);
+}
+
+static int dh_ffdhe6144_create(struct crypto_template *tmpl,
+			       struct rtattr **tb)
+{
+	return  __dh_safe_prime_create(tmpl, tb, &ffdhe6144_prime);
+}
+
+static int dh_ffdhe8192_create(struct crypto_template *tmpl,
+			       struct rtattr **tb)
+{
+	return  __dh_safe_prime_create(tmpl, tb, &ffdhe8192_prime);
+}
+
+static struct crypto_template crypto_ffdhe_templates[] = {
+	{
+		.name = "ffdhe2048",
+		.create = dh_ffdhe2048_create,
+		.module = THIS_MODULE,
+	},
+	{
+		.name = "ffdhe3072",
+		.create = dh_ffdhe3072_create,
+		.module = THIS_MODULE,
+	},
+	{
+		.name = "ffdhe4096",
+		.create = dh_ffdhe4096_create,
+		.module = THIS_MODULE,
+	},
+	{
+		.name = "ffdhe6144",
+		.create = dh_ffdhe6144_create,
+		.module = THIS_MODULE,
+	},
+	{
+		.name = "ffdhe8192",
+		.create = dh_ffdhe8192_create,
+		.module = THIS_MODULE,
+	},
+};
+
+#else /* ! CONFIG_CRYPTO_DH_RFC7919_GROUPS */
+
+static struct crypto_template crypto_ffdhe_templates[] = {};
+
+#endif /* CONFIG_CRYPTO_DH_RFC7919_GROUPS */
+
+
 static int dh_init(void)
 {
-	return crypto_register_kpp(&dh);
+	int err;
+
+	err = crypto_register_kpp(&dh);
+	if (err)
+		return err;
+
+	err = crypto_register_templates(crypto_ffdhe_templates,
+					ARRAY_SIZE(crypto_ffdhe_templates));
+	if (err) {
+		crypto_unregister_kpp(&dh);
+		return err;
+	}
+
+	return 0;
 }
 
 static void dh_exit(void)
 {
+	crypto_unregister_templates(crypto_ffdhe_templates,
+				    ARRAY_SIZE(crypto_ffdhe_templates));
 	crypto_unregister_kpp(&dh);
 }
 
diff --git a/crypto/dh_helper.c b/crypto/dh_helper.c
index 9fd5a42eea15..2d499879328b 100644
--- a/crypto/dh_helper.c
+++ b/crypto/dh_helper.c
@@ -10,7 +10,7 @@
 #include <crypto/dh.h>
 #include <crypto/kpp.h>
 
-#define DH_KPP_SECRET_MIN_SIZE (sizeof(struct kpp_secret) + 4 * sizeof(int))
+#define DH_KPP_SECRET_MIN_SIZE (sizeof(struct kpp_secret) + 3 * sizeof(int))
 
 static inline u8 *dh_pack_data(u8 *dst, u8 *end, const void *src, size_t size)
 {
@@ -28,7 +28,7 @@ static inline const u8 *dh_unpack_data(void *dst, const void *src, size_t size)
 
 static inline unsigned int dh_data_size(const struct dh *p)
 {
-	return p->key_size + p->p_size + p->q_size + p->g_size;
+	return p->key_size + p->p_size + p->g_size;
 }
 
 unsigned int crypto_dh_key_len(const struct dh *p)
@@ -53,11 +53,9 @@ int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params)
 	ptr = dh_pack_data(ptr, end, &params->key_size,
 			   sizeof(params->key_size));
 	ptr = dh_pack_data(ptr, end, &params->p_size, sizeof(params->p_size));
-	ptr = dh_pack_data(ptr, end, &params->q_size, sizeof(params->q_size));
 	ptr = dh_pack_data(ptr, end, &params->g_size, sizeof(params->g_size));
 	ptr = dh_pack_data(ptr, end, params->key, params->key_size);
 	ptr = dh_pack_data(ptr, end, params->p, params->p_size);
-	ptr = dh_pack_data(ptr, end, params->q, params->q_size);
 	ptr = dh_pack_data(ptr, end, params->g, params->g_size);
 	if (ptr != end)
 		return -EINVAL;
@@ -65,7 +63,7 @@ int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params)
 }
 EXPORT_SYMBOL_GPL(crypto_dh_encode_key);
 
-int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
+int __crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
 {
 	const u8 *ptr = buf;
 	struct kpp_secret secret;
@@ -79,28 +77,36 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
 
 	ptr = dh_unpack_data(&params->key_size, ptr, sizeof(params->key_size));
 	ptr = dh_unpack_data(&params->p_size, ptr, sizeof(params->p_size));
-	ptr = dh_unpack_data(&params->q_size, ptr, sizeof(params->q_size));
 	ptr = dh_unpack_data(&params->g_size, ptr, sizeof(params->g_size));
 	if (secret.len != crypto_dh_key_len(params))
 		return -EINVAL;
 
+	/* Don't allocate memory. Set pointers to data within
+	 * the given buffer
+	 */
+	params->key = (void *)ptr;
+	params->p = (void *)(ptr + params->key_size);
+	params->g = (void *)(ptr + params->key_size + params->p_size);
+
+	return 0;
+}
+
+int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
+{
+	int err;
+
+	err = __crypto_dh_decode_key(buf, len, params);
+	if (err)
+		return err;
+
 	/*
 	 * Don't permit the buffer for 'key' or 'g' to be larger than 'p', since
 	 * some drivers assume otherwise.
 	 */
 	if (params->key_size > params->p_size ||
-	    params->g_size > params->p_size || params->q_size > params->p_size)
+	    params->g_size > params->p_size)
 		return -EINVAL;
 
-	/* Don't allocate memory. Set pointers to data within
-	 * the given buffer
-	 */
-	params->key = (void *)ptr;
-	params->p = (void *)(ptr + params->key_size);
-	params->q = (void *)(ptr + params->key_size + params->p_size);
-	params->g = (void *)(ptr + params->key_size + params->p_size +
-			     params->q_size);
-
 	/*
 	 * Don't permit 'p' to be 0.  It's not a prime number, and it's subject
 	 * to corner cases such as 'mod 0' being undefined or
@@ -109,10 +115,6 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
 	if (memchr_inv(params->p, 0, params->p_size) == NULL)
 		return -EINVAL;
 
-	/* It is permissible to not provide Q. */
-	if (params->q_size == 0)
-		params->q = NULL;
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_dh_decode_key);
diff --git a/crypto/hmac.c b/crypto/hmac.c
index 25856aa7ccbf..3610ff0b6739 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -15,6 +15,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/scatterwalk.h>
 #include <linux/err.h>
+#include <linux/fips.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -51,6 +52,9 @@ static int hmac_setkey(struct crypto_shash *parent,
 	SHASH_DESC_ON_STACK(shash, hash);
 	unsigned int i;
 
+	if (fips_enabled && (keylen < 112 / 8))
+		return -EINVAL;
+
 	shash->tfm = hash;
 
 	if (keylen > bs) {
diff --git a/crypto/kpp.c b/crypto/kpp.c
index 313b2c699963..7aa6ba4b60a4 100644
--- a/crypto/kpp.c
+++ b/crypto/kpp.c
@@ -68,9 +68,17 @@ static int crypto_kpp_init_tfm(struct crypto_tfm *tfm)
 	return 0;
 }
 
+static void crypto_kpp_free_instance(struct crypto_instance *inst)
+{
+	struct kpp_instance *kpp = kpp_instance(inst);
+
+	kpp->free(kpp);
+}
+
 static const struct crypto_type crypto_kpp_type = {
 	.extsize = crypto_alg_extsize,
 	.init_tfm = crypto_kpp_init_tfm,
+	.free = crypto_kpp_free_instance,
 #ifdef CONFIG_PROC_FS
 	.show = crypto_kpp_show,
 #endif
@@ -87,6 +95,15 @@ struct crypto_kpp *crypto_alloc_kpp(const char *alg_name, u32 type, u32 mask)
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_kpp);
 
+int crypto_grab_kpp(struct crypto_kpp_spawn *spawn,
+		    struct crypto_instance *inst,
+		    const char *name, u32 type, u32 mask)
+{
+	spawn->base.frontend = &crypto_kpp_type;
+	return crypto_grab_spawn(&spawn->base, inst, name, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_grab_kpp);
+
 static void kpp_prepare_alg(struct kpp_alg *alg)
 {
 	struct crypto_alg *base = &alg->base;
@@ -111,5 +128,17 @@ void crypto_unregister_kpp(struct kpp_alg *alg)
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_kpp);
 
+int kpp_register_instance(struct crypto_template *tmpl,
+			  struct kpp_instance *inst)
+{
+	if (WARN_ON(!inst->free))
+		return -EINVAL;
+
+	kpp_prepare_alg(&inst->alg);
+
+	return crypto_register_instance(tmpl, kpp_crypto_instance(inst));
+}
+EXPORT_SYMBOL_GPL(kpp_register_instance);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Key-agreement Protocol Primitives");
diff --git a/crypto/lrw.c b/crypto/lrw.c
index bcf09fbc750a..8d59a66b6525 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -428,3 +428,4 @@ module_exit(lrw_module_exit);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("LRW block cipher mode");
 MODULE_ALIAS_CRYPTO("lrw");
+MODULE_SOFTDEP("pre: ecb");
diff --git a/crypto/memneq.c b/crypto/memneq.c
index afed1bd16aee..fb11608b1ec1 100644
--- a/crypto/memneq.c
+++ b/crypto/memneq.c
@@ -60,6 +60,7 @@
  */
 
 #include <crypto/algapi.h>
+#include <asm/unaligned.h>
 
 #ifndef __HAVE_ARCH_CRYPTO_MEMNEQ
 
@@ -71,7 +72,8 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size)
 
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
 	while (size >= sizeof(unsigned long)) {
-		neq |= *(unsigned long *)a ^ *(unsigned long *)b;
+		neq |= get_unaligned((unsigned long *)a) ^
+		       get_unaligned((unsigned long *)b);
 		OPTIMIZER_HIDE_VAR(neq);
 		a += sizeof(unsigned long);
 		b += sizeof(unsigned long);
@@ -95,18 +97,24 @@ static inline unsigned long __crypto_memneq_16(const void *a, const void *b)
 
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 	if (sizeof(unsigned long) == 8) {
-		neq |= *(unsigned long *)(a)   ^ *(unsigned long *)(b);
+		neq |= get_unaligned((unsigned long *)a) ^
+		       get_unaligned((unsigned long *)b);
 		OPTIMIZER_HIDE_VAR(neq);
-		neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8);
+		neq |= get_unaligned((unsigned long *)(a + 8)) ^
+		       get_unaligned((unsigned long *)(b + 8));
 		OPTIMIZER_HIDE_VAR(neq);
 	} else if (sizeof(unsigned int) == 4) {
-		neq |= *(unsigned int *)(a)    ^ *(unsigned int *)(b);
+		neq |= get_unaligned((unsigned int *)a) ^
+		       get_unaligned((unsigned int *)b);
 		OPTIMIZER_HIDE_VAR(neq);
-		neq |= *(unsigned int *)(a+4)  ^ *(unsigned int *)(b+4);
+		neq |= get_unaligned((unsigned int *)(a + 4)) ^
+		       get_unaligned((unsigned int *)(b + 4));
 		OPTIMIZER_HIDE_VAR(neq);
-		neq |= *(unsigned int *)(a+8)  ^ *(unsigned int *)(b+8);
+		neq |= get_unaligned((unsigned int *)(a + 8)) ^
+		       get_unaligned((unsigned int *)(b + 8));
 		OPTIMIZER_HIDE_VAR(neq);
-		neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12);
+		neq |= get_unaligned((unsigned int *)(a + 12)) ^
+		       get_unaligned((unsigned int *)(b + 12));
 		OPTIMIZER_HIDE_VAR(neq);
 	} else
 #endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 8ac3e73e8ea6..3285e3af43e1 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -385,15 +385,15 @@ static int pkcs1pad_sign(struct akcipher_request *req)
 	struct pkcs1pad_inst_ctx *ictx = akcipher_instance_ctx(inst);
 	const struct rsa_asn1_template *digest_info = ictx->digest_info;
 	int err;
-	unsigned int ps_end, digest_size = 0;
+	unsigned int ps_end, digest_info_size = 0;
 
 	if (!ctx->key_size)
 		return -EINVAL;
 
 	if (digest_info)
-		digest_size = digest_info->size;
+		digest_info_size = digest_info->size;
 
-	if (req->src_len + digest_size > ctx->key_size - 11)
+	if (req->src_len + digest_info_size > ctx->key_size - 11)
 		return -EOVERFLOW;
 
 	if (req->dst_len < ctx->key_size) {
@@ -406,7 +406,7 @@ static int pkcs1pad_sign(struct akcipher_request *req)
 	if (!req_ctx->in_buf)
 		return -ENOMEM;
 
-	ps_end = ctx->key_size - digest_size - req->src_len - 2;
+	ps_end = ctx->key_size - digest_info_size - req->src_len - 2;
 	req_ctx->in_buf[0] = 0x01;
 	memset(req_ctx->in_buf + 1, 0xff, ps_end - 1);
 	req_ctx->in_buf[ps_end] = 0x00;
@@ -441,6 +441,8 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
 	struct akcipher_instance *inst = akcipher_alg_instance(tfm);
 	struct pkcs1pad_inst_ctx *ictx = akcipher_instance_ctx(inst);
 	const struct rsa_asn1_template *digest_info = ictx->digest_info;
+	const unsigned int sig_size = req->src_len;
+	const unsigned int digest_size = req->dst_len;
 	unsigned int dst_len;
 	unsigned int pos;
 	u8 *out_buf;
@@ -476,6 +478,8 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
 	pos++;
 
 	if (digest_info) {
+		if (digest_info->size > dst_len - pos)
+			goto done;
 		if (crypto_memneq(out_buf + pos, digest_info->data,
 				  digest_info->size))
 			goto done;
@@ -485,20 +489,19 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
 
 	err = 0;
 
-	if (req->dst_len != dst_len - pos) {
+	if (digest_size != dst_len - pos) {
 		err = -EKEYREJECTED;
 		req->dst_len = dst_len - pos;
 		goto done;
 	}
 	/* Extract appended digest. */
 	sg_pcopy_to_buffer(req->src,
-			   sg_nents_for_len(req->src,
-					    req->src_len + req->dst_len),
+			   sg_nents_for_len(req->src, sig_size + digest_size),
 			   req_ctx->out_buf + ctx->key_size,
-			   req->dst_len, ctx->key_size);
+			   digest_size, sig_size);
 	/* Do the actual verification step. */
 	if (memcmp(req_ctx->out_buf + ctx->key_size, out_buf + pos,
-		   req->dst_len) != 0)
+		   digest_size) != 0)
 		err = -EKEYREJECTED;
 done:
 	kfree_sensitive(req_ctx->out_buf);
@@ -534,14 +537,15 @@ static int pkcs1pad_verify(struct akcipher_request *req)
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm);
 	struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req);
+	const unsigned int sig_size = req->src_len;
+	const unsigned int digest_size = req->dst_len;
 	int err;
 
-	if (WARN_ON(req->dst) ||
-	    WARN_ON(!req->dst_len) ||
-	    !ctx->key_size || req->src_len < ctx->key_size)
+	if (WARN_ON(req->dst) || WARN_ON(!digest_size) ||
+	    !ctx->key_size || sig_size != ctx->key_size)
 		return -EINVAL;
 
-	req_ctx->out_buf = kmalloc(ctx->key_size + req->dst_len, GFP_KERNEL);
+	req_ctx->out_buf = kmalloc(ctx->key_size + digest_size, GFP_KERNEL);
 	if (!req_ctx->out_buf)
 		return -ENOMEM;
 
@@ -554,8 +558,7 @@ static int pkcs1pad_verify(struct akcipher_request *req)
 
 	/* Reuse input buffer, output to a new buffer */
 	akcipher_request_set_crypt(&req_ctx->child_req, req->src,
-				   req_ctx->out_sg, req->src_len,
-				   ctx->key_size);
+				   req_ctx->out_sg, sig_size, ctx->key_size);
 
 	err = crypto_akcipher_encrypt(&req_ctx->child_req);
 	if (err != -EINPROGRESS && err != -EBUSY)
@@ -621,6 +624,11 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
 
 	rsa_alg = crypto_spawn_akcipher_alg(&ctx->spawn);
 
+	if (strcmp(rsa_alg->base.cra_name, "rsa") != 0) {
+		err = -EINVAL;
+		goto err_free_inst;
+	}
+
 	err = -ENAMETOOLONG;
 	hash_name = crypto_attr_alg_name(tb[2]);
 	if (IS_ERR(hash_name)) {
diff --git a/crypto/sm2.c b/crypto/sm2.c
index db8a4a265669..f3e1592965c0 100644
--- a/crypto/sm2.c
+++ b/crypto/sm2.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * SM2 asymmetric public-key algorithm
  * as specified by OSCCA GM/T 0003.1-2012 -- 0003.5-2012 SM2 and
@@ -13,7 +13,7 @@
 #include <crypto/internal/akcipher.h>
 #include <crypto/akcipher.h>
 #include <crypto/hash.h>
-#include <crypto/sm3_base.h>
+#include <crypto/sm3.h>
 #include <crypto/rng.h>
 #include <crypto/sm2.h>
 #include "sm2signature.asn1.h"
@@ -213,7 +213,7 @@ int sm2_get_signature_s(void *context, size_t hdrlen, unsigned char tag,
 	return 0;
 }
 
-static int sm2_z_digest_update(struct shash_desc *desc,
+static int sm2_z_digest_update(struct sm3_state *sctx,
 			MPI m, unsigned int pbytes)
 {
 	static const unsigned char zero[32];
@@ -226,20 +226,20 @@ static int sm2_z_digest_update(struct shash_desc *desc,
 
 	if (inlen < pbytes) {
 		/* padding with zero */
-		crypto_sm3_update(desc, zero, pbytes - inlen);
-		crypto_sm3_update(desc, in, inlen);
+		sm3_update(sctx, zero, pbytes - inlen);
+		sm3_update(sctx, in, inlen);
 	} else if (inlen > pbytes) {
 		/* skip the starting zero */
-		crypto_sm3_update(desc, in + inlen - pbytes, pbytes);
+		sm3_update(sctx, in + inlen - pbytes, pbytes);
 	} else {
-		crypto_sm3_update(desc, in, inlen);
+		sm3_update(sctx, in, inlen);
 	}
 
 	kfree(in);
 	return 0;
 }
 
-static int sm2_z_digest_update_point(struct shash_desc *desc,
+static int sm2_z_digest_update_point(struct sm3_state *sctx,
 		MPI_POINT point, struct mpi_ec_ctx *ec, unsigned int pbytes)
 {
 	MPI x, y;
@@ -249,8 +249,8 @@ static int sm2_z_digest_update_point(struct shash_desc *desc,
 	y = mpi_new(0);
 
 	if (!mpi_ec_get_affine(x, y, point, ec) &&
-		!sm2_z_digest_update(desc, x, pbytes) &&
-		!sm2_z_digest_update(desc, y, pbytes))
+	    !sm2_z_digest_update(sctx, x, pbytes) &&
+	    !sm2_z_digest_update(sctx, y, pbytes))
 		ret = 0;
 
 	mpi_free(x);
@@ -265,7 +265,7 @@ int sm2_compute_z_digest(struct crypto_akcipher *tfm,
 	struct mpi_ec_ctx *ec = akcipher_tfm_ctx(tfm);
 	uint16_t bits_len;
 	unsigned char entl[2];
-	SHASH_DESC_ON_STACK(desc, NULL);
+	struct sm3_state sctx;
 	unsigned int pbytes;
 
 	if (id_len > (USHRT_MAX / 8) || !ec->Q)
@@ -278,17 +278,17 @@ int sm2_compute_z_digest(struct crypto_akcipher *tfm,
 	pbytes = MPI_NBYTES(ec->p);
 
 	/* ZA = H256(ENTLA | IDA | a | b | xG | yG | xA | yA) */
-	sm3_base_init(desc);
-	crypto_sm3_update(desc, entl, 2);
-	crypto_sm3_update(desc, id, id_len);
-
-	if (sm2_z_digest_update(desc, ec->a, pbytes) ||
-		sm2_z_digest_update(desc, ec->b, pbytes) ||
-		sm2_z_digest_update_point(desc, ec->G, ec, pbytes) ||
-		sm2_z_digest_update_point(desc, ec->Q, ec, pbytes))
+	sm3_init(&sctx);
+	sm3_update(&sctx, entl, 2);
+	sm3_update(&sctx, id, id_len);
+
+	if (sm2_z_digest_update(&sctx, ec->a, pbytes) ||
+	    sm2_z_digest_update(&sctx, ec->b, pbytes) ||
+	    sm2_z_digest_update_point(&sctx, ec->G, ec, pbytes) ||
+	    sm2_z_digest_update_point(&sctx, ec->Q, ec, pbytes))
 		return -EINVAL;
 
-	crypto_sm3_final(desc, dgst);
+	sm3_final(&sctx, dgst);
 	return 0;
 }
 EXPORT_SYMBOL(sm2_compute_z_digest);
diff --git a/crypto/sm3_generic.c b/crypto/sm3_generic.c
index 193c4584bd00..a215c1c37e73 100644
--- a/crypto/sm3_generic.c
+++ b/crypto/sm3_generic.c
@@ -5,6 +5,7 @@
  *
  * Copyright (C) 2017 ARM Limited or its affiliates.
  * Written by Gilad Ben-Yossef <[email protected]>
+ * Copyright (C) 2021 Tianjia Zhang <[email protected]>
  */
 
 #include <crypto/internal/hash.h>
@@ -26,143 +27,29 @@ const u8 sm3_zero_message_hash[SM3_DIGEST_SIZE] = {
 };
 EXPORT_SYMBOL_GPL(sm3_zero_message_hash);
 
-static inline u32 p0(u32 x)
-{
-	return x ^ rol32(x, 9) ^ rol32(x, 17);
-}
-
-static inline u32 p1(u32 x)
-{
-	return x ^ rol32(x, 15) ^ rol32(x, 23);
-}
-
-static inline u32 ff(unsigned int n, u32 a, u32 b, u32 c)
-{
-	return (n < 16) ? (a ^ b ^ c) : ((a & b) | (a & c) | (b & c));
-}
-
-static inline u32 gg(unsigned int n, u32 e, u32 f, u32 g)
-{
-	return (n < 16) ? (e ^ f ^ g) : ((e & f) | ((~e) & g));
-}
-
-static inline u32 t(unsigned int n)
-{
-	return (n < 16) ? SM3_T1 : SM3_T2;
-}
-
-static void sm3_expand(u32 *t, u32 *w, u32 *wt)
-{
-	int i;
-	unsigned int tmp;
-
-	/* load the input */
-	for (i = 0; i <= 15; i++)
-		w[i] = get_unaligned_be32((__u32 *)t + i);
-
-	for (i = 16; i <= 67; i++) {
-		tmp = w[i - 16] ^ w[i - 9] ^ rol32(w[i - 3], 15);
-		w[i] = p1(tmp) ^ (rol32(w[i - 13], 7)) ^ w[i - 6];
-	}
-
-	for (i = 0; i <= 63; i++)
-		wt[i] = w[i] ^ w[i + 4];
-}
-
-static void sm3_compress(u32 *w, u32 *wt, u32 *m)
-{
-	u32 ss1;
-	u32 ss2;
-	u32 tt1;
-	u32 tt2;
-	u32 a, b, c, d, e, f, g, h;
-	int i;
-
-	a = m[0];
-	b = m[1];
-	c = m[2];
-	d = m[3];
-	e = m[4];
-	f = m[5];
-	g = m[6];
-	h = m[7];
-
-	for (i = 0; i <= 63; i++) {
-
-		ss1 = rol32((rol32(a, 12) + e + rol32(t(i), i & 31)), 7);
-
-		ss2 = ss1 ^ rol32(a, 12);
-
-		tt1 = ff(i, a, b, c) + d + ss2 + *wt;
-		wt++;
-
-		tt2 = gg(i, e, f, g) + h + ss1 + *w;
-		w++;
-
-		d = c;
-		c = rol32(b, 9);
-		b = a;
-		a = tt1;
-		h = g;
-		g = rol32(f, 19);
-		f = e;
-		e = p0(tt2);
-	}
-
-	m[0] = a ^ m[0];
-	m[1] = b ^ m[1];
-	m[2] = c ^ m[2];
-	m[3] = d ^ m[3];
-	m[4] = e ^ m[4];
-	m[5] = f ^ m[5];
-	m[6] = g ^ m[6];
-	m[7] = h ^ m[7];
-
-	a = b = c = d = e = f = g = h = ss1 = ss2 = tt1 = tt2 = 0;
-}
-
-static void sm3_transform(struct sm3_state *sst, u8 const *src)
-{
-	unsigned int w[68];
-	unsigned int wt[64];
-
-	sm3_expand((u32 *)src, w, wt);
-	sm3_compress(w, wt, sst->state);
-
-	memzero_explicit(w, sizeof(w));
-	memzero_explicit(wt, sizeof(wt));
-}
-
-static void sm3_generic_block_fn(struct sm3_state *sst, u8 const *src,
-				    int blocks)
-{
-	while (blocks--) {
-		sm3_transform(sst, src);
-		src += SM3_BLOCK_SIZE;
-	}
-}
-
-int crypto_sm3_update(struct shash_desc *desc, const u8 *data,
+static int crypto_sm3_update(struct shash_desc *desc, const u8 *data,
 			  unsigned int len)
 {
-	return sm3_base_do_update(desc, data, len, sm3_generic_block_fn);
+	sm3_update(shash_desc_ctx(desc), data, len);
+	return 0;
 }
-EXPORT_SYMBOL(crypto_sm3_update);
 
-int crypto_sm3_final(struct shash_desc *desc, u8 *out)
+static int crypto_sm3_final(struct shash_desc *desc, u8 *out)
 {
-	sm3_base_do_finalize(desc, sm3_generic_block_fn);
-	return sm3_base_finish(desc, out);
+	sm3_final(shash_desc_ctx(desc), out);
+	return 0;
 }
-EXPORT_SYMBOL(crypto_sm3_final);
 
-int crypto_sm3_finup(struct shash_desc *desc, const u8 *data,
+static int crypto_sm3_finup(struct shash_desc *desc, const u8 *data,
 			unsigned int len, u8 *hash)
 {
-	sm3_base_do_update(desc, data, len, sm3_generic_block_fn);
-	return crypto_sm3_final(desc, hash);
+	struct sm3_state *sctx = shash_desc_ctx(desc);
+
+	if (len)
+		sm3_update(sctx, data, len);
+	sm3_final(sctx, hash);
+	return 0;
 }
-EXPORT_SYMBOL(crypto_sm3_finup);
 
 static struct shash_alg sm3_alg = {
 	.digestsize	=	SM3_DIGEST_SIZE,
@@ -174,6 +61,7 @@ static struct shash_alg sm3_alg = {
 	.base		=	{
 		.cra_name	 =	"sm3",
 		.cra_driver_name =	"sm3-generic",
+		.cra_priority	=	100,
 		.cra_blocksize	 =	SM3_BLOCK_SIZE,
 		.cra_module	 =	THIS_MODULE,
 	}
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 00149657a4bc..2bacf8384f59 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -724,200 +724,6 @@ static inline int do_one_ahash_op(struct ahash_request *req, int ret)
 	return crypto_wait_req(ret, wait);
 }
 
-struct test_mb_ahash_data {
-	struct scatterlist sg[XBUFSIZE];
-	char result[64];
-	struct ahash_request *req;
-	struct crypto_wait wait;
-	char *xbuf[XBUFSIZE];
-};
-
-static inline int do_mult_ahash_op(struct test_mb_ahash_data *data, u32 num_mb,
-				   int *rc)
-{
-	int i, err = 0;
-
-	/* Fire up a bunch of concurrent requests */
-	for (i = 0; i < num_mb; i++)
-		rc[i] = crypto_ahash_digest(data[i].req);
-
-	/* Wait for all requests to finish */
-	for (i = 0; i < num_mb; i++) {
-		rc[i] = crypto_wait_req(rc[i], &data[i].wait);
-
-		if (rc[i]) {
-			pr_info("concurrent request %d error %d\n", i, rc[i]);
-			err = rc[i];
-		}
-	}
-
-	return err;
-}
-
-static int test_mb_ahash_jiffies(struct test_mb_ahash_data *data, int blen,
-				 int secs, u32 num_mb)
-{
-	unsigned long start, end;
-	int bcount;
-	int ret = 0;
-	int *rc;
-
-	rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
-	if (!rc)
-		return -ENOMEM;
-
-	for (start = jiffies, end = start + secs * HZ, bcount = 0;
-	     time_before(jiffies, end); bcount++) {
-		ret = do_mult_ahash_op(data, num_mb, rc);
-		if (ret)
-			goto out;
-	}
-
-	pr_cont("%d operations in %d seconds (%llu bytes)\n",
-		bcount * num_mb, secs, (u64)bcount * blen * num_mb);
-
-out:
-	kfree(rc);
-	return ret;
-}
-
-static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen,
-				u32 num_mb)
-{
-	unsigned long cycles = 0;
-	int ret = 0;
-	int i;
-	int *rc;
-
-	rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
-	if (!rc)
-		return -ENOMEM;
-
-	/* Warm-up run. */
-	for (i = 0; i < 4; i++) {
-		ret = do_mult_ahash_op(data, num_mb, rc);
-		if (ret)
-			goto out;
-	}
-
-	/* The real thing. */
-	for (i = 0; i < 8; i++) {
-		cycles_t start, end;
-
-		start = get_cycles();
-		ret = do_mult_ahash_op(data, num_mb, rc);
-		end = get_cycles();
-
-		if (ret)
-			goto out;
-
-		cycles += end - start;
-	}
-
-	pr_cont("1 operation in %lu cycles (%d bytes)\n",
-		(cycles + 4) / (8 * num_mb), blen);
-
-out:
-	kfree(rc);
-	return ret;
-}
-
-static void test_mb_ahash_speed(const char *algo, unsigned int secs,
-				struct hash_speed *speed, u32 num_mb)
-{
-	struct test_mb_ahash_data *data;
-	struct crypto_ahash *tfm;
-	unsigned int i, j, k;
-	int ret;
-
-	data = kcalloc(num_mb, sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return;
-
-	tfm = crypto_alloc_ahash(algo, 0, 0);
-	if (IS_ERR(tfm)) {
-		pr_err("failed to load transform for %s: %ld\n",
-			algo, PTR_ERR(tfm));
-		goto free_data;
-	}
-
-	for (i = 0; i < num_mb; ++i) {
-		if (testmgr_alloc_buf(data[i].xbuf))
-			goto out;
-
-		crypto_init_wait(&data[i].wait);
-
-		data[i].req = ahash_request_alloc(tfm, GFP_KERNEL);
-		if (!data[i].req) {
-			pr_err("alg: hash: Failed to allocate request for %s\n",
-			       algo);
-			goto out;
-		}
-
-		ahash_request_set_callback(data[i].req, 0, crypto_req_done,
-					   &data[i].wait);
-
-		sg_init_table(data[i].sg, XBUFSIZE);
-		for (j = 0; j < XBUFSIZE; j++) {
-			sg_set_buf(data[i].sg + j, data[i].xbuf[j], PAGE_SIZE);
-			memset(data[i].xbuf[j], 0xff, PAGE_SIZE);
-		}
-	}
-
-	pr_info("\ntesting speed of multibuffer %s (%s)\n", algo,
-		get_driver_name(crypto_ahash, tfm));
-
-	for (i = 0; speed[i].blen != 0; i++) {
-		/* For some reason this only tests digests. */
-		if (speed[i].blen != speed[i].plen)
-			continue;
-
-		if (speed[i].blen > XBUFSIZE * PAGE_SIZE) {
-			pr_err("template (%u) too big for tvmem (%lu)\n",
-			       speed[i].blen, XBUFSIZE * PAGE_SIZE);
-			goto out;
-		}
-
-		if (klen)
-			crypto_ahash_setkey(tfm, tvmem[0], klen);
-
-		for (k = 0; k < num_mb; k++)
-			ahash_request_set_crypt(data[k].req, data[k].sg,
-						data[k].result, speed[i].blen);
-
-		pr_info("test%3u "
-			"(%5u byte blocks,%5u bytes per update,%4u updates): ",
-			i, speed[i].blen, speed[i].plen,
-			speed[i].blen / speed[i].plen);
-
-		if (secs) {
-			ret = test_mb_ahash_jiffies(data, speed[i].blen, secs,
-						    num_mb);
-			cond_resched();
-		} else {
-			ret = test_mb_ahash_cycles(data, speed[i].blen, num_mb);
-		}
-
-
-		if (ret) {
-			pr_err("At least one hashing failed ret=%d\n", ret);
-			break;
-		}
-	}
-
-out:
-	for (k = 0; k < num_mb; ++k)
-		ahash_request_free(data[k].req);
-
-	for (k = 0; k < num_mb; ++k)
-		testmgr_free_buf(data[k].xbuf);
-
-	crypto_free_ahash(tfm);
-
-free_data:
-	kfree(data);
-}
-
 static int test_ahash_jiffies_digest(struct ahash_request *req, int blen,
 				     char *out, int secs)
 {
@@ -1667,8 +1473,8 @@ static inline int tcrypt_test(const char *alg)
 	pr_debug("testing %s\n", alg);
 
 	ret = alg_test(alg, alg, 0, 0);
-	/* non-fips algs return -EINVAL in fips mode */
-	if (fips_enabled && ret == -EINVAL)
+	/* non-fips algs return -EINVAL or -ECANCELED in fips mode */
+	if (fips_enabled && (ret == -EINVAL || ret == -ECANCELED))
 		ret = 0;
 	return ret;
 }
@@ -2571,33 +2377,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		if (mode > 400 && mode < 500) break;
 		fallthrough;
 	case 422:
-		test_mb_ahash_speed("sha1", sec, generic_hash_speed_template,
-				    num_mb);
-		if (mode > 400 && mode < 500) break;
-		fallthrough;
-	case 423:
-		test_mb_ahash_speed("sha256", sec, generic_hash_speed_template,
-				    num_mb);
-		if (mode > 400 && mode < 500) break;
-		fallthrough;
-	case 424:
-		test_mb_ahash_speed("sha512", sec, generic_hash_speed_template,
-				    num_mb);
-		if (mode > 400 && mode < 500) break;
-		fallthrough;
-	case 425:
-		test_mb_ahash_speed("sm3", sec, generic_hash_speed_template,
-				    num_mb);
-		if (mode > 400 && mode < 500) break;
-		fallthrough;
-	case 426:
-		test_mb_ahash_speed("streebog256", sec,
-				    generic_hash_speed_template, num_mb);
-		if (mode > 400 && mode < 500) break;
-		fallthrough;
-	case 427:
-		test_mb_ahash_speed("streebog512", sec,
-				    generic_hash_speed_template, num_mb);
+		test_ahash_speed("sm3", sec, generic_hash_speed_template);
 		if (mode > 400 && mode < 500) break;
 		fallthrough;
 	case 499:
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 5831d4bbc64f..2d632a285869 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -55,9 +55,6 @@ MODULE_PARM_DESC(noextratests, "disable expensive crypto self-tests");
 static unsigned int fuzz_iterations = 100;
 module_param(fuzz_iterations, uint, 0644);
 MODULE_PARM_DESC(fuzz_iterations, "number of fuzz test iterations");
-
-DEFINE_PER_CPU(bool, crypto_simd_disabled_for_test);
-EXPORT_PER_CPU_SYMBOL_GPL(crypto_simd_disabled_for_test);
 #endif
 
 #ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
@@ -1854,6 +1851,9 @@ static int __alg_test_hash(const struct hash_testvec *vecs,
 	}
 
 	for (i = 0; i < num_vecs; i++) {
+		if (fips_enabled && vecs[i].fips_skip)
+			continue;
+
 		err = test_hash_vec(&vecs[i], i, req, desc, tsgl, hashstate);
 		if (err)
 			goto out;
@@ -4650,7 +4650,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "dh",
 		.test = alg_test_kpp,
-		.fips_allowed = 1,
 		.suite = {
 			.kpp = __VECS(dh_tv_template)
 		}
@@ -4973,6 +4972,43 @@ static const struct alg_test_desc alg_test_descs[] = {
 			.cipher = __VECS(essiv_aes_cbc_tv_template)
 		}
 	}, {
+#if IS_ENABLED(CONFIG_CRYPTO_DH_RFC7919_GROUPS)
+		.alg = "ffdhe2048(dh)",
+		.test = alg_test_kpp,
+		.fips_allowed = 1,
+		.suite = {
+			.kpp = __VECS(ffdhe2048_dh_tv_template)
+		}
+	}, {
+		.alg = "ffdhe3072(dh)",
+		.test = alg_test_kpp,
+		.fips_allowed = 1,
+		.suite = {
+			.kpp = __VECS(ffdhe3072_dh_tv_template)
+		}
+	}, {
+		.alg = "ffdhe4096(dh)",
+		.test = alg_test_kpp,
+		.fips_allowed = 1,
+		.suite = {
+			.kpp = __VECS(ffdhe4096_dh_tv_template)
+		}
+	}, {
+		.alg = "ffdhe6144(dh)",
+		.test = alg_test_kpp,
+		.fips_allowed = 1,
+		.suite = {
+			.kpp = __VECS(ffdhe6144_dh_tv_template)
+		}
+	}, {
+		.alg = "ffdhe8192(dh)",
+		.test = alg_test_kpp,
+		.fips_allowed = 1,
+		.suite = {
+			.kpp = __VECS(ffdhe8192_dh_tv_template)
+		}
+	}, {
+#endif /* CONFIG_CRYPTO_DH_RFC7919_GROUPS */
 		.alg = "gcm(aes)",
 		.generic_driver = "gcm_base(ctr(aes-generic),ghash-generic)",
 		.test = alg_test_aead,
@@ -5613,6 +5649,13 @@ static int alg_find_test(const char *alg)
 	return -1;
 }
 
+static int alg_fips_disabled(const char *driver, const char *alg)
+{
+	pr_info("alg: %s (%s) is disabled due to FIPS\n", alg, driver);
+
+	return -ECANCELED;
+}
+
 int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
 {
 	int i;
@@ -5649,9 +5692,13 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
 	if (i < 0 && j < 0)
 		goto notest;
 
-	if (fips_enabled && ((i >= 0 && !alg_test_descs[i].fips_allowed) ||
-			     (j >= 0 && !alg_test_descs[j].fips_allowed)))
-		goto non_fips_alg;
+	if (fips_enabled) {
+		if (j >= 0 && !alg_test_descs[j].fips_allowed)
+			return -EINVAL;
+
+		if (i >= 0 && !alg_test_descs[i].fips_allowed)
+			goto non_fips_alg;
+	}
 
 	rc = 0;
 	if (i >= 0)
@@ -5681,9 +5728,13 @@ test_done:
 
 notest:
 	printk(KERN_INFO "alg: No test for %s (%s)\n", alg, driver);
+
+	if (type & CRYPTO_ALG_FIPS_INTERNAL)
+		return alg_fips_disabled(driver, alg);
+
 	return 0;
 non_fips_alg:
-	return -EINVAL;
+	return alg_fips_disabled(driver, alg);
 }
 
 #endif /* CONFIG_CRYPTO_MANAGER_DISABLE_TESTS */
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index a253d66ba1c1..d1aa90993bbd 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -33,6 +33,7 @@
  * @ksize:	Length of @key in bytes (0 if no key)
  * @setkey_error: Expected error from setkey()
  * @digest_error: Expected error from digest()
+ * @fips_skip:	Skip the test vector in FIPS mode
  */
 struct hash_testvec {
 	const char *key;
@@ -42,6 +43,7 @@ struct hash_testvec {
 	unsigned short ksize;
 	int setkey_error;
 	int digest_error;
+	bool fips_skip;
 };
 
 /*
@@ -1244,17 +1246,15 @@ static const struct kpp_testvec dh_tv_template[] = {
 	.secret =
 #ifdef __LITTLE_ENDIAN
 	"\x01\x00" /* type */
-	"\x15\x02" /* len */
+	"\x11\x02" /* len */
 	"\x00\x01\x00\x00" /* key_size */
 	"\x00\x01\x00\x00" /* p_size */
-	"\x00\x00\x00\x00" /* q_size */
 	"\x01\x00\x00\x00" /* g_size */
 #else
 	"\x00\x01" /* type */
-	"\x02\x15" /* len */
+	"\x02\x11" /* len */
 	"\x00\x00\x01\x00" /* key_size */
 	"\x00\x00\x01\x00" /* p_size */
-	"\x00\x00\x00\x00" /* q_size */
 	"\x00\x00\x00\x01" /* g_size */
 #endif
 	/* xa */
@@ -1344,7 +1344,7 @@ static const struct kpp_testvec dh_tv_template[] = {
 	"\xd3\x34\x49\xad\x64\xa6\xb1\xc0\x59\x28\x75\x60\xa7\x8a\xb0\x11"
 	"\x56\x89\x42\x74\x11\xf5\xf6\x5e\x6f\x16\x54\x6a\xb1\x76\x4d\x50"
 	"\x8a\x68\xc1\x5b\x82\xb9\x0d\x00\x32\x50\xed\x88\x87\x48\x92\x17",
-	.secret_size = 533,
+	.secret_size = 529,
 	.b_public_size = 256,
 	.expected_a_public_size = 256,
 	.expected_ss_size = 256,
@@ -1353,17 +1353,15 @@ static const struct kpp_testvec dh_tv_template[] = {
 	.secret =
 #ifdef __LITTLE_ENDIAN
 	"\x01\x00" /* type */
-	"\x15\x02" /* len */
+	"\x11\x02" /* len */
 	"\x00\x01\x00\x00" /* key_size */
 	"\x00\x01\x00\x00" /* p_size */
-	"\x00\x00\x00\x00" /* q_size */
 	"\x01\x00\x00\x00" /* g_size */
 #else
 	"\x00\x01" /* type */
-	"\x02\x15" /* len */
+	"\x02\x11" /* len */
 	"\x00\x00\x01\x00" /* key_size */
 	"\x00\x00\x01\x00" /* p_size */
-	"\x00\x00\x00\x00" /* q_size */
 	"\x00\x00\x00\x01" /* g_size */
 #endif
 	/* xa */
@@ -1453,13 +1451,1442 @@ static const struct kpp_testvec dh_tv_template[] = {
 	"\x5e\x5a\x64\xbd\xf6\x85\x04\xe8\x28\x6a\xac\xef\xce\x19\x8e\x9a"
 	"\xfe\x75\xc0\x27\x69\xe3\xb3\x7b\x21\xa7\xb1\x16\xa4\x85\x23\xee"
 	"\xb0\x1b\x04\x6e\xbd\xab\x16\xde\xfd\x86\x6b\xa9\x95\xd7\x0b\xfd",
-	.secret_size = 533,
+	.secret_size = 529,
 	.b_public_size = 256,
 	.expected_a_public_size = 256,
 	.expected_ss_size = 256,
 	}
 };
 
+static const struct kpp_testvec ffdhe2048_dh_tv_template[] __maybe_unused = {
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x01" /* len */
+	"\x00\x01\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x01\x10" /* len */
+	"\x00\x00\x01\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x23\x7d\xd0\x06\xfd\x7a\xe5\x7a\x08\xda\x98\x31\xc0\xb3\xd5\x85"
+	"\xe2\x0d\x2a\x91\x5f\x78\x4b\xa6\x62\xd0\xa6\x35\xd4\xef\x86\x39"
+	"\xf1\xdb\x71\x5e\xb0\x11\x2e\xee\x91\x3a\xaa\xf9\xe3\xdf\x8d\x8b"
+	"\x48\x41\xde\xe8\x78\x53\xc5\x5f\x93\xd2\x79\x0d\xbe\x8d\x83\xe8"
+	"\x8f\x00\xd2\xde\x13\x18\x04\x05\x20\x6d\xda\xfa\x1d\x0b\x24\x52"
+	"\x3a\x18\x2b\xe1\x1e\xae\x15\x3b\x0f\xaa\x09\x09\xf6\x01\x98\xe9"
+	"\x81\x5d\x6b\x83\x6e\x55\xf1\x5d\x6f\x6f\x0d\x9d\xa8\x72\x32\x63"
+	"\x60\xe6\x0b\xc5\x22\xe2\xf9\x46\x58\xa2\x1c\x2a\xb0\xd5\xaf\xe3"
+	"\x5b\x03\xb7\x36\xb7\xba\x55\x20\x08\x7c\x51\xd4\x89\x42\x9c\x14"
+	"\x23\xe2\x71\x3e\x15\x2a\x0d\x34\x8a\xde\xad\x84\x11\x15\x72\x18"
+	"\x42\x43\x0a\xe2\x58\x29\xb3\x90\x0f\x56\xd8\x8a\x0f\x0e\xbc\x0e"
+	"\x9c\xe7\xd5\xe6\x5b\xbf\x06\x64\x38\x12\xa5\x8d\x5b\x68\x34\xdd"
+	"\x75\x48\xc9\xa7\xa3\x58\x5a\x1c\xe1\xb2\xc5\xe3\x39\x03\xcf\xab"
+	"\xc2\x14\x07\xaf\x55\x80\xc7\x63\xe4\x03\xeb\xe9\x0a\x25\x61\x85"
+	"\x1d\x0e\x81\x52\x7b\xbc\x4a\x0c\xc8\x59\x6a\xac\x18\xfb\x8c\x0c"
+	"\xb4\x79\xbd\xa1\x4c\xbb\x02\xc9\xd5\x13\x88\x3d\x25\xaa\x77\x49",
+	.b_public =
+	"\x5c\x00\x6f\xda\xfe\x4c\x0c\xc2\x18\xff\xa9\xec\x7a\xbe\x8a\x51"
+	"\x64\x6b\x57\xf8\xed\xe2\x36\x77\xc1\x23\xbf\x56\xa6\x48\x76\x34"
+	"\x0e\xf3\x68\x05\x45\x6a\x98\x5b\x9e\x8b\xc0\x11\x29\xcb\x5b\x66"
+	"\x2d\xc2\xeb\x4c\xf1\x7d\x85\x30\xaa\xd5\xf5\xb8\xd3\x62\x1e\x97"
+	"\x1e\x34\x18\xf8\x76\x8c\x10\xca\x1f\xe4\x5d\x62\xe1\xbe\x61\xef"
+	"\xaf\x2c\x8d\x97\x15\xa5\x86\xd5\xd3\x12\x6f\xec\xe2\xa4\xb2\x5a"
+	"\x35\x1d\xd4\x91\xa6\xef\x13\x09\x65\x9c\x45\xc0\x12\xad\x7f\xee"
+	"\x93\x5d\xfa\x89\x26\x7d\xae\xee\xea\x8c\xa3\xcf\x04\x2d\xa0\xc7"
+	"\xd9\x14\x62\xaf\xdf\xa0\x33\xd7\x5e\x83\xa2\xe6\x0e\x0e\x5d\x77"
+	"\xce\xe6\x72\xe4\xec\x9d\xff\x72\x9f\x38\x95\x19\x96\xba\x4c\xe3"
+	"\x5f\xb8\x46\x4a\x1d\xe9\x62\x7b\xa8\xdc\xe7\x61\x90\x6b\xb9\xd4"
+	"\xad\x0b\xa3\x06\xb3\x70\xfa\xea\x2b\xc4\x2c\xde\x43\x37\xf6\x8d"
+	"\x72\xf0\x86\x9a\xbb\x3b\x8e\x7a\x71\x03\x30\x30\x2a\x5d\xcd\x1e"
+	"\xe4\xd3\x08\x07\x75\x17\x17\x72\x1e\x77\x6c\x98\x0d\x29\x7f\xac"
+	"\xe7\xb2\xee\xa9\x1c\x33\x9d\x08\x39\xe1\xd8\x5b\xe5\xbc\x48\xb2"
+	"\xb6\xdf\xcd\xa0\x42\x06\xcc\xfb\xed\x60\x6f\xbc\x57\xac\x09\x45",
+	.expected_a_public =
+	"\x8b\xdb\xc1\xf7\xc6\xba\xa1\x38\x95\x6a\xa1\xb6\x04\x5e\xae\x52"
+	"\x72\xfc\xef\x2d\x9d\x71\x05\x9c\xd3\x02\xa9\xfb\x55\x0f\xfa\xc9"
+	"\xb4\x34\x51\xa3\x28\x89\x8d\x93\x92\xcb\xd9\xb5\xb9\x66\xfc\x67"
+	"\x15\x92\x6f\x73\x85\x15\xe2\xfc\x11\x6b\x97\x8c\x4b\x0f\x12\xfa"
+	"\x8d\x72\x76\x9b\x8f\x3b\xfe\x31\xbe\x42\x88\x4c\xd2\xb2\x70\xa6"
+	"\xa5\xe3\x7e\x73\x07\x12\x36\xaa\xc9\x5c\x83\xe1\xf1\x46\x41\x4f"
+	"\x7c\x52\xaf\xdc\xa4\xe6\x82\xa3\x86\x83\x47\x5a\x12\x3a\x0c\xe3"
+	"\xdd\xdb\x94\x03\x2a\x59\x91\xa0\x19\xe5\xf8\x07\xdd\x54\x6a\x22"
+	"\x43\xb7\xf3\x74\xd7\xb9\x30\xfe\x9c\xe8\xd1\xcf\x06\x43\x68\xb9"
+	"\x54\x8f\x54\xa2\xe5\x3c\xf2\xc3\x4c\xee\xd4\x7c\x5d\x0e\xb1\x7b"
+	"\x16\x68\xb5\xb3\x7d\xd4\x11\x83\x5c\x77\x17\xc4\xf0\x59\x76\x7a"
+	"\x83\x40\xe5\xd9\x4c\x76\x23\x5b\x17\x6d\xee\x4a\x92\x68\x4b\x89"
+	"\xa0\x6d\x23\x8c\x80\x31\x33\x3a\x12\xf4\x50\xa6\xcb\x13\x97\x01"
+	"\xb8\x2c\xe6\xd2\x38\xdf\xd0\x7f\xc6\x27\x19\x0e\xb2\x07\xfd\x1f"
+	"\x1b\x9c\x1b\x87\xf9\x73\x6a\x3f\x7f\xb0\xf9\x2f\x3c\x19\x9f\xc9"
+	"\x8f\x97\x21\x0e\x8e\xbb\x1a\x17\x20\x15\xdd\xc6\x42\x60\xae\x4d",
+	.expected_ss =
+	"\xf3\x0e\x64\x7b\x66\xd7\x82\x7e\xab\x7e\x4a\xbe\x13\x6f\x43\x3d"
+	"\xea\x4f\x1f\x8b\x9d\x41\x56\x71\xe1\x06\x96\x02\x68\xfa\x44\x6e"
+	"\xe7\xf2\x26\xd4\x01\x4a\xf0\x28\x25\x76\xad\xd7\xe0\x17\x74\xfe"
+	"\xf9\xe1\x6d\xd3\xf7\xc7\xdf\xc0\x62\xa5\xf3\x4e\x1b\x5c\x77\x2a"
+	"\xfb\x0b\x87\xc3\xde\x1e\xc1\xe0\xd3\x7a\xb8\x02\x02\xec\x9c\x97"
+	"\xfb\x34\xa0\x20\x10\x23\x87\xb2\x9a\x72\xe3\x3d\xb2\x18\x50\xf3"
+	"\x6a\xd3\xd3\x19\xc4\x36\xd5\x59\xd6\xd6\xa7\x5c\xc3\xf9\x09\x33"
+	"\xa1\xf5\xb9\x4b\xf3\x0b\xe1\x4f\x79\x6b\x45\xf2\xec\x8b\xe5\x69"
+	"\x9f\xc6\x05\x01\xfe\x3a\x13\xfd\x6d\xea\x03\x83\x29\x7c\x7f\xf5"
+	"\x41\x55\x95\xde\x7e\x62\xae\xaf\x28\xdb\x7c\xa9\x90\x1e\xb2\xb1"
+	"\x1b\xef\xf1\x2e\xde\x47\xaa\xa8\x92\x9a\x49\x3d\xc0\xe0\x8d\xbb"
+	"\x0c\x42\x86\xaf\x00\xce\xb0\xab\x22\x7c\xe9\xbe\xb9\x72\x2f\xcf"
+	"\x5e\x5d\x62\x52\x2a\xd1\xfe\xcc\xa2\xf3\x40\xfd\x01\xa7\x54\x0a"
+	"\xa1\xfb\x1c\xf2\x44\xa6\x47\x30\x5a\xba\x2a\x05\xff\xd0\x6c\xab"
+	"\xeb\xe6\x8f\xf6\xd7\x73\xa3\x0e\x6c\x0e\xcf\xfd\x8e\x16\x5d\xe0"
+	"\x2c\x11\x05\x82\x3c\x22\x16\x6c\x52\x61\xcf\xbb\xff\xf8\x06\xd0",
+	.secret_size = 272,
+	.b_public_size = 256,
+	.expected_a_public_size = 256,
+	.expected_ss_size = 256,
+	},
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x00" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x00\x10" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#endif
+	.b_secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x01" /* len */
+	"\x00\x01\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x01\x10" /* len */
+	"\x00\x00\x01\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x23\x7d\xd0\x06\xfd\x7a\xe5\x7a\x08\xda\x98\x31\xc0\xb3\xd5\x85"
+	"\xe2\x0d\x2a\x91\x5f\x78\x4b\xa6\x62\xd0\xa6\x35\xd4\xef\x86\x39"
+	"\xf1\xdb\x71\x5e\xb0\x11\x2e\xee\x91\x3a\xaa\xf9\xe3\xdf\x8d\x8b"
+	"\x48\x41\xde\xe8\x78\x53\xc5\x5f\x93\xd2\x79\x0d\xbe\x8d\x83\xe8"
+	"\x8f\x00\xd2\xde\x13\x18\x04\x05\x20\x6d\xda\xfa\x1d\x0b\x24\x52"
+	"\x3a\x18\x2b\xe1\x1e\xae\x15\x3b\x0f\xaa\x09\x09\xf6\x01\x98\xe9"
+	"\x81\x5d\x6b\x83\x6e\x55\xf1\x5d\x6f\x6f\x0d\x9d\xa8\x72\x32\x63"
+	"\x60\xe6\x0b\xc5\x22\xe2\xf9\x46\x58\xa2\x1c\x2a\xb0\xd5\xaf\xe3"
+	"\x5b\x03\xb7\x36\xb7\xba\x55\x20\x08\x7c\x51\xd4\x89\x42\x9c\x14"
+	"\x23\xe2\x71\x3e\x15\x2a\x0d\x34\x8a\xde\xad\x84\x11\x15\x72\x18"
+	"\x42\x43\x0a\xe2\x58\x29\xb3\x90\x0f\x56\xd8\x8a\x0f\x0e\xbc\x0e"
+	"\x9c\xe7\xd5\xe6\x5b\xbf\x06\x64\x38\x12\xa5\x8d\x5b\x68\x34\xdd"
+	"\x75\x48\xc9\xa7\xa3\x58\x5a\x1c\xe1\xb2\xc5\xe3\x39\x03\xcf\xab"
+	"\xc2\x14\x07\xaf\x55\x80\xc7\x63\xe4\x03\xeb\xe9\x0a\x25\x61\x85"
+	"\x1d\x0e\x81\x52\x7b\xbc\x4a\x0c\xc8\x59\x6a\xac\x18\xfb\x8c\x0c"
+	"\xb4\x79\xbd\xa1\x4c\xbb\x02\xc9\xd5\x13\x88\x3d\x25\xaa\x77\x49",
+	.b_public =
+	"\x8b\xdb\xc1\xf7\xc6\xba\xa1\x38\x95\x6a\xa1\xb6\x04\x5e\xae\x52"
+	"\x72\xfc\xef\x2d\x9d\x71\x05\x9c\xd3\x02\xa9\xfb\x55\x0f\xfa\xc9"
+	"\xb4\x34\x51\xa3\x28\x89\x8d\x93\x92\xcb\xd9\xb5\xb9\x66\xfc\x67"
+	"\x15\x92\x6f\x73\x85\x15\xe2\xfc\x11\x6b\x97\x8c\x4b\x0f\x12\xfa"
+	"\x8d\x72\x76\x9b\x8f\x3b\xfe\x31\xbe\x42\x88\x4c\xd2\xb2\x70\xa6"
+	"\xa5\xe3\x7e\x73\x07\x12\x36\xaa\xc9\x5c\x83\xe1\xf1\x46\x41\x4f"
+	"\x7c\x52\xaf\xdc\xa4\xe6\x82\xa3\x86\x83\x47\x5a\x12\x3a\x0c\xe3"
+	"\xdd\xdb\x94\x03\x2a\x59\x91\xa0\x19\xe5\xf8\x07\xdd\x54\x6a\x22"
+	"\x43\xb7\xf3\x74\xd7\xb9\x30\xfe\x9c\xe8\xd1\xcf\x06\x43\x68\xb9"
+	"\x54\x8f\x54\xa2\xe5\x3c\xf2\xc3\x4c\xee\xd4\x7c\x5d\x0e\xb1\x7b"
+	"\x16\x68\xb5\xb3\x7d\xd4\x11\x83\x5c\x77\x17\xc4\xf0\x59\x76\x7a"
+	"\x83\x40\xe5\xd9\x4c\x76\x23\x5b\x17\x6d\xee\x4a\x92\x68\x4b\x89"
+	"\xa0\x6d\x23\x8c\x80\x31\x33\x3a\x12\xf4\x50\xa6\xcb\x13\x97\x01"
+	"\xb8\x2c\xe6\xd2\x38\xdf\xd0\x7f\xc6\x27\x19\x0e\xb2\x07\xfd\x1f"
+	"\x1b\x9c\x1b\x87\xf9\x73\x6a\x3f\x7f\xb0\xf9\x2f\x3c\x19\x9f\xc9"
+	"\x8f\x97\x21\x0e\x8e\xbb\x1a\x17\x20\x15\xdd\xc6\x42\x60\xae\x4d",
+	.secret_size = 16,
+	.b_secret_size = 272,
+	.b_public_size = 256,
+	.expected_a_public_size = 256,
+	.expected_ss_size = 256,
+	.genkey = true,
+	},
+};
+
+static const struct kpp_testvec ffdhe3072_dh_tv_template[] __maybe_unused = {
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x90\x01" /* len */
+	"\x80\x01\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x01\x90" /* len */
+	"\x00\x00\x01\x80" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x6b\xb4\x97\x23\xfa\xc8\x5e\xa9\x7b\x63\xe7\x3e\x0e\x99\xc3\xb9"
+	"\xda\xb7\x48\x0d\xc3\xb1\xbf\x4f\x17\xc7\xa9\x51\xf6\x64\xff\xc4"
+	"\x31\x58\x87\x25\x83\x2c\x00\xf0\x41\x29\xf7\xee\xf9\xe6\x36\x76"
+	"\xd6\x3a\x24\xbe\xa7\x07\x0b\x93\xc7\x9f\x6c\x75\x0a\x26\x75\x76"
+	"\xe3\x0c\x42\xe0\x00\x04\x69\xd9\xec\x0b\x59\x54\x28\x8f\xd7\x9a"
+	"\x63\xf4\x5b\xdf\x85\x65\xc4\xe1\x95\x27\x4a\x42\xad\x36\x47\xa9"
+	"\x0a\xf8\x14\x1c\xf3\x94\x3b\x7e\x47\x99\x35\xa8\x18\xec\x70\x10"
+	"\xdf\xcb\xd2\x78\x88\xc1\x2d\x59\x93\xc1\xa4\x6d\xd7\x1d\xb9\xd5"
+	"\xf8\x30\x06\x7f\x98\x90\x0c\x74\x5e\x89\x2f\x64\x5a\xad\x5f\x53"
+	"\xb2\xa3\xa8\x83\xbf\xfc\x37\xef\xb8\x36\x0a\x5c\x62\x81\x64\x74"
+	"\x16\x2f\x45\x39\x2a\x91\x26\x87\xc0\x12\xcc\x75\x11\xa3\xa1\xc5"
+	"\xae\x20\xcf\xcb\x20\x25\x6b\x7a\x31\x93\x9d\x38\xb9\x57\x72\x46"
+	"\xd4\x84\x65\x87\xf1\xb5\xd3\xab\xfc\xc3\x4d\x40\x92\x94\x1e\xcd"
+	"\x1c\x87\xec\x3f\xcd\xbe\xd0\x95\x6b\x40\x02\xdd\x62\xeb\x0a\xda"
+	"\x4f\xbe\x8e\x32\x48\x8b\x6d\x83\xa0\x96\x62\x23\xec\x83\x91\x44"
+	"\xf9\x72\x01\xac\xa0\xe4\x72\x1d\x5a\x75\x05\x57\x90\xae\x7e\xb4"
+	"\x71\x39\x01\x05\xdc\xe9\xee\xcb\xf0\x61\x28\x91\x69\x8c\x31\x03"
+	"\x7a\x92\x15\xa1\x58\x67\x3d\x70\x82\xa6\x2c\xfe\x10\x56\x58\xd3"
+	"\x94\x67\xe1\xbe\xee\xc1\x64\x5c\x4b\xc8\x28\x3d\xc5\x66\x3a\xab"
+	"\x22\xc1\x7e\xa1\xbb\xf3\x19\x3b\xda\x46\x82\x45\xd4\x3c\x7c\xc6"
+	"\xce\x1f\x7f\x95\xa2\x17\xff\x88\xba\xd6\x4d\xdb\xd2\xea\xde\x39"
+	"\xd6\xa5\x18\x73\xbb\x64\x6e\x79\xe9\xdc\x3f\x92\x7f\xda\x1f\x49"
+	"\x33\x70\x65\x73\xa2\xd9\x06\xb8\x1b\x29\x29\x1a\xe0\xa3\xe6\x05"
+	"\x9a\xa8\xc2\x4e\x7a\x78\x1d\x22\x57\x21\xc8\xa3\x8d\x66\x3e\x23",
+	.b_public =
+	"\x73\x40\x8b\xce\xe8\x6a\x1c\x03\x50\x54\x42\x36\x22\xc6\x1d\xe8"
+	"\xe1\xef\x5c\x89\xa5\x55\xc1\xc4\x1c\xd7\x4f\xee\x5d\xba\x62\x60"
+	"\xfe\x93\x2f\xfd\x93\x2c\x8f\x70\xc6\x47\x17\x25\xb2\x95\xd7\x7d"
+	"\x41\x81\x4d\x52\x1c\xbe\x4d\x57\x3e\x26\x51\x28\x03\x8f\x67\xf5"
+	"\x22\x16\x1c\x67\xf7\x62\xcb\xfd\xa3\xee\x8d\xe0\xfa\x15\x9a\x53"
+	"\xbe\x7b\x9f\xc0\x12\x7a\xfc\x5e\x77\x2d\x60\x06\xba\x71\xc5\xca"
+	"\xd7\x26\xaf\x3b\xba\x6f\xd3\xc4\x82\x57\x19\x26\xb0\x16\x7b\xbd"
+	"\x83\xf2\x21\x03\x79\xff\x0a\x6f\xc5\x7b\x00\x15\xad\x5b\xf4\x42"
+	"\x1f\xcb\x7f\x3d\x34\x77\x3c\xc3\xe0\x38\xa5\x40\x51\xbe\x6f\xd9"
+	"\xc9\x77\x9c\xfc\x0d\xc1\x8e\xef\x0f\xaa\x5e\xa8\xbb\x16\x4a\x3e"
+	"\x26\x55\xae\xc1\xb6\x3e\xfd\x73\xf7\x59\xd2\xe5\x4b\x91\x8e\x28"
+	"\x77\x1e\x5a\xe2\xcd\xce\x92\x35\xbb\x1e\xbb\xcf\x79\x94\xdf\x31"
+	"\xde\x31\xa8\x75\xf6\xe0\xaa\x2e\xe9\x4f\x44\xc8\xba\xb9\xab\x80"
+	"\x29\xa1\xea\x58\x2e\x40\x96\xa0\x1a\xf5\x2c\x38\x47\x43\x5d\x26"
+	"\x2c\xd8\xad\xea\xd3\xad\xe8\x51\x49\xad\x45\x2b\x25\x7c\xde\xe4"
+	"\xaf\x03\x2a\x39\x26\x86\x66\x10\xbc\xa8\x71\xda\xe0\xe8\xf1\xdd"
+	"\x50\xff\x44\xb2\xd3\xc7\xff\x66\x63\xf6\x42\xe3\x97\x9d\x9e\xf4"
+	"\xa6\x89\xb9\xab\x12\x17\xf2\x85\x56\x9c\x6b\x24\x71\x83\x57\x7d"
+	"\x3c\x7b\x2b\x88\x92\x19\xd7\x1a\x00\xd5\x38\x94\x43\x60\x4d\xa7"
+	"\x12\x9e\x0d\xf6\x5c\x9a\xd3\xe2\x9e\xb1\x21\xe8\xe2\x9e\xe9\x1e"
+	"\x9d\xa5\x94\x95\xa6\x3d\x12\x15\xd8\x8b\xac\xe0\x8c\xde\xe6\x40"
+	"\x98\xaa\x5e\x55\x4f\x3d\x86\x87\x0d\xe3\xc6\x68\x15\xe6\xde\x17"
+	"\x78\x21\xc8\x6c\x06\xc7\x94\x56\xb4\xaf\xa2\x35\x0b\x0c\x97\xd7"
+	"\xa4\x12\xee\xf4\xd2\xef\x80\x28\xb3\xee\xe9\x15\x8b\x01\x32\x79",
+	.expected_a_public =
+	"\x1b\x6a\xba\xea\xa3\xcc\x50\x69\xa9\x41\x89\xaf\x04\xe1\x44\x22"
+	"\x97\x20\xd1\xf6\x1e\xcb\x64\x36\x6f\xee\x0b\x16\xc1\xd9\x91\xbe"
+	"\x57\xc8\xd9\xf2\xa1\x96\x91\xec\x41\xc7\x79\x00\x1a\x48\x25\x55"
+	"\xbe\xf3\x20\x8c\x38\xc6\x7b\xf2\x8b\x5a\xc3\xb5\x87\x0a\x86\x3d"
+	"\xb7\xd6\xce\xb0\x96\x2e\x5d\xc4\x00\x5e\x42\xe4\xe5\x50\x4f\xb8"
+	"\x6f\x18\xa4\xe1\xd3\x20\xfc\x3c\xf5\x0a\xff\x23\xa6\x5b\xb4\x17"
+	"\x3e\x7b\xdf\xb9\xb5\x3c\x1b\x76\x29\xcd\xb4\x46\x4f\x27\x8f\xd2"
+	"\xe8\x27\x66\xdb\xe8\xb3\xf5\xe1\xd0\x04\xcd\x89\xff\xba\x76\x67"
+	"\xe8\x4d\xcf\x86\x1c\x8a\xd1\xcf\x99\x27\xfb\xa9\x78\xcc\x94\xaf"
+	"\x3d\x04\xfd\x25\xc0\x47\xfa\x29\x80\x05\xf4\xde\xad\xdb\xab\x12"
+	"\xb0\x2b\x8e\xca\x02\x06\x6d\xad\x3e\x09\xb1\x22\xa3\xf5\x4c\x6d"
+	"\x69\x99\x58\x8b\xd8\x45\x2e\xe0\xc9\x3c\xf7\x92\xce\x21\x90\x6b"
+	"\x3b\x65\x9f\x64\x79\x8d\x67\x22\x1a\x37\xd3\xee\x51\xe2\xe7\x5a"
+	"\x93\x51\xaa\x3c\x4b\x04\x16\x32\xef\xe3\x66\xbe\x18\x94\x88\x64"
+	"\x79\xce\x06\x3f\xb8\xd6\xee\xdc\x13\x79\x6f\x20\x14\xc2\x6b\xce"
+	"\xc8\xda\x42\xa5\x93\x5b\xe4\x7f\x1a\xe6\xda\x0f\xb3\xc1\x5f\x30"
+	"\x50\x76\xe8\x37\x3d\xca\x77\x2c\xa8\xe4\x3b\xf9\x6f\xe0\x17\xed"
+	"\x0e\xef\xb7\x31\x14\xb5\xea\xd9\x39\x22\x89\xb6\x40\x57\xcc\x84"
+	"\xef\x73\xa7\xe9\x27\x21\x85\x89\xfa\xaf\x03\xda\x9c\x8b\xfd\x52"
+	"\x7d\xb0\xa4\xe4\xf9\xd8\x90\x55\xc4\x39\xd6\x9d\xaf\x3b\xce\xac"
+	"\xaa\x36\x14\x7a\x9b\x8b\x12\x43\xe1\xca\x61\xae\x46\x5b\xe7\xe5"
+	"\x88\x32\x80\xa0\x2d\x51\xbb\x2f\xea\xeb\x3c\x71\xb2\xae\xce\xca"
+	"\x61\xd2\x76\xe0\x45\x46\x78\x4e\x09\x2d\xc2\x54\xc2\xa9\xc7\xa8"
+	"\x55\x8e\x72\xa4\x8b\x8a\xc9\x01\xdb\xe9\x58\x11\xa1\xc4\xe7\x12",
+	.expected_ss =
+	"\x47\x8e\xb2\x19\x09\xf0\x46\x99\x6b\x41\x86\xf7\x34\xad\xbf\x2a"
+	"\x18\x1b\x7d\xec\xa9\xb2\x47\x2f\x40\xfb\x9a\x64\x30\x44\xf3\x4c"
+	"\x01\x67\xad\x57\x5a\xbc\xd4\xc8\xef\x7e\x8a\x14\x74\x1d\x6d\x8c"
+	"\x7b\xce\xc5\x57\x5f\x95\xe8\x72\xba\xdf\xa3\xcd\x00\xbe\x09\x4c"
+	"\x06\x72\xe7\x17\xb0\xe5\xe5\xb7\x20\xa5\xcb\xd9\x68\x99\xad\x3f"
+	"\xde\xf3\xde\x1d\x1c\x00\x74\xd2\xd1\x57\x55\x5d\xce\x76\x0c\xc4"
+	"\x7a\xc4\x65\x7c\x19\x17\x0a\x09\x66\x7d\x3a\xab\xf7\x61\x3a\xe3"
+	"\x5b\xac\xcf\x69\xb0\x8b\xee\x5d\x28\x36\xbb\x3f\x74\xce\x6e\x38"
+	"\x1e\x39\xab\x26\xca\x89\xdc\x58\x59\xcb\x95\xe4\xbc\xd6\x19\x48"
+	"\xd0\x55\x68\x7b\xb4\x27\x95\x3c\xd9\x58\x10\x4f\x8f\x55\x1c\x3f"
+	"\x04\xce\x89\x1f\x82\x28\xe9\x48\x17\x47\x8f\xee\xb7\x8f\xeb\xb1"
+	"\x29\xa8\x23\x18\x73\x33\x9f\x83\x08\xca\xcd\x54\x6e\xca\xec\x78"
+	"\x7b\x16\x83\x3f\xdb\x0a\xef\xfd\x87\x94\x19\x08\x6e\x6e\x22\x57"
+	"\xd7\xd2\x79\xf9\xf6\xeb\xe0\x6c\x93\x9d\x95\xfa\x41\x7a\xa9\xd6"
+	"\x2a\xa3\x26\x9b\x24\x1b\x8b\xa0\xed\x04\xb2\xe4\x6c\x4e\xc4\x3f"
+	"\x61\xe5\xe0\x4d\x09\x28\xaf\x58\x35\x25\x0b\xd5\x38\x18\x69\x51"
+	"\x18\x51\x73\x7b\x28\x19\x9f\xe4\x69\xfc\x2c\x25\x08\x99\x8f\x62"
+	"\x65\x62\xa5\x28\xf1\xf4\xfb\x02\x29\x27\xb0\x5e\xbb\x4f\xf9\x1a"
+	"\xa7\xc4\x38\x63\x5b\x01\xfe\x00\x66\xe3\x47\x77\x21\x85\x17\xd5"
+	"\x34\x19\xd3\x87\xab\x44\x62\x08\x59\xb2\x6b\x1f\x21\x0c\x23\x84"
+	"\xf7\xba\x92\x67\xf9\x16\x85\x6a\xe0\xeb\xe7\x4f\x06\x80\x81\x81"
+	"\x28\x9c\xe8\x2e\x71\x97\x48\xe0\xd1\xbc\xce\xe9\x42\x2c\x89\xdf"
+	"\x0b\xa9\xa1\x07\x84\x33\x78\x7f\x49\x2f\x1c\x55\xc3\x7f\xc3\x37"
+	"\x40\xdf\x13\xf4\xa0\x21\x79\x6e\x3a\xe3\xb8\x23\x9e\x8a\x6e\x9c",
+	.secret_size = 400,
+	.b_public_size = 384,
+	.expected_a_public_size = 384,
+	.expected_ss_size = 384,
+	},
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x00" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x00\x10" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#endif
+	.b_secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x90\x01" /* len */
+	"\x80\x01\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x01\x90" /* len */
+	"\x00\x00\x01\x80" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x6b\xb4\x97\x23\xfa\xc8\x5e\xa9\x7b\x63\xe7\x3e\x0e\x99\xc3\xb9"
+	"\xda\xb7\x48\x0d\xc3\xb1\xbf\x4f\x17\xc7\xa9\x51\xf6\x64\xff\xc4"
+	"\x31\x58\x87\x25\x83\x2c\x00\xf0\x41\x29\xf7\xee\xf9\xe6\x36\x76"
+	"\xd6\x3a\x24\xbe\xa7\x07\x0b\x93\xc7\x9f\x6c\x75\x0a\x26\x75\x76"
+	"\xe3\x0c\x42\xe0\x00\x04\x69\xd9\xec\x0b\x59\x54\x28\x8f\xd7\x9a"
+	"\x63\xf4\x5b\xdf\x85\x65\xc4\xe1\x95\x27\x4a\x42\xad\x36\x47\xa9"
+	"\x0a\xf8\x14\x1c\xf3\x94\x3b\x7e\x47\x99\x35\xa8\x18\xec\x70\x10"
+	"\xdf\xcb\xd2\x78\x88\xc1\x2d\x59\x93\xc1\xa4\x6d\xd7\x1d\xb9\xd5"
+	"\xf8\x30\x06\x7f\x98\x90\x0c\x74\x5e\x89\x2f\x64\x5a\xad\x5f\x53"
+	"\xb2\xa3\xa8\x83\xbf\xfc\x37\xef\xb8\x36\x0a\x5c\x62\x81\x64\x74"
+	"\x16\x2f\x45\x39\x2a\x91\x26\x87\xc0\x12\xcc\x75\x11\xa3\xa1\xc5"
+	"\xae\x20\xcf\xcb\x20\x25\x6b\x7a\x31\x93\x9d\x38\xb9\x57\x72\x46"
+	"\xd4\x84\x65\x87\xf1\xb5\xd3\xab\xfc\xc3\x4d\x40\x92\x94\x1e\xcd"
+	"\x1c\x87\xec\x3f\xcd\xbe\xd0\x95\x6b\x40\x02\xdd\x62\xeb\x0a\xda"
+	"\x4f\xbe\x8e\x32\x48\x8b\x6d\x83\xa0\x96\x62\x23\xec\x83\x91\x44"
+	"\xf9\x72\x01\xac\xa0\xe4\x72\x1d\x5a\x75\x05\x57\x90\xae\x7e\xb4"
+	"\x71\x39\x01\x05\xdc\xe9\xee\xcb\xf0\x61\x28\x91\x69\x8c\x31\x03"
+	"\x7a\x92\x15\xa1\x58\x67\x3d\x70\x82\xa6\x2c\xfe\x10\x56\x58\xd3"
+	"\x94\x67\xe1\xbe\xee\xc1\x64\x5c\x4b\xc8\x28\x3d\xc5\x66\x3a\xab"
+	"\x22\xc1\x7e\xa1\xbb\xf3\x19\x3b\xda\x46\x82\x45\xd4\x3c\x7c\xc6"
+	"\xce\x1f\x7f\x95\xa2\x17\xff\x88\xba\xd6\x4d\xdb\xd2\xea\xde\x39"
+	"\xd6\xa5\x18\x73\xbb\x64\x6e\x79\xe9\xdc\x3f\x92\x7f\xda\x1f\x49"
+	"\x33\x70\x65\x73\xa2\xd9\x06\xb8\x1b\x29\x29\x1a\xe0\xa3\xe6\x05"
+	"\x9a\xa8\xc2\x4e\x7a\x78\x1d\x22\x57\x21\xc8\xa3\x8d\x66\x3e\x23",
+	.b_public =
+	"\x1b\x6a\xba\xea\xa3\xcc\x50\x69\xa9\x41\x89\xaf\x04\xe1\x44\x22"
+	"\x97\x20\xd1\xf6\x1e\xcb\x64\x36\x6f\xee\x0b\x16\xc1\xd9\x91\xbe"
+	"\x57\xc8\xd9\xf2\xa1\x96\x91\xec\x41\xc7\x79\x00\x1a\x48\x25\x55"
+	"\xbe\xf3\x20\x8c\x38\xc6\x7b\xf2\x8b\x5a\xc3\xb5\x87\x0a\x86\x3d"
+	"\xb7\xd6\xce\xb0\x96\x2e\x5d\xc4\x00\x5e\x42\xe4\xe5\x50\x4f\xb8"
+	"\x6f\x18\xa4\xe1\xd3\x20\xfc\x3c\xf5\x0a\xff\x23\xa6\x5b\xb4\x17"
+	"\x3e\x7b\xdf\xb9\xb5\x3c\x1b\x76\x29\xcd\xb4\x46\x4f\x27\x8f\xd2"
+	"\xe8\x27\x66\xdb\xe8\xb3\xf5\xe1\xd0\x04\xcd\x89\xff\xba\x76\x67"
+	"\xe8\x4d\xcf\x86\x1c\x8a\xd1\xcf\x99\x27\xfb\xa9\x78\xcc\x94\xaf"
+	"\x3d\x04\xfd\x25\xc0\x47\xfa\x29\x80\x05\xf4\xde\xad\xdb\xab\x12"
+	"\xb0\x2b\x8e\xca\x02\x06\x6d\xad\x3e\x09\xb1\x22\xa3\xf5\x4c\x6d"
+	"\x69\x99\x58\x8b\xd8\x45\x2e\xe0\xc9\x3c\xf7\x92\xce\x21\x90\x6b"
+	"\x3b\x65\x9f\x64\x79\x8d\x67\x22\x1a\x37\xd3\xee\x51\xe2\xe7\x5a"
+	"\x93\x51\xaa\x3c\x4b\x04\x16\x32\xef\xe3\x66\xbe\x18\x94\x88\x64"
+	"\x79\xce\x06\x3f\xb8\xd6\xee\xdc\x13\x79\x6f\x20\x14\xc2\x6b\xce"
+	"\xc8\xda\x42\xa5\x93\x5b\xe4\x7f\x1a\xe6\xda\x0f\xb3\xc1\x5f\x30"
+	"\x50\x76\xe8\x37\x3d\xca\x77\x2c\xa8\xe4\x3b\xf9\x6f\xe0\x17\xed"
+	"\x0e\xef\xb7\x31\x14\xb5\xea\xd9\x39\x22\x89\xb6\x40\x57\xcc\x84"
+	"\xef\x73\xa7\xe9\x27\x21\x85\x89\xfa\xaf\x03\xda\x9c\x8b\xfd\x52"
+	"\x7d\xb0\xa4\xe4\xf9\xd8\x90\x55\xc4\x39\xd6\x9d\xaf\x3b\xce\xac"
+	"\xaa\x36\x14\x7a\x9b\x8b\x12\x43\xe1\xca\x61\xae\x46\x5b\xe7\xe5"
+	"\x88\x32\x80\xa0\x2d\x51\xbb\x2f\xea\xeb\x3c\x71\xb2\xae\xce\xca"
+	"\x61\xd2\x76\xe0\x45\x46\x78\x4e\x09\x2d\xc2\x54\xc2\xa9\xc7\xa8"
+	"\x55\x8e\x72\xa4\x8b\x8a\xc9\x01\xdb\xe9\x58\x11\xa1\xc4\xe7\x12",
+	.secret_size = 16,
+	.b_secret_size = 400,
+	.b_public_size = 384,
+	.expected_a_public_size = 384,
+	.expected_ss_size = 384,
+	.genkey = true,
+	},
+};
+
+static const struct kpp_testvec ffdhe4096_dh_tv_template[] __maybe_unused = {
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x02" /* len */
+	"\x00\x02\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x02\x10" /* len */
+	"\x00\x00\x02\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x1a\x48\xf3\x6c\x61\x03\x42\x43\xd7\x42\x3b\xfa\xdb\x55\x6f\xa2"
+	"\xe1\x79\x52\x0b\x47\xc5\x03\x60\x2f\x26\xb9\x1a\x14\x15\x1a\xd9"
+	"\xe0\xbb\xa7\x82\x63\x41\xec\x26\x55\x00\xab\xe5\x21\x9d\x31\x14"
+	"\x0e\xe2\xc2\xb2\xb8\x37\xe6\xc3\x5a\xab\xae\x25\xdb\x71\x1e\xed"
+	"\xe8\x75\x9a\x04\xa7\x92\x2a\x99\x7e\xc0\x5b\x64\x75\x7f\xe5\xb5"
+	"\xdb\x6c\x95\x4f\xe9\xdc\x39\x76\x79\xb0\xf7\x00\x30\x8e\x86\xe7"
+	"\x36\xd1\xd2\x0c\x68\x7b\x94\xe9\x91\x85\x08\x86\xbc\x64\x87\xd2"
+	"\xf5\x5b\xaf\x03\xf6\x5f\x28\x25\xf1\xa3\x20\x5c\x1b\xb5\x26\x45"
+	"\x9a\x47\xab\xd6\xad\x49\xab\x92\x8e\x62\x6f\x48\x31\xea\xf6\x76"
+	"\xff\xa2\xb6\x28\x78\xef\x59\xc3\x71\x5d\xa8\xd9\x70\x89\xcc\xe2"
+	"\x63\x58\x5e\x3a\xa2\xa2\x88\xbf\x77\x20\x84\x33\x65\x64\x4e\x73"
+	"\xe5\x08\xd5\x89\x23\xd6\x07\xac\x29\x65\x2e\x02\xa8\x35\x96\x48"
+	"\xe7\x5d\x43\x6a\x42\xcc\xda\x98\xc4\x75\x90\x2e\xf6\xc4\xbf\xd4"
+	"\xbc\x31\x14\x0d\x54\x30\x11\xb2\xc9\xcf\xbb\xba\xbc\xc6\xf2\xcf"
+	"\xfe\x4a\x9d\xf3\xec\x78\x5d\x5d\xb4\x99\xd0\x67\x0f\x5a\x21\x1c"
+	"\x7b\x95\x2b\xcf\x49\x44\x94\x05\x1a\x21\x81\x25\x7f\xe3\x8a\x2a"
+	"\xdd\x88\xac\x44\x94\x23\x20\x3b\x75\xf6\x2a\x8a\x45\xf8\xb5\x1f"
+	"\xb9\x8b\xeb\xab\x9b\x38\x23\x26\xf1\x0f\x34\x47\x4f\x7f\xe1\x9e"
+	"\x84\x84\x78\xe5\xe3\x49\xeb\xcc\x2f\x02\x85\xa4\x18\x91\xde\x1a"
+	"\x60\x54\x33\x81\xd5\xae\xdb\x23\x9c\x4d\xa4\xdb\x22\x5b\xdf\xf4"
+	"\x8e\x05\x2b\x60\xba\xe8\x75\xfc\x34\x99\xcf\x35\xe1\x06\xba\xdc"
+	"\x79\x2a\x5e\xec\x1c\xbe\x79\x33\x63\x1c\xe7\x5f\x1e\x30\xd6\x1b"
+	"\xdb\x11\xb8\xea\x63\xff\xfe\x1a\x3c\x24\xf4\x78\x9c\xcc\x5d\x9a"
+	"\xc9\x2d\xc4\x9a\xd4\xa7\x65\x84\x98\xdb\x66\x76\xf0\x34\x31\x9f"
+	"\xce\xb5\xfb\x28\x07\xde\x1e\x0d\x9b\x01\x64\xeb\x2a\x37\x2f\x20"
+	"\xa5\x95\x72\x2b\x54\x51\x59\x91\xea\x50\x54\x0f\x2e\xb0\x1d\xf6"
+	"\xb9\x46\x43\xf9\xd0\x13\x21\x20\x47\x61\x1a\x1c\x30\xc6\x9e\x75"
+	"\x22\xe4\xf2\xb1\xab\x01\xdc\x5b\x3c\x1e\xa2\x6d\xc0\xb9\x9a\x2a"
+	"\x84\x61\xea\x85\x63\xa0\x77\xd0\xeb\x20\x68\xd5\x95\x6a\x1b\x8f"
+	"\x1f\x9a\xba\x44\x49\x8c\x77\xa6\xd9\xa0\x14\xf8\x7d\x9b\x4e\xfa"
+	"\xdc\x4f\x1c\x4d\x60\x50\x26\x7f\xd6\xc1\x91\x2b\xa6\x37\x5d\x94"
+	"\x69\xb2\x47\x59\xd6\xc3\x59\xbb\xd6\x9b\x71\x52\x85\x7a\xcb\x2d",
+	.b_public =
+	"\x24\x38\x02\x02\x2f\xeb\x54\xdd\x73\x21\x91\x4a\xd8\xa4\x0a\xbf"
+	"\xf4\xf5\x9a\x45\xb5\xcd\x42\xa3\x57\xcc\x65\x4a\x23\x2e\xee\x59"
+	"\xba\x6f\x14\x89\xae\x2e\x14\x0a\x72\x77\x23\x7f\x6c\x2e\xba\x52"
+	"\x3f\x71\xbf\xe4\x60\x03\x16\xaa\x61\xf5\x80\x1d\x8a\x45\x9e\x53"
+	"\x7b\x07\xd9\x7e\xfe\xaf\xcb\xda\xff\x20\x71\xba\x89\x39\x75\xc3"
+	"\xb3\x65\x0c\xb1\xa7\xfa\x4a\xe7\xe0\x85\xc5\x4e\x91\x47\x41\xf4"
+	"\xdd\xcd\xc5\x3d\x17\x12\xed\xee\xc0\x31\xb1\xaf\xc1\xd5\x3c\x07"
+	"\xa1\x5a\xc4\x05\x45\xe3\x10\x0c\xc3\x14\xae\x65\xca\x40\xae\x31"
+	"\x5c\x13\x0d\x32\x85\xa7\x6e\xf4\x5e\x29\x3d\x4e\xd3\xd7\x49\x58"
+	"\xe1\x73\xbb\x0a\x7b\xd6\x13\xea\x49\xd7\x20\x3d\x31\xaa\x77\xab"
+	"\x21\x74\xe9\x2f\xe9\x5e\xbe\x2f\xb4\xa2\x79\xf2\xbc\xcc\x51\x94"
+	"\xd2\x1d\xb2\xe6\xc5\x39\x66\xd7\xe5\x46\x75\x53\x76\xed\x49\xea"
+	"\x3b\xdd\x01\x27\xdb\x83\xa5\x9f\xd2\xee\xc8\xde\x9e\xde\xd2\xe7"
+	"\x99\xad\x9c\xe0\x71\x66\x29\xd8\x0d\xfe\xdc\xd1\xbc\xc7\x9a\xbe"
+	"\x8b\x26\x46\x57\xb6\x79\xfa\xad\x8b\x45\x2e\xb5\xe5\x89\x34\x01"
+	"\x93\x00\x9d\xe9\x58\x74\x8b\xda\x07\x92\xb5\x01\x4a\xe1\x44\x36"
+	"\xc7\x6c\xde\xc8\x7a\x17\xd0\xde\xee\x68\x92\xb5\xde\x21\x2b\x1c"
+	"\xbc\x65\x30\x1e\xae\x15\x3d\x9a\xaf\x20\xa3\xc4\x21\x70\xfb\x2f"
+	"\x36\x72\x31\xc0\xe8\x85\xdf\xc5\x50\x4c\x90\x10\x32\xa4\xc7\xee"
+	"\x59\x5a\x21\xf4\xf1\x33\xcf\xbe\xac\x67\xb1\x40\x7c\x0b\x3f\x64"
+	"\xe5\xd2\x2d\xb7\x7d\x0f\xce\xf7\x9b\x05\xee\x37\x61\xd2\x61\x9e"
+	"\x1a\x80\x2e\x79\xe6\x1b\x25\xb3\x61\x3d\x53\xe7\xe5\x97\x9a\xc2"
+	"\x39\xb1\xe3\x91\xc6\xee\x96\x2e\xa9\xb4\xb8\xad\xd8\x04\x3e\x11"
+	"\x31\x67\xb8\x6a\xcb\x6e\x1a\x4c\x7f\x74\xc7\x1f\x09\xd1\xd0\x6b"
+	"\x17\xde\xea\xe8\x0b\xe6\x6a\xee\x2f\xe3\x5b\x9c\x59\x5d\x00\x57"
+	"\xbf\x24\x25\xba\x22\x34\xb9\xc5\x3c\xc4\x57\x26\xd0\x6d\x89\xee"
+	"\x67\x79\x3c\x70\xf9\xc3\xb4\x30\xf0\x2e\xca\xfa\x74\x00\xd1\x00"
+	"\x6d\x03\x97\xd5\x08\x3f\x0b\x8e\xb8\x1d\xa3\x91\x7f\xa9\x3a\xf0"
+	"\x37\x57\x46\x87\x82\xa3\xb5\x8f\x51\xaa\xc7\x7b\xfe\x86\x26\xb9"
+	"\xfa\xe6\x1e\xee\x92\x9d\x3a\xed\x5b\x5e\x3f\xe5\xca\x5e\x13\x01"
+	"\xdd\x4c\x8d\x85\xf0\x60\x61\xb7\x60\x24\x83\x9f\xbe\x72\x21\x81"
+	"\x55\x7e\x7e\x6d\xf3\x28\xc8\x77\x5a\xae\x5a\x32\x86\xd5\x61\xad",
+	.expected_a_public =
+	"\x1f\xff\xd6\xc4\x59\xf3\x4a\x9e\x81\x74\x4d\x27\xa7\xc6\x6b\x35"
+	"\xd8\xf5\xb3\x24\x97\x82\xe7\x2e\xf3\x21\x91\x23\x2f\x3d\x57\x7f"
+	"\x15\x8c\x84\x71\xe7\x25\x35\xe8\x07\x14\x06\x4c\x83\xdc\x55\x4a"
+	"\xf8\x45\xc5\xe9\xfa\x6e\xae\x6e\xcf\x4d\x11\x91\x26\x16\x6f\x86"
+	"\x89\x78\xaa\xb4\x25\x54\xb2\x74\x07\xe5\x26\x26\x0c\xad\xa4\x57"
+	"\x59\x61\x66\x71\x43\x22\xff\x49\x51\xa4\x76\x0e\x55\x7b\x60\x45"
+	"\x4f\xaf\xbd\x9c\xec\x64\x3f\x80\x0b\x0c\x31\x41\xf0\xfe\x2c\xb7"
+	"\x0a\xbe\xa5\x71\x08\x0d\x8d\x1e\x8a\x77\x9a\xd2\x90\x31\x96\xd0"
+	"\x3b\x31\xdc\xc6\x18\x59\x43\xa1\x19\x5a\x84\x68\x29\xad\x5e\x58"
+	"\xa2\x50\x3e\x83\xf5\x7a\xbd\x88\x17\x60\x89\x98\x9c\x19\x89\x27"
+	"\x89\xfc\x33\x87\x42\xd5\xde\x19\x14\xf2\x95\x82\x10\x87\xad\x82"
+	"\xdd\x6b\x51\x2d\x8d\x0e\x81\x4b\xde\xb3\x35\x6c\x0f\x4b\x56\x45"
+	"\x48\x87\xe9\x5a\xf9\x70\x10\x30\x8e\xa1\xbb\xa4\x70\xbf\xa0\xab"
+	"\x10\x31\x3c\x2c\xdc\xc4\xed\xe3\x51\xdc\xee\xd2\xa5\x5c\x4e\x6e"
+	"\xf6\xed\x60\x5a\xeb\xf3\x02\x19\x2a\x95\xe9\x46\xff\x37\x1b\xf0"
+	"\x1d\x10\x4a\x8f\x4f\x3a\x6e\xf5\xfc\x02\x6d\x09\x7d\xea\x69\x7b"
+	"\x13\xb0\xb6\x80\x5c\x15\x20\xa8\x4d\x15\x56\x11\x72\x49\xdb\x48"
+	"\x54\x40\x66\xd5\xcd\x17\x3a\x26\x95\xf6\xd7\xf2\x59\xa3\xda\xbb"
+	"\x26\xd0\xe5\x46\xbf\xee\x0e\x7d\xf1\xe0\x11\x02\x4d\xd3\xdc\xe2"
+	"\x3f\xc2\x51\x7e\xc7\x90\x33\x3c\x1c\xa0\x4c\x69\xcc\x1e\xc7\xac"
+	"\x17\xe0\xe5\xf4\x8c\x05\x64\x34\xfe\x84\x70\xd7\x6b\xed\xab\xf5"
+	"\x88\x9d\x3e\x4c\x5a\x9e\xd4\x74\xfd\xdd\x91\xd5\xd4\xcb\xbf\xf8"
+	"\xb7\x56\xb5\xe9\x22\xa6\x6d\x7a\x44\x05\x41\xbf\xdb\x61\x28\xc6"
+	"\x99\x49\x87\x3d\x28\x77\xf8\x83\x23\x7e\xa9\xa7\xee\x20\xdb\x6d"
+	"\x21\x50\xb7\xc9\x52\x57\x53\xa3\xcf\xdf\xd0\xf9\xb9\x62\x96\x89"
+	"\xf5\x5c\xa9\x8a\x11\x95\x01\x25\xc9\x81\x15\x76\xae\xf0\xc7\xc5"
+	"\x50\xae\x6f\xb5\xd2\x8a\x8e\x9a\xd4\x30\x55\xc6\xe9\x2c\x81\x6e"
+	"\x95\xf6\x45\x89\x55\x28\x34\x7b\xe5\x72\x9a\x2a\xe2\x98\x09\x35"
+	"\xe0\xe9\x75\x94\xe9\x34\x95\xb9\x13\x6e\xd5\xa1\x62\x5a\x1c\x94"
+	"\x28\xed\x84\x46\x76\x6d\x10\x37\x71\xa3\x31\x46\x64\xe4\x59\x44"
+	"\x17\x70\x1c\x23\xc9\x7e\xf6\xab\x8a\x24\xae\x25\xe2\xb2\x5f\x33"
+	"\xe4\xd7\xd3\x34\x2a\x49\x22\x16\x15\x9b\x90\x40\xda\x99\xd5\xaf",
+	.expected_ss =
+	"\xe2\xce\x0e\x4b\x64\xf3\x84\x62\x38\xfd\xe3\x6f\x69\x40\x22\xb0"
+	"\x73\x27\x03\x12\x82\xa4\x6e\x03\x57\xec\x3d\xa0\xc1\x4f\x4b\x09"
+	"\xa1\xd4\xe0\x1a\x5d\x91\x2e\x08\xad\x57\xfa\xcc\x55\x90\x5f\xa0"
+	"\x52\x27\x62\x8d\xe5\x2d\xa1\x5f\xf0\x30\x43\x77\x4e\x3f\x02\x58"
+	"\xcb\xa0\x51\xae\x1d\x24\xf9\x0a\xd1\x36\x0b\x95\x0f\x07\xd9\xf7"
+	"\xe2\x36\x14\x2f\xf0\x11\xc2\xc9\xaf\x66\x4e\x0d\xb4\x60\x01\x4e"
+	"\xa8\x49\xc6\xec\x5f\xb2\xbc\x05\x48\x91\x4e\xe1\xc3\x99\x9f\xeb"
+	"\x4a\xc1\xde\x05\x9a\x65\x39\x7d\x2f\x89\x85\xb2\xcf\xec\x25\x27"
+	"\x5f\x1c\x11\x63\xcf\x7b\x86\x98\x39\xae\xc2\x16\x8f\x79\xd1\x20"
+	"\xd0\xb4\xa0\xba\x44\xd8\xf5\x3a\x0a\x08\x4c\xd1\xb9\xdd\x0a\x5b"
+	"\x9e\x62\xf3\x52\x0c\x84\x12\x43\x9b\xd7\xdf\x86\x71\x03\xdd\x04"
+	"\x98\x55\x0c\x7b\xe2\xe8\x03\x17\x25\x84\xd9\xbd\xe1\xce\x64\xbe"
+	"\xca\x55\xd4\x5b\xef\x61\x5b\x68\x4b\x80\x37\x40\xae\x28\x87\x81"
+	"\x55\x34\x96\x50\x21\x47\x49\xc0\xda\x26\x46\xb8\xe8\xcc\x5a\x27"
+	"\x9c\x9d\x0a\x3d\xcc\x4c\x63\x27\x81\x82\x2e\xf4\xa8\x91\x37\x3e"
+	"\xa7\x34\x6a\x0f\x60\x44\xdd\x2e\xdc\xf9\x19\xf2\x2e\x81\x05\x51"
+	"\x16\xbc\xc0\x85\xa5\xd5\x08\x09\x1f\xcd\xed\xa4\xc5\xdb\x16\x43"
+	"\xb5\x7a\x71\x66\x19\x2e\xef\x13\xbc\x40\x39\x0a\x00\x45\x7e\x61"
+	"\xe9\x68\x60\x83\x00\x70\xd1\x71\xd3\xa2\x61\x3e\x00\x46\x93\x0d"
+	"\xbf\xe6\xa2\x07\xe6\x40\x1a\xf4\x57\xc6\x67\x39\xd8\xd7\x6b\xc5"
+	"\xa5\xd8\x38\x78\x12\xb4\x97\x12\xbe\x97\x13\xef\xe4\x74\x0c\xe0"
+	"\x75\x89\x64\xf4\xe8\x85\xda\x84\x7b\x1d\xfe\xdd\x21\xba\xda\x01"
+	"\x52\xdc\x59\xe5\x47\x50\x7e\x15\x20\xd0\x43\x37\x6e\x48\x39\x00"
+	"\xee\xd9\x54\x6d\x00\x65\xc9\x4b\x85\xa2\x8a\x40\x55\xd0\x63\x0c"
+	"\xb5\x7a\x0d\x37\x67\x27\x73\x18\x7f\x5a\xf5\x0e\x22\xb9\xb0\x3f"
+	"\xda\xf1\xec\x7c\x24\x01\x49\xa9\x09\x0e\x0f\xc4\xa9\xef\xc8\x2b"
+	"\x13\xd1\x0a\x6f\xf8\x92\x4b\x1d\xdd\x6c\x9c\x35\xde\x75\x46\x32"
+	"\xe6\xfb\xda\x58\xba\x81\x08\xca\xa9\xb6\x69\x71\x96\x2a\x1f\x2e"
+	"\x25\xe0\x37\xfe\xee\x4d\x27\xaa\x04\xda\x95\xbb\x93\xcf\x8f\xa2"
+	"\x1d\x67\x35\xe3\x51\x8f\x87\x3b\xa9\x62\x05\xee\x44\xb7\x2e\xd0"
+	"\x07\x63\x32\xf5\xcd\x64\x18\x20\xcf\x22\x42\x28\x22\x1a\xa8\xbb"
+	"\x74\x8a\x6f\x2a\xea\x8a\x48\x0a\xad\xd7\xed\xba\xa3\x89\x37\x01",
+	.secret_size = 528,
+	.b_public_size = 512,
+	.expected_a_public_size = 512,
+	.expected_ss_size = 512,
+	},
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x00" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x00\x10" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#endif
+	.b_secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x02" /* len */
+	"\x00\x02\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x02\x10" /* len */
+	"\x00\x00\x02\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x1a\x48\xf3\x6c\x61\x03\x42\x43\xd7\x42\x3b\xfa\xdb\x55\x6f\xa2"
+	"\xe1\x79\x52\x0b\x47\xc5\x03\x60\x2f\x26\xb9\x1a\x14\x15\x1a\xd9"
+	"\xe0\xbb\xa7\x82\x63\x41\xec\x26\x55\x00\xab\xe5\x21\x9d\x31\x14"
+	"\x0e\xe2\xc2\xb2\xb8\x37\xe6\xc3\x5a\xab\xae\x25\xdb\x71\x1e\xed"
+	"\xe8\x75\x9a\x04\xa7\x92\x2a\x99\x7e\xc0\x5b\x64\x75\x7f\xe5\xb5"
+	"\xdb\x6c\x95\x4f\xe9\xdc\x39\x76\x79\xb0\xf7\x00\x30\x8e\x86\xe7"
+	"\x36\xd1\xd2\x0c\x68\x7b\x94\xe9\x91\x85\x08\x86\xbc\x64\x87\xd2"
+	"\xf5\x5b\xaf\x03\xf6\x5f\x28\x25\xf1\xa3\x20\x5c\x1b\xb5\x26\x45"
+	"\x9a\x47\xab\xd6\xad\x49\xab\x92\x8e\x62\x6f\x48\x31\xea\xf6\x76"
+	"\xff\xa2\xb6\x28\x78\xef\x59\xc3\x71\x5d\xa8\xd9\x70\x89\xcc\xe2"
+	"\x63\x58\x5e\x3a\xa2\xa2\x88\xbf\x77\x20\x84\x33\x65\x64\x4e\x73"
+	"\xe5\x08\xd5\x89\x23\xd6\x07\xac\x29\x65\x2e\x02\xa8\x35\x96\x48"
+	"\xe7\x5d\x43\x6a\x42\xcc\xda\x98\xc4\x75\x90\x2e\xf6\xc4\xbf\xd4"
+	"\xbc\x31\x14\x0d\x54\x30\x11\xb2\xc9\xcf\xbb\xba\xbc\xc6\xf2\xcf"
+	"\xfe\x4a\x9d\xf3\xec\x78\x5d\x5d\xb4\x99\xd0\x67\x0f\x5a\x21\x1c"
+	"\x7b\x95\x2b\xcf\x49\x44\x94\x05\x1a\x21\x81\x25\x7f\xe3\x8a\x2a"
+	"\xdd\x88\xac\x44\x94\x23\x20\x3b\x75\xf6\x2a\x8a\x45\xf8\xb5\x1f"
+	"\xb9\x8b\xeb\xab\x9b\x38\x23\x26\xf1\x0f\x34\x47\x4f\x7f\xe1\x9e"
+	"\x84\x84\x78\xe5\xe3\x49\xeb\xcc\x2f\x02\x85\xa4\x18\x91\xde\x1a"
+	"\x60\x54\x33\x81\xd5\xae\xdb\x23\x9c\x4d\xa4\xdb\x22\x5b\xdf\xf4"
+	"\x8e\x05\x2b\x60\xba\xe8\x75\xfc\x34\x99\xcf\x35\xe1\x06\xba\xdc"
+	"\x79\x2a\x5e\xec\x1c\xbe\x79\x33\x63\x1c\xe7\x5f\x1e\x30\xd6\x1b"
+	"\xdb\x11\xb8\xea\x63\xff\xfe\x1a\x3c\x24\xf4\x78\x9c\xcc\x5d\x9a"
+	"\xc9\x2d\xc4\x9a\xd4\xa7\x65\x84\x98\xdb\x66\x76\xf0\x34\x31\x9f"
+	"\xce\xb5\xfb\x28\x07\xde\x1e\x0d\x9b\x01\x64\xeb\x2a\x37\x2f\x20"
+	"\xa5\x95\x72\x2b\x54\x51\x59\x91\xea\x50\x54\x0f\x2e\xb0\x1d\xf6"
+	"\xb9\x46\x43\xf9\xd0\x13\x21\x20\x47\x61\x1a\x1c\x30\xc6\x9e\x75"
+	"\x22\xe4\xf2\xb1\xab\x01\xdc\x5b\x3c\x1e\xa2\x6d\xc0\xb9\x9a\x2a"
+	"\x84\x61\xea\x85\x63\xa0\x77\xd0\xeb\x20\x68\xd5\x95\x6a\x1b\x8f"
+	"\x1f\x9a\xba\x44\x49\x8c\x77\xa6\xd9\xa0\x14\xf8\x7d\x9b\x4e\xfa"
+	"\xdc\x4f\x1c\x4d\x60\x50\x26\x7f\xd6\xc1\x91\x2b\xa6\x37\x5d\x94"
+	"\x69\xb2\x47\x59\xd6\xc3\x59\xbb\xd6\x9b\x71\x52\x85\x7a\xcb\x2d",
+	.b_public =
+	"\x1f\xff\xd6\xc4\x59\xf3\x4a\x9e\x81\x74\x4d\x27\xa7\xc6\x6b\x35"
+	"\xd8\xf5\xb3\x24\x97\x82\xe7\x2e\xf3\x21\x91\x23\x2f\x3d\x57\x7f"
+	"\x15\x8c\x84\x71\xe7\x25\x35\xe8\x07\x14\x06\x4c\x83\xdc\x55\x4a"
+	"\xf8\x45\xc5\xe9\xfa\x6e\xae\x6e\xcf\x4d\x11\x91\x26\x16\x6f\x86"
+	"\x89\x78\xaa\xb4\x25\x54\xb2\x74\x07\xe5\x26\x26\x0c\xad\xa4\x57"
+	"\x59\x61\x66\x71\x43\x22\xff\x49\x51\xa4\x76\x0e\x55\x7b\x60\x45"
+	"\x4f\xaf\xbd\x9c\xec\x64\x3f\x80\x0b\x0c\x31\x41\xf0\xfe\x2c\xb7"
+	"\x0a\xbe\xa5\x71\x08\x0d\x8d\x1e\x8a\x77\x9a\xd2\x90\x31\x96\xd0"
+	"\x3b\x31\xdc\xc6\x18\x59\x43\xa1\x19\x5a\x84\x68\x29\xad\x5e\x58"
+	"\xa2\x50\x3e\x83\xf5\x7a\xbd\x88\x17\x60\x89\x98\x9c\x19\x89\x27"
+	"\x89\xfc\x33\x87\x42\xd5\xde\x19\x14\xf2\x95\x82\x10\x87\xad\x82"
+	"\xdd\x6b\x51\x2d\x8d\x0e\x81\x4b\xde\xb3\x35\x6c\x0f\x4b\x56\x45"
+	"\x48\x87\xe9\x5a\xf9\x70\x10\x30\x8e\xa1\xbb\xa4\x70\xbf\xa0\xab"
+	"\x10\x31\x3c\x2c\xdc\xc4\xed\xe3\x51\xdc\xee\xd2\xa5\x5c\x4e\x6e"
+	"\xf6\xed\x60\x5a\xeb\xf3\x02\x19\x2a\x95\xe9\x46\xff\x37\x1b\xf0"
+	"\x1d\x10\x4a\x8f\x4f\x3a\x6e\xf5\xfc\x02\x6d\x09\x7d\xea\x69\x7b"
+	"\x13\xb0\xb6\x80\x5c\x15\x20\xa8\x4d\x15\x56\x11\x72\x49\xdb\x48"
+	"\x54\x40\x66\xd5\xcd\x17\x3a\x26\x95\xf6\xd7\xf2\x59\xa3\xda\xbb"
+	"\x26\xd0\xe5\x46\xbf\xee\x0e\x7d\xf1\xe0\x11\x02\x4d\xd3\xdc\xe2"
+	"\x3f\xc2\x51\x7e\xc7\x90\x33\x3c\x1c\xa0\x4c\x69\xcc\x1e\xc7\xac"
+	"\x17\xe0\xe5\xf4\x8c\x05\x64\x34\xfe\x84\x70\xd7\x6b\xed\xab\xf5"
+	"\x88\x9d\x3e\x4c\x5a\x9e\xd4\x74\xfd\xdd\x91\xd5\xd4\xcb\xbf\xf8"
+	"\xb7\x56\xb5\xe9\x22\xa6\x6d\x7a\x44\x05\x41\xbf\xdb\x61\x28\xc6"
+	"\x99\x49\x87\x3d\x28\x77\xf8\x83\x23\x7e\xa9\xa7\xee\x20\xdb\x6d"
+	"\x21\x50\xb7\xc9\x52\x57\x53\xa3\xcf\xdf\xd0\xf9\xb9\x62\x96\x89"
+	"\xf5\x5c\xa9\x8a\x11\x95\x01\x25\xc9\x81\x15\x76\xae\xf0\xc7\xc5"
+	"\x50\xae\x6f\xb5\xd2\x8a\x8e\x9a\xd4\x30\x55\xc6\xe9\x2c\x81\x6e"
+	"\x95\xf6\x45\x89\x55\x28\x34\x7b\xe5\x72\x9a\x2a\xe2\x98\x09\x35"
+	"\xe0\xe9\x75\x94\xe9\x34\x95\xb9\x13\x6e\xd5\xa1\x62\x5a\x1c\x94"
+	"\x28\xed\x84\x46\x76\x6d\x10\x37\x71\xa3\x31\x46\x64\xe4\x59\x44"
+	"\x17\x70\x1c\x23\xc9\x7e\xf6\xab\x8a\x24\xae\x25\xe2\xb2\x5f\x33"
+	"\xe4\xd7\xd3\x34\x2a\x49\x22\x16\x15\x9b\x90\x40\xda\x99\xd5\xaf",
+	.secret_size = 16,
+	.b_secret_size = 528,
+	.b_public_size = 512,
+	.expected_a_public_size = 512,
+	.expected_ss_size = 512,
+	.genkey = true,
+	},
+};
+
+static const struct kpp_testvec ffdhe6144_dh_tv_template[] __maybe_unused = {
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x03" /* len */
+	"\x00\x03\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x03\x10" /* len */
+	"\x00\x00\x03\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x63\x3e\x6f\xe0\xfe\x9f\x4a\x01\x62\x77\xce\xf1\xc7\xcc\x49\x4d"
+	"\x92\x53\x56\xe3\x39\x15\x81\xb2\xcd\xdc\xaf\x5e\xbf\x31\x1f\x69"
+	"\xce\x41\x35\x24\xaa\x46\x53\xb5\xb7\x3f\x2b\xad\x95\x14\xfb\xe4"
+	"\x9a\x61\xcd\x0f\x1f\x02\xee\xa4\x79\x2c\x9d\x1a\x7c\x62\x82\x39"
+	"\xdd\x43\xcc\x58\x9f\x62\x47\x56\x1d\x0f\xc2\x67\xbc\x24\xd0\xf9"
+	"\x0a\x50\x1b\x10\xe7\xbb\xd1\xc2\x01\xbb\xc4\x4c\xda\x12\x60\x0e"
+	"\x95\x2b\xde\x09\xd6\x67\xe1\xbc\x4c\xb9\x67\xdf\xd0\x1f\x97\xb4"
+	"\xde\xcb\x6b\x78\x83\x51\x74\x33\x01\x7f\xf6\x0a\x95\x69\x93\x00"
+	"\x2a\xc3\x75\x8e\xef\xbe\x53\x11\x6d\xc4\xd0\x9f\x6d\x63\x48\xc1"
+	"\x91\x1f\x7d\x88\xa7\x90\x78\xd1\x7e\x52\x42\x10\x01\xb4\x27\x95"
+	"\x91\x43\xcc\x82\x91\x86\x62\xa0\x9d\xef\x65\x6e\x67\xcf\x19\x11"
+	"\x35\x37\x5e\x94\x97\x83\xa6\x83\x1c\x7e\x8a\x3e\x32\xb0\xce\xff"
+	"\x20\xdc\x7b\x6e\x18\xd9\x6b\x27\x31\xfc\xc3\xef\x47\x8d\xbe\x34"
+	"\x2b\xc7\x60\x74\x3c\x93\xb3\x8e\x54\x77\x4e\x73\xe6\x40\x72\x35"
+	"\xb0\xf0\x06\x53\x43\xbe\xd0\xc3\x87\xcc\x38\x96\xa9\x10\xa0\xd6"
+	"\x17\xed\xa5\x6a\xf4\xf6\xaa\x77\x40\xed\x7d\x2e\x58\x0f\x5b\x04"
+	"\x5a\x41\x12\x95\x22\xcb\xa3\xce\x8b\x6d\x6d\x89\xec\x7c\x1d\x25"
+	"\x27\x52\x50\xa0\x5b\x93\x8c\x5d\x3f\x56\xb9\xa6\x5e\xe5\xf7\x9b"
+	"\xc7\x9a\x4a\x2e\x79\xb5\xca\x29\x58\x52\xa0\x63\xe4\x9d\xeb\x4c"
+	"\x4c\xa8\x37\x0b\xe9\xa0\x18\xf1\x86\xf6\x4d\x32\xfb\x9e\x4f\xb3"
+	"\x7b\x5d\x58\x78\x70\xbd\x56\xac\x99\x75\x25\x71\x66\x76\x4e\x5e"
+	"\x67\x4f\xb1\x17\xa7\x8b\x55\x12\x87\x01\x4e\xd1\x66\xef\xd0\x70"
+	"\xaf\x14\x34\xee\x2a\x76\x49\x25\xa6\x2e\x43\x37\x75\x7d\x1a\xad"
+	"\x08\xd5\x01\x85\x9c\xe1\x20\xd8\x38\x5c\x57\xa5\xed\x9d\x46\x3a"
+	"\xb7\x46\x60\x29\x8b\xc4\x21\x50\x0a\x30\x9c\x57\x42\xe4\x35\xf8"
+	"\x12\x5c\x4f\xa2\x20\xc2\xc9\x43\xe3\x6d\x20\xbc\xdf\xb8\x37\x33"
+	"\x45\x43\x06\x4e\x08\x6f\x8a\xcd\x61\xc3\x1b\x05\x28\x82\xbe\xf0"
+	"\x48\x33\xe5\x93\xc9\x1a\x61\x16\x67\x03\x9d\x47\x9d\x74\xeb\xae"
+	"\x13\xf2\xb4\x1b\x09\x11\xf5\x15\xcb\x28\xfd\x50\xe0\xbc\x58\x36"
+	"\x38\x91\x2c\x07\x27\x1f\x49\x68\xf4\xce\xad\xf7\xba\xec\x5d\x3d"
+	"\xfd\x27\xe2\xcf\xf4\x56\xfe\x08\xa6\x11\x61\xcb\x6c\x9f\xf9\x3c"
+	"\x57\x0b\x8b\xaa\x00\x16\x18\xba\x1f\xe8\x4f\x01\xe2\x79\x2a\x0b"
+	"\xc1\xbd\x52\xef\xe6\xf7\x5a\x66\xfe\x07\x3b\x50\x6b\xbb\xcb\x39"
+	"\x3c\x94\xf6\x21\x0d\x68\x69\xa4\xed\x2e\xb5\x85\x03\x11\x38\x79"
+	"\xec\xb5\x22\x23\xdf\x9e\xad\xb4\xbe\xd7\xc7\xdf\xea\x30\x23\x8a"
+	"\xb7\x21\x0a\x9d\xbd\x99\x13\x7d\x5f\x7e\xaf\x28\x54\x3f\xca\x5e"
+	"\xf4\xfc\x05\x0d\x65\x67\xd8\xf6\x8e\x90\x9d\x0d\xcf\x62\x82\xd6"
+	"\x9f\x02\xf8\xca\xfa\x42\x24\x7f\x4d\xb7\xfc\x92\xa6\x4a\x51\xc4"
+	"\xd8\xae\x19\x87\xc6\xa3\x83\xbe\x7b\x6d\xc3\xf5\xb8\xad\x4a\x05"
+	"\x78\x84\x3a\x15\x2e\x40\xbe\x79\xa9\xc0\x12\xa1\x48\x39\xc3\xdb"
+	"\x47\x4f\x7d\xea\x6d\xc7\xfa\x2c\x4e\xe9\xa5\x85\x81\xea\x6c\xcd"
+	"\x8a\xe5\x74\x17\x76\x31\x31\x75\x96\x83\xca\x81\xbb\x5c\xa9\x79"
+	"\x2c\xbd\x09\xfe\xe4\x86\x0d\x8c\x76\x9c\xbc\xe8\x93\xe4\xd0\xe4"
+	"\x0f\xf8\xff\x24\x7e\x66\x61\x69\xfb\xe4\x46\x08\x94\x99\xa5\x53"
+	"\xd7\xe4\x29\x72\x86\x86\xe8\x1d\x37\xfa\xcb\xd0\x8d\x51\xd0\xbf"
+	"\x81\xcf\x55\xb9\xc5\x78\x8c\x74\xa0\x16\x3a\xd2\x19\x94\x29\x6a"
+	"\x5e\xec\xd3\x20\xa0\xb2\xfd\xce\xd4\x14\xa3\x39\x10\xa9\xf4\x4e"
+	"\xba\x21\x09\x5c\xe6\x61\x43\x51\xae\xc4\x71\xd7\x21\xef\x98\x39",
+	.b_public =
+	"\x30\x31\xbe\x43\xd0\x14\x22\x6b\x4b\x8c\x9a\xca\xc6\xdd\xe5\x99"
+	"\xce\xb8\x30\x23\xb6\xa8\x8c\x4d\xfa\xef\xad\xa6\x6a\x21\x50\xa6"
+	"\x45\x2d\x19\x2a\x29\x81\xc5\xac\xb4\xa8\x5f\x6d\x5b\xc8\x5f\x12"
+	"\x35\x21\xfb\x37\xaa\x0c\x79\xeb\xd4\x83\x01\xda\xa3\xf3\x51\x6e"
+	"\x17\xf9\xef\x3f\xbd\x2f\xd2\x43\x82\x12\x48\xeb\x61\x4c\x8e\xf2"
+	"\x6c\x76\xf9\x6d\x42\x2a\xcb\x10\x13\x3b\xf6\x9b\xcd\x46\x1e\xa2"
+	"\xa7\x2c\x08\x56\xd2\x42\xf5\x03\xf0\x3e\xef\xa2\xa2\xf2\x4c\xf2"
+	"\xdb\x4f\xeb\x40\x15\x53\x27\xf7\xd4\x8e\x58\x23\xf5\x2c\x88\x04"
+	"\x1e\xb1\xb6\xe3\xd6\x9c\x49\x08\xa1\x4b\xb8\x33\xe4\x75\x85\xa1"
+	"\x86\x97\xce\x1d\xe9\x9f\xe2\xd8\xf2\x7e\xad\xdc\x8a\x4d\xbd\x06"
+	"\x52\x00\x9a\x2c\x69\xdd\x02\x0c\x69\x5a\xf9\x1d\xfd\xdc\xfb\x82"
+	"\xb2\xe5\xf3\x24\xba\xd1\x09\x76\x90\xb5\x7a\x92\xa6\x6b\x97\xc0"
+	"\xce\x13\x9b\x4b\xbc\x30\x91\xb2\x13\x8b\x57\x6c\x8b\x66\x6e\x58"
+	"\x3e\x91\x50\xc7\x6c\xe1\x18\xec\xbf\x69\xcd\xcb\xa0\xbc\x0d\x05"
+	"\xc4\xf8\x45\x92\xe0\x05\xd3\x08\xb3\x30\x19\xc8\x80\xf8\x17\x9f"
+	"\x1e\x6a\x49\x8e\x43\xef\x7a\x49\xa5\x93\xd9\xed\xd1\x07\x03\xe4"
+	"\xa3\x55\xeb\x1e\x2f\x69\xd7\x40\x8f\x6e\x1c\xb6\x94\xfb\xba\x4e"
+	"\x46\xd0\x38\x71\x00\x88\x93\x6a\x55\xfc\x16\x95\x1f\xb1\xf6\x2f"
+	"\x26\x45\x50\x54\x30\x62\x62\xe8\x80\xe5\x24\x0b\xe4\x15\x6b\x32"
+	"\x16\xc2\x30\x9b\x56\xb4\xc9\x5e\x50\xb4\x27\x82\x86\x01\xda\x68"
+	"\x44\x4b\x15\x81\x31\x13\x52\xd8\x08\xbc\xae\xf3\xa5\x94\x1c\x81"
+	"\xe8\x42\xd6\x42\xd6\xff\x99\x58\x0f\x61\x3e\x82\x9e\x2d\x13\x03"
+	"\x54\x02\x74\xf4\x6b\x43\x43\xce\x54\x44\x36\x3f\x55\xfa\xb2\x56"
+	"\xdc\xac\xb5\x65\x89\xbe\x36\xd2\x58\x65\x79\x4c\xf3\xe2\x01\xf1"
+	"\x69\x96\x29\x20\x5d\xee\xf5\x8a\x8b\x9f\x72\xf7\x27\x02\xde\x3b"
+	"\xc7\x52\x19\xdc\x8e\x22\x36\x09\x14\x59\x07\xbb\x1e\x49\x69\x4f"
+	"\x00\x7b\x9a\x5d\x23\xe9\xbe\x0d\x52\x90\xa3\x0d\xde\xe7\x80\x57"
+	"\x53\x69\x39\xe6\xf8\x33\xeb\x92\x0d\x9e\x04\x8b\x16\x16\x16\x1c"
+	"\xa9\xe6\xe3\x0e\x0a\xc6\xf6\x61\xd1\x44\x2b\x3e\x5e\x02\xfe\xaa"
+	"\xe3\xf3\x8f\xf9\xc8\x20\x37\xad\xbc\x95\xb8\xc5\xe7\x95\xda\xfb"
+	"\x80\x5b\xf6\x40\x28\xae\xc1\x4c\x09\xde\xff\x1e\xbf\x51\xd2\xfe"
+	"\x08\xdc\xb0\x48\x21\xf5\x4c\x43\xdc\x7b\x69\x83\xc8\x69\x5c\xc4"
+	"\xa9\x98\x76\x4b\xc4\x4a\xac\x1d\xa5\x52\xe3\x35\x43\xdd\x30\xd4"
+	"\xa0\x51\x9c\xc2\x62\x4c\x7e\xa5\xfb\xd3\x2c\x8a\x09\x7f\x53\xa3"
+	"\xcd\xca\x58\x1b\x4c\xaf\xba\x21\x8b\x88\x1d\xc0\xe9\x0a\x17\x30"
+	"\x33\xd6\xa2\xa5\x49\x50\x61\x3b\xff\x37\x71\x66\xef\x61\xbc\xb2"
+	"\x53\x82\xe5\x70\xef\x32\xff\x9d\x97\xe0\x82\xe0\xbb\x49\xc2\x29"
+	"\x58\x89\xdd\xe9\x62\x52\xfb\xba\x22\xa6\xd9\x16\xfa\x55\xb3\x06"
+	"\xed\x6d\x70\x6e\xdc\x47\x7c\x67\x1a\xcc\x27\x98\xd4\xd7\xe6\xf0"
+	"\xf8\x9f\x51\x3e\xf0\xee\xad\xb6\x78\x69\x71\xb5\xcb\x09\xa3\xa6"
+	"\x3f\x29\x24\x46\xe0\x65\xbc\x9f\x6c\xe9\xf9\x49\x49\x96\x75\xe5"
+	"\xe1\xff\x82\x70\xf4\x7e\xff\x8f\xec\x47\x98\x6d\x5b\x88\x60\xee"
+	"\x43\xb1\xe2\x14\xc1\x49\x95\x74\x46\xd3\x3f\x73\xb2\xe9\x88\xe0"
+	"\xd3\xb1\xc4\x2c\xef\xee\xdd\x6c\xc5\xa1\x29\xef\x86\xd2\x36\x8a"
+	"\x2f\x7c\x9d\x28\x0a\x6d\xc9\x5a\xdb\xd4\x04\x06\x36\x96\x09\x03"
+	"\x71\x5d\x38\x67\xa2\x08\x2a\x04\xe7\xd6\x51\x5a\x19\x9d\xe7\xf1"
+	"\x5d\x6f\xe2\xff\x48\x37\xb7\x8b\xb1\x14\xb4\x96\xcd\xf0\xa7\xbd"
+	"\xef\x20\xff\x0a\x8d\x08\xb7\x15\x98\x5a\x13\xd2\xda\x2a\x27\x75",
+	.expected_a_public =
+	"\x45\x96\x5a\xb7\x78\x5c\xa4\x4d\x39\xb2\x5f\xc8\xc2\xaa\x1a\xf4"
+	"\xa6\x68\xf6\x6f\x7e\xa8\x4a\x5b\x0e\xba\x0a\x99\x85\xf9\x63\xd4"
+	"\x58\x21\x6d\xa8\x3c\xf4\x05\x10\xb0\x0d\x6f\x1c\xa0\x17\x85\xae"
+	"\x68\xbf\xcc\x00\xc8\x86\x1b\x24\x31\xc9\x49\x23\x91\xe0\x71\x29"
+	"\x06\x39\x39\x93\x49\x9c\x75\x18\x1a\x8b\x61\x73\x1c\x7f\x37\xd5"
+	"\xf1\xab\x20\x5e\x62\x25\xeb\x58\xd5\xfa\xc9\x7f\xad\x57\xd5\xcc"
+	"\x0d\xc1\x7a\x2b\x33\x2a\x76\x84\x33\x26\x97\xcf\x47\x9d\x72\x2a"
+	"\xc9\x39\xde\xa8\x42\x27\x2d\xdc\xee\x00\x60\xd2\x4f\x13\xe0\xde"
+	"\xd5\xc7\xf6\x7d\x8b\x2a\x43\x49\x40\x99\xc2\x61\x84\x8e\x57\x09"
+	"\x7c\xcc\x19\x46\xbd\x4c\xd2\x7c\x7d\x02\x4d\x88\xdf\x58\x24\x80"
+	"\xeb\x19\x3b\x2a\x13\x2b\x19\x85\x3c\xd8\x31\x03\x00\xa4\xd4\x57"
+	"\x23\x2c\x24\x37\xb3\x62\xea\x35\x29\xd0\x2c\xac\xfd\xbd\xdf\x3d"
+	"\xa6\xce\xfa\x0d\x5b\xb6\x15\x8b\xe3\x58\xe9\xad\x99\x87\x29\x51"
+	"\x8d\x97\xd7\xa9\x55\xf0\x72\x6e\x4e\x58\xcb\x2b\x4d\xbd\xd0\x48"
+	"\x7d\x14\x86\xdb\x3f\xa2\x5f\x6e\x35\x4a\xe1\x70\xb1\x53\x72\xb7"
+	"\xbc\xe9\x3d\x1b\x33\xc0\x54\x6f\x43\x55\x76\x85\x7f\x9b\xa5\xb3"
+	"\xc1\x1d\xd3\xfe\xe2\xd5\x96\x3d\xdd\x92\x04\xb1\xad\x75\xdb\x13"
+	"\x4e\x49\xfc\x35\x34\xc5\xda\x13\x98\xb8\x12\xbe\xda\x90\x55\x7c"
+	"\x11\x6c\xbe\x2b\x8c\x51\x29\x23\xc1\x51\xbc\x0c\x1c\xe2\x20\xfc"
+	"\xfe\xf2\xaa\x71\x9b\x21\xdf\x25\x1f\x68\x21\x7e\xe1\xc9\x87\xa0"
+	"\x20\xf6\x8d\x4f\x27\x8c\x3c\x0f\x9d\xf4\x69\x25\xaa\x49\xab\x94"
+	"\x22\x5a\x92\x3a\xba\xb4\xc2\x8c\x5a\xaa\x04\xbf\x46\xc5\xaa\x93"
+	"\xab\x0d\xe9\x54\x6c\x3a\x64\xa6\xa2\x21\x66\xee\x1c\x10\x21\x84"
+	"\xf2\x9e\xcc\x57\xac\xc2\x25\x62\xad\xbb\x59\xef\x25\x61\x6c\x81"
+	"\x38\x8a\xdc\x8c\xeb\x7b\x18\x1d\xaf\xa9\xc5\x9a\xf4\x49\x26\x8a"
+	"\x25\xc4\x3e\x31\x95\x28\xef\xf7\x72\xe9\xc5\xaa\x59\x72\x2b\x67"
+	"\x47\xe8\x6b\x51\x05\x24\xb8\x18\xb3\x34\x0f\x8c\x2b\x80\xba\x61"
+	"\x1c\xbe\x9e\x9a\x7c\xe3\x60\x5e\x49\x02\xff\x50\x8a\x64\x28\x64"
+	"\x46\x7b\x83\x14\x72\x6e\x59\x9b\x56\x09\xb4\xf0\xde\x52\xc3\xf3"
+	"\x58\x17\x6a\xae\xb1\x0f\xf4\x39\xcc\xd8\xce\x4d\xe1\x51\x17\x88"
+	"\xe4\x98\xd9\xd1\xa9\x55\xbc\xbf\x7e\xc4\x51\x96\xdb\x44\x1d\xcd"
+	"\x8d\x74\xad\xa7\x8f\x87\x83\x75\xfc\x36\xb7\xd2\xd4\x89\x16\x97"
+	"\xe4\xc6\x2a\xe9\x65\xc8\xca\x1c\xbd\x86\xaf\x57\x80\xf7\xdd\x42"
+	"\xc0\x3b\x3f\x87\x51\x02\x2f\xf8\xd8\x68\x0f\x3d\x95\x2d\xf1\x67"
+	"\x09\xa6\x5d\x0b\x7e\x01\xb4\xb2\x32\x01\xa8\xd0\x58\x0d\xe6\xa2"
+	"\xd8\x4b\x22\x10\x7d\x11\xf3\xc2\x4e\xb8\x43\x8e\x31\x79\x59\xe2"
+	"\xc4\x96\x29\x17\x40\x06\x0d\xdf\xdf\xc3\x02\x30\x2a\xd1\x8e\xf2"
+	"\xee\x2d\xd2\x12\x63\x5a\x1d\x3c\xba\x4a\xc4\x56\x90\xc6\x12\x0b"
+	"\xe0\x04\x3f\x35\x59\x8e\x40\x75\xf4\x4c\x10\x61\xb9\x30\x89\x7c"
+	"\x8d\x0e\x25\xb7\x5a\x6b\x97\x05\xc6\x37\x80\x6e\x94\x56\xa8\x5f"
+	"\x03\x94\x59\xc8\xc5\x3e\xdc\x23\xe5\x68\x4f\xd7\xbb\x6d\x7e\xc1"
+	"\x8d\xf9\xcc\x3f\x38\xad\x77\xb3\x18\x61\xed\x04\xc0\x71\xa7\x96"
+	"\xb1\xaf\x1d\x69\x78\xda\x6d\x89\x8b\x50\x75\x99\x44\xb3\xb2\x75"
+	"\xd1\xc8\x14\x40\xa1\x0a\xbf\xc4\x45\xc4\xee\x12\x90\x76\x26\x64"
+	"\xb7\x73\x2e\x0b\x0c\xfa\xc3\x55\x29\x24\x1b\x7a\x00\x27\x07\x26"
+	"\x36\xf0\x38\x1a\xe3\xb7\xc4\x8d\x1c\x9c\xa9\xc0\xc1\x45\x91\x9e"
+	"\x86\xdd\x82\x94\x45\xfa\xcd\x5a\x19\x12\x7d\xef\xda\x17\xad\x21"
+	"\x17\x89\x8b\x45\xa7\xf5\xed\x51\x9e\x58\x13\xdc\x84\xa4\xe6\x37",
+	.expected_ss =
+	"\x9a\x9c\x1c\xb7\x73\x2f\xf2\x12\xed\x59\x01\xbb\x75\xf7\xf5\xe4"
+	"\xa0\xa8\xbc\x3f\x3f\xb6\xf7\x74\x6e\xc4\xba\x6d\x6c\x4d\x93\x31"
+	"\x2b\xa7\xa4\xb3\x47\x8f\x77\x04\xb5\xa5\xab\xca\x6b\x5a\xe2\x86"
+	"\x02\x60\xca\xb4\xd7\x5e\xe0\x0f\x73\xdd\xa2\x38\x7c\xae\x0f\x5a"
+	"\x1a\xd7\xfd\xb6\xc8\x6f\xdd\xe0\x98\xd5\x07\xea\x1f\x2a\xbb\x9e"
+	"\xef\x01\x24\x04\xee\xf5\x89\xb1\x12\x26\x54\x95\xef\xcb\x84\xe9"
+	"\xae\x05\xef\x63\x25\x15\x65\x79\x79\x79\x91\xc3\x76\x72\xb4\x85"
+	"\x86\xd9\xd3\x03\xb0\xff\x04\x96\x05\x3c\xde\xbf\x47\x34\x76\x70"
+	"\x17\xd2\x24\x83\xb9\xbb\xcf\x70\x7c\xb8\xc6\x7b\x4e\x01\x86\x36"
+	"\xc7\xc5\xe5\x8b\x7c\x69\x74\x9a\xfe\x1f\x58\x85\x0f\x00\xf8\x4e"
+	"\xf1\x56\xdc\xd1\x11\x28\x2c\xcf\x6c\xb9\xc9\x57\x17\x2e\x19\x19"
+	"\x55\xb3\x4c\xd8\xfb\xe7\x6f\x70\x63\xf9\x53\x45\xdd\xd5\x62\x95"
+	"\xd3\x7d\x7e\xa0\x00\x1a\x62\x9f\x96\x0a\x5d\x0a\x25\x02\xbb\xff"
+	"\x5a\xe8\x9e\x5a\x66\x08\x93\xbc\x92\xaf\xd2\x28\x04\x97\xc1\x54"
+	"\xfe\xcc\x0a\x25\xa2\xf4\x1d\x5a\x9a\xb1\x3e\x9c\xba\x78\xe2\xcf"
+	"\x71\x70\xe3\x40\xea\xba\x69\x9b\x03\xdd\x99\x26\x09\x84\x9d\x69"
+	"\x4d\x3d\x0b\xe9\x3f\x51\xcd\x05\xe5\x00\xaf\x2c\xd3\xf6\xc0\x68"
+	"\xb5\x23\x53\x33\x14\xbd\x39\x1c\xbd\x1b\xe6\x72\x90\xcc\xc2\x86"
+	"\x1a\x42\x83\x55\xb3\xed\x0b\x62\x6d\x0e\xbb\x9e\x2a\x42\x32\x05"
+	"\x3f\xf2\x2c\xc8\x9f\x3c\xd2\xb1\x0b\xb6\x4c\xa0\x22\x36\xee\xb9"
+	"\x55\x23\x3e\x80\xc7\x28\x7c\x39\x11\xd3\x4a\x96\x2e\xef\x52\x34"
+	"\xf2\xda\xb1\xc6\xf5\x02\x10\xbf\x56\x6b\x50\x56\xcd\x2c\xfe\xe1"
+	"\x94\x14\x19\x24\x6e\x9a\xdf\x0c\xb8\xe2\xb8\xd5\xa3\xc1\x22\x8e"
+	"\x84\x92\x00\x16\xf1\x3f\x83\xf6\x36\x31\xa5\x38\xc6\xcf\xf8\x9b"
+	"\x03\xc7\x6f\xb9\xa1\x04\xdf\x20\x0f\x0b\x0f\x70\xff\x57\x36\x7f"
+	"\xb3\x6b\xcb\x8f\x48\xf7\xb2\xdb\x85\x05\xd1\xfe\x34\x05\xf6\x57"
+	"\xb4\x5b\xcc\x3f\x0e\xba\x36\x59\xb0\xfd\x4d\xf6\xf4\x5e\xd2\x65"
+	"\x1d\x98\x87\xb4\x5e\xff\x29\xaa\x84\x9b\x44\x0f\x06\x36\x61\xbd"
+	"\xdb\x51\xda\x56\xc2\xd6\x19\xe2\x57\x4f\xd0\x29\x71\xc8\xe4\xd6"
+	"\xfb\x8c\xd0\xfc\x4f\x25\x09\xa6\xfc\x67\xe2\xb8\xac\xd3\x88\x8f"
+	"\x1f\xf6\xa1\xe3\x45\xa6\x34\xe3\xb1\x6b\xb7\x37\x0e\x06\xc7\x63"
+	"\xde\xac\x3b\xac\x07\x91\x64\xcc\x12\x10\x46\x85\x14\x0b\x6b\x03"
+	"\xba\x4a\x85\xae\xc5\x8c\xa5\x9d\x36\x38\x33\xca\x42\x9c\x4b\x0c"
+	"\x46\xe1\x77\xe9\x1f\x80\xfe\xb7\x1d\x5a\xf4\xc6\x11\x26\x78\xea"
+	"\x81\x25\x77\x47\xed\x8b\x59\xc2\x6b\x49\xff\x83\x56\xec\xa5\xf0"
+	"\xe0\x8b\x15\xd4\x99\x40\x2a\x65\x2a\x98\xf4\x71\x35\x63\x84\x08"
+	"\x4d\xcd\x71\x85\x55\xbc\xa4\x1c\x90\x93\x03\x41\xde\xed\x78\x62"
+	"\x07\x30\x50\xac\x60\x21\x06\xc3\xab\xa4\x04\xc0\xc2\x32\x07\xc4"
+	"\x1f\x2f\xec\xe2\x32\xbf\xbe\x5e\x50\x5b\x2a\x19\x71\x44\x37\x76"
+	"\x8b\xbc\xdb\x73\x98\x65\x78\xc9\x33\x97\x7e\xdc\x60\xa8\x87\xf2"
+	"\xb5\x96\x55\x7f\x44\x07\xcb\x3b\xf3\xd7\x82\xfd\x77\x21\x82\x21"
+	"\x1a\x8b\xa2\xf5\x1f\x66\xd0\x57\x00\x4f\xa9\xa5\x33\xb8\x69\x91"
+	"\xe8\x2e\xf7\x73\x47\x89\x30\x9b\xb1\xfd\xe1\x5d\x11\xfd\x84\xd9"
+	"\xa2\x91\x1f\x8a\xa7\x7a\x77\x8e\x3b\x10\x1d\x0a\x59\x50\x34\xb0"
+	"\xc3\x90\x9f\x56\xb7\x43\xeb\x51\x99\x2b\x8e\x6d\x7b\x58\xe7\xc0"
+	"\x7f\x3d\xa0\x27\x50\xf2\x6e\xc8\x1e\x7f\x84\xb3\xe1\xf7\x09\x85"
+	"\xd2\x9b\x56\x6b\xba\xa5\x19\x2e\xec\xd8\x5c\xf5\x4e\x43\x36\x2e"
+	"\x89\x85\x41\x7f\x9c\x91\x2e\x62\xc3\x41\xcf\x0e\xa1\x7f\xeb\x50",
+	.secret_size = 784,
+	.b_public_size = 768,
+	.expected_a_public_size = 768,
+	.expected_ss_size = 768,
+	},
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x00" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x00\x10" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#endif
+	.b_secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x03" /* len */
+	"\x00\x03\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x03\x10" /* len */
+	"\x00\x00\x03\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x63\x3e\x6f\xe0\xfe\x9f\x4a\x01\x62\x77\xce\xf1\xc7\xcc\x49\x4d"
+	"\x92\x53\x56\xe3\x39\x15\x81\xb2\xcd\xdc\xaf\x5e\xbf\x31\x1f\x69"
+	"\xce\x41\x35\x24\xaa\x46\x53\xb5\xb7\x3f\x2b\xad\x95\x14\xfb\xe4"
+	"\x9a\x61\xcd\x0f\x1f\x02\xee\xa4\x79\x2c\x9d\x1a\x7c\x62\x82\x39"
+	"\xdd\x43\xcc\x58\x9f\x62\x47\x56\x1d\x0f\xc2\x67\xbc\x24\xd0\xf9"
+	"\x0a\x50\x1b\x10\xe7\xbb\xd1\xc2\x01\xbb\xc4\x4c\xda\x12\x60\x0e"
+	"\x95\x2b\xde\x09\xd6\x67\xe1\xbc\x4c\xb9\x67\xdf\xd0\x1f\x97\xb4"
+	"\xde\xcb\x6b\x78\x83\x51\x74\x33\x01\x7f\xf6\x0a\x95\x69\x93\x00"
+	"\x2a\xc3\x75\x8e\xef\xbe\x53\x11\x6d\xc4\xd0\x9f\x6d\x63\x48\xc1"
+	"\x91\x1f\x7d\x88\xa7\x90\x78\xd1\x7e\x52\x42\x10\x01\xb4\x27\x95"
+	"\x91\x43\xcc\x82\x91\x86\x62\xa0\x9d\xef\x65\x6e\x67\xcf\x19\x11"
+	"\x35\x37\x5e\x94\x97\x83\xa6\x83\x1c\x7e\x8a\x3e\x32\xb0\xce\xff"
+	"\x20\xdc\x7b\x6e\x18\xd9\x6b\x27\x31\xfc\xc3\xef\x47\x8d\xbe\x34"
+	"\x2b\xc7\x60\x74\x3c\x93\xb3\x8e\x54\x77\x4e\x73\xe6\x40\x72\x35"
+	"\xb0\xf0\x06\x53\x43\xbe\xd0\xc3\x87\xcc\x38\x96\xa9\x10\xa0\xd6"
+	"\x17\xed\xa5\x6a\xf4\xf6\xaa\x77\x40\xed\x7d\x2e\x58\x0f\x5b\x04"
+	"\x5a\x41\x12\x95\x22\xcb\xa3\xce\x8b\x6d\x6d\x89\xec\x7c\x1d\x25"
+	"\x27\x52\x50\xa0\x5b\x93\x8c\x5d\x3f\x56\xb9\xa6\x5e\xe5\xf7\x9b"
+	"\xc7\x9a\x4a\x2e\x79\xb5\xca\x29\x58\x52\xa0\x63\xe4\x9d\xeb\x4c"
+	"\x4c\xa8\x37\x0b\xe9\xa0\x18\xf1\x86\xf6\x4d\x32\xfb\x9e\x4f\xb3"
+	"\x7b\x5d\x58\x78\x70\xbd\x56\xac\x99\x75\x25\x71\x66\x76\x4e\x5e"
+	"\x67\x4f\xb1\x17\xa7\x8b\x55\x12\x87\x01\x4e\xd1\x66\xef\xd0\x70"
+	"\xaf\x14\x34\xee\x2a\x76\x49\x25\xa6\x2e\x43\x37\x75\x7d\x1a\xad"
+	"\x08\xd5\x01\x85\x9c\xe1\x20\xd8\x38\x5c\x57\xa5\xed\x9d\x46\x3a"
+	"\xb7\x46\x60\x29\x8b\xc4\x21\x50\x0a\x30\x9c\x57\x42\xe4\x35\xf8"
+	"\x12\x5c\x4f\xa2\x20\xc2\xc9\x43\xe3\x6d\x20\xbc\xdf\xb8\x37\x33"
+	"\x45\x43\x06\x4e\x08\x6f\x8a\xcd\x61\xc3\x1b\x05\x28\x82\xbe\xf0"
+	"\x48\x33\xe5\x93\xc9\x1a\x61\x16\x67\x03\x9d\x47\x9d\x74\xeb\xae"
+	"\x13\xf2\xb4\x1b\x09\x11\xf5\x15\xcb\x28\xfd\x50\xe0\xbc\x58\x36"
+	"\x38\x91\x2c\x07\x27\x1f\x49\x68\xf4\xce\xad\xf7\xba\xec\x5d\x3d"
+	"\xfd\x27\xe2\xcf\xf4\x56\xfe\x08\xa6\x11\x61\xcb\x6c\x9f\xf9\x3c"
+	"\x57\x0b\x8b\xaa\x00\x16\x18\xba\x1f\xe8\x4f\x01\xe2\x79\x2a\x0b"
+	"\xc1\xbd\x52\xef\xe6\xf7\x5a\x66\xfe\x07\x3b\x50\x6b\xbb\xcb\x39"
+	"\x3c\x94\xf6\x21\x0d\x68\x69\xa4\xed\x2e\xb5\x85\x03\x11\x38\x79"
+	"\xec\xb5\x22\x23\xdf\x9e\xad\xb4\xbe\xd7\xc7\xdf\xea\x30\x23\x8a"
+	"\xb7\x21\x0a\x9d\xbd\x99\x13\x7d\x5f\x7e\xaf\x28\x54\x3f\xca\x5e"
+	"\xf4\xfc\x05\x0d\x65\x67\xd8\xf6\x8e\x90\x9d\x0d\xcf\x62\x82\xd6"
+	"\x9f\x02\xf8\xca\xfa\x42\x24\x7f\x4d\xb7\xfc\x92\xa6\x4a\x51\xc4"
+	"\xd8\xae\x19\x87\xc6\xa3\x83\xbe\x7b\x6d\xc3\xf5\xb8\xad\x4a\x05"
+	"\x78\x84\x3a\x15\x2e\x40\xbe\x79\xa9\xc0\x12\xa1\x48\x39\xc3\xdb"
+	"\x47\x4f\x7d\xea\x6d\xc7\xfa\x2c\x4e\xe9\xa5\x85\x81\xea\x6c\xcd"
+	"\x8a\xe5\x74\x17\x76\x31\x31\x75\x96\x83\xca\x81\xbb\x5c\xa9\x79"
+	"\x2c\xbd\x09\xfe\xe4\x86\x0d\x8c\x76\x9c\xbc\xe8\x93\xe4\xd0\xe4"
+	"\x0f\xf8\xff\x24\x7e\x66\x61\x69\xfb\xe4\x46\x08\x94\x99\xa5\x53"
+	"\xd7\xe4\x29\x72\x86\x86\xe8\x1d\x37\xfa\xcb\xd0\x8d\x51\xd0\xbf"
+	"\x81\xcf\x55\xb9\xc5\x78\x8c\x74\xa0\x16\x3a\xd2\x19\x94\x29\x6a"
+	"\x5e\xec\xd3\x20\xa0\xb2\xfd\xce\xd4\x14\xa3\x39\x10\xa9\xf4\x4e"
+	"\xba\x21\x09\x5c\xe6\x61\x43\x51\xae\xc4\x71\xd7\x21\xef\x98\x39",
+	.b_public =
+	"\x45\x96\x5a\xb7\x78\x5c\xa4\x4d\x39\xb2\x5f\xc8\xc2\xaa\x1a\xf4"
+	"\xa6\x68\xf6\x6f\x7e\xa8\x4a\x5b\x0e\xba\x0a\x99\x85\xf9\x63\xd4"
+	"\x58\x21\x6d\xa8\x3c\xf4\x05\x10\xb0\x0d\x6f\x1c\xa0\x17\x85\xae"
+	"\x68\xbf\xcc\x00\xc8\x86\x1b\x24\x31\xc9\x49\x23\x91\xe0\x71\x29"
+	"\x06\x39\x39\x93\x49\x9c\x75\x18\x1a\x8b\x61\x73\x1c\x7f\x37\xd5"
+	"\xf1\xab\x20\x5e\x62\x25\xeb\x58\xd5\xfa\xc9\x7f\xad\x57\xd5\xcc"
+	"\x0d\xc1\x7a\x2b\x33\x2a\x76\x84\x33\x26\x97\xcf\x47\x9d\x72\x2a"
+	"\xc9\x39\xde\xa8\x42\x27\x2d\xdc\xee\x00\x60\xd2\x4f\x13\xe0\xde"
+	"\xd5\xc7\xf6\x7d\x8b\x2a\x43\x49\x40\x99\xc2\x61\x84\x8e\x57\x09"
+	"\x7c\xcc\x19\x46\xbd\x4c\xd2\x7c\x7d\x02\x4d\x88\xdf\x58\x24\x80"
+	"\xeb\x19\x3b\x2a\x13\x2b\x19\x85\x3c\xd8\x31\x03\x00\xa4\xd4\x57"
+	"\x23\x2c\x24\x37\xb3\x62\xea\x35\x29\xd0\x2c\xac\xfd\xbd\xdf\x3d"
+	"\xa6\xce\xfa\x0d\x5b\xb6\x15\x8b\xe3\x58\xe9\xad\x99\x87\x29\x51"
+	"\x8d\x97\xd7\xa9\x55\xf0\x72\x6e\x4e\x58\xcb\x2b\x4d\xbd\xd0\x48"
+	"\x7d\x14\x86\xdb\x3f\xa2\x5f\x6e\x35\x4a\xe1\x70\xb1\x53\x72\xb7"
+	"\xbc\xe9\x3d\x1b\x33\xc0\x54\x6f\x43\x55\x76\x85\x7f\x9b\xa5\xb3"
+	"\xc1\x1d\xd3\xfe\xe2\xd5\x96\x3d\xdd\x92\x04\xb1\xad\x75\xdb\x13"
+	"\x4e\x49\xfc\x35\x34\xc5\xda\x13\x98\xb8\x12\xbe\xda\x90\x55\x7c"
+	"\x11\x6c\xbe\x2b\x8c\x51\x29\x23\xc1\x51\xbc\x0c\x1c\xe2\x20\xfc"
+	"\xfe\xf2\xaa\x71\x9b\x21\xdf\x25\x1f\x68\x21\x7e\xe1\xc9\x87\xa0"
+	"\x20\xf6\x8d\x4f\x27\x8c\x3c\x0f\x9d\xf4\x69\x25\xaa\x49\xab\x94"
+	"\x22\x5a\x92\x3a\xba\xb4\xc2\x8c\x5a\xaa\x04\xbf\x46\xc5\xaa\x93"
+	"\xab\x0d\xe9\x54\x6c\x3a\x64\xa6\xa2\x21\x66\xee\x1c\x10\x21\x84"
+	"\xf2\x9e\xcc\x57\xac\xc2\x25\x62\xad\xbb\x59\xef\x25\x61\x6c\x81"
+	"\x38\x8a\xdc\x8c\xeb\x7b\x18\x1d\xaf\xa9\xc5\x9a\xf4\x49\x26\x8a"
+	"\x25\xc4\x3e\x31\x95\x28\xef\xf7\x72\xe9\xc5\xaa\x59\x72\x2b\x67"
+	"\x47\xe8\x6b\x51\x05\x24\xb8\x18\xb3\x34\x0f\x8c\x2b\x80\xba\x61"
+	"\x1c\xbe\x9e\x9a\x7c\xe3\x60\x5e\x49\x02\xff\x50\x8a\x64\x28\x64"
+	"\x46\x7b\x83\x14\x72\x6e\x59\x9b\x56\x09\xb4\xf0\xde\x52\xc3\xf3"
+	"\x58\x17\x6a\xae\xb1\x0f\xf4\x39\xcc\xd8\xce\x4d\xe1\x51\x17\x88"
+	"\xe4\x98\xd9\xd1\xa9\x55\xbc\xbf\x7e\xc4\x51\x96\xdb\x44\x1d\xcd"
+	"\x8d\x74\xad\xa7\x8f\x87\x83\x75\xfc\x36\xb7\xd2\xd4\x89\x16\x97"
+	"\xe4\xc6\x2a\xe9\x65\xc8\xca\x1c\xbd\x86\xaf\x57\x80\xf7\xdd\x42"
+	"\xc0\x3b\x3f\x87\x51\x02\x2f\xf8\xd8\x68\x0f\x3d\x95\x2d\xf1\x67"
+	"\x09\xa6\x5d\x0b\x7e\x01\xb4\xb2\x32\x01\xa8\xd0\x58\x0d\xe6\xa2"
+	"\xd8\x4b\x22\x10\x7d\x11\xf3\xc2\x4e\xb8\x43\x8e\x31\x79\x59\xe2"
+	"\xc4\x96\x29\x17\x40\x06\x0d\xdf\xdf\xc3\x02\x30\x2a\xd1\x8e\xf2"
+	"\xee\x2d\xd2\x12\x63\x5a\x1d\x3c\xba\x4a\xc4\x56\x90\xc6\x12\x0b"
+	"\xe0\x04\x3f\x35\x59\x8e\x40\x75\xf4\x4c\x10\x61\xb9\x30\x89\x7c"
+	"\x8d\x0e\x25\xb7\x5a\x6b\x97\x05\xc6\x37\x80\x6e\x94\x56\xa8\x5f"
+	"\x03\x94\x59\xc8\xc5\x3e\xdc\x23\xe5\x68\x4f\xd7\xbb\x6d\x7e\xc1"
+	"\x8d\xf9\xcc\x3f\x38\xad\x77\xb3\x18\x61\xed\x04\xc0\x71\xa7\x96"
+	"\xb1\xaf\x1d\x69\x78\xda\x6d\x89\x8b\x50\x75\x99\x44\xb3\xb2\x75"
+	"\xd1\xc8\x14\x40\xa1\x0a\xbf\xc4\x45\xc4\xee\x12\x90\x76\x26\x64"
+	"\xb7\x73\x2e\x0b\x0c\xfa\xc3\x55\x29\x24\x1b\x7a\x00\x27\x07\x26"
+	"\x36\xf0\x38\x1a\xe3\xb7\xc4\x8d\x1c\x9c\xa9\xc0\xc1\x45\x91\x9e"
+	"\x86\xdd\x82\x94\x45\xfa\xcd\x5a\x19\x12\x7d\xef\xda\x17\xad\x21"
+	"\x17\x89\x8b\x45\xa7\xf5\xed\x51\x9e\x58\x13\xdc\x84\xa4\xe6\x37",
+	.secret_size = 16,
+	.b_secret_size = 784,
+	.b_public_size = 768,
+	.expected_a_public_size = 768,
+	.expected_ss_size = 768,
+	.genkey = true,
+	},
+};
+
+static const struct kpp_testvec ffdhe8192_dh_tv_template[] __maybe_unused = {
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x04" /* len */
+	"\x00\x04\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x04\x10" /* len */
+	"\x00\x00\x04\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x76\x6e\xeb\xf9\xeb\x76\xae\x37\xcb\x19\x49\x8b\xeb\xaf\xb0\x4b"
+	"\x6d\xe9\x15\xad\xda\xf2\xef\x58\xe9\xd6\xdd\x4c\xb3\x56\xd0\x3b"
+	"\x00\xb0\x65\xed\xae\xe0\x2e\xdf\x8f\x45\x3f\x3c\x5d\x2f\xfa\x96"
+	"\x36\x33\xb2\x01\x8b\x0f\xe8\x46\x15\x6d\x60\x5b\xec\x32\xc3\x3b"
+	"\x06\xf3\xb4\x1b\x9a\xef\x3c\x03\x0e\xcc\xce\x1d\x24\xa0\xc9\x08"
+	"\x65\xf9\x45\xe5\xd2\x43\x08\x88\x58\xd6\x46\xe7\xbb\x25\xac\xed"
+	"\x3b\xac\x6f\x5e\xfb\xd6\x19\xa6\x20\x3a\x1d\x0c\xe8\x00\x72\x54"
+	"\xd7\xd9\xc9\x26\x49\x18\xc6\xb8\xbc\xdd\xf3\xce\xf3\x7b\x69\x04"
+	"\x5c\x6f\x11\xdb\x44\x42\x72\xb6\xb7\x84\x17\x86\x47\x3f\xc5\xa1"
+	"\xd8\x86\xef\xe2\x27\x49\x2b\x8f\x3e\x91\x12\xd9\x45\x96\xf7\xe6"
+	"\x77\x76\x36\x58\x71\x9a\xb1\xdb\xcf\x24\x9e\x7e\xad\xce\x45\xba"
+	"\xb5\xec\x8e\xb9\xd6\x7b\x3d\x76\xa4\x85\xad\xd8\x49\x9b\x80\x9d"
+	"\x7f\x9f\x85\x09\x9e\x86\x5b\x6b\xf3\x8d\x39\x5e\x6f\xe4\x30\xc8"
+	"\xa5\xf3\xdf\x68\x73\x6b\x2e\x9a\xcb\xac\x0a\x0d\x44\xc1\xaf\xb2"
+	"\x11\x1b\x7c\x43\x08\x44\x43\xe2\x4e\xfd\x93\x30\x99\x09\x12\xbb"
+	"\xf6\x31\x34\xa5\x3d\x45\x98\xee\xd7\x2a\x1a\x89\xf5\x37\x92\x33"
+	"\xa0\xdd\xf5\xfb\x1f\x90\x42\x55\x5a\x0b\x82\xff\xf0\x96\x92\x15"
+	"\x65\x5a\x55\x96\xca\x1b\xd5\xe5\xb5\x94\xde\x2e\xa6\x03\x57\x9e"
+	"\x15\xe4\x32\x2b\x1f\xb2\x22\x21\xe9\xa0\x05\xd3\x65\x6c\x11\x66"
+	"\x25\x38\xbb\xa3\x6c\xc2\x0b\x2b\xd0\x7a\x20\x26\x29\x37\x5d\x5f"
+	"\xd8\xff\x2a\xcd\x46\x6c\xd6\x6e\xe5\x77\x1a\xe6\x33\xf1\x8e\xc8"
+	"\x10\x30\x11\x00\x27\xf9\x7d\x0e\x28\x43\xa7\x67\x38\x7f\x16\xda"
+	"\xd0\x01\x8e\xa4\xe8\x6f\xcd\x23\xaf\x77\x52\x34\xad\x7e\xc3\xed"
+	"\x2d\x10\x0a\x33\xdc\xcf\x1b\x88\x0f\xcc\x48\x7f\x42\xf0\x9e\x13"
+	"\x1f\xf5\xd1\xe9\x90\x87\xbd\xfa\x5f\x1d\x77\x55\xcb\xc3\x05\xaf"
+	"\x71\xd0\xe0\xab\x46\x31\xd7\xea\x89\x54\x2d\x39\xaf\xf6\x4f\x74"
+	"\xaf\x46\x58\x89\x78\x95\x2e\xe6\x90\xb7\xaa\x00\x73\x9f\xed\xb9"
+	"\x00\xd6\xf6\x6d\x26\x59\xcd\x56\xdb\xf7\x3d\x5f\xeb\x6e\x46\x33"
+	"\xb1\x23\xed\x9f\x8d\x58\xdc\xb4\x28\x3b\x90\x09\xc4\x61\x02\x1f"
+	"\xf8\x62\xf2\x6e\xc1\x94\x71\x66\x93\x11\xdf\xaa\x3e\xd7\xb5\xe5"
+	"\xc1\x78\xe9\x14\xcd\x55\x16\x51\xdf\x8d\xd0\x94\x8c\x43\xe9\xb8"
+	"\x1d\x42\x7f\x76\xbc\x6f\x87\x42\x88\xde\xd7\x52\x78\x00\x4f\x18"
+	"\x02\xe7\x7b\xe2\x8a\xc3\xd1\x43\xa5\xac\xda\xb0\x8d\x19\x96\xd4"
+	"\x81\xe0\x75\xe9\xca\x41\x7e\x1f\x93\x0b\x26\x24\xb3\xaa\xdd\x10"
+	"\x20\xd3\xf2\x9f\x3f\xdf\x65\xde\x67\x79\xdc\x76\x9f\x3c\x72\x75"
+	"\x65\x8a\x30\xcc\xd2\xcc\x06\xb1\xab\x62\x86\x78\x5d\xb8\xce\x72"
+	"\xb3\x12\xc7\x9f\x07\xd0\x6b\x98\x82\x9b\x6c\xbb\x15\xe5\xcc\xf4"
+	"\xc8\xf4\x60\x81\xdc\xd3\x09\x1b\x5e\xd4\xf3\x55\xcf\x1c\x16\x83"
+	"\x61\xb4\x2e\xcc\x08\x67\x58\xfd\x46\x64\xbc\x29\x4b\xdd\xda\xec"
+	"\xdc\xc6\xa9\xa5\x73\xfb\xf8\xf3\xaf\x89\xa8\x9e\x25\x14\xfa\xac"
+	"\xeb\x1c\x7c\x80\x96\x66\x4d\x41\x67\x9b\x07\x4f\x0a\x97\x17\x1c"
+	"\x4d\x61\xc7\x2e\x6f\x36\x98\x29\x50\x39\x6d\xe7\x70\xda\xf0\xc8"
+	"\x05\x80\x7b\x32\xff\xfd\x12\xde\x61\x0d\xf9\x4c\x21\xf1\x56\x72"
+	"\x3d\x61\x46\xc0\x2d\x07\xd1\x6c\xd3\xbe\x9a\x21\x83\x85\xf7\xed"
+	"\x53\x95\x44\x40\x8f\x75\x12\x18\xc2\x9a\xfd\x5e\xce\x66\xa6\x7f"
+	"\x57\xc0\xd7\x73\x76\xb3\x13\xda\x2e\x58\xc6\x27\x40\xb2\x2d\xef"
+	"\x7d\x72\xb4\xa8\x75\x6f\xcc\x5f\x42\x3e\x2c\x90\x36\x59\xa0\x34"
+	"\xaa\xce\xbc\x04\x4c\xe6\x56\xc2\xcd\xa6\x1c\x59\x04\x56\x53\xcf"
+	"\x6d\xd7\xf0\xb1\x4f\x91\xfa\x84\xcf\x4b\x8d\x50\x4c\xf8\x2a\x31"
+	"\x5f\xe3\xba\x79\xb4\xcc\x59\x64\xe3\x7a\xfa\xf6\x06\x9d\x04\xbb"
+	"\xce\x61\xbf\x9e\x59\x0a\x09\x51\x6a\xbb\x0b\x80\xe0\x91\xc1\x51"
+	"\x04\x58\x67\x67\x4b\x42\x4f\x95\x68\x75\xe2\x1f\x9c\x14\x70\xfd"
+	"\x3a\x8a\xce\x8b\x04\xa1\x89\xe7\xb4\xbf\x70\xfe\xf3\x0c\x48\x04"
+	"\x3a\xd2\x85\x68\x03\xe7\xfa\xec\x5b\x55\xb7\x95\xfd\x5b\x19\x35"
+	"\xad\xcb\x4a\x63\x03\x44\x64\x2a\x48\x59\x9a\x26\x43\x96\x8c\xe6"
+	"\xbd\xb7\x90\xd4\x5f\x8d\x08\x28\xa8\xc5\x89\x70\xb9\x6e\xd3\x3b"
+	"\x76\x0e\x37\x98\x15\x27\xca\xc9\xb0\xe0\xfd\xf3\xc6\xdf\x69\xce"
+	"\xe1\x5f\x6a\x3e\x5c\x86\xe2\x58\x41\x11\xf0\x7e\x56\xec\xe4\xc9"
+	"\x0d\x87\x91\xfb\xb9\xc8\x0d\x34\xab\xb0\xc6\xf2\xa6\x00\x7b\x18"
+	"\x92\xf4\x43\x7f\x01\x85\x2e\xef\x8c\x72\x50\x10\xdb\xf1\x37\x62"
+	"\x16\x85\x71\x01\xa8\x2b\xf0\x13\xd3\x7c\x0b\xaf\xf1\xf3\xd1\xee"
+	"\x90\x41\x5f\x7d\x5b\xa9\x83\x4b\xfa\x80\x59\x50\x73\xe1\xc4\xf9"
+	"\x5e\x4b\xde\xd9\xf5\x22\x68\x5e\x65\xd9\x37\xe4\x1a\x08\x0e\xb1"
+	"\x28\x2f\x40\x9e\x37\xa8\x12\x56\xb7\xb8\x64\x94\x68\x94\xff\x9f",
+	.b_public =
+	"\x26\xa8\x3a\x97\xe0\x52\x76\x07\x26\xa7\xbb\x21\xfd\xe5\x69\xde"
+	"\xe6\xe0\xb5\xa0\xf1\xaa\x51\x2b\x56\x1c\x3c\x6c\xe5\x9f\x8f\x75"
+	"\x71\x04\x86\xf6\x43\x2f\x20\x7f\x45\x4f\x5c\xb9\xf3\x90\xbe\xa9"
+	"\xa0\xd7\xe8\x03\x0e\xfe\x99\x9b\x8a\x1c\xbe\xa7\x63\xe8\x2b\x45"
+	"\xd4\x2c\x65\x25\x4c\x33\xda\xc5\x85\x77\x5d\x62\xea\x93\xe4\x45"
+	"\x59\xff\xa1\xd2\xf1\x73\x11\xed\x02\x64\x8a\x1a\xfb\xe1\x88\xa6"
+	"\x50\x6f\xff\x87\x12\xbb\xfc\x10\xcf\x19\x41\xb0\x35\x44\x7d\x51"
+	"\xe9\xc0\x77\xf2\x73\x21\x2e\x62\xbf\x65\xa5\xd1\x3b\xb1\x3e\x19"
+	"\x75\x4b\xb7\x8e\x03\xc3\xdf\xc8\xb2\xe6\xec\x2d\x7d\xa5\x6a\xba"
+	"\x93\x47\x50\xeb\x6e\xdb\x88\x05\x45\xad\x03\x8c\xf7\x9a\xe1\xc9"
+	"\x1e\x16\x96\x37\xa5\x3e\xe9\xb9\xa8\xdc\xb9\xa9\xf6\xa1\x3d\xed"
+	"\xbe\x12\x29\x8a\x3d\x3d\x90\xfc\x94\xfe\x66\x28\x1c\x1b\xa4\x89"
+	"\x47\x66\x4f\xac\x14\x00\x22\x2d\x5c\x03\xea\x71\x4d\x19\x7d\xd6"
+	"\x58\x39\x4c\x3d\x06\x2b\x30\xa6\xdc\x2c\x8d\xd1\xde\x79\x77\xfa"
+	"\x9c\x6b\x72\x11\x8a\x7f\x7d\x37\x28\x2a\x88\xbf\x0a\xdb\xac\x3b"
+	"\xc5\xa5\xd5\x7e\x25\xec\xa6\x7f\x5b\x53\x75\x83\x49\xd4\x77\xcc"
+	"\x7d\x7e\xd3\x3d\x30\x2c\x98\x3f\x18\x9a\x11\x8a\x37\xda\x99\x0f"
+	"\x3b\x06\xe1\x87\xd5\xe9\x4e\xe0\x9c\x0e\x39\x34\xe2\xdd\xf6\x58"
+	"\x60\x63\xa6\xea\xe8\xc0\xb4\xde\xdf\xa0\xbc\x21\xc3\x2d\xf4\xa4"
+	"\xc8\x6f\x62\x6c\x0f\x71\x88\xf9\xda\x2d\x30\xd5\x95\xe1\xfc\x6d"
+	"\x88\xc5\xc3\x95\x51\x83\xde\x41\x46\x6f\x7e\x1b\x10\x48\xad\x2b"
+	"\x82\x88\xa2\x6f\x57\x4d\x4a\xbd\x90\xc8\x06\x8f\x52\x5d\x6e\xee"
+	"\x09\xe6\xa3\xcb\x30\x9c\x14\xf6\xac\x66\x9b\x81\x0a\x75\x42\x6b"
+	"\xab\x27\xec\x76\xfb\x8d\xc5\xbf\x0e\x93\x81\x7b\x81\xd4\x85\xa6"
+	"\x90\x5a\xa6\xa2\x8b\xa9\xb7\x34\xe6\x15\x36\x93\x8b\xe2\x99\xc7"
+	"\xad\x66\x7e\xd6\x89\xa9\xc8\x15\xcb\xc5\xeb\x06\x85\xd4\x2f\x6e"
+	"\x9b\x95\x7a\x06\x6c\xfa\x31\x1d\xc4\xe5\x7d\xfb\x10\x35\x88\xc2"
+	"\xbe\x1c\x16\x5d\xc2\xf4\x0d\xf3\xc9\x94\xb2\x7e\xa7\xbd\x9c\x03"
+	"\x32\xaf\x8b\x1a\xc8\xcc\x82\xd8\x87\x96\x6e\x3d\xcc\x93\xd2\x43"
+	"\x73\xf9\xde\xec\x49\x49\xf4\x56\x2a\xc8\x6e\x32\x70\x48\xf8\x70"
+	"\xa3\x96\x31\xf4\xf2\x08\xc5\x12\xd2\xeb\xb6\xea\xa3\x07\x05\x61"
+	"\x74\xa3\x04\x2f\x17\x82\x40\x5e\x4c\xd1\x51\xb8\x10\x5b\xc8\x9f"
+	"\x87\x73\x80\x0d\x6f\xc6\xb9\xf6\x7c\x31\x0a\xcc\xd9\x03\x0f\x7a"
+	"\x47\x69\xb1\x55\xab\xe9\xb5\x75\x62\x9e\x95\xbe\x7b\xa9\x53\x6e"
+	"\x28\x73\xdc\xb3\xa4\x8a\x1c\x91\xf5\x8a\xf9\x32\x2b\xbd\xa5\xdc"
+	"\x07\xb5\xaf\x49\xdb\x9c\x35\xc9\x69\xde\xac\xb1\xd0\x86\xcb\x31"
+	"\x0b\xc4\x4f\x63\x4e\x70\xa7\x80\xe3\xbc\x0b\x73\x0e\xf2\x8c\x87"
+	"\x88\x7b\xa9\x6d\xde\x8a\x73\x14\xb9\x80\x55\x03\x2b\x29\x64\x6a"
+	"\xda\x48\x0e\x78\x07\x40\x48\x46\x58\xa9\x4e\x68\x1d\xd1\xc1\xc8"
+	"\x3b\x35\x53\x61\xd5\xe3\x0d\x4c\x42\x74\x10\x67\x85\x9f\x66\x2a"
+	"\xf7\x2b\x7b\x77\x8b\x6e\xda\x2c\xc1\x5a\x20\x34\x3f\xf5\x8b\x6f"
+	"\xe4\x61\xf5\x58\xab\x72\x1a\xf1\x8d\x28\xcc\xa5\x30\x68\xb5\x50"
+	"\x7b\x81\x43\x89\x8e\xa9\xac\x63\x3a\x4a\x78\x7b\xd2\x45\xe6\xe0"
+	"\xdc\x5d\xf2\x1a\x2b\x54\x50\xa5\x9d\xf6\xe7\x9f\x25\xaf\x56\x6a"
+	"\x84\x2a\x75\xa3\x9a\xc7\xfa\x94\xec\x83\xab\xa5\xaa\xe1\xf9\x89"
+	"\x29\xa9\xf6\x53\x24\x24\xae\x4a\xe8\xbc\xe8\x9e\x5c\xd7\x54\x7c"
+	"\x65\x20\x97\x28\x94\x76\xf9\x9e\x81\xcf\x98\x6a\x3a\x7b\xec\xf3"
+	"\x09\x60\x2e\x43\x18\xb5\xf6\x8c\x44\x0f\xf2\x0a\x17\x5b\xac\x98"
+	"\x30\xab\x6e\xd5\xb3\xef\x25\x68\x50\xb6\xe1\xc0\xe4\x5a\x63\x43"
+	"\xea\xca\xda\x23\xc1\xc2\xe9\x30\xec\xb3\x9f\xbf\x1f\x09\x76\xaf"
+	"\x65\xbc\xb5\xab\x30\xac\x0b\x05\xef\x5c\xa3\x65\x77\x33\x1c\xc5"
+	"\xdf\xc9\x39\xab\xca\xf4\x3b\x88\x25\x6d\x50\x87\xb1\x79\xc2\x23"
+	"\x9d\xb5\x21\x01\xaa\xa3\xb7\x61\xa3\x48\x91\x72\x3d\x54\x85\x86"
+	"\x91\x81\x35\x78\xbf\x8f\x27\x57\xcb\x9b\x34\xab\x63\x40\xf1\xbc"
+	"\x23\x5a\x26\x6a\xba\x57\xe2\x8f\x2a\xdc\x82\xe0\x3b\x7f\xec\xd3"
+	"\xd8\x9d\xd3\x13\x54\x70\x64\xc3\xfd\xbf\xa3\x46\xa7\x53\x42\x7f"
+	"\xc1\xbd\x7b\xb3\x13\x47\x2a\x45\x1e\x76\x2c\x0d\x6d\x46\x26\x24"
+	"\xa8\xc7\x00\x2b\x10\x7f\x2a\x6c\xfc\x68\x4e\x6e\x85\x53\x00\xaf"
+	"\xd5\xfb\x59\x64\xc7\x9b\x24\xd1\x05\xdc\x34\x53\x6d\x27\xa9\x79"
+	"\xff\xd7\x5e\x7a\x40\x81\x8e\xc3\xf2\x38\xc9\x8d\x87\xb5\x38\xda"
+	"\x43\x64\x1b\x59\x62\x88\xc1\x6e\x85\x84\x33\xcd\x6d\x7b\x62\x1d"
+	"\x60\xf9\x98\xf7\xd1\xb1\xd4\xbe\x56\x6e\xa8\x6f\xff\xe7\x8b\x60"
+	"\x53\x80\xc7\x7c\xe0\x78\x89\xa9\xab\x42\x8f\x8e\x4d\x92\xac\xa7"
+	"\xfd\x47\x11\xc7\xdb\x7c\x77\xfb\xa4\x1d\x70\xaf\x56\x14\x52\xb0",
+	.expected_a_public =
+	"\xa1\x6c\x9e\xda\x45\x4d\xf6\x59\x04\x00\xc1\xc6\x8b\x12\x3b\xcd"
+	"\x07\xe4\x3e\xec\xac\x9b\xfc\xf7\x6d\x73\x39\x9e\x52\xf8\xbe\x33"
+	"\xe2\xca\xea\x99\x76\xc7\xc9\x94\x5c\xf3\x1b\xea\x6b\x66\x4b\x51"
+	"\x90\xf6\x4f\x75\xd5\x85\xf4\x28\xfd\x74\xa5\x57\xb1\x71\x0c\xb6"
+	"\xb6\x95\x70\x2d\xfa\x4b\x56\xe0\x56\x10\x21\xe5\x60\xa6\x18\xa4"
+	"\x78\x8c\x07\xc0\x2b\x59\x9c\x84\x5b\xe9\xb9\x74\xbf\xbc\x65\x48"
+	"\x27\x82\x40\x53\x46\x32\xa2\x92\x91\x9d\xf6\xd1\x07\x0e\x1d\x07"
+	"\x1b\x41\x04\xb1\xd4\xce\xae\x6e\x46\xf1\x72\x50\x7f\xff\xa8\xa2"
+	"\xbc\x3a\xc1\xbb\x28\xd7\x7d\xcd\x7a\x22\x01\xaf\x57\xb0\xa9\x02"
+	"\xd4\x8a\x92\xd5\xe6\x8e\x6f\x11\x39\xfe\x36\x87\x89\x42\x25\x42"
+	"\xd9\xbe\x67\x15\xe1\x82\x8a\x5e\x98\xc2\xd5\xde\x9e\x13\x1a\xe7"
+	"\xf9\x9f\x8e\x2d\x49\xdc\x4d\x98\x8c\xdd\xfd\x24\x7c\x46\xa9\x69"
+	"\x3b\x31\xb3\x12\xce\x54\xf6\x65\x75\x40\xc2\xf1\x04\x92\xe3\x83"
+	"\xeb\x02\x3d\x79\xc0\xf9\x7c\x28\xb3\x97\x03\xf7\x61\x1c\xce\x95"
+	"\x1a\xa0\xb3\x77\x1b\xc1\x9f\xf8\xf6\x3f\x4d\x0a\xfb\xfa\x64\x1c"
+	"\xcb\x37\x5b\xc3\x28\x60\x9f\xd1\xf2\xc4\xee\x77\xaa\x1f\xe9\xa2"
+	"\x89\x4c\xc6\xb7\xb3\xe4\xa5\xed\xa7\xe8\xac\x90\xdc\xc3\xfb\x56"
+	"\x9c\xda\x2c\x1d\x1a\x9a\x8c\x82\x92\xee\xdc\xa0\xa4\x01\x6e\x7f"
+	"\xc7\x0e\xc2\x73\x7d\xa6\xac\x12\x01\xc0\xc0\xc8\x7c\x84\x86\xc7"
+	"\xa5\x94\xe5\x33\x84\x71\x6e\x36\xe3\x3b\x81\x30\xe0\xc8\x51\x52"
+	"\x2b\x9e\x68\xa2\x6e\x09\x95\x8c\x7f\x78\x82\xbd\x53\x26\xe7\x95"
+	"\xe0\x03\xda\xc0\xc3\x6e\xcf\xdc\xb3\x14\xfc\xe9\x5b\x9b\x70\x6c"
+	"\x93\x04\xab\x13\xf7\x17\x6d\xee\xad\x32\x48\xe9\xa0\x94\x1b\x14"
+	"\x64\x4f\xa1\xb3\x8d\x6a\xca\x28\xfe\x4a\xf4\xf0\xc5\xb7\xf9\x8a"
+	"\x8e\xff\xfe\x57\x6f\x20\xdb\x04\xab\x02\x31\x22\x42\xfd\xbd\x77"
+	"\xea\xce\xe8\xc7\x5d\xe0\x8e\xd6\x66\xd0\xe4\x04\x2f\x5f\x71\xc7"
+	"\x61\x2d\xa5\x3f\x2f\x46\xf2\xd8\x5b\x25\x82\xf0\x52\x88\xc0\x59"
+	"\xd3\xa3\x90\x17\xc2\x04\x13\xc3\x13\x69\x4f\x17\xb1\xb3\x46\x4f"
+	"\xa7\xe6\x8b\x5e\x3e\x95\x0e\xf5\x42\x17\x7f\x4d\x1f\x1b\x7d\x65"
+	"\x86\xc5\xc8\xae\xae\xd8\x4f\xe7\x89\x41\x69\xfd\x06\xce\x5d\xed"
+	"\x44\x55\xad\x51\x98\x15\x78\x8d\x68\xfc\x93\x72\x9d\x22\xe5\x1d"
+	"\x21\xc3\xbe\x3a\x44\x34\xc0\xa3\x1f\xca\xdf\x45\xd0\x5c\xcd\xb7"
+	"\x72\xeb\xae\x7a\xad\x3f\x05\xa0\xe3\x6e\x5a\xd8\x52\xa7\xf1\x1e"
+	"\xb4\xf2\xcf\xe7\xdf\xa7\xf2\x22\x00\xb2\xc4\x17\x3d\x2c\x15\x04"
+	"\x71\x28\x69\x5c\x69\x21\xc8\xf1\x9b\xd8\xc7\xbc\x27\xa3\x85\xe9"
+	"\x53\x77\xd3\x65\xc3\x86\xdd\xb3\x76\x13\xfb\xa1\xd4\xee\x9d\xe4"
+	"\x51\x3f\x83\x59\xe4\x47\xa8\xa6\x0d\x68\xd5\xf6\xf4\xca\x31\xcd"
+	"\x30\x48\x34\x90\x11\x8e\x87\xe9\xea\xc9\xd0\xc3\xba\x28\xf9\xc0"
+	"\xc9\x8e\x23\xe5\xc2\xee\xf2\x47\x9c\x41\x1c\x10\x33\x27\x23\x49"
+	"\xe5\x0d\x18\xbe\x19\xc1\xba\x6c\xdc\xb7\xa1\xe7\xc5\x0d\x6f\xf0"
+	"\x8c\x62\x6e\x0d\x14\xef\xef\xf2\x8e\x01\xd2\x76\xf5\xc1\xe1\x92"
+	"\x3c\xb3\x76\xcd\xd8\xdd\x9b\xe0\x8e\xdc\x24\x34\x13\x65\x0f\x11"
+	"\xaf\x99\x7a\x2f\xe6\x1f\x7d\x17\x3e\x8a\x68\x9a\x37\xc8\x8d\x3e"
+	"\xa3\xfe\xfe\x57\x22\xe6\x0e\x50\xb5\x98\x0b\x71\xd8\x01\xa2\x8d"
+	"\x51\x96\x50\xc2\x41\x31\xd8\x23\x98\xfc\xd1\x9d\x7e\x27\xbb\x69"
+	"\x78\xe0\x87\xf7\xe4\xdd\x58\x13\x9d\xec\x00\xe4\xb9\x70\xa2\x94"
+	"\x5d\x52\x4e\xf2\x5c\xd1\xbc\xfd\xee\x9b\xb9\xe5\xc4\xc0\xa8\x77"
+	"\x67\xa4\xd1\x95\x34\xe4\x6d\x5f\x25\x02\x8d\x65\xdd\x11\x63\x55"
+	"\x04\x01\x21\x60\xc1\x5c\xef\x77\x33\x01\x1c\xa2\x11\x2b\xdd\x2b"
+	"\x74\x99\x23\x38\x05\x1b\x7e\x2e\x01\x52\xfe\x9c\x23\xde\x3e\x1a"
+	"\x72\xf4\xff\x7b\x02\xaa\x08\xcf\xe0\x5b\x83\xbe\x85\x5a\xe8\x9d"
+	"\x11\x3e\xff\x2f\xc6\x97\x67\x36\x6c\x0f\x81\x9c\x26\x29\xb1\x0f"
+	"\xbb\x53\xbd\xf4\xec\x2a\x84\x41\x28\x3b\x86\x40\x95\x69\x55\x5f"
+	"\x30\xee\xda\x1e\x6c\x4b\x25\xd6\x2f\x2c\x0e\x3c\x1a\x26\xa0\x3e"
+	"\xef\x09\xc6\x2b\xe5\xa1\x0c\x03\xa8\xf5\x39\x70\x31\xc4\x32\x79"
+	"\xd1\xd9\xc2\xcc\x32\x4a\xf1\x2f\x57\x5a\xcc\xe5\xc3\xc5\xd5\x4e"
+	"\x86\x56\xca\x64\xdb\xab\x61\x85\x8f\xf9\x20\x02\x40\x66\x76\x9e"
+	"\x5e\xd4\xac\xf0\x47\xa6\x50\x5f\xc2\xaf\x55\x9b\xa3\xc9\x8b\xf8"
+	"\x42\xd5\xcf\x1a\x95\x22\xd9\xd1\x0b\x92\x51\xca\xde\x46\x02\x0d"
+	"\x8b\xee\xd9\xa0\x04\x74\xf5\x0e\xb0\x3a\x62\xec\x3c\x91\x29\x33"
+	"\xa7\x78\x22\x92\xac\x27\xe6\x2d\x6f\x56\x8a\x5d\x72\xc2\xf1\x5c"
+	"\x54\x11\x97\x24\x61\xcb\x0c\x52\xd4\x57\x56\x22\x86\xf0\x19\x27"
+	"\x76\x30\x04\xf4\x39\x7b\x1a\x5a\x04\x0d\xec\x59\x9a\x31\x4c\x40"
+	"\x19\x6d\x3c\x41\x1b\x0c\xca\xeb\x25\x39\x6c\x96\xf8\x55\xd0\xec",
+	.expected_ss =
+	"\xf9\x55\x4f\x48\x38\x74\xb7\x46\xa3\xc4\x2e\x88\xf0\x34\xab\x1d"
+	"\xcd\xa5\x58\xa7\x95\x88\x36\x62\x6f\x8a\xbd\xf2\xfb\x6f\x3e\xb9"
+	"\x91\x65\x58\xef\x70\x2f\xd5\xc2\x97\x70\xcb\xce\x8b\x78\x1c\xe0"
+	"\xb9\xfa\x77\x34\xd2\x4a\x19\x58\x11\xfd\x93\x84\x40\xc0\x8c\x19"
+	"\x8b\x98\x50\x83\xba\xfb\xe2\xad\x8b\x81\x84\x63\x90\x41\x4b\xf8"
+	"\xe8\x78\x86\x04\x09\x8d\x84\xd1\x43\xfd\xa3\x58\x21\x2a\x3b\xb1"
+	"\xa2\x5b\x48\x74\x3c\xa9\x16\x34\x28\xf0\x8e\xde\xe2\xcf\x8e\x68"
+	"\x53\xab\x65\x06\xb7\x86\xb1\x08\x4f\x73\x97\x00\x10\x95\xd1\x84"
+	"\x72\xcf\x14\xdb\xff\xa7\x80\xd8\xe5\xf2\x2c\x89\x37\xb0\x81\x2c"
+	"\xf5\xd6\x7d\x1b\xb0\xe2\x8e\x87\x32\x3d\x37\x6a\x79\xaa\xe7\x08"
+	"\xc9\x67\x55\x5f\x1c\xae\xa6\xf5\xef\x79\x3a\xaf\x3f\x82\x14\xe2"
+	"\xf3\x69\x91\xed\xb7\x9e\xc9\xde\xd0\x29\x70\xd9\xeb\x0f\xf5\xc7"
+	"\xf6\x7c\xa7\x7f\xec\xed\xe1\xbd\x13\xe1\x43\xe4\x42\x30\xe3\x5f"
+	"\xe0\xf3\x15\x55\x2f\x7a\x42\x17\x67\xcb\xc2\x4f\xd0\x85\xfc\x6c"
+	"\xec\xe8\xfc\x25\x78\x4b\xe4\x0f\xd4\x3d\x78\x28\xd3\x53\x79\xcb"
+	"\x2c\x82\x67\x9a\xdc\x32\x55\xd2\xda\xae\xd8\x61\xce\xd6\x59\x0b"
+	"\xc5\x44\xeb\x08\x81\x8c\x65\xb2\xb7\xa6\xff\xf7\xbf\x99\xc6\x8a"
+	"\xbe\xde\xc2\x17\x56\x05\x6e\xd2\xf1\x1e\xa2\x04\xeb\x02\x74\xaa"
+	"\x04\xfc\xf0\x6b\xd4\xfc\xf0\x7a\x5f\xfe\xe2\x74\x7f\xeb\x9b\x6a"
+	"\x8a\x09\x96\x5d\xe1\x91\xb6\x9e\x37\xd7\x63\xd7\xb3\x5c\xb5\xa3"
+	"\x5f\x62\x00\xdf\xc5\xbf\x85\xba\xa7\xa9\xb6\x1f\x76\x78\x65\x01"
+	"\xfe\x1d\x6c\xfe\x15\x9e\xf4\xb1\xbc\x8d\xad\x3c\xec\x69\x27\x57"
+	"\xa4\x89\x77\x46\xe1\x49\xc7\x22\xde\x79\xe0\xf7\x3a\xa1\x59\x8b"
+	"\x59\x71\xcc\xd6\x18\x24\xc1\x8a\x2f\xe3\xdf\xdd\x6c\xf7\x62\xaa"
+	"\x15\xaa\x39\x37\x3b\xaf\x7d\x6e\x88\xeb\x19\xa8\xa0\x26\xd3\xaa"
+	"\x2d\xcc\x5f\x56\x99\x86\xa9\xed\x4d\x02\x31\x40\x97\x70\x83\xa7"
+	"\x08\x98\x7e\x49\x46\xd9\x75\xb5\x7a\x6a\x40\x69\xa0\x6d\xb2\x18"
+	"\xc0\xad\x88\x05\x02\x95\x6f\xf7\x8f\xcb\xa2\xe4\x7b\xab\x4a\x0f"
+	"\x9a\x1b\xef\xcc\xd1\x6a\x5d\x1e\x6a\x2a\x8b\x5b\x80\xbc\x5f\x38"
+	"\xdd\xaf\xad\x44\x15\xb4\xaf\x26\x1c\x1a\x4d\xa7\x4b\xec\x88\x33"
+	"\x24\x42\xb5\x0c\x9c\x56\xd4\xba\xa7\xb9\x65\xd5\x76\xb2\xbc\x16"
+	"\x8e\xfa\x0c\x7a\xc0\xa2\x2c\x5a\x39\x56\x7d\xe6\xf8\xa9\xf4\x49"
+	"\xd0\x50\xf2\x5e\x4b\x0a\x43\xe4\x9a\xbb\xea\x35\x28\x99\x84\x83"
+	"\xec\xc1\xa0\x68\x15\x9a\x2b\x01\x04\x48\x09\x11\x1b\xb6\xa4\xd8"
+	"\x03\xad\xb6\x4c\x9e\x1d\x90\xae\x88\x0f\x75\x95\x25\xa0\x27\x13"
+	"\xb7\x4f\xe2\x3e\xd5\x59\x1a\x7c\xde\x95\x14\x28\xd1\xde\x84\xe4"
+	"\x07\x7c\x5b\x06\xd6\xe6\x9c\x8a\xbe\xd2\xb4\x62\xd1\x67\x8a\x9c"
+	"\xac\x4f\xfa\x70\xd6\xc8\xc0\xeb\x5e\xf6\x3e\xdc\x48\x8e\xce\x3f"
+	"\x92\x3e\x60\x77\x63\x60\x6b\x76\x04\xa5\xba\xc9\xab\x92\x4e\x0d"
+	"\xdc\xca\x82\x44\x5f\x3a\x42\xeb\x01\xe7\xe0\x33\xb3\x32\xaf\x4b"
+	"\x81\x35\x2d\xb6\x57\x15\xfe\x52\xc7\x54\x2e\x41\x3b\x22\x6b\x12"
+	"\x72\xdb\x5c\x66\xd0\xb6\xb4\xfe\x90\xc0\x20\x34\x95\xf9\xe4\xc7"
+	"\x7e\x71\x89\x4f\x6f\xfb\x2a\xf3\xdf\x3f\xe3\xcf\x0e\x1a\xd9\xf2"
+	"\xc1\x02\x67\x5d\xdc\xf1\x7d\xe8\xcf\x64\x77\x4d\x12\x03\x77\x2c"
+	"\xfb\xe1\x59\xf7\x2c\x96\x9c\xaf\x46\x9c\xc7\x67\xcf\xee\x94\x50"
+	"\xc7\xa1\x23\xe6\x9f\x4d\x73\x92\xad\xf9\x4a\xce\xdb\x44\xd5\xe3"
+	"\x17\x05\x37\xdb\x9c\x6c\xc5\x7e\xb7\xd4\x11\x4a\x8c\x51\x03\xaa"
+	"\x73\x4b\x16\xd9\x79\xf5\xf1\x67\x20\x9b\x25\xe5\x41\x52\x59\x06"
+	"\x8b\xf2\x23\x2f\x6e\xea\xf3\x24\x0a\x94\xbb\xb8\x7e\xd9\x23\x4a"
+	"\x9f\x1f\xe1\x13\xb5\xfe\x85\x2f\x4c\xbe\x6a\x66\x02\x1d\x90\xd2"
+	"\x01\x25\x8a\xfd\x78\x3a\x28\xb8\x18\xc1\x38\x16\x21\x6b\xb4\xf9"
+	"\x64\x0f\xf1\x73\xc4\x5c\xd1\x41\xf2\xfe\xe7\x26\xad\x79\x12\x75"
+	"\x49\x48\xdb\x21\x71\x35\xf7\xb7\x46\x5a\xa1\x81\x25\x47\x31\xea"
+	"\x1d\x76\xbb\x32\x5a\x90\xb0\x42\x1a\x47\xe8\x0c\x82\x92\x43\x1c"
+	"\x0b\xdd\xe5\x25\xce\xd3\x06\xcc\x59\x5a\xc9\xa0\x01\xac\x29\x12"
+	"\x31\x2e\x3d\x1a\xed\x3b\xf3\xa7\xef\x52\xc2\x0d\x18\x1f\x03\x28"
+	"\xc9\x2b\x38\x61\xa4\x01\xc9\x3c\x11\x08\x14\xd4\xe5\x31\xe9\x3c"
+	"\x1d\xad\xf8\x76\xc4\x84\x9f\xea\x16\x61\x3d\x6d\xa3\x32\x31\xcd"
+	"\x1c\xca\xb8\x74\xc2\x45\xf3\x01\x9c\x7a\xaf\xfd\xe7\x1e\x5a\x18"
+	"\xb1\x9d\xbb\x7a\x2d\x34\x40\x17\x49\xad\x1f\xeb\x2d\xa2\x26\xb8"
+	"\x16\x28\x4b\x72\xdd\xd0\x8d\x85\x4c\xdd\xf8\x57\x48\xd5\x1d\xfb"
+	"\xbd\xec\x11\x5d\x1e\x9c\x26\x81\xbf\xf1\x16\x12\x32\xc3\xf3\x07"
+	"\x0e\x6e\x7f\x17\xec\xfb\xf4\x5d\xe2\xb1\xca\x97\xca\x46\x20\x2d"
+	"\x09\x85\x19\x25\x89\xa8\x9b\x51\x74\xae\xc9\x1b\x4c\xb6\x80\x62",
+	.secret_size = 1040,
+	.b_public_size = 1024,
+	.expected_a_public_size = 1024,
+	.expected_ss_size = 1024,
+	},
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x00" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x00\x10" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#endif
+	.b_secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x04" /* len */
+	"\x00\x04\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x04\x10" /* len */
+	"\x00\x00\x04\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x76\x6e\xeb\xf9\xeb\x76\xae\x37\xcb\x19\x49\x8b\xeb\xaf\xb0\x4b"
+	"\x6d\xe9\x15\xad\xda\xf2\xef\x58\xe9\xd6\xdd\x4c\xb3\x56\xd0\x3b"
+	"\x00\xb0\x65\xed\xae\xe0\x2e\xdf\x8f\x45\x3f\x3c\x5d\x2f\xfa\x96"
+	"\x36\x33\xb2\x01\x8b\x0f\xe8\x46\x15\x6d\x60\x5b\xec\x32\xc3\x3b"
+	"\x06\xf3\xb4\x1b\x9a\xef\x3c\x03\x0e\xcc\xce\x1d\x24\xa0\xc9\x08"
+	"\x65\xf9\x45\xe5\xd2\x43\x08\x88\x58\xd6\x46\xe7\xbb\x25\xac\xed"
+	"\x3b\xac\x6f\x5e\xfb\xd6\x19\xa6\x20\x3a\x1d\x0c\xe8\x00\x72\x54"
+	"\xd7\xd9\xc9\x26\x49\x18\xc6\xb8\xbc\xdd\xf3\xce\xf3\x7b\x69\x04"
+	"\x5c\x6f\x11\xdb\x44\x42\x72\xb6\xb7\x84\x17\x86\x47\x3f\xc5\xa1"
+	"\xd8\x86\xef\xe2\x27\x49\x2b\x8f\x3e\x91\x12\xd9\x45\x96\xf7\xe6"
+	"\x77\x76\x36\x58\x71\x9a\xb1\xdb\xcf\x24\x9e\x7e\xad\xce\x45\xba"
+	"\xb5\xec\x8e\xb9\xd6\x7b\x3d\x76\xa4\x85\xad\xd8\x49\x9b\x80\x9d"
+	"\x7f\x9f\x85\x09\x9e\x86\x5b\x6b\xf3\x8d\x39\x5e\x6f\xe4\x30\xc8"
+	"\xa5\xf3\xdf\x68\x73\x6b\x2e\x9a\xcb\xac\x0a\x0d\x44\xc1\xaf\xb2"
+	"\x11\x1b\x7c\x43\x08\x44\x43\xe2\x4e\xfd\x93\x30\x99\x09\x12\xbb"
+	"\xf6\x31\x34\xa5\x3d\x45\x98\xee\xd7\x2a\x1a\x89\xf5\x37\x92\x33"
+	"\xa0\xdd\xf5\xfb\x1f\x90\x42\x55\x5a\x0b\x82\xff\xf0\x96\x92\x15"
+	"\x65\x5a\x55\x96\xca\x1b\xd5\xe5\xb5\x94\xde\x2e\xa6\x03\x57\x9e"
+	"\x15\xe4\x32\x2b\x1f\xb2\x22\x21\xe9\xa0\x05\xd3\x65\x6c\x11\x66"
+	"\x25\x38\xbb\xa3\x6c\xc2\x0b\x2b\xd0\x7a\x20\x26\x29\x37\x5d\x5f"
+	"\xd8\xff\x2a\xcd\x46\x6c\xd6\x6e\xe5\x77\x1a\xe6\x33\xf1\x8e\xc8"
+	"\x10\x30\x11\x00\x27\xf9\x7d\x0e\x28\x43\xa7\x67\x38\x7f\x16\xda"
+	"\xd0\x01\x8e\xa4\xe8\x6f\xcd\x23\xaf\x77\x52\x34\xad\x7e\xc3\xed"
+	"\x2d\x10\x0a\x33\xdc\xcf\x1b\x88\x0f\xcc\x48\x7f\x42\xf0\x9e\x13"
+	"\x1f\xf5\xd1\xe9\x90\x87\xbd\xfa\x5f\x1d\x77\x55\xcb\xc3\x05\xaf"
+	"\x71\xd0\xe0\xab\x46\x31\xd7\xea\x89\x54\x2d\x39\xaf\xf6\x4f\x74"
+	"\xaf\x46\x58\x89\x78\x95\x2e\xe6\x90\xb7\xaa\x00\x73\x9f\xed\xb9"
+	"\x00\xd6\xf6\x6d\x26\x59\xcd\x56\xdb\xf7\x3d\x5f\xeb\x6e\x46\x33"
+	"\xb1\x23\xed\x9f\x8d\x58\xdc\xb4\x28\x3b\x90\x09\xc4\x61\x02\x1f"
+	"\xf8\x62\xf2\x6e\xc1\x94\x71\x66\x93\x11\xdf\xaa\x3e\xd7\xb5\xe5"
+	"\xc1\x78\xe9\x14\xcd\x55\x16\x51\xdf\x8d\xd0\x94\x8c\x43\xe9\xb8"
+	"\x1d\x42\x7f\x76\xbc\x6f\x87\x42\x88\xde\xd7\x52\x78\x00\x4f\x18"
+	"\x02\xe7\x7b\xe2\x8a\xc3\xd1\x43\xa5\xac\xda\xb0\x8d\x19\x96\xd4"
+	"\x81\xe0\x75\xe9\xca\x41\x7e\x1f\x93\x0b\x26\x24\xb3\xaa\xdd\x10"
+	"\x20\xd3\xf2\x9f\x3f\xdf\x65\xde\x67\x79\xdc\x76\x9f\x3c\x72\x75"
+	"\x65\x8a\x30\xcc\xd2\xcc\x06\xb1\xab\x62\x86\x78\x5d\xb8\xce\x72"
+	"\xb3\x12\xc7\x9f\x07\xd0\x6b\x98\x82\x9b\x6c\xbb\x15\xe5\xcc\xf4"
+	"\xc8\xf4\x60\x81\xdc\xd3\x09\x1b\x5e\xd4\xf3\x55\xcf\x1c\x16\x83"
+	"\x61\xb4\x2e\xcc\x08\x67\x58\xfd\x46\x64\xbc\x29\x4b\xdd\xda\xec"
+	"\xdc\xc6\xa9\xa5\x73\xfb\xf8\xf3\xaf\x89\xa8\x9e\x25\x14\xfa\xac"
+	"\xeb\x1c\x7c\x80\x96\x66\x4d\x41\x67\x9b\x07\x4f\x0a\x97\x17\x1c"
+	"\x4d\x61\xc7\x2e\x6f\x36\x98\x29\x50\x39\x6d\xe7\x70\xda\xf0\xc8"
+	"\x05\x80\x7b\x32\xff\xfd\x12\xde\x61\x0d\xf9\x4c\x21\xf1\x56\x72"
+	"\x3d\x61\x46\xc0\x2d\x07\xd1\x6c\xd3\xbe\x9a\x21\x83\x85\xf7\xed"
+	"\x53\x95\x44\x40\x8f\x75\x12\x18\xc2\x9a\xfd\x5e\xce\x66\xa6\x7f"
+	"\x57\xc0\xd7\x73\x76\xb3\x13\xda\x2e\x58\xc6\x27\x40\xb2\x2d\xef"
+	"\x7d\x72\xb4\xa8\x75\x6f\xcc\x5f\x42\x3e\x2c\x90\x36\x59\xa0\x34"
+	"\xaa\xce\xbc\x04\x4c\xe6\x56\xc2\xcd\xa6\x1c\x59\x04\x56\x53\xcf"
+	"\x6d\xd7\xf0\xb1\x4f\x91\xfa\x84\xcf\x4b\x8d\x50\x4c\xf8\x2a\x31"
+	"\x5f\xe3\xba\x79\xb4\xcc\x59\x64\xe3\x7a\xfa\xf6\x06\x9d\x04\xbb"
+	"\xce\x61\xbf\x9e\x59\x0a\x09\x51\x6a\xbb\x0b\x80\xe0\x91\xc1\x51"
+	"\x04\x58\x67\x67\x4b\x42\x4f\x95\x68\x75\xe2\x1f\x9c\x14\x70\xfd"
+	"\x3a\x8a\xce\x8b\x04\xa1\x89\xe7\xb4\xbf\x70\xfe\xf3\x0c\x48\x04"
+	"\x3a\xd2\x85\x68\x03\xe7\xfa\xec\x5b\x55\xb7\x95\xfd\x5b\x19\x35"
+	"\xad\xcb\x4a\x63\x03\x44\x64\x2a\x48\x59\x9a\x26\x43\x96\x8c\xe6"
+	"\xbd\xb7\x90\xd4\x5f\x8d\x08\x28\xa8\xc5\x89\x70\xb9\x6e\xd3\x3b"
+	"\x76\x0e\x37\x98\x15\x27\xca\xc9\xb0\xe0\xfd\xf3\xc6\xdf\x69\xce"
+	"\xe1\x5f\x6a\x3e\x5c\x86\xe2\x58\x41\x11\xf0\x7e\x56\xec\xe4\xc9"
+	"\x0d\x87\x91\xfb\xb9\xc8\x0d\x34\xab\xb0\xc6\xf2\xa6\x00\x7b\x18"
+	"\x92\xf4\x43\x7f\x01\x85\x2e\xef\x8c\x72\x50\x10\xdb\xf1\x37\x62"
+	"\x16\x85\x71\x01\xa8\x2b\xf0\x13\xd3\x7c\x0b\xaf\xf1\xf3\xd1\xee"
+	"\x90\x41\x5f\x7d\x5b\xa9\x83\x4b\xfa\x80\x59\x50\x73\xe1\xc4\xf9"
+	"\x5e\x4b\xde\xd9\xf5\x22\x68\x5e\x65\xd9\x37\xe4\x1a\x08\x0e\xb1"
+	"\x28\x2f\x40\x9e\x37\xa8\x12\x56\xb7\xb8\x64\x94\x68\x94\xff\x9f",
+	.b_public =
+	"\xa1\x6c\x9e\xda\x45\x4d\xf6\x59\x04\x00\xc1\xc6\x8b\x12\x3b\xcd"
+	"\x07\xe4\x3e\xec\xac\x9b\xfc\xf7\x6d\x73\x39\x9e\x52\xf8\xbe\x33"
+	"\xe2\xca\xea\x99\x76\xc7\xc9\x94\x5c\xf3\x1b\xea\x6b\x66\x4b\x51"
+	"\x90\xf6\x4f\x75\xd5\x85\xf4\x28\xfd\x74\xa5\x57\xb1\x71\x0c\xb6"
+	"\xb6\x95\x70\x2d\xfa\x4b\x56\xe0\x56\x10\x21\xe5\x60\xa6\x18\xa4"
+	"\x78\x8c\x07\xc0\x2b\x59\x9c\x84\x5b\xe9\xb9\x74\xbf\xbc\x65\x48"
+	"\x27\x82\x40\x53\x46\x32\xa2\x92\x91\x9d\xf6\xd1\x07\x0e\x1d\x07"
+	"\x1b\x41\x04\xb1\xd4\xce\xae\x6e\x46\xf1\x72\x50\x7f\xff\xa8\xa2"
+	"\xbc\x3a\xc1\xbb\x28\xd7\x7d\xcd\x7a\x22\x01\xaf\x57\xb0\xa9\x02"
+	"\xd4\x8a\x92\xd5\xe6\x8e\x6f\x11\x39\xfe\x36\x87\x89\x42\x25\x42"
+	"\xd9\xbe\x67\x15\xe1\x82\x8a\x5e\x98\xc2\xd5\xde\x9e\x13\x1a\xe7"
+	"\xf9\x9f\x8e\x2d\x49\xdc\x4d\x98\x8c\xdd\xfd\x24\x7c\x46\xa9\x69"
+	"\x3b\x31\xb3\x12\xce\x54\xf6\x65\x75\x40\xc2\xf1\x04\x92\xe3\x83"
+	"\xeb\x02\x3d\x79\xc0\xf9\x7c\x28\xb3\x97\x03\xf7\x61\x1c\xce\x95"
+	"\x1a\xa0\xb3\x77\x1b\xc1\x9f\xf8\xf6\x3f\x4d\x0a\xfb\xfa\x64\x1c"
+	"\xcb\x37\x5b\xc3\x28\x60\x9f\xd1\xf2\xc4\xee\x77\xaa\x1f\xe9\xa2"
+	"\x89\x4c\xc6\xb7\xb3\xe4\xa5\xed\xa7\xe8\xac\x90\xdc\xc3\xfb\x56"
+	"\x9c\xda\x2c\x1d\x1a\x9a\x8c\x82\x92\xee\xdc\xa0\xa4\x01\x6e\x7f"
+	"\xc7\x0e\xc2\x73\x7d\xa6\xac\x12\x01\xc0\xc0\xc8\x7c\x84\x86\xc7"
+	"\xa5\x94\xe5\x33\x84\x71\x6e\x36\xe3\x3b\x81\x30\xe0\xc8\x51\x52"
+	"\x2b\x9e\x68\xa2\x6e\x09\x95\x8c\x7f\x78\x82\xbd\x53\x26\xe7\x95"
+	"\xe0\x03\xda\xc0\xc3\x6e\xcf\xdc\xb3\x14\xfc\xe9\x5b\x9b\x70\x6c"
+	"\x93\x04\xab\x13\xf7\x17\x6d\xee\xad\x32\x48\xe9\xa0\x94\x1b\x14"
+	"\x64\x4f\xa1\xb3\x8d\x6a\xca\x28\xfe\x4a\xf4\xf0\xc5\xb7\xf9\x8a"
+	"\x8e\xff\xfe\x57\x6f\x20\xdb\x04\xab\x02\x31\x22\x42\xfd\xbd\x77"
+	"\xea\xce\xe8\xc7\x5d\xe0\x8e\xd6\x66\xd0\xe4\x04\x2f\x5f\x71\xc7"
+	"\x61\x2d\xa5\x3f\x2f\x46\xf2\xd8\x5b\x25\x82\xf0\x52\x88\xc0\x59"
+	"\xd3\xa3\x90\x17\xc2\x04\x13\xc3\x13\x69\x4f\x17\xb1\xb3\x46\x4f"
+	"\xa7\xe6\x8b\x5e\x3e\x95\x0e\xf5\x42\x17\x7f\x4d\x1f\x1b\x7d\x65"
+	"\x86\xc5\xc8\xae\xae\xd8\x4f\xe7\x89\x41\x69\xfd\x06\xce\x5d\xed"
+	"\x44\x55\xad\x51\x98\x15\x78\x8d\x68\xfc\x93\x72\x9d\x22\xe5\x1d"
+	"\x21\xc3\xbe\x3a\x44\x34\xc0\xa3\x1f\xca\xdf\x45\xd0\x5c\xcd\xb7"
+	"\x72\xeb\xae\x7a\xad\x3f\x05\xa0\xe3\x6e\x5a\xd8\x52\xa7\xf1\x1e"
+	"\xb4\xf2\xcf\xe7\xdf\xa7\xf2\x22\x00\xb2\xc4\x17\x3d\x2c\x15\x04"
+	"\x71\x28\x69\x5c\x69\x21\xc8\xf1\x9b\xd8\xc7\xbc\x27\xa3\x85\xe9"
+	"\x53\x77\xd3\x65\xc3\x86\xdd\xb3\x76\x13\xfb\xa1\xd4\xee\x9d\xe4"
+	"\x51\x3f\x83\x59\xe4\x47\xa8\xa6\x0d\x68\xd5\xf6\xf4\xca\x31\xcd"
+	"\x30\x48\x34\x90\x11\x8e\x87\xe9\xea\xc9\xd0\xc3\xba\x28\xf9\xc0"
+	"\xc9\x8e\x23\xe5\xc2\xee\xf2\x47\x9c\x41\x1c\x10\x33\x27\x23\x49"
+	"\xe5\x0d\x18\xbe\x19\xc1\xba\x6c\xdc\xb7\xa1\xe7\xc5\x0d\x6f\xf0"
+	"\x8c\x62\x6e\x0d\x14\xef\xef\xf2\x8e\x01\xd2\x76\xf5\xc1\xe1\x92"
+	"\x3c\xb3\x76\xcd\xd8\xdd\x9b\xe0\x8e\xdc\x24\x34\x13\x65\x0f\x11"
+	"\xaf\x99\x7a\x2f\xe6\x1f\x7d\x17\x3e\x8a\x68\x9a\x37\xc8\x8d\x3e"
+	"\xa3\xfe\xfe\x57\x22\xe6\x0e\x50\xb5\x98\x0b\x71\xd8\x01\xa2\x8d"
+	"\x51\x96\x50\xc2\x41\x31\xd8\x23\x98\xfc\xd1\x9d\x7e\x27\xbb\x69"
+	"\x78\xe0\x87\xf7\xe4\xdd\x58\x13\x9d\xec\x00\xe4\xb9\x70\xa2\x94"
+	"\x5d\x52\x4e\xf2\x5c\xd1\xbc\xfd\xee\x9b\xb9\xe5\xc4\xc0\xa8\x77"
+	"\x67\xa4\xd1\x95\x34\xe4\x6d\x5f\x25\x02\x8d\x65\xdd\x11\x63\x55"
+	"\x04\x01\x21\x60\xc1\x5c\xef\x77\x33\x01\x1c\xa2\x11\x2b\xdd\x2b"
+	"\x74\x99\x23\x38\x05\x1b\x7e\x2e\x01\x52\xfe\x9c\x23\xde\x3e\x1a"
+	"\x72\xf4\xff\x7b\x02\xaa\x08\xcf\xe0\x5b\x83\xbe\x85\x5a\xe8\x9d"
+	"\x11\x3e\xff\x2f\xc6\x97\x67\x36\x6c\x0f\x81\x9c\x26\x29\xb1\x0f"
+	"\xbb\x53\xbd\xf4\xec\x2a\x84\x41\x28\x3b\x86\x40\x95\x69\x55\x5f"
+	"\x30\xee\xda\x1e\x6c\x4b\x25\xd6\x2f\x2c\x0e\x3c\x1a\x26\xa0\x3e"
+	"\xef\x09\xc6\x2b\xe5\xa1\x0c\x03\xa8\xf5\x39\x70\x31\xc4\x32\x79"
+	"\xd1\xd9\xc2\xcc\x32\x4a\xf1\x2f\x57\x5a\xcc\xe5\xc3\xc5\xd5\x4e"
+	"\x86\x56\xca\x64\xdb\xab\x61\x85\x8f\xf9\x20\x02\x40\x66\x76\x9e"
+	"\x5e\xd4\xac\xf0\x47\xa6\x50\x5f\xc2\xaf\x55\x9b\xa3\xc9\x8b\xf8"
+	"\x42\xd5\xcf\x1a\x95\x22\xd9\xd1\x0b\x92\x51\xca\xde\x46\x02\x0d"
+	"\x8b\xee\xd9\xa0\x04\x74\xf5\x0e\xb0\x3a\x62\xec\x3c\x91\x29\x33"
+	"\xa7\x78\x22\x92\xac\x27\xe6\x2d\x6f\x56\x8a\x5d\x72\xc2\xf1\x5c"
+	"\x54\x11\x97\x24\x61\xcb\x0c\x52\xd4\x57\x56\x22\x86\xf0\x19\x27"
+	"\x76\x30\x04\xf4\x39\x7b\x1a\x5a\x04\x0d\xec\x59\x9a\x31\x4c\x40"
+	"\x19\x6d\x3c\x41\x1b\x0c\xca\xeb\x25\x39\x6c\x96\xf8\x55\xd0\xec",
+	.secret_size = 16,
+	.b_secret_size = 1040,
+	.b_public_size = 1024,
+	.expected_a_public_size = 1024,
+	.expected_ss_size = 1024,
+	.genkey = true,
+	},
+};
+
 static const struct kpp_testvec curve25519_tv_template[] = {
 {
 	.secret = (u8[32]){ 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d,
@@ -5713,6 +7140,7 @@ static const struct hash_testvec hmac_sha1_tv_template[] = {
 		.psize	= 28,
 		.digest	= "\xef\xfc\xdf\x6a\xe5\xeb\x2f\xa2\xd2\x74"
 			  "\x16\xd5\xf1\x84\xdf\x9c\x25\x9a\x7c\x79",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa",
 		.ksize	= 20,
@@ -5802,6 +7230,7 @@ static const struct hash_testvec hmac_sha224_tv_template[] = {
 			"\x45\x69\x0f\x3a\x7e\x9e\x6d\x0f"
 			"\x8b\xbe\xa2\xa3\x9e\x61\x48\x00"
 			"\x8f\xd0\x5e\x44",
+		.fips_skip = 1,
 	}, {
 		.key    = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -5945,6 +7374,7 @@ static const struct hash_testvec hmac_sha256_tv_template[] = {
 			  "\x6a\x04\x24\x26\x08\x95\x75\xc7"
 			  "\x5a\x00\x3f\x08\x9d\x27\x39\x83"
 			  "\x9d\xec\x58\xb9\x64\xec\x38\x43",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6443,6 +7873,7 @@ static const struct hash_testvec hmac_sha384_tv_template[] = {
 			  "\xe4\x2e\xc3\x73\x63\x22\x44\x5e"
 			  "\x8e\x22\x40\xca\x5e\x69\xe2\xc7"
 			  "\x8b\x32\x39\xec\xfa\xb2\x16\x49",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			  "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6543,6 +7974,7 @@ static const struct hash_testvec hmac_sha512_tv_template[] = {
 			  "\x6d\x03\x4f\x65\xf8\xf0\xe6\xfd"
 			  "\xca\xea\xb1\xa3\x4d\x4a\x6b\x4b"
 			  "\x63\x6e\x07\x0a\x38\xbc\xe7\x37",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			  "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6638,6 +8070,7 @@ static const struct hash_testvec hmac_sha3_224_tv_template[] = {
 			  "\x1b\x79\x86\x34\xad\x38\x68\x11"
 			  "\xc2\xcf\xc8\x5b\xfa\xf5\xd5\x2b"
 			  "\xba\xce\x5e\x66",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			  "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6725,6 +8158,7 @@ static const struct hash_testvec hmac_sha3_256_tv_template[] = {
 			  "\x35\x96\xbb\xb0\xda\x73\xb8\x87"
 			  "\xc9\x17\x1f\x93\x09\x5b\x29\x4a"
 			  "\xe8\x57\xfb\xe2\x64\x5e\x1b\xa5",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			  "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6816,6 +8250,7 @@ static const struct hash_testvec hmac_sha3_384_tv_template[] = {
 			  "\x3c\xa1\x35\x08\xa9\x32\x43\xce"
 			  "\x48\xc0\x45\xdc\x00\x7f\x26\xa2"
 			  "\x1b\x3f\x5e\x0e\x9d\xf4\xc2\x0a",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			  "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6915,6 +8350,7 @@ static const struct hash_testvec hmac_sha3_512_tv_template[] = {
 			  "\xee\x7a\x0c\x31\xd0\x22\xa9\x5e"
 			  "\x1f\xc9\x2b\xa9\xd7\x7d\xf8\x83"
 			  "\x96\x02\x75\xbe\xb4\xe6\x20\x24",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			  "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
diff --git a/crypto/xts.c b/crypto/xts.c
index 6c12f30dbdd6..63c85b9e64e0 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -466,3 +466,4 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("XTS block cipher mode");
 MODULE_ALIAS_CRYPTO("xts");
 MODULE_IMPORT_NS(CRYPTO_INTERNAL);
+MODULE_SOFTDEP("pre: ecb");
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 273741dedfd2..1e34f846508f 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -302,7 +302,7 @@ config ACPI_IPMI
 	help
 	  This driver enables the ACPI to access the BMC controller. And it
 	  uses the IPMI request/response message to communicate with BMC
-	  controller, which can be found on on the server.
+	  controller, which can be found on the server.
 
 	  To compile this driver as a module, choose M here:
 	  the module will be called as acpi_ipmi.
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index bb757148e7ba..b5a8d3e00a52 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -81,6 +81,9 @@ obj-$(CONFIG_ACPI_AC) 		+= ac.o
 obj-$(CONFIG_ACPI_BUTTON)	+= button.o
 obj-$(CONFIG_ACPI_TINY_POWER_BUTTON)	+= tiny-power-button.o
 obj-$(CONFIG_ACPI_FAN)		+= fan.o
+fan-objs			:= fan_core.o
+fan-objs			+= fan_attr.o
+
 obj-$(CONFIG_ACPI_VIDEO)	+= video.o
 obj-$(CONFIG_ACPI_TAD)		+= acpi_tad.o
 obj-$(CONFIG_ACPI_PCI_SLOT)	+= pci_slot.o
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index bcae0f03572b..fbe0756259c5 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -21,6 +21,7 @@
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
 #include <linux/pwm.h>
+#include <linux/pxa2xx_ssp.h>
 #include <linux/suspend.h>
 #include <linux/delay.h>
 
@@ -82,7 +83,7 @@ struct lpss_device_desc {
 	const char *clk_con_id;
 	unsigned int prv_offset;
 	size_t prv_size_override;
-	struct property_entry *properties;
+	const struct property_entry *properties;
 	void (*setup)(struct lpss_private_data *pdata);
 	bool resume_from_noirq;
 };
@@ -219,10 +220,16 @@ static void bsw_pwm_setup(struct lpss_private_data *pdata)
 	pwm_add_table(bsw_pwm_lookup, ARRAY_SIZE(bsw_pwm_lookup));
 }
 
-static const struct lpss_device_desc lpt_dev_desc = {
+static const struct property_entry lpt_spi_properties[] = {
+	PROPERTY_ENTRY_U32("intel,spi-pxa2xx-type", LPSS_LPT_SSP),
+	{ }
+};
+
+static const struct lpss_device_desc lpt_spi_dev_desc = {
 	.flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_LTR
 			| LPSS_SAVE_CTX,
 	.prv_offset = 0x800,
+	.properties = lpt_spi_properties,
 };
 
 static const struct lpss_device_desc lpt_i2c_dev_desc = {
@@ -282,9 +289,15 @@ static const struct lpss_device_desc bsw_uart_dev_desc = {
 	.properties = uart_properties,
 };
 
+static const struct property_entry byt_spi_properties[] = {
+	PROPERTY_ENTRY_U32("intel,spi-pxa2xx-type", LPSS_BYT_SSP),
+	{ }
+};
+
 static const struct lpss_device_desc byt_spi_dev_desc = {
 	.flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_SAVE_CTX,
 	.prv_offset = 0x400,
+	.properties = byt_spi_properties,
 };
 
 static const struct lpss_device_desc byt_sdio_dev_desc = {
@@ -305,11 +318,17 @@ static const struct lpss_device_desc bsw_i2c_dev_desc = {
 	.resume_from_noirq = true,
 };
 
+static const struct property_entry bsw_spi_properties[] = {
+	PROPERTY_ENTRY_U32("intel,spi-pxa2xx-type", LPSS_BSW_SSP),
+	{ }
+};
+
 static const struct lpss_device_desc bsw_spi_dev_desc = {
 	.flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_SAVE_CTX
 			| LPSS_NO_D3_DELAY,
 	.prv_offset = 0x400,
 	.setup = lpss_deassert_reset,
+	.properties = bsw_spi_properties,
 };
 
 static const struct x86_cpu_id lpss_cpu_ids[] = {
@@ -329,8 +348,8 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = {
 	{ "INTL9C60", LPSS_ADDR(lpss_dma_desc) },
 
 	/* Lynxpoint LPSS devices */
-	{ "INT33C0", LPSS_ADDR(lpt_dev_desc) },
-	{ "INT33C1", LPSS_ADDR(lpt_dev_desc) },
+	{ "INT33C0", LPSS_ADDR(lpt_spi_dev_desc) },
+	{ "INT33C1", LPSS_ADDR(lpt_spi_dev_desc) },
 	{ "INT33C2", LPSS_ADDR(lpt_i2c_dev_desc) },
 	{ "INT33C3", LPSS_ADDR(lpt_i2c_dev_desc) },
 	{ "INT33C4", LPSS_ADDR(lpt_uart_dev_desc) },
@@ -356,8 +375,8 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = {
 	{ "808622C1", LPSS_ADDR(bsw_i2c_dev_desc) },
 
 	/* Broadwell LPSS devices */
-	{ "INT3430", LPSS_ADDR(lpt_dev_desc) },
-	{ "INT3431", LPSS_ADDR(lpt_dev_desc) },
+	{ "INT3430", LPSS_ADDR(lpt_spi_dev_desc) },
+	{ "INT3431", LPSS_ADDR(lpt_spi_dev_desc) },
 	{ "INT3432", LPSS_ADDR(lpt_i2c_dev_desc) },
 	{ "INT3433", LPSS_ADDR(lpt_i2c_dev_desc) },
 	{ "INT3434", LPSS_ADDR(lpt_uart_dev_desc) },
@@ -366,7 +385,7 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = {
 	{ "INT3437", },
 
 	/* Wildcat Point LPSS devices */
-	{ "INT3438", LPSS_ADDR(lpt_dev_desc) },
+	{ "INT3438", LPSS_ADDR(lpt_spi_dev_desc) },
 
 	{ }
 };
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 78d621290a35..de3cbf152dee 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -95,7 +95,7 @@ static void acpi_platform_fill_resource(struct acpi_device *adev,
  * Name of the platform device will be the same as @adev's.
  */
 struct platform_device *acpi_create_platform_device(struct acpi_device *adev,
-					struct property_entry *properties)
+						    const struct property_entry *properties)
 {
 	struct platform_device *pdev = NULL;
 	struct platform_device_info pdevinfo;
diff --git a/drivers/acpi/acpica/nswalk.c b/drivers/acpi/acpica/nswalk.c
index 915c2433463d..e7c30ce06e18 100644
--- a/drivers/acpi/acpica/nswalk.c
+++ b/drivers/acpi/acpica/nswalk.c
@@ -169,6 +169,9 @@ acpi_ns_walk_namespace(acpi_object_type type,
 
 	if (start_node == ACPI_ROOT_OBJECT) {
 		start_node = acpi_gbl_root_node;
+		if (!start_node) {
+			return_ACPI_STATUS(AE_NO_NAMESPACE);
+		}
 	}
 
 	/* Null child means "get first node" */
diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
index 19e50fcbf4d6..598fd19b65fa 100644
--- a/drivers/acpi/apei/bert.c
+++ b/drivers/acpi/apei/bert.c
@@ -29,6 +29,7 @@
 
 #undef pr_fmt
 #define pr_fmt(fmt) "BERT: " fmt
+#define ACPI_BERT_PRINT_MAX_LEN 1024
 
 static int bert_disable;
 
@@ -58,8 +59,11 @@ static void __init bert_print_all(struct acpi_bert_region *region,
 		}
 
 		pr_info_once("Error records from previous boot:\n");
-
-		cper_estatus_print(KERN_INFO HW_ERR, estatus);
+		if (region_len < ACPI_BERT_PRINT_MAX_LEN)
+			cper_estatus_print(KERN_INFO HW_ERR, estatus);
+		else
+			pr_info_once("Max print length exceeded, table data is available at:\n"
+				     "/sys/firmware/acpi/tables/data/BERT");
 
 		/*
 		 * Because the boot error source is "one-time polled" type,
@@ -77,7 +81,7 @@ static int __init setup_bert_disable(char *str)
 {
 	bert_disable = 1;
 
-	return 0;
+	return 1;
 }
 __setup("bert_disable", setup_bert_disable);
 
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 242f3c2d5533..698d67cee052 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -891,7 +891,7 @@ EXPORT_SYMBOL_GPL(erst_clear);
 static int __init setup_erst_disable(char *str)
 {
 	erst_disable = 1;
-	return 0;
+	return 1;
 }
 
 __setup("erst_disable", setup_erst_disable);
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0c5c9acc6254..d91ad378c00d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -1457,33 +1457,35 @@ static struct platform_driver ghes_platform_driver = {
 	.remove		= ghes_remove,
 };
 
-static int __init ghes_init(void)
+void __init acpi_ghes_init(void)
 {
 	int rc;
 
+	sdei_init();
+
 	if (acpi_disabled)
-		return -ENODEV;
+		return;
 
 	switch (hest_disable) {
 	case HEST_NOT_FOUND:
-		return -ENODEV;
+		return;
 	case HEST_DISABLED:
 		pr_info(GHES_PFX "HEST is not enabled!\n");
-		return -EINVAL;
+		return;
 	default:
 		break;
 	}
 
 	if (ghes_disable) {
 		pr_info(GHES_PFX "GHES is not enabled!\n");
-		return -EINVAL;
+		return;
 	}
 
 	ghes_nmi_init_cxt();
 
 	rc = platform_driver_register(&ghes_platform_driver);
 	if (rc)
-		goto err;
+		return;
 
 	rc = apei_osc_setup();
 	if (rc == 0 && osc_sb_apei_support_acked)
@@ -1494,9 +1496,4 @@ static int __init ghes_init(void)
 		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
 	else
 		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
-
-	return 0;
-err:
-	return rc;
 }
-device_initcall(ghes_init);
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 0edc1ed47673..6aef1ee5e1bd 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -224,7 +224,7 @@ err:
 static int __init setup_hest_disable(char *str)
 {
 	hest_disable = HEST_DISABLED;
-	return 0;
+	return 1;
 }
 
 __setup("hest_disable", setup_hest_disable);
diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig
index 6dba187f4f2e..d4a72835f328 100644
--- a/drivers/acpi/arm64/Kconfig
+++ b/drivers/acpi/arm64/Kconfig
@@ -8,3 +8,13 @@ config ACPI_IORT
 
 config ACPI_GTDT
 	bool
+
+config ACPI_AGDI
+	bool "Arm Generic Diagnostic Dump and Reset Device Interface"
+	depends on ARM_SDE_INTERFACE
+	help
+	  Arm Generic Diagnostic Dump and Reset Device Interface (AGDI) is
+	  a standard that enables issuing a non-maskable diagnostic dump and
+	  reset command.
+
+	  If set, the kernel parses AGDI table and listens for the command.
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
index 66acbe77f46e..7b9e4045659d 100644
--- a/drivers/acpi/arm64/Makefile
+++ b/drivers/acpi/arm64/Makefile
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_ACPI_AGDI) 	+= agdi.o
 obj-$(CONFIG_ACPI_IORT) 	+= iort.o
 obj-$(CONFIG_ACPI_GTDT) 	+= gtdt.o
 obj-y				+= dma.o
diff --git a/drivers/acpi/arm64/agdi.c b/drivers/acpi/arm64/agdi.c
new file mode 100644
index 000000000000..4df337d545b7
--- /dev/null
+++ b/drivers/acpi/arm64/agdi.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This file implements handling of
+ * Arm Generic Diagnostic Dump and Reset Interface table (AGDI)
+ *
+ * Copyright (c) 2022, Ampere Computing LLC
+ */
+
+#define pr_fmt(fmt) "ACPI: AGDI: " fmt
+
+#include <linux/acpi.h>
+#include <linux/arm_sdei.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+
+struct agdi_data {
+	int sdei_event;
+};
+
+static int agdi_sdei_handler(u32 sdei_event, struct pt_regs *regs, void *arg)
+{
+	nmi_panic(regs, "Arm Generic Diagnostic Dump and Reset SDEI event issued");
+	return 0;
+}
+
+static int agdi_sdei_probe(struct platform_device *pdev,
+			   struct agdi_data *adata)
+{
+	int err;
+
+	err = sdei_event_register(adata->sdei_event, agdi_sdei_handler, pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to register for SDEI event %d",
+			adata->sdei_event);
+		return err;
+	}
+
+	err = sdei_event_enable(adata->sdei_event);
+	if (err)  {
+		sdei_event_unregister(adata->sdei_event);
+		dev_err(&pdev->dev, "Failed to enable event %d\n",
+			adata->sdei_event);
+		return err;
+	}
+
+	return 0;
+}
+
+static int agdi_probe(struct platform_device *pdev)
+{
+	struct agdi_data *adata = dev_get_platdata(&pdev->dev);
+
+	if (!adata)
+		return -EINVAL;
+
+	return agdi_sdei_probe(pdev, adata);
+}
+
+static int agdi_remove(struct platform_device *pdev)
+{
+	struct agdi_data *adata = dev_get_platdata(&pdev->dev);
+	int err, i;
+
+	err = sdei_event_disable(adata->sdei_event);
+	if (err)
+		return err;
+
+	for (i = 0; i < 3; i++) {
+		err = sdei_event_unregister(adata->sdei_event);
+		if (err != -EINPROGRESS)
+			break;
+
+		schedule();
+	}
+
+	return err;
+}
+
+static struct platform_driver agdi_driver = {
+	.driver = {
+		.name = "agdi",
+	},
+	.probe = agdi_probe,
+	.remove = agdi_remove,
+};
+
+void __init acpi_agdi_init(void)
+{
+	struct acpi_table_agdi *agdi_table;
+	struct agdi_data pdata;
+	struct platform_device *pdev;
+	acpi_status status;
+
+	status = acpi_get_table(ACPI_SIG_AGDI, 0,
+				(struct acpi_table_header **) &agdi_table);
+	if (ACPI_FAILURE(status))
+		return;
+
+	if (agdi_table->flags & ACPI_AGDI_SIGNALING_MODE) {
+		pr_warn("Interrupt signaling is not supported");
+		goto err_put_table;
+	}
+
+	pdata.sdei_event = agdi_table->sdei_event;
+
+	pdev = platform_device_register_data(NULL, "agdi", 0, &pdata, sizeof(pdata));
+	if (IS_ERR(pdev))
+		goto err_put_table;
+
+	if (platform_driver_register(&agdi_driver))
+		platform_device_unregister(pdev);
+
+err_put_table:
+	acpi_put_table((struct acpi_table_header *)agdi_table);
+}
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index ea31ae01458b..dc208f5f5a1f 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -59,6 +59,10 @@ MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
 
 static const struct acpi_device_id battery_device_ids[] = {
 	{"PNP0C0A", 0},
+
+	/* Microsoft Surface Go 3 */
+	{"MSHW0146", 0},
+
 	{"", 0},
 };
 
@@ -1148,6 +1152,14 @@ static const struct dmi_system_id bat_dmi_table[] __initconst = {
 			DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad"),
 		},
 	},
+	{
+		/* Microsoft Surface Go 3 */
+		.callback = battery_notification_delay_quirk,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"),
+		},
+	},
 	{},
 };
 
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 07f604832fd6..3e58b613a2c4 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -26,6 +26,7 @@
 #include <asm/mpspec.h>
 #include <linux/dmi.h>
 #endif
+#include <linux/acpi_agdi.h>
 #include <linux/acpi_iort.h>
 #include <linux/acpi_viot.h>
 #include <linux/pci.h>
@@ -283,6 +284,8 @@ EXPORT_SYMBOL_GPL(osc_pc_lpi_support_confirmed);
 bool osc_sb_native_usb4_support_confirmed;
 EXPORT_SYMBOL_GPL(osc_sb_native_usb4_support_confirmed);
 
+bool osc_sb_cppc_not_supported;
+
 static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48";
 static void acpi_bus_osc_negotiate_platform_control(void)
 {
@@ -332,21 +335,38 @@ static void acpi_bus_osc_negotiate_platform_control(void)
 	if (ACPI_FAILURE(acpi_run_osc(handle, &context)))
 		return;
 
-	kfree(context.ret.pointer);
+	capbuf_ret = context.ret.pointer;
+	if (context.ret.length <= OSC_SUPPORT_DWORD) {
+		kfree(context.ret.pointer);
+		return;
+	}
+
+#ifdef CONFIG_X86
+	if (boot_cpu_has(X86_FEATURE_HWP))
+		osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] &
+				(OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT));
+#endif
 
-	/* Now run _OSC again with query flag clear */
+	/*
+	 * Now run _OSC again with query flag clear and with the caps
+	 * supported by both the OS and the platform.
+	 */
 	capbuf[OSC_QUERY_DWORD] = 0;
+	capbuf[OSC_SUPPORT_DWORD] = capbuf_ret[OSC_SUPPORT_DWORD];
+	kfree(context.ret.pointer);
 
 	if (ACPI_FAILURE(acpi_run_osc(handle, &context)))
 		return;
 
 	capbuf_ret = context.ret.pointer;
-	osc_sb_apei_support_acked =
-		capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT;
-	osc_pc_lpi_support_confirmed =
-		capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT;
-	osc_sb_native_usb4_support_confirmed =
-		capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT;
+	if (context.ret.length > OSC_SUPPORT_DWORD) {
+		osc_sb_apei_support_acked =
+			capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT;
+		osc_pc_lpi_support_confirmed =
+			capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT;
+		osc_sb_native_usb4_support_confirmed =
+			capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT;
+	}
 
 	kfree(context.ret.pointer);
 }
@@ -1043,7 +1063,12 @@ struct bus_type acpi_bus_type = {
 	.remove		= acpi_device_remove,
 	.uevent		= acpi_device_uevent,
 };
-EXPORT_SYMBOL_GPL(acpi_bus_type);
+
+int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data)
+{
+	return bus_for_each_dev(&acpi_bus_type, NULL, data, fn);
+}
+EXPORT_SYMBOL_GPL(acpi_bus_for_each_dev);
 
 /* --------------------------------------------------------------------------
                              Initialization/Cleanup
@@ -1331,6 +1356,8 @@ static int __init acpi_init(void)
 
 	pci_mmcfg_late_init();
 	acpi_iort_init();
+	acpi_hest_init();
+	acpi_ghes_init();
 	acpi_scan_init();
 	acpi_ec_init();
 	acpi_debugfs_init();
@@ -1339,6 +1366,7 @@ static int __init acpi_init(void)
 	acpi_debugger_init();
 	acpi_setup_sb_notify_handler();
 	acpi_viot_init();
+	acpi_agdi_init();
 	return 0;
 }
 
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 866560cbb082..d418449194ee 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -633,8 +633,8 @@ static bool is_cppc_supported(int revision, int num_ent)
  *  )
  */
 
-#ifndef init_freq_invariance_cppc
-static inline void init_freq_invariance_cppc(void) { }
+#ifndef arch_init_invariance_cppc
+static inline void arch_init_invariance_cppc(void) { }
 #endif
 
 /**
@@ -656,6 +656,9 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
 	acpi_status status;
 	int ret = -EFAULT;
 
+	if (osc_sb_cppc_not_supported)
+		return -ENODEV;
+
 	/* Parse the ACPI _CPC table for this CPU. */
 	status = acpi_evaluate_object_typed(handle, "_CPC", NULL, &output,
 			ACPI_TYPE_PACKAGE);
@@ -816,7 +819,7 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
 		goto out_free;
 	}
 
-	init_freq_invariance_cppc();
+	arch_init_invariance_cppc();
 
 	kfree(output.pointer);
 	return 0;
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 46710380a402..a1b871a418f8 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -168,7 +168,7 @@ struct acpi_ec_query {
 };
 
 static int acpi_ec_submit_query(struct acpi_ec *ec);
-static bool advance_transaction(struct acpi_ec *ec, bool interrupt);
+static void advance_transaction(struct acpi_ec *ec, bool interrupt);
 static void acpi_ec_event_handler(struct work_struct *work);
 
 struct acpi_ec *first_ec;
@@ -441,36 +441,35 @@ static bool acpi_ec_submit_flushable_request(struct acpi_ec *ec)
 	return true;
 }
 
-static bool acpi_ec_submit_event(struct acpi_ec *ec)
+static void acpi_ec_submit_event(struct acpi_ec *ec)
 {
+	/*
+	 * It is safe to mask the events here, because acpi_ec_close_event()
+	 * will run at least once after this.
+	 */
 	acpi_ec_mask_events(ec);
 	if (!acpi_ec_event_enabled(ec))
-		return false;
-
-	if (ec->event_state == EC_EVENT_READY) {
-		ec_dbg_evt("Command(%s) submitted/blocked",
-			   acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY));
+		return;
 
-		ec->event_state = EC_EVENT_IN_PROGRESS;
-		/*
-		 * If events_to_process is greqter than 0 at this point, the
-		 * while () loop in acpi_ec_event_handler() is still running
-		 * and incrementing events_to_process will cause it to invoke
-		 * acpi_ec_submit_query() once more, so it is not necessary to
-		 * queue up the event work to start the same loop again.
-		 */
-		if (ec->events_to_process++ > 0)
-			return true;
+	if (ec->event_state != EC_EVENT_READY)
+		return;
 
-		ec->events_in_progress++;
-		return queue_work(ec_wq, &ec->work);
-	}
+	ec_dbg_evt("Command(%s) submitted/blocked",
+		   acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY));
 
+	ec->event_state = EC_EVENT_IN_PROGRESS;
 	/*
-	 * The event handling work has not been completed yet, so it needs to be
-	 * flushed.
+	 * If events_to_process is greater than 0 at this point, the while ()
+	 * loop in acpi_ec_event_handler() is still running and incrementing
+	 * events_to_process will cause it to invoke acpi_ec_submit_query() once
+	 * more, so it is not necessary to queue up the event work to start the
+	 * same loop again.
 	 */
-	return true;
+	if (ec->events_to_process++ > 0)
+		return;
+
+	ec->events_in_progress++;
+	queue_work(ec_wq, &ec->work);
 }
 
 static void acpi_ec_complete_event(struct acpi_ec *ec)
@@ -655,11 +654,10 @@ static void acpi_ec_spurious_interrupt(struct acpi_ec *ec, struct transaction *t
 		acpi_ec_mask_events(ec);
 }
 
-static bool advance_transaction(struct acpi_ec *ec, bool interrupt)
+static void advance_transaction(struct acpi_ec *ec, bool interrupt)
 {
 	struct transaction *t = ec->curr;
 	bool wakeup = false;
-	bool ret = false;
 	u8 status;
 
 	ec_dbg_stm("%s (%d)", interrupt ? "IRQ" : "TASK", smp_processor_id());
@@ -724,12 +722,10 @@ static bool advance_transaction(struct acpi_ec *ec, bool interrupt)
 
 out:
 	if (status & ACPI_EC_FLAG_SCI)
-		ret = acpi_ec_submit_event(ec);
+		acpi_ec_submit_event(ec);
 
 	if (wakeup && interrupt)
 		wake_up(&ec->wait);
-
-	return ret;
 }
 
 static void start_transaction(struct acpi_ec *ec)
@@ -1242,6 +1238,7 @@ static void acpi_ec_event_handler(struct work_struct *work)
 		acpi_ec_submit_query(ec);
 
 		spin_lock_irq(&ec->lock);
+
 		ec->events_to_process--;
 	}
 
@@ -1250,27 +1247,30 @@ static void acpi_ec_event_handler(struct work_struct *work)
 	 * event handling work again regardless of whether or not the query
 	 * queued up above is processed successfully.
 	 */
-	if (ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT)
+	if (ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT) {
+		bool guard_timeout;
+
 		acpi_ec_complete_event(ec);
-	else
-		acpi_ec_close_event(ec);
 
-	spin_unlock_irq(&ec->lock);
+		ec_dbg_evt("Event stopped");
 
-	ec_dbg_evt("Event stopped");
+		spin_unlock_irq(&ec->lock);
+
+		guard_timeout = !!ec_guard(ec);
 
-	if (ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT && ec_guard(ec)) {
 		spin_lock_irq(&ec->lock);
 
 		/* Take care of SCI_EVT unless someone else is doing that. */
-		if (!ec->curr)
+		if (guard_timeout && !ec->curr)
 			advance_transaction(ec, false);
+	} else {
+		acpi_ec_close_event(ec);
 
-		spin_unlock_irq(&ec->lock);
+		ec_dbg_evt("Event stopped");
 	}
 
-	spin_lock_irq(&ec->lock);
 	ec->events_in_progress--;
+
 	spin_unlock_irq(&ec->lock);
 }
 
@@ -2051,6 +2051,11 @@ void acpi_ec_set_gpe_wake_mask(u8 action)
 		acpi_set_gpe_wake_mask(NULL, first_ec->gpe, action);
 }
 
+static bool acpi_ec_work_in_progress(struct acpi_ec *ec)
+{
+	return ec->events_in_progress + ec->queries_in_progress > 0;
+}
+
 bool acpi_ec_dispatch_gpe(void)
 {
 	bool work_in_progress = false;
@@ -2081,8 +2086,12 @@ bool acpi_ec_dispatch_gpe(void)
 	 */
 	spin_lock_irq(&first_ec->lock);
 
-	if (acpi_ec_gpe_status_set(first_ec))
-		work_in_progress = advance_transaction(first_ec, false);
+	if (acpi_ec_gpe_status_set(first_ec)) {
+		pm_pr_dbg("ACPI EC GPE status set\n");
+
+		advance_transaction(first_ec, false);
+		work_in_progress = acpi_ec_work_in_progress(first_ec);
+	}
 
 	spin_unlock_irq(&first_ec->lock);
 
@@ -2099,8 +2108,7 @@ bool acpi_ec_dispatch_gpe(void)
 
 		spin_lock_irq(&first_ec->lock);
 
-		work_in_progress = first_ec->events_in_progress +
-			first_ec->queries_in_progress > 0;
+		work_in_progress = acpi_ec_work_in_progress(first_ec);
 
 		spin_unlock_irq(&first_ec->lock);
 	} while (work_in_progress && !pm_wakeup_pending());
diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h
index dd9bb8ca2244..44728529a5b6 100644
--- a/drivers/acpi/fan.h
+++ b/drivers/acpi/fan.h
@@ -6,9 +6,53 @@
  *
  * Add new device IDs before the generic ACPI fan one.
  */
+
+#ifndef _ACPI_FAN_H_
+#define _ACPI_FAN_H_
+
 #define ACPI_FAN_DEVICE_IDS	\
 	{"INT3404", }, /* Fan */ \
 	{"INTC1044", }, /* Fan for Tiger Lake generation */ \
 	{"INTC1048", }, /* Fan for Alder Lake generation */ \
 	{"INTC10A2", }, /* Fan for Raptor Lake generation */ \
 	{"PNP0C0B", } /* Generic ACPI fan */
+
+#define ACPI_FPS_NAME_LEN	20
+
+struct acpi_fan_fps {
+	u64 control;
+	u64 trip_point;
+	u64 speed;
+	u64 noise_level;
+	u64 power;
+	char name[ACPI_FPS_NAME_LEN];
+	struct device_attribute dev_attr;
+};
+
+struct acpi_fan_fif {
+	u8 revision;
+	u8 fine_grain_ctrl;
+	u8 step_size;
+	u8 low_speed_notification;
+};
+
+struct acpi_fan_fst {
+	u64 revision;
+	u64 control;
+	u64 speed;
+};
+
+struct acpi_fan {
+	bool acpi4;
+	struct acpi_fan_fif fif;
+	struct acpi_fan_fps *fps;
+	int fps_count;
+	struct thermal_cooling_device *cdev;
+	struct device_attribute fst_speed;
+	struct device_attribute fine_grain_control;
+};
+
+int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst);
+int acpi_fan_create_attributes(struct acpi_device *device);
+void acpi_fan_delete_attributes(struct acpi_device *device);
+#endif
diff --git a/drivers/acpi/fan_attr.c b/drivers/acpi/fan_attr.c
new file mode 100644
index 000000000000..f15157d40713
--- /dev/null
+++ b/drivers/acpi/fan_attr.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  fan_attr.c - Create extra attributes for ACPI Fan driver
+ *
+ *  Copyright (C) 2001, 2002 Andy Grover <[email protected]>
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <[email protected]>
+ *  Copyright (C) 2022 Intel Corporation. All rights reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+
+#include "fan.h"
+
+MODULE_LICENSE("GPL");
+
+static ssize_t show_state(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct acpi_fan_fps *fps = container_of(attr, struct acpi_fan_fps, dev_attr);
+	int count;
+
+	if (fps->control == 0xFFFFFFFF || fps->control > 100)
+		count = scnprintf(buf, PAGE_SIZE, "not-defined:");
+	else
+		count = scnprintf(buf, PAGE_SIZE, "%lld:", fps->control);
+
+	if (fps->trip_point == 0xFFFFFFFF || fps->trip_point > 9)
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
+	else
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->trip_point);
+
+	if (fps->speed == 0xFFFFFFFF)
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
+	else
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->speed);
+
+	if (fps->noise_level == 0xFFFFFFFF)
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
+	else
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->noise_level * 100);
+
+	if (fps->power == 0xFFFFFFFF)
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined\n");
+	else
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld\n", fps->power);
+
+	return count;
+}
+
+static ssize_t show_fan_speed(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct acpi_device *acpi_dev = container_of(dev, struct acpi_device, dev);
+	struct acpi_fan_fst fst;
+	int status;
+
+	status = acpi_fan_get_fst(acpi_dev, &fst);
+	if (status)
+		return status;
+
+	return sprintf(buf, "%lld\n", fst.speed);
+}
+
+static ssize_t show_fine_grain_control(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct acpi_device *acpi_dev = container_of(dev, struct acpi_device, dev);
+	struct acpi_fan *fan = acpi_driver_data(acpi_dev);
+
+	return sprintf(buf, "%d\n", fan->fif.fine_grain_ctrl);
+}
+
+int acpi_fan_create_attributes(struct acpi_device *device)
+{
+	struct acpi_fan *fan = acpi_driver_data(device);
+	int i, status;
+
+	sysfs_attr_init(&fan->fine_grain_control.attr);
+	fan->fine_grain_control.show = show_fine_grain_control;
+	fan->fine_grain_control.store = NULL;
+	fan->fine_grain_control.attr.name = "fine_grain_control";
+	fan->fine_grain_control.attr.mode = 0444;
+	status = sysfs_create_file(&device->dev.kobj, &fan->fine_grain_control.attr);
+	if (status)
+		return status;
+
+	/* _FST is present if we are here */
+	sysfs_attr_init(&fan->fst_speed.attr);
+	fan->fst_speed.show = show_fan_speed;
+	fan->fst_speed.store = NULL;
+	fan->fst_speed.attr.name = "fan_speed_rpm";
+	fan->fst_speed.attr.mode = 0444;
+	status = sysfs_create_file(&device->dev.kobj, &fan->fst_speed.attr);
+	if (status)
+		goto rem_fine_grain_attr;
+
+	for (i = 0; i < fan->fps_count; ++i) {
+		struct acpi_fan_fps *fps = &fan->fps[i];
+
+		snprintf(fps->name, ACPI_FPS_NAME_LEN, "state%d", i);
+		sysfs_attr_init(&fps->dev_attr.attr);
+		fps->dev_attr.show = show_state;
+		fps->dev_attr.store = NULL;
+		fps->dev_attr.attr.name = fps->name;
+		fps->dev_attr.attr.mode = 0444;
+		status = sysfs_create_file(&device->dev.kobj, &fps->dev_attr.attr);
+		if (status) {
+			int j;
+
+			for (j = 0; j < i; ++j)
+				sysfs_remove_file(&device->dev.kobj, &fan->fps[j].dev_attr.attr);
+			goto rem_fst_attr;
+		}
+	}
+
+	return 0;
+
+rem_fst_attr:
+	sysfs_remove_file(&device->dev.kobj, &fan->fst_speed.attr);
+
+rem_fine_grain_attr:
+	sysfs_remove_file(&device->dev.kobj, &fan->fine_grain_control.attr);
+
+	return status;
+}
+
+void acpi_fan_delete_attributes(struct acpi_device *device)
+{
+	struct acpi_fan *fan = acpi_driver_data(device);
+	int i;
+
+	for (i = 0; i < fan->fps_count; ++i)
+		sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr);
+
+	sysfs_remove_file(&device->dev.kobj, &fan->fst_speed.attr);
+	sysfs_remove_file(&device->dev.kobj, &fan->fine_grain_control.attr);
+}
diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan_core.c
index 5cd0ceb50bc8..b9a9a59ddcc1 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan_core.c
@@ -1,9 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- *  acpi_fan.c - ACPI Fan Driver ($Revision: 29 $)
+ *  fan_core.c - ACPI Fan core Driver
  *
  *  Copyright (C) 2001, 2002 Andy Grover <[email protected]>
  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <[email protected]>
+ *  Copyright (C) 2022 Intel Corporation. All rights reserved.
  */
 
 #include <linux/kernel.h>
@@ -45,33 +46,6 @@ static const struct dev_pm_ops acpi_fan_pm = {
 #define FAN_PM_OPS_PTR NULL
 #endif
 
-#define ACPI_FPS_NAME_LEN	20
-
-struct acpi_fan_fps {
-	u64 control;
-	u64 trip_point;
-	u64 speed;
-	u64 noise_level;
-	u64 power;
-	char name[ACPI_FPS_NAME_LEN];
-	struct device_attribute dev_attr;
-};
-
-struct acpi_fan_fif {
-	u64 revision;
-	u64 fine_grain_ctrl;
-	u64 step_size;
-	u64 low_speed_notification;
-};
-
-struct acpi_fan {
-	bool acpi4;
-	struct acpi_fan_fif fif;
-	struct acpi_fan_fps *fps;
-	int fps_count;
-	struct thermal_cooling_device *cdev;
-};
-
 static struct platform_driver acpi_fan_driver = {
 	.probe = acpi_fan_probe,
 	.remove = acpi_fan_remove,
@@ -89,25 +63,29 @@ static int fan_get_max_state(struct thermal_cooling_device *cdev, unsigned long
 	struct acpi_device *device = cdev->devdata;
 	struct acpi_fan *fan = acpi_driver_data(device);
 
-	if (fan->acpi4)
-		*state = fan->fps_count - 1;
-	else
+	if (fan->acpi4) {
+		if (fan->fif.fine_grain_ctrl)
+			*state = 100 / fan->fif.step_size;
+		else
+			*state = fan->fps_count - 1;
+	} else {
 		*state = 1;
+	}
+
 	return 0;
 }
 
-static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state)
+int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst)
 {
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-	struct acpi_fan *fan = acpi_driver_data(device);
 	union acpi_object *obj;
 	acpi_status status;
-	int control, i;
+	int ret = 0;
 
 	status = acpi_evaluate_object(device->handle, "_FST", NULL, &buffer);
 	if (ACPI_FAILURE(status)) {
 		dev_err(&device->dev, "Get fan state failed\n");
-		return status;
+		return -ENODEV;
 	}
 
 	obj = buffer.pointer;
@@ -115,35 +93,52 @@ static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state)
 	    obj->package.count != 3 ||
 	    obj->package.elements[1].type != ACPI_TYPE_INTEGER) {
 		dev_err(&device->dev, "Invalid _FST data\n");
-		status = -EINVAL;
+		ret = -EINVAL;
 		goto err;
 	}
 
-	control = obj->package.elements[1].integer.value;
+	fst->revision = obj->package.elements[0].integer.value;
+	fst->control = obj->package.elements[1].integer.value;
+	fst->speed = obj->package.elements[2].integer.value;
+
+err:
+	kfree(obj);
+	return ret;
+}
+
+static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state)
+{
+	struct acpi_fan *fan = acpi_driver_data(device);
+	struct acpi_fan_fst fst;
+	int status, i;
+
+	status = acpi_fan_get_fst(device, &fst);
+	if (status)
+		return status;
+
+	if (fan->fif.fine_grain_ctrl) {
+		/* This control should be same what we set using _FSL by spec */
+		if (fst.control > 100) {
+			dev_dbg(&device->dev, "Invalid control value returned\n");
+			goto match_fps;
+		}
+
+		*state = (int) fst.control / fan->fif.step_size;
+		return 0;
+	}
+
+match_fps:
 	for (i = 0; i < fan->fps_count; i++) {
-		/*
-		 * When Fine Grain Control is set, return the state
-		 * corresponding to maximum fan->fps[i].control
-		 * value compared to the current speed. Here the
-		 * fan->fps[] is sorted array with increasing speed.
-		 */
-		if (fan->fif.fine_grain_ctrl && control < fan->fps[i].control) {
-			i = (i > 0) ? i - 1 : 0;
+		if (fst.control == fan->fps[i].control)
 			break;
-		} else if (control == fan->fps[i].control) {
-			break;
-		}
 	}
 	if (i == fan->fps_count) {
 		dev_dbg(&device->dev, "Invalid control value returned\n");
-		status = -EINVAL;
-		goto err;
+		return -EINVAL;
 	}
 
 	*state = i;
 
-err:
-	kfree(obj);
 	return status;
 }
 
@@ -187,15 +182,30 @@ static int fan_set_state_acpi4(struct acpi_device *device, unsigned long state)
 {
 	struct acpi_fan *fan = acpi_driver_data(device);
 	acpi_status status;
+	u64 value = state;
+	int max_state;
 
-	if (state >= fan->fps_count)
+	if (fan->fif.fine_grain_ctrl)
+		max_state = 100 / fan->fif.step_size;
+	else
+		max_state = fan->fps_count - 1;
+
+	if (state > max_state)
 		return -EINVAL;
 
-	status = acpi_execute_simple_method(device->handle, "_FSL",
-					    fan->fps[state].control);
+	if (fan->fif.fine_grain_ctrl) {
+		value *= fan->fif.step_size;
+		/* Spec allows compensate the last step only */
+		if (value + fan->fif.step_size > 100)
+			value = 100;
+	} else {
+		value = fan->fps[state].control;
+	}
+
+	status = acpi_execute_simple_method(device->handle, "_FSL", value);
 	if (ACPI_FAILURE(status)) {
 		dev_dbg(&device->dev, "Failed to set state by _FSL\n");
-		return status;
+		return -ENODEV;
 	}
 
 	return 0;
@@ -237,7 +247,8 @@ static int acpi_fan_get_fif(struct acpi_device *device)
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	struct acpi_fan *fan = acpi_driver_data(device);
 	struct acpi_buffer format = { sizeof("NNNN"), "NNNN" };
-	struct acpi_buffer fif = { sizeof(fan->fif), &fan->fif };
+	u64 fields[4];
+	struct acpi_buffer fif = { sizeof(fields), fields };
 	union acpi_object *obj;
 	acpi_status status;
 
@@ -258,6 +269,17 @@ static int acpi_fan_get_fif(struct acpi_device *device)
 		status = -EINVAL;
 	}
 
+	fan->fif.revision = fields[0];
+	fan->fif.fine_grain_ctrl = fields[1];
+	fan->fif.step_size = fields[2];
+	fan->fif.low_speed_notification = fields[3];
+
+	/* If there is a bug in step size and set as 0, change to 1 */
+	if (!fan->fif.step_size)
+		fan->fif.step_size = 1;
+	/* If step size > 9, change to 9 (by spec valid values 1-9) */
+	else if (fan->fif.step_size > 9)
+		fan->fif.step_size = 9;
 err:
 	kfree(obj);
 	return status;
@@ -270,39 +292,6 @@ static int acpi_fan_speed_cmp(const void *a, const void *b)
 	return fps1->speed - fps2->speed;
 }
 
-static ssize_t show_state(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct acpi_fan_fps *fps = container_of(attr, struct acpi_fan_fps, dev_attr);
-	int count;
-
-	if (fps->control == 0xFFFFFFFF || fps->control > 100)
-		count = scnprintf(buf, PAGE_SIZE, "not-defined:");
-	else
-		count = scnprintf(buf, PAGE_SIZE, "%lld:", fps->control);
-
-	if (fps->trip_point == 0xFFFFFFFF || fps->trip_point > 9)
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
-	else
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->trip_point);
-
-	if (fps->speed == 0xFFFFFFFF)
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
-	else
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->speed);
-
-	if (fps->noise_level == 0xFFFFFFFF)
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
-	else
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->noise_level * 100);
-
-	if (fps->power == 0xFFFFFFFF)
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined\n");
-	else
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld\n", fps->power);
-
-	return count;
-}
-
 static int acpi_fan_get_fps(struct acpi_device *device)
 {
 	struct acpi_fan *fan = acpi_driver_data(device);
@@ -347,25 +336,6 @@ static int acpi_fan_get_fps(struct acpi_device *device)
 	sort(fan->fps, fan->fps_count, sizeof(*fan->fps),
 	     acpi_fan_speed_cmp, NULL);
 
-	for (i = 0; i < fan->fps_count; ++i) {
-		struct acpi_fan_fps *fps = &fan->fps[i];
-
-		snprintf(fps->name, ACPI_FPS_NAME_LEN, "state%d", i);
-		sysfs_attr_init(&fps->dev_attr.attr);
-		fps->dev_attr.show = show_state;
-		fps->dev_attr.store = NULL;
-		fps->dev_attr.attr.name = fps->name;
-		fps->dev_attr.attr.mode = 0444;
-		status = sysfs_create_file(&device->dev.kobj, &fps->dev_attr.attr);
-		if (status) {
-			int j;
-
-			for (j = 0; j < i; ++j)
-				sysfs_remove_file(&device->dev.kobj, &fan->fps[j].dev_attr.attr);
-			break;
-		}
-	}
-
 err:
 	kfree(obj);
 	return status;
@@ -396,6 +366,10 @@ static int acpi_fan_probe(struct platform_device *pdev)
 		if (result)
 			return result;
 
+		result = acpi_fan_create_attributes(device);
+		if (result)
+			return result;
+
 		fan->acpi4 = true;
 	} else {
 		result = acpi_device_update_power(device, NULL);
@@ -437,12 +411,8 @@ static int acpi_fan_probe(struct platform_device *pdev)
 	return 0;
 
 err_end:
-	if (fan->acpi4) {
-		int i;
-
-		for (i = 0; i < fan->fps_count; ++i)
-			sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr);
-	}
+	if (fan->acpi4)
+		acpi_fan_delete_attributes(device);
 
 	return result;
 }
@@ -453,10 +423,8 @@ static int acpi_fan_remove(struct platform_device *pdev)
 
 	if (fan->acpi4) {
 		struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
-		int i;
 
-		for (i = 0; i < fan->fps_count; ++i)
-			sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr);
+		acpi_fan_delete_attributes(device);
 	}
 	sysfs_remove_link(&pdev->dev.kobj, "thermal_cooling");
 	sysfs_remove_link(&fan->cdev->device.kobj, "device");
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 457e11d851b8..628bf8f18130 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -96,8 +96,6 @@ void acpi_scan_table_notify(void);
 
 extern struct list_head acpi_bus_id_list;
 
-#define ACPI_MAX_DEVICE_INSTANCES	4096
-
 struct acpi_device_bus_id {
 	const char *bus_id;
 	struct ida instance_ida;
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 45c5c0e45e33..7a70c4bfc23c 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -642,22 +642,24 @@ u64 acpi_os_get_timer(void)
 		(ACPI_100NSEC_PER_SEC / HZ);
 }
 
-acpi_status acpi_os_read_port(acpi_io_address port, u32 * value, u32 width)
+acpi_status acpi_os_read_port(acpi_io_address port, u32 *value, u32 width)
 {
 	u32 dummy;
 
-	if (!value)
+	if (value)
+		*value = 0;
+	else
 		value = &dummy;
 
-	*value = 0;
 	if (width <= 8) {
-		*(u8 *) value = inb(port);
+		*value = inb(port);
 	} else if (width <= 16) {
-		*(u16 *) value = inw(port);
+		*value = inw(port);
 	} else if (width <= 32) {
-		*(u32 *) value = inl(port);
+		*value = inl(port);
 	} else {
-		BUG();
+		pr_debug("%s: Access width %d not supported\n", __func__, width);
+		return AE_BAD_PARAMETER;
 	}
 
 	return AE_OK;
@@ -674,7 +676,8 @@ acpi_status acpi_os_write_port(acpi_io_address port, u32 value, u32 width)
 	} else if (width <= 32) {
 		outl(value, port);
 	} else {
-		BUG();
+		pr_debug("%s: Access width %d not supported\n", __func__, width);
+		return AE_BAD_PARAMETER;
 	}
 
 	return AE_OK;
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index d54fb8e54671..58647051c948 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -185,7 +185,7 @@ static acpi_status acpi_pci_link_check_current(struct acpi_resource *resource,
 			if (!p || !p->interrupt_count) {
 				/*
 				 * IRQ descriptors may have no IRQ# bits set,
-				 * particularly those those w/ _STA disabled
+				 * particularly those w/ _STA disabled
 				 */
 				pr_debug("Blank _CRS IRQ resource\n");
 				return AE_OK;
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index b76db99cced3..6f9e75d14808 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -22,8 +22,6 @@
 #include <linux/slab.h>
 #include <linux/dmi.h>
 #include <linux/platform_data/x86/apple.h>
-#include <acpi/apei.h>	/* for acpi_hest_init() */
-
 #include "internal.h"
 
 #define ACPI_PCI_ROOT_CLASS		"pci_bridge"
@@ -943,7 +941,6 @@ out_release_info:
 
 void __init acpi_pci_root_init(void)
 {
-	acpi_hest_init();
 	if (acpi_pci_disabled)
 		return;
 
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index f8e9fa82cb9b..32b20efff5f8 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -1080,6 +1080,11 @@ static int flatten_lpi_states(struct acpi_processor *pr,
 	return 0;
 }
 
+int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu)
+{
+	return -EOPNOTSUPP;
+}
+
 static int acpi_processor_get_lpi_info(struct acpi_processor *pr)
 {
 	int ret, i;
@@ -1088,6 +1093,11 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr)
 	struct acpi_device *d = NULL;
 	struct acpi_lpi_states_array info[2], *tmp, *prev, *curr;
 
+	/* make sure our architecture has support */
+	ret = acpi_processor_ffh_lpi_probe(pr->id);
+	if (ret == -EOPNOTSUPP)
+		return ret;
+
 	if (!osc_pc_lpi_support_confirmed)
 		return -EOPNOTSUPP;
 
@@ -1139,11 +1149,6 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr)
 	return 0;
 }
 
-int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu)
-{
-	return -ENODEV;
-}
-
 int __weak acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
 {
 	return -ENODEV;
diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index d0986bda2964..12bbfe833609 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -541,7 +541,8 @@ acpi_device_data_of_node(const struct fwnode_handle *fwnode)
 	if (is_acpi_device_node(fwnode)) {
 		const struct acpi_device *adev = to_acpi_device_node(fwnode);
 		return &adev->data;
-	} else if (is_acpi_data_node(fwnode)) {
+	}
+	if (is_acpi_data_node(fwnode)) {
 		const struct acpi_data_node *dn = to_acpi_data_node(fwnode);
 		return &dn->data;
 	}
@@ -685,7 +686,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 	 */
 	if (obj->type == ACPI_TYPE_LOCAL_REFERENCE) {
 		if (index)
-			return -EINVAL;
+			return -ENOENT;
 
 		device = acpi_fetch_acpi_dev(obj->reference.handle);
 		if (!device)
@@ -739,14 +740,19 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 					return -EINVAL;
 			}
 
-			/* assume following integer elements are all args */
+			/*
+			 * Assume the following integer elements are all args.
+			 * Stop counting on the first reference or end of the
+			 * package arguments. In case of neither reference,
+			 * nor integer, return an error, we can't parse it.
+			 */
 			for (i = 0; element + i < end && i < num_args; i++) {
 				int type = element[i].type;
 
+				if (type == ACPI_TYPE_LOCAL_REFERENCE)
+					break;
 				if (type == ACPI_TYPE_INTEGER)
 					nargs++;
-				else if (type == ACPI_TYPE_LOCAL_REFERENCE)
-					break;
 				else
 					return -EINVAL;
 			}
@@ -950,7 +956,7 @@ static int acpi_data_prop_read(const struct acpi_device_data *data,
 
 	if (proptype != DEV_PROP_STRING && nval > obj->package.count)
 		return -EOVERFLOW;
-	else if (nval <= 0)
+	if (nval == 0)
 		return -EINVAL;
 
 	items = obj->package.elements;
@@ -1012,14 +1018,10 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
 	const struct list_head *head;
 	struct list_head *next;
 
-	if (!child || is_acpi_device_node(child)) {
+	if ((!child || is_acpi_device_node(child)) && adev) {
 		struct acpi_device *child_adev;
 
-		if (adev)
-			head = &adev->children;
-		else
-			goto nondev;
-
+		head = &adev->children;
 		if (list_empty(head))
 			goto nondev;
 
@@ -1089,7 +1091,8 @@ acpi_node_get_parent(const struct fwnode_handle *fwnode)
 	if (is_acpi_data_node(fwnode)) {
 		/* All data nodes have parent pointer so just return that */
 		return to_acpi_data_node(fwnode)->parent;
-	} else if (is_acpi_device_node(fwnode)) {
+	}
+	if (is_acpi_device_node(fwnode)) {
 		struct device *dev = to_acpi_device_node(fwnode)->dev.parent;
 
 		if (dev)
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 1331756d4cfc..5ffd87ac42b3 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -477,7 +477,8 @@ static void acpi_device_del(struct acpi_device *device)
 	list_for_each_entry(acpi_device_bus_id, &acpi_bus_id_list, node)
 		if (!strcmp(acpi_device_bus_id->bus_id,
 			    acpi_device_hid(device))) {
-			ida_simple_remove(&acpi_device_bus_id->instance_ida, device->pnp.instance_no);
+			ida_free(&acpi_device_bus_id->instance_ida,
+				 device->pnp.instance_no);
 			if (ida_is_empty(&acpi_device_bus_id->instance_ida)) {
 				list_del(&acpi_device_bus_id->node);
 				kfree_const(acpi_device_bus_id->bus_id);
@@ -642,7 +643,7 @@ static int acpi_device_set_name(struct acpi_device *device,
 	struct ida *instance_ida = &acpi_device_bus_id->instance_ida;
 	int result;
 
-	result = ida_simple_get(instance_ida, 0, ACPI_MAX_DEVICE_INSTANCES, GFP_KERNEL);
+	result = ida_alloc(instance_ida, GFP_KERNEL);
 	if (result < 0)
 		return result;
 
@@ -1377,11 +1378,11 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp,
 		if (info->valid & ACPI_VALID_HID) {
 			acpi_add_id(pnp, info->hardware_id.string);
 			pnp->type.platform_id = 1;
-			if (info->valid & ACPI_VALID_CID) {
-				cid_list = &info->compatible_id_list;
-				for (i = 0; i < cid_list->count; i++)
-					acpi_add_id(pnp, cid_list->ids[i].string);
-			}
+		}
+		if (info->valid & ACPI_VALID_CID) {
+			cid_list = &info->compatible_id_list;
+			for (i = 0; i < cid_list->count; i++)
+				acpi_add_id(pnp, cid_list->ids[i].string);
 		}
 		if (info->valid & ACPI_VALID_ADR) {
 			pnp->bus_address = info->address;
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index d4fbea91ab6b..c992e57b2c79 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -758,6 +758,8 @@ bool acpi_s2idle_wake(void)
 			return true;
 		}
 
+		pm_pr_dbg("Rearming ACPI SCI for wakeup\n");
+
 		pm_wakeup_clear(acpi_sci_irq);
 		rearm_wake_irq(acpi_sci_irq);
 	}
@@ -869,12 +871,7 @@ static inline void acpi_sleep_syscore_init(void) {}
 #ifdef CONFIG_HIBERNATION
 static unsigned long s4_hardware_signature;
 static struct acpi_table_facs *facs;
-static int sigcheck = -1; /* Default behaviour is just to warn */
-
-void __init acpi_check_s4_hw_signature(int check)
-{
-	sigcheck = check;
-}
+int acpi_check_s4_hw_signature = -1; /* Default behaviour is just to warn */
 
 static int acpi_hibernation_begin(pm_message_t stage)
 {
@@ -999,7 +996,7 @@ static void acpi_sleep_hibernate_setup(void)
 	hibernation_set_ops(old_suspend_ordering ?
 			&acpi_hibernation_ops_old : &acpi_hibernation_ops);
 	sleep_states[ACPI_STATE_S4] = 1;
-	if (!sigcheck)
+	if (!acpi_check_s4_hw_signature)
 		return;
 
 	acpi_get_table(ACPI_SIG_FACS, 1, (struct acpi_table_header **)&facs);
@@ -1011,7 +1008,7 @@ static void acpi_sleep_hibernate_setup(void)
 		 */
 		s4_hardware_signature = facs->hardware_signature;
 
-		if (sigcheck > 0) {
+		if (acpi_check_s4_hw_signature > 0) {
 			/*
 			 * If we're actually obeying the ACPI specification
 			 * then the signature is written out as part of the
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 34600b5b9d8e..ceee808f7f2a 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -545,7 +545,7 @@ static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst = {
 	ACPI_SIG_WDDT, ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT,
 	ACPI_SIG_PSDT, ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT,
 	ACPI_SIG_IORT, ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT,
-	ACPI_SIG_NHLT, ACPI_SIG_AEST };
+	ACPI_SIG_NHLT, ACPI_SIG_AEST, ACPI_SIG_CEDT, ACPI_SIG_AGDI };
 
 #define ACPI_HEADER_SIZE sizeof(struct acpi_table_header)
 
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 4f64713e9917..becc198e4c22 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -415,6 +415,81 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "GA503"),
 		},
 	},
+	/*
+	 * Clevo NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2 have both a
+	 * working native and video interface. However the default detection
+	 * mechanism first registers the video interface before unregistering
+	 * it again and switching to the native interface during boot. This
+	 * results in a dangling SBIOS request for backlight change for some
+	 * reason, causing the backlight to switch to ~2% once per boot on the
+	 * first power cord connect or disconnect event. Setting the native
+	 * interface explicitly circumvents this buggy behaviour, by avoiding
+	 * the unregistering process.
+	 */
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xRU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+		DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
+		},
+	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xRU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
+		DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
+		},
+	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xRU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+		DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
+		},
+	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xRU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+		DMI_MATCH(DMI_BOARD_NAME, "AURA1501"),
+		},
+	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xRU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+		DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"),
+		},
+	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xNU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+		DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+		},
+	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xNU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
+		DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+		},
+	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xNU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+		DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+		},
+	},
 
 	/*
 	 * Desktops which falsely report a backlight and which our heuristics
diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index ffdeed5334d6..664070fc8349 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -285,6 +285,27 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
 					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
 	},
 	{
+		/* Lenovo Yoga Tablet 1050F/L */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Intel Corp."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "VALLEYVIEW C0 PLATFORM"),
+			DMI_MATCH(DMI_BOARD_NAME, "BYT-T FFD8"),
+			/* Partial match on beginning of BIOS version */
+			DMI_MATCH(DMI_BIOS_VERSION, "BLADE_21"),
+		},
+		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+	},
+	{
+		/* Nextbook Ares 8 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "M890BAP"),
+		},
+		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+	},
+	{
 		/* Whitelabel (sold as various brands) TM800A550L */
 		.matches = {
 			DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index e1a5eca3ae3c..d3bd14aaabf6 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -370,6 +370,7 @@ int amba_driver_register(struct amba_driver *drv)
 
 	return driver_register(&drv->drv);
 }
+EXPORT_SYMBOL(amba_driver_register);
 
 /**
  *	amba_driver_unregister - remove an AMBA device driver
@@ -383,7 +384,7 @@ void amba_driver_unregister(struct amba_driver *drv)
 {
 	driver_unregister(&drv->drv);
 }
-
+EXPORT_SYMBOL(amba_driver_unregister);
 
 static void amba_device_release(struct device *dev)
 {
@@ -642,6 +643,7 @@ int amba_device_register(struct amba_device *dev, struct resource *parent)
 
 	return amba_device_add(dev, parent);
 }
+EXPORT_SYMBOL(amba_device_register);
 
 /**
  *	amba_device_put - put an AMBA device
@@ -668,66 +670,7 @@ void amba_device_unregister(struct amba_device *dev)
 {
 	device_unregister(&dev->dev);
 }
-
-
-struct find_data {
-	struct amba_device *dev;
-	struct device *parent;
-	const char *busid;
-	unsigned int id;
-	unsigned int mask;
-};
-
-static int amba_find_match(struct device *dev, void *data)
-{
-	struct find_data *d = data;
-	struct amba_device *pcdev = to_amba_device(dev);
-	int r;
-
-	r = (pcdev->periphid & d->mask) == d->id;
-	if (d->parent)
-		r &= d->parent == dev->parent;
-	if (d->busid)
-		r &= strcmp(dev_name(dev), d->busid) == 0;
-
-	if (r) {
-		get_device(dev);
-		d->dev = pcdev;
-	}
-
-	return r;
-}
-
-/**
- *	amba_find_device - locate an AMBA device given a bus id
- *	@busid: bus id for device (or NULL)
- *	@parent: parent device (or NULL)
- *	@id: peripheral ID (or 0)
- *	@mask: peripheral ID mask (or 0)
- *
- *	Return the AMBA device corresponding to the supplied parameters.
- *	If no device matches, returns NULL.
- *
- *	NOTE: When a valid device is found, its refcount is
- *	incremented, and must be decremented before the returned
- *	reference.
- */
-struct amba_device *
-amba_find_device(const char *busid, struct device *parent, unsigned int id,
-		 unsigned int mask)
-{
-	struct find_data data;
-
-	data.dev = NULL;
-	data.parent = parent;
-	data.busid = busid;
-	data.id = id;
-	data.mask = mask;
-
-	bus_for_each_dev(&amba_bustype, NULL, &data, amba_find_match);
-
-	return data.dev;
-}
+EXPORT_SYMBOL(amba_device_unregister);
 
 /**
  *	amba_request_regions - request all mem regions associated with device
@@ -749,6 +692,7 @@ int amba_request_regions(struct amba_device *dev, const char *name)
 
 	return ret;
 }
+EXPORT_SYMBOL(amba_request_regions);
 
 /**
  *	amba_release_regions - release mem regions associated with device
@@ -763,11 +707,4 @@ void amba_release_regions(struct amba_device *dev)
 	size = resource_size(&dev->res);
 	release_mem_region(dev->res.start, size);
 }
-
-EXPORT_SYMBOL(amba_driver_register);
-EXPORT_SYMBOL(amba_driver_unregister);
-EXPORT_SYMBOL(amba_device_register);
-EXPORT_SYMBOL(amba_device_unregister);
-EXPORT_SYMBOL(amba_find_device);
-EXPORT_SYMBOL(amba_request_regions);
 EXPORT_SYMBOL(amba_release_regions);
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index 422753d52244..a31ffe16e626 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -1112,6 +1112,8 @@ DPRINTK("iovcnt = %d\n",skb_shinfo(skb)->nr_frags);
 	skb_data3 = skb->data[3];
 	paddr = dma_map_single(&eni_dev->pci_dev->dev,skb->data,skb->len,
 			       DMA_TO_DEVICE);
+	if (dma_mapping_error(&eni_dev->pci_dev->dev, paddr))
+		return enq_next;
 	ENI_PRV_PADDR(skb) = paddr;
 	/* prepare DMA queue entries */
 	j = 0;
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 976154140f0b..1d6636ebaac5 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -339,6 +339,46 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
 	return !ret;
 }
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+#include <acpi/cppc_acpi.h>
+
+void topology_init_cpu_capacity_cppc(void)
+{
+	struct cppc_perf_caps perf_caps;
+	int cpu;
+
+	if (likely(acpi_disabled || !acpi_cpc_valid()))
+		return;
+
+	raw_capacity = kcalloc(num_possible_cpus(), sizeof(*raw_capacity),
+			       GFP_KERNEL);
+	if (!raw_capacity)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		if (!cppc_get_perf_caps(cpu, &perf_caps) &&
+		    (perf_caps.highest_perf >= perf_caps.nominal_perf) &&
+		    (perf_caps.highest_perf >= perf_caps.lowest_perf)) {
+			raw_capacity[cpu] = perf_caps.highest_perf;
+			pr_debug("cpu_capacity: CPU%d cpu_capacity=%u (raw).\n",
+				 cpu, raw_capacity[cpu]);
+			continue;
+		}
+
+		pr_err("cpu_capacity: CPU%d missing/invalid highest performance.\n", cpu);
+		pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
+		goto exit;
+	}
+
+	topology_normalize_cpu_scale();
+	schedule_work(&update_topology_flags_work);
+	pr_debug("cpu_capacity: cpu_capacity initialization done\n");
+
+exit:
+	free_raw_capacity();
+}
+#endif
+
 #ifdef CONFIG_CPU_FREQ
 static cpumask_var_t cpus_to_visit;
 static void parsing_done_workfn(struct work_struct *work);
@@ -387,9 +427,8 @@ static int __init register_cpufreq_notifier(void)
 	int ret;
 
 	/*
-	 * on ACPI-based systems we need to use the default cpu capacity
-	 * until we have the necessary code to parse the cpu capacity, so
-	 * skip registering cpufreq notifier.
+	 * On ACPI-based systems skip registering cpufreq notifier as cpufreq
+	 * information is not needed for cpu capacity initialization.
 	 */
 	if (!acpi_disabled || !raw_capacity)
 		return -EINVAL;
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 5db704f02e71..1ee878d126fd 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -636,6 +636,18 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
 			atomic_read(&genpd->sd_count) > 0)
 		return -EBUSY;
 
+	/*
+	 * The children must be in their deepest (powered-off) states to allow
+	 * the parent to be powered off. Note that, there's no need for
+	 * additional locking, as powering on a child, requires the parent's
+	 * lock to be acquired first.
+	 */
+	list_for_each_entry(link, &genpd->parent_links, parent_node) {
+		struct generic_pm_domain *child = link->child;
+		if (child->state_idx < child->state_count - 1)
+			return -EBUSY;
+	}
+
 	list_for_each_entry(pdd, &genpd->dev_list, list_node) {
 		enum pm_qos_flags_status stat;
 
@@ -1073,6 +1085,13 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock,
 	    || atomic_read(&genpd->sd_count) > 0)
 		return;
 
+	/* Check that the children are in their deepest (powered-off) state. */
+	list_for_each_entry(link, &genpd->parent_links, parent_node) {
+		struct generic_pm_domain *child = link->child;
+		if (child->state_idx < child->state_count - 1)
+			return;
+	}
+
 	/* Choose the deepest state when suspending */
 	genpd->state_idx = genpd->state_count - 1;
 	if (_genpd_power_off(genpd, false))
@@ -2058,9 +2077,9 @@ static int genpd_remove(struct generic_pm_domain *genpd)
 		kfree(link);
 	}
 
-	genpd_debug_remove(genpd);
 	list_del(&genpd->gpd_list_node);
 	genpd_unlock(genpd);
+	genpd_debug_remove(genpd);
 	cancel_work_sync(&genpd->power_off_work);
 	if (genpd_is_cpu_domain(genpd))
 		free_cpumask_var(genpd->cpus);
@@ -2248,12 +2267,8 @@ int of_genpd_add_provider_simple(struct device_node *np,
 	/* Parse genpd OPP table */
 	if (genpd->set_performance_state) {
 		ret = dev_pm_opp_of_add_table(&genpd->dev);
-		if (ret) {
-			if (ret != -EPROBE_DEFER)
-				dev_err(&genpd->dev, "Failed to add OPP table: %d\n",
-					ret);
-			return ret;
-		}
+		if (ret)
+			return dev_err_probe(&genpd->dev, ret, "Failed to add OPP table\n");
 
 		/*
 		 * Save table for faster processing while setting performance
@@ -2312,9 +2327,8 @@ int of_genpd_add_provider_onecell(struct device_node *np,
 		if (genpd->set_performance_state) {
 			ret = dev_pm_opp_of_add_table_indexed(&genpd->dev, i);
 			if (ret) {
-				if (ret != -EPROBE_DEFER)
-					dev_err(&genpd->dev, "Failed to add OPP table for index %d: %d\n",
-						i, ret);
+				dev_err_probe(&genpd->dev, ret,
+					      "Failed to add OPP table for index %d\n", i);
 				goto error;
 			}
 
@@ -2672,12 +2686,8 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev,
 	ret = genpd_add_device(pd, dev, base_dev);
 	mutex_unlock(&gpd_list_lock);
 
-	if (ret < 0) {
-		if (ret != -EPROBE_DEFER)
-			dev_err(dev, "failed to add to PM domain %s: %d",
-				pd->name, ret);
-		return ret;
-	}
+	if (ret < 0)
+		return dev_err_probe(dev, ret, "failed to add to PM domain %s\n", pd->name);
 
 	dev->pm_domain->detach = genpd_dev_pm_detach;
 	dev->pm_domain->sync = genpd_dev_pm_sync;
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 04ea92cbd9cf..c50139207794 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -485,7 +485,7 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev,
 	trace_device_pm_callback_start(dev, info, state.event);
 	error = cb(dev);
 	trace_device_pm_callback_end(dev, error);
-	suspend_report_result(cb, error);
+	suspend_report_result(dev, cb, error);
 
 	initcall_debug_report(dev, calltime, cb, error);
 
@@ -1568,7 +1568,7 @@ static int legacy_suspend(struct device *dev, pm_message_t state,
 	trace_device_pm_callback_start(dev, info, state.event);
 	error = cb(dev, state);
 	trace_device_pm_callback_end(dev, error);
-	suspend_report_result(cb, error);
+	suspend_report_result(dev, cb, error);
 
 	initcall_debug_report(dev, calltime, cb, error);
 
@@ -1855,7 +1855,7 @@ unlock:
 	device_unlock(dev);
 
 	if (ret < 0) {
-		suspend_report_result(callback, ret);
+		suspend_report_result(dev, callback, ret);
 		pm_runtime_put(dev);
 		return ret;
 	}
@@ -1960,10 +1960,10 @@ int dpm_suspend_start(pm_message_t state)
 }
 EXPORT_SYMBOL_GPL(dpm_suspend_start);
 
-void __suspend_report_result(const char *function, void *fn, int ret)
+void __suspend_report_result(const char *function, struct device *dev, void *fn, int ret)
 {
 	if (ret)
-		pr_err("%s(): %pS returns %d\n", function, fn, ret);
+		dev_err(dev, "%s(): %pS returns %d\n", function, fn, ret);
 }
 EXPORT_SYMBOL_GPL(__suspend_report_result);
 
@@ -2018,7 +2018,9 @@ static bool pm_ops_is_empty(const struct dev_pm_ops *ops)
 
 void device_pm_check_callbacks(struct device *dev)
 {
-	spin_lock_irq(&dev->power.lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->power.lock, flags);
 	dev->power.no_pm_callbacks =
 		(!dev->bus || (pm_ops_is_empty(dev->bus->pm) &&
 		 !dev->bus->suspend && !dev->bus->resume)) &&
@@ -2027,7 +2029,7 @@ void device_pm_check_callbacks(struct device *dev)
 		(!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) &&
 		(!dev->driver || (pm_ops_is_empty(dev->driver->pm) &&
 		 !dev->driver->suspend && !dev->driver->resume));
-	spin_unlock_irq(&dev->power.lock);
+	spin_unlock_irqrestore(&dev->power.lock, flags);
 }
 
 bool dev_pm_skip_suspend(struct device *dev)
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 2f3cce17219b..d4059e6ffeae 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1476,11 +1476,16 @@ EXPORT_SYMBOL_GPL(pm_runtime_enable);
 
 static void pm_runtime_disable_action(void *data)
 {
+	pm_runtime_dont_use_autosuspend(data);
 	pm_runtime_disable(data);
 }
 
 /**
  * devm_pm_runtime_enable - devres-enabled version of pm_runtime_enable.
+ *
+ * NOTE: this will also handle calling pm_runtime_dont_use_autosuspend() for
+ * you at driver exit time if needed.
+ *
  * @dev: Device to handle.
  */
 int devm_pm_runtime_enable(struct device *dev)
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index 0004db4a9d3b..d487a6bac630 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -289,7 +289,7 @@ EXPORT_SYMBOL_GPL(dev_pm_disable_wake_irq);
  *
  * Enables wakeirq conditionally. We need to enable wake-up interrupt
  * lazily on the first rpm_suspend(). This is needed as the consumer device
- * starts in RPM_SUSPENDED state, and the the first pm_runtime_get() would
+ * starts in RPM_SUSPENDED state, and the first pm_runtime_get() would
  * otherwise try to disable already disabled wakeirq. The wake-up interrupt
  * starts disabled with IRQ_NOAUTOEN set.
  *
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 8666590201c9..a57d469676ca 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -587,7 +587,7 @@ static bool wakeup_source_not_registered(struct wakeup_source *ws)
  * @ws: Wakeup source to handle.
  *
  * Update the @ws' statistics and, if @ws has just been activated, notify the PM
- * core of the event by incrementing the counter of of wakeup events being
+ * core of the event by incrementing the counter of the wakeup events being
  * processed.
  */
 static void wakeup_source_activate(struct wakeup_source *ws)
@@ -733,7 +733,7 @@ static void wakeup_source_deactivate(struct wakeup_source *ws)
 
 	/*
 	 * Increment the counter of registered wakeup events and decrement the
-	 * couter of wakeup events in progress simultaneously.
+	 * counter of wakeup events in progress simultaneously.
 	 */
 	cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count);
 	trace_wakeup_source_deactivate(ws->name, cec);
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index fc24e89f9592..e9d1efcda89b 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -14,11 +14,11 @@
 #include <linux/hardirq.h>
 #include <linux/topology.h>
 
-#define define_id_show_func(name)					\
+#define define_id_show_func(name, fmt)					\
 static ssize_t name##_show(struct device *dev,				\
 			   struct device_attribute *attr, char *buf)	\
 {									\
-	return sysfs_emit(buf, "%d\n", topology_##name(dev->id));	\
+	return sysfs_emit(buf, fmt "\n", topology_##name(dev->id));	\
 }
 
 #define define_siblings_read_func(name, mask)					\
@@ -42,22 +42,25 @@ static ssize_t name##_list_read(struct file *file, struct kobject *kobj,	\
 					off, count);				\
 }
 
-define_id_show_func(physical_package_id);
+define_id_show_func(physical_package_id, "%d");
 static DEVICE_ATTR_RO(physical_package_id);
 
 #ifdef TOPOLOGY_DIE_SYSFS
-define_id_show_func(die_id);
+define_id_show_func(die_id, "%d");
 static DEVICE_ATTR_RO(die_id);
 #endif
 
 #ifdef TOPOLOGY_CLUSTER_SYSFS
-define_id_show_func(cluster_id);
+define_id_show_func(cluster_id, "%d");
 static DEVICE_ATTR_RO(cluster_id);
 #endif
 
-define_id_show_func(core_id);
+define_id_show_func(core_id, "%d");
 static DEVICE_ATTR_RO(core_id);
 
+define_id_show_func(ppin, "0x%llx");
+static DEVICE_ATTR_ADMIN_RO(ppin);
+
 define_siblings_read_func(thread_siblings, sibling_cpumask);
 static BIN_ATTR_RO(thread_siblings, 0);
 static BIN_ATTR_RO(thread_siblings_list, 0);
@@ -87,7 +90,7 @@ static BIN_ATTR_RO(package_cpus, 0);
 static BIN_ATTR_RO(package_cpus_list, 0);
 
 #ifdef TOPOLOGY_BOOK_SYSFS
-define_id_show_func(book_id);
+define_id_show_func(book_id, "%d");
 static DEVICE_ATTR_RO(book_id);
 define_siblings_read_func(book_siblings, book_cpumask);
 static BIN_ATTR_RO(book_siblings, 0);
@@ -95,7 +98,7 @@ static BIN_ATTR_RO(book_siblings_list, 0);
 #endif
 
 #ifdef TOPOLOGY_DRAWER_SYSFS
-define_id_show_func(drawer_id);
+define_id_show_func(drawer_id, "%d");
 static DEVICE_ATTR_RO(drawer_id);
 define_siblings_read_func(drawer_siblings, drawer_cpumask);
 static BIN_ATTR_RO(drawer_siblings, 0);
@@ -145,6 +148,7 @@ static struct attribute *default_attrs[] = {
 #ifdef TOPOLOGY_DRAWER_SYSFS
 	&dev_attr_drawer_id.attr,
 #endif
+	&dev_attr_ppin.attr,
 	NULL
 };
 
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c443cd64fc9b..8c415be86732 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -76,9 +76,6 @@ struct virtio_blk {
 	 */
 	refcount_t refs;
 
-	/* What host tells us, plus 2 for header & tailer. */
-	unsigned int sg_elems;
-
 	/* Ida index - used to track minor number allocations. */
 	int index;
 
@@ -322,8 +319,6 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_status_t status;
 	int err;
 
-	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
-
 	status = virtblk_setup_cmd(vblk->vdev, req, vbr);
 	if (unlikely(status))
 		return status;
@@ -783,8 +778,6 @@ static int virtblk_probe(struct virtio_device *vdev)
 	/* Prevent integer overflows and honor max vq size */
 	sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2);
 
-	/* We need extra sg elements at head and tail. */
-	sg_elems += 2;
 	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
 	if (!vblk) {
 		err = -ENOMEM;
@@ -796,7 +789,6 @@ static int virtblk_probe(struct virtio_device *vdev)
 	mutex_init(&vblk->vdev_mutex);
 
 	vblk->vdev = vdev;
-	vblk->sg_elems = sg_elems;
 
 	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
 
@@ -853,7 +845,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 		set_disk_ro(vblk->disk, 1);
 
 	/* We can handle whatever the host told us to handle. */
-	blk_queue_max_segments(q, vblk->sg_elems-2);
+	blk_queue_max_segments(q, sg_elems);
 
 	/* No real sector limit. */
 	blk_queue_max_hw_sectors(q, -1U);
@@ -925,9 +917,15 @@ static int virtblk_probe(struct virtio_device *vdev)
 
 		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
 			     &v);
+
+		/*
+		 * max_discard_seg == 0 is out of spec but we always
+		 * handled it.
+		 */
+		if (!v)
+			v = sg_elems;
 		blk_queue_max_discard_segments(q,
-					       min_not_zero(v,
-							    MAX_DISCARD_SEGMENTS));
+					       min(v, MAX_DISCARD_SEGMENTS));
 
 		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
 	}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index ca71a0585333..03b5fb341e58 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1288,7 +1288,8 @@ free_shadow:
 			rinfo->ring_ref[i] = GRANT_INVALID_REF;
 		}
 	}
-	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
+	free_pages_exact(rinfo->ring.sring,
+			 info->nr_ring_pages * XEN_PAGE_SIZE);
 	rinfo->ring.sring = NULL;
 
 	if (rinfo->irq)
@@ -1372,9 +1373,15 @@ static int blkif_get_final_status(enum blk_req_status s1,
 	return BLKIF_RSP_OKAY;
 }
 
-static bool blkif_completion(unsigned long *id,
-			     struct blkfront_ring_info *rinfo,
-			     struct blkif_response *bret)
+/*
+ * Return values:
+ *  1 response processed.
+ *  0 missing further responses.
+ * -1 error while processing.
+ */
+static int blkif_completion(unsigned long *id,
+			    struct blkfront_ring_info *rinfo,
+			    struct blkif_response *bret)
 {
 	int i = 0;
 	struct scatterlist *sg;
@@ -1397,7 +1404,7 @@ static bool blkif_completion(unsigned long *id,
 
 		/* Wait the second response if not yet here. */
 		if (s2->status < REQ_DONE)
-			return false;
+			return 0;
 
 		bret->status = blkif_get_final_status(s->status,
 						      s2->status);
@@ -1448,42 +1455,43 @@ static bool blkif_completion(unsigned long *id,
 	}
 	/* Add the persistent grant into the list of free grants */
 	for (i = 0; i < num_grant; i++) {
-		if (gnttab_query_foreign_access(s->grants_used[i]->gref)) {
+		if (!gnttab_try_end_foreign_access(s->grants_used[i]->gref)) {
 			/*
 			 * If the grant is still mapped by the backend (the
 			 * backend has chosen to make this grant persistent)
 			 * we add it at the head of the list, so it will be
 			 * reused first.
 			 */
-			if (!info->feature_persistent)
-				pr_alert_ratelimited("backed has not unmapped grant: %u\n",
-						     s->grants_used[i]->gref);
+			if (!info->feature_persistent) {
+				pr_alert("backed has not unmapped grant: %u\n",
+					 s->grants_used[i]->gref);
+				return -1;
+			}
 			list_add(&s->grants_used[i]->node, &rinfo->grants);
 			rinfo->persistent_gnts_c++;
 		} else {
 			/*
-			 * If the grant is not mapped by the backend we end the
-			 * foreign access and add it to the tail of the list,
-			 * so it will not be picked again unless we run out of
-			 * persistent grants.
+			 * If the grant is not mapped by the backend we add it
+			 * to the tail of the list, so it will not be picked
+			 * again unless we run out of persistent grants.
 			 */
-			gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL);
 			s->grants_used[i]->gref = GRANT_INVALID_REF;
 			list_add_tail(&s->grants_used[i]->node, &rinfo->grants);
 		}
 	}
 	if (s->req.operation == BLKIF_OP_INDIRECT) {
 		for (i = 0; i < INDIRECT_GREFS(num_grant); i++) {
-			if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) {
-				if (!info->feature_persistent)
-					pr_alert_ratelimited("backed has not unmapped grant: %u\n",
-							     s->indirect_grants[i]->gref);
+			if (!gnttab_try_end_foreign_access(s->indirect_grants[i]->gref)) {
+				if (!info->feature_persistent) {
+					pr_alert("backed has not unmapped grant: %u\n",
+						 s->indirect_grants[i]->gref);
+					return -1;
+				}
 				list_add(&s->indirect_grants[i]->node, &rinfo->grants);
 				rinfo->persistent_gnts_c++;
 			} else {
 				struct page *indirect_page;
 
-				gnttab_end_foreign_access(s->indirect_grants[i]->gref, 0, 0UL);
 				/*
 				 * Add the used indirect page back to the list of
 				 * available pages for indirect grefs.
@@ -1498,7 +1506,7 @@ static bool blkif_completion(unsigned long *id,
 		}
 	}
 
-	return true;
+	return 1;
 }
 
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -1564,12 +1572,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 		}
 
 		if (bret.operation != BLKIF_OP_DISCARD) {
+			int ret;
+
 			/*
 			 * We may need to wait for an extra response if the
 			 * I/O request is split in 2
 			 */
-			if (!blkif_completion(&id, rinfo, &bret))
+			ret = blkif_completion(&id, rinfo, &bret);
+			if (!ret)
 				continue;
+			if (unlikely(ret < 0))
+				goto err;
 		}
 
 		if (add_id_to_freelist(rinfo, id)) {
@@ -1676,8 +1689,7 @@ static int setup_blkring(struct xenbus_device *dev,
 	for (i = 0; i < info->nr_ring_pages; i++)
 		rinfo->ring_ref[i] = GRANT_INVALID_REF;
 
-	sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
-						       get_order(ring_size));
+	sring = alloc_pages_exact(ring_size, GFP_NOIO);
 	if (!sring) {
 		xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
 		return -ENOMEM;
@@ -1687,7 +1699,7 @@ static int setup_blkring(struct xenbus_device *dev,
 
 	err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref);
 	if (err < 0) {
-		free_pages((unsigned long)sring, get_order(ring_size));
+		free_pages_exact(sring, ring_size);
 		rinfo->ring.sring = NULL;
 		goto fail;
 	}
@@ -2532,11 +2544,10 @@ static void purge_persistent_grants(struct blkfront_info *info)
 		list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants,
 					 node) {
 			if (gnt_list_entry->gref == GRANT_INVALID_REF ||
-			    gnttab_query_foreign_access(gnt_list_entry->gref))
+			    !gnttab_try_end_foreign_access(gnt_list_entry->gref))
 				continue;
 
 			list_del(&gnt_list_entry->node);
-			gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
 			rinfo->persistent_gnts_c--;
 			gnt_list_entry->gref = GRANT_INVALID_REF;
 			list_add_tail(&gnt_list_entry->node, &rinfo->grants);
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 9704963f9d50..a087156a5818 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -401,7 +401,7 @@ config HW_RANDOM_MESON
 
 config HW_RANDOM_CAVIUM
 	tristate "Cavium ThunderX Random Number Generator support"
-	depends on HW_RANDOM && PCI && ARM64
+	depends on HW_RANDOM && PCI && ARCH_THUNDER
 	default HW_RANDOM
 	help
 	  This driver provides kernel-side support for the Random Number
diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index ecb71c4317a5..b8effe77d80f 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -13,13 +13,16 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/hw_random.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 
 #define TRNG_CR		0x00
 #define TRNG_MR		0x04
 #define TRNG_ISR	0x1c
+#define TRNG_ISR_DATRDY	BIT(0)
 #define TRNG_ODATA	0x50
 
 #define TRNG_KEY	0x524e4700 /* RNG */
@@ -34,37 +37,79 @@ struct atmel_trng {
 	struct clk *clk;
 	void __iomem *base;
 	struct hwrng rng;
+	bool has_half_rate;
 };
 
+static bool atmel_trng_wait_ready(struct atmel_trng *trng, bool wait)
+{
+	int ready;
+
+	ready = readl(trng->base + TRNG_ISR) & TRNG_ISR_DATRDY;
+	if (!ready && wait)
+		readl_poll_timeout(trng->base + TRNG_ISR, ready,
+				   ready & TRNG_ISR_DATRDY, 1000, 20000);
+
+	return !!ready;
+}
+
 static int atmel_trng_read(struct hwrng *rng, void *buf, size_t max,
 			   bool wait)
 {
 	struct atmel_trng *trng = container_of(rng, struct atmel_trng, rng);
 	u32 *data = buf;
+	int ret;
 
-	/* data ready? */
-	if (readl(trng->base + TRNG_ISR) & 1) {
-		*data = readl(trng->base + TRNG_ODATA);
-		/*
-		  ensure data ready is only set again AFTER the next data
-		  word is ready in case it got set between checking ISR
-		  and reading ODATA, so we don't risk re-reading the
-		  same word
-		*/
-		readl(trng->base + TRNG_ISR);
-		return 4;
-	} else
-		return 0;
+	ret = pm_runtime_get_sync((struct device *)trng->rng.priv);
+	if (ret < 0) {
+		pm_runtime_put_sync((struct device *)trng->rng.priv);
+		return ret;
+	}
+
+	ret = atmel_trng_wait_ready(trng, wait);
+	if (!ret)
+		goto out;
+
+	*data = readl(trng->base + TRNG_ODATA);
+	/*
+	 * ensure data ready is only set again AFTER the next data word is ready
+	 * in case it got set between checking ISR and reading ODATA, so we
+	 * don't risk re-reading the same word
+	 */
+	readl(trng->base + TRNG_ISR);
+	ret = 4;
+
+out:
+	pm_runtime_mark_last_busy((struct device *)trng->rng.priv);
+	pm_runtime_put_sync_autosuspend((struct device *)trng->rng.priv);
+	return ret;
 }
 
-static void atmel_trng_enable(struct atmel_trng *trng)
+static int atmel_trng_init(struct atmel_trng *trng)
 {
+	unsigned long rate;
+	int ret;
+
+	ret = clk_prepare_enable(trng->clk);
+	if (ret)
+		return ret;
+
+	if (trng->has_half_rate) {
+		rate = clk_get_rate(trng->clk);
+
+		/* if peripheral clk is above 100MHz, set HALFR */
+		if (rate > 100000000)
+			writel(TRNG_HALFR, trng->base + TRNG_MR);
+	}
+
 	writel(TRNG_KEY | 1, trng->base + TRNG_CR);
+
+	return 0;
 }
 
-static void atmel_trng_disable(struct atmel_trng *trng)
+static void atmel_trng_cleanup(struct atmel_trng *trng)
 {
 	writel(TRNG_KEY, trng->base + TRNG_CR);
+	clk_disable_unprepare(trng->clk);
 }
 
 static int atmel_trng_probe(struct platform_device *pdev)
@@ -88,32 +133,31 @@ static int atmel_trng_probe(struct platform_device *pdev)
 	if (!data)
 		return -ENODEV;
 
-	if (data->has_half_rate) {
-		unsigned long rate = clk_get_rate(trng->clk);
-
-		/* if peripheral clk is above 100MHz, set HALFR */
-		if (rate > 100000000)
-			writel(TRNG_HALFR, trng->base + TRNG_MR);
-	}
-
-	ret = clk_prepare_enable(trng->clk);
-	if (ret)
-		return ret;
-
-	atmel_trng_enable(trng);
+	trng->has_half_rate = data->has_half_rate;
 	trng->rng.name = pdev->name;
 	trng->rng.read = atmel_trng_read;
+	trng->rng.priv = (unsigned long)&pdev->dev;
+	platform_set_drvdata(pdev, trng);
 
-	ret = devm_hwrng_register(&pdev->dev, &trng->rng);
+#ifndef CONFIG_PM
+	ret = atmel_trng_init(trng);
 	if (ret)
-		goto err_register;
+		return ret;
+#endif
 
-	platform_set_drvdata(pdev, trng);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, 100);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
 
-	return 0;
+	ret = devm_hwrng_register(&pdev->dev, &trng->rng);
+	if (ret) {
+		pm_runtime_disable(&pdev->dev);
+		pm_runtime_set_suspended(&pdev->dev);
+#ifndef CONFIG_PM
+		atmel_trng_cleanup(trng);
+#endif
+	}
 
-err_register:
-	clk_disable_unprepare(trng->clk);
 	return ret;
 }
 
@@ -121,43 +165,35 @@ static int atmel_trng_remove(struct platform_device *pdev)
 {
 	struct atmel_trng *trng = platform_get_drvdata(pdev);
 
-
-	atmel_trng_disable(trng);
-	clk_disable_unprepare(trng->clk);
+	atmel_trng_cleanup(trng);
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_set_suspended(&pdev->dev);
 
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int atmel_trng_suspend(struct device *dev)
+static int __maybe_unused atmel_trng_runtime_suspend(struct device *dev)
 {
 	struct atmel_trng *trng = dev_get_drvdata(dev);
 
-	atmel_trng_disable(trng);
-	clk_disable_unprepare(trng->clk);
+	atmel_trng_cleanup(trng);
 
 	return 0;
 }
 
-static int atmel_trng_resume(struct device *dev)
+static int __maybe_unused atmel_trng_runtime_resume(struct device *dev)
 {
 	struct atmel_trng *trng = dev_get_drvdata(dev);
-	int ret;
 
-	ret = clk_prepare_enable(trng->clk);
-	if (ret)
-		return ret;
-
-	atmel_trng_enable(trng);
-
-	return 0;
+	return atmel_trng_init(trng);
 }
 
-static const struct dev_pm_ops atmel_trng_pm_ops = {
-	.suspend	= atmel_trng_suspend,
-	.resume		= atmel_trng_resume,
+static const struct dev_pm_ops __maybe_unused atmel_trng_pm_ops = {
+	SET_RUNTIME_PM_OPS(atmel_trng_runtime_suspend,
+			   atmel_trng_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
 };
-#endif /* CONFIG_PM */
 
 static const struct atmel_trng_data at91sam9g45_config = {
 	.has_half_rate = false,
@@ -185,9 +221,7 @@ static struct platform_driver atmel_trng_driver = {
 	.remove		= atmel_trng_remove,
 	.driver		= {
 		.name	= "atmel-trng",
-#ifdef CONFIG_PM
-		.pm	= &atmel_trng_pm_ops,
-#endif /* CONFIG_PM */
+		.pm	= pm_ptr(&atmel_trng_pm_ops),
 		.of_match_table = atmel_trng_dt_ids,
 	},
 };
diff --git a/drivers/char/hw_random/cavium-rng-vf.c b/drivers/char/hw_random/cavium-rng-vf.c
index 6f66919652bf..7c55f4cf4a8b 100644
--- a/drivers/char/hw_random/cavium-rng-vf.c
+++ b/drivers/char/hw_random/cavium-rng-vf.c
@@ -179,7 +179,7 @@ static int cavium_map_pf_regs(struct cavium_rng *rng)
 	pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
 			      PCI_DEVID_CAVIUM_RNG_PF, NULL);
 	if (!pdev) {
-		dev_err(&pdev->dev, "Cannot find RNG PF device\n");
+		pr_err("Cannot find RNG PF device\n");
 		return -EIO;
 	}
 
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index a3db27916256..16f227b995e8 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -15,6 +15,7 @@
 #include <linux/err.h>
 #include <linux/fs.h>
 #include <linux/hw_random.h>
+#include <linux/random.h>
 #include <linux/kernel.h>
 #include <linux/kthread.h>
 #include <linux/sched/signal.h>
@@ -31,7 +32,7 @@ static struct hwrng *current_rng;
 /* the current rng has been explicitly chosen by user via sysfs */
 static int cur_rng_set_by_user;
 static struct task_struct *hwrng_fill;
-/* list of registered rngs, sorted decending by quality */
+/* list of registered rngs */
 static LIST_HEAD(rng_list);
 /* Protects rng_list and current_rng */
 static DEFINE_MUTEX(rng_mutex);
@@ -44,14 +45,14 @@ static unsigned short default_quality; /* = 0; default to "off" */
 
 module_param(current_quality, ushort, 0644);
 MODULE_PARM_DESC(current_quality,
-		 "current hwrng entropy estimation per 1024 bits of input");
+		 "current hwrng entropy estimation per 1024 bits of input -- obsolete, use rng_quality instead");
 module_param(default_quality, ushort, 0644);
 MODULE_PARM_DESC(default_quality,
 		 "default entropy content of hwrng per 1024 bits of input");
 
 static void drop_current_rng(void);
 static int hwrng_init(struct hwrng *rng);
-static void start_khwrngd(void);
+static void hwrng_manage_rngd(struct hwrng *rng);
 
 static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size,
 			       int wait);
@@ -64,13 +65,12 @@ static size_t rng_buffer_size(void)
 static void add_early_randomness(struct hwrng *rng)
 {
 	int bytes_read;
-	size_t size = min_t(size_t, 16, rng_buffer_size());
 
 	mutex_lock(&reading_mutex);
-	bytes_read = rng_get_data(rng, rng_buffer, size, 0);
+	bytes_read = rng_get_data(rng, rng_fillbuf, 32, 0);
 	mutex_unlock(&reading_mutex);
 	if (bytes_read > 0)
-		add_device_randomness(rng_buffer, bytes_read);
+		add_device_randomness(rng_fillbuf, bytes_read);
 }
 
 static inline void cleanup_rng(struct kref *kref)
@@ -161,14 +161,13 @@ static int hwrng_init(struct hwrng *rng)
 	reinit_completion(&rng->cleanup_done);
 
 skip_init:
-	current_quality = rng->quality ? : default_quality;
-	if (current_quality > 1024)
-		current_quality = 1024;
+	if (!rng->quality)
+		rng->quality = default_quality;
+	if (rng->quality > 1024)
+		rng->quality = 1024;
+	current_quality = rng->quality; /* obsolete */
 
-	if (current_quality == 0 && hwrng_fill)
-		kthread_stop(hwrng_fill);
-	if (current_quality > 0 && !hwrng_fill)
-		start_khwrngd();
+	hwrng_manage_rngd(rng);
 
 	return 0;
 }
@@ -298,24 +297,28 @@ static struct miscdevice rng_miscdev = {
 
 static int enable_best_rng(void)
 {
+	struct hwrng *rng, *new_rng = NULL;
 	int ret = -ENODEV;
 
 	BUG_ON(!mutex_is_locked(&rng_mutex));
 
-	/* rng_list is sorted by quality, use the best (=first) one */
-	if (!list_empty(&rng_list)) {
-		struct hwrng *new_rng;
-
-		new_rng = list_entry(rng_list.next, struct hwrng, list);
-		ret = ((new_rng == current_rng) ? 0 : set_current_rng(new_rng));
-		if (!ret)
-			cur_rng_set_by_user = 0;
-	} else {
+	/* no rng to use? */
+	if (list_empty(&rng_list)) {
 		drop_current_rng();
 		cur_rng_set_by_user = 0;
-		ret = 0;
+		return 0;
+	}
+
+	/* use the rng which offers the best quality */
+	list_for_each_entry(rng, &rng_list, list) {
+		if (!new_rng || rng->quality > new_rng->quality)
+			new_rng = rng;
 	}
 
+	ret = ((new_rng == current_rng) ? 0 : set_current_rng(new_rng));
+	if (!ret)
+		cur_rng_set_by_user = 0;
+
 	return ret;
 }
 
@@ -336,8 +339,9 @@ static ssize_t rng_current_store(struct device *dev,
 	} else {
 		list_for_each_entry(rng, &rng_list, list) {
 			if (sysfs_streq(rng->name, buf)) {
-				cur_rng_set_by_user = 1;
 				err = set_current_rng(rng);
+				if (!err)
+					cur_rng_set_by_user = 1;
 				break;
 			}
 		}
@@ -399,14 +403,76 @@ static ssize_t rng_selected_show(struct device *dev,
 	return sysfs_emit(buf, "%d\n", cur_rng_set_by_user);
 }
 
+static ssize_t rng_quality_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	ssize_t ret;
+	struct hwrng *rng;
+
+	rng = get_current_rng();
+	if (IS_ERR(rng))
+		return PTR_ERR(rng);
+
+	if (!rng) /* no need to put_rng */
+		return -ENODEV;
+
+	ret = sysfs_emit(buf, "%hu\n", rng->quality);
+	put_rng(rng);
+
+	return ret;
+}
+
+static ssize_t rng_quality_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t len)
+{
+	u16 quality;
+	int ret = -EINVAL;
+
+	if (len < 2)
+		return -EINVAL;
+
+	ret = mutex_lock_interruptible(&rng_mutex);
+	if (ret)
+		return -ERESTARTSYS;
+
+	ret = kstrtou16(buf, 0, &quality);
+	if (ret || quality > 1024) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!current_rng) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	current_rng->quality = quality;
+	current_quality = quality; /* obsolete */
+
+	/* the best available RNG may have changed */
+	ret = enable_best_rng();
+
+	/* start/stop rngd if necessary */
+	if (current_rng)
+		hwrng_manage_rngd(current_rng);
+
+out:
+	mutex_unlock(&rng_mutex);
+	return ret ? ret : len;
+}
+
 static DEVICE_ATTR_RW(rng_current);
 static DEVICE_ATTR_RO(rng_available);
 static DEVICE_ATTR_RO(rng_selected);
+static DEVICE_ATTR_RW(rng_quality);
 
 static struct attribute *rng_dev_attrs[] = {
 	&dev_attr_rng_current.attr,
 	&dev_attr_rng_available.attr,
 	&dev_attr_rng_selected.attr,
+	&dev_attr_rng_quality.attr,
 	NULL
 };
 
@@ -424,9 +490,11 @@ static int __init register_miscdev(void)
 
 static int hwrng_fillfn(void *unused)
 {
+	size_t entropy, entropy_credit = 0; /* in 1/1024 of a bit */
 	long rc;
 
 	while (!kthread_should_stop()) {
+		unsigned short quality;
 		struct hwrng *rng;
 
 		rng = get_current_rng();
@@ -435,27 +503,49 @@ static int hwrng_fillfn(void *unused)
 		mutex_lock(&reading_mutex);
 		rc = rng_get_data(rng, rng_fillbuf,
 				  rng_buffer_size(), 1);
+		if (current_quality != rng->quality)
+			rng->quality = current_quality; /* obsolete */
+		quality = rng->quality;
 		mutex_unlock(&reading_mutex);
 		put_rng(rng);
+
+		if (!quality)
+			break;
+
 		if (rc <= 0) {
 			pr_warn("hwrng: no data available\n");
 			msleep_interruptible(10000);
 			continue;
 		}
+
+		/* If we cannot credit at least one bit of entropy,
+		 * keep track of the remainder for the next iteration
+		 */
+		entropy = rc * quality * 8 + entropy_credit;
+		if ((entropy >> 10) == 0)
+			entropy_credit = entropy;
+
 		/* Outside lock, sure, but y'know: randomness. */
 		add_hwgenerator_randomness((void *)rng_fillbuf, rc,
-					   rc * current_quality * 8 >> 10);
+					   entropy >> 10);
 	}
 	hwrng_fill = NULL;
 	return 0;
 }
 
-static void start_khwrngd(void)
+static void hwrng_manage_rngd(struct hwrng *rng)
 {
-	hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng");
-	if (IS_ERR(hwrng_fill)) {
-		pr_err("hwrng_fill thread creation failed\n");
-		hwrng_fill = NULL;
+	if (WARN_ON(!mutex_is_locked(&rng_mutex)))
+		return;
+
+	if (rng->quality == 0 && hwrng_fill)
+		kthread_stop(hwrng_fill);
+	if (rng->quality > 0 && !hwrng_fill) {
+		hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng");
+		if (IS_ERR(hwrng_fill)) {
+			pr_err("hwrng_fill thread creation failed\n");
+			hwrng_fill = NULL;
+		}
 	}
 }
 
@@ -463,7 +553,6 @@ int hwrng_register(struct hwrng *rng)
 {
 	int err = -EINVAL;
 	struct hwrng *tmp;
-	struct list_head *rng_list_ptr;
 	bool is_new_current = false;
 
 	if (!rng->name || (!rng->data_read && !rng->read))
@@ -477,18 +566,11 @@ int hwrng_register(struct hwrng *rng)
 		if (strcmp(tmp->name, rng->name) == 0)
 			goto out_unlock;
 	}
+	list_add_tail(&rng->list, &rng_list);
 
 	init_completion(&rng->cleanup_done);
 	complete(&rng->cleanup_done);
 
-	/* rng_list is sorted by decreasing quality */
-	list_for_each(rng_list_ptr, &rng_list) {
-		tmp = list_entry(rng_list_ptr, struct hwrng, list);
-		if (tmp->quality < rng->quality)
-			break;
-	}
-	list_add_tail(&rng->list, rng_list_ptr);
-
 	if (!current_rng ||
 	    (!cur_rng_set_by_user && rng->quality > current_rng->quality)) {
 		/*
@@ -638,7 +720,7 @@ static void __exit hwrng_modexit(void)
 	unregister_miscdev();
 }
 
-module_init(hwrng_modinit);
+fs_initcall(hwrng_modinit); /* depends on misc_register() */
 module_exit(hwrng_modexit);
 
 MODULE_DESCRIPTION("H/W Random Number Generator (RNG) driver");
diff --git a/drivers/char/hw_random/nomadik-rng.c b/drivers/char/hw_random/nomadik-rng.c
index 67947a19aa22..e8f9621e7954 100644
--- a/drivers/char/hw_random/nomadik-rng.c
+++ b/drivers/char/hw_random/nomadik-rng.c
@@ -65,14 +65,14 @@ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id)
 out_release:
 	amba_release_regions(dev);
 out_clk:
-	clk_disable(rng_clk);
+	clk_disable_unprepare(rng_clk);
 	return ret;
 }
 
 static void nmk_rng_remove(struct amba_device *dev)
 {
 	amba_release_regions(dev);
-	clk_disable(rng_clk);
+	clk_disable_unprepare(rng_clk);
 }
 
 static const struct amba_id nmk_rng_ids[] = {
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index cc296f0823bd..9f586025dbe6 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -707,7 +707,7 @@ static const struct memdev {
 	 [5] = { "zero", 0666, &zero_fops, FMODE_NOWAIT },
 	 [7] = { "full", 0666, &full_fops, 0 },
 	 [8] = { "random", 0666, &random_fops, 0 },
-	 [9] = { "urandom", 0666, &urandom_fops, 0 },
+	 [9] = { "urandom", 0666, &random_fops, 0 },
 #ifdef CONFIG_PRINTK
 	[11] = { "kmsg", 0644, &kmsg_fops, 0 },
 #endif
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 3404a91edf29..0bdefada7453 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1,320 +1,28 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
 /*
- * random.c -- A strong random number generator
- *
  * Copyright (C) 2017-2022 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
- *
  * Copyright Matt Mackall <[email protected]>, 2003, 2004, 2005
- *
- * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999.  All
- * rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, and the entire permission notice in its entirety,
- *    including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- *    products derived from this software without specific prior
- *    written permission.
- *
- * ALTERNATIVELY, this product may be distributed under the terms of
- * the GNU General Public License, in which case the provisions of the GPL are
- * required INSTEAD OF the above restrictions.  (This clause is
- * necessary due to a potential bad interaction between the GPL and
- * the restrictions contained in a BSD-style copyright.)
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- */
-
-/*
- * (now, with legal B.S. out of the way.....)
- *
- * This routine gathers environmental noise from device drivers, etc.,
- * and returns good random numbers, suitable for cryptographic use.
- * Besides the obvious cryptographic uses, these numbers are also good
- * for seeding TCP sequence numbers, and other places where it is
- * desirable to have numbers which are not only random, but hard to
- * predict by an attacker.
- *
- * Theory of operation
- * ===================
- *
- * Computers are very predictable devices.  Hence it is extremely hard
- * to produce truly random numbers on a computer --- as opposed to
- * pseudo-random numbers, which can easily generated by using a
- * algorithm.  Unfortunately, it is very easy for attackers to guess
- * the sequence of pseudo-random number generators, and for some
- * applications this is not acceptable.  So instead, we must try to
- * gather "environmental noise" from the computer's environment, which
- * must be hard for outside attackers to observe, and use that to
- * generate random numbers.  In a Unix environment, this is best done
- * from inside the kernel.
- *
- * Sources of randomness from the environment include inter-keyboard
- * timings, inter-interrupt timings from some interrupts, and other
- * events which are both (a) non-deterministic and (b) hard for an
- * outside observer to measure.  Randomness from these sources are
- * added to an "entropy pool", which is mixed using a CRC-like function.
- * This is not cryptographically strong, but it is adequate assuming
- * the randomness is not chosen maliciously, and it is fast enough that
- * the overhead of doing it on every interrupt is very reasonable.
- * As random bytes are mixed into the entropy pool, the routines keep
- * an *estimate* of how many bits of randomness have been stored into
- * the random number generator's internal state.
- *
- * When random bytes are desired, they are obtained by taking the BLAKE2s
- * hash of the contents of the "entropy pool".  The BLAKE2s hash avoids
- * exposing the internal state of the entropy pool.  It is believed to
- * be computationally infeasible to derive any useful information
- * about the input of BLAKE2s from its output.  Even if it is possible to
- * analyze BLAKE2s in some clever way, as long as the amount of data
- * returned from the generator is less than the inherent entropy in
- * the pool, the output data is totally unpredictable.  For this
- * reason, the routine decreases its internal estimate of how many
- * bits of "true randomness" are contained in the entropy pool as it
- * outputs random numbers.
- *
- * If this estimate goes to zero, the routine can still generate
- * random numbers; however, an attacker may (at least in theory) be
- * able to infer the future output of the generator from prior
- * outputs.  This requires successful cryptanalysis of BLAKE2s, which is
- * not believed to be feasible, but there is a remote possibility.
- * Nonetheless, these numbers should be useful for the vast majority
- * of purposes.
- *
- * Exported interfaces ---- output
- * ===============================
- *
- * There are four exported interfaces; two for use within the kernel,
- * and two for use from userspace.
- *
- * Exported interfaces ---- userspace output
- * -----------------------------------------
- *
- * The userspace interfaces are two character devices /dev/random and
- * /dev/urandom.  /dev/random is suitable for use when very high
- * quality randomness is desired (for example, for key generation or
- * one-time pads), as it will only return a maximum of the number of
- * bits of randomness (as estimated by the random number generator)
- * contained in the entropy pool.
- *
- * The /dev/urandom device does not have this limit, and will return
- * as many bytes as are requested.  As more and more random bytes are
- * requested without giving time for the entropy pool to recharge,
- * this will result in random numbers that are merely cryptographically
- * strong.  For many applications, however, this is acceptable.
- *
- * Exported interfaces ---- kernel output
- * --------------------------------------
- *
- * The primary kernel interface is
- *
- *	void get_random_bytes(void *buf, int nbytes);
- *
- * This interface will return the requested number of random bytes,
- * and place it in the requested buffer.  This is equivalent to a
- * read from /dev/urandom.
- *
- * For less critical applications, there are the functions:
- *
- *	u32 get_random_u32()
- *	u64 get_random_u64()
- *	unsigned int get_random_int()
- *	unsigned long get_random_long()
- *
- * These are produced by a cryptographic RNG seeded from get_random_bytes,
- * and so do not deplete the entropy pool as much.  These are recommended
- * for most in-kernel operations *if the result is going to be stored in
- * the kernel*.
- *
- * Specifically, the get_random_int() family do not attempt to do
- * "anti-backtracking".  If you capture the state of the kernel (e.g.
- * by snapshotting the VM), you can figure out previous get_random_int()
- * return values.  But if the value is stored in the kernel anyway,
- * this is not a problem.
- *
- * It *is* safe to expose get_random_int() output to attackers (e.g. as
- * network cookies); given outputs 1..n, it's not feasible to predict
- * outputs 0 or n+1.  The only concern is an attacker who breaks into
- * the kernel later; the get_random_int() engine is not reseeded as
- * often as the get_random_bytes() one.
- *
- * get_random_bytes() is needed for keys that need to stay secret after
- * they are erased from the kernel.  For example, any key that will
- * be wrapped and stored encrypted.  And session encryption keys: we'd
- * like to know that after the session is closed and the keys erased,
- * the plaintext is unrecoverable to someone who recorded the ciphertext.
- *
- * But for network ports/cookies, stack canaries, PRNG seeds, address
- * space layout randomization, session *authentication* keys, or other
- * applications where the sensitive data is stored in the kernel in
- * plaintext for as long as it's sensitive, the get_random_int() family
- * is just fine.
- *
- * Consider ASLR.  We want to keep the address space secret from an
- * outside attacker while the process is running, but once the address
- * space is torn down, it's of no use to an attacker any more.  And it's
- * stored in kernel data structures as long as it's alive, so worrying
- * about an attacker's ability to extrapolate it from the get_random_int()
- * CRNG is silly.
- *
- * Even some cryptographic keys are safe to generate with get_random_int().
- * In particular, keys for SipHash are generally fine.  Here, knowledge
- * of the key authorizes you to do something to a kernel object (inject
- * packets to a network connection, or flood a hash table), and the
- * key is stored with the object being protected.  Once it goes away,
- * we no longer care if anyone knows the key.
- *
- * prandom_u32()
- * -------------
- *
- * For even weaker applications, see the pseudorandom generator
- * prandom_u32(), prandom_max(), and prandom_bytes().  If the random
- * numbers aren't security-critical at all, these are *far* cheaper.
- * Useful for self-tests, random error simulation, randomized backoffs,
- * and any other application where you trust that nobody is trying to
- * maliciously mess with you by guessing the "random" numbers.
- *
- * Exported interfaces ---- input
- * ==============================
- *
- * The current exported interfaces for gathering environmental noise
- * from the devices are:
- *
- *	void add_device_randomness(const void *buf, unsigned int size);
- *	void add_input_randomness(unsigned int type, unsigned int code,
- *                                unsigned int value);
- *	void add_interrupt_randomness(int irq);
- *	void add_disk_randomness(struct gendisk *disk);
- *	void add_hwgenerator_randomness(const char *buffer, size_t count,
- *					size_t entropy);
- *	void add_bootloader_randomness(const void *buf, unsigned int size);
- *
- * add_device_randomness() is for adding data to the random pool that
- * is likely to differ between two devices (or possibly even per boot).
- * This would be things like MAC addresses or serial numbers, or the
- * read-out of the RTC. This does *not* add any actual entropy to the
- * pool, but it initializes the pool to different values for devices
- * that might otherwise be identical and have very little entropy
- * available to them (particularly common in the embedded world).
- *
- * add_input_randomness() uses the input layer interrupt timing, as well as
- * the event type information from the hardware.
- *
- * add_interrupt_randomness() uses the interrupt timing as random
- * inputs to the entropy pool. Using the cycle counters and the irq source
- * as inputs, it feeds the randomness roughly once a second.
- *
- * add_disk_randomness() uses what amounts to the seek time of block
- * layer request events, on a per-disk_devt basis, as input to the
- * entropy pool. Note that high-speed solid state drives with very low
- * seek times do not make for good sources of entropy, as their seek
- * times are usually fairly consistent.
- *
- * All of these routines try to estimate how many bits of randomness a
- * particular randomness source.  They do this by keeping track of the
- * first and second order deltas of the event timings.
- *
- * add_hwgenerator_randomness() is for true hardware RNGs, and will credit
- * entropy as specified by the caller. If the entropy pool is full it will
- * block until more entropy is needed.
- *
- * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or
- * add_device_randomness(), depending on whether or not the configuration
- * option CONFIG_RANDOM_TRUST_BOOTLOADER is set.
- *
- * Ensuring unpredictability at system startup
- * ============================================
- *
- * When any operating system starts up, it will go through a sequence
- * of actions that are fairly predictable by an adversary, especially
- * if the start-up does not involve interaction with a human operator.
- * This reduces the actual number of bits of unpredictability in the
- * entropy pool below the value in entropy_count.  In order to
- * counteract this effect, it helps to carry information in the
- * entropy pool across shut-downs and start-ups.  To do this, put the
- * following lines an appropriate script which is run during the boot
- * sequence:
- *
- *	echo "Initializing random number generator..."
- *	random_seed=/var/run/random-seed
- *	# Carry a random seed from start-up to start-up
- *	# Load and then save the whole entropy pool
- *	if [ -f $random_seed ]; then
- *		cat $random_seed >/dev/urandom
- *	else
- *		touch $random_seed
- *	fi
- *	chmod 600 $random_seed
- *	dd if=/dev/urandom of=$random_seed count=1 bs=512
- *
- * and the following lines in an appropriate script which is run as
- * the system is shutdown:
- *
- *	# Carry a random seed from shut-down to start-up
- *	# Save the whole entropy pool
- *	echo "Saving random seed..."
- *	random_seed=/var/run/random-seed
- *	touch $random_seed
- *	chmod 600 $random_seed
- *	dd if=/dev/urandom of=$random_seed count=1 bs=512
- *
- * For example, on most modern systems using the System V init
- * scripts, such code fragments would be found in
- * /etc/rc.d/init.d/random.  On older Linux systems, the correct script
- * location might be in /etc/rcb.d/rc.local or /etc/rc.d/rc.0.
- *
- * Effectively, these commands cause the contents of the entropy pool
- * to be saved at shut-down time and reloaded into the entropy pool at
- * start-up.  (The 'dd' in the addition to the bootup script is to
- * make sure that /etc/random-seed is different for every start-up,
- * even if the system crashes without executing rc.0.)  Even with
- * complete knowledge of the start-up activities, predicting the state
- * of the entropy pool requires knowledge of the previous history of
- * the system.
- *
- * Configuring the /dev/random driver under Linux
- * ==============================================
- *
- * The /dev/random driver under Linux uses minor numbers 8 and 9 of
- * the /dev/mem major number (#1).  So if your system does not have
- * /dev/random and /dev/urandom created already, they can be created
- * by using the commands:
- *
- *	mknod /dev/random c 1 8
- *	mknod /dev/urandom c 1 9
- *
- * Acknowledgements:
- * =================
- *
- * Ideas for constructing this random number generator were derived
- * from Pretty Good Privacy's random number generator, and from private
- * discussions with Phil Karn.  Colin Plumb provided a faster random
- * number generator, which speed up the mixing function of the entropy
- * pool, taken from PGPfone.  Dale Worley has also contributed many
- * useful ideas and suggestions to improve this driver.
- *
- * Any flaws in the design are solely my responsibility, and should
- * not be attributed to the Phil, Colin, or any of authors of PGP.
- *
- * Further background information on this topic may be obtained from
- * RFC 1750, "Randomness Recommendations for Security", by Donald
- * Eastlake, Steve Crocker, and Jeff Schiller.
+ * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All rights reserved.
+ *
+ * This driver produces cryptographically secure pseudorandom data. It is divided
+ * into roughly six sections, each with a section header:
+ *
+ *   - Initialization and readiness waiting.
+ *   - Fast key erasure RNG, the "crng".
+ *   - Entropy accumulation and extraction routines.
+ *   - Entropy collection routines.
+ *   - Userspace reader/writer interfaces.
+ *   - Sysctl interface.
+ *
+ * The high level overview is that there is one input pool, into which
+ * various pieces of data are hashed. Some of that data is then "credited" as
+ * having a certain number of bits of entropy. When enough bits of entropy are
+ * available, the hash is finalized and handed as a key to a stream cipher that
+ * expands it indefinitely for various consumers. This key is periodically
+ * refreshed as the various entropy collectors, described below, add data to the
+ * input pool and credit it. There is currently no Fortuna-like scheduler
+ * involved, which can lead to malicious entropy sources causing a premature
+ * reseed, and the entropy estimates are, at best, conservative guesses.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -344,744 +52,937 @@
 #include <linux/syscalls.h>
 #include <linux/completion.h>
 #include <linux/uuid.h>
+#include <linux/uaccess.h>
 #include <crypto/chacha.h>
 #include <crypto/blake2s.h>
-
 #include <asm/processor.h>
-#include <linux/uaccess.h>
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
 #include <asm/io.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/random.h>
-
-/* #define ADD_INTERRUPT_BENCH */
-
-/*
- * If the entropy count falls under this number of bits, then we
- * should wake up processes which are selecting or polling on write
- * access to /dev/random.
- */
-static int random_write_wakeup_bits = 28 * (1 << 5);
-
-/*
- * Originally, we used a primitive polynomial of degree .poolwords
- * over GF(2).  The taps for various sizes are defined below.  They
- * were chosen to be evenly spaced except for the last tap, which is 1
- * to get the twisting happening as fast as possible.
- *
- * For the purposes of better mixing, we use the CRC-32 polynomial as
- * well to make a (modified) twisted Generalized Feedback Shift
- * Register.  (See M. Matsumoto & Y. Kurita, 1992.  Twisted GFSR
- * generators.  ACM Transactions on Modeling and Computer Simulation
- * 2(3):179-194.  Also see M. Matsumoto & Y. Kurita, 1994.  Twisted
- * GFSR generators II.  ACM Transactions on Modeling and Computer
- * Simulation 4:254-266)
+/*********************************************************************
  *
- * Thanks to Colin Plumb for suggesting this.
+ * Initialization and readiness waiting.
  *
- * The mixing operation is much less sensitive than the output hash,
- * where we use BLAKE2s.  All that we want of mixing operation is that
- * it be a good non-cryptographic hash; i.e. it not produce collisions
- * when fed "random" data of the sort we expect to see.  As long as
- * the pool state differs for different inputs, we have preserved the
- * input entropy and done a good job.  The fact that an intelligent
- * attacker can construct inputs that will produce controlled
- * alterations to the pool's state is not important because we don't
- * consider such inputs to contribute any randomness.  The only
- * property we need with respect to them is that the attacker can't
- * increase his/her knowledge of the pool's state.  Since all
- * additions are reversible (knowing the final state and the input,
- * you can reconstruct the initial state), if an attacker has any
- * uncertainty about the initial state, he/she can only shuffle that
- * uncertainty about, but never cause any collisions (which would
- * decrease the uncertainty).
+ * Much of the RNG infrastructure is devoted to various dependencies
+ * being able to wait until the RNG has collected enough entropy and
+ * is ready for safe consumption.
  *
- * Our mixing functions were analyzed by Lacharme, Roeck, Strubel, and
- * Videau in their paper, "The Linux Pseudorandom Number Generator
- * Revisited" (see: http://eprint.iacr.org/2012/251.pdf).  In their
- * paper, they point out that we are not using a true Twisted GFSR,
- * since Matsumoto & Kurita used a trinomial feedback polynomial (that
- * is, with only three taps, instead of the six that we are using).
- * As a result, the resulting polynomial is neither primitive nor
- * irreducible, and hence does not have a maximal period over
- * GF(2**32).  They suggest a slight change to the generator
- * polynomial which improves the resulting TGFSR polynomial to be
- * irreducible, which we have made here.
- */
-enum poolinfo {
-	POOL_WORDS = 128,
-	POOL_WORDMASK = POOL_WORDS - 1,
-	POOL_BYTES = POOL_WORDS * sizeof(u32),
-	POOL_BITS = POOL_BYTES * 8,
-	POOL_BITSHIFT = ilog2(POOL_BITS),
-
-	/* To allow fractional bits to be tracked, the entropy_count field is
-	 * denominated in units of 1/8th bits. */
-	POOL_ENTROPY_SHIFT = 3,
-#define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT)
-	POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT,
-
-	/* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */
-	POOL_TAP1 = 104,
-	POOL_TAP2 = 76,
-	POOL_TAP3 = 51,
-	POOL_TAP4 = 25,
-	POOL_TAP5 = 1,
-
-	EXTRACT_SIZE = BLAKE2S_HASH_SIZE / 2
-};
-
-/*
- * Static global variables
- */
-static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
-static struct fasync_struct *fasync;
-
-static DEFINE_SPINLOCK(random_ready_list_lock);
-static LIST_HEAD(random_ready_list);
-
-struct crng_state {
-	u32 state[16];
-	unsigned long init_time;
-	spinlock_t lock;
-};
-
-static struct crng_state primary_crng = {
-	.lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock),
-	.state[0] = CHACHA_CONSTANT_EXPA,
-	.state[1] = CHACHA_CONSTANT_ND_3,
-	.state[2] = CHACHA_CONSTANT_2_BY,
-	.state[3] = CHACHA_CONSTANT_TE_K,
-};
+ *********************************************************************/
 
 /*
  * crng_init =  0 --> Uninitialized
  *		1 --> Initialized
  *		2 --> Initialized from input_pool
  *
- * crng_init is protected by primary_crng->lock, and only increases
+ * crng_init is protected by base_crng->lock, and only increases
  * its value (from 0->1->2).
  */
 static int crng_init = 0;
-static bool crng_need_final_init = false;
 #define crng_ready() (likely(crng_init > 1))
-static int crng_init_cnt = 0;
-static unsigned long crng_global_init_time = 0;
-#define CRNG_INIT_CNT_THRESH (2 * CHACHA_KEY_SIZE)
-static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]);
-static void _crng_backtrack_protect(struct crng_state *crng,
-				    u8 tmp[CHACHA_BLOCK_SIZE], int used);
-static void process_random_ready_list(void);
-static void _get_random_bytes(void *buf, int nbytes);
+/* Various types of waiters for crng_init->2 transition. */
+static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
+static struct fasync_struct *fasync;
+static DEFINE_SPINLOCK(random_ready_chain_lock);
+static RAW_NOTIFIER_HEAD(random_ready_chain);
 
+/* Control how we warn userspace. */
 static struct ratelimit_state unseeded_warning =
 	RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3);
-static struct ratelimit_state urandom_warning =
-	RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3);
-
 static int ratelimit_disable __read_mostly;
-
 module_param_named(ratelimit_disable, ratelimit_disable, int, 0644);
 MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression");
 
-/**********************************************************************
- *
- * OS independent entropy store.   Here are the functions which handle
- * storing entropy in an entropy pool.
+/*
+ * Returns whether or not the input pool has been seeded and thus guaranteed
+ * to supply cryptographically secure random numbers. This applies to
+ * get_random_bytes() and get_random_{u32,u64,int,long}().
  *
- **********************************************************************/
-
-static u32 input_pool_data[POOL_WORDS] __latent_entropy;
-
-static struct {
-	spinlock_t lock;
-	u16 add_ptr;
-	u16 input_rotate;
-	int entropy_count;
-} input_pool = {
-	.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
-};
+ * Returns: true if the input pool has been seeded.
+ *          false if the input pool has not been seeded.
+ */
+bool rng_is_initialized(void)
+{
+	return crng_ready();
+}
+EXPORT_SYMBOL(rng_is_initialized);
 
-static ssize_t extract_entropy(void *buf, size_t nbytes, int min);
-static ssize_t _extract_entropy(void *buf, size_t nbytes);
+/* Used by wait_for_random_bytes(), and considered an entropy collector, below. */
+static void try_to_generate_entropy(void);
 
-static void crng_reseed(struct crng_state *crng, bool use_input_pool);
+/*
+ * Wait for the input pool to be seeded and thus guaranteed to supply
+ * cryptographically secure random numbers. This applies to
+ * get_random_bytes() and get_random_{u32,u64,int,long}(). Using any
+ * of these functions without first calling this function means that
+ * the returned numbers might not be cryptographically secure.
+ *
+ * Returns: 0 if the input pool has been seeded.
+ *          -ERESTARTSYS if the function was interrupted by a signal.
+ */
+int wait_for_random_bytes(void)
+{
+	while (!crng_ready()) {
+		int ret;
 
-static const u32 twist_table[8] = {
-	0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158,
-	0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 };
+		try_to_generate_entropy();
+		ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ);
+		if (ret)
+			return ret > 0 ? 0 : ret;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(wait_for_random_bytes);
 
 /*
- * This function adds bytes into the entropy "pool".  It does not
- * update the entropy estimate.  The caller should call
- * credit_entropy_bits if this is appropriate.
+ * Add a callback function that will be invoked when the input
+ * pool is initialised.
  *
- * The pool is stirred with a primitive polynomial of the appropriate
- * degree, and then twisted.  We twist by three bits at a time because
- * it's cheap to do so and helps slightly in the expected case where
- * the entropy is concentrated in the low-order bits.
+ * returns: 0 if callback is successfully added
+ *	    -EALREADY if pool is already initialised (callback not called)
  */
-static void _mix_pool_bytes(const void *in, int nbytes)
+int register_random_ready_notifier(struct notifier_block *nb)
 {
-	unsigned long i;
-	int input_rotate;
-	const u8 *bytes = in;
-	u32 w;
-
-	input_rotate = input_pool.input_rotate;
-	i = input_pool.add_ptr;
-
-	/* mix one byte at a time to simplify size handling and churn faster */
-	while (nbytes--) {
-		w = rol32(*bytes++, input_rotate);
-		i = (i - 1) & POOL_WORDMASK;
-
-		/* XOR in the various taps */
-		w ^= input_pool_data[i];
-		w ^= input_pool_data[(i + POOL_TAP1) & POOL_WORDMASK];
-		w ^= input_pool_data[(i + POOL_TAP2) & POOL_WORDMASK];
-		w ^= input_pool_data[(i + POOL_TAP3) & POOL_WORDMASK];
-		w ^= input_pool_data[(i + POOL_TAP4) & POOL_WORDMASK];
-		w ^= input_pool_data[(i + POOL_TAP5) & POOL_WORDMASK];
-
-		/* Mix the result back in with a twist */
-		input_pool_data[i] = (w >> 3) ^ twist_table[w & 7];
+	unsigned long flags;
+	int ret = -EALREADY;
 
-		/*
-		 * Normally, we add 7 bits of rotation to the pool.
-		 * At the beginning of the pool, add an extra 7 bits
-		 * rotation, so that successive passes spread the
-		 * input bits across the pool evenly.
-		 */
-		input_rotate = (input_rotate + (i ? 7 : 14)) & 31;
-	}
+	if (crng_ready())
+		return ret;
 
-	input_pool.input_rotate = input_rotate;
-	input_pool.add_ptr = i;
+	spin_lock_irqsave(&random_ready_chain_lock, flags);
+	if (!crng_ready())
+		ret = raw_notifier_chain_register(&random_ready_chain, nb);
+	spin_unlock_irqrestore(&random_ready_chain_lock, flags);
+	return ret;
 }
 
-static void __mix_pool_bytes(const void *in, int nbytes)
+/*
+ * Delete a previously registered readiness callback function.
+ */
+int unregister_random_ready_notifier(struct notifier_block *nb)
 {
-	trace_mix_pool_bytes_nolock(nbytes, _RET_IP_);
-	_mix_pool_bytes(in, nbytes);
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&random_ready_chain_lock, flags);
+	ret = raw_notifier_chain_unregister(&random_ready_chain, nb);
+	spin_unlock_irqrestore(&random_ready_chain_lock, flags);
+	return ret;
 }
 
-static void mix_pool_bytes(const void *in, int nbytes)
+static void process_random_ready_list(void)
 {
 	unsigned long flags;
 
-	trace_mix_pool_bytes(nbytes, _RET_IP_);
-	spin_lock_irqsave(&input_pool.lock, flags);
-	_mix_pool_bytes(in, nbytes);
-	spin_unlock_irqrestore(&input_pool.lock, flags);
+	spin_lock_irqsave(&random_ready_chain_lock, flags);
+	raw_notifier_call_chain(&random_ready_chain, 0, NULL);
+	spin_unlock_irqrestore(&random_ready_chain_lock, flags);
 }
 
-struct fast_pool {
-	u32 pool[4];
-	unsigned long last;
-	u16 reg_idx;
-	u8 count;
-};
+#define warn_unseeded_randomness(previous) \
+	_warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous))
 
-/*
- * This is a fast mixing routine used by the interrupt randomness
- * collector.  It's hardcoded for an 128 bit pool and assumes that any
- * locks that might be needed are taken by the caller.
- */
-static void fast_mix(struct fast_pool *f)
+static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous)
 {
-	u32 a = f->pool[0],	b = f->pool[1];
-	u32 c = f->pool[2],	d = f->pool[3];
+#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM
+	const bool print_once = false;
+#else
+	static bool print_once __read_mostly;
+#endif
+
+	if (print_once || crng_ready() ||
+	    (previous && (caller == READ_ONCE(*previous))))
+		return;
+	WRITE_ONCE(*previous, caller);
+#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM
+	print_once = true;
+#endif
+	if (__ratelimit(&unseeded_warning))
+		printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n",
+				func_name, caller, crng_init);
+}
+
 
-	a += b;			c += d;
-	b = rol32(b, 6);	d = rol32(d, 27);
-	d ^= a;			b ^= c;
+/*********************************************************************
+ *
+ * Fast key erasure RNG, the "crng".
+ *
+ * These functions expand entropy from the entropy extractor into
+ * long streams for external consumption using the "fast key erasure"
+ * RNG described at <https://blog.cr.yp.to/20170723-random.html>.
+ *
+ * There are a few exported interfaces for use by other drivers:
+ *
+ *	void get_random_bytes(void *buf, size_t nbytes)
+ *	u32 get_random_u32()
+ *	u64 get_random_u64()
+ *	unsigned int get_random_int()
+ *	unsigned long get_random_long()
+ *
+ * These interfaces will return the requested number of random bytes
+ * into the given buffer or as a return value. The returned numbers are
+ * the same as those of getrandom(0). The integer family of functions may
+ * be higher performance for one-off random integers, because they do a
+ * bit of buffering and do not invoke reseeding.
+ *
+ *********************************************************************/
 
-	a += b;			c += d;
-	b = rol32(b, 16);	d = rol32(d, 14);
-	d ^= a;			b ^= c;
+enum {
+	CRNG_RESEED_INTERVAL = 300 * HZ,
+	CRNG_INIT_CNT_THRESH = 2 * CHACHA_KEY_SIZE
+};
 
-	a += b;			c += d;
-	b = rol32(b, 6);	d = rol32(d, 27);
-	d ^= a;			b ^= c;
+static struct {
+	u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long));
+	unsigned long birth;
+	unsigned long generation;
+	spinlock_t lock;
+} base_crng = {
+	.lock = __SPIN_LOCK_UNLOCKED(base_crng.lock)
+};
 
-	a += b;			c += d;
-	b = rol32(b, 16);	d = rol32(d, 14);
-	d ^= a;			b ^= c;
+struct crng {
+	u8 key[CHACHA_KEY_SIZE];
+	unsigned long generation;
+	local_lock_t lock;
+};
 
-	f->pool[0] = a;  f->pool[1] = b;
-	f->pool[2] = c;  f->pool[3] = d;
-	f->count++;
-}
+static DEFINE_PER_CPU(struct crng, crngs) = {
+	.generation = ULONG_MAX,
+	.lock = INIT_LOCAL_LOCK(crngs.lock),
+};
 
-static void process_random_ready_list(void)
+/* Used by crng_reseed() to extract a new seed from the input pool. */
+static bool drain_entropy(void *buf, size_t nbytes, bool force);
+
+/*
+ * This extracts a new crng key from the input pool, but only if there is a
+ * sufficient amount of entropy available or force is true, in order to
+ * mitigate bruteforcing of newly added bits.
+ */
+static void crng_reseed(bool force)
 {
 	unsigned long flags;
-	struct random_ready_callback *rdy, *tmp;
+	unsigned long next_gen;
+	u8 key[CHACHA_KEY_SIZE];
+	bool finalize_init = false;
 
-	spin_lock_irqsave(&random_ready_list_lock, flags);
-	list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) {
-		struct module *owner = rdy->owner;
+	/* Only reseed if we can, to prevent brute forcing a small amount of new bits. */
+	if (!drain_entropy(key, sizeof(key), force))
+		return;
 
-		list_del_init(&rdy->list);
-		rdy->func(rdy);
-		module_put(owner);
+	/*
+	 * We copy the new key into the base_crng, overwriting the old one,
+	 * and update the generation counter. We avoid hitting ULONG_MAX,
+	 * because the per-cpu crngs are initialized to ULONG_MAX, so this
+	 * forces new CPUs that come online to always initialize.
+	 */
+	spin_lock_irqsave(&base_crng.lock, flags);
+	memcpy(base_crng.key, key, sizeof(base_crng.key));
+	next_gen = base_crng.generation + 1;
+	if (next_gen == ULONG_MAX)
+		++next_gen;
+	WRITE_ONCE(base_crng.generation, next_gen);
+	WRITE_ONCE(base_crng.birth, jiffies);
+	if (!crng_ready()) {
+		crng_init = 2;
+		finalize_init = true;
+	}
+	spin_unlock_irqrestore(&base_crng.lock, flags);
+	memzero_explicit(key, sizeof(key));
+	if (finalize_init) {
+		process_random_ready_list();
+		wake_up_interruptible(&crng_init_wait);
+		kill_fasync(&fasync, SIGIO, POLL_IN);
+		pr_notice("crng init done\n");
+		if (unseeded_warning.missed) {
+			pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n",
+				  unseeded_warning.missed);
+			unseeded_warning.missed = 0;
+		}
 	}
-	spin_unlock_irqrestore(&random_ready_list_lock, flags);
 }
 
 /*
- * Credit (or debit) the entropy store with n bits of entropy.
- * Use credit_entropy_bits_safe() if the value comes from userspace
- * or otherwise should be checked for extreme values.
+ * This generates a ChaCha block using the provided key, and then
+ * immediately overwites that key with half the block. It returns
+ * the resultant ChaCha state to the user, along with the second
+ * half of the block containing 32 bytes of random data that may
+ * be used; random_data_len may not be greater than 32.
  */
-static void credit_entropy_bits(int nbits)
+static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
+				  u32 chacha_state[CHACHA_STATE_WORDS],
+				  u8 *random_data, size_t random_data_len)
 {
-	int entropy_count, entropy_bits, orig;
-	int nfrac = nbits << POOL_ENTROPY_SHIFT;
+	u8 first_block[CHACHA_BLOCK_SIZE];
 
-	/* Ensure that the multiplication can avoid being 64 bits wide. */
-	BUILD_BUG_ON(2 * (POOL_ENTROPY_SHIFT + POOL_BITSHIFT) > 31);
+	BUG_ON(random_data_len > 32);
 
-	if (!nbits)
-		return;
+	chacha_init_consts(chacha_state);
+	memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE);
+	memset(&chacha_state[12], 0, sizeof(u32) * 4);
+	chacha20_block(chacha_state, first_block);
 
-retry:
-	entropy_count = orig = READ_ONCE(input_pool.entropy_count);
-	if (nfrac < 0) {
-		/* Debit */
-		entropy_count += nfrac;
-	} else {
-		/*
-		 * Credit: we have to account for the possibility of
-		 * overwriting already present entropy.	 Even in the
-		 * ideal case of pure Shannon entropy, new contributions
-		 * approach the full value asymptotically:
-		 *
-		 * entropy <- entropy + (pool_size - entropy) *
-		 *	(1 - exp(-add_entropy/pool_size))
-		 *
-		 * For add_entropy <= pool_size/2 then
-		 * (1 - exp(-add_entropy/pool_size)) >=
-		 *    (add_entropy/pool_size)*0.7869...
-		 * so we can approximate the exponential with
-		 * 3/4*add_entropy/pool_size and still be on the
-		 * safe side by adding at most pool_size/2 at a time.
-		 *
-		 * The use of pool_size-2 in the while statement is to
-		 * prevent rounding artifacts from making the loop
-		 * arbitrarily long; this limits the loop to log2(pool_size)*2
-		 * turns no matter how large nbits is.
-		 */
-		int pnfrac = nfrac;
-		const int s = POOL_BITSHIFT + POOL_ENTROPY_SHIFT + 2;
-		/* The +2 corresponds to the /4 in the denominator */
-
-		do {
-			unsigned int anfrac = min(pnfrac, POOL_FRACBITS / 2);
-			unsigned int add =
-				((POOL_FRACBITS - entropy_count) * anfrac * 3) >> s;
-
-			entropy_count += add;
-			pnfrac -= anfrac;
-		} while (unlikely(entropy_count < POOL_FRACBITS - 2 && pnfrac));
-	}
-
-	if (WARN_ON(entropy_count < 0)) {
-		pr_warn("negative entropy/overflow: count %d\n", entropy_count);
-		entropy_count = 0;
-	} else if (entropy_count > POOL_FRACBITS)
-		entropy_count = POOL_FRACBITS;
-	if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig)
-		goto retry;
-
-	trace_credit_entropy_bits(nbits, entropy_count >> POOL_ENTROPY_SHIFT, _RET_IP_);
+	memcpy(key, first_block, CHACHA_KEY_SIZE);
+	memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len);
+	memzero_explicit(first_block, sizeof(first_block));
+}
 
-	entropy_bits = entropy_count >> POOL_ENTROPY_SHIFT;
-	if (crng_init < 2 && entropy_bits >= 128)
-		crng_reseed(&primary_crng, true);
+/*
+ * Return whether the crng seed is considered to be sufficiently
+ * old that a reseeding might be attempted. This happens if the last
+ * reseeding was CRNG_RESEED_INTERVAL ago, or during early boot, at
+ * an interval proportional to the uptime.
+ */
+static bool crng_has_old_seed(void)
+{
+	static bool early_boot = true;
+	unsigned long interval = CRNG_RESEED_INTERVAL;
+
+	if (unlikely(READ_ONCE(early_boot))) {
+		time64_t uptime = ktime_get_seconds();
+		if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2)
+			WRITE_ONCE(early_boot, false);
+		else
+			interval = max_t(unsigned int, 5 * HZ,
+					 (unsigned int)uptime / 2 * HZ);
+	}
+	return time_after(jiffies, READ_ONCE(base_crng.birth) + interval);
 }
 
-static int credit_entropy_bits_safe(int nbits)
+/*
+ * This function returns a ChaCha state that you may use for generating
+ * random data. It also returns up to 32 bytes on its own of random data
+ * that may be used; random_data_len may not be greater than 32.
+ */
+static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
+			    u8 *random_data, size_t random_data_len)
 {
-	if (nbits < 0)
-		return -EINVAL;
+	unsigned long flags;
+	struct crng *crng;
 
-	/* Cap the value to avoid overflows */
-	nbits = min(nbits, POOL_BITS);
+	BUG_ON(random_data_len > 32);
 
-	credit_entropy_bits(nbits);
-	return 0;
-}
+	/*
+	 * For the fast path, we check whether we're ready, unlocked first, and
+	 * then re-check once locked later. In the case where we're really not
+	 * ready, we do fast key erasure with the base_crng directly, because
+	 * this is what crng_pre_init_inject() mutates during early init.
+	 */
+	if (!crng_ready()) {
+		bool ready;
+
+		spin_lock_irqsave(&base_crng.lock, flags);
+		ready = crng_ready();
+		if (!ready)
+			crng_fast_key_erasure(base_crng.key, chacha_state,
+					      random_data, random_data_len);
+		spin_unlock_irqrestore(&base_crng.lock, flags);
+		if (!ready)
+			return;
+	}
 
-/*********************************************************************
- *
- * CRNG using CHACHA20
- *
- *********************************************************************/
+	/*
+	 * If the base_crng is old enough, we try to reseed, which in turn
+	 * bumps the generation counter that we check below.
+	 */
+	if (unlikely(crng_has_old_seed()))
+		crng_reseed(false);
 
-#define CRNG_RESEED_INTERVAL (300 * HZ)
+	local_lock_irqsave(&crngs.lock, flags);
+	crng = raw_cpu_ptr(&crngs);
 
-static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
+	/*
+	 * If our per-cpu crng is older than the base_crng, then it means
+	 * somebody reseeded the base_crng. In that case, we do fast key
+	 * erasure on the base_crng, and use its output as the new key
+	 * for our per-cpu crng. This brings us up to date with base_crng.
+	 */
+	if (unlikely(crng->generation != READ_ONCE(base_crng.generation))) {
+		spin_lock(&base_crng.lock);
+		crng_fast_key_erasure(base_crng.key, chacha_state,
+				      crng->key, sizeof(crng->key));
+		crng->generation = base_crng.generation;
+		spin_unlock(&base_crng.lock);
+	}
+
+	/*
+	 * Finally, when we've made it this far, our per-cpu crng has an up
+	 * to date key, and we can do fast key erasure with it to produce
+	 * some random data and a ChaCha state for the caller. All other
+	 * branches of this function are "unlikely", so most of the time we
+	 * should wind up here immediately.
+	 */
+	crng_fast_key_erasure(crng->key, chacha_state, random_data, random_data_len);
+	local_unlock_irqrestore(&crngs.lock, flags);
+}
 
 /*
- * Hack to deal with crazy userspace progams when they are all trying
- * to access /dev/urandom in parallel.  The programs are almost
- * certainly doing something terribly wrong, but we'll work around
- * their brain damage.
+ * This function is for crng_init == 0 only. It loads entropy directly
+ * into the crng's key, without going through the input pool. It is,
+ * generally speaking, not very safe, but we use this only at early
+ * boot time when it's better to have something there rather than
+ * nothing.
+ *
+ * If account is set, then the crng_init_cnt counter is incremented.
+ * This shouldn't be set by functions like add_device_randomness(),
+ * where we can't trust the buffer passed to it is guaranteed to be
+ * unpredictable (so it might not have any entropy at all).
+ *
+ * Returns the number of bytes processed from input, which is bounded
+ * by CRNG_INIT_CNT_THRESH if account is true.
  */
-static struct crng_state **crng_node_pool __read_mostly;
+static size_t crng_pre_init_inject(const void *input, size_t len, bool account)
+{
+	static int crng_init_cnt = 0;
+	struct blake2s_state hash;
+	unsigned long flags;
 
-static void invalidate_batched_entropy(void);
-static void numa_crng_init(void);
+	blake2s_init(&hash, sizeof(base_crng.key));
 
-static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
-static int __init parse_trust_cpu(char *arg)
-{
-	return kstrtobool(arg, &trust_cpu);
-}
-early_param("random.trust_cpu", parse_trust_cpu);
+	spin_lock_irqsave(&base_crng.lock, flags);
+	if (crng_init != 0) {
+		spin_unlock_irqrestore(&base_crng.lock, flags);
+		return 0;
+	}
 
-static bool crng_init_try_arch(struct crng_state *crng)
-{
-	int i;
-	bool arch_init = true;
-	unsigned long rv;
+	if (account)
+		len = min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt);
 
-	for (i = 4; i < 16; i++) {
-		if (!arch_get_random_seed_long(&rv) &&
-		    !arch_get_random_long(&rv)) {
-			rv = random_get_entropy();
-			arch_init = false;
+	blake2s_update(&hash, base_crng.key, sizeof(base_crng.key));
+	blake2s_update(&hash, input, len);
+	blake2s_final(&hash, base_crng.key);
+
+	if (account) {
+		crng_init_cnt += len;
+		if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
+			++base_crng.generation;
+			crng_init = 1;
 		}
-		crng->state[i] ^= rv;
 	}
 
-	return arch_init;
+	spin_unlock_irqrestore(&base_crng.lock, flags);
+
+	if (crng_init == 1)
+		pr_notice("fast init done\n");
+
+	return len;
 }
 
-static bool __init crng_init_try_arch_early(void)
+static void _get_random_bytes(void *buf, size_t nbytes)
 {
-	int i;
-	bool arch_init = true;
-	unsigned long rv;
+	u32 chacha_state[CHACHA_STATE_WORDS];
+	u8 tmp[CHACHA_BLOCK_SIZE];
+	size_t len;
 
-	for (i = 4; i < 16; i++) {
-		if (!arch_get_random_seed_long_early(&rv) &&
-		    !arch_get_random_long_early(&rv)) {
-			rv = random_get_entropy();
-			arch_init = false;
+	if (!nbytes)
+		return;
+
+	len = min_t(size_t, 32, nbytes);
+	crng_make_state(chacha_state, buf, len);
+	nbytes -= len;
+	buf += len;
+
+	while (nbytes) {
+		if (nbytes < CHACHA_BLOCK_SIZE) {
+			chacha20_block(chacha_state, tmp);
+			memcpy(buf, tmp, nbytes);
+			memzero_explicit(tmp, sizeof(tmp));
+			break;
 		}
-		primary_crng.state[i] ^= rv;
+
+		chacha20_block(chacha_state, buf);
+		if (unlikely(chacha_state[12] == 0))
+			++chacha_state[13];
+		nbytes -= CHACHA_BLOCK_SIZE;
+		buf += CHACHA_BLOCK_SIZE;
 	}
 
-	return arch_init;
+	memzero_explicit(chacha_state, sizeof(chacha_state));
 }
 
-static void crng_initialize_secondary(struct crng_state *crng)
+/*
+ * This function is the exported kernel interface.  It returns some
+ * number of good random numbers, suitable for key generation, seeding
+ * TCP sequence numbers, etc.  It does not rely on the hardware random
+ * number generator.  For random bytes direct from the hardware RNG
+ * (when available), use get_random_bytes_arch(). In order to ensure
+ * that the randomness provided by this function is okay, the function
+ * wait_for_random_bytes() should be called and return 0 at least once
+ * at any point prior.
+ */
+void get_random_bytes(void *buf, size_t nbytes)
 {
-	chacha_init_consts(crng->state);
-	_get_random_bytes(&crng->state[4], sizeof(u32) * 12);
-	crng_init_try_arch(crng);
-	crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
-}
+	static void *previous;
 
-static void __init crng_initialize_primary(void)
-{
-	_extract_entropy(&primary_crng.state[4], sizeof(u32) * 12);
-	if (crng_init_try_arch_early() && trust_cpu && crng_init < 2) {
-		invalidate_batched_entropy();
-		numa_crng_init();
-		crng_init = 2;
-		pr_notice("crng init done (trusting CPU's manufacturer)\n");
-	}
-	primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
+	warn_unseeded_randomness(&previous);
+	_get_random_bytes(buf, nbytes);
 }
+EXPORT_SYMBOL(get_random_bytes);
 
-static void crng_finalize_init(void)
+static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes)
 {
-	if (!system_wq) {
-		/* We can't call numa_crng_init until we have workqueues,
-		 * so mark this for processing later. */
-		crng_need_final_init = true;
-		return;
-	}
+	bool large_request = nbytes > 256;
+	ssize_t ret = 0;
+	size_t len;
+	u32 chacha_state[CHACHA_STATE_WORDS];
+	u8 output[CHACHA_BLOCK_SIZE];
 
-	invalidate_batched_entropy();
-	numa_crng_init();
-	crng_init = 2;
-	crng_need_final_init = false;
-	process_random_ready_list();
-	wake_up_interruptible(&crng_init_wait);
-	kill_fasync(&fasync, SIGIO, POLL_IN);
-	pr_notice("crng init done\n");
-	if (unseeded_warning.missed) {
-		pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n",
-			  unseeded_warning.missed);
-		unseeded_warning.missed = 0;
-	}
-	if (urandom_warning.missed) {
-		pr_notice("%d urandom warning(s) missed due to ratelimiting\n",
-			  urandom_warning.missed);
-		urandom_warning.missed = 0;
-	}
-}
+	if (!nbytes)
+		return 0;
 
-static void do_numa_crng_init(struct work_struct *work)
-{
-	int i;
-	struct crng_state *crng;
-	struct crng_state **pool;
-
-	pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL | __GFP_NOFAIL);
-	for_each_online_node(i) {
-		crng = kmalloc_node(sizeof(struct crng_state),
-				    GFP_KERNEL | __GFP_NOFAIL, i);
-		spin_lock_init(&crng->lock);
-		crng_initialize_secondary(crng);
-		pool[i] = crng;
-	}
-	/* pairs with READ_ONCE() in select_crng() */
-	if (cmpxchg_release(&crng_node_pool, NULL, pool) != NULL) {
-		for_each_node(i)
-			kfree(pool[i]);
-		kfree(pool);
-	}
-}
+	len = min_t(size_t, 32, nbytes);
+	crng_make_state(chacha_state, output, len);
 
-static DECLARE_WORK(numa_crng_init_work, do_numa_crng_init);
+	if (copy_to_user(buf, output, len))
+		return -EFAULT;
+	nbytes -= len;
+	buf += len;
+	ret += len;
 
-static void numa_crng_init(void)
-{
-	if (IS_ENABLED(CONFIG_NUMA))
-		schedule_work(&numa_crng_init_work);
-}
+	while (nbytes) {
+		if (large_request && need_resched()) {
+			if (signal_pending(current))
+				break;
+			schedule();
+		}
 
-static struct crng_state *select_crng(void)
-{
-	if (IS_ENABLED(CONFIG_NUMA)) {
-		struct crng_state **pool;
-		int nid = numa_node_id();
-
-		/* pairs with cmpxchg_release() in do_numa_crng_init() */
-		pool = READ_ONCE(crng_node_pool);
-		if (pool && pool[nid])
-			return pool[nid];
+		chacha20_block(chacha_state, output);
+		if (unlikely(chacha_state[12] == 0))
+			++chacha_state[13];
+
+		len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE);
+		if (copy_to_user(buf, output, len)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		nbytes -= len;
+		buf += len;
+		ret += len;
 	}
 
-	return &primary_crng;
+	memzero_explicit(chacha_state, sizeof(chacha_state));
+	memzero_explicit(output, sizeof(output));
+	return ret;
 }
 
 /*
- * crng_fast_load() can be called by code in the interrupt service
- * path.  So we can't afford to dilly-dally. Returns the number of
- * bytes processed from cp.
+ * Batched entropy returns random integers. The quality of the random
+ * number is good as /dev/urandom. In order to ensure that the randomness
+ * provided by this function is okay, the function wait_for_random_bytes()
+ * should be called and return 0 at least once at any point prior.
  */
-static size_t crng_fast_load(const u8 *cp, size_t len)
+struct batched_entropy {
+	union {
+		/*
+		 * We make this 1.5x a ChaCha block, so that we get the
+		 * remaining 32 bytes from fast key erasure, plus one full
+		 * block from the detached ChaCha state. We can increase
+		 * the size of this later if needed so long as we keep the
+		 * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE.
+		 */
+		u64 entropy_u64[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u64))];
+		u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))];
+	};
+	local_lock_t lock;
+	unsigned long generation;
+	unsigned int position;
+};
+
+
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = {
+	.lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock),
+	.position = UINT_MAX
+};
+
+u64 get_random_u64(void)
 {
+	u64 ret;
 	unsigned long flags;
-	u8 *p;
-	size_t ret = 0;
+	struct batched_entropy *batch;
+	static void *previous;
+	unsigned long next_gen;
 
-	if (!spin_trylock_irqsave(&primary_crng.lock, flags))
-		return 0;
-	if (crng_init != 0) {
-		spin_unlock_irqrestore(&primary_crng.lock, flags);
-		return 0;
-	}
-	p = (u8 *)&primary_crng.state[4];
-	while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) {
-		p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp;
-		cp++; crng_init_cnt++; len--; ret++;
-	}
-	spin_unlock_irqrestore(&primary_crng.lock, flags);
-	if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
-		invalidate_batched_entropy();
-		crng_init = 1;
-		pr_notice("fast init done\n");
+	warn_unseeded_randomness(&previous);
+
+	local_lock_irqsave(&batched_entropy_u64.lock, flags);
+	batch = raw_cpu_ptr(&batched_entropy_u64);
+
+	next_gen = READ_ONCE(base_crng.generation);
+	if (batch->position >= ARRAY_SIZE(batch->entropy_u64) ||
+	    next_gen != batch->generation) {
+		_get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64));
+		batch->position = 0;
+		batch->generation = next_gen;
 	}
+
+	ret = batch->entropy_u64[batch->position];
+	batch->entropy_u64[batch->position] = 0;
+	++batch->position;
+	local_unlock_irqrestore(&batched_entropy_u64.lock, flags);
 	return ret;
 }
+EXPORT_SYMBOL(get_random_u64);
 
-/*
- * crng_slow_load() is called by add_device_randomness, which has two
- * attributes.  (1) We can't trust the buffer passed to it is
- * guaranteed to be unpredictable (so it might not have any entropy at
- * all), and (2) it doesn't have the performance constraints of
- * crng_fast_load().
- *
- * So we do something more comprehensive which is guaranteed to touch
- * all of the primary_crng's state, and which uses a LFSR with a
- * period of 255 as part of the mixing algorithm.  Finally, we do
- * *not* advance crng_init_cnt since buffer we may get may be something
- * like a fixed DMI table (for example), which might very well be
- * unique to the machine, but is otherwise unvarying.
- */
-static int crng_slow_load(const u8 *cp, size_t len)
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = {
+	.lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock),
+	.position = UINT_MAX
+};
+
+u32 get_random_u32(void)
 {
+	u32 ret;
 	unsigned long flags;
-	static u8 lfsr = 1;
-	u8 tmp;
-	unsigned int i, max = CHACHA_KEY_SIZE;
-	const u8 *src_buf = cp;
-	u8 *dest_buf = (u8 *)&primary_crng.state[4];
+	struct batched_entropy *batch;
+	static void *previous;
+	unsigned long next_gen;
 
-	if (!spin_trylock_irqsave(&primary_crng.lock, flags))
-		return 0;
-	if (crng_init != 0) {
-		spin_unlock_irqrestore(&primary_crng.lock, flags);
-		return 0;
-	}
-	if (len > max)
-		max = len;
-
-	for (i = 0; i < max; i++) {
-		tmp = lfsr;
-		lfsr >>= 1;
-		if (tmp & 1)
-			lfsr ^= 0xE1;
-		tmp = dest_buf[i % CHACHA_KEY_SIZE];
-		dest_buf[i % CHACHA_KEY_SIZE] ^= src_buf[i % len] ^ lfsr;
-		lfsr += (tmp << 3) | (tmp >> 5);
+	warn_unseeded_randomness(&previous);
+
+	local_lock_irqsave(&batched_entropy_u32.lock, flags);
+	batch = raw_cpu_ptr(&batched_entropy_u32);
+
+	next_gen = READ_ONCE(base_crng.generation);
+	if (batch->position >= ARRAY_SIZE(batch->entropy_u32) ||
+	    next_gen != batch->generation) {
+		_get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32));
+		batch->position = 0;
+		batch->generation = next_gen;
 	}
-	spin_unlock_irqrestore(&primary_crng.lock, flags);
-	return 1;
+
+	ret = batch->entropy_u32[batch->position];
+	batch->entropy_u32[batch->position] = 0;
+	++batch->position;
+	local_unlock_irqrestore(&batched_entropy_u32.lock, flags);
+	return ret;
 }
+EXPORT_SYMBOL(get_random_u32);
 
-static void crng_reseed(struct crng_state *crng, bool use_input_pool)
+#ifdef CONFIG_SMP
+/*
+ * This function is called when the CPU is coming up, with entry
+ * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP.
+ */
+int random_prepare_cpu(unsigned int cpu)
 {
-	unsigned long flags;
-	int i, num;
-	union {
-		u8 block[CHACHA_BLOCK_SIZE];
-		u32 key[8];
-	} buf;
+	/*
+	 * When the cpu comes back online, immediately invalidate both
+	 * the per-cpu crng and all batches, so that we serve fresh
+	 * randomness.
+	 */
+	per_cpu_ptr(&crngs, cpu)->generation = ULONG_MAX;
+	per_cpu_ptr(&batched_entropy_u32, cpu)->position = UINT_MAX;
+	per_cpu_ptr(&batched_entropy_u64, cpu)->position = UINT_MAX;
+	return 0;
+}
+#endif
 
-	if (use_input_pool) {
-		num = extract_entropy(&buf, 32, 16);
-		if (num == 0)
-			return;
-	} else {
-		_extract_crng(&primary_crng, buf.block);
-		_crng_backtrack_protect(&primary_crng, buf.block,
-					CHACHA_KEY_SIZE);
-	}
-	spin_lock_irqsave(&crng->lock, flags);
-	for (i = 0; i < 8; i++) {
-		unsigned long rv;
-		if (!arch_get_random_seed_long(&rv) &&
-		    !arch_get_random_long(&rv))
-			rv = random_get_entropy();
-		crng->state[i + 4] ^= buf.key[i] ^ rv;
+/**
+ * randomize_page - Generate a random, page aligned address
+ * @start:	The smallest acceptable address the caller will take.
+ * @range:	The size of the area, starting at @start, within which the
+ *		random address must fall.
+ *
+ * If @start + @range would overflow, @range is capped.
+ *
+ * NOTE: Historical use of randomize_range, which this replaces, presumed that
+ * @start was already page aligned.  We now align it regardless.
+ *
+ * Return: A page aligned address within [start, start + range).  On error,
+ * @start is returned.
+ */
+unsigned long randomize_page(unsigned long start, unsigned long range)
+{
+	if (!PAGE_ALIGNED(start)) {
+		range -= PAGE_ALIGN(start) - start;
+		start = PAGE_ALIGN(start);
 	}
-	memzero_explicit(&buf, sizeof(buf));
-	WRITE_ONCE(crng->init_time, jiffies);
-	spin_unlock_irqrestore(&crng->lock, flags);
-	if (crng == &primary_crng && crng_init < 2)
-		crng_finalize_init();
+
+	if (start > ULONG_MAX - range)
+		range = ULONG_MAX - start;
+
+	range >>= PAGE_SHIFT;
+
+	if (range == 0)
+		return start;
+
+	return start + (get_random_long() % range << PAGE_SHIFT);
 }
 
-static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE])
+/*
+ * This function will use the architecture-specific hardware random
+ * number generator if it is available. It is not recommended for
+ * use. Use get_random_bytes() instead. It returns the number of
+ * bytes filled in.
+ */
+size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes)
 {
-	unsigned long flags, init_time;
+	size_t left = nbytes;
+	u8 *p = buf;
 
-	if (crng_ready()) {
-		init_time = READ_ONCE(crng->init_time);
-		if (time_after(READ_ONCE(crng_global_init_time), init_time) ||
-		    time_after(jiffies, init_time + CRNG_RESEED_INTERVAL))
-			crng_reseed(crng, crng == &primary_crng);
+	while (left) {
+		unsigned long v;
+		size_t chunk = min_t(size_t, left, sizeof(unsigned long));
+
+		if (!arch_get_random_long(&v))
+			break;
+
+		memcpy(p, &v, chunk);
+		p += chunk;
+		left -= chunk;
 	}
-	spin_lock_irqsave(&crng->lock, flags);
-	chacha20_block(&crng->state[0], out);
-	if (crng->state[12] == 0)
-		crng->state[13]++;
-	spin_unlock_irqrestore(&crng->lock, flags);
+
+	return nbytes - left;
 }
+EXPORT_SYMBOL(get_random_bytes_arch);
+
+
+/**********************************************************************
+ *
+ * Entropy accumulation and extraction routines.
+ *
+ * Callers may add entropy via:
+ *
+ *     static void mix_pool_bytes(const void *in, size_t nbytes)
+ *
+ * After which, if added entropy should be credited:
+ *
+ *     static void credit_entropy_bits(size_t nbits)
+ *
+ * Finally, extract entropy via these two, with the latter one
+ * setting the entropy count to zero and extracting only if there
+ * is POOL_MIN_BITS entropy credited prior or force is true:
+ *
+ *     static void extract_entropy(void *buf, size_t nbytes)
+ *     static bool drain_entropy(void *buf, size_t nbytes, bool force)
+ *
+ **********************************************************************/
+
+enum {
+	POOL_BITS = BLAKE2S_HASH_SIZE * 8,
+	POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */
+};
+
+/* For notifying userspace should write into /dev/random. */
+static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
+
+static struct {
+	struct blake2s_state hash;
+	spinlock_t lock;
+	unsigned int entropy_count;
+} input_pool = {
+	.hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE),
+		    BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4,
+		    BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 },
+	.hash.outlen = BLAKE2S_HASH_SIZE,
+	.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
+};
 
-static void extract_crng(u8 out[CHACHA_BLOCK_SIZE])
+static void _mix_pool_bytes(const void *in, size_t nbytes)
 {
-	_extract_crng(select_crng(), out);
+	blake2s_update(&input_pool.hash, in, nbytes);
 }
 
 /*
- * Use the leftover bytes from the CRNG block output (if there is
- * enough) to mutate the CRNG key to provide backtracking protection.
+ * This function adds bytes into the entropy "pool".  It does not
+ * update the entropy estimate.  The caller should call
+ * credit_entropy_bits if this is appropriate.
  */
-static void _crng_backtrack_protect(struct crng_state *crng,
-				    u8 tmp[CHACHA_BLOCK_SIZE], int used)
+static void mix_pool_bytes(const void *in, size_t nbytes)
 {
 	unsigned long flags;
-	u32 *s, *d;
-	int i;
 
-	used = round_up(used, sizeof(u32));
-	if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) {
-		extract_crng(tmp);
-		used = 0;
-	}
-	spin_lock_irqsave(&crng->lock, flags);
-	s = (u32 *)&tmp[used];
-	d = &crng->state[4];
-	for (i = 0; i < 8; i++)
-		*d++ ^= *s++;
-	spin_unlock_irqrestore(&crng->lock, flags);
+	spin_lock_irqsave(&input_pool.lock, flags);
+	_mix_pool_bytes(in, nbytes);
+	spin_unlock_irqrestore(&input_pool.lock, flags);
 }
 
-static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used)
+static void credit_entropy_bits(size_t nbits)
 {
-	_crng_backtrack_protect(select_crng(), tmp, used);
+	unsigned int entropy_count, orig, add;
+
+	if (!nbits)
+		return;
+
+	add = min_t(size_t, nbits, POOL_BITS);
+
+	do {
+		orig = READ_ONCE(input_pool.entropy_count);
+		entropy_count = min_t(unsigned int, POOL_BITS, orig + add);
+	} while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig);
+
+	if (!crng_ready() && entropy_count >= POOL_MIN_BITS)
+		crng_reseed(false);
 }
 
-static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
+/*
+ * This is an HKDF-like construction for using the hashed collected entropy
+ * as a PRF key, that's then expanded block-by-block.
+ */
+static void extract_entropy(void *buf, size_t nbytes)
 {
-	ssize_t ret = 0, i = CHACHA_BLOCK_SIZE;
-	u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
-	int large_request = (nbytes > 256);
+	unsigned long flags;
+	u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE];
+	struct {
+		unsigned long rdseed[32 / sizeof(long)];
+		size_t counter;
+	} block;
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) {
+		if (!arch_get_random_seed_long(&block.rdseed[i]) &&
+		    !arch_get_random_long(&block.rdseed[i]))
+			block.rdseed[i] = random_get_entropy();
+	}
 
-	while (nbytes) {
-		if (large_request && need_resched()) {
-			if (signal_pending(current)) {
-				if (ret == 0)
-					ret = -ERESTARTSYS;
-				break;
-			}
-			schedule();
-		}
+	spin_lock_irqsave(&input_pool.lock, flags);
 
-		extract_crng(tmp);
-		i = min_t(int, nbytes, CHACHA_BLOCK_SIZE);
-		if (copy_to_user(buf, tmp, i)) {
-			ret = -EFAULT;
-			break;
-		}
+	/* seed = HASHPRF(last_key, entropy_input) */
+	blake2s_final(&input_pool.hash, seed);
+
+	/* next_key = HASHPRF(seed, RDSEED || 0) */
+	block.counter = 0;
+	blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed));
+	blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key));
+
+	spin_unlock_irqrestore(&input_pool.lock, flags);
+	memzero_explicit(next_key, sizeof(next_key));
 
+	while (nbytes) {
+		i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE);
+		/* output = HASHPRF(seed, RDSEED || ++counter) */
+		++block.counter;
+		blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed));
 		nbytes -= i;
 		buf += i;
-		ret += i;
 	}
-	crng_backtrack_protect(tmp, i);
 
-	/* Wipe data just written to memory */
-	memzero_explicit(tmp, sizeof(tmp));
+	memzero_explicit(seed, sizeof(seed));
+	memzero_explicit(&block, sizeof(block));
+}
 
-	return ret;
+/*
+ * First we make sure we have POOL_MIN_BITS of entropy in the pool unless force
+ * is true, and then we set the entropy count to zero (but don't actually touch
+ * any data). Only then can we extract a new key with extract_entropy().
+ */
+static bool drain_entropy(void *buf, size_t nbytes, bool force)
+{
+	unsigned int entropy_count;
+	do {
+		entropy_count = READ_ONCE(input_pool.entropy_count);
+		if (!force && entropy_count < POOL_MIN_BITS)
+			return false;
+	} while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count);
+	extract_entropy(buf, nbytes);
+	wake_up_interruptible(&random_write_wait);
+	kill_fasync(&fasync, SIGIO, POLL_OUT);
+	return true;
 }
 
-/*********************************************************************
+
+/**********************************************************************
  *
- * Entropy input management
+ * Entropy collection routines.
  *
- *********************************************************************/
+ * The following exported functions are used for pushing entropy into
+ * the above entropy accumulation routines:
+ *
+ *	void add_device_randomness(const void *buf, size_t size);
+ *	void add_input_randomness(unsigned int type, unsigned int code,
+ *	                          unsigned int value);
+ *	void add_disk_randomness(struct gendisk *disk);
+ *	void add_hwgenerator_randomness(const void *buffer, size_t count,
+ *					size_t entropy);
+ *	void add_bootloader_randomness(const void *buf, size_t size);
+ *	void add_vmfork_randomness(const void *unique_vm_id, size_t size);
+ *	void add_interrupt_randomness(int irq);
+ *
+ * add_device_randomness() adds data to the input pool that
+ * is likely to differ between two devices (or possibly even per boot).
+ * This would be things like MAC addresses or serial numbers, or the
+ * read-out of the RTC. This does *not* credit any actual entropy to
+ * the pool, but it initializes the pool to different values for devices
+ * that might otherwise be identical and have very little entropy
+ * available to them (particularly common in the embedded world).
+ *
+ * add_input_randomness() uses the input layer interrupt timing, as well
+ * as the event type information from the hardware.
+ *
+ * add_disk_randomness() uses what amounts to the seek time of block
+ * layer request events, on a per-disk_devt basis, as input to the
+ * entropy pool. Note that high-speed solid state drives with very low
+ * seek times do not make for good sources of entropy, as their seek
+ * times are usually fairly consistent.
+ *
+ * The above two routines try to estimate how many bits of entropy
+ * to credit. They do this by keeping track of the first and second
+ * order deltas of the event timings.
+ *
+ * add_hwgenerator_randomness() is for true hardware RNGs, and will credit
+ * entropy as specified by the caller. If the entropy pool is full it will
+ * block until more entropy is needed.
+ *
+ * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or
+ * add_device_randomness(), depending on whether or not the configuration
+ * option CONFIG_RANDOM_TRUST_BOOTLOADER is set.
+ *
+ * add_vmfork_randomness() adds a unique (but not necessarily secret) ID
+ * representing the current instance of a VM to the pool, without crediting,
+ * and then force-reseeds the crng so that it takes effect immediately.
+ *
+ * add_interrupt_randomness() uses the interrupt timing as random
+ * inputs to the entropy pool. Using the cycle counters and the irq source
+ * as inputs, it feeds the input pool roughly once a second or after 64
+ * interrupts, crediting 1 bit of entropy for whichever comes first.
+ *
+ **********************************************************************/
 
-/* There is one of these per entropy source */
-struct timer_rand_state {
-	cycles_t last_time;
-	long last_delta, last_delta2;
-};
+static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
+static int __init parse_trust_cpu(char *arg)
+{
+	return kstrtobool(arg, &trust_cpu);
+}
+early_param("random.trust_cpu", parse_trust_cpu);
+
+/*
+ * The first collection of entropy occurs at system boot while interrupts
+ * are still turned off. Here we push in RDSEED, a timestamp, and utsname().
+ * Depending on the above configuration knob, RDSEED may be considered
+ * sufficient for initialization. Note that much earlier setup may already
+ * have pushed entropy into the input pool by the time we get here.
+ */
+int __init rand_initialize(void)
+{
+	size_t i;
+	ktime_t now = ktime_get_real();
+	bool arch_init = true;
+	unsigned long rv;
+
+	for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) {
+		if (!arch_get_random_seed_long_early(&rv) &&
+		    !arch_get_random_long_early(&rv)) {
+			rv = random_get_entropy();
+			arch_init = false;
+		}
+		_mix_pool_bytes(&rv, sizeof(rv));
+	}
+	_mix_pool_bytes(&now, sizeof(now));
+	_mix_pool_bytes(utsname(), sizeof(*(utsname())));
+
+	extract_entropy(base_crng.key, sizeof(base_crng.key));
+	++base_crng.generation;
 
-#define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, };
+	if (arch_init && trust_cpu && !crng_ready()) {
+		crng_init = 2;
+		pr_notice("crng init done (trusting CPU's manufacturer)\n");
+	}
+
+	if (ratelimit_disable)
+		unseeded_warning.interval = 0;
+	return 0;
+}
 
 /*
  * Add device- or boot-specific data to the input pool to help
@@ -1091,23 +992,27 @@ struct timer_rand_state {
  * the entropy pool having similar initial state across largely
  * identical devices.
  */
-void add_device_randomness(const void *buf, unsigned int size)
+void add_device_randomness(const void *buf, size_t size)
 {
-	unsigned long time = random_get_entropy() ^ jiffies;
-	unsigned long flags;
+	cycles_t cycles = random_get_entropy();
+	unsigned long flags, now = jiffies;
 
-	if (!crng_ready() && size)
-		crng_slow_load(buf, size);
+	if (crng_init == 0 && size)
+		crng_pre_init_inject(buf, size, false);
 
-	trace_add_device_randomness(size, _RET_IP_);
 	spin_lock_irqsave(&input_pool.lock, flags);
+	_mix_pool_bytes(&cycles, sizeof(cycles));
+	_mix_pool_bytes(&now, sizeof(now));
 	_mix_pool_bytes(buf, size);
-	_mix_pool_bytes(&time, sizeof(time));
 	spin_unlock_irqrestore(&input_pool.lock, flags);
 }
 EXPORT_SYMBOL(add_device_randomness);
 
-static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE;
+/* There is one of these per entropy source */
+struct timer_rand_state {
+	unsigned long last_time;
+	long last_delta, last_delta2;
+};
 
 /*
  * This function adds entropy to the entropy "pool" by using timing
@@ -1117,29 +1022,26 @@ static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE;
  * The number "num" is also added to the pool - it should somehow describe
  * the type of event which just happened.  This is currently 0-255 for
  * keyboard scan codes, and 256 upwards for interrupts.
- *
  */
-static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
+static void add_timer_randomness(struct timer_rand_state *state, unsigned int num)
 {
-	struct {
-		long jiffies;
-		unsigned int cycles;
-		unsigned int num;
-	} sample;
+	cycles_t cycles = random_get_entropy();
+	unsigned long flags, now = jiffies;
 	long delta, delta2, delta3;
 
-	sample.jiffies = jiffies;
-	sample.cycles = random_get_entropy();
-	sample.num = num;
-	mix_pool_bytes(&sample, sizeof(sample));
+	spin_lock_irqsave(&input_pool.lock, flags);
+	_mix_pool_bytes(&cycles, sizeof(cycles));
+	_mix_pool_bytes(&now, sizeof(now));
+	_mix_pool_bytes(&num, sizeof(num));
+	spin_unlock_irqrestore(&input_pool.lock, flags);
 
 	/*
 	 * Calculate number of bits of randomness we probably added.
 	 * We take into account the first, second and third-order deltas
 	 * in order to make our estimate.
 	 */
-	delta = sample.jiffies - READ_ONCE(state->last_time);
-	WRITE_ONCE(state->last_time, sample.jiffies);
+	delta = now - READ_ONCE(state->last_time);
+	WRITE_ONCE(state->last_time, now);
 
 	delta2 = delta - READ_ONCE(state->last_delta);
 	WRITE_ONCE(state->last_delta, delta);
@@ -1163,318 +1065,303 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
 	 * Round down by 1 bit on general principles,
 	 * and limit entropy estimate to 12 bits.
 	 */
-	credit_entropy_bits(min_t(int, fls(delta >> 1), 11));
+	credit_entropy_bits(min_t(unsigned int, fls(delta >> 1), 11));
 }
 
 void add_input_randomness(unsigned int type, unsigned int code,
 			  unsigned int value)
 {
 	static unsigned char last_value;
+	static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES };
 
-	/* ignore autorepeat and the like */
+	/* Ignore autorepeat and the like. */
 	if (value == last_value)
 		return;
 
 	last_value = value;
 	add_timer_randomness(&input_timer_state,
 			     (type << 4) ^ code ^ (code >> 4) ^ value);
-	trace_add_input_randomness(POOL_ENTROPY_BITS());
 }
 EXPORT_SYMBOL_GPL(add_input_randomness);
 
-static DEFINE_PER_CPU(struct fast_pool, irq_randomness);
-
-#ifdef ADD_INTERRUPT_BENCH
-static unsigned long avg_cycles, avg_deviation;
-
-#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */
-#define FIXED_1_2 (1 << (AVG_SHIFT - 1))
-
-static void add_interrupt_bench(cycles_t start)
+#ifdef CONFIG_BLOCK
+void add_disk_randomness(struct gendisk *disk)
 {
-	long delta = random_get_entropy() - start;
-
-	/* Use a weighted moving average */
-	delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT);
-	avg_cycles += delta;
-	/* And average deviation */
-	delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT);
-	avg_deviation += delta;
+	if (!disk || !disk->random)
+		return;
+	/* First major is 1, so we get >= 0x200 here. */
+	add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
 }
-#else
-#define add_interrupt_bench(x)
-#endif
+EXPORT_SYMBOL_GPL(add_disk_randomness);
 
-static u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
+void rand_initialize_disk(struct gendisk *disk)
 {
-	u32 *ptr = (u32 *)regs;
-	unsigned int idx;
+	struct timer_rand_state *state;
 
-	if (regs == NULL)
-		return 0;
-	idx = READ_ONCE(f->reg_idx);
-	if (idx >= sizeof(struct pt_regs) / sizeof(u32))
-		idx = 0;
-	ptr += idx++;
-	WRITE_ONCE(f->reg_idx, idx);
-	return *ptr;
+	/*
+	 * If kzalloc returns null, we just won't use that entropy
+	 * source.
+	 */
+	state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
+	if (state) {
+		state->last_time = INITIAL_JIFFIES;
+		disk->random = state;
+	}
 }
+#endif
 
-void add_interrupt_randomness(int irq)
+/*
+ * Interface for in-kernel drivers of true hardware RNGs.
+ * Those devices may produce endless random bits and will be throttled
+ * when our pool is full.
+ */
+void add_hwgenerator_randomness(const void *buffer, size_t count,
+				size_t entropy)
 {
-	struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
-	struct pt_regs *regs = get_irq_regs();
-	unsigned long now = jiffies;
-	cycles_t cycles = random_get_entropy();
-	u32 c_high, j_high;
-	u64 ip;
-
-	if (cycles == 0)
-		cycles = get_reg(fast_pool, regs);
-	c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
-	j_high = (sizeof(now) > 4) ? now >> 32 : 0;
-	fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
-	fast_pool->pool[1] ^= now ^ c_high;
-	ip = regs ? instruction_pointer(regs) : _RET_IP_;
-	fast_pool->pool[2] ^= ip;
-	fast_pool->pool[3] ^=
-		(sizeof(ip) > 4) ? ip >> 32 : get_reg(fast_pool, regs);
-
-	fast_mix(fast_pool);
-	add_interrupt_bench(cycles);
-
 	if (unlikely(crng_init == 0)) {
-		if ((fast_pool->count >= 64) &&
-		    crng_fast_load((u8 *)fast_pool->pool, sizeof(fast_pool->pool)) > 0) {
-			fast_pool->count = 0;
-			fast_pool->last = now;
-		}
-		return;
+		size_t ret = crng_pre_init_inject(buffer, count, true);
+		mix_pool_bytes(buffer, ret);
+		count -= ret;
+		buffer += ret;
+		if (!count || crng_init == 0)
+			return;
 	}
 
-	if ((fast_pool->count < 64) && !time_after(now, fast_pool->last + HZ))
-		return;
+	/*
+	 * Throttle writing if we're above the trickle threshold.
+	 * We'll be woken up again once below POOL_MIN_BITS, when
+	 * the calling thread is about to terminate, or once
+	 * CRNG_RESEED_INTERVAL has elapsed.
+	 */
+	wait_event_interruptible_timeout(random_write_wait,
+			!system_wq || kthread_should_stop() ||
+			input_pool.entropy_count < POOL_MIN_BITS,
+			CRNG_RESEED_INTERVAL);
+	mix_pool_bytes(buffer, count);
+	credit_entropy_bits(entropy);
+}
+EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
 
-	if (!spin_trylock(&input_pool.lock))
-		return;
+/*
+ * Handle random seed passed by bootloader.
+ * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise
+ * it would be regarded as device data.
+ * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER.
+ */
+void add_bootloader_randomness(const void *buf, size_t size)
+{
+	if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER))
+		add_hwgenerator_randomness(buf, size, size * 8);
+	else
+		add_device_randomness(buf, size);
+}
+EXPORT_SYMBOL_GPL(add_bootloader_randomness);
 
-	fast_pool->last = now;
-	__mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool));
-	spin_unlock(&input_pool.lock);
+#if IS_ENABLED(CONFIG_VMGENID)
+static BLOCKING_NOTIFIER_HEAD(vmfork_chain);
 
-	fast_pool->count = 0;
+/*
+ * Handle a new unique VM ID, which is unique, not secret, so we
+ * don't credit it, but we do immediately force a reseed after so
+ * that it's used by the crng posthaste.
+ */
+void add_vmfork_randomness(const void *unique_vm_id, size_t size)
+{
+	add_device_randomness(unique_vm_id, size);
+	if (crng_ready()) {
+		crng_reseed(true);
+		pr_notice("crng reseeded due to virtual machine fork\n");
+	}
+	blocking_notifier_call_chain(&vmfork_chain, 0, NULL);
+}
+#if IS_MODULE(CONFIG_VMGENID)
+EXPORT_SYMBOL_GPL(add_vmfork_randomness);
+#endif
 
-	/* award one bit for the contents of the fast pool */
-	credit_entropy_bits(1);
+int register_random_vmfork_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&vmfork_chain, nb);
 }
-EXPORT_SYMBOL_GPL(add_interrupt_randomness);
+EXPORT_SYMBOL_GPL(register_random_vmfork_notifier);
 
-#ifdef CONFIG_BLOCK
-void add_disk_randomness(struct gendisk *disk)
+int unregister_random_vmfork_notifier(struct notifier_block *nb)
 {
-	if (!disk || !disk->random)
-		return;
-	/* first major is 1, so we get >= 0x200 here */
-	add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
-	trace_add_disk_randomness(disk_devt(disk), POOL_ENTROPY_BITS());
+	return blocking_notifier_chain_unregister(&vmfork_chain, nb);
 }
-EXPORT_SYMBOL_GPL(add_disk_randomness);
+EXPORT_SYMBOL_GPL(unregister_random_vmfork_notifier);
 #endif
 
-/*********************************************************************
- *
- * Entropy extraction routines
- *
- *********************************************************************/
+struct fast_pool {
+	struct work_struct mix;
+	unsigned long pool[4];
+	unsigned long last;
+	unsigned int count;
+	u16 reg_idx;
+};
+
+static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = {
+#ifdef CONFIG_64BIT
+	/* SipHash constants */
+	.pool = { 0x736f6d6570736575UL, 0x646f72616e646f6dUL,
+		  0x6c7967656e657261UL, 0x7465646279746573UL }
+#else
+	/* HalfSipHash constants */
+	.pool = { 0, 0, 0x6c796765U, 0x74656462U }
+#endif
+};
 
 /*
- * This function decides how many bytes to actually take from the
- * given pool, and also debits the entropy count accordingly.
+ * This is [Half]SipHash-1-x, starting from an empty key. Because
+ * the key is fixed, it assumes that its inputs are non-malicious,
+ * and therefore this has no security on its own. s represents the
+ * 128 or 256-bit SipHash state, while v represents a 128-bit input.
  */
-static size_t account(size_t nbytes, int min)
+static void fast_mix(unsigned long s[4], const unsigned long *v)
 {
-	int entropy_count, orig;
-	size_t ibytes, nfrac;
-
-	BUG_ON(input_pool.entropy_count > POOL_FRACBITS);
-
-	/* Can we pull enough? */
-retry:
-	entropy_count = orig = READ_ONCE(input_pool.entropy_count);
-	if (WARN_ON(entropy_count < 0)) {
-		pr_warn("negative entropy count: count %d\n", entropy_count);
-		entropy_count = 0;
-	}
+	size_t i;
 
-	/* never pull more than available */
-	ibytes = min_t(size_t, nbytes, entropy_count >> (POOL_ENTROPY_SHIFT + 3));
-	if (ibytes < min)
-		ibytes = 0;
-	nfrac = ibytes << (POOL_ENTROPY_SHIFT + 3);
-	if ((size_t)entropy_count > nfrac)
-		entropy_count -= nfrac;
-	else
-		entropy_count = 0;
-
-	if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig)
-		goto retry;
-
-	trace_debit_entropy(8 * ibytes);
-	if (ibytes && POOL_ENTROPY_BITS() < random_write_wakeup_bits) {
-		wake_up_interruptible(&random_write_wait);
-		kill_fasync(&fasync, SIGIO, POLL_OUT);
+	for (i = 0; i < 16 / sizeof(long); ++i) {
+		s[3] ^= v[i];
+#ifdef CONFIG_64BIT
+		s[0] += s[1]; s[1] = rol64(s[1], 13); s[1] ^= s[0]; s[0] = rol64(s[0], 32);
+		s[2] += s[3]; s[3] = rol64(s[3], 16); s[3] ^= s[2];
+		s[0] += s[3]; s[3] = rol64(s[3], 21); s[3] ^= s[0];
+		s[2] += s[1]; s[1] = rol64(s[1], 17); s[1] ^= s[2]; s[2] = rol64(s[2], 32);
+#else
+		s[0] += s[1]; s[1] = rol32(s[1],  5); s[1] ^= s[0]; s[0] = rol32(s[0], 16);
+		s[2] += s[3]; s[3] = rol32(s[3],  8); s[3] ^= s[2];
+		s[0] += s[3]; s[3] = rol32(s[3],  7); s[3] ^= s[0];
+		s[2] += s[1]; s[1] = rol32(s[1], 13); s[1] ^= s[2]; s[2] = rol32(s[2], 16);
+#endif
+		s[0] ^= v[i];
 	}
-
-	return ibytes;
 }
 
+#ifdef CONFIG_SMP
 /*
- * This function does the actual extraction for extract_entropy.
- *
- * Note: we assume that .poolwords is a multiple of 16 words.
+ * This function is called when the CPU has just come online, with
+ * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE.
  */
-static void extract_buf(u8 *out)
+int random_online_cpu(unsigned int cpu)
 {
-	struct blake2s_state state __aligned(__alignof__(unsigned long));
-	u8 hash[BLAKE2S_HASH_SIZE];
-	unsigned long *salt;
-	unsigned long flags;
-
-	blake2s_init(&state, sizeof(hash));
-
 	/*
-	 * If we have an architectural hardware random number
-	 * generator, use it for BLAKE2's salt & personal fields.
+	 * During CPU shutdown and before CPU onlining, add_interrupt_
+	 * randomness() may schedule mix_interrupt_randomness(), and
+	 * set the MIX_INFLIGHT flag. However, because the worker can
+	 * be scheduled on a different CPU during this period, that
+	 * flag will never be cleared. For that reason, we zero out
+	 * the flag here, which runs just after workqueues are onlined
+	 * for the CPU again. This also has the effect of setting the
+	 * irq randomness count to zero so that new accumulated irqs
+	 * are fresh.
 	 */
-	for (salt = (unsigned long *)&state.h[4];
-	     salt < (unsigned long *)&state.h[8]; ++salt) {
-		unsigned long v;
-		if (!arch_get_random_long(&v))
-			break;
-		*salt ^= v;
-	}
-
-	/* Generate a hash across the pool */
-	spin_lock_irqsave(&input_pool.lock, flags);
-	blake2s_update(&state, (const u8 *)input_pool_data, POOL_BYTES);
-	blake2s_final(&state, hash); /* final zeros out state */
+	per_cpu_ptr(&irq_randomness, cpu)->count = 0;
+	return 0;
+}
+#endif
 
-	/*
-	 * We mix the hash back into the pool to prevent backtracking
-	 * attacks (where the attacker knows the state of the pool
-	 * plus the current outputs, and attempts to find previous
-	 * outputs), unless the hash function can be inverted. By
-	 * mixing at least a hash worth of hash data back, we make
-	 * brute-forcing the feedback as hard as brute-forcing the
-	 * hash.
-	 */
-	__mix_pool_bytes(hash, sizeof(hash));
-	spin_unlock_irqrestore(&input_pool.lock, flags);
+static unsigned long get_reg(struct fast_pool *f, struct pt_regs *regs)
+{
+	unsigned long *ptr = (unsigned long *)regs;
+	unsigned int idx;
 
-	/* Note that EXTRACT_SIZE is half of hash size here, because above
-	 * we've dumped the full length back into mixer. By reducing the
-	 * amount that we emit, we retain a level of forward secrecy.
-	 */
-	memcpy(out, hash, EXTRACT_SIZE);
-	memzero_explicit(hash, sizeof(hash));
+	if (regs == NULL)
+		return 0;
+	idx = READ_ONCE(f->reg_idx);
+	if (idx >= sizeof(struct pt_regs) / sizeof(unsigned long))
+		idx = 0;
+	ptr += idx++;
+	WRITE_ONCE(f->reg_idx, idx);
+	return *ptr;
 }
 
-static ssize_t _extract_entropy(void *buf, size_t nbytes)
+static void mix_interrupt_randomness(struct work_struct *work)
 {
-	ssize_t ret = 0, i;
-	u8 tmp[EXTRACT_SIZE];
+	struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix);
+	/*
+	 * The size of the copied stack pool is explicitly 16 bytes so that we
+	 * tax mix_pool_byte()'s compression function the same amount on all
+	 * platforms. This means on 64-bit we copy half the pool into this,
+	 * while on 32-bit we copy all of it. The entropy is supposed to be
+	 * sufficiently dispersed between bits that in the sponge-like
+	 * half case, on average we don't wind up "losing" some.
+	 */
+	u8 pool[16];
 
-	while (nbytes) {
-		extract_buf(tmp);
-		i = min_t(int, nbytes, EXTRACT_SIZE);
-		memcpy(buf, tmp, i);
-		nbytes -= i;
-		buf += i;
-		ret += i;
+	/* Check to see if we're running on the wrong CPU due to hotplug. */
+	local_irq_disable();
+	if (fast_pool != this_cpu_ptr(&irq_randomness)) {
+		local_irq_enable();
+		return;
 	}
 
-	/* Wipe data just returned from memory */
-	memzero_explicit(tmp, sizeof(tmp));
+	/*
+	 * Copy the pool to the stack so that the mixer always has a
+	 * consistent view, before we reenable irqs again.
+	 */
+	memcpy(pool, fast_pool->pool, sizeof(pool));
+	fast_pool->count = 0;
+	fast_pool->last = jiffies;
+	local_irq_enable();
 
-	return ret;
-}
+	if (unlikely(crng_init == 0)) {
+		crng_pre_init_inject(pool, sizeof(pool), true);
+		mix_pool_bytes(pool, sizeof(pool));
+	} else {
+		mix_pool_bytes(pool, sizeof(pool));
+		credit_entropy_bits(1);
+	}
 
-/*
- * This function extracts randomness from the "entropy pool", and
- * returns it in a buffer.
- *
- * The min parameter specifies the minimum amount we can pull before
- * failing to avoid races that defeat catastrophic reseeding.
- */
-static ssize_t extract_entropy(void *buf, size_t nbytes, int min)
-{
-	trace_extract_entropy(nbytes, POOL_ENTROPY_BITS(), _RET_IP_);
-	nbytes = account(nbytes, min);
-	return _extract_entropy(buf, nbytes);
+	memzero_explicit(pool, sizeof(pool));
 }
 
-#define warn_unseeded_randomness(previous) \
-	_warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous))
-
-static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous)
+void add_interrupt_randomness(int irq)
 {
-#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM
-	const bool print_once = false;
-#else
-	static bool print_once __read_mostly;
-#endif
-
-	if (print_once || crng_ready() ||
-	    (previous && (caller == READ_ONCE(*previous))))
-		return;
-	WRITE_ONCE(*previous, caller);
-#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM
-	print_once = true;
-#endif
-	if (__ratelimit(&unseeded_warning))
-		printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n",
-				func_name, caller, crng_init);
-}
+	enum { MIX_INFLIGHT = 1U << 31 };
+	cycles_t cycles = random_get_entropy();
+	unsigned long now = jiffies;
+	struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
+	struct pt_regs *regs = get_irq_regs();
+	unsigned int new_count;
+	union {
+		u32 u32[4];
+		u64 u64[2];
+		unsigned long longs[16 / sizeof(long)];
+	} irq_data;
 
-/*
- * This function is the exported kernel interface.  It returns some
- * number of good random numbers, suitable for key generation, seeding
- * TCP sequence numbers, etc.  It does not rely on the hardware random
- * number generator.  For random bytes direct from the hardware RNG
- * (when available), use get_random_bytes_arch(). In order to ensure
- * that the randomness provided by this function is okay, the function
- * wait_for_random_bytes() should be called and return 0 at least once
- * at any point prior.
- */
-static void _get_random_bytes(void *buf, int nbytes)
-{
-	u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
+	if (cycles == 0)
+		cycles = get_reg(fast_pool, regs);
 
-	trace_get_random_bytes(nbytes, _RET_IP_);
+	if (sizeof(cycles) == 8)
+		irq_data.u64[0] = cycles ^ rol64(now, 32) ^ irq;
+	else {
+		irq_data.u32[0] = cycles ^ irq;
+		irq_data.u32[1] = now;
+	}
 
-	while (nbytes >= CHACHA_BLOCK_SIZE) {
-		extract_crng(buf);
-		buf += CHACHA_BLOCK_SIZE;
-		nbytes -= CHACHA_BLOCK_SIZE;
+	if (sizeof(unsigned long) == 8)
+		irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_;
+	else {
+		irq_data.u32[2] = regs ? instruction_pointer(regs) : _RET_IP_;
+		irq_data.u32[3] = get_reg(fast_pool, regs);
 	}
 
-	if (nbytes > 0) {
-		extract_crng(tmp);
-		memcpy(buf, tmp, nbytes);
-		crng_backtrack_protect(tmp, nbytes);
-	} else
-		crng_backtrack_protect(tmp, CHACHA_BLOCK_SIZE);
-	memzero_explicit(tmp, sizeof(tmp));
-}
+	fast_mix(fast_pool->pool, irq_data.longs);
+	new_count = ++fast_pool->count;
 
-void get_random_bytes(void *buf, int nbytes)
-{
-	static void *previous;
+	if (new_count & MIX_INFLIGHT)
+		return;
 
-	warn_unseeded_randomness(&previous);
-	_get_random_bytes(buf, nbytes);
+	if (new_count < 64 && (!time_after(now, fast_pool->last + HZ) ||
+			       unlikely(crng_init == 0)))
+		return;
+
+	if (unlikely(!fast_pool->mix.func))
+		INIT_WORK(&fast_pool->mix, mix_interrupt_randomness);
+	fast_pool->count |= MIX_INFLIGHT;
+	queue_work_on(raw_smp_processor_id(), system_highpri_wq, &fast_pool->mix);
 }
-EXPORT_SYMBOL(get_random_bytes);
+EXPORT_SYMBOL_GPL(add_interrupt_randomness);
 
 /*
  * Each time the timer fires, we expect that we got an unpredictable
@@ -1501,264 +1388,81 @@ static void entropy_timer(struct timer_list *t)
 static void try_to_generate_entropy(void)
 {
 	struct {
-		unsigned long now;
+		cycles_t cycles;
 		struct timer_list timer;
 	} stack;
 
-	stack.now = random_get_entropy();
+	stack.cycles = random_get_entropy();
 
 	/* Slow counter - or none. Don't even bother */
-	if (stack.now == random_get_entropy())
+	if (stack.cycles == random_get_entropy())
 		return;
 
 	timer_setup_on_stack(&stack.timer, entropy_timer, 0);
-	while (!crng_ready()) {
+	while (!crng_ready() && !signal_pending(current)) {
 		if (!timer_pending(&stack.timer))
 			mod_timer(&stack.timer, jiffies + 1);
-		mix_pool_bytes(&stack.now, sizeof(stack.now));
+		mix_pool_bytes(&stack.cycles, sizeof(stack.cycles));
 		schedule();
-		stack.now = random_get_entropy();
+		stack.cycles = random_get_entropy();
 	}
 
 	del_timer_sync(&stack.timer);
 	destroy_timer_on_stack(&stack.timer);
-	mix_pool_bytes(&stack.now, sizeof(stack.now));
+	mix_pool_bytes(&stack.cycles, sizeof(stack.cycles));
 }
 
-/*
- * Wait for the urandom pool to be seeded and thus guaranteed to supply
- * cryptographically secure random numbers. This applies to: the /dev/urandom
- * device, the get_random_bytes function, and the get_random_{u32,u64,int,long}
- * family of functions. Using any of these functions without first calling
- * this function forfeits the guarantee of security.
- *
- * Returns: 0 if the urandom pool has been seeded.
- *          -ERESTARTSYS if the function was interrupted by a signal.
- */
-int wait_for_random_bytes(void)
-{
-	if (likely(crng_ready()))
-		return 0;
-
-	do {
-		int ret;
-		ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ);
-		if (ret)
-			return ret > 0 ? 0 : ret;
-
-		try_to_generate_entropy();
-	} while (!crng_ready());
 
-	return 0;
-}
-EXPORT_SYMBOL(wait_for_random_bytes);
-
-/*
- * Returns whether or not the urandom pool has been seeded and thus guaranteed
- * to supply cryptographically secure random numbers. This applies to: the
- * /dev/urandom device, the get_random_bytes function, and the get_random_{u32,
- * ,u64,int,long} family of functions.
+/**********************************************************************
  *
- * Returns: true if the urandom pool has been seeded.
- *          false if the urandom pool has not been seeded.
- */
-bool rng_is_initialized(void)
-{
-	return crng_ready();
-}
-EXPORT_SYMBOL(rng_is_initialized);
-
-/*
- * Add a callback function that will be invoked when the nonblocking
- * pool is initialised.
+ * Userspace reader/writer interfaces.
  *
- * returns: 0 if callback is successfully added
- *	    -EALREADY if pool is already initialised (callback not called)
- *	    -ENOENT if module for callback is not alive
- */
-int add_random_ready_callback(struct random_ready_callback *rdy)
-{
-	struct module *owner;
-	unsigned long flags;
-	int err = -EALREADY;
-
-	if (crng_ready())
-		return err;
-
-	owner = rdy->owner;
-	if (!try_module_get(owner))
-		return -ENOENT;
-
-	spin_lock_irqsave(&random_ready_list_lock, flags);
-	if (crng_ready())
-		goto out;
-
-	owner = NULL;
-
-	list_add(&rdy->list, &random_ready_list);
-	err = 0;
-
-out:
-	spin_unlock_irqrestore(&random_ready_list_lock, flags);
-
-	module_put(owner);
-
-	return err;
-}
-EXPORT_SYMBOL(add_random_ready_callback);
-
-/*
- * Delete a previously registered readiness callback function.
- */
-void del_random_ready_callback(struct random_ready_callback *rdy)
-{
-	unsigned long flags;
-	struct module *owner = NULL;
-
-	spin_lock_irqsave(&random_ready_list_lock, flags);
-	if (!list_empty(&rdy->list)) {
-		list_del_init(&rdy->list);
-		owner = rdy->owner;
-	}
-	spin_unlock_irqrestore(&random_ready_list_lock, flags);
-
-	module_put(owner);
-}
-EXPORT_SYMBOL(del_random_ready_callback);
-
-/*
- * This function will use the architecture-specific hardware random
- * number generator if it is available.  The arch-specific hw RNG will
- * almost certainly be faster than what we can do in software, but it
- * is impossible to verify that it is implemented securely (as
- * opposed, to, say, the AES encryption of a sequence number using a
- * key known by the NSA).  So it's useful if we need the speed, but
- * only if we're willing to trust the hardware manufacturer not to
- * have put in a back door.
+ * getrandom(2) is the primary modern interface into the RNG and should
+ * be used in preference to anything else.
  *
- * Return number of bytes filled in.
- */
-int __must_check get_random_bytes_arch(void *buf, int nbytes)
-{
-	int left = nbytes;
-	u8 *p = buf;
-
-	trace_get_random_bytes_arch(left, _RET_IP_);
-	while (left) {
-		unsigned long v;
-		int chunk = min_t(int, left, sizeof(unsigned long));
-
-		if (!arch_get_random_long(&v))
-			break;
-
-		memcpy(p, &v, chunk);
-		p += chunk;
-		left -= chunk;
-	}
-
-	return nbytes - left;
-}
-EXPORT_SYMBOL(get_random_bytes_arch);
-
-/*
- * init_std_data - initialize pool with system data
+ * Reading from /dev/random and /dev/urandom both have the same effect
+ * as calling getrandom(2) with flags=0. (In earlier versions, however,
+ * they each had different semantics.)
  *
- * This function clears the pool's entropy count and mixes some system
- * data into the pool to prepare it for use. The pool is not cleared
- * as that can only decrease the entropy in the pool.
- */
-static void __init init_std_data(void)
-{
-	int i;
-	ktime_t now = ktime_get_real();
-	unsigned long rv;
-
-	mix_pool_bytes(&now, sizeof(now));
-	for (i = POOL_BYTES; i > 0; i -= sizeof(rv)) {
-		if (!arch_get_random_seed_long(&rv) &&
-		    !arch_get_random_long(&rv))
-			rv = random_get_entropy();
-		mix_pool_bytes(&rv, sizeof(rv));
-	}
-	mix_pool_bytes(utsname(), sizeof(*(utsname())));
-}
-
-/*
- * Note that setup_arch() may call add_device_randomness()
- * long before we get here. This allows seeding of the pools
- * with some platform dependent data very early in the boot
- * process. But it limits our options here. We must use
- * statically allocated structures that already have all
- * initializations complete at compile time. We should also
- * take care not to overwrite the precious per platform data
- * we were given.
- */
-int __init rand_initialize(void)
-{
-	init_std_data();
-	if (crng_need_final_init)
-		crng_finalize_init();
-	crng_initialize_primary();
-	crng_global_init_time = jiffies;
-	if (ratelimit_disable) {
-		urandom_warning.interval = 0;
-		unseeded_warning.interval = 0;
-	}
-	return 0;
-}
+ * Writing to either /dev/random or /dev/urandom adds entropy to
+ * the input pool but does not credit it.
+ *
+ * Polling on /dev/random or /dev/urandom indicates when the RNG
+ * is initialized, on the read side, and when it wants new entropy,
+ * on the write side.
+ *
+ * Both /dev/random and /dev/urandom have the same set of ioctls for
+ * adding entropy, getting the entropy count, zeroing the count, and
+ * reseeding the crng.
+ *
+ **********************************************************************/
 
-#ifdef CONFIG_BLOCK
-void rand_initialize_disk(struct gendisk *disk)
+SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
+		flags)
 {
-	struct timer_rand_state *state;
+	if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE))
+		return -EINVAL;
 
 	/*
-	 * If kzalloc returns null, we just won't use that entropy
-	 * source.
+	 * Requesting insecure and blocking randomness at the same time makes
+	 * no sense.
 	 */
-	state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
-	if (state) {
-		state->last_time = INITIAL_JIFFIES;
-		disk->random = state;
-	}
-}
-#endif
-
-static ssize_t urandom_read_nowarn(struct file *file, char __user *buf,
-				   size_t nbytes, loff_t *ppos)
-{
-	int ret;
+	if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM))
+		return -EINVAL;
 
-	nbytes = min_t(size_t, nbytes, INT_MAX >> (POOL_ENTROPY_SHIFT + 3));
-	ret = extract_crng_user(buf, nbytes);
-	trace_urandom_read(8 * nbytes, 0, POOL_ENTROPY_BITS());
-	return ret;
-}
+	if (count > INT_MAX)
+		count = INT_MAX;
 
-static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes,
-			    loff_t *ppos)
-{
-	static int maxwarn = 10;
+	if (!(flags & GRND_INSECURE) && !crng_ready()) {
+		int ret;
 
-	if (!crng_ready() && maxwarn > 0) {
-		maxwarn--;
-		if (__ratelimit(&urandom_warning))
-			pr_notice("%s: uninitialized urandom read (%zd bytes read)\n",
-				  current->comm, nbytes);
+		if (flags & GRND_NONBLOCK)
+			return -EAGAIN;
+		ret = wait_for_random_bytes();
+		if (unlikely(ret))
+			return ret;
 	}
-
-	return urandom_read_nowarn(file, buf, nbytes, ppos);
-}
-
-static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes,
-			   loff_t *ppos)
-{
-	int ret;
-
-	ret = wait_for_random_bytes();
-	if (ret != 0)
-		return ret;
-	return urandom_read_nowarn(file, buf, nbytes, ppos);
+	return get_random_bytes_user(buf, count);
 }
 
 static __poll_t random_poll(struct file *file, poll_table *wait)
@@ -1770,44 +1474,38 @@ static __poll_t random_poll(struct file *file, poll_table *wait)
 	mask = 0;
 	if (crng_ready())
 		mask |= EPOLLIN | EPOLLRDNORM;
-	if (POOL_ENTROPY_BITS() < random_write_wakeup_bits)
+	if (input_pool.entropy_count < POOL_MIN_BITS)
 		mask |= EPOLLOUT | EPOLLWRNORM;
 	return mask;
 }
 
-static int write_pool(const char __user *buffer, size_t count)
+static int write_pool(const char __user *ubuf, size_t count)
 {
-	size_t bytes;
-	u32 t, buf[16];
-	const char __user *p = buffer;
-
-	while (count > 0) {
-		int b, i = 0;
-
-		bytes = min(count, sizeof(buf));
-		if (copy_from_user(&buf, p, bytes))
-			return -EFAULT;
+	size_t len;
+	int ret = 0;
+	u8 block[BLAKE2S_BLOCK_SIZE];
 
-		for (b = bytes; b > 0; b -= sizeof(u32), i++) {
-			if (!arch_get_random_int(&t))
-				break;
-			buf[i] ^= t;
+	while (count) {
+		len = min(count, sizeof(block));
+		if (copy_from_user(block, ubuf, len)) {
+			ret = -EFAULT;
+			goto out;
 		}
-
-		count -= bytes;
-		p += bytes;
-
-		mix_pool_bytes(buf, bytes);
+		count -= len;
+		ubuf += len;
+		mix_pool_bytes(block, len);
 		cond_resched();
 	}
 
-	return 0;
+out:
+	memzero_explicit(block, sizeof(block));
+	return ret;
 }
 
 static ssize_t random_write(struct file *file, const char __user *buffer,
 			    size_t count, loff_t *ppos)
 {
-	size_t ret;
+	int ret;
 
 	ret = write_pool(buffer, count);
 	if (ret)
@@ -1816,6 +1514,17 @@ static ssize_t random_write(struct file *file, const char __user *buffer,
 	return (ssize_t)count;
 }
 
+static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes,
+			   loff_t *ppos)
+{
+	int ret;
+
+	ret = wait_for_random_bytes();
+	if (ret != 0)
+		return ret;
+	return get_random_bytes_user(buf, nbytes);
+}
+
 static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 {
 	int size, ent_count;
@@ -1824,9 +1533,8 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 	case RNDGETENTCNT:
-		/* inherently racy, no point locking */
-		ent_count = POOL_ENTROPY_BITS();
-		if (put_user(ent_count, p))
+		/* Inherently racy, no point locking. */
+		if (put_user(input_pool.entropy_count, p))
 			return -EFAULT;
 		return 0;
 	case RNDADDTOENTCNT:
@@ -1834,7 +1542,10 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			return -EPERM;
 		if (get_user(ent_count, p))
 			return -EFAULT;
-		return credit_entropy_bits_safe(ent_count);
+		if (ent_count < 0)
+			return -EINVAL;
+		credit_entropy_bits(ent_count);
+		return 0;
 	case RNDADDENTROPY:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
@@ -1847,7 +1558,8 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 		retval = write_pool((const char __user *)p, size);
 		if (retval < 0)
 			return retval;
-		return credit_entropy_bits_safe(ent_count);
+		credit_entropy_bits(ent_count);
+		return 0;
 	case RNDZAPENTCNT:
 	case RNDCLEARPOOL:
 		/*
@@ -1856,7 +1568,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 		 */
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		if (xchg(&input_pool.entropy_count, 0) && random_write_wakeup_bits) {
+		if (xchg(&input_pool.entropy_count, 0) >= POOL_MIN_BITS) {
 			wake_up_interruptible(&random_write_wait);
 			kill_fasync(&fasync, SIGIO, POLL_OUT);
 		}
@@ -1864,10 +1576,9 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 	case RNDRESEEDCRNG:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		if (crng_init < 2)
+		if (!crng_ready())
 			return -ENODATA;
-		crng_reseed(&primary_crng, true);
-		WRITE_ONCE(crng_global_init_time, jiffies - 1);
+		crng_reseed(false);
 		return 0;
 	default:
 		return -EINVAL;
@@ -1889,46 +1600,34 @@ const struct file_operations random_fops = {
 	.llseek = noop_llseek,
 };
 
-const struct file_operations urandom_fops = {
-	.read = urandom_read,
-	.write = random_write,
-	.unlocked_ioctl = random_ioctl,
-	.compat_ioctl = compat_ptr_ioctl,
-	.fasync = random_fasync,
-	.llseek = noop_llseek,
-};
-
-SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
-		flags)
-{
-	int ret;
-
-	if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE))
-		return -EINVAL;
-
-	/*
-	 * Requesting insecure and blocking randomness at the same time makes
-	 * no sense.
-	 */
-	if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM))
-		return -EINVAL;
-
-	if (count > INT_MAX)
-		count = INT_MAX;
-
-	if (!(flags & GRND_INSECURE) && !crng_ready()) {
-		if (flags & GRND_NONBLOCK)
-			return -EAGAIN;
-		ret = wait_for_random_bytes();
-		if (unlikely(ret))
-			return ret;
-	}
-	return urandom_read_nowarn(NULL, buf, count, NULL);
-}
 
 /********************************************************************
  *
- * Sysctl interface
+ * Sysctl interface.
+ *
+ * These are partly unused legacy knobs with dummy values to not break
+ * userspace and partly still useful things. They are usually accessible
+ * in /proc/sys/kernel/random/ and are as follows:
+ *
+ * - boot_id - a UUID representing the current boot.
+ *
+ * - uuid - a random UUID, different each time the file is read.
+ *
+ * - poolsize - the number of bits of entropy that the input pool can
+ *   hold, tied to the POOL_BITS constant.
+ *
+ * - entropy_avail - the number of bits of entropy currently in the
+ *   input pool. Always <= poolsize.
+ *
+ * - write_wakeup_threshold - the amount of entropy in the input pool
+ *   below which write polls to /dev/random will unblock, requesting
+ *   more entropy, tied to the POOL_MIN_BITS constant. It is writable
+ *   to avoid breaking old userspaces, but writing to it does not
+ *   change any behavior of the RNG.
+ *
+ * - urandom_min_reseed_secs - fixed to the value CRNG_RESEED_INTERVAL.
+ *   It is writable to avoid breaking old userspaces, but writing
+ *   to it does not change any behavior of the RNG.
  *
  ********************************************************************/
 
@@ -1936,25 +1635,28 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
 
 #include <linux/sysctl.h>
 
-static int min_write_thresh;
-static int max_write_thresh = POOL_BITS;
-static int random_min_urandom_seed = 60;
-static char sysctl_bootid[16];
+static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ;
+static int sysctl_random_write_wakeup_bits = POOL_MIN_BITS;
+static int sysctl_poolsize = POOL_BITS;
+static u8 sysctl_bootid[UUID_SIZE];
 
 /*
  * This function is used to return both the bootid UUID, and random
- * UUID.  The difference is in whether table->data is NULL; if it is,
+ * UUID. The difference is in whether table->data is NULL; if it is,
  * then a new UUID is generated and returned to the user.
- *
- * If the user accesses this via the proc interface, the UUID will be
- * returned as an ASCII string in the standard UUID format; if via the
- * sysctl system call, as 16 bytes of binary data.
  */
 static int proc_do_uuid(struct ctl_table *table, int write, void *buffer,
 			size_t *lenp, loff_t *ppos)
 {
-	struct ctl_table fake_table;
-	unsigned char buf[64], tmp_uuid[16], *uuid;
+	u8 tmp_uuid[UUID_SIZE], *uuid;
+	char uuid_string[UUID_STRING_LEN + 1];
+	struct ctl_table fake_table = {
+		.data = uuid_string,
+		.maxlen = UUID_STRING_LEN
+	};
+
+	if (write)
+		return -EPERM;
 
 	uuid = table->data;
 	if (!uuid) {
@@ -1969,32 +1671,17 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer,
 		spin_unlock(&bootid_spinlock);
 	}
 
-	sprintf(buf, "%pU", uuid);
-
-	fake_table.data = buf;
-	fake_table.maxlen = sizeof(buf);
-
-	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
+	snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid);
+	return proc_dostring(&fake_table, 0, buffer, lenp, ppos);
 }
 
-/*
- * Return entropy available scaled to integral bits
- */
-static int proc_do_entropy(struct ctl_table *table, int write, void *buffer,
-			   size_t *lenp, loff_t *ppos)
+/* The same as proc_dointvec, but writes don't change anything. */
+static int proc_do_rointvec(struct ctl_table *table, int write, void *buffer,
+			    size_t *lenp, loff_t *ppos)
 {
-	struct ctl_table fake_table;
-	int entropy_count;
-
-	entropy_count = *(int *)table->data >> POOL_ENTROPY_SHIFT;
-
-	fake_table.data = &entropy_count;
-	fake_table.maxlen = sizeof(entropy_count);
-
-	return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
+	return write ? 0 : proc_dointvec(table, 0, buffer, lenp, ppos);
 }
 
-static int sysctl_poolsize = POOL_BITS;
 static struct ctl_table random_table[] = {
 	{
 		.procname	= "poolsize",
@@ -2005,56 +1692,36 @@ static struct ctl_table random_table[] = {
 	},
 	{
 		.procname	= "entropy_avail",
+		.data		= &input_pool.entropy_count,
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_do_entropy,
-		.data		= &input_pool.entropy_count,
+		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "write_wakeup_threshold",
-		.data		= &random_write_wakeup_bits,
+		.data		= &sysctl_random_write_wakeup_bits,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_write_thresh,
-		.extra2		= &max_write_thresh,
+		.proc_handler	= proc_do_rointvec,
 	},
 	{
 		.procname	= "urandom_min_reseed_secs",
-		.data		= &random_min_urandom_seed,
+		.data		= &sysctl_random_min_urandom_seed,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_do_rointvec,
 	},
 	{
 		.procname	= "boot_id",
 		.data		= &sysctl_bootid,
-		.maxlen		= 16,
 		.mode		= 0444,
 		.proc_handler	= proc_do_uuid,
 	},
 	{
 		.procname	= "uuid",
-		.maxlen		= 16,
 		.mode		= 0444,
 		.proc_handler	= proc_do_uuid,
 	},
-#ifdef ADD_INTERRUPT_BENCH
-	{
-		.procname	= "add_interrupt_avg_cycles",
-		.data		= &avg_cycles,
-		.maxlen		= sizeof(avg_cycles),
-		.mode		= 0444,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-	{
-		.procname	= "add_interrupt_avg_deviation",
-		.data		= &avg_deviation,
-		.maxlen		= sizeof(avg_deviation),
-		.mode		= 0444,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-#endif
 	{ }
 };
 
@@ -2068,170 +1735,4 @@ static int __init random_sysctls_init(void)
 	return 0;
 }
 device_initcall(random_sysctls_init);
-#endif	/* CONFIG_SYSCTL */
-
-struct batched_entropy {
-	union {
-		u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)];
-		u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)];
-	};
-	unsigned int position;
-	spinlock_t batch_lock;
-};
-
-/*
- * Get a random word for internal kernel use only. The quality of the random
- * number is good as /dev/urandom, but there is no backtrack protection, with
- * the goal of being quite fast and not depleting entropy. In order to ensure
- * that the randomness provided by this function is okay, the function
- * wait_for_random_bytes() should be called and return 0 at least once at any
- * point prior.
- */
-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = {
-	.batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock),
-};
-
-u64 get_random_u64(void)
-{
-	u64 ret;
-	unsigned long flags;
-	struct batched_entropy *batch;
-	static void *previous;
-
-	warn_unseeded_randomness(&previous);
-
-	batch = raw_cpu_ptr(&batched_entropy_u64);
-	spin_lock_irqsave(&batch->batch_lock, flags);
-	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
-		extract_crng((u8 *)batch->entropy_u64);
-		batch->position = 0;
-	}
-	ret = batch->entropy_u64[batch->position++];
-	spin_unlock_irqrestore(&batch->batch_lock, flags);
-	return ret;
-}
-EXPORT_SYMBOL(get_random_u64);
-
-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = {
-	.batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock),
-};
-u32 get_random_u32(void)
-{
-	u32 ret;
-	unsigned long flags;
-	struct batched_entropy *batch;
-	static void *previous;
-
-	warn_unseeded_randomness(&previous);
-
-	batch = raw_cpu_ptr(&batched_entropy_u32);
-	spin_lock_irqsave(&batch->batch_lock, flags);
-	if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
-		extract_crng((u8 *)batch->entropy_u32);
-		batch->position = 0;
-	}
-	ret = batch->entropy_u32[batch->position++];
-	spin_unlock_irqrestore(&batch->batch_lock, flags);
-	return ret;
-}
-EXPORT_SYMBOL(get_random_u32);
-
-/* It's important to invalidate all potential batched entropy that might
- * be stored before the crng is initialized, which we can do lazily by
- * simply resetting the counter to zero so that it's re-extracted on the
- * next usage. */
-static void invalidate_batched_entropy(void)
-{
-	int cpu;
-	unsigned long flags;
-
-	for_each_possible_cpu(cpu) {
-		struct batched_entropy *batched_entropy;
-
-		batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu);
-		spin_lock_irqsave(&batched_entropy->batch_lock, flags);
-		batched_entropy->position = 0;
-		spin_unlock(&batched_entropy->batch_lock);
-
-		batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu);
-		spin_lock(&batched_entropy->batch_lock);
-		batched_entropy->position = 0;
-		spin_unlock_irqrestore(&batched_entropy->batch_lock, flags);
-	}
-}
-
-/**
- * randomize_page - Generate a random, page aligned address
- * @start:	The smallest acceptable address the caller will take.
- * @range:	The size of the area, starting at @start, within which the
- *		random address must fall.
- *
- * If @start + @range would overflow, @range is capped.
- *
- * NOTE: Historical use of randomize_range, which this replaces, presumed that
- * @start was already page aligned.  We now align it regardless.
- *
- * Return: A page aligned address within [start, start + range).  On error,
- * @start is returned.
- */
-unsigned long randomize_page(unsigned long start, unsigned long range)
-{
-	if (!PAGE_ALIGNED(start)) {
-		range -= PAGE_ALIGN(start) - start;
-		start = PAGE_ALIGN(start);
-	}
-
-	if (start > ULONG_MAX - range)
-		range = ULONG_MAX - start;
-
-	range >>= PAGE_SHIFT;
-
-	if (range == 0)
-		return start;
-
-	return start + (get_random_long() % range << PAGE_SHIFT);
-}
-
-/* Interface for in-kernel drivers of true hardware RNGs.
- * Those devices may produce endless random bits and will be throttled
- * when our pool is full.
- */
-void add_hwgenerator_randomness(const char *buffer, size_t count,
-				size_t entropy)
-{
-	if (unlikely(crng_init == 0)) {
-		size_t ret = crng_fast_load(buffer, count);
-		mix_pool_bytes(buffer, ret);
-		count -= ret;
-		buffer += ret;
-		if (!count || crng_init == 0)
-			return;
-	}
-
-	/* Throttle writing if we're above the trickle threshold.
-	 * We'll be woken up again once below random_write_wakeup_thresh,
-	 * when the calling thread is about to terminate, or once
-	 * CRNG_RESEED_INTERVAL has lapsed.
-	 */
-	wait_event_interruptible_timeout(random_write_wait,
-			!system_wq || kthread_should_stop() ||
-			POOL_ENTROPY_BITS() <= random_write_wakeup_bits,
-			CRNG_RESEED_INTERVAL);
-	mix_pool_bytes(buffer, count);
-	credit_entropy_bits(entropy);
-}
-EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
-
-/* Handle random seed passed by bootloader.
- * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise
- * it would be regarded as device data.
- * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER.
- */
-void add_bootloader_randomness(const void *buf, unsigned int size)
-{
-	if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER))
-		add_hwgenerator_randomness(buf, size, size * 8);
-	else
-		add_device_randomness(buf, size);
-}
-EXPORT_SYMBOL_GPL(add_bootloader_randomness);
+#endif
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index b009e7479b70..783d65fc71f0 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -274,14 +274,6 @@ static void tpm_dev_release(struct device *dev)
 	kfree(chip);
 }
 
-static void tpm_devs_release(struct device *dev)
-{
-	struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs);
-
-	/* release the master device reference */
-	put_device(&chip->dev);
-}
-
 /**
  * tpm_class_shutdown() - prepare the TPM device for loss of power.
  * @dev: device to which the chip is associated.
@@ -344,7 +336,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
 	chip->dev_num = rc;
 
 	device_initialize(&chip->dev);
-	device_initialize(&chip->devs);
 
 	chip->dev.class = tpm_class;
 	chip->dev.class->shutdown_pre = tpm_class_shutdown;
@@ -352,39 +343,20 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
 	chip->dev.parent = pdev;
 	chip->dev.groups = chip->groups;
 
-	chip->devs.parent = pdev;
-	chip->devs.class = tpmrm_class;
-	chip->devs.release = tpm_devs_release;
-	/* get extra reference on main device to hold on
-	 * behalf of devs.  This holds the chip structure
-	 * while cdevs is in use.  The corresponding put
-	 * is in the tpm_devs_release (TPM2 only)
-	 */
-	if (chip->flags & TPM_CHIP_FLAG_TPM2)
-		get_device(&chip->dev);
-
 	if (chip->dev_num == 0)
 		chip->dev.devt = MKDEV(MISC_MAJOR, TPM_MINOR);
 	else
 		chip->dev.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num);
 
-	chip->devs.devt =
-		MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES);
-
 	rc = dev_set_name(&chip->dev, "tpm%d", chip->dev_num);
 	if (rc)
 		goto out;
-	rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num);
-	if (rc)
-		goto out;
 
 	if (!pdev)
 		chip->flags |= TPM_CHIP_FLAG_VIRTUAL;
 
 	cdev_init(&chip->cdev, &tpm_fops);
-	cdev_init(&chip->cdevs, &tpmrm_fops);
 	chip->cdev.owner = THIS_MODULE;
-	chip->cdevs.owner = THIS_MODULE;
 
 	rc = tpm2_init_space(&chip->work_space, TPM2_SPACE_BUFFER_SIZE);
 	if (rc) {
@@ -396,7 +368,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
 	return chip;
 
 out:
-	put_device(&chip->devs);
 	put_device(&chip->dev);
 	return ERR_PTR(rc);
 }
@@ -445,14 +416,9 @@ static int tpm_add_char_device(struct tpm_chip *chip)
 	}
 
 	if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip)) {
-		rc = cdev_device_add(&chip->cdevs, &chip->devs);
-		if (rc) {
-			dev_err(&chip->devs,
-				"unable to cdev_device_add() %s, major %d, minor %d, err=%d\n",
-				dev_name(&chip->devs), MAJOR(chip->devs.devt),
-				MINOR(chip->devs.devt), rc);
-			return rc;
-		}
+		rc = tpm_devs_add(chip);
+		if (rc)
+			goto err_del_cdev;
 	}
 
 	/* Make the chip available. */
@@ -460,6 +426,10 @@ static int tpm_add_char_device(struct tpm_chip *chip)
 	idr_replace(&dev_nums_idr, chip, chip->dev_num);
 	mutex_unlock(&idr_lock);
 
+	return 0;
+
+err_del_cdev:
+	cdev_device_del(&chip->cdev, &chip->dev);
 	return rc;
 }
 
@@ -654,7 +624,7 @@ void tpm_chip_unregister(struct tpm_chip *chip)
 		hwrng_unregister(&chip->hwrng);
 	tpm_bios_log_teardown(chip);
 	if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip))
-		cdev_device_del(&chip->cdevs, &chip->devs);
+		tpm_devs_remove(chip);
 	tpm_del_char_device(chip);
 }
 EXPORT_SYMBOL_GPL(tpm_chip_unregister);
diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c
index c08cbb306636..dc4c0a0a5129 100644
--- a/drivers/char/tpm/tpm-dev-common.c
+++ b/drivers/char/tpm/tpm-dev-common.c
@@ -69,7 +69,13 @@ static void tpm_dev_async_work(struct work_struct *work)
 	ret = tpm_dev_transmit(priv->chip, priv->space, priv->data_buffer,
 			       sizeof(priv->data_buffer));
 	tpm_put_ops(priv->chip);
-	if (ret > 0) {
+
+	/*
+	 * If ret is > 0 then tpm_dev_transmit returned the size of the
+	 * response. If ret is < 0 then tpm_dev_transmit failed and
+	 * returned an error code.
+	 */
+	if (ret != 0) {
 		priv->response_length = ret;
 		mod_timer(&priv->user_read_timer, jiffies + (120 * HZ));
 	}
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 283f78211c3a..2163c6ee0d36 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -234,6 +234,8 @@ int tpm2_prepare_space(struct tpm_chip *chip, struct tpm_space *space, u8 *cmd,
 		       size_t cmdsiz);
 int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space, void *buf,
 		      size_t *bufsiz);
+int tpm_devs_add(struct tpm_chip *chip);
+void tpm_devs_remove(struct tpm_chip *chip);
 
 void tpm_bios_log_setup(struct tpm_chip *chip);
 void tpm_bios_log_teardown(struct tpm_chip *chip);
diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c
index 97e916856cf3..ffb35f0154c1 100644
--- a/drivers/char/tpm/tpm2-space.c
+++ b/drivers/char/tpm/tpm2-space.c
@@ -58,12 +58,12 @@ int tpm2_init_space(struct tpm_space *space, unsigned int buf_size)
 
 void tpm2_del_space(struct tpm_chip *chip, struct tpm_space *space)
 {
-	mutex_lock(&chip->tpm_mutex);
-	if (!tpm_chip_start(chip)) {
+
+	if (tpm_try_get_ops(chip) == 0) {
 		tpm2_flush_sessions(chip, space);
-		tpm_chip_stop(chip);
+		tpm_put_ops(chip);
 	}
-	mutex_unlock(&chip->tpm_mutex);
+
 	kfree(space->context_buf);
 	kfree(space->session_buf);
 }
@@ -574,3 +574,68 @@ out:
 	dev_err(&chip->dev, "%s: error %d\n", __func__, rc);
 	return rc;
 }
+
+/*
+ * Put the reference to the main device.
+ */
+static void tpm_devs_release(struct device *dev)
+{
+	struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs);
+
+	/* release the master device reference */
+	put_device(&chip->dev);
+}
+
+/*
+ * Remove the device file for exposed TPM spaces and release the device
+ * reference. This may also release the reference to the master device.
+ */
+void tpm_devs_remove(struct tpm_chip *chip)
+{
+	cdev_device_del(&chip->cdevs, &chip->devs);
+	put_device(&chip->devs);
+}
+
+/*
+ * Add a device file to expose TPM spaces. Also take a reference to the
+ * main device.
+ */
+int tpm_devs_add(struct tpm_chip *chip)
+{
+	int rc;
+
+	device_initialize(&chip->devs);
+	chip->devs.parent = chip->dev.parent;
+	chip->devs.class = tpmrm_class;
+
+	/*
+	 * Get extra reference on main device to hold on behalf of devs.
+	 * This holds the chip structure while cdevs is in use. The
+	 * corresponding put is in the tpm_devs_release.
+	 */
+	get_device(&chip->dev);
+	chip->devs.release = tpm_devs_release;
+	chip->devs.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES);
+	cdev_init(&chip->cdevs, &tpmrm_fops);
+	chip->cdevs.owner = THIS_MODULE;
+
+	rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num);
+	if (rc)
+		goto err_put_devs;
+
+	rc = cdev_device_add(&chip->cdevs, &chip->devs);
+	if (rc) {
+		dev_err(&chip->devs,
+			"unable to cdev_device_add() %s, major %d, minor %d, err=%d\n",
+			dev_name(&chip->devs), MAJOR(chip->devs.devt),
+			MINOR(chip->devs.devt), rc);
+		goto err_put_devs;
+	}
+
+	return 0;
+
+err_put_devs:
+	put_device(&chip->devs);
+
+	return rc;
+}
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index da5b30771418..f53e0cf1ec7e 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -126,16 +126,16 @@ static void vtpm_cancel(struct tpm_chip *chip)
 	notify_remote_via_evtchn(priv->evtchn);
 }
 
-static unsigned int shr_data_offset(struct vtpm_shared_page *shr)
+static size_t shr_data_offset(struct vtpm_shared_page *shr)
 {
-	return sizeof(*shr) + sizeof(u32) * shr->nr_extra_pages;
+	return struct_size(shr, extra_pages, shr->nr_extra_pages);
 }
 
 static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
 {
 	struct tpm_private *priv = dev_get_drvdata(&chip->dev);
 	struct vtpm_shared_page *shr = priv->shr;
-	unsigned int offset = shr_data_offset(shr);
+	size_t offset = shr_data_offset(shr);
 
 	u32 ordinal;
 	unsigned long duration;
@@ -177,7 +177,7 @@ static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 {
 	struct tpm_private *priv = dev_get_drvdata(&chip->dev);
 	struct vtpm_shared_page *shr = priv->shr;
-	unsigned int offset = shr_data_offset(shr);
+	size_t offset = shr_data_offset(shr);
 	size_t length = shr->length;
 
 	if (shr->state == VTPM_STATE_IDLE)
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 2359889a35a0..e3c430539a17 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1957,6 +1957,13 @@ static void virtcons_remove(struct virtio_device *vdev)
 	list_del(&portdev->list);
 	spin_unlock_irq(&pdrvdata_lock);
 
+	/* Device is going away, exit any polling for buffers */
+	virtio_break_device(vdev);
+	if (use_multiport(portdev))
+		flush_work(&portdev->control_work);
+	else
+		flush_work(&portdev->config_work);
+
 	/* Disable interrupts for vqs */
 	virtio_reset_device(vdev);
 	/* Finish up work that's lined up */
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index ad4256d54361..d4d67fbae869 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -231,6 +231,8 @@ config COMMON_CLK_GEMINI
 
 config COMMON_CLK_LAN966X
 	bool "Generic Clock Controller driver for LAN966X SoC"
+	depends on HAS_IOMEM
+	depends on OF
 	help
 	  This driver provides support for Generic Clock Controller(GCK) on
 	  LAN966X SoC. GCK generates and supplies clock to various peripherals
diff --git a/drivers/clk/qcom/dispcc-sc7180.c b/drivers/clk/qcom/dispcc-sc7180.c
index 538e4963c915..5d2ae297e741 100644
--- a/drivers/clk/qcom/dispcc-sc7180.c
+++ b/drivers/clk/qcom/dispcc-sc7180.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2019, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2019, 2022, The Linux Foundation. All rights reserved.
  */
 
 #include <linux/clk-provider.h>
@@ -625,6 +625,9 @@ static struct clk_branch disp_cc_mdss_vsync_clk = {
 
 static struct gdsc mdss_gdsc = {
 	.gdscr = 0x3000,
+	.en_rest_wait_val = 0x2,
+	.en_few_wait_val = 0x2,
+	.clk_dis_wait_val = 0xf,
 	.pd = {
 		.name = "mdss_gdsc",
 	},
diff --git a/drivers/clk/qcom/dispcc-sc7280.c b/drivers/clk/qcom/dispcc-sc7280.c
index 4ef4ae231794..ad596d567f6a 100644
--- a/drivers/clk/qcom/dispcc-sc7280.c
+++ b/drivers/clk/qcom/dispcc-sc7280.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021-2022, The Linux Foundation. All rights reserved.
  */
 
 #include <linux/clk-provider.h>
@@ -787,6 +787,9 @@ static struct clk_branch disp_cc_sleep_clk = {
 
 static struct gdsc disp_cc_mdss_core_gdsc = {
 	.gdscr = 0x1004,
+	.en_rest_wait_val = 0x2,
+	.en_few_wait_val = 0x2,
+	.clk_dis_wait_val = 0xf,
 	.pd = {
 		.name = "disp_cc_mdss_core_gdsc",
 	},
diff --git a/drivers/clk/qcom/dispcc-sm8250.c b/drivers/clk/qcom/dispcc-sm8250.c
index 566fdfa0a15b..db9379634fb2 100644
--- a/drivers/clk/qcom/dispcc-sm8250.c
+++ b/drivers/clk/qcom/dispcc-sm8250.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2018-2020, 2022, The Linux Foundation. All rights reserved.
  */
 
 #include <linux/clk-provider.h>
@@ -1126,6 +1126,9 @@ static struct clk_branch disp_cc_mdss_vsync_clk = {
 
 static struct gdsc mdss_gdsc = {
 	.gdscr = 0x3000,
+	.en_rest_wait_val = 0x2,
+	.en_few_wait_val = 0x2,
+	.clk_dis_wait_val = 0xf,
 	.pd = {
 		.name = "mdss_gdsc",
 	},
diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c
index 7e1dd8ccfa38..44520efc6c72 100644
--- a/drivers/clk/qcom/gdsc.c
+++ b/drivers/clk/qcom/gdsc.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2015, 2017-2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2015, 2017-2018, 2022, The Linux Foundation. All rights reserved.
  */
 
 #include <linux/bitops.h>
@@ -35,9 +35,14 @@
 #define CFG_GDSCR_OFFSET		0x4
 
 /* Wait 2^n CXO cycles between all states. Here, n=2 (4 cycles). */
-#define EN_REST_WAIT_VAL	(0x2 << 20)
-#define EN_FEW_WAIT_VAL		(0x8 << 16)
-#define CLK_DIS_WAIT_VAL	(0x2 << 12)
+#define EN_REST_WAIT_VAL	0x2
+#define EN_FEW_WAIT_VAL		0x8
+#define CLK_DIS_WAIT_VAL	0x2
+
+/* Transition delay shifts */
+#define EN_REST_WAIT_SHIFT	20
+#define EN_FEW_WAIT_SHIFT	16
+#define CLK_DIS_WAIT_SHIFT	12
 
 #define RETAIN_MEM		BIT(14)
 #define RETAIN_PERIPH		BIT(13)
@@ -380,7 +385,18 @@ static int gdsc_init(struct gdsc *sc)
 	 */
 	mask = HW_CONTROL_MASK | SW_OVERRIDE_MASK |
 	       EN_REST_WAIT_MASK | EN_FEW_WAIT_MASK | CLK_DIS_WAIT_MASK;
-	val = EN_REST_WAIT_VAL | EN_FEW_WAIT_VAL | CLK_DIS_WAIT_VAL;
+
+	if (!sc->en_rest_wait_val)
+		sc->en_rest_wait_val = EN_REST_WAIT_VAL;
+	if (!sc->en_few_wait_val)
+		sc->en_few_wait_val = EN_FEW_WAIT_VAL;
+	if (!sc->clk_dis_wait_val)
+		sc->clk_dis_wait_val = CLK_DIS_WAIT_VAL;
+
+	val = sc->en_rest_wait_val << EN_REST_WAIT_SHIFT |
+		sc->en_few_wait_val << EN_FEW_WAIT_SHIFT |
+		sc->clk_dis_wait_val << CLK_DIS_WAIT_SHIFT;
+
 	ret = regmap_update_bits(sc->regmap, sc->gdscr, mask, val);
 	if (ret)
 		return ret;
diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h
index d7cc4c21a9d4..ad313d7210bd 100644
--- a/drivers/clk/qcom/gdsc.h
+++ b/drivers/clk/qcom/gdsc.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2015, 2017-2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2015, 2017-2018, 2022, The Linux Foundation. All rights reserved.
  */
 
 #ifndef __QCOM_GDSC_H__
@@ -22,6 +22,9 @@ struct reset_controller_dev;
  * @cxcs: offsets of branch registers to toggle mem/periph bits in
  * @cxc_count: number of @cxcs
  * @pwrsts: Possible powerdomain power states
+ * @en_rest_wait_val: transition delay value for receiving enr ack signal
+ * @en_few_wait_val: transition delay value for receiving enf ack signal
+ * @clk_dis_wait_val: transition delay value for halting clock
  * @resets: ids of resets associated with this gdsc
  * @reset_count: number of @resets
  * @rcdev: reset controller
@@ -36,6 +39,9 @@ struct gdsc {
 	unsigned int			clamp_io_ctrl;
 	unsigned int			*cxcs;
 	unsigned int			cxc_count;
+	unsigned int			en_rest_wait_val;
+	unsigned int			en_few_wait_val;
+	unsigned int			clk_dis_wait_val;
 	const u8			pwrsts;
 /* Powerdomain allowable state bitfields */
 #define PWRSTS_OFF		BIT(0)
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index cfb8ea0df3b1..1ea556e75494 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -713,7 +713,6 @@ config INGENIC_OST
 config MICROCHIP_PIT64B
 	bool "Microchip PIT64B support"
 	depends on OF || COMPILE_TEST
-	select CLKSRC_MMIO
 	select TIMER_OF
 	help
 	  This option enables Microchip PIT64B timer for Atmel
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index eb596ff9e7bb..279ddff81ab4 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -229,8 +229,10 @@ static int __init parse_pmtmr(char *arg)
 	int ret;
 
 	ret = kstrtouint(arg, 16, &base);
-	if (ret)
-		return ret;
+	if (ret) {
+		pr_warn("PMTMR: invalid 'pmtmr=' value: '%s'\n", arg);
+		return 1;
+	}
 
 	pr_info("PMTMR IOPort override: 0x%04x -> 0x%04x\n", pmtmr_ioport,
 		base);
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 1ecd52f903b8..9ab8221ee3c6 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -880,10 +880,19 @@ static void __arch_timer_setup(unsigned type,
 	clockevents_config_and_register(clk, arch_timer_rate, 0xf, max_delta);
 }
 
-static void arch_timer_evtstrm_enable(int divider)
+static void arch_timer_evtstrm_enable(unsigned int divider)
 {
 	u32 cntkctl = arch_timer_get_cntkctl();
 
+#ifdef CONFIG_ARM64
+	/* ECV is likely to require a large divider. Use the EVNTIS flag. */
+	if (cpus_have_const_cap(ARM64_HAS_ECV) && divider > 15) {
+		cntkctl |= ARCH_TIMER_EVT_INTERVAL_SCALE;
+		divider -= 8;
+	}
+#endif
+
+	divider = min(divider, 15U);
 	cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK;
 	/* Set the divider and enable virtual event stream */
 	cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT)
@@ -912,7 +921,7 @@ static void arch_timer_configure_evtstream(void)
 		lsb++;
 
 	/* enable event stream */
-	arch_timer_evtstrm_enable(max(0, min(lsb, 15)));
+	arch_timer_evtstrm_enable(max(0, lsb));
 }
 
 static void arch_counter_set_user_access(void)
diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index 6db3d5511b0f..f29c812b70c9 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -60,27 +60,18 @@
 #define MCT_CLKEVENTS_RATING		350
 #endif
 
+/* There are four Global timers starting with 0 offset */
+#define MCT_G0_IRQ	0
+/* Local timers count starts after global timer count */
+#define MCT_L0_IRQ	4
+/* Max number of IRQ as per DT binding document */
+#define MCT_NR_IRQS	20
+
 enum {
 	MCT_INT_SPI,
 	MCT_INT_PPI
 };
 
-enum {
-	MCT_G0_IRQ,
-	MCT_G1_IRQ,
-	MCT_G2_IRQ,
-	MCT_G3_IRQ,
-	MCT_L0_IRQ,
-	MCT_L1_IRQ,
-	MCT_L2_IRQ,
-	MCT_L3_IRQ,
-	MCT_L4_IRQ,
-	MCT_L5_IRQ,
-	MCT_L6_IRQ,
-	MCT_L7_IRQ,
-	MCT_NR_IRQS,
-};
-
 static void __iomem *reg_base;
 static unsigned long clk_rate;
 static unsigned int mct_int_type;
@@ -89,7 +80,11 @@ static int mct_irqs[MCT_NR_IRQS];
 struct mct_clock_event_device {
 	struct clock_event_device evt;
 	unsigned long base;
-	char name[10];
+	/**
+	 *  The length of the name must be adjusted if number of
+	 *  local timer interrupts grow over two digits
+	 */
+	char name[11];
 };
 
 static void exynos4_mct_write(unsigned int value, unsigned long offset)
@@ -541,6 +536,11 @@ static int __init exynos4_timer_interrupts(struct device_node *np,
 	 * irqs are specified.
 	 */
 	nr_irqs = of_irq_count(np);
+	if (nr_irqs > ARRAY_SIZE(mct_irqs)) {
+		pr_err("exynos-mct: too many (%d) interrupts configured in DT\n",
+			nr_irqs);
+		nr_irqs = ARRAY_SIZE(mct_irqs);
+	}
 	for (i = MCT_L0_IRQ; i < nr_irqs; i++)
 		mct_irqs[i] = irq_of_parse_and_map(np, i);
 
@@ -553,11 +553,14 @@ static int __init exynos4_timer_interrupts(struct device_node *np,
 		     mct_irqs[MCT_L0_IRQ], err);
 	} else {
 		for_each_possible_cpu(cpu) {
-			int mct_irq = mct_irqs[MCT_L0_IRQ + cpu];
+			int mct_irq;
 			struct mct_clock_event_device *pcpu_mevt =
 				per_cpu_ptr(&percpu_mct_tick, cpu);
 
 			pcpu_mevt->evt.irq = -1;
+			if (MCT_L0_IRQ + cpu >= ARRAY_SIZE(mct_irqs))
+				break;
+			mct_irq = mct_irqs[MCT_L0_IRQ + cpu];
 
 			irq_set_status_flags(mct_irq, IRQ_NOAUTOEN);
 			if (request_irq(mct_irq,
diff --git a/drivers/clocksource/timer-imx-sysctr.c b/drivers/clocksource/timer-imx-sysctr.c
index 55a8e198d2a1..523e37662a6e 100644
--- a/drivers/clocksource/timer-imx-sysctr.c
+++ b/drivers/clocksource/timer-imx-sysctr.c
@@ -110,7 +110,7 @@ static struct timer_of to_sysctr = {
 	},
 	.of_irq = {
 		.handler		= sysctr_timer_interrupt,
-		.flags			= IRQF_TIMER | IRQF_IRQPOLL,
+		.flags			= IRQF_TIMER,
 	},
 	.of_clk = {
 		.name = "per",
diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c
index 2cdc077a39f5..bd64a8a8427f 100644
--- a/drivers/clocksource/timer-imx-tpm.c
+++ b/drivers/clocksource/timer-imx-tpm.c
@@ -32,8 +32,8 @@
 #define TPM_C0SC_CHF_MASK		(0x1 << 7)
 #define TPM_C0V				0x24
 
-static int counter_width;
-static void __iomem *timer_base;
+static int counter_width __ro_after_init;
+static void __iomem *timer_base __ro_after_init;
 
 static inline void tpm_timer_disable(void)
 {
@@ -73,12 +73,12 @@ static unsigned long tpm_read_current_timer(void)
 {
 	return tpm_read_counter();
 }
-#endif
 
 static u64 notrace tpm_read_sched_clock(void)
 {
 	return tpm_read_counter();
 }
+#endif
 
 static int tpm_set_next_event(unsigned long delta,
 				struct clock_event_device *evt)
@@ -127,9 +127,9 @@ static irqreturn_t tpm_timer_interrupt(int irq, void *dev_id)
 static struct timer_of to_tpm = {
 	.flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK,
 	.clkevt = {
-		.name			= "i.MX7ULP TPM Timer",
+		.name			= "i.MX TPM Timer",
 		.rating			= 200,
-		.features		= CLOCK_EVT_FEAT_ONESHOT,
+		.features		= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ,
 		.set_state_shutdown	= tpm_set_state_shutdown,
 		.set_state_oneshot	= tpm_set_state_oneshot,
 		.set_next_event		= tpm_set_next_event,
@@ -137,7 +137,7 @@ static struct timer_of to_tpm = {
 	},
 	.of_irq = {
 		.handler		= tpm_timer_interrupt,
-		.flags			= IRQF_TIMER | IRQF_IRQPOLL,
+		.flags			= IRQF_TIMER,
 	},
 	.of_clk = {
 		.name = "per",
@@ -150,10 +150,10 @@ static int __init tpm_clocksource_init(void)
 	tpm_delay_timer.read_current_timer = &tpm_read_current_timer;
 	tpm_delay_timer.freq = timer_of_rate(&to_tpm) >> 3;
 	register_current_timer_delay(&tpm_delay_timer);
-#endif
 
 	sched_clock_register(tpm_read_sched_clock, counter_width,
 			     timer_of_rate(&to_tpm) >> 3);
+#endif
 
 	return clocksource_mmio_init(timer_base + TPM_CNT,
 				     "imx-tpm",
diff --git a/drivers/clocksource/timer-microchip-pit64b.c b/drivers/clocksource/timer-microchip-pit64b.c
index cfa4ec7ef396..abce83d2f00b 100644
--- a/drivers/clocksource/timer-microchip-pit64b.c
+++ b/drivers/clocksource/timer-microchip-pit64b.c
@@ -42,8 +42,7 @@
 #define MCHP_PIT64B_LSBMASK		GENMASK_ULL(31, 0)
 #define MCHP_PIT64B_PRES_TO_MODE(p)	(MCHP_PIT64B_MR_PRES & ((p) << 8))
 #define MCHP_PIT64B_MODE_TO_PRES(m)	((MCHP_PIT64B_MR_PRES & (m)) >> 8)
-#define MCHP_PIT64B_DEF_CS_FREQ		5000000UL	/* 5 MHz */
-#define MCHP_PIT64B_DEF_CE_FREQ		32768		/* 32 KHz */
+#define MCHP_PIT64B_DEF_FREQ		5000000UL	/* 5 MHz */
 
 #define MCHP_PIT64B_NAME		"pit64b"
 
@@ -165,7 +164,7 @@ static u64 mchp_pit64b_clksrc_read(struct clocksource *cs)
 	return mchp_pit64b_cnt_read(mchp_pit64b_cs_base);
 }
 
-static u64 mchp_pit64b_sched_read_clk(void)
+static u64 notrace mchp_pit64b_sched_read_clk(void)
 {
 	return mchp_pit64b_cnt_read(mchp_pit64b_cs_base);
 }
@@ -418,7 +417,6 @@ static int __init mchp_pit64b_init_clkevt(struct mchp_pit64b_timer *timer,
 static int __init mchp_pit64b_dt_init_timer(struct device_node *node,
 					    bool clkevt)
 {
-	u32 freq = clkevt ? MCHP_PIT64B_DEF_CE_FREQ : MCHP_PIT64B_DEF_CS_FREQ;
 	struct mchp_pit64b_timer timer;
 	unsigned long clk_rate;
 	u32 irq = 0;
@@ -446,7 +444,7 @@ static int __init mchp_pit64b_dt_init_timer(struct device_node *node,
 	}
 
 	/* Initialize mode (prescaler + SGCK bit). To be used at runtime. */
-	ret = mchp_pit64b_init_mode(&timer, freq);
+	ret = mchp_pit64b_init_mode(&timer, MCHP_PIT64B_DEF_FREQ);
 	if (ret)
 		goto irq_unmap;
 
diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c
index 529cc6a51cdb..c3f54d9912be 100644
--- a/drivers/clocksource/timer-of.c
+++ b/drivers/clocksource/timer-of.c
@@ -157,9 +157,9 @@ static __init int timer_of_base_init(struct device_node *np,
 	of_base->base = of_base->name ?
 		of_io_request_and_map(np, of_base->index, of_base->name) :
 		of_iomap(np, of_base->index);
-	if (IS_ERR(of_base->base)) {
-		pr_err("Failed to iomap (%s)\n", of_base->name);
-		return PTR_ERR(of_base->base);
+	if (IS_ERR_OR_NULL(of_base->base)) {
+		pr_err("Failed to iomap (%s:%s)\n", np->name, of_base->name);
+		return of_base->base ? PTR_ERR(of_base->base) : -ENOMEM;
 	}
 
 	return 0;
diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c
index 1fccb457fcc5..2737407ff069 100644
--- a/drivers/clocksource/timer-ti-dm-systimer.c
+++ b/drivers/clocksource/timer-ti-dm-systimer.c
@@ -694,9 +694,9 @@ static int __init dmtimer_percpu_quirk_init(struct device_node *np, u32 pa)
 		return 0;
 	}
 
-	if (pa == 0x48034000)		/* dra7 dmtimer3 */
+	if (pa == 0x4882c000)           /* dra7 dmtimer15 */
 		return dmtimer_percpu_timer_init(np, 0);
-	else if (pa == 0x48036000)	/* dra7 dmtimer4 */
+	else if (pa == 0x4882e000)      /* dra7 dmtimer16 */
 		return dmtimer_percpu_timer_init(np, 1);
 
 	return 0;
diff --git a/drivers/counter/counter-sysfs.c b/drivers/counter/counter-sysfs.c
index 7cc4d1d523ea..04eac41dad33 100644
--- a/drivers/counter/counter-sysfs.c
+++ b/drivers/counter/counter-sysfs.c
@@ -19,6 +19,11 @@
 
 #include "counter-sysfs.h"
 
+static inline struct counter_device *counter_from_dev(struct device *dev)
+{
+	return container_of(dev, struct counter_device, dev);
+}
+
 /**
  * struct counter_attribute - Counter sysfs attribute
  * @dev_attr:	device attribute for sysfs
@@ -90,7 +95,7 @@ static ssize_t counter_comp_u8_show(struct device *dev,
 				    struct device_attribute *attr, char *buf)
 {
 	const struct counter_attribute *const a = to_counter_attribute(attr);
-	struct counter_device *const counter = dev_get_drvdata(dev);
+	struct counter_device *const counter = counter_from_dev(dev);
 	int err;
 	u8 data = 0;
 
@@ -122,7 +127,7 @@ static ssize_t counter_comp_u8_store(struct device *dev,
 				     const char *buf, size_t len)
 {
 	const struct counter_attribute *const a = to_counter_attribute(attr);
-	struct counter_device *const counter = dev_get_drvdata(dev);
+	struct counter_device *const counter = counter_from_dev(dev);
 	int err;
 	bool bool_data = 0;
 	u8 data = 0;
@@ -158,7 +163,7 @@ static ssize_t counter_comp_u32_show(struct device *dev,
 				     struct device_attribute *attr, char *buf)
 {
 	const struct counter_attribute *const a = to_counter_attribute(attr);
-	struct counter_device *const counter = dev_get_drvdata(dev);
+	struct counter_device *const counter = counter_from_dev(dev);
 	const struct counter_available *const avail = a->comp.priv;
 	int err;
 	u32 data = 0;
@@ -221,7 +226,7 @@ static ssize_t counter_comp_u32_store(struct device *dev,
 				      const char *buf, size_t len)
 {
 	const struct counter_attribute *const a = to_counter_attribute(attr);
-	struct counter_device *const counter = dev_get_drvdata(dev);
+	struct counter_device *const counter = counter_from_dev(dev);
 	struct counter_count *const count = a->parent;
 	struct counter_synapse *const synapse = a->comp.priv;
 	const struct counter_available *const avail = a->comp.priv;
@@ -281,7 +286,7 @@ static ssize_t counter_comp_u64_show(struct device *dev,
 				     struct device_attribute *attr, char *buf)
 {
 	const struct counter_attribute *const a = to_counter_attribute(attr);
-	struct counter_device *const counter = dev_get_drvdata(dev);
+	struct counter_device *const counter = counter_from_dev(dev);
 	int err;
 	u64 data = 0;
 
@@ -309,7 +314,7 @@ static ssize_t counter_comp_u64_store(struct device *dev,
 				      const char *buf, size_t len)
 {
 	const struct counter_attribute *const a = to_counter_attribute(attr);
-	struct counter_device *const counter = dev_get_drvdata(dev);
+	struct counter_device *const counter = counter_from_dev(dev);
 	int err;
 	u64 data = 0;
 
diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h
index 647505957d4f..35f38ae67fb1 100644
--- a/drivers/cpufreq/amd-pstate-trace.h
+++ b/drivers/cpufreq/amd-pstate-trace.h
@@ -27,6 +27,10 @@ TRACE_EVENT(amd_pstate_perf,
 	TP_PROTO(unsigned long min_perf,
 		 unsigned long target_perf,
 		 unsigned long capacity,
+		 u64 freq,
+		 u64 mperf,
+		 u64 aperf,
+		 u64 tsc,
 		 unsigned int cpu_id,
 		 bool changed,
 		 bool fast_switch
@@ -35,6 +39,10 @@ TRACE_EVENT(amd_pstate_perf,
 	TP_ARGS(min_perf,
 		target_perf,
 		capacity,
+		freq,
+		mperf,
+		aperf,
+		tsc,
 		cpu_id,
 		changed,
 		fast_switch
@@ -44,6 +52,10 @@ TRACE_EVENT(amd_pstate_perf,
 		__field(unsigned long, min_perf)
 		__field(unsigned long, target_perf)
 		__field(unsigned long, capacity)
+		__field(unsigned long long, freq)
+		__field(unsigned long long, mperf)
+		__field(unsigned long long, aperf)
+		__field(unsigned long long, tsc)
 		__field(unsigned int, cpu_id)
 		__field(bool, changed)
 		__field(bool, fast_switch)
@@ -53,15 +65,23 @@ TRACE_EVENT(amd_pstate_perf,
 		__entry->min_perf = min_perf;
 		__entry->target_perf = target_perf;
 		__entry->capacity = capacity;
+		__entry->freq = freq;
+		__entry->mperf = mperf;
+		__entry->aperf = aperf;
+		__entry->tsc = tsc;
 		__entry->cpu_id = cpu_id;
 		__entry->changed = changed;
 		__entry->fast_switch = fast_switch;
 		),
 
-	TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu cpu_id=%u changed=%s fast_switch=%s",
+	TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s fast_switch=%s",
 		  (unsigned long)__entry->min_perf,
 		  (unsigned long)__entry->target_perf,
 		  (unsigned long)__entry->capacity,
+		  (unsigned long long)__entry->freq,
+		  (unsigned long long)__entry->mperf,
+		  (unsigned long long)__entry->aperf,
+		  (unsigned long long)__entry->tsc,
 		  (unsigned int)__entry->cpu_id,
 		  (__entry->changed) ? "true" : "false",
 		  (__entry->fast_switch) ? "true" : "false"
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 9ce75ed11f8e..7be38bc6a673 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -66,6 +66,18 @@ MODULE_PARM_DESC(shared_mem,
 static struct cpufreq_driver amd_pstate_driver;
 
 /**
+ * struct  amd_aperf_mperf
+ * @aperf: actual performance frequency clock count
+ * @mperf: maximum performance frequency clock count
+ * @tsc:   time stamp counter
+ */
+struct amd_aperf_mperf {
+	u64 aperf;
+	u64 mperf;
+	u64 tsc;
+};
+
+/**
  * struct amd_cpudata - private CPU data for AMD P-State
  * @cpu: CPU number
  * @req: constraint request to apply
@@ -81,6 +93,9 @@ static struct cpufreq_driver amd_pstate_driver;
  * @min_freq: the frequency that mapped to lowest_perf
  * @nominal_freq: the frequency that mapped to nominal_perf
  * @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf
+ * @cur: Difference of Aperf/Mperf/tsc count between last and current sample
+ * @prev: Last Aperf/Mperf/tsc count value read from register
+ * @freq: current cpu frequency value
  * @boost_supported: check whether the Processor or SBIOS supports boost mode
  *
  * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
@@ -102,6 +117,10 @@ struct amd_cpudata {
 	u32	nominal_freq;
 	u32	lowest_nonlinear_freq;
 
+	struct amd_aperf_mperf cur;
+	struct amd_aperf_mperf prev;
+
+	u64 freq;
 	bool	boost_supported;
 };
 
@@ -211,6 +230,39 @@ static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
 					    max_perf, fast_switch);
 }
 
+static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
+{
+	u64 aperf, mperf, tsc;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	rdmsrl(MSR_IA32_APERF, aperf);
+	rdmsrl(MSR_IA32_MPERF, mperf);
+	tsc = rdtsc();
+
+	if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
+		local_irq_restore(flags);
+		return false;
+	}
+
+	local_irq_restore(flags);
+
+	cpudata->cur.aperf = aperf;
+	cpudata->cur.mperf = mperf;
+	cpudata->cur.tsc =  tsc;
+	cpudata->cur.aperf -= cpudata->prev.aperf;
+	cpudata->cur.mperf -= cpudata->prev.mperf;
+	cpudata->cur.tsc -= cpudata->prev.tsc;
+
+	cpudata->prev.aperf = aperf;
+	cpudata->prev.mperf = mperf;
+	cpudata->prev.tsc = tsc;
+
+	cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);
+
+	return true;
+}
+
 static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
 			      u32 des_perf, u32 max_perf, bool fast_switch)
 {
@@ -226,8 +278,11 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
 	value &= ~AMD_CPPC_MAX_PERF(~0L);
 	value |= AMD_CPPC_MAX_PERF(max_perf);
 
-	trace_amd_pstate_perf(min_perf, des_perf, max_perf,
-			      cpudata->cpu, (value != prev), fast_switch);
+	if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
+		trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
+			cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
+				cpudata->cpu, (value != prev), fast_switch);
+	}
 
 	if (value == prev)
 		return;
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 08515f7e515f..b6bd0ff35323 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -146,7 +146,7 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy)
 
 /************************** sysfs interface ************************/
 
-static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
+static ssize_t sampling_down_factor_store(struct gov_attr_set *attr_set,
 					  const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -161,7 +161,7 @@ static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
 	return count;
 }
 
-static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
+static ssize_t up_threshold_store(struct gov_attr_set *attr_set,
 				  const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -177,7 +177,7 @@ static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
 	return count;
 }
 
-static ssize_t store_down_threshold(struct gov_attr_set *attr_set,
+static ssize_t down_threshold_store(struct gov_attr_set *attr_set,
 				    const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -195,7 +195,7 @@ static ssize_t store_down_threshold(struct gov_attr_set *attr_set,
 	return count;
 }
 
-static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
+static ssize_t ignore_nice_load_store(struct gov_attr_set *attr_set,
 				      const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -220,7 +220,7 @@ static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
 	return count;
 }
 
-static ssize_t store_freq_step(struct gov_attr_set *attr_set, const char *buf,
+static ssize_t freq_step_store(struct gov_attr_set *attr_set, const char *buf,
 			       size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 63f7c219062b..0d42cf8b88d8 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -27,7 +27,7 @@ static DEFINE_MUTEX(gov_dbs_data_mutex);
 
 /* Common sysfs tunables */
 /*
- * store_sampling_rate - update sampling rate effective immediately if needed.
+ * sampling_rate_store - update sampling rate effective immediately if needed.
  *
  * If new rate is smaller than the old, simply updating
  * dbs.sampling_rate might not be appropriate. For example, if the
@@ -41,7 +41,7 @@ static DEFINE_MUTEX(gov_dbs_data_mutex);
  * This must be called with dbs_data->mutex held, otherwise traversing
  * policy_dbs_list isn't safe.
  */
-ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
+ssize_t sampling_rate_store(struct gov_attr_set *attr_set, const char *buf,
 			    size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -80,7 +80,7 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
 
 	return count;
 }
-EXPORT_SYMBOL_GPL(store_sampling_rate);
+EXPORT_SYMBOL_GPL(sampling_rate_store);
 
 /**
  * gov_update_cpu_data - Update CPU load data.
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index bab8e6140377..a5a0bc3cc23e 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -51,7 +51,7 @@ static inline struct dbs_data *to_dbs_data(struct gov_attr_set *attr_set)
 }
 
 #define gov_show_one(_gov, file_name)					\
-static ssize_t show_##file_name						\
+static ssize_t file_name##_show						\
 (struct gov_attr_set *attr_set, char *buf)				\
 {									\
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);		\
@@ -60,7 +60,7 @@ static ssize_t show_##file_name						\
 }
 
 #define gov_show_one_common(file_name)					\
-static ssize_t show_##file_name						\
+static ssize_t file_name##_show						\
 (struct gov_attr_set *attr_set, char *buf)				\
 {									\
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);		\
@@ -68,12 +68,10 @@ static ssize_t show_##file_name						\
 }
 
 #define gov_attr_ro(_name)						\
-static struct governor_attr _name =					\
-__ATTR(_name, 0444, show_##_name, NULL)
+static struct governor_attr _name = __ATTR_RO(_name)
 
 #define gov_attr_rw(_name)						\
-static struct governor_attr _name =					\
-__ATTR(_name, 0644, show_##_name, store_##_name)
+static struct governor_attr _name = __ATTR_RW(_name)
 
 /* Common to all CPUs of a policy */
 struct policy_dbs_info {
@@ -176,7 +174,7 @@ void od_register_powersave_bias_handler(unsigned int (*f)
 		(struct cpufreq_policy *, unsigned int, unsigned int),
 		unsigned int powersave_bias);
 void od_unregister_powersave_bias_handler(void);
-ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
+ssize_t sampling_rate_store(struct gov_attr_set *attr_set, const char *buf,
 			    size_t count);
 void gov_update_cpu_data(struct dbs_data *dbs_data);
 #endif /* _CPUFREQ_GOVERNOR_H */
diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c
index a6f365b9cc1a..771770ea0ed0 100644
--- a/drivers/cpufreq/cpufreq_governor_attr_set.c
+++ b/drivers/cpufreq/cpufreq_governor_attr_set.c
@@ -8,11 +8,6 @@
 
 #include "cpufreq_governor.h"
 
-static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj)
-{
-	return container_of(kobj, struct gov_attr_set, kobj);
-}
-
 static inline struct governor_attr *to_gov_attr(struct attribute *attr)
 {
 	return container_of(attr, struct governor_attr, attr);
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6a41ea4729b8..e8fbf970ff07 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -202,7 +202,7 @@ static unsigned int od_dbs_update(struct cpufreq_policy *policy)
 /************************** sysfs interface ************************/
 static struct dbs_governor od_dbs_gov;
 
-static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf,
+static ssize_t io_is_busy_store(struct gov_attr_set *attr_set, const char *buf,
 				size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -220,7 +220,7 @@ static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf,
 	return count;
 }
 
-static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
+static ssize_t up_threshold_store(struct gov_attr_set *attr_set,
 				  const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -237,7 +237,7 @@ static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
 	return count;
 }
 
-static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
+static ssize_t sampling_down_factor_store(struct gov_attr_set *attr_set,
 					  const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -265,7 +265,7 @@ static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
 	return count;
 }
 
-static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
+static ssize_t ignore_nice_load_store(struct gov_attr_set *attr_set,
 				      const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -290,7 +290,7 @@ static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
 	return count;
 }
 
-static ssize_t store_powersave_bias(struct gov_attr_set *attr_set,
+static ssize_t powersave_bias_store(struct gov_attr_set *attr_set,
 				    const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index bc7f7e6759bd..846bb3a78788 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1692,6 +1692,37 @@ static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
 	}
 }
 
+static void intel_pstate_update_epp_defaults(struct cpudata *cpudata)
+{
+	cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
+
+	/*
+	 * If this CPU gen doesn't call for change in balance_perf
+	 * EPP return.
+	 */
+	if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE)
+		return;
+
+	/*
+	 * If powerup EPP is something other than chipset default 0x80 and
+	 * - is more performance oriented than 0x80 (default balance_perf EPP)
+	 * - But less performance oriented than performance EPP
+	 *   then use this as new balance_perf EPP.
+	 */
+	if (cpudata->epp_default < HWP_EPP_BALANCE_PERFORMANCE &&
+	    cpudata->epp_default > HWP_EPP_PERFORMANCE) {
+		epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = cpudata->epp_default;
+		return;
+	}
+
+	/*
+	 * Use hard coded value per gen to update the balance_perf
+	 * and default EPP.
+	 */
+	cpudata->epp_default = epp_values[EPP_INDEX_BALANCE_PERFORMANCE];
+	intel_pstate_set_epp(cpudata, cpudata->epp_default);
+}
+
 static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 {
 	/* First disable HWP notification interrupt till we activate again */
@@ -1705,12 +1736,7 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 	if (cpudata->epp_default >= 0)
 		return;
 
-	if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) {
-		cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
-	} else {
-		cpudata->epp_default = epp_values[EPP_INDEX_BALANCE_PERFORMANCE];
-		intel_pstate_set_epp(cpudata, cpudata->epp_default);
-	}
+	intel_pstate_update_epp_defaults(cpudata);
 }
 
 static int atom_get_min_pstate(void)
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index c538a153ee82..3e000e1a75c6 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -668,9 +668,9 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
 					  u32 nesting_level,
 					  void *context, void **return_value)
 {
-	struct acpi_device *d;
+	struct acpi_device *d = acpi_fetch_acpi_dev(obj_handle);
 
-	if (acpi_bus_get_device(obj_handle, &d))
+	if (!d)
 		return 0;
 
 	*return_value = acpi_driver_data(d);
diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index 12ab4014af71..d289036beff2 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -1172,14 +1172,14 @@ static int powernowk8_init(void)
 	unsigned int i, supported_cpus = 0;
 	int ret;
 
+	if (!x86_match_cpu(powernow_k8_ids))
+		return -ENODEV;
+
 	if (boot_cpu_has(X86_FEATURE_HW_PSTATE)) {
 		__request_acpi_cpufreq();
 		return -ENODEV;
 	}
 
-	if (!x86_match_cpu(powernow_k8_ids))
-		return -ENODEV;
-
 	cpus_read_lock();
 	for_each_online_cpu(i) {
 		smp_call_function_single(i, check_supported_cpu, &ret, 1);
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c
index fcc53215bac8..3a39a7f48b77 100644
--- a/drivers/cpuidle/cpuidle-haltpoll.c
+++ b/drivers/cpuidle/cpuidle-haltpoll.c
@@ -108,11 +108,11 @@ static int __init haltpoll_init(void)
 	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
 		return -ENODEV;
 
-	cpuidle_poll_state_init(drv);
-
 	if (!kvm_para_available() || !haltpoll_want())
 		return -ENODEV;
 
+	cpuidle_poll_state_init(drv);
+
 	ret = cpuidle_register_driver(drv);
 	if (ret < 0)
 		return ret;
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 4f705674f94f..7b2d138bc83e 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -808,6 +808,16 @@ config CRYPTO_DEV_ZYNQMP_AES
 	  accelerator. Select this if you want to use the ZynqMP module
 	  for AES algorithms.
 
+config CRYPTO_DEV_ZYNQMP_SHA3
+	tristate "Support for Xilinx ZynqMP SHA3 hardware accelerator"
+	depends on ZYNQMP_FIRMWARE || COMPILE_TEST
+	select CRYPTO_SHA3
+	help
+	  Xilinx ZynqMP has SHA3 engine used for secure hash calculation.
+	  This driver interfaces with SHA3 hardware engine.
+	  Select this if you want to use the ZynqMP module
+	  for SHA3 hash computation.
+
 source "drivers/crypto/chelsio/Kconfig"
 
 source "drivers/crypto/virtio/Kconfig"
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 1fe5120eb966..0a4fff23d272 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -47,7 +47,7 @@ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
 obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/
 obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/
 obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/
-obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += xilinx/
+obj-y += xilinx/
 obj-y += hisilicon/
 obj-$(CONFIG_CRYPTO_DEV_AMLOGIC_GXL) += amlogic/
 obj-y += keembay/
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
index 54ae8d16e493..35e3cadccac2 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
@@ -11,6 +11,7 @@
  * You could find a link for the datasheet in Documentation/arm/sunxi.rst
  */
 
+#include <linux/bottom_half.h>
 #include <linux/crypto.h>
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
@@ -283,7 +284,9 @@ static int sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq)
 
 	flow = rctx->flow;
 	err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(breq->base.tfm));
+	local_bh_disable();
 	crypto_finalize_skcipher_request(engine, breq, err);
+	local_bh_enable();
 	return 0;
 }
 
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
index 88194718a806..859b7522faaa 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
@@ -9,6 +9,7 @@
  *
  * You could find the datasheet in Documentation/arm/sunxi.rst
  */
+#include <linux/bottom_half.h>
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
 #include <linux/scatterlist.h>
@@ -414,6 +415,8 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
 theend:
 	kfree(buf);
 	kfree(result);
+	local_bh_disable();
 	crypto_finalize_hash_request(engine, breq, err);
+	local_bh_enable();
 	return 0;
 }
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
index 9ef1c85c4aaa..554e400d41ca 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
@@ -11,6 +11,7 @@
  * You could find a link for the datasheet in Documentation/arm/sunxi.rst
  */
 
+#include <linux/bottom_half.h>
 #include <linux/crypto.h>
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
@@ -274,7 +275,9 @@ static int sun8i_ss_handle_cipher_request(struct crypto_engine *engine, void *ar
 	struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
 
 	err = sun8i_ss_cipher(breq);
+	local_bh_disable();
 	crypto_finalize_skcipher_request(engine, breq, err);
+	local_bh_enable();
 
 	return 0;
 }
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
index 80e89066dbd1..319fe3279a71 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
@@ -30,6 +30,8 @@
 static const struct ss_variant ss_a80_variant = {
 	.alg_cipher = { SS_ALG_AES, SS_ALG_DES, SS_ALG_3DES,
 	},
+	.alg_hash = { SS_ID_NOTSUPP, SS_ID_NOTSUPP, SS_ID_NOTSUPP, SS_ID_NOTSUPP,
+	},
 	.op_mode = { SS_OP_ECB, SS_OP_CBC,
 	},
 	.ss_clks = {
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
index 3c073eb3db03..1a71ed49d233 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
@@ -9,6 +9,7 @@
  *
  * You could find the datasheet in Documentation/arm/sunxi.rst
  */
+#include <linux/bottom_half.h>
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
 #include <linux/scatterlist.h>
@@ -442,6 +443,8 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
 theend:
 	kfree(pad);
 	kfree(result);
+	local_bh_disable();
 	crypto_finalize_hash_request(engine, breq, err);
+	local_bh_enable();
 	return 0;
 }
diff --git a/drivers/crypto/amlogic/amlogic-gxl-cipher.c b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
index c6865cbd334b..e79514fce731 100644
--- a/drivers/crypto/amlogic/amlogic-gxl-cipher.c
+++ b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
@@ -265,7 +265,9 @@ static int meson_handle_cipher_request(struct crypto_engine *engine,
 	struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
 
 	err = meson_cipher(breq);
+	local_bh_disable();
 	crypto_finalize_skcipher_request(engine, breq, err);
+	local_bh_enable();
 
 	return 0;
 }
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index fe0558403191..f72c6b3e4ad8 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -2509,6 +2509,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
 
 	/* keep only major version number */
 	switch (dd->hw_version & 0xff0) {
+	case 0x700:
 	case 0x500:
 		dd->caps.has_dualbuff = 1;
 		dd->caps.has_cfb64 = 1;
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 1b13f601fd95..d1628112dacc 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -2508,6 +2508,7 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd)
 
 	/* keep only major version number */
 	switch (dd->hw_version & 0xff0) {
+	case 0x700:
 	case 0x510:
 		dd->caps.has_dma = 1;
 		dd->caps.has_dualbuff = 1;
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index e30786ec9f2d..9fd7b8e439d2 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -1130,6 +1130,7 @@ static void atmel_tdes_get_cap(struct atmel_tdes_dev *dd)
 
 	/* keep only major version number */
 	switch (dd->hw_version & 0xf00) {
+	case 0x800:
 	case 0x700:
 		dd->caps.has_dma = 1;
 		dd->caps.has_cfb_3keys = 1;
diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.c b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
index 2e9c0d214363..9e7308e39b30 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_mbx.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/bitmap.h>
 #include <linux/workqueue.h>
 
 #include "nitrox_csr.h"
@@ -120,6 +121,7 @@ static void pf2vf_resp_handler(struct work_struct *work)
 
 void nitrox_pf2vf_mbox_handler(struct nitrox_device *ndev)
 {
+	DECLARE_BITMAP(csr, BITS_PER_TYPE(u64));
 	struct nitrox_vfdev *vfdev;
 	struct pf2vf_work *pfwork;
 	u64 value, reg_addr;
@@ -129,7 +131,8 @@ void nitrox_pf2vf_mbox_handler(struct nitrox_device *ndev)
 	/* loop for VF(0..63) */
 	reg_addr = NPS_PKT_MBOX_INT_LO;
 	value = nitrox_read_csr(ndev, reg_addr);
-	for_each_set_bit(i, (const unsigned long *)&value, BITS_PER_LONG) {
+	bitmap_from_u64(csr, value);
+	for_each_set_bit(i, csr, BITS_PER_TYPE(csr)) {
 		/* get the vfno from ring */
 		vfno = RING_TO_VFNO(i, ndev->iov.max_vf_queues);
 		vfdev = ndev->iov.vfdev + vfno;
@@ -151,7 +154,8 @@ void nitrox_pf2vf_mbox_handler(struct nitrox_device *ndev)
 	/* loop for VF(64..127) */
 	reg_addr = NPS_PKT_MBOX_INT_HI;
 	value = nitrox_read_csr(ndev, reg_addr);
-	for_each_set_bit(i, (const unsigned long *)&value, BITS_PER_LONG) {
+	bitmap_from_u64(csr, value);
+	for_each_set_bit(i, csr, BITS_PER_TYPE(csr)) {
 		/* get the vfno from ring */
 		vfno = RING_TO_VFNO(i + 64, ndev->iov.max_vf_queues);
 		vfdev = ndev->iov.vfdev + vfno;
diff --git a/drivers/crypto/cavium/nitrox/nitrox_req.h b/drivers/crypto/cavium/nitrox/nitrox_req.h
index ed174883c8e3..6bf088bcdd11 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_req.h
+++ b/drivers/crypto/cavium/nitrox/nitrox_req.h
@@ -440,7 +440,7 @@ struct aqmq_command_s {
 /**
  * struct ctx_hdr - Book keeping data about the crypto context
  * @pool: Pool used to allocate crypto context
- * @dma: Base DMA address of the cypto context
+ * @dma: Base DMA address of the crypto context
  * @ctx_dma: Actual usable crypto context for NITROX
  */
 struct ctx_hdr {
diff --git a/drivers/crypto/cavium/zip/zip_main.c b/drivers/crypto/cavium/zip/zip_main.c
index 812b4ac9afd6..dc5b7bf7e1fd 100644
--- a/drivers/crypto/cavium/zip/zip_main.c
+++ b/drivers/crypto/cavium/zip/zip_main.c
@@ -55,6 +55,11 @@ static const struct pci_device_id zip_id_table[] = {
 	{ 0, }
 };
 
+static void zip_debugfs_init(void);
+static void zip_debugfs_exit(void);
+static int zip_register_compression_device(void);
+static void zip_unregister_compression_device(void);
+
 void zip_reg_write(u64 val, u64 __iomem *addr)
 {
 	writeq(val, addr);
@@ -235,6 +240,15 @@ static int zip_init_hw(struct zip_device *zip)
 	return 0;
 }
 
+static void zip_reset(struct zip_device *zip)
+{
+	union zip_cmd_ctl cmd_ctl;
+
+	cmd_ctl.u_reg64 = 0x0ull;
+	cmd_ctl.s.reset = 1;  /* Forces ZIP cores to do reset */
+	zip_reg_write(cmd_ctl.u_reg64, (zip->reg_base + ZIP_CMD_CTL));
+}
+
 static int zip_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	struct device *dev = &pdev->dev;
@@ -282,8 +296,21 @@ static int zip_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto err_release_regions;
 
+	/* Register with the Kernel Crypto Interface */
+	err = zip_register_compression_device();
+	if (err < 0) {
+		zip_err("ZIP: Kernel Crypto Registration failed\n");
+		goto err_register;
+	}
+
+	/* comp-decomp statistics are handled with debugfs interface */
+	zip_debugfs_init();
+
 	return 0;
 
+err_register:
+	zip_reset(zip);
+
 err_release_regions:
 	if (zip->reg_base)
 		iounmap(zip->reg_base);
@@ -305,16 +332,17 @@ err_free_device:
 static void zip_remove(struct pci_dev *pdev)
 {
 	struct zip_device *zip = pci_get_drvdata(pdev);
-	union zip_cmd_ctl cmd_ctl;
 	int q = 0;
 
 	if (!zip)
 		return;
 
+	zip_debugfs_exit();
+
+	zip_unregister_compression_device();
+
 	if (zip->reg_base) {
-		cmd_ctl.u_reg64 = 0x0ull;
-		cmd_ctl.s.reset = 1;  /* Forces ZIP cores to do reset */
-		zip_reg_write(cmd_ctl.u_reg64, (zip->reg_base + ZIP_CMD_CTL));
+		zip_reset(zip);
 		iounmap(zip->reg_base);
 	}
 
@@ -585,7 +613,7 @@ DEFINE_SHOW_ATTRIBUTE(zip_regs);
 /* Root directory for thunderx_zip debugfs entry */
 static struct dentry *zip_debugfs_root;
 
-static void __init zip_debugfs_init(void)
+static void zip_debugfs_init(void)
 {
 	if (!debugfs_initialized())
 		return;
@@ -604,7 +632,7 @@ static void __init zip_debugfs_init(void)
 
 }
 
-static void __exit zip_debugfs_exit(void)
+static void zip_debugfs_exit(void)
 {
 	debugfs_remove_recursive(zip_debugfs_root);
 }
@@ -615,48 +643,7 @@ static void __exit zip_debugfs_exit(void) { }
 #endif
 /* debugfs - end */
 
-static int __init zip_init_module(void)
-{
-	int ret;
-
-	zip_msg("%s\n", DRV_NAME);
-
-	ret = pci_register_driver(&zip_driver);
-	if (ret < 0) {
-		zip_err("ZIP: pci_register_driver() failed\n");
-		return ret;
-	}
-
-	/* Register with the Kernel Crypto Interface */
-	ret = zip_register_compression_device();
-	if (ret < 0) {
-		zip_err("ZIP: Kernel Crypto Registration failed\n");
-		goto err_pci_unregister;
-	}
-
-	/* comp-decomp statistics are handled with debugfs interface */
-	zip_debugfs_init();
-
-	return ret;
-
-err_pci_unregister:
-	pci_unregister_driver(&zip_driver);
-	return ret;
-}
-
-static void __exit zip_cleanup_module(void)
-{
-	zip_debugfs_exit();
-
-	/* Unregister from the kernel crypto interface */
-	zip_unregister_compression_device();
-
-	/* Unregister this driver for pci zip devices */
-	pci_unregister_driver(&zip_driver);
-}
-
-module_init(zip_init_module);
-module_exit(zip_cleanup_module);
+module_pci_driver(zip_driver);
 
 MODULE_AUTHOR("Cavium Inc");
 MODULE_DESCRIPTION("Cavium Inc ThunderX ZIP Driver");
diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c
index e6dcd8cedd53..bed331953ff9 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes.c
@@ -69,7 +69,6 @@ static int ccp_aes_crypt(struct skcipher_request *req, bool encrypt)
 	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req);
 	struct scatterlist *iv_sg = NULL;
 	unsigned int iv_len = 0;
-	int ret;
 
 	if (!ctx->u.aes.key_len)
 		return -EINVAL;
@@ -104,9 +103,7 @@ static int ccp_aes_crypt(struct skcipher_request *req, bool encrypt)
 	rctx->cmd.u.aes.src_len = req->cryptlen;
 	rctx->cmd.u.aes.dst = req->dst;
 
-	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
-
-	return ret;
+	return ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
 }
 
 static int ccp_aes_encrypt(struct skcipher_request *req)
diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c
index d718db224be4..7d4b4ad1db1f 100644
--- a/drivers/crypto/ccp/ccp-dmaengine.c
+++ b/drivers/crypto/ccp/ccp-dmaengine.c
@@ -632,6 +632,20 @@ static int ccp_terminate_all(struct dma_chan *dma_chan)
 	return 0;
 }
 
+static void ccp_dma_release(struct ccp_device *ccp)
+{
+	struct ccp_dma_chan *chan;
+	struct dma_chan *dma_chan;
+	unsigned int i;
+
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		chan = ccp->ccp_dma_chan + i;
+		dma_chan = &chan->dma_chan;
+		tasklet_kill(&chan->cleanup_tasklet);
+		list_del_rcu(&dma_chan->device_node);
+	}
+}
+
 int ccp_dmaengine_register(struct ccp_device *ccp)
 {
 	struct ccp_dma_chan *chan;
@@ -736,6 +750,7 @@ int ccp_dmaengine_register(struct ccp_device *ccp)
 	return 0;
 
 err_reg:
+	ccp_dma_release(ccp);
 	kmem_cache_destroy(ccp->dma_desc_cache);
 
 err_cache:
@@ -752,6 +767,7 @@ void ccp_dmaengine_unregister(struct ccp_device *ccp)
 		return;
 
 	dma_async_device_unregister(dma_dev);
+	ccp_dma_release(ccp);
 
 	kmem_cache_destroy(ccp->dma_desc_cache);
 	kmem_cache_destroy(ccp->dma_cmd_cache);
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 8fd774a10edc..6ab93dfd478a 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -413,7 +413,7 @@ static int __sev_platform_init_locked(int *error)
 {
 	struct psp_device *psp = psp_master;
 	struct sev_device *sev;
-	int rc, psp_ret;
+	int rc, psp_ret = -1;
 	int (*init_function)(int *error);
 
 	if (!psp || !psp->sev_data)
diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c
index a5e041d9d2cf..11e0278c8631 100644
--- a/drivers/crypto/ccree/cc_buffer_mgr.c
+++ b/drivers/crypto/ccree/cc_buffer_mgr.c
@@ -258,6 +258,13 @@ static int cc_map_sg(struct device *dev, struct scatterlist *sg,
 {
 	int ret = 0;
 
+	if (!nbytes) {
+		*mapped_nents = 0;
+		*lbytes = 0;
+		*nents = 0;
+		return 0;
+	}
+
 	*nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes);
 	if (*nents > max_sg_nents) {
 		*nents = 0;
diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c
index 78833491f534..309da6334a0a 100644
--- a/drivers/crypto/ccree/cc_cipher.c
+++ b/drivers/crypto/ccree/cc_cipher.c
@@ -257,8 +257,8 @@ static void cc_cipher_exit(struct crypto_tfm *tfm)
 		&ctx_p->user.key_dma_addr);
 
 	/* Free key buffer in context */
-	kfree_sensitive(ctx_p->user.key);
 	dev_dbg(dev, "Free key buffer in context. key=@%p\n", ctx_p->user.key);
+	kfree_sensitive(ctx_p->user.key);
 }
 
 struct tdes_keys {
diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c
index c1c2b1d86663..14d0d83d388d 100644
--- a/drivers/crypto/gemini/sl3516-ce-cipher.c
+++ b/drivers/crypto/gemini/sl3516-ce-cipher.c
@@ -23,8 +23,8 @@ static bool sl3516_ce_need_fallback(struct skcipher_request *areq)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
 	struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
 	struct sl3516_ce_dev *ce = op->ce;
-	struct scatterlist *in_sg = areq->src;
-	struct scatterlist *out_sg = areq->dst;
+	struct scatterlist *in_sg;
+	struct scatterlist *out_sg;
 	struct scatterlist *sg;
 
 	if (areq->cryptlen == 0 || areq->cryptlen % 16) {
@@ -264,7 +264,9 @@ static int sl3516_ce_handle_cipher_request(struct crypto_engine *engine, void *a
 	struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
 
 	err = sl3516_ce_cipher(breq);
+	local_bh_disable();
 	crypto_finalize_skcipher_request(engine, breq, err);
+	local_bh_enable();
 
 	return 0;
 }
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index c5b84a5ea350..453390044181 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -3840,7 +3840,7 @@ static void qm_clear_queues(struct hisi_qm *qm)
 
 	for (i = 0; i < qm->qp_num; i++) {
 		qp = &qm->qp_array[i];
-		if (qp->is_resetting)
+		if (qp->is_in_kernel && qp->is_resetting)
 			memset(qp->qdma.va, 0, qp->qdma.size);
 	}
 
@@ -4295,7 +4295,7 @@ static void qm_vf_get_qos(struct hisi_qm *qm, u32 fun_num)
 static int qm_vf_read_qos(struct hisi_qm *qm)
 {
 	int cnt = 0;
-	int ret;
+	int ret = -EINVAL;
 
 	/* reset mailbox qos val */
 	qm->mb_qos = 0;
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 6a45bd23b363..a91635c348b5 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -42,6 +42,8 @@
 #define SEC_DE_OFFSET_V3		9
 #define SEC_SCENE_OFFSET_V3	5
 #define SEC_CKEY_OFFSET_V3	13
+#define SEC_CTR_CNT_OFFSET	25
+#define SEC_CTR_CNT_ROLLOVER	2
 #define SEC_SRC_SGL_OFFSET_V3	11
 #define SEC_DST_SGL_OFFSET_V3	14
 #define SEC_CALG_OFFSET_V3	4
@@ -63,6 +65,7 @@
 #define SEC_AUTH_CIPHER		0x1
 #define SEC_MAX_MAC_LEN		64
 #define SEC_MAX_AAD_LEN		65535
+#define SEC_MAX_CCM_AAD_LEN	65279
 #define SEC_TOTAL_MAC_SZ	(SEC_MAX_MAC_LEN * QM_Q_DEPTH)
 
 #define SEC_PBUF_SZ			512
@@ -237,7 +240,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp)
 
 	if (unlikely(type != type_supported)) {
 		atomic64_inc(&dfx->err_bd_cnt);
-		pr_err("err bd type [%d]\n", type);
+		pr_err("err bd type [%u]\n", type);
 		return;
 	}
 
@@ -641,13 +644,15 @@ static int sec_skcipher_fbtfm_init(struct crypto_skcipher *tfm)
 	struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
 
 	c_ctx->fallback = false;
+
+	/* Currently, only XTS mode need fallback tfm when using 192bit key */
 	if (likely(strncmp(alg, "xts", SEC_XTS_NAME_SZ)))
 		return 0;
 
 	c_ctx->fbtfm = crypto_alloc_sync_skcipher(alg, 0,
 						  CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(c_ctx->fbtfm)) {
-		pr_err("failed to alloc fallback tfm!\n");
+		pr_err("failed to alloc xts mode fallback tfm!\n");
 		return PTR_ERR(c_ctx->fbtfm);
 	}
 
@@ -808,7 +813,7 @@ static int sec_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	}
 
 	memcpy(c_ctx->c_key, key, keylen);
-	if (c_ctx->fallback) {
+	if (c_ctx->fallback && c_ctx->fbtfm) {
 		ret = crypto_sync_skcipher_setkey(c_ctx->fbtfm, key, keylen);
 		if (ret) {
 			dev_err(dev, "failed to set fallback skcipher key!\n");
@@ -1300,6 +1305,10 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req)
 		cipher = SEC_CIPHER_DEC;
 	sec_sqe3->c_icv_key |= cpu_to_le16(cipher);
 
+	/* Set the CTR counter mode is 128bit rollover */
+	sec_sqe3->auth_mac_key = cpu_to_le32((u32)SEC_CTR_CNT_ROLLOVER <<
+					SEC_CTR_CNT_OFFSET);
+
 	if (req->use_pbuf) {
 		bd_param |= SEC_PBUF << SEC_SRC_SGL_OFFSET_V3;
 		bd_param |= SEC_PBUF << SEC_DST_SGL_OFFSET_V3;
@@ -1614,7 +1623,7 @@ static void sec_auth_bd_fill_ex_v3(struct sec_auth_ctx *ctx, int dir,
 		sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE1);
 		sqe3->huk_iv_seq &= SEC_CIPHER_AUTH_V3;
 	} else {
-		sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE1);
+		sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE2);
 		sqe3->huk_iv_seq |= SEC_AUTH_CIPHER_V3;
 	}
 	sqe3->a_len_key = cpu_to_le32(c_req->c_len + aq->assoclen);
@@ -2032,13 +2041,12 @@ static int sec_skcipher_soft_crypto(struct sec_ctx *ctx,
 				    struct skcipher_request *sreq, bool encrypt)
 {
 	struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
+	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, c_ctx->fbtfm);
 	struct device *dev = ctx->dev;
 	int ret;
 
-	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, c_ctx->fbtfm);
-
 	if (!c_ctx->fbtfm) {
-		dev_err(dev, "failed to check fallback tfm\n");
+		dev_err_ratelimited(dev, "the soft tfm isn't supported in the current system.\n");
 		return -EINVAL;
 	}
 
@@ -2219,6 +2227,10 @@ static int sec_aead_spec_check(struct sec_ctx *ctx, struct sec_req *sreq)
 	}
 
 	if (c_mode == SEC_CMODE_CCM) {
+		if (unlikely(req->assoclen > SEC_MAX_CCM_AAD_LEN)) {
+			dev_err_ratelimited(dev, "CCM input aad parameter is too long!\n");
+			return -EINVAL;
+		}
 		ret = aead_iv_demension_check(req);
 		if (ret) {
 			dev_err(dev, "aead input iv param error!\n");
@@ -2256,7 +2268,6 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
 	if (ctx->sec->qm.ver == QM_HW_V2) {
 		if (unlikely(!req->cryptlen || (!sreq->c_req.encrypt &&
 		    req->cryptlen <= authsize))) {
-			dev_err(dev, "Kunpeng920 not support 0 length!\n");
 			ctx->a_ctx.fallback = true;
 			return -EINVAL;
 		}
@@ -2284,9 +2295,10 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx,
 				struct aead_request *aead_req,
 				bool encrypt)
 {
-	struct aead_request *subreq = aead_request_ctx(aead_req);
 	struct sec_auth_ctx *a_ctx = &ctx->a_ctx;
 	struct device *dev = ctx->dev;
+	struct aead_request *subreq;
+	int ret;
 
 	/* Kunpeng920 aead mode not support input 0 size */
 	if (!a_ctx->fallback_aead_tfm) {
@@ -2294,6 +2306,10 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx,
 		return -EINVAL;
 	}
 
+	subreq = aead_request_alloc(a_ctx->fallback_aead_tfm, GFP_KERNEL);
+	if (!subreq)
+		return -ENOMEM;
+
 	aead_request_set_tfm(subreq, a_ctx->fallback_aead_tfm);
 	aead_request_set_callback(subreq, aead_req->base.flags,
 				  aead_req->base.complete, aead_req->base.data);
@@ -2301,8 +2317,13 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx,
 			       aead_req->cryptlen, aead_req->iv);
 	aead_request_set_ad(subreq, aead_req->assoclen);
 
-	return encrypt ? crypto_aead_encrypt(subreq) :
-		   crypto_aead_decrypt(subreq);
+	if (encrypt)
+		ret = crypto_aead_encrypt(subreq);
+	else
+		ret = crypto_aead_decrypt(subreq);
+	aead_request_free(subreq);
+
+	return ret;
 }
 
 static int sec_aead_crypto(struct aead_request *a_req, bool encrypt)
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h
index 9f71c358a6d3..5e039b50e9d4 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.h
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h
@@ -354,8 +354,10 @@ struct sec_sqe3 {
 	 * akey_len: 9~14 bits
 	 * a_alg: 15~20 bits
 	 * key_sel: 21~24 bits
-	 * updata_key: 25 bits
-	 * reserved: 26~31 bits
+	 * ctr_count_mode/sm4_xts: 25~26 bits
+	 * sva_prefetch: 27 bits
+	 * key_wrap_num: 28~30 bits
+	 * update_key: 31 bits
 	 */
 	__le32 auth_mac_key;
 	__le32 salt;
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 26d3ab1d308b..0b9906ff69e3 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -90,6 +90,10 @@
 					SEC_USER1_WB_DATA_SSV)
 #define SEC_USER1_SMMU_SVA		(SEC_USER1_SMMU_NORMAL | SEC_USER1_SVA_SET)
 #define SEC_USER1_SMMU_MASK		(~SEC_USER1_SVA_SET)
+#define SEC_INTERFACE_USER_CTRL0_REG_V3	0x302220
+#define SEC_INTERFACE_USER_CTRL1_REG_V3	0x302224
+#define SEC_USER1_SMMU_NORMAL_V3	(BIT(23) | BIT(17) | BIT(11) | BIT(5))
+#define SEC_USER1_SMMU_MASK_V3		0xFF79E79E
 #define SEC_CORE_INT_STATUS_M_ECC	BIT(2)
 
 #define SEC_PREFETCH_CFG		0x301130
@@ -335,6 +339,41 @@ static void sec_set_endian(struct hisi_qm *qm)
 	writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
 }
 
+static void sec_engine_sva_config(struct hisi_qm *qm)
+{
+	u32 reg;
+
+	if (qm->ver > QM_HW_V2) {
+		reg = readl_relaxed(qm->io_base +
+				SEC_INTERFACE_USER_CTRL0_REG_V3);
+		reg |= SEC_USER0_SMMU_NORMAL;
+		writel_relaxed(reg, qm->io_base +
+				SEC_INTERFACE_USER_CTRL0_REG_V3);
+
+		reg = readl_relaxed(qm->io_base +
+				SEC_INTERFACE_USER_CTRL1_REG_V3);
+		reg &= SEC_USER1_SMMU_MASK_V3;
+		reg |= SEC_USER1_SMMU_NORMAL_V3;
+		writel_relaxed(reg, qm->io_base +
+				SEC_INTERFACE_USER_CTRL1_REG_V3);
+	} else {
+		reg = readl_relaxed(qm->io_base +
+				SEC_INTERFACE_USER_CTRL0_REG);
+		reg |= SEC_USER0_SMMU_NORMAL;
+		writel_relaxed(reg, qm->io_base +
+				SEC_INTERFACE_USER_CTRL0_REG);
+		reg = readl_relaxed(qm->io_base +
+				SEC_INTERFACE_USER_CTRL1_REG);
+		reg &= SEC_USER1_SMMU_MASK;
+		if (qm->use_sva)
+			reg |= SEC_USER1_SMMU_SVA;
+		else
+			reg |= SEC_USER1_SMMU_NORMAL;
+		writel_relaxed(reg, qm->io_base +
+				SEC_INTERFACE_USER_CTRL1_REG);
+	}
+}
+
 static void sec_open_sva_prefetch(struct hisi_qm *qm)
 {
 	u32 val;
@@ -426,26 +465,18 @@ static int sec_engine_init(struct hisi_qm *qm)
 	reg |= (0x1 << SEC_TRNG_EN_SHIFT);
 	writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
 
-	reg = readl_relaxed(qm->io_base + SEC_INTERFACE_USER_CTRL0_REG);
-	reg |= SEC_USER0_SMMU_NORMAL;
-	writel_relaxed(reg, qm->io_base + SEC_INTERFACE_USER_CTRL0_REG);
-
-	reg = readl_relaxed(qm->io_base + SEC_INTERFACE_USER_CTRL1_REG);
-	reg &= SEC_USER1_SMMU_MASK;
-	if (qm->use_sva && qm->ver == QM_HW_V2)
-		reg |= SEC_USER1_SMMU_SVA;
-	else
-		reg |= SEC_USER1_SMMU_NORMAL;
-	writel_relaxed(reg, qm->io_base + SEC_INTERFACE_USER_CTRL1_REG);
+	sec_engine_sva_config(qm);
 
 	writel(SEC_SINGLE_PORT_MAX_TRANS,
 	       qm->io_base + AM_CFG_SINGLE_PORT_MAX_TRANS);
 
 	writel(SEC_SAA_ENABLE, qm->io_base + SEC_SAA_EN_REG);
 
-	/* Enable sm4 extra mode, as ctr/ecb */
-	writel_relaxed(SEC_BD_ERR_CHK_EN0,
-		       qm->io_base + SEC_BD_ERR_CHK_EN_REG0);
+	/* HW V2 enable sm4 extra mode, as ctr/ecb */
+	if (qm->ver < QM_HW_V3)
+		writel_relaxed(SEC_BD_ERR_CHK_EN0,
+			       qm->io_base + SEC_BD_ERR_CHK_EN_REG0);
+
 	/* Enable sm4 xts mode multiple iv */
 	writel_relaxed(SEC_BD_ERR_CHK_EN1,
 		       qm->io_base + SEC_BD_ERR_CHK_EN_REG1);
diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig
index 9125199f1702..a48591af12d0 100644
--- a/drivers/crypto/marvell/Kconfig
+++ b/drivers/crypto/marvell/Kconfig
@@ -47,6 +47,7 @@ config CRYPTO_DEV_OCTEONTX2_CPT
 	select CRYPTO_SKCIPHER
 	select CRYPTO_HASH
 	select CRYPTO_AEAD
+	select NET_DEVLINK
 	help
 		This driver allows you to utilize the Marvell Cryptographic
 		Accelerator Unit(CPT) found in OcteonTX2 series of processors.
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
index ccbef01888d4..01c48ddc4eeb 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
@@ -1639,11 +1639,8 @@ static void swap_func(void *lptr, void *rptr, int size)
 {
 	struct cpt_device_desc *ldesc = (struct cpt_device_desc *) lptr;
 	struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr;
-	struct cpt_device_desc desc;
 
-	desc = *ldesc;
-	*ldesc = *rdesc;
-	*rdesc = desc;
+	swap(*ldesc, *rdesc);
 }
 
 int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod,
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
index b681bd2dc6ad..36d72e35ebeb 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
@@ -204,7 +204,6 @@ static int alloc_command_queues(struct otx_cptvf *cptvf,
 
 	/* per queue initialization */
 	for (i = 0; i < cptvf->num_queues; i++) {
-		c_size = 0;
 		rem_q_size = q_size;
 		first = NULL;
 		last = NULL;
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
index fb56824cb0a6..5012b7e669f0 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
@@ -157,5 +157,6 @@ struct otx2_cptlfs_info;
 int otx2_cpt_attach_rscrs_msg(struct otx2_cptlfs_info *lfs);
 int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info *lfs);
 int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs);
+int otx2_cpt_sync_mbox_msg(struct otx2_mbox *mbox);
 
 #endif /* __OTX2_CPT_COMMON_H */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
index 9074876d38e5..a317319696ef 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
@@ -202,3 +202,17 @@ int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs)
 	}
 	return ret;
 }
+
+int otx2_cpt_sync_mbox_msg(struct otx2_mbox *mbox)
+{
+	int err;
+
+	if (!otx2_mbox_nonempty(mbox, 0))
+		return 0;
+	otx2_mbox_msg_send(mbox, 0);
+	err = otx2_mbox_wait_for_rsp(mbox, 0);
+	if (err)
+		return err;
+
+	return otx2_mbox_check_rsp_msgs(mbox, 0);
+}
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
index b691b6c1d5c4..4fcaf61a70e3 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
@@ -26,12 +26,22 @@
  */
 #define OTX2_CPT_INST_QLEN_MSGS	((OTX2_CPT_SIZE_DIV40 - 1) * 40)
 
+/*
+ * LDWB is getting incorrectly used when IQB_LDWB = 1 and CPT instruction
+ * queue has less than 320 free entries. So, increase HW instruction queue
+ * size by 320 and give 320 entries less for SW/NIX RX as a workaround.
+ */
+#define OTX2_CPT_INST_QLEN_EXTRA_BYTES  (320 * OTX2_CPT_INST_SIZE)
+#define OTX2_CPT_EXTRA_SIZE_DIV40       (320/40)
+
 /* CPT instruction queue length in bytes */
-#define OTX2_CPT_INST_QLEN_BYTES (OTX2_CPT_SIZE_DIV40 * 40 * \
-				  OTX2_CPT_INST_SIZE)
+#define OTX2_CPT_INST_QLEN_BYTES                                               \
+		((OTX2_CPT_SIZE_DIV40 * 40 * OTX2_CPT_INST_SIZE) +             \
+		OTX2_CPT_INST_QLEN_EXTRA_BYTES)
 
 /* CPT instruction group queue length in bytes */
-#define OTX2_CPT_INST_GRP_QLEN_BYTES (OTX2_CPT_SIZE_DIV40 * 16)
+#define OTX2_CPT_INST_GRP_QLEN_BYTES                                           \
+		((OTX2_CPT_SIZE_DIV40 + OTX2_CPT_EXTRA_SIZE_DIV40) * 16)
 
 /* CPT FC length in bytes */
 #define OTX2_CPT_Q_FC_LEN 128
@@ -179,7 +189,8 @@ static inline void otx2_cptlf_do_set_iqueue_size(struct otx2_cptlf_info *lf)
 {
 	union otx2_cptx_lf_q_size lf_q_size = { .u = 0x0 };
 
-	lf_q_size.s.size_div40 = OTX2_CPT_SIZE_DIV40;
+	lf_q_size.s.size_div40 = OTX2_CPT_SIZE_DIV40 +
+				 OTX2_CPT_EXTRA_SIZE_DIV40;
 	otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot,
 			 OTX2_CPT_LF_Q_SIZE, lf_q_size.u);
 }
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h
index 05b2d9c650e1..936174b012e8 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h
@@ -46,6 +46,7 @@ struct otx2_cptpf_dev {
 
 	struct workqueue_struct	*flr_wq;
 	struct cptpf_flr_work   *flr_work;
+	struct mutex            lock;   /* serialize mailbox access */
 
 	unsigned long cap_flag;
 	u8 pf_id;               /* RVU PF number */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
index 1720a5bb7016..a402ccfac557 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
@@ -140,10 +140,13 @@ static void cptpf_flr_wq_handler(struct work_struct *work)
 
 	vf = flr_work - pf->flr_work;
 
+	mutex_lock(&pf->lock);
 	req = otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req),
 				      sizeof(struct msg_rsp));
-	if (!req)
+	if (!req) {
+		mutex_unlock(&pf->lock);
 		return;
+	}
 
 	req->sig = OTX2_MBOX_REQ_SIG;
 	req->id = MBOX_MSG_VF_FLR;
@@ -151,16 +154,19 @@ static void cptpf_flr_wq_handler(struct work_struct *work)
 	req->pcifunc |= (vf + 1) & RVU_PFVF_FUNC_MASK;
 
 	otx2_cpt_send_mbox_msg(mbox, pf->pdev);
+	if (!otx2_cpt_sync_mbox_msg(&pf->afpf_mbox)) {
 
-	if (vf >= 64) {
-		reg = 1;
-		vf = vf - 64;
+		if (vf >= 64) {
+			reg = 1;
+			vf = vf - 64;
+		}
+		/* Clear transaction pending register */
+		otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0,
+				 RVU_PF_VFTRPENDX(reg), BIT_ULL(vf));
+		otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0,
+				 RVU_PF_VFFLR_INT_ENA_W1SX(reg), BIT_ULL(vf));
 	}
-	/* Clear transaction pending register */
-	otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0,
-			 RVU_PF_VFTRPENDX(reg), BIT_ULL(vf));
-	otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0,
-			 RVU_PF_VFFLR_INT_ENA_W1SX(reg), BIT_ULL(vf));
+	mutex_unlock(&pf->lock);
 }
 
 static irqreturn_t cptpf_vf_flr_intr(int __always_unused irq, void *arg)
@@ -468,6 +474,7 @@ static int cptpf_afpf_mbox_init(struct otx2_cptpf_dev *cptpf)
 		goto error;
 
 	INIT_WORK(&cptpf->afpf_mbox_work, otx2_cptpf_afpf_mbox_handler);
+	mutex_init(&cptpf->lock);
 	return 0;
 
 error:
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c
index 186f1c1190c1..dee0aa60b698 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c
@@ -18,9 +18,12 @@ static int forward_to_af(struct otx2_cptpf_dev *cptpf,
 	struct mbox_msghdr *msg;
 	int ret;
 
+	mutex_lock(&cptpf->lock);
 	msg = otx2_mbox_alloc_msg(&cptpf->afpf_mbox, 0, size);
-	if (msg == NULL)
+	if (msg == NULL) {
+		mutex_unlock(&cptpf->lock);
 		return -ENOMEM;
+	}
 
 	memcpy((uint8_t *)msg + sizeof(struct mbox_msghdr),
 	       (uint8_t *)req + sizeof(struct mbox_msghdr), size);
@@ -29,15 +32,19 @@ static int forward_to_af(struct otx2_cptpf_dev *cptpf,
 	msg->sig = req->sig;
 	msg->ver = req->ver;
 
-	otx2_mbox_msg_send(&cptpf->afpf_mbox, 0);
-	ret = otx2_mbox_wait_for_rsp(&cptpf->afpf_mbox, 0);
+	ret = otx2_cpt_sync_mbox_msg(&cptpf->afpf_mbox);
+	/* Error code -EIO indicate there is a communication failure
+	 * to the AF. Rest of the error codes indicate that AF processed
+	 * VF messages and set the error codes in response messages
+	 * (if any) so simply forward responses to VF.
+	 */
 	if (ret == -EIO) {
-		dev_err(&cptpf->pdev->dev, "RVU MBOX timeout.\n");
+		dev_warn(&cptpf->pdev->dev,
+			 "AF not responding to VF%d messages\n", vf->vf_id);
+		mutex_unlock(&cptpf->lock);
 		return ret;
-	} else if (ret) {
-		dev_err(&cptpf->pdev->dev, "RVU MBOX error: %d.\n", ret);
-		return -EFAULT;
 	}
+	mutex_unlock(&cptpf->lock);
 	return 0;
 }
 
@@ -204,6 +211,10 @@ void otx2_cptpf_vfpf_mbox_handler(struct work_struct *work)
 		if (err == -ENOMEM || err == -EIO)
 			break;
 		offset = msg->next_msgoff;
+		/* Write barrier required for VF responses which are handled by
+		 * PF driver and not forwarded to AF.
+		 */
+		smp_wmb();
 	}
 	/* Send mbox responses to VF */
 	if (mdev->num_msgs)
@@ -350,6 +361,8 @@ void otx2_cptpf_afpf_mbox_handler(struct work_struct *work)
 			process_afpf_mbox_msg(cptpf, msg);
 
 		offset = msg->next_msgoff;
+		/* Sync VF response ready to be sent */
+		smp_wmb();
 		mdev->msgs_acked++;
 	}
 	otx2_mbox_reset(afpf_mbox, 0);
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
index 1b4d425bbf0e..9cba2f714c7e 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
@@ -1076,6 +1076,39 @@ static void delete_engine_grps(struct pci_dev *pdev,
 		delete_engine_group(&pdev->dev, &eng_grps->grp[i]);
 }
 
+#define PCI_DEVID_CN10K_RNM 0xA098
+#define RNM_ENTROPY_STATUS  0x8
+
+static void rnm_to_cpt_errata_fixup(struct device *dev)
+{
+	struct pci_dev *pdev;
+	void __iomem *base;
+	int timeout = 5000;
+
+	pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10K_RNM, NULL);
+	if (!pdev)
+		return;
+
+	base = pci_ioremap_bar(pdev, 0);
+	if (!base)
+		goto put_pdev;
+
+	while ((readq(base + RNM_ENTROPY_STATUS) & 0x7F) != 0x40) {
+		cpu_relax();
+		udelay(1);
+		timeout--;
+		if (!timeout) {
+			dev_warn(dev, "RNM is not producing entropy\n");
+			break;
+		}
+	}
+
+	iounmap(base);
+
+put_pdev:
+	pci_dev_put(pdev);
+}
+
 int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type)
 {
 
@@ -1111,6 +1144,7 @@ int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf,
 	struct otx2_cpt_engines engs[OTX2_CPT_MAX_ETYPES_PER_GRP] = { {0} };
 	struct pci_dev *pdev = cptpf->pdev;
 	struct fw_info_t fw_info;
+	u64 reg_val;
 	int ret = 0;
 
 	mutex_lock(&eng_grps->lock);
@@ -1189,9 +1223,17 @@ int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf,
 
 	if (is_dev_otx2(pdev))
 		goto unlock;
+
+	/*
+	 * Ensure RNM_ENTROPY_STATUS[NORMAL_CNT] = 0x40 before writing
+	 * CPT_AF_CTL[RNM_REQ_EN] = 1 as a workaround for HW errata.
+	 */
+	rnm_to_cpt_errata_fixup(&pdev->dev);
+
 	/*
 	 * Configure engine group mask to allow context prefetching
-	 * for the groups.
+	 * for the groups and enable random number request, to enable
+	 * CPT to request random numbers from RNM.
 	 */
 	otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTL,
 			      OTX2_CPT_ALL_ENG_GRPS_MASK << 3 | BIT_ULL(16),
@@ -1203,6 +1245,18 @@ int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf,
 	 */
 	otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTX_FLUSH_TIMER,
 			      CTX_FLUSH_TIMER_CNT, BLKADDR_CPT0);
+
+	/*
+	 * Set CPT_AF_DIAG[FLT_DIS], as a workaround for HW errata, when
+	 * CPT_AF_DIAG[FLT_DIS] = 0 and a CPT engine access to LLC/DRAM
+	 * encounters a fault/poison, a rare case may result in
+	 * unpredictable data being delivered to a CPT engine.
+	 */
+	otx2_cpt_read_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_DIAG, &reg_val,
+			     BLKADDR_CPT0);
+	otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_DIAG,
+			      reg_val | BIT_ULL(24), BLKADDR_CPT0);
+
 	mutex_unlock(&eng_grps->lock);
 	return 0;
 
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
index 2748a3327e39..f8f8542ce3e4 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
@@ -1634,16 +1634,13 @@ static inline int cpt_register_algs(void)
 {
 	int i, err = 0;
 
-	if (!IS_ENABLED(CONFIG_DM_CRYPT)) {
-		for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++)
-			otx2_cpt_skciphers[i].base.cra_flags &=
-							~CRYPTO_ALG_DEAD;
-
-		err = crypto_register_skciphers(otx2_cpt_skciphers,
-						ARRAY_SIZE(otx2_cpt_skciphers));
-		if (err)
-			return err;
-	}
+	for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++)
+		otx2_cpt_skciphers[i].base.cra_flags &= ~CRYPTO_ALG_DEAD;
+
+	err = crypto_register_skciphers(otx2_cpt_skciphers,
+					ARRAY_SIZE(otx2_cpt_skciphers));
+	if (err)
+		return err;
 
 	for (i = 0; i < ARRAY_SIZE(otx2_cpt_aeads); i++)
 		otx2_cpt_aeads[i].base.cra_flags &= ~CRYPTO_ALG_DEAD;
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index d19e5ffb5104..d6f9e2fe863d 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -331,7 +331,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
 		memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128);
 	}
 
-	for_each_sg(req->src, src, sg_nents(src), i) {
+	for_each_sg(req->src, src, sg_nents(req->src), i) {
 		src_buf = sg_virt(src);
 		len = sg_dma_len(src);
 		tlen += len;
diff --git a/drivers/crypto/nx/nx-common-pseries.c b/drivers/crypto/nx/nx-common-pseries.c
index 4e304f6081e4..7584a34ba88c 100644
--- a/drivers/crypto/nx/nx-common-pseries.c
+++ b/drivers/crypto/nx/nx-common-pseries.c
@@ -962,7 +962,7 @@ static struct attribute *nx842_sysfs_entries[] = {
 	NULL,
 };
 
-static struct attribute_group nx842_attribute_group = {
+static const struct attribute_group nx842_attribute_group = {
 	.name = NULL,		/* put in device directory */
 	.attrs = nx842_sysfs_entries,
 };
@@ -992,7 +992,7 @@ static struct attribute *nxcop_caps_sysfs_entries[] = {
 	NULL,
 };
 
-static struct attribute_group nxcop_caps_attr_group = {
+static const struct attribute_group nxcop_caps_attr_group = {
 	.name	=	"nx_gzip_caps",
 	.attrs	=	nxcop_caps_sysfs_entries,
 };
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index a196bb8b1701..581211a92628 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -1093,7 +1093,7 @@ static struct attribute *omap_aes_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group omap_aes_attr_group = {
+static const struct attribute_group omap_aes_attr_group = {
 	.attrs = omap_aes_attrs,
 };
 
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index f6bf53c00b61..4b37dc69a50c 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -2045,7 +2045,7 @@ static struct attribute *omap_sham_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group omap_sham_attr_group = {
+static const struct attribute_group omap_sham_attr_group = {
 	.attrs = omap_sham_attrs,
 };
 
diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
index 6d10edc40aca..fb5970a68484 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -6,6 +6,7 @@
 #include <adf_common_drv.h>
 #include <adf_gen4_hw_data.h>
 #include <adf_gen4_pfvf.h>
+#include <adf_gen4_pm.h>
 #include "adf_4xxx_hw_data.h"
 #include "icp_qat_hw.h"
 
@@ -52,7 +53,7 @@ static const char *const dev_cfg_services[] = {
 static int get_service_enabled(struct adf_accel_dev *accel_dev)
 {
 	char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0};
-	u32 ret;
+	int ret;
 
 	ret = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC,
 				      ADF_SERVICES_ENABLED, services);
@@ -229,7 +230,7 @@ static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
 	void __iomem *csr = misc_bar->virt_addr;
 
 	/* Enable all in errsou3 except VFLR notification on host */
-	ADF_CSR_WR(csr, ADF_4XXX_ERRMSK3, ADF_4XXX_VFLNOTIFY);
+	ADF_CSR_WR(csr, ADF_GEN4_ERRMSK3, ADF_GEN4_VFLNOTIFY);
 }
 
 static void adf_enable_ints(struct adf_accel_dev *accel_dev)
@@ -256,19 +257,19 @@ static int adf_init_device(struct adf_accel_dev *accel_dev)
 	addr = (&GET_BARS(accel_dev)[ADF_4XXX_PMISC_BAR])->virt_addr;
 
 	/* Temporarily mask PM interrupt */
-	csr = ADF_CSR_RD(addr, ADF_4XXX_ERRMSK2);
-	csr |= ADF_4XXX_PM_SOU;
-	ADF_CSR_WR(addr, ADF_4XXX_ERRMSK2, csr);
+	csr = ADF_CSR_RD(addr, ADF_GEN4_ERRMSK2);
+	csr |= ADF_GEN4_PM_SOU;
+	ADF_CSR_WR(addr, ADF_GEN4_ERRMSK2, csr);
 
 	/* Set DRV_ACTIVE bit to power up the device */
-	ADF_CSR_WR(addr, ADF_4XXX_PM_INTERRUPT, ADF_4XXX_PM_DRV_ACTIVE);
+	ADF_CSR_WR(addr, ADF_GEN4_PM_INTERRUPT, ADF_GEN4_PM_DRV_ACTIVE);
 
 	/* Poll status register to make sure the device is powered up */
 	ret = read_poll_timeout(ADF_CSR_RD, status,
-				status & ADF_4XXX_PM_INIT_STATE,
-				ADF_4XXX_PM_POLL_DELAY_US,
-				ADF_4XXX_PM_POLL_TIMEOUT_US, true, addr,
-				ADF_4XXX_PM_STATUS);
+				status & ADF_GEN4_PM_INIT_STATE,
+				ADF_GEN4_PM_POLL_DELAY_US,
+				ADF_GEN4_PM_POLL_TIMEOUT_US, true, addr,
+				ADF_GEN4_PM_STATUS);
 	if (ret)
 		dev_err(&GET_DEV(accel_dev), "Failed to power up the device\n");
 
@@ -354,6 +355,8 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data)
 	hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer;
 	hw_data->disable_iov = adf_disable_sriov;
 	hw_data->ring_pair_reset = adf_gen4_ring_pair_reset;
+	hw_data->enable_pm = adf_gen4_enable_pm;
+	hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt;
 
 	adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
 	adf_gen4_init_pf_pfvf_ops(&hw_data->pfvf_ops);
diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
index 12e4fb9b40ce..1034752845ca 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
+++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
@@ -39,20 +39,6 @@
 #define ADF_4XXX_NUM_RINGS_PER_BANK	2
 #define ADF_4XXX_NUM_BANKS_PER_VF	4
 
-/* Error source registers */
-#define ADF_4XXX_ERRSOU0	(0x41A200)
-#define ADF_4XXX_ERRSOU1	(0x41A204)
-#define ADF_4XXX_ERRSOU2	(0x41A208)
-#define ADF_4XXX_ERRSOU3	(0x41A20C)
-
-/* Error source mask registers */
-#define ADF_4XXX_ERRMSK0	(0x41A210)
-#define ADF_4XXX_ERRMSK1	(0x41A214)
-#define ADF_4XXX_ERRMSK2	(0x41A218)
-#define ADF_4XXX_ERRMSK3	(0x41A21C)
-
-#define ADF_4XXX_VFLNOTIFY	BIT(7)
-
 /* Arbiter configuration */
 #define ADF_4XXX_ARB_CONFIG			(BIT(31) | BIT(6) | BIT(0))
 #define ADF_4XXX_ARB_OFFSET			(0x0)
@@ -63,16 +49,6 @@
 #define ADF_4XXX_ADMINMSGLR_OFFSET	(0x500578)
 #define ADF_4XXX_MAILBOX_BASE_OFFSET	(0x600970)
 
-/* Power management */
-#define ADF_4XXX_PM_POLL_DELAY_US	20
-#define ADF_4XXX_PM_POLL_TIMEOUT_US	USEC_PER_SEC
-#define ADF_4XXX_PM_STATUS		(0x50A00C)
-#define ADF_4XXX_PM_INTERRUPT		(0x50A028)
-#define ADF_4XXX_PM_DRV_ACTIVE		BIT(20)
-#define ADF_4XXX_PM_INIT_STATE		BIT(21)
-/* Power management source in ERRSOU2 and ERRMSK2 */
-#define ADF_4XXX_PM_SOU			BIT(18)
-
 /* Firmware Binaries */
 #define ADF_4XXX_FW		"qat_4xxx.bin"
 #define ADF_4XXX_MMP		"qat_4xxx_mmp.bin"
diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c
index a6c78b9c730b..fa4c350c1bf9 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_drv.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c
@@ -75,6 +75,13 @@ static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev)
 	if (ret)
 		goto err;
 
+	/* Temporarily set the number of crypto instances to zero to avoid
+	 * registering the crypto algorithms.
+	 * This will be removed when the algorithms will support the
+	 * CRYPTO_TFM_REQ_MAY_BACKLOG flag
+	 */
+	instances = 0;
+
 	for (i = 0; i < instances; i++) {
 		val = i;
 		bank = i * 2;
diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile
index 7e191a42a5c7..f25a6c8edfc7 100644
--- a/drivers/crypto/qat/qat_common/Makefile
+++ b/drivers/crypto/qat/qat_common/Makefile
@@ -12,6 +12,7 @@ intel_qat-objs := adf_cfg.o \
 	adf_hw_arbiter.o \
 	adf_gen2_hw_data.o \
 	adf_gen4_hw_data.o \
+	adf_gen4_pm.o \
 	qat_crypto.o \
 	qat_algs.o \
 	qat_asym_algs.o \
diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h
index 2d4cd7c7cf33..a03c6cf72331 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h
@@ -184,6 +184,8 @@ struct adf_hw_device_data {
 	void (*exit_arb)(struct adf_accel_dev *accel_dev);
 	const u32 *(*get_arb_mapping)(void);
 	int (*init_device)(struct adf_accel_dev *accel_dev);
+	int (*enable_pm)(struct adf_accel_dev *accel_dev);
+	bool (*handle_pm_interrupt)(struct adf_accel_dev *accel_dev);
 	void (*disable_iov)(struct adf_accel_dev *accel_dev);
 	void (*configure_iov_threads)(struct adf_accel_dev *accel_dev,
 				      bool enable);
diff --git a/drivers/crypto/qat/qat_common/adf_admin.c b/drivers/crypto/qat/qat_common/adf_admin.c
index 498eb6f690e3..3b6184c35081 100644
--- a/drivers/crypto/qat/qat_common/adf_admin.c
+++ b/drivers/crypto/qat/qat_common/adf_admin.c
@@ -251,6 +251,43 @@ int adf_send_admin_init(struct adf_accel_dev *accel_dev)
 }
 EXPORT_SYMBOL_GPL(adf_send_admin_init);
 
+/**
+ * adf_init_admin_pm() - Function sends PM init message to FW
+ * @accel_dev: Pointer to acceleration device.
+ * @idle_delay: QAT HW idle time before power gating is initiated.
+ *		000 - 64us
+ *		001 - 128us
+ *		010 - 256us
+ *		011 - 512us
+ *		100 - 1ms
+ *		101 - 2ms
+ *		110 - 4ms
+ *		111 - 8ms
+ *
+ * Function sends to the FW the admin init message for the PM state
+ * configuration.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+int adf_init_admin_pm(struct adf_accel_dev *accel_dev, u32 idle_delay)
+{
+	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+	struct icp_qat_fw_init_admin_resp resp = {0};
+	struct icp_qat_fw_init_admin_req req = {0};
+	u32 ae_mask = hw_data->admin_ae_mask;
+
+	if (!accel_dev->admin) {
+		dev_err(&GET_DEV(accel_dev), "adf_admin is not available\n");
+		return -EFAULT;
+	}
+
+	req.cmd_id = ICP_QAT_FW_PM_STATE_CONFIG;
+	req.idle_filter = idle_delay;
+
+	return adf_send_admin(accel_dev, &req, &resp, ae_mask);
+}
+EXPORT_SYMBOL_GPL(adf_init_admin_pm);
+
 int adf_init_admin_comms(struct adf_accel_dev *accel_dev)
 {
 	struct adf_admin_comms *admin;
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index 76f4f96ec5eb..e8c9b77c0d66 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -102,6 +102,7 @@ void adf_exit_aer(void);
 int adf_init_admin_comms(struct adf_accel_dev *accel_dev);
 void adf_exit_admin_comms(struct adf_accel_dev *accel_dev);
 int adf_send_admin_init(struct adf_accel_dev *accel_dev);
+int adf_init_admin_pm(struct adf_accel_dev *accel_dev, u32 idle_delay);
 int adf_init_arb(struct adf_accel_dev *accel_dev);
 void adf_exit_arb(struct adf_accel_dev *accel_dev);
 void adf_update_ring_arb(struct adf_etr_ring_data *ring);
@@ -188,6 +189,9 @@ int qat_uclo_map_obj(struct icp_qat_fw_loader_handle *handle,
 		     void *addr_ptr, u32 mem_size, char *obj_name);
 int qat_uclo_set_cfg_ae_mask(struct icp_qat_fw_loader_handle *handle,
 			     unsigned int cfg_ae_mask);
+int adf_init_misc_wq(void);
+void adf_exit_misc_wq(void);
+bool adf_misc_wq_queue_work(struct work_struct *work);
 #if defined(CONFIG_PCI_IOV)
 int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
 void adf_disable_sriov(struct adf_accel_dev *accel_dev);
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
index 6f64aa693146..e8ac932bbaab 100644
--- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c
+++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
@@ -419,6 +419,9 @@ static int __init adf_register_ctl_device_driver(void)
 	if (adf_chr_drv_create())
 		goto err_chr_dev;
 
+	if (adf_init_misc_wq())
+		goto err_misc_wq;
+
 	if (adf_init_aer())
 		goto err_aer;
 
@@ -440,6 +443,8 @@ err_vf_wq:
 err_pf_wq:
 	adf_exit_aer();
 err_aer:
+	adf_exit_misc_wq();
+err_misc_wq:
 	adf_chr_drv_destroy();
 err_chr_dev:
 	mutex_destroy(&adf_ctl_lock);
@@ -449,6 +454,7 @@ err_chr_dev:
 static void __exit adf_unregister_ctl_device_driver(void)
 {
 	adf_chr_drv_destroy();
+	adf_exit_misc_wq();
 	adf_exit_aer();
 	adf_exit_vf_wq();
 	adf_exit_pf_wq();
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h
index f0f71ca44ca3..43b8f864806b 100644
--- a/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h
+++ b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h
@@ -122,6 +122,20 @@ do { \
 #define ADF_WQM_CSR_RPRESETSTS_STATUS	BIT(0)
 #define ADF_WQM_CSR_RPRESETSTS(bank)	(ADF_WQM_CSR_RPRESETCTL(bank) + 4)
 
+/* Error source registers */
+#define ADF_GEN4_ERRSOU0	(0x41A200)
+#define ADF_GEN4_ERRSOU1	(0x41A204)
+#define ADF_GEN4_ERRSOU2	(0x41A208)
+#define ADF_GEN4_ERRSOU3	(0x41A20C)
+
+/* Error source mask registers */
+#define ADF_GEN4_ERRMSK0	(0x41A210)
+#define ADF_GEN4_ERRMSK1	(0x41A214)
+#define ADF_GEN4_ERRMSK2	(0x41A218)
+#define ADF_GEN4_ERRMSK3	(0x41A21C)
+
+#define ADF_GEN4_VFLNOTIFY	BIT(7)
+
 void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev);
 void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops);
 int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number);
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c
index 8efbedf63bc8..d80d493a7756 100644
--- a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c
+++ b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c
@@ -9,15 +9,12 @@
 #include "adf_pfvf_pf_proto.h"
 #include "adf_pfvf_utils.h"
 
-#define ADF_4XXX_MAX_NUM_VFS		16
-
 #define ADF_4XXX_PF2VM_OFFSET(i)	(0x40B010 + ((i) * 0x20))
 #define ADF_4XXX_VM2PF_OFFSET(i)	(0x40B014 + ((i) * 0x20))
 
 /* VF2PF interrupt source registers */
-#define ADF_4XXX_VM2PF_SOU(i)		(0x41A180 + ((i) * 4))
-#define ADF_4XXX_VM2PF_MSK(i)		(0x41A1C0 + ((i) * 4))
-#define ADF_4XXX_VM2PF_INT_EN_MSK	BIT(0)
+#define ADF_4XXX_VM2PF_SOU		0x41A180
+#define ADF_4XXX_VM2PF_MSK		0x41A1C0
 
 #define ADF_PFVF_GEN4_MSGTYPE_SHIFT	2
 #define ADF_PFVF_GEN4_MSGTYPE_MASK	0x3F
@@ -41,51 +38,30 @@ static u32 adf_gen4_pf_get_vf2pf_offset(u32 i)
 
 static u32 adf_gen4_get_vf2pf_sources(void __iomem *pmisc_addr)
 {
-	int i;
 	u32 sou, mask;
-	int num_csrs = ADF_4XXX_MAX_NUM_VFS;
-	u32 vf_mask = 0;
 
-	for (i = 0; i < num_csrs; i++) {
-		sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU(i));
-		mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK(i));
-		sou &= ~mask;
-		vf_mask |= sou << i;
-	}
+	sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU);
+	mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK);
 
-	return vf_mask;
+	return sou & ~mask;
 }
 
 static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr,
 					     u32 vf_mask)
 {
-	int num_csrs = ADF_4XXX_MAX_NUM_VFS;
-	unsigned long mask = vf_mask;
 	unsigned int val;
-	int i;
-
-	for_each_set_bit(i, &mask, num_csrs) {
-		unsigned int offset = ADF_4XXX_VM2PF_MSK(i);
 
-		val = ADF_CSR_RD(pmisc_addr, offset) & ~ADF_4XXX_VM2PF_INT_EN_MSK;
-		ADF_CSR_WR(pmisc_addr, offset, val);
-	}
+	val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) & ~vf_mask;
+	ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val);
 }
 
 static void adf_gen4_disable_vf2pf_interrupts(void __iomem *pmisc_addr,
 					      u32 vf_mask)
 {
-	int num_csrs = ADF_4XXX_MAX_NUM_VFS;
-	unsigned long mask = vf_mask;
 	unsigned int val;
-	int i;
-
-	for_each_set_bit(i, &mask, num_csrs) {
-		unsigned int offset = ADF_4XXX_VM2PF_MSK(i);
 
-		val = ADF_CSR_RD(pmisc_addr, offset) | ADF_4XXX_VM2PF_INT_EN_MSK;
-		ADF_CSR_WR(pmisc_addr, offset, val);
-	}
+	val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) | vf_mask;
+	ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val);
 }
 
 static int adf_gen4_pfvf_send(struct adf_accel_dev *accel_dev,
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pm.c b/drivers/crypto/qat/qat_common/adf_gen4_pm.c
new file mode 100644
index 000000000000..7037c0892a8a
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/adf_gen4_pm.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2022 Intel Corporation */
+#include <linux/bitfield.h>
+#include <linux/iopoll.h>
+#include "adf_accel_devices.h"
+#include "adf_common_drv.h"
+#include "adf_gen4_pm.h"
+#include "adf_cfg_strings.h"
+#include "icp_qat_fw_init_admin.h"
+#include "adf_gen4_hw_data.h"
+#include "adf_cfg.h"
+
+enum qat_pm_host_msg {
+	PM_NO_CHANGE = 0,
+	PM_SET_MIN,
+};
+
+struct adf_gen4_pm_data {
+	struct work_struct pm_irq_work;
+	struct adf_accel_dev *accel_dev;
+	u32 pm_int_sts;
+};
+
+static int send_host_msg(struct adf_accel_dev *accel_dev)
+{
+	void __iomem *pmisc = adf_get_pmisc_base(accel_dev);
+	u32 msg;
+
+	msg = ADF_CSR_RD(pmisc, ADF_GEN4_PM_HOST_MSG);
+	if (msg & ADF_GEN4_PM_MSG_PENDING)
+		return -EBUSY;
+
+	/* Send HOST_MSG */
+	msg = FIELD_PREP(ADF_GEN4_PM_MSG_PAYLOAD_BIT_MASK, PM_SET_MIN);
+	msg |= ADF_GEN4_PM_MSG_PENDING;
+	ADF_CSR_WR(pmisc, ADF_GEN4_PM_HOST_MSG, msg);
+
+	/* Poll status register to make sure the HOST_MSG has been processed */
+	return read_poll_timeout(ADF_CSR_RD, msg,
+				!(msg & ADF_GEN4_PM_MSG_PENDING),
+				ADF_GEN4_PM_MSG_POLL_DELAY_US,
+				ADF_GEN4_PM_POLL_TIMEOUT_US, true, pmisc,
+				ADF_GEN4_PM_HOST_MSG);
+}
+
+static void pm_bh_handler(struct work_struct *work)
+{
+	struct adf_gen4_pm_data *pm_data =
+		container_of(work, struct adf_gen4_pm_data, pm_irq_work);
+	struct adf_accel_dev *accel_dev = pm_data->accel_dev;
+	void __iomem *pmisc = adf_get_pmisc_base(accel_dev);
+	u32 pm_int_sts = pm_data->pm_int_sts;
+	u32 val;
+
+	/* PM Idle interrupt */
+	if (pm_int_sts & ADF_GEN4_PM_IDLE_STS) {
+		/* Issue host message to FW */
+		if (send_host_msg(accel_dev))
+			dev_warn_ratelimited(&GET_DEV(accel_dev),
+					     "Failed to send host msg to FW\n");
+	}
+
+	/* Clear interrupt status */
+	ADF_CSR_WR(pmisc, ADF_GEN4_PM_INTERRUPT, pm_int_sts);
+
+	/* Reenable PM interrupt */
+	val = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2);
+	val &= ~ADF_GEN4_PM_SOU;
+	ADF_CSR_WR(pmisc, ADF_GEN4_ERRMSK2, val);
+
+	kfree(pm_data);
+}
+
+bool adf_gen4_handle_pm_interrupt(struct adf_accel_dev *accel_dev)
+{
+	void __iomem *pmisc = adf_get_pmisc_base(accel_dev);
+	struct adf_gen4_pm_data *pm_data = NULL;
+	u32 errsou2;
+	u32 errmsk2;
+	u32 val;
+
+	/* Only handle the interrupt triggered by PM */
+	errmsk2 = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2);
+	if (errmsk2 & ADF_GEN4_PM_SOU)
+		return false;
+
+	errsou2 = ADF_CSR_RD(pmisc, ADF_GEN4_ERRSOU2);
+	if (!(errsou2 & ADF_GEN4_PM_SOU))
+		return false;
+
+	/* Disable interrupt */
+	val = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2);
+	val |= ADF_GEN4_PM_SOU;
+	ADF_CSR_WR(pmisc, ADF_GEN4_ERRMSK2, val);
+
+	val = ADF_CSR_RD(pmisc, ADF_GEN4_PM_INTERRUPT);
+
+	pm_data = kzalloc(sizeof(*pm_data), GFP_ATOMIC);
+	if (!pm_data)
+		return false;
+
+	pm_data->pm_int_sts = val;
+	pm_data->accel_dev = accel_dev;
+
+	INIT_WORK(&pm_data->pm_irq_work, pm_bh_handler);
+	adf_misc_wq_queue_work(&pm_data->pm_irq_work);
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_handle_pm_interrupt);
+
+int adf_gen4_enable_pm(struct adf_accel_dev *accel_dev)
+{
+	void __iomem *pmisc = adf_get_pmisc_base(accel_dev);
+	int ret;
+	u32 val;
+
+	ret = adf_init_admin_pm(accel_dev, ADF_GEN4_PM_DEFAULT_IDLE_FILTER);
+	if (ret)
+		return ret;
+
+	/* Enable default PM interrupts: IDLE, THROTTLE */
+	val = ADF_CSR_RD(pmisc, ADF_GEN4_PM_INTERRUPT);
+	val |= ADF_GEN4_PM_INT_EN_DEFAULT;
+
+	/* Clear interrupt status */
+	val |= ADF_GEN4_PM_INT_STS_MASK;
+	ADF_CSR_WR(pmisc, ADF_GEN4_PM_INTERRUPT, val);
+
+	/* Unmask PM Interrupt */
+	val = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2);
+	val &= ~ADF_GEN4_PM_SOU;
+	ADF_CSR_WR(pmisc, ADF_GEN4_ERRMSK2, val);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_enable_pm);
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pm.h b/drivers/crypto/qat/qat_common/adf_gen4_pm.h
new file mode 100644
index 000000000000..f8f8a9ee29e5
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/adf_gen4_pm.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2022 Intel Corporation */
+#ifndef ADF_GEN4_PM_H
+#define ADF_GEN4_PM_H
+
+#include "adf_accel_devices.h"
+
+/* Power management registers */
+#define ADF_GEN4_PM_HOST_MSG (0x50A01C)
+
+/* Power management */
+#define ADF_GEN4_PM_POLL_DELAY_US	20
+#define ADF_GEN4_PM_POLL_TIMEOUT_US	USEC_PER_SEC
+#define ADF_GEN4_PM_MSG_POLL_DELAY_US	(10 * USEC_PER_MSEC)
+#define ADF_GEN4_PM_STATUS		(0x50A00C)
+#define ADF_GEN4_PM_INTERRUPT		(0x50A028)
+
+/* Power management source in ERRSOU2 and ERRMSK2 */
+#define ADF_GEN4_PM_SOU			BIT(18)
+
+#define ADF_GEN4_PM_IDLE_INT_EN		BIT(18)
+#define ADF_GEN4_PM_THROTTLE_INT_EN	BIT(19)
+#define ADF_GEN4_PM_DRV_ACTIVE		BIT(20)
+#define ADF_GEN4_PM_INIT_STATE		BIT(21)
+#define ADF_GEN4_PM_INT_EN_DEFAULT	(ADF_GEN4_PM_IDLE_INT_EN | \
+					ADF_GEN4_PM_THROTTLE_INT_EN)
+
+#define ADF_GEN4_PM_THR_STS	BIT(0)
+#define ADF_GEN4_PM_IDLE_STS	BIT(1)
+#define ADF_GEN4_PM_FW_INT_STS	BIT(2)
+#define ADF_GEN4_PM_INT_STS_MASK (ADF_GEN4_PM_THR_STS | \
+				 ADF_GEN4_PM_IDLE_STS | \
+				 ADF_GEN4_PM_FW_INT_STS)
+
+#define ADF_GEN4_PM_MSG_PENDING			BIT(0)
+#define ADF_GEN4_PM_MSG_PAYLOAD_BIT_MASK	GENMASK(28, 1)
+
+#define ADF_GEN4_PM_DEFAULT_IDLE_FILTER		(0x0)
+#define ADF_GEN4_PM_MAX_IDLE_FILTER		(0x7)
+
+int adf_gen4_enable_pm(struct adf_accel_dev *accel_dev);
+bool adf_gen4_handle_pm_interrupt(struct adf_accel_dev *accel_dev);
+
+#endif
diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c
index 2edc63c6b6ca..c2c718f1b489 100644
--- a/drivers/crypto/qat/qat_common/adf_init.c
+++ b/drivers/crypto/qat/qat_common/adf_init.c
@@ -181,6 +181,12 @@ int adf_dev_start(struct adf_accel_dev *accel_dev)
 	if (hw_data->set_ssm_wdtimer)
 		hw_data->set_ssm_wdtimer(accel_dev);
 
+	/* Enable Power Management */
+	if (hw_data->enable_pm && hw_data->enable_pm(accel_dev)) {
+		dev_err(&GET_DEV(accel_dev), "Failed to configure Power Management\n");
+		return -EFAULT;
+	}
+
 	list_for_each(list_itr, &service_table) {
 		service = list_entry(list_itr, struct service_hndl, list);
 		if (service->event_hld(accel_dev, ADF_EVENT_START)) {
diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c
index 4ca482aa69f7..a35149f8bf1e 100644
--- a/drivers/crypto/qat/qat_common/adf_isr.c
+++ b/drivers/crypto/qat/qat_common/adf_isr.c
@@ -16,6 +16,7 @@
 #include "adf_transport_internal.h"
 
 #define ADF_MAX_NUM_VFS	32
+static struct workqueue_struct *adf_misc_wq;
 
 static int adf_enable_msix(struct adf_accel_dev *accel_dev)
 {
@@ -123,6 +124,17 @@ static bool adf_handle_vf2pf_int(struct adf_accel_dev *accel_dev)
 }
 #endif /* CONFIG_PCI_IOV */
 
+static bool adf_handle_pm_int(struct adf_accel_dev *accel_dev)
+{
+	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+
+	if (hw_data->handle_pm_interrupt &&
+	    hw_data->handle_pm_interrupt(accel_dev))
+		return true;
+
+	return false;
+}
+
 static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
 {
 	struct adf_accel_dev *accel_dev = dev_ptr;
@@ -133,6 +145,9 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
 		return IRQ_HANDLED;
 #endif /* CONFIG_PCI_IOV */
 
+	if (adf_handle_pm_int(accel_dev))
+		return IRQ_HANDLED;
+
 	dev_dbg(&GET_DEV(accel_dev), "qat_dev%d spurious AE interrupt\n",
 		accel_dev->accel_id);
 
@@ -341,3 +356,30 @@ err_out:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(adf_isr_resource_alloc);
+
+/**
+ * adf_init_misc_wq() - Init misc workqueue
+ *
+ * Function init workqueue 'qat_misc_wq' for general purpose.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+int __init adf_init_misc_wq(void)
+{
+	adf_misc_wq = alloc_workqueue("qat_misc_wq", WQ_MEM_RECLAIM, 0);
+
+	return !adf_misc_wq ? -ENOMEM : 0;
+}
+
+void adf_exit_misc_wq(void)
+{
+	if (adf_misc_wq)
+		destroy_workqueue(adf_misc_wq);
+
+	adf_misc_wq = NULL;
+}
+
+bool adf_misc_wq_queue_work(struct work_struct *work)
+{
+	return queue_work(adf_misc_wq, work);
+}
diff --git a/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c b/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c
index 14b222691c9c..1141258db4b6 100644
--- a/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c
+++ b/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c
@@ -96,7 +96,7 @@ int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev)
 int adf_vf2pf_get_capabilities(struct adf_accel_dev *accel_dev)
 {
 	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
-	struct capabilities_v3 cap_msg = { { 0 }, };
+	struct capabilities_v3 cap_msg = { 0 };
 	unsigned int len = sizeof(cap_msg);
 
 	if (accel_dev->vf.pf_compat_ver < ADF_PFVF_COMPAT_CAPABILITIES)
@@ -141,7 +141,7 @@ int adf_vf2pf_get_capabilities(struct adf_accel_dev *accel_dev)
 
 int adf_vf2pf_get_ring_to_svc(struct adf_accel_dev *accel_dev)
 {
-	struct ring_to_svc_map_v1 rts_map_msg = { { 0 }, };
+	struct ring_to_svc_map_v1 rts_map_msg = { 0 };
 	unsigned int len = sizeof(rts_map_msg);
 
 	if (accel_dev->vf.pf_compat_ver < ADF_PFVF_COMPAT_RING_TO_SVC_MAP)
diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h b/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h
index afe59a7684ac..56cb827f93ea 100644
--- a/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h
+++ b/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h
@@ -16,6 +16,7 @@ enum icp_qat_fw_init_admin_cmd_id {
 	ICP_QAT_FW_HEARTBEAT_SYNC = 7,
 	ICP_QAT_FW_HEARTBEAT_GET = 8,
 	ICP_QAT_FW_COMP_CAPABILITY_GET = 9,
+	ICP_QAT_FW_PM_STATE_CONFIG = 128,
 };
 
 enum icp_qat_fw_init_admin_resp_status {
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c
index 7234c4940fae..67c9588e89df 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.c
+++ b/drivers/crypto/qat/qat_common/qat_crypto.c
@@ -161,6 +161,13 @@ int qat_crypto_dev_config(struct adf_accel_dev *accel_dev)
 	if (ret)
 		goto err;
 
+	/* Temporarily set the number of crypto instances to zero to avoid
+	 * registering the crypto algorithms.
+	 * This will be removed when the algorithms will support the
+	 * CRYPTO_TFM_REQ_MAY_BACKLOG flag
+	 */
+	instances = 0;
+
 	for (i = 0; i < instances; i++) {
 		val = i;
 		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i);
diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c
index 2026cc6be8f0..6356402a2c9e 100644
--- a/drivers/crypto/qat/qat_common/qat_uclo.c
+++ b/drivers/crypto/qat/qat_common/qat_uclo.c
@@ -387,7 +387,9 @@ static int qat_uclo_init_ustore(struct icp_qat_fw_loader_handle *handle,
 	page = image->page;
 
 	for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) {
-		if (!test_bit(ae, (unsigned long *)&uof_image->ae_assigned))
+		unsigned long ae_assigned = uof_image->ae_assigned;
+
+		if (!test_bit(ae, &ae_assigned))
 			continue;
 
 		if (!test_bit(ae, &cfg_ae_mask))
@@ -664,8 +666,9 @@ static int qat_uclo_map_ae(struct icp_qat_fw_loader_handle *handle, int max_ae)
 			continue;
 
 		for (i = 0; i < obj_handle->uimage_num; i++) {
-			if (!test_bit(ae, (unsigned long *)
-			&obj_handle->ae_uimage[i].img_ptr->ae_assigned))
+			unsigned long ae_assigned = obj_handle->ae_uimage[i].img_ptr->ae_assigned;
+
+			if (!test_bit(ae, &ae_assigned))
 				continue;
 			mflag = 1;
 			if (qat_uclo_init_ae_data(obj_handle, ae, i))
diff --git a/drivers/crypto/qcom-rng.c b/drivers/crypto/qcom-rng.c
index 99ba8d51d102..11f30fd48c14 100644
--- a/drivers/crypto/qcom-rng.c
+++ b/drivers/crypto/qcom-rng.c
@@ -8,6 +8,7 @@
 #include <linux/clk.h>
 #include <linux/crypto.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
@@ -43,16 +44,19 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max)
 {
 	unsigned int currsize = 0;
 	u32 val;
+	int ret;
 
 	/* read random data from hardware */
 	do {
-		val = readl_relaxed(rng->base + PRNG_STATUS);
-		if (!(val & PRNG_STATUS_DATA_AVAIL))
-			break;
+		ret = readl_poll_timeout(rng->base + PRNG_STATUS, val,
+					 val & PRNG_STATUS_DATA_AVAIL,
+					 200, 10000);
+		if (ret)
+			return ret;
 
 		val = readl_relaxed(rng->base + PRNG_DATA_OUT);
 		if (!val)
-			break;
+			return -EINVAL;
 
 		if ((max - currsize) >= WORD_SZ) {
 			memcpy(data, &val, WORD_SZ);
@@ -61,11 +65,10 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max)
 		} else {
 			/* copy only remaining bytes */
 			memcpy(data, &val, max - currsize);
-			break;
 		}
 	} while (currsize < max);
 
-	return currsize;
+	return 0;
 }
 
 static int qcom_rng_generate(struct crypto_rng *tfm,
@@ -87,7 +90,7 @@ static int qcom_rng_generate(struct crypto_rng *tfm,
 	mutex_unlock(&rng->lock);
 	clk_disable_unprepare(rng->clk);
 
-	return 0;
+	return ret;
 }
 
 static int qcom_rng_seed(struct crypto_rng *tfm, const u8 *seed,
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index 1cece1a7d3f0..5bbf0d2722e1 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -506,7 +506,6 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = {
 		.exit			= rk_ablk_exit_tfm,
 		.min_keysize		= DES3_EDE_KEY_SIZE,
 		.max_keysize		= DES3_EDE_KEY_SIZE,
-		.ivsize			= DES_BLOCK_SIZE,
 		.setkey			= rk_tdes_setkey,
 		.encrypt		= rk_des3_ede_ecb_encrypt,
 		.decrypt		= rk_des3_ede_ecb_decrypt,
diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c
index 97277b7150cb..5a57c9afd8c8 100644
--- a/drivers/crypto/ux500/cryp/cryp_core.c
+++ b/drivers/crypto/ux500/cryp/cryp_core.c
@@ -1264,7 +1264,7 @@ static int ux500_cryp_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 
 	dev_dbg(dev, "[%s]", __func__);
-	device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_ATOMIC);
+	device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_KERNEL);
 	if (!device_data) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index 51a6e1a42434..5157c118d642 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -1658,7 +1658,7 @@ static int ux500_hash_probe(struct platform_device *pdev)
 	struct hash_device_data *device_data;
 	struct device		*dev = &pdev->dev;
 
-	device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_ATOMIC);
+	device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_KERNEL);
 	if (!device_data) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig
index c85fab7ef0bd..b2c28b87f14b 100644
--- a/drivers/crypto/vmx/Kconfig
+++ b/drivers/crypto/vmx/Kconfig
@@ -2,7 +2,11 @@
 config CRYPTO_DEV_VMX_ENCRYPT
 	tristate "Encryption acceleration support on P8 CPU"
 	depends on CRYPTO_DEV_VMX
+	select CRYPTO_AES
+	select CRYPTO_CBC
+	select CRYPTO_CTR
 	select CRYPTO_GHASH
+	select CRYPTO_XTS
 	default m
 	help
 	  Support for VMX cryptographic acceleration instructions on Power8 CPU.
diff --git a/drivers/crypto/xilinx/Makefile b/drivers/crypto/xilinx/Makefile
index 534e32daf76a..730feff5b5f2 100644
--- a/drivers/crypto/xilinx/Makefile
+++ b/drivers/crypto/xilinx/Makefile
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += zynqmp-aes-gcm.o
+obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_SHA3) += zynqmp-sha.o
diff --git a/drivers/crypto/xilinx/zynqmp-sha.c b/drivers/crypto/xilinx/zynqmp-sha.c
new file mode 100644
index 000000000000..43ff170ff1c2
--- /dev/null
+++ b/drivers/crypto/xilinx/zynqmp-sha.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx ZynqMP SHA Driver.
+ * Copyright (c) 2022 Xilinx Inc.
+ */
+#include <linux/cacheflush.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha3.h>
+#include <linux/crypto.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/firmware/xlnx-zynqmp.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#define ZYNQMP_DMA_BIT_MASK		32U
+#define ZYNQMP_DMA_ALLOC_FIXED_SIZE	0x1000U
+
+enum zynqmp_sha_op {
+	ZYNQMP_SHA3_INIT = 1,
+	ZYNQMP_SHA3_UPDATE = 2,
+	ZYNQMP_SHA3_FINAL = 4,
+};
+
+struct zynqmp_sha_drv_ctx {
+	struct shash_alg sha3_384;
+	struct device *dev;
+};
+
+struct zynqmp_sha_tfm_ctx {
+	struct device *dev;
+	struct crypto_shash *fbk_tfm;
+};
+
+struct zynqmp_sha_desc_ctx {
+	struct shash_desc fbk_req;
+};
+
+static dma_addr_t update_dma_addr, final_dma_addr;
+static char *ubuf, *fbuf;
+
+static int zynqmp_sha_init_tfm(struct crypto_shash *hash)
+{
+	const char *fallback_driver_name = crypto_shash_alg_name(hash);
+	struct zynqmp_sha_tfm_ctx *tfm_ctx = crypto_shash_ctx(hash);
+	struct shash_alg *alg = crypto_shash_alg(hash);
+	struct crypto_shash *fallback_tfm;
+	struct zynqmp_sha_drv_ctx *drv_ctx;
+
+	drv_ctx = container_of(alg, struct zynqmp_sha_drv_ctx, sha3_384);
+	tfm_ctx->dev = drv_ctx->dev;
+
+	/* Allocate a fallback and abort if it failed. */
+	fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
+					  CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(fallback_tfm))
+		return PTR_ERR(fallback_tfm);
+
+	tfm_ctx->fbk_tfm = fallback_tfm;
+	hash->descsize += crypto_shash_descsize(tfm_ctx->fbk_tfm);
+
+	return 0;
+}
+
+static void zynqmp_sha_exit_tfm(struct crypto_shash *hash)
+{
+	struct zynqmp_sha_tfm_ctx *tfm_ctx = crypto_shash_ctx(hash);
+
+	if (tfm_ctx->fbk_tfm) {
+		crypto_free_shash(tfm_ctx->fbk_tfm);
+		tfm_ctx->fbk_tfm = NULL;
+	}
+
+	memzero_explicit(tfm_ctx, sizeof(struct zynqmp_sha_tfm_ctx));
+}
+
+static int zynqmp_sha_init(struct shash_desc *desc)
+{
+	struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+	struct zynqmp_sha_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+
+	dctx->fbk_req.tfm = tctx->fbk_tfm;
+	return crypto_shash_init(&dctx->fbk_req);
+}
+
+static int zynqmp_sha_update(struct shash_desc *desc, const u8 *data, unsigned int length)
+{
+	struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	return crypto_shash_update(&dctx->fbk_req, data, length);
+}
+
+static int zynqmp_sha_final(struct shash_desc *desc, u8 *out)
+{
+	struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	return crypto_shash_final(&dctx->fbk_req, out);
+}
+
+static int zynqmp_sha_finup(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out)
+{
+	struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	return crypto_shash_finup(&dctx->fbk_req, data, length, out);
+}
+
+static int zynqmp_sha_import(struct shash_desc *desc, const void *in)
+{
+	struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+	struct zynqmp_sha_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+
+	dctx->fbk_req.tfm = tctx->fbk_tfm;
+	return crypto_shash_import(&dctx->fbk_req, in);
+}
+
+static int zynqmp_sha_export(struct shash_desc *desc, void *out)
+{
+	struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	return crypto_shash_export(&dctx->fbk_req, out);
+}
+
+static int zynqmp_sha_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
+{
+	unsigned int remaining_len = len;
+	int update_size;
+	int ret;
+
+	ret = zynqmp_pm_sha_hash(0, 0, ZYNQMP_SHA3_INIT);
+	if (ret)
+		return ret;
+
+	while (remaining_len != 0) {
+		memzero_explicit(ubuf, ZYNQMP_DMA_ALLOC_FIXED_SIZE);
+		if (remaining_len >= ZYNQMP_DMA_ALLOC_FIXED_SIZE) {
+			update_size = ZYNQMP_DMA_ALLOC_FIXED_SIZE;
+			remaining_len -= ZYNQMP_DMA_ALLOC_FIXED_SIZE;
+		} else {
+			update_size = remaining_len;
+			remaining_len = 0;
+		}
+		memcpy(ubuf, data, update_size);
+		flush_icache_range((unsigned long)ubuf, (unsigned long)ubuf + update_size);
+		ret = zynqmp_pm_sha_hash(update_dma_addr, update_size, ZYNQMP_SHA3_UPDATE);
+		if (ret)
+			return ret;
+
+		data += update_size;
+	}
+
+	ret = zynqmp_pm_sha_hash(final_dma_addr, SHA3_384_DIGEST_SIZE, ZYNQMP_SHA3_FINAL);
+	memcpy(out, fbuf, SHA3_384_DIGEST_SIZE);
+	memzero_explicit(fbuf, SHA3_384_DIGEST_SIZE);
+
+	return ret;
+}
+
+static struct zynqmp_sha_drv_ctx sha3_drv_ctx = {
+	.sha3_384 = {
+		.init = zynqmp_sha_init,
+		.update = zynqmp_sha_update,
+		.final = zynqmp_sha_final,
+		.finup = zynqmp_sha_finup,
+		.digest = zynqmp_sha_digest,
+		.export = zynqmp_sha_export,
+		.import = zynqmp_sha_import,
+		.init_tfm = zynqmp_sha_init_tfm,
+		.exit_tfm = zynqmp_sha_exit_tfm,
+		.descsize = sizeof(struct zynqmp_sha_desc_ctx),
+		.statesize = sizeof(struct sha3_state),
+		.digestsize = SHA3_384_DIGEST_SIZE,
+		.base = {
+			.cra_name = "sha3-384",
+			.cra_driver_name = "zynqmp-sha3-384",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |
+				     CRYPTO_ALG_ALLOCATES_MEMORY |
+				     CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize = SHA3_384_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct zynqmp_sha_tfm_ctx),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		}
+	}
+};
+
+static int zynqmp_sha_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	int err;
+	u32 v;
+
+	/* Verify the hardware is present */
+	err = zynqmp_pm_get_api_version(&v);
+	if (err)
+		return err;
+
+
+	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(ZYNQMP_DMA_BIT_MASK));
+	if (err < 0) {
+		dev_err(dev, "No usable DMA configuration\n");
+		return err;
+	}
+
+	err = crypto_register_shash(&sha3_drv_ctx.sha3_384);
+	if (err < 0) {
+		dev_err(dev, "Failed to register shash alg.\n");
+		return err;
+	}
+
+	sha3_drv_ctx.dev = dev;
+	platform_set_drvdata(pdev, &sha3_drv_ctx);
+
+	ubuf = dma_alloc_coherent(dev, ZYNQMP_DMA_ALLOC_FIXED_SIZE, &update_dma_addr, GFP_KERNEL);
+	if (!ubuf) {
+		err = -ENOMEM;
+		goto err_shash;
+	}
+
+	fbuf = dma_alloc_coherent(dev, SHA3_384_DIGEST_SIZE, &final_dma_addr, GFP_KERNEL);
+	if (!fbuf) {
+		err = -ENOMEM;
+		goto err_mem;
+	}
+
+	return 0;
+
+err_mem:
+	dma_free_coherent(sha3_drv_ctx.dev, ZYNQMP_DMA_ALLOC_FIXED_SIZE, ubuf, update_dma_addr);
+
+err_shash:
+	crypto_unregister_shash(&sha3_drv_ctx.sha3_384);
+
+	return err;
+}
+
+static int zynqmp_sha_remove(struct platform_device *pdev)
+{
+	sha3_drv_ctx.dev = platform_get_drvdata(pdev);
+
+	dma_free_coherent(sha3_drv_ctx.dev, ZYNQMP_DMA_ALLOC_FIXED_SIZE, ubuf, update_dma_addr);
+	dma_free_coherent(sha3_drv_ctx.dev, SHA3_384_DIGEST_SIZE, fbuf, final_dma_addr);
+	crypto_unregister_shash(&sha3_drv_ctx.sha3_384);
+
+	return 0;
+}
+
+static struct platform_driver zynqmp_sha_driver = {
+	.probe = zynqmp_sha_probe,
+	.remove = zynqmp_sha_remove,
+	.driver = {
+		.name = "zynqmp-sha3-384",
+	},
+};
+
+module_platform_driver(zynqmp_sha_driver);
+MODULE_DESCRIPTION("ZynqMP SHA3 hardware acceleration support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Harsha <[email protected]>");
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 110de8a60058..858400e42ec0 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2968,7 +2968,7 @@ static int __maybe_unused pl330_suspend(struct device *dev)
 	struct amba_device *pcdev = to_amba_device(dev);
 
 	pm_runtime_force_suspend(dev);
-	amba_pclk_unprepare(pcdev);
+	clk_unprepare(pcdev->pclk);
 
 	return 0;
 }
@@ -2978,7 +2978,7 @@ static int __maybe_unused pl330_resume(struct device *dev)
 	struct amba_device *pcdev = to_amba_device(dev);
 	int ret;
 
-	ret = amba_pclk_prepare(pcdev);
+	ret = clk_prepare(pcdev->pclk);
 	if (ret)
 		return ret;
 
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 5dd29789f97d..e7e8e624a436 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -1083,8 +1083,46 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat)
 
 #ifdef CONFIG_EDAC_ALTERA_SDRAM
 
+/*
+ * A legacy U-Boot bug only enabled memory mapped access to the ECC Enable
+ * register if ECC is enabled. Linux checks the ECC Enable register to
+ * determine ECC status.
+ * Use an SMC call (which always works) to determine ECC enablement.
+ */
+static int altr_s10_sdram_check_ecc_deps(struct altr_edac_device_dev *device)
+{
+	const struct edac_device_prv_data *prv = device->data;
+	unsigned long sdram_ecc_addr;
+	struct arm_smccc_res result;
+	struct device_node *np;
+	phys_addr_t sdram_addr;
+	u32 read_reg;
+	int ret;
+
+	np = of_find_compatible_node(NULL, NULL, "altr,sdr-ctl");
+	if (!np)
+		goto sdram_err;
+
+	sdram_addr = of_translate_address(np, of_get_address(np, 0,
+							     NULL, NULL));
+	of_node_put(np);
+	sdram_ecc_addr = (unsigned long)sdram_addr + prv->ecc_en_ofst;
+	arm_smccc_smc(INTEL_SIP_SMC_REG_READ, sdram_ecc_addr,
+		      0, 0, 0, 0, 0, 0, &result);
+	read_reg = (unsigned int)result.a1;
+	ret = (int)result.a0;
+	if (!ret && (read_reg & prv->ecc_enable_mask))
+		return 0;
+
+sdram_err:
+	edac_printk(KERN_ERR, EDAC_DEVICE,
+		    "%s: No ECC present or ECC disabled.\n",
+		    device->edac_dev_name);
+	return -ENODEV;
+}
+
 static const struct edac_device_prv_data s10_sdramecc_data = {
-	.setup = altr_check_ecc_deps,
+	.setup = altr_s10_sdram_check_ecc_deps,
 	.ce_clear_mask = ALTR_S10_ECC_SERRPENA,
 	.ue_clear_mask = ALTR_S10_ECC_DERRPENA,
 	.ecc_enable_mask = ALTR_S10_ECC_EN,
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index fba609ada0e6..812baa48b290 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -15,6 +15,21 @@ static struct msr __percpu *msrs;
 
 static struct amd64_family_type *fam_type;
 
+static inline u32 get_umc_reg(u32 reg)
+{
+	if (!fam_type->flags.zn_regs_v2)
+		return reg;
+
+	switch (reg) {
+	case UMCCH_ADDR_CFG:		return UMCCH_ADDR_CFG_DDR5;
+	case UMCCH_ADDR_MASK_SEC:	return UMCCH_ADDR_MASK_SEC_DDR5;
+	case UMCCH_DIMM_CFG:		return UMCCH_DIMM_CFG_DDR5;
+	}
+
+	WARN_ONCE(1, "%s: unknown register 0x%x", __func__, reg);
+	return 0;
+}
+
 /* Per-node stuff */
 static struct ecc_settings **ecc_stngs;
 
@@ -1429,8 +1444,10 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt)
 		edac_dbg(1, "UMC%d x16 DIMMs present: %s\n",
 				i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no");
 
-		if (pvt->dram_type == MEM_LRDDR4) {
-			amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp);
+		if (umc->dram_type == MEM_LRDDR4 || umc->dram_type == MEM_LRDDR5) {
+			amd_smn_read(pvt->mc_node_id,
+				     umc_base + get_umc_reg(UMCCH_ADDR_CFG),
+				     &tmp);
 			edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n",
 					i, 1 << ((tmp >> 4) & 0x3));
 		}
@@ -1505,7 +1522,7 @@ static void prep_chip_selects(struct amd64_pvt *pvt)
 
 		for_each_umc(umc) {
 			pvt->csels[umc].b_cnt = 4;
-			pvt->csels[umc].m_cnt = 2;
+			pvt->csels[umc].m_cnt = fam_type->flags.zn_regs_v2 ? 4 : 2;
 		}
 
 	} else {
@@ -1545,7 +1562,7 @@ static void read_umc_base_mask(struct amd64_pvt *pvt)
 		}
 
 		umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK;
-		umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC;
+		umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(UMCCH_ADDR_MASK_SEC);
 
 		for_each_chip_select_mask(cs, umc, pvt) {
 			mask = &pvt->csels[umc].csmasks[cs];
@@ -1616,19 +1633,49 @@ static void read_dct_base_mask(struct amd64_pvt *pvt)
 	}
 }
 
+static void determine_memory_type_df(struct amd64_pvt *pvt)
+{
+	struct amd64_umc *umc;
+	u32 i;
+
+	for_each_umc(i) {
+		umc = &pvt->umc[i];
+
+		if (!(umc->sdp_ctrl & UMC_SDP_INIT)) {
+			umc->dram_type = MEM_EMPTY;
+			continue;
+		}
+
+		/*
+		 * Check if the system supports the "DDR Type" field in UMC Config
+		 * and has DDR5 DIMMs in use.
+		 */
+		if (fam_type->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) {
+			if (umc->dimm_cfg & BIT(5))
+				umc->dram_type = MEM_LRDDR5;
+			else if (umc->dimm_cfg & BIT(4))
+				umc->dram_type = MEM_RDDR5;
+			else
+				umc->dram_type = MEM_DDR5;
+		} else {
+			if (umc->dimm_cfg & BIT(5))
+				umc->dram_type = MEM_LRDDR4;
+			else if (umc->dimm_cfg & BIT(4))
+				umc->dram_type = MEM_RDDR4;
+			else
+				umc->dram_type = MEM_DDR4;
+		}
+
+		edac_dbg(1, "  UMC%d DIMM type: %s\n", i, edac_mem_types[umc->dram_type]);
+	}
+}
+
 static void determine_memory_type(struct amd64_pvt *pvt)
 {
 	u32 dram_ctrl, dcsm;
 
-	if (pvt->umc) {
-		if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
-			pvt->dram_type = MEM_LRDDR4;
-		else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
-			pvt->dram_type = MEM_RDDR4;
-		else
-			pvt->dram_type = MEM_DDR4;
-		return;
-	}
+	if (pvt->umc)
+		return determine_memory_type_df(pvt);
 
 	switch (pvt->fam) {
 	case 0xf:
@@ -2149,6 +2196,7 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
 {
 	u32 addr_mask_orig, addr_mask_deinterleaved;
 	u32 msb, weight, num_zero_bits;
+	int cs_mask_nr = csrow_nr;
 	int dimm, size = 0;
 
 	/* No Chip Selects are enabled. */
@@ -2164,17 +2212,33 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
 		return size;
 
 	/*
-	 * There is one mask per DIMM, and two Chip Selects per DIMM.
-	 *	CS0 and CS1 -> DIMM0
-	 *	CS2 and CS3 -> DIMM1
+	 * Family 17h introduced systems with one mask per DIMM,
+	 * and two Chip Selects per DIMM.
+	 *
+	 *	CS0 and CS1 -> MASK0 / DIMM0
+	 *	CS2 and CS3 -> MASK1 / DIMM1
+	 *
+	 * Family 19h Model 10h introduced systems with one mask per Chip Select,
+	 * and two Chip Selects per DIMM.
+	 *
+	 *	CS0 -> MASK0 -> DIMM0
+	 *	CS1 -> MASK1 -> DIMM0
+	 *	CS2 -> MASK2 -> DIMM1
+	 *	CS3 -> MASK3 -> DIMM1
+	 *
+	 * Keep the mask number equal to the Chip Select number for newer systems,
+	 * and shift the mask number for older systems.
 	 */
 	dimm = csrow_nr >> 1;
 
+	if (!fam_type->flags.zn_regs_v2)
+		cs_mask_nr >>= 1;
+
 	/* Asymmetric dual-rank DIMM support. */
 	if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
-		addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm];
+		addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr];
 	else
-		addr_mask_orig = pvt->csels[umc].csmasks[dimm];
+		addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];
 
 	/*
 	 * The number of zero bits in the mask is equal to the number of bits
@@ -2930,6 +2994,7 @@ static struct amd64_family_type family_types[] = {
 		.f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0,
 		.f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6,
 		.max_mcs = 12,
+		.flags.zn_regs_v2 = 1,
 		.ops = {
 			.early_channel_count	= f17_early_channel_count,
 			.dbam_to_cs		= f17_addr_mask_to_cs_size,
@@ -3368,7 +3433,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
 		umc_base = get_umc_base(i);
 		umc = &pvt->umc[i];
 
-		amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg);
+		amd_smn_read(nid, umc_base + get_umc_reg(UMCCH_DIMM_CFG), &umc->dimm_cfg);
 		amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
 		amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
 		amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
@@ -3452,7 +3517,9 @@ skip:
 	read_dct_base_mask(pvt);
 
 	determine_memory_type(pvt);
-	edac_dbg(1, "  DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
+
+	if (!pvt->umc)
+		edac_dbg(1, "  DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
 
 	determine_ecc_sym_sz(pvt);
 }
@@ -3548,7 +3615,7 @@ static int init_csrows_df(struct mem_ctl_info *mci)
 					pvt->mc_node_id, cs);
 
 			dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs);
-			dimm->mtype = pvt->dram_type;
+			dimm->mtype = pvt->umc[umc].dram_type;
 			dimm->edac_mode = edac_mode;
 			dimm->dtype = dev_type;
 			dimm->grain = 64;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 352bda9803f6..38e5ad95d010 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -273,8 +273,11 @@
 #define UMCCH_BASE_ADDR_SEC		0x10
 #define UMCCH_ADDR_MASK			0x20
 #define UMCCH_ADDR_MASK_SEC		0x28
+#define UMCCH_ADDR_MASK_SEC_DDR5	0x30
 #define UMCCH_ADDR_CFG			0x30
+#define UMCCH_ADDR_CFG_DDR5		0x40
 #define UMCCH_DIMM_CFG			0x80
+#define UMCCH_DIMM_CFG_DDR5		0x90
 #define UMCCH_UMC_CFG			0x100
 #define UMCCH_SDP_CTRL			0x104
 #define UMCCH_ECC_CTRL			0x14C
@@ -344,6 +347,9 @@ struct amd64_umc {
 	u32 sdp_ctrl;		/* SDP Control reg */
 	u32 ecc_ctrl;		/* DRAM ECC Control reg */
 	u32 umc_cap_hi;		/* Capabilities High reg */
+
+	/* cache the dram_type */
+	enum mem_type dram_type;
 };
 
 struct amd64_pvt {
@@ -391,7 +397,12 @@ struct amd64_pvt {
 	/* place to store error injection parameters prior to issue */
 	struct error_injection injection;
 
-	/* cache the dram_type */
+	/*
+	 * cache the dram_type
+	 *
+	 * NOTE: Don't use this for Family 17h and later.
+	 *	 Use dram_type in struct amd64_umc instead.
+	 */
 	enum mem_type dram_type;
 
 	struct amd64_umc *umc;	/* UMC registers */
@@ -480,11 +491,22 @@ struct low_ops {
 					 unsigned cs_mode, int cs_mask_nr);
 };
 
+struct amd64_family_flags {
+	/*
+	 * Indicates that the system supports the new register offsets, etc.
+	 * first introduced with Family 19h Model 10h.
+	 */
+	__u64 zn_regs_v2	: 1,
+
+	      __reserved	: 63;
+};
+
 struct amd64_family_type {
 	const char *ctl_name;
 	u16 f0_id, f1_id, f2_id, f6_id;
 	/* Maximum number of memory controllers per die/node. */
 	u8 max_mcs;
+	struct amd64_family_flags flags;
 	struct low_ops ops;
 };
 
diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index 5e7593753799..9a61d92bdf42 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -163,13 +163,14 @@ CTL_INFO_ATTR(poll_msec, S_IRUGO | S_IWUSR,
 	edac_device_ctl_poll_msec_show, edac_device_ctl_poll_msec_store);
 
 /* Base Attributes of the EDAC_DEVICE ECC object */
-static struct ctl_info_attribute *device_ctrl_attr[] = {
-	&attr_ctl_info_panic_on_ue,
-	&attr_ctl_info_log_ue,
-	&attr_ctl_info_log_ce,
-	&attr_ctl_info_poll_msec,
+static struct attribute *device_ctrl_attrs[] = {
+	&attr_ctl_info_panic_on_ue.attr,
+	&attr_ctl_info_log_ue.attr,
+	&attr_ctl_info_log_ce.attr,
+	&attr_ctl_info_poll_msec.attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(device_ctrl);
 
 /*
  * edac_device_ctrl_master_release
@@ -217,7 +218,7 @@ static void edac_device_ctrl_master_release(struct kobject *kobj)
 static struct kobj_type ktype_device_ctrl = {
 	.release = edac_device_ctrl_master_release,
 	.sysfs_ops = &device_ctl_info_ops,
-	.default_attrs = (struct attribute **)device_ctrl_attr,
+	.default_groups = device_ctrl_groups,
 };
 
 /*
@@ -389,17 +390,18 @@ INSTANCE_ATTR(ce_count, S_IRUGO, instance_ce_count_show, NULL);
 INSTANCE_ATTR(ue_count, S_IRUGO, instance_ue_count_show, NULL);
 
 /* list of edac_dev 'instance' attributes */
-static struct instance_attribute *device_instance_attr[] = {
-	&attr_instance_ce_count,
-	&attr_instance_ue_count,
+static struct attribute *device_instance_attrs[] = {
+	&attr_instance_ce_count.attr,
+	&attr_instance_ue_count.attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(device_instance);
 
 /* The 'ktype' for each edac_dev 'instance' */
 static struct kobj_type ktype_instance_ctrl = {
 	.release = edac_device_ctrl_instance_release,
 	.sysfs_ops = &device_instance_ops,
-	.default_attrs = (struct attribute **)device_instance_attr,
+	.default_groups = device_instance_groups,
 };
 
 /* edac_dev -> instance -> block information */
@@ -487,17 +489,18 @@ BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show, NULL);
 BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show, NULL);
 
 /* list of edac_dev 'block' attributes */
-static struct edac_dev_sysfs_block_attribute *device_block_attr[] = {
-	&attr_block_ce_count,
-	&attr_block_ue_count,
+static struct attribute *device_block_attrs[] = {
+	&attr_block_ce_count.attr,
+	&attr_block_ue_count.attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(device_block);
 
 /* The 'ktype' for each edac_dev 'block' */
 static struct kobj_type ktype_block_ctrl = {
 	.release = edac_device_ctrl_block_release,
 	.sysfs_ops = &device_block_ops,
-	.default_attrs = (struct attribute **)device_block_attr,
+	.default_groups = device_block_groups,
 };
 
 /* block ctor/dtor  code */
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index f5677d81bd2d..d2715774af6f 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -213,12 +213,12 @@ void *edac_align_ptr(void **p, unsigned int size, int n_elems)
 	else if (size > sizeof(char))
 		align = sizeof(short);
 	else
-		return (char *)ptr;
+		return ptr;
 
 	r = (unsigned long)ptr % align;
 
 	if (r == 0)
-		return (char *)ptr;
+		return ptr;
 
 	*p += align - r;
 
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index 53042af7262e..888d5728ecef 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -135,17 +135,18 @@ INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL);
 INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL);
 
 /* pci instance attributes */
-static struct instance_attribute *pci_instance_attr[] = {
-	&attr_instance_pe_count,
-	&attr_instance_npe_count,
+static struct attribute *pci_instance_attrs[] = {
+	&attr_instance_pe_count.attr,
+	&attr_instance_npe_count.attr,
 	NULL
 };
+ATTRIBUTE_GROUPS(pci_instance);
 
 /* the ktype for a pci instance */
 static struct kobj_type ktype_pci_instance = {
 	.release = edac_pci_instance_release,
 	.sysfs_ops = &pci_instance_ops,
-	.default_attrs = (struct attribute **)pci_instance_attr,
+	.default_groups = pci_instance_groups,
 };
 
 /*
@@ -292,15 +293,16 @@ EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL);
 EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL);
 
 /* Base Attributes of the memory ECC object */
-static struct edac_pci_dev_attribute *edac_pci_attr[] = {
-	&edac_pci_attr_check_pci_errors,
-	&edac_pci_attr_edac_pci_log_pe,
-	&edac_pci_attr_edac_pci_log_npe,
-	&edac_pci_attr_edac_pci_panic_on_pe,
-	&edac_pci_attr_pci_parity_count,
-	&edac_pci_attr_pci_nonparity_count,
+static struct attribute *edac_pci_attrs[] = {
+	&edac_pci_attr_check_pci_errors.attr,
+	&edac_pci_attr_edac_pci_log_pe.attr,
+	&edac_pci_attr_edac_pci_log_npe.attr,
+	&edac_pci_attr_edac_pci_panic_on_pe.attr,
+	&edac_pci_attr_pci_parity_count.attr,
+	&edac_pci_attr_pci_nonparity_count.attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(edac_pci);
 
 /*
  * edac_pci_release_main_kobj
@@ -327,7 +329,7 @@ static void edac_pci_release_main_kobj(struct kobject *kobj)
 static struct kobj_type ktype_edac_pci_main_kobj = {
 	.release = edac_pci_release_main_kobj,
 	.sysfs_ops = &edac_pci_sysfs_ops,
-	.default_attrs = (struct attribute **)edac_pci_attr,
+	.default_groups = edac_pci_groups,
 };
 
 /**
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 75cb91055c17..e5cfb01353d8 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -40,6 +40,7 @@ config ARM_SCPI_POWER_DOMAIN
 config ARM_SDE_INTERFACE
 	bool "ARM Software Delegated Exception Interface (SDEI)"
 	depends on ARM64
+	depends on ACPI_APEI_GHES
 	help
 	  The Software Delegated Exception Interface (SDEI) is an ARM
 	  standard for registering callbacks from the platform firmware
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index a7e762c352f9..1e1a51510e83 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
 	return true;
 }
 
-static int __init sdei_init(void)
+void __init sdei_init(void)
 {
 	struct platform_device *pdev;
 	int ret;
 
 	ret = platform_driver_register(&sdei_driver);
 	if (ret || !sdei_present_acpi())
-		return ret;
+		return;
 
 	pdev = platform_device_register_simple(sdei_driver.driver.name,
 					       0, NULL, 0);
@@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
 		pr_info("Failed to register ACPI:SDEI platform device %d\n",
 			ret);
 	}
-
-	return ret;
 }
 
-/*
- * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
- * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
- * by device_initcall(). We want to be called in the middle.
- */
-subsys_initcall_sync(sdei_init);
-
 int sdei_event_handler(struct pt_regs *regs,
 		       struct sdei_registered_event *arg)
 {
diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c
index 4c3201e290e2..ea84108035eb 100644
--- a/drivers/firmware/efi/apple-properties.c
+++ b/drivers/firmware/efi/apple-properties.c
@@ -24,7 +24,7 @@ static bool dump_properties __initdata;
 static int __init dump_properties_enable(char *arg)
 {
 	dump_properties = true;
-	return 0;
+	return 1;
 }
 
 __setup("dump_apple_properties", dump_properties_enable);
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 7de3f5b6e8d0..5502e176d51b 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -212,7 +212,7 @@ static int __init efivar_ssdt_setup(char *str)
 		memcpy(efivar_ssdt, str, strlen(str));
 	else
 		pr_warn("efivar_ssdt: name too long: %s\n", str);
-	return 0;
+	return 1;
 }
 __setup("efivar_ssdt=", efivar_ssdt_setup);
 
diff --git a/drivers/firmware/efi/mokvar-table.c b/drivers/firmware/efi/mokvar-table.c
index 38722d2009e2..5ed0602c2f75 100644
--- a/drivers/firmware/efi/mokvar-table.c
+++ b/drivers/firmware/efi/mokvar-table.c
@@ -359,4 +359,4 @@ static int __init efi_mokvar_sysfs_init(void)
 	}
 	return err;
 }
-device_initcall(efi_mokvar_sysfs_init);
+fs_initcall(efi_mokvar_sysfs_init);
diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index 450c5f6a1cbf..5e5b0bb2e4e0 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -1121,6 +1121,32 @@ int zynqmp_pm_aes_engine(const u64 address, u32 *out)
 EXPORT_SYMBOL_GPL(zynqmp_pm_aes_engine);
 
 /**
+ * zynqmp_pm_sha_hash - Access the SHA engine to calculate the hash
+ * @address:	Address of the data/ Address of output buffer where
+ *		hash should be stored.
+ * @size:	Size of the data.
+ * @flags:
+ *	BIT(0) - for initializing csudma driver and SHA3(Here address
+ *		 and size inputs can be NULL).
+ *	BIT(1) - to call Sha3_Update API which can be called multiple
+ *		 times when data is not contiguous.
+ *	BIT(2) - to get final hash of the whole updated data.
+ *		 Hash will be overwritten at provided address with
+ *		 48 bytes.
+ *
+ * Return:	Returns status, either success or error code.
+ */
+int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags)
+{
+	u32 lower_addr = lower_32_bits(address);
+	u32 upper_addr = upper_32_bits(address);
+
+	return zynqmp_pm_invoke_fn(PM_SECURE_SHA, upper_addr, lower_addr,
+				   size, flags, NULL);
+}
+EXPORT_SYMBOL_GPL(zynqmp_pm_sha_hash);
+
+/**
  * zynqmp_pm_register_notifier() - PM API for register a subsystem
  *                                to be notified about specific
  *                                event/error.
diff --git a/drivers/gpio/gpio-mt7621.c b/drivers/gpio/gpio-mt7621.c
index ccaad1cb3c2e..d8a26e503ca5 100644
--- a/drivers/gpio/gpio-mt7621.c
+++ b/drivers/gpio/gpio-mt7621.c
@@ -239,7 +239,6 @@ mediatek_gpio_bank_probe(struct device *dev, int bank)
 
 	rg->chip.offset = bank * MTK_BANK_WIDTH;
 	rg->irq_chip.name = dev_name(dev);
-	rg->irq_chip.parent_device = dev;
 	rg->irq_chip.irq_unmask = mediatek_gpio_irq_unmask;
 	rg->irq_chip.irq_mask = mediatek_gpio_irq_mask;
 	rg->irq_chip.irq_mask_ack = mediatek_gpio_irq_mask;
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index e099c39e0355..80ddc43fd875 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -986,7 +986,8 @@ static void omap_gpio_mod_init(struct gpio_bank *bank)
 		writel_relaxed(0, base + bank->regs->ctrl);
 }
 
-static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
+static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc,
+			       struct device *pm_dev)
 {
 	struct gpio_irq_chip *irq;
 	static int gpio;
@@ -1052,6 +1053,7 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
 	if (ret)
 		return dev_err_probe(bank->chip.parent, ret, "Could not register gpio chip\n");
 
+	irq_domain_set_pm_device(bank->chip.irq.domain, pm_dev);
 	ret = devm_request_irq(bank->chip.parent, bank->irq,
 			       omap_gpio_irq_handler,
 			       0, dev_name(bank->chip.parent), bank);
@@ -1402,7 +1404,6 @@ static int omap_gpio_probe(struct platform_device *pdev)
 	irqc->irq_bus_sync_unlock = gpio_irq_bus_sync_unlock,
 	irqc->name = dev_name(&pdev->dev);
 	irqc->flags = IRQCHIP_MASK_ON_SUSPEND;
-	irqc->parent_device = dev;
 
 	bank->irq = platform_get_irq(pdev, 0);
 	if (bank->irq <= 0) {
@@ -1466,7 +1467,7 @@ static int omap_gpio_probe(struct platform_device *pdev)
 
 	omap_gpio_mod_init(bank);
 
-	ret = omap_gpio_chip_init(bank, irqc);
+	ret = omap_gpio_chip_init(bank, irqc, dev);
 	if (ret) {
 		pm_runtime_put_sync(dev);
 		pm_runtime_disable(dev);
diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c
index bd2e16d6e21c..3a76538f27fa 100644
--- a/drivers/gpio/gpio-rcar.c
+++ b/drivers/gpio/gpio-rcar.c
@@ -530,7 +530,6 @@ static int gpio_rcar_probe(struct platform_device *pdev)
 
 	irq_chip = &p->irq_chip;
 	irq_chip->name = "gpio-rcar";
-	irq_chip->parent_device = dev;
 	irq_chip->irq_mask = gpio_rcar_irq_disable;
 	irq_chip->irq_unmask = gpio_rcar_irq_enable;
 	irq_chip->irq_set_type = gpio_rcar_irq_set_type;
@@ -552,6 +551,7 @@ static int gpio_rcar_probe(struct platform_device *pdev)
 		goto err0;
 	}
 
+	irq_domain_set_pm_device(gpio_chip->irq.domain, dev);
 	ret = devm_request_irq(dev, p->irq_parent, gpio_rcar_irq_handler,
 			       IRQF_SHARED, name, p);
 	if (ret) {
diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index 153fe79e1bf3..8e5d87984a48 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -547,7 +547,7 @@ struct gpio_sim_bank {
 	 *
 	 * So we need to store the pointer to the parent struct here. We can
 	 * dereference it anywhere we need with no checks and no locking as
-	 * it's guaranteed to survive the childred and protected by configfs
+	 * it's guaranteed to survive the children and protected by configfs
 	 * locks.
 	 *
 	 * Same for other structures.
@@ -1322,7 +1322,7 @@ static void gpio_sim_hog_config_item_release(struct config_item *item)
 	kfree(hog);
 }
 
-struct configfs_item_operations gpio_sim_hog_config_item_ops = {
+static struct configfs_item_operations gpio_sim_hog_config_item_ops = {
 	.release	= gpio_sim_hog_config_item_release,
 };
 
diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c
index 8d298beffd86..031fe105b58e 100644
--- a/drivers/gpio/gpio-tegra186.c
+++ b/drivers/gpio/gpio-tegra186.c
@@ -1075,6 +1075,7 @@ static const struct tegra_gpio_soc tegra241_main_soc = {
 	.ports = tegra241_main_ports,
 	.name = "tegra241-gpio",
 	.instance = 0,
+	.num_irqs_per_bank = 8,
 };
 
 #define TEGRA241_AON_GPIO_PORT(_name, _bank, _port, _pins)	\
@@ -1095,6 +1096,7 @@ static const struct tegra_gpio_soc tegra241_aon_soc = {
 	.ports = tegra241_aon_ports,
 	.name = "tegra241-gpio-aon",
 	.instance = 1,
+	.num_irqs_per_bank = 8,
 };
 
 static const struct of_device_id tegra186_gpio_of_match[] = {
diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c
index 5b103221b58d..fa4bc7481f9a 100644
--- a/drivers/gpio/gpio-tqmx86.c
+++ b/drivers/gpio/gpio-tqmx86.c
@@ -281,7 +281,6 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
 		u8 irq_status;
 
 		irq_chip->name = chip->label;
-		irq_chip->parent_device = &pdev->dev;
 		irq_chip->irq_mask = tqmx86_gpio_irq_mask;
 		irq_chip->irq_unmask = tqmx86_gpio_irq_unmask;
 		irq_chip->irq_set_type = tqmx86_gpio_irq_set_type;
@@ -316,6 +315,8 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
 		goto out_pm_dis;
 	}
 
+	irq_domain_set_pm_device(girq->domain, dev);
+
 	dev_info(dev, "GPIO functionality initialized with %d pins\n",
 		 chip->ngpio);
 
diff --git a/drivers/gpio/gpio-ts4900.c b/drivers/gpio/gpio-ts4900.c
index d885032cf814..d918d2df4de2 100644
--- a/drivers/gpio/gpio-ts4900.c
+++ b/drivers/gpio/gpio-ts4900.c
@@ -1,7 +1,7 @@
 /*
  * Digital I/O driver for Technologic Systems I2C FPGA Core
  *
- * Copyright (C) 2015 Technologic Systems
+ * Copyright (C) 2015, 2018 Technologic Systems
  * Copyright (C) 2016 Savoir-Faire Linux
  *
  * This program is free software; you can redistribute it and/or
@@ -55,19 +55,33 @@ static int ts4900_gpio_direction_input(struct gpio_chip *chip,
 {
 	struct ts4900_gpio_priv *priv = gpiochip_get_data(chip);
 
-	/*
-	 * This will clear the output enable bit, the other bits are
-	 * dontcare when this is cleared
+	/* Only clear the OE bit here, requires a RMW. Prevents potential issue
+	 * with OE and data getting to the physical pin at different times.
 	 */
-	return regmap_write(priv->regmap, offset, 0);
+	return regmap_update_bits(priv->regmap, offset, TS4900_GPIO_OE, 0);
 }
 
 static int ts4900_gpio_direction_output(struct gpio_chip *chip,
 					unsigned int offset, int value)
 {
 	struct ts4900_gpio_priv *priv = gpiochip_get_data(chip);
+	unsigned int reg;
 	int ret;
 
+	/* If changing from an input to an output, we need to first set the
+	 * proper data bit to what is requested and then set OE bit. This
+	 * prevents a glitch that can occur on the IO line
+	 */
+	regmap_read(priv->regmap, offset, &reg);
+	if (!(reg & TS4900_GPIO_OE)) {
+		if (value)
+			reg = TS4900_GPIO_OUT;
+		else
+			reg &= ~TS4900_GPIO_OUT;
+
+		regmap_write(priv->regmap, offset, reg);
+	}
+
 	if (value)
 		ret = regmap_write(priv->regmap, offset, TS4900_GPIO_OE |
 							 TS4900_GPIO_OUT);
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index c0f6a25c3279..a5495ad31c9c 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -307,7 +307,8 @@ static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip,
 	if (IS_ERR(desc))
 		return desc;
 
-	ret = gpio_set_debounce_timeout(desc, agpio->debounce_timeout);
+	/* ACPI uses hundredths of milliseconds units */
+	ret = gpio_set_debounce_timeout(desc, agpio->debounce_timeout * 10);
 	if (ret)
 		dev_warn(chip->parent,
 			 "Failed to set debounce-timeout for pin 0x%04X, err %d\n",
@@ -1035,7 +1036,8 @@ int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int ind
 			if (ret < 0)
 				return ret;
 
-			ret = gpio_set_debounce_timeout(desc, info.debounce);
+			/* ACPI uses hundredths of milliseconds units */
+			ret = gpio_set_debounce_timeout(desc, info.debounce * 10);
 			if (ret)
 				return ret;
 
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index a3d14277f17c..6630d92e30ad 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2227,6 +2227,16 @@ static int gpio_set_bias(struct gpio_desc *desc)
 	return gpio_set_config_with_argument_optional(desc, bias, arg);
 }
 
+/**
+ * gpio_set_debounce_timeout() - Set debounce timeout
+ * @desc:	GPIO descriptor to set the debounce timeout
+ * @debounce:	Debounce timeout in microseconds
+ *
+ * The function calls the certain GPIO driver to set debounce timeout
+ * in the hardware.
+ *
+ * Returns 0 on success, or negative error code otherwise.
+ */
 int gpio_set_debounce_timeout(struct gpio_desc *desc, unsigned int debounce)
 {
 	return gpio_set_config_with_argument_optional(desc,
diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
index 61db5a66b493..44ad70939663 100644
--- a/drivers/gpu/drm/bridge/Kconfig
+++ b/drivers/gpu/drm/bridge/Kconfig
@@ -8,7 +8,6 @@ config DRM_BRIDGE
 config DRM_PANEL_BRIDGE
 	def_bool y
 	depends on DRM_BRIDGE
-	depends on DRM_KMS_HELPER
 	select DRM_PANEL
 	help
 	  DRM bridge wrapper of DRM panels
@@ -30,6 +29,7 @@ config DRM_CDNS_DSI
 config DRM_CHIPONE_ICN6211
 	tristate "Chipone ICN6211 MIPI-DSI/RGB Converter bridge"
 	depends on OF
+	select DRM_KMS_HELPER
 	select DRM_MIPI_DSI
 	select DRM_PANEL_BRIDGE
 	help
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index a1a663f362e7..00279e8c2775 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -1406,6 +1406,13 @@ static inline u32 man_trk_ctl_single_full_frame_bit_get(struct drm_i915_private
 	       PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME;
 }
 
+static inline u32 man_trk_ctl_partial_frame_bit_get(struct drm_i915_private *dev_priv)
+{
+	return IS_ALDERLAKE_P(dev_priv) ?
+	       ADLP_PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE :
+	       PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE;
+}
+
 static void psr_force_hw_tracking_exit(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
@@ -1510,7 +1517,13 @@ static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state,
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	u32 val = PSR2_MAN_TRK_CTL_ENABLE;
+	u32 val = 0;
+
+	if (!IS_ALDERLAKE_P(dev_priv))
+		val = PSR2_MAN_TRK_CTL_ENABLE;
+
+	/* SF partial frame enable has to be set even on full update */
+	val |= man_trk_ctl_partial_frame_bit_get(dev_priv);
 
 	if (full_update) {
 		/*
@@ -1530,7 +1543,6 @@ static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state,
 	} else {
 		drm_WARN_ON(crtc_state->uapi.crtc->dev, clip->y1 % 4 || clip->y2 % 4);
 
-		val |= PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE;
 		val |= PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(clip->y1 / 4 + 1);
 		val |= PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(clip->y2 / 4 + 1);
 	}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c2bb33febb68..902e4c802a12 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4829,6 +4829,7 @@ enum {
 #define  ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(val)	REG_FIELD_PREP(ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK, val)
 #define  ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK		REG_GENMASK(12, 0)
 #define  ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(val)		REG_FIELD_PREP(ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK, val)
+#define  ADLP_PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE		REG_BIT(31)
 #define  ADLP_PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME		REG_BIT(14)
 #define  ADLP_PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME		REG_BIT(13)
 
diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c
index a8aba0141ce7..06cb1a59b9bc 100644
--- a/drivers/gpu/drm/imx/parallel-display.c
+++ b/drivers/gpu/drm/imx/parallel-display.c
@@ -217,14 +217,6 @@ static int imx_pd_bridge_atomic_check(struct drm_bridge *bridge,
 	if (!imx_pd_format_supported(bus_fmt))
 		return -EINVAL;
 
-	if (bus_flags &
-	    ~(DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_DE_HIGH |
-	      DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE |
-	      DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE)) {
-		dev_warn(imxpd->dev, "invalid bus_flags (%x)\n", bus_flags);
-		return -EINVAL;
-	}
-
 	bridge_state->output_bus_cfg.flags = bus_flags;
 	bridge_state->input_bus_cfg.flags = bus_flags;
 	imx_crtc_state->bus_flags = bus_flags;
diff --git a/drivers/gpu/drm/mgag200/mgag200_pll.c b/drivers/gpu/drm/mgag200/mgag200_pll.c
index e9ae22b4f813..52be08b744ad 100644
--- a/drivers/gpu/drm/mgag200/mgag200_pll.c
+++ b/drivers/gpu/drm/mgag200/mgag200_pll.c
@@ -404,9 +404,9 @@ mgag200_pixpll_update_g200wb(struct mgag200_pll *pixpll, const struct mgag200_pl
 		udelay(50);
 
 		/* program pixel pll register */
-		WREG_DAC(MGA1064_PIX_PLLC_N, xpixpllcn);
-		WREG_DAC(MGA1064_PIX_PLLC_M, xpixpllcm);
-		WREG_DAC(MGA1064_PIX_PLLC_P, xpixpllcp);
+		WREG_DAC(MGA1064_WB_PIX_PLLC_N, xpixpllcn);
+		WREG_DAC(MGA1064_WB_PIX_PLLC_M, xpixpllcm);
+		WREG_DAC(MGA1064_WB_PIX_PLLC_P, xpixpllcp);
 
 		udelay(50);
 
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index 434c2861bb40..9989a316fe88 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -106,6 +106,8 @@ config DRM_PANEL_EDP
 	depends on PM
 	select VIDEOMODE_HELPERS
 	select DRM_DP_AUX_BUS
+	select DRM_DP_HELPER
+	select DRM_KMS_HELPER
 	help
 	  DRM panel driver for dumb eDP panels that need at most a regulator and
 	  a GPIO to be powered up. Optionally a backlight can be attached so
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 3c08f9827acf..b42c1d816e79 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -2017,7 +2017,7 @@ static const struct display_timing innolux_g070y2_l01_timing = {
 static const struct panel_desc innolux_g070y2_l01 = {
 	.timings = &innolux_g070y2_l01_timing,
 	.num_timings = 1,
-	.bpc = 6,
+	.bpc = 8,
 	.size = {
 		.width = 152,
 		.height = 91,
diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.h b/drivers/gpu/drm/sun4i/sun8i_mixer.h
index 145833a9d82d..5b3fbee18671 100644
--- a/drivers/gpu/drm/sun4i/sun8i_mixer.h
+++ b/drivers/gpu/drm/sun4i/sun8i_mixer.h
@@ -111,10 +111,10 @@
 /* format 13 is semi-planar YUV411 VUVU */
 #define SUN8I_MIXER_FBFMT_YUV411	14
 /* format 15 doesn't exist */
-/* format 16 is P010 YVU */
-#define SUN8I_MIXER_FBFMT_P010_YUV	17
-/* format 18 is P210 YVU */
-#define SUN8I_MIXER_FBFMT_P210_YUV	19
+#define SUN8I_MIXER_FBFMT_P010_YUV	16
+/* format 17 is P010 YVU */
+#define SUN8I_MIXER_FBFMT_P210_YUV	18
+/* format 19 is P210 YVU */
 /* format 20 is packed YVU444 10-bit */
 /* format 21 is packed YUV444 10-bit */
 
diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c
index 9b42b0cdeef0..2876cb6a7dca 100644
--- a/drivers/hid/hid-elo.c
+++ b/drivers/hid/hid-elo.c
@@ -228,7 +228,6 @@ static int elo_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
 	struct elo_priv *priv;
 	int ret;
-	struct usb_device *udev;
 
 	if (!hid_is_usb(hdev))
 		return -EINVAL;
@@ -238,8 +237,7 @@ static int elo_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		return -ENOMEM;
 
 	INIT_DELAYED_WORK(&priv->work, elo_work);
-	udev = interface_to_usbdev(to_usb_interface(hdev->dev.parent));
-	priv->usbdev = usb_get_dev(udev);
+	priv->usbdev = interface_to_usbdev(to_usb_interface(hdev->dev.parent));
 
 	hid_set_drvdata(hdev, priv);
 
@@ -262,7 +260,6 @@ static int elo_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	return 0;
 err_free:
-	usb_put_dev(udev);
 	kfree(priv);
 	return ret;
 }
@@ -271,8 +268,6 @@ static void elo_remove(struct hid_device *hdev)
 {
 	struct elo_priv *priv = hid_get_drvdata(hdev);
 
-	usb_put_dev(priv->usbdev);
-
 	hid_hw_stop(hdev);
 	cancel_delayed_work_sync(&priv->work);
 	kfree(priv);
diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index 7106b921b53c..c358778e070b 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -1068,6 +1068,7 @@ static void logi_hidpp_recv_queue_notif(struct hid_device *hdev,
 		workitem.reports_supported |= STD_KEYBOARD;
 		break;
 	case 0x0f:
+	case 0x11:
 		device_type = "eQUAD Lightspeed 1.2";
 		logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
 		workitem.reports_supported |= STD_KEYBOARD;
diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c
index b6a9a0f3966e..2204de889739 100644
--- a/drivers/hid/hid-nintendo.c
+++ b/drivers/hid/hid-nintendo.c
@@ -2128,6 +2128,10 @@ static int nintendo_hid_probe(struct hid_device *hdev,
 	spin_lock_init(&ctlr->lock);
 	ctlr->rumble_queue = alloc_workqueue("hid-nintendo-rumble_wq",
 					     WQ_FREEZABLE | WQ_MEM_RECLAIM, 0);
+	if (!ctlr->rumble_queue) {
+		ret = -ENOMEM;
+		goto err;
+	}
 	INIT_WORK(&ctlr->rumble_worker, joycon_rumble_worker);
 
 	ret = hid_parse(hdev);
diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c
index 03b935ff02d5..c3e6d69fdfbd 100644
--- a/drivers/hid/hid-thrustmaster.c
+++ b/drivers/hid/hid-thrustmaster.c
@@ -64,7 +64,9 @@ struct tm_wheel_info {
  */
 static const struct tm_wheel_info tm_wheels_infos[] = {
 	{0x0306, 0x0006, "Thrustmaster T150RS"},
+	{0x0200, 0x0005, "Thrustmaster T300RS (Missing Attachment)"},
 	{0x0206, 0x0005, "Thrustmaster T300RS"},
+	{0x0209, 0x0005, "Thrustmaster T300RS (Open Wheel Attachment)"},
 	{0x0204, 0x0005, "Thrustmaster T300 Ferrari Alcantara Edition"},
 	{0x0002, 0x0002, "Thrustmaster T500RS"}
 	//{0x0407, 0x0001, "Thrustmaster TMX"}
@@ -158,6 +160,12 @@ static void thrustmaster_interrupts(struct hid_device *hdev)
 		return;
 	}
 
+	if (usbif->cur_altsetting->desc.bNumEndpoints < 2) {
+		kfree(send_buf);
+		hid_err(hdev, "Wrong number of endpoints?\n");
+		return;
+	}
+
 	ep = &usbif->cur_altsetting->endpoint[1];
 	b_ep = ep->desc.bEndpointAddress;
 
diff --git a/drivers/hid/hid-vivaldi.c b/drivers/hid/hid-vivaldi.c
index efa6140915f4..42ceb2058a09 100644
--- a/drivers/hid/hid-vivaldi.c
+++ b/drivers/hid/hid-vivaldi.c
@@ -144,7 +144,7 @@ out:
 static int vivaldi_input_configured(struct hid_device *hdev,
 				    struct hid_input *hidinput)
 {
-	return sysfs_create_group(&hdev->dev.kobj, &input_attribute_group);
+	return devm_device_add_group(&hdev->dev, &input_attribute_group);
 }
 
 static const struct hid_device_id vivaldi_table[] = {
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 0b66e25c0e2d..b7640cfe0020 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -64,6 +64,7 @@ static struct cpuidle_driver intel_idle_driver = {
 /* intel_idle.max_cstate=0 disables driver */
 static int max_cstate = CPUIDLE_STATE_MAX - 1;
 static unsigned int disabled_states_mask;
+static unsigned int preferred_states_mask;
 
 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
 
@@ -121,9 +122,6 @@ static unsigned int mwait_substates __initdata;
  * If the local APIC timer is not known to be reliable in the target idle state,
  * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
  *
- * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to
- * flushing user TLBs.
- *
  * Must be called under local_irq_disable().
  */
 static __cpuidle int intel_idle(struct cpuidle_device *dev,
@@ -761,6 +759,46 @@ static struct cpuidle_state icx_cstates[] __initdata = {
 		.enter = NULL }
 };
 
+/*
+ * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
+ * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
+ * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1
+ * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then
+ * both C1 and C1E requests end up with C1, so there is effectively no C1E.
+ *
+ * By default we enable C1 and disable C1E by marking it with
+ * 'CPUIDLE_FLAG_UNUSABLE'.
+ */
+static struct cpuidle_state spr_cstates[] __initdata = {
+	{
+		.name = "C1",
+		.desc = "MWAIT 0x00",
+		.flags = MWAIT2flg(0x00),
+		.exit_latency = 1,
+		.target_residency = 1,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.name = "C1E",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE |
+					   CPUIDLE_FLAG_UNUSABLE,
+		.exit_latency = 2,
+		.target_residency = 4,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.name = "C6",
+		.desc = "MWAIT 0x20",
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 290,
+		.target_residency = 800,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.enter = NULL }
+};
+
 static struct cpuidle_state atom_cstates[] __initdata = {
 	{
 		.name = "C1E",
@@ -1104,6 +1142,12 @@ static const struct idle_cpu idle_cpu_icx __initconst = {
 	.use_acpi = true,
 };
 
+static const struct idle_cpu idle_cpu_spr __initconst = {
+	.state_table = spr_cstates,
+	.disable_promotion_to_c1e = true,
+	.use_acpi = true,
+};
+
 static const struct idle_cpu idle_cpu_avn __initconst = {
 	.state_table = avn_cstates,
 	.disable_promotion_to_c1e = true,
@@ -1166,6 +1210,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&idle_cpu_skx),
 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&idle_cpu_icx),
 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&idle_cpu_icx),
+	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&idle_cpu_spr),
 	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&idle_cpu_knl),
 	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&idle_cpu_knl),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&idle_cpu_bxt),
@@ -1353,6 +1398,8 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
 
+static void c1e_promotion_enable(void);
+
 /**
  * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
  *
@@ -1523,6 +1570,41 @@ static void __init skx_idle_state_table_update(void)
 	}
 }
 
+/**
+ * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
+ */
+static void __init spr_idle_state_table_update(void)
+{
+	unsigned long long msr;
+
+	/* Check if user prefers C1E over C1. */
+	if (preferred_states_mask & BIT(2)) {
+		if (preferred_states_mask & BIT(1))
+			/* Both can't be enabled, stick to the defaults. */
+			return;
+
+		spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
+		spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;
+
+		/* Enable C1E using the "C1E promotion" bit. */
+		c1e_promotion_enable();
+		disable_promotion_to_c1e = false;
+	}
+
+	/*
+	 * By default, the C6 state assumes the worst-case scenario of package
+	 * C6. However, if PC6 is disabled, we update the numbers to match
+	 * core C6.
+	 */
+	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
+
+	/* Limit value 2 and above allow for PC6. */
+	if ((msr & 0x7) < 2) {
+		spr_cstates[2].exit_latency = 190;
+		spr_cstates[2].target_residency = 600;
+	}
+}
+
 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
 {
 	unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
@@ -1557,6 +1639,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 	case INTEL_FAM6_SKYLAKE_X:
 		skx_idle_state_table_update();
 		break;
+	case INTEL_FAM6_SAPPHIRERAPIDS_X:
+		spr_idle_state_table_update();
+		break;
 	}
 
 	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
@@ -1629,6 +1714,15 @@ static void auto_demotion_disable(void)
 	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
 }
 
+static void c1e_promotion_enable(void)
+{
+	unsigned long long msr_bits;
+
+	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
+	msr_bits |= 0x2;
+	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+}
+
 static void c1e_promotion_disable(void)
 {
 	unsigned long long msr_bits;
@@ -1798,3 +1892,14 @@ module_param(max_cstate, int, 0444);
  */
 module_param_named(states_off, disabled_states_mask, uint, 0444);
 MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
+/*
+ * Some platforms come with mutually exclusive C-states, so that if one is
+ * enabled, the other C-states must not be used. Example: C1 and C1E on
+ * Sapphire Rapids platform. This parameter allows for selecting the
+ * preferred C-states among the groups of mutually exclusive C-states - the
+ * selected C-states will be registered, the other C-states from the mutually
+ * exclusive group won't be registered. If the platform has no mutually
+ * exclusive C-states, this parameter has no effect.
+ */
+module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
+MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");
diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c
index fcb1b646436a..1581f6ef0927 100644
--- a/drivers/input/tablet/aiptek.c
+++ b/drivers/input/tablet/aiptek.c
@@ -1787,15 +1787,13 @@ aiptek_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	input_set_abs_params(inputdev, ABS_TILT_Y, AIPTEK_TILT_MIN, AIPTEK_TILT_MAX, 0, 0);
 	input_set_abs_params(inputdev, ABS_WHEEL, AIPTEK_WHEEL_MIN, AIPTEK_WHEEL_MAX - 1, 0, 0);
 
-	/* Verify that a device really has an endpoint */
-	if (intf->cur_altsetting->desc.bNumEndpoints < 1) {
+	err = usb_find_common_endpoints(intf->cur_altsetting,
+					NULL, NULL, &endpoint, NULL);
+	if (err) {
 		dev_err(&intf->dev,
-			"interface has %d endpoints, but must have minimum 1\n",
-			intf->cur_altsetting->desc.bNumEndpoints);
-		err = -EINVAL;
+			"interface has no int in endpoints, but must have minimum 1\n");
 		goto fail3;
 	}
-	endpoint = &intf->cur_altsetting->endpoint[0].desc;
 
 	/* Go set up our URB, which is called when the tablet receives
 	 * input.
diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c
index 129ebc810de8..8bd03278ad9a 100644
--- a/drivers/input/touchscreen/zinitix.c
+++ b/drivers/input/touchscreen/zinitix.c
@@ -135,7 +135,7 @@ struct point_coord {
 
 struct touch_event {
 	__le16	status;
-	u8	finger_cnt;
+	u8	finger_mask;
 	u8	time_stamp;
 	struct point_coord point_coord[MAX_SUPPORTED_FINGER_NUM];
 };
@@ -322,11 +322,32 @@ static int zinitix_send_power_on_sequence(struct bt541_ts_data *bt541)
 static void zinitix_report_finger(struct bt541_ts_data *bt541, int slot,
 				  const struct point_coord *p)
 {
+	u16 x, y;
+
+	if (unlikely(!(p->sub_status &
+		       (SUB_BIT_UP | SUB_BIT_DOWN | SUB_BIT_MOVE)))) {
+		dev_dbg(&bt541->client->dev, "unknown finger event %#02x\n",
+			p->sub_status);
+		return;
+	}
+
+	x = le16_to_cpu(p->x);
+	y = le16_to_cpu(p->y);
+
 	input_mt_slot(bt541->input_dev, slot);
-	input_mt_report_slot_state(bt541->input_dev, MT_TOOL_FINGER, true);
-	touchscreen_report_pos(bt541->input_dev, &bt541->prop,
-			       le16_to_cpu(p->x), le16_to_cpu(p->y), true);
-	input_report_abs(bt541->input_dev, ABS_MT_TOUCH_MAJOR, p->width);
+	if (input_mt_report_slot_state(bt541->input_dev, MT_TOOL_FINGER,
+				       !(p->sub_status & SUB_BIT_UP))) {
+		touchscreen_report_pos(bt541->input_dev,
+				       &bt541->prop, x, y, true);
+		input_report_abs(bt541->input_dev,
+				 ABS_MT_TOUCH_MAJOR, p->width);
+		dev_dbg(&bt541->client->dev, "finger %d %s (%u, %u)\n",
+			slot, p->sub_status & SUB_BIT_DOWN ? "down" : "move",
+			x, y);
+	} else {
+		dev_dbg(&bt541->client->dev, "finger %d up (%u, %u)\n",
+			slot, x, y);
+	}
 }
 
 static irqreturn_t zinitix_ts_irq_handler(int irq, void *bt541_handler)
@@ -334,6 +355,7 @@ static irqreturn_t zinitix_ts_irq_handler(int irq, void *bt541_handler)
 	struct bt541_ts_data *bt541 = bt541_handler;
 	struct i2c_client *client = bt541->client;
 	struct touch_event touch_event;
+	unsigned long finger_mask;
 	int error;
 	int i;
 
@@ -346,10 +368,14 @@ static irqreturn_t zinitix_ts_irq_handler(int irq, void *bt541_handler)
 		goto out;
 	}
 
-	for (i = 0; i < MAX_SUPPORTED_FINGER_NUM; i++)
-		if (touch_event.point_coord[i].sub_status & SUB_BIT_EXIST)
-			zinitix_report_finger(bt541, i,
-					      &touch_event.point_coord[i]);
+	finger_mask = touch_event.finger_mask;
+	for_each_set_bit(i, &finger_mask, MAX_SUPPORTED_FINGER_NUM) {
+		const struct point_coord *p = &touch_event.point_coord[i];
+
+		/* Only process contacts that are actually reported */
+		if (p->sub_status & SUB_BIT_EXIST)
+			zinitix_report_finger(bt541, i, p);
+	}
 
 	input_mt_sync_frame(bt541->input_dev);
 	input_sync(bt541->input_dev);
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 3eb68fa1b8cc..c79a0df090c0 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -144,8 +144,8 @@ config IOMMU_DMA
 	select IRQ_MSI_IOMMU
 	select NEED_SG_DMA_LENGTH
 
-# Shared Virtual Addressing library
-config IOMMU_SVA_LIB
+# Shared Virtual Addressing
+config IOMMU_SVA
 	bool
 	select IOASID
 
@@ -379,7 +379,7 @@ config ARM_SMMU_V3
 config ARM_SMMU_V3_SVA
 	bool "Shared Virtual Addressing support for the ARM SMMUv3"
 	depends on ARM_SMMU_V3
-	select IOMMU_SVA_LIB
+	select IOMMU_SVA
 	select MMU_NOTIFIER
 	help
 	  Support for sharing process address spaces with devices using the
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index bc7f730edbb0..44475a9b3eea 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -27,6 +27,6 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
 obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
 obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
 obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
-obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o
+obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o
 obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
 obj-$(CONFIG_APPLE_DART) += apple-dart.o
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index a737ba5f727e..22ddd05bbdcd 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -340,14 +340,12 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
 	bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm);
 	if (IS_ERR(bond->smmu_mn)) {
 		ret = PTR_ERR(bond->smmu_mn);
-		goto err_free_pasid;
+		goto err_free_bond;
 	}
 
 	list_add(&bond->list, &master->bonds);
 	return &bond->sva;
 
-err_free_pasid:
-	iommu_sva_free_pasid(mm);
 err_free_bond:
 	kfree(bond);
 	return ERR_PTR(ret);
@@ -377,7 +375,6 @@ void arm_smmu_sva_unbind(struct iommu_sva *handle)
 	if (refcount_dec_and_test(&bond->refs)) {
 		list_del(&bond->list);
 		arm_smmu_mmu_notifier_put(bond->smmu_mn);
-		iommu_sva_free_pasid(bond->mm);
 		kfree(bond);
 	}
 	mutex_unlock(&sva_lock);
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 247d0f2d5fdf..39a06d245f12 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -52,7 +52,7 @@ config INTEL_IOMMU_SVM
 	select PCI_PRI
 	select MMU_NOTIFIER
 	select IOASID
-	select IOMMU_SVA_LIB
+	select IOMMU_SVA
 	help
 	  Shared Virtual Memory (SVM) provides a facility for devices
 	  to access DMA resources through process address space by
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5b196cfe9ed2..1ce1741a7fa4 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4781,7 +4781,7 @@ attach_failed:
 link_failed:
 	spin_unlock_irqrestore(&device_domain_lock, flags);
 	if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
-		ioasid_put(domain->default_pasid);
+		ioasid_free(domain->default_pasid);
 
 	return ret;
 }
@@ -4811,7 +4811,7 @@ static void aux_domain_remove_dev(struct dmar_domain *domain,
 	spin_unlock_irqrestore(&device_domain_lock, flags);
 
 	if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
-		ioasid_put(domain->default_pasid);
+		ioasid_free(domain->default_pasid);
 }
 
 static int prepare_domain_attach_device(struct iommu_domain *domain,
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 5b5d69b04fcc..51ac2096b3da 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -514,11 +514,6 @@ static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
 	return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
 }
 
-static void intel_svm_free_pasid(struct mm_struct *mm)
-{
-	iommu_sva_free_pasid(mm);
-}
-
 static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
 					   struct device *dev,
 					   struct mm_struct *mm,
@@ -662,8 +657,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
 				kfree(svm);
 			}
 		}
-		/* Drop a PASID reference and free it if no reference. */
-		intel_svm_free_pasid(mm);
 	}
 out:
 	return ret;
@@ -1047,8 +1040,6 @@ struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void
 	}
 
 	sva = intel_svm_bind_mm(iommu, dev, mm, flags);
-	if (IS_ERR_OR_NULL(sva))
-		intel_svm_free_pasid(mm);
 	mutex_unlock(&pasid_mutex);
 
 	return sva;
diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c
index 06fee7416816..a786c034907c 100644
--- a/drivers/iommu/ioasid.c
+++ b/drivers/iommu/ioasid.c
@@ -2,7 +2,7 @@
 /*
  * I/O Address Space ID allocator. There is one global IOASID space, split into
  * subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and
- * free IOASIDs with ioasid_alloc and ioasid_put.
+ * free IOASIDs with ioasid_alloc() and ioasid_free().
  */
 #include <linux/ioasid.h>
 #include <linux/module.h>
@@ -15,7 +15,6 @@ struct ioasid_data {
 	struct ioasid_set *set;
 	void *private;
 	struct rcu_head rcu;
-	refcount_t refs;
 };
 
 /*
@@ -315,7 +314,6 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
 
 	data->set = set;
 	data->private = private;
-	refcount_set(&data->refs, 1);
 
 	/*
 	 * Custom allocator needs allocator data to perform platform specific
@@ -348,35 +346,11 @@ exit_free:
 EXPORT_SYMBOL_GPL(ioasid_alloc);
 
 /**
- * ioasid_get - obtain a reference to the IOASID
- * @ioasid: the ID to get
- */
-void ioasid_get(ioasid_t ioasid)
-{
-	struct ioasid_data *ioasid_data;
-
-	spin_lock(&ioasid_allocator_lock);
-	ioasid_data = xa_load(&active_allocator->xa, ioasid);
-	if (ioasid_data)
-		refcount_inc(&ioasid_data->refs);
-	else
-		WARN_ON(1);
-	spin_unlock(&ioasid_allocator_lock);
-}
-EXPORT_SYMBOL_GPL(ioasid_get);
-
-/**
- * ioasid_put - Release a reference to an ioasid
+ * ioasid_free - Free an ioasid
  * @ioasid: the ID to remove
- *
- * Put a reference to the IOASID, free it when the number of references drops to
- * zero.
- *
- * Return: %true if the IOASID was freed, %false otherwise.
  */
-bool ioasid_put(ioasid_t ioasid)
+void ioasid_free(ioasid_t ioasid)
 {
-	bool free = false;
 	struct ioasid_data *ioasid_data;
 
 	spin_lock(&ioasid_allocator_lock);
@@ -386,10 +360,6 @@ bool ioasid_put(ioasid_t ioasid)
 		goto exit_unlock;
 	}
 
-	free = refcount_dec_and_test(&ioasid_data->refs);
-	if (!free)
-		goto exit_unlock;
-
 	active_allocator->ops->free(ioasid, active_allocator->ops->pdata);
 	/* Custom allocator needs additional steps to free the xa element */
 	if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) {
@@ -399,9 +369,8 @@ bool ioasid_put(ioasid_t ioasid)
 
 exit_unlock:
 	spin_unlock(&ioasid_allocator_lock);
-	return free;
 }
-EXPORT_SYMBOL_GPL(ioasid_put);
+EXPORT_SYMBOL_GPL(ioasid_free);
 
 /**
  * ioasid_find - Find IOASID data
diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
index bd41405d34e9..106506143896 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -18,8 +18,7 @@ static DECLARE_IOASID_SET(iommu_sva_pasid);
  *
  * Try to allocate a PASID for this mm, or take a reference to the existing one
  * provided it fits within the [@min, @max] range. On success the PASID is
- * available in mm->pasid, and must be released with iommu_sva_free_pasid().
- * @min must be greater than 0, because 0 indicates an unused mm->pasid.
+ * available in mm->pasid and will be available for the lifetime of the mm.
  *
  * Returns 0 on success and < 0 on error.
  */
@@ -33,38 +32,24 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
 		return -EINVAL;
 
 	mutex_lock(&iommu_sva_lock);
-	if (mm->pasid) {
-		if (mm->pasid >= min && mm->pasid <= max)
-			ioasid_get(mm->pasid);
-		else
+	/* Is a PASID already associated with this mm? */
+	if (pasid_valid(mm->pasid)) {
+		if (mm->pasid < min || mm->pasid >= max)
 			ret = -EOVERFLOW;
-	} else {
-		pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
-		if (pasid == INVALID_IOASID)
-			ret = -ENOMEM;
-		else
-			mm->pasid = pasid;
+		goto out;
 	}
+
+	pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
+	if (!pasid_valid(pasid))
+		ret = -ENOMEM;
+	else
+		mm_pasid_set(mm, pasid);
+out:
 	mutex_unlock(&iommu_sva_lock);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid);
 
-/**
- * iommu_sva_free_pasid - Release the mm's PASID
- * @mm: the mm
- *
- * Drop one reference to a PASID allocated with iommu_sva_alloc_pasid()
- */
-void iommu_sva_free_pasid(struct mm_struct *mm)
-{
-	mutex_lock(&iommu_sva_lock);
-	if (ioasid_put(mm->pasid))
-		mm->pasid = 0;
-	mutex_unlock(&iommu_sva_lock);
-}
-EXPORT_SYMBOL_GPL(iommu_sva_free_pasid);
-
 /* ioasid_find getter() requires a void * argument */
 static bool __mmget_not_zero(void *mm)
 {
diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h
index 031155010ca8..8909ea1094e3 100644
--- a/drivers/iommu/iommu-sva-lib.h
+++ b/drivers/iommu/iommu-sva-lib.h
@@ -9,7 +9,6 @@
 #include <linux/mm_types.h>
 
 int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max);
-void iommu_sva_free_pasid(struct mm_struct *mm);
 struct mm_struct *iommu_sva_find(ioasid_t pasid);
 
 /* I/O Page fault */
@@ -17,7 +16,7 @@ struct device;
 struct iommu_fault;
 struct iopf_queue;
 
-#ifdef CONFIG_IOMMU_SVA_LIB
+#ifdef CONFIG_IOMMU_SVA
 int iommu_queue_iopf(struct iommu_fault *fault, void *cookie);
 
 int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev);
@@ -28,7 +27,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name);
 void iopf_queue_free(struct iopf_queue *queue);
 int iopf_queue_discard_partial(struct iopf_queue *queue);
 
-#else /* CONFIG_IOMMU_SVA_LIB */
+#else /* CONFIG_IOMMU_SVA */
 static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
 {
 	return -ENODEV;
@@ -64,5 +63,5 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
 {
 	return -ENODEV;
 }
-#endif /* CONFIG_IOMMU_SVA_LIB */
+#endif /* CONFIG_IOMMU_SVA */
 #endif /* _IOMMU_SVA_LIB_H */
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 7038957f4a77..680d2fcf2686 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -430,6 +430,14 @@ config QCOM_PDC
 	  Power Domain Controller driver to manage and configure wakeup
 	  IRQs for Qualcomm Technologies Inc (QTI) mobile chips.
 
+config QCOM_MPM
+	tristate "QCOM MPM"
+	depends on ARCH_QCOM
+	select IRQ_DOMAIN_HIERARCHY
+	help
+	  MSM Power Manager driver to manage and configure wakeup
+	  IRQs for Qualcomm Technologies Inc (QTI) mobile chips.
+
 config CSKY_MPINTC
 	bool
 	depends on CSKY
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index c1f611cbfbf8..1f8990f812f1 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -94,6 +94,7 @@ obj-$(CONFIG_MESON_IRQ_GPIO)		+= irq-meson-gpio.o
 obj-$(CONFIG_GOLDFISH_PIC) 		+= irq-goldfish-pic.o
 obj-$(CONFIG_NDS32)			+= irq-ativic32.o
 obj-$(CONFIG_QCOM_PDC)			+= qcom-pdc.o
+obj-$(CONFIG_QCOM_MPM)			+= irq-qcom-mpm.o
 obj-$(CONFIG_CSKY_MPINTC)		+= irq-csky-mpintc.o
 obj-$(CONFIG_CSKY_APB_INTC)		+= irq-csky-apb-intc.o
 obj-$(CONFIG_RISCV_INTC)		+= irq-riscv-intc.o
diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index 38091ebb9403..12dd48727a15 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -24,7 +24,7 @@
  * - Default "this CPU" register view and explicit per-CPU views
  *
  * In addition, this driver also handles FIQs, as these are routed to the same
- * IRQ vector. These are used for Fast IPIs (TODO), the ARMv8 timer IRQs, and
+ * IRQ vector. These are used for Fast IPIs, the ARMv8 timer IRQs, and
  * performance counters (TODO).
  *
  * Implementation notes:
@@ -52,9 +52,12 @@
 #include <linux/irqchip.h>
 #include <linux/irqchip/arm-vgic-info.h>
 #include <linux/irqdomain.h>
+#include <linux/jump_label.h>
 #include <linux/limits.h>
 #include <linux/of_address.h>
 #include <linux/slab.h>
+#include <asm/apple_m1_pmu.h>
+#include <asm/cputype.h>
 #include <asm/exception.h>
 #include <asm/sysreg.h>
 #include <asm/virt.h>
@@ -62,20 +65,22 @@
 #include <dt-bindings/interrupt-controller/apple-aic.h>
 
 /*
- * AIC registers (MMIO)
+ * AIC v1 registers (MMIO)
  */
 
 #define AIC_INFO		0x0004
-#define AIC_INFO_NR_HW		GENMASK(15, 0)
+#define AIC_INFO_NR_IRQ		GENMASK(15, 0)
 
 #define AIC_CONFIG		0x0010
 
 #define AIC_WHOAMI		0x2000
 #define AIC_EVENT		0x2004
-#define AIC_EVENT_TYPE		GENMASK(31, 16)
+#define AIC_EVENT_DIE		GENMASK(31, 24)
+#define AIC_EVENT_TYPE		GENMASK(23, 16)
 #define AIC_EVENT_NUM		GENMASK(15, 0)
 
-#define AIC_EVENT_TYPE_HW	1
+#define AIC_EVENT_TYPE_FIQ	0 /* Software use */
+#define AIC_EVENT_TYPE_IRQ	1
 #define AIC_EVENT_TYPE_IPI	4
 #define AIC_EVENT_IPI_OTHER	1
 #define AIC_EVENT_IPI_SELF	2
@@ -91,34 +96,73 @@
 #define AIC_IPI_SELF		BIT(31)
 
 #define AIC_TARGET_CPU		0x3000
-#define AIC_SW_SET		0x4000
-#define AIC_SW_CLR		0x4080
-#define AIC_MASK_SET		0x4100
-#define AIC_MASK_CLR		0x4180
 
 #define AIC_CPU_IPI_SET(cpu)	(0x5008 + ((cpu) << 7))
 #define AIC_CPU_IPI_CLR(cpu)	(0x500c + ((cpu) << 7))
 #define AIC_CPU_IPI_MASK_SET(cpu) (0x5024 + ((cpu) << 7))
 #define AIC_CPU_IPI_MASK_CLR(cpu) (0x5028 + ((cpu) << 7))
 
+#define AIC_MAX_IRQ		0x400
+
+/*
+ * AIC v2 registers (MMIO)
+ */
+
+#define AIC2_VERSION		0x0000
+#define AIC2_VERSION_VER	GENMASK(7, 0)
+
+#define AIC2_INFO1		0x0004
+#define AIC2_INFO1_NR_IRQ	GENMASK(15, 0)
+#define AIC2_INFO1_LAST_DIE	GENMASK(27, 24)
+
+#define AIC2_INFO2		0x0008
+
+#define AIC2_INFO3		0x000c
+#define AIC2_INFO3_MAX_IRQ	GENMASK(15, 0)
+#define AIC2_INFO3_MAX_DIE	GENMASK(27, 24)
+
+#define AIC2_RESET		0x0010
+#define AIC2_RESET_RESET	BIT(0)
+
+#define AIC2_CONFIG		0x0014
+#define AIC2_CONFIG_ENABLE	BIT(0)
+#define AIC2_CONFIG_PREFER_PCPU	BIT(28)
+
+#define AIC2_TIMEOUT		0x0028
+#define AIC2_CLUSTER_PRIO	0x0030
+#define AIC2_DELAY_GROUPS	0x0100
+
+#define AIC2_IRQ_CFG		0x2000
+
+/*
+ * AIC2 registers are laid out like this, starting at AIC2_IRQ_CFG:
+ *
+ * Repeat for each die:
+ *   IRQ_CFG: u32 * MAX_IRQS
+ *   SW_SET: u32 * (MAX_IRQS / 32)
+ *   SW_CLR: u32 * (MAX_IRQS / 32)
+ *   MASK_SET: u32 * (MAX_IRQS / 32)
+ *   MASK_CLR: u32 * (MAX_IRQS / 32)
+ *   HW_STATE: u32 * (MAX_IRQS / 32)
+ *
+ * This is followed by a set of event registers, each 16K page aligned.
+ * The first one is the AP event register we will use. Unfortunately,
+ * the actual implemented die count is not specified anywhere in the
+ * capability registers, so we have to explicitly specify the event
+ * register as a second reg entry in the device tree to remain
+ * forward-compatible.
+ */
+
+#define AIC2_IRQ_CFG_TARGET	GENMASK(3, 0)
+#define AIC2_IRQ_CFG_DELAY_IDX	GENMASK(7, 5)
+
 #define MASK_REG(x)		(4 * ((x) >> 5))
 #define MASK_BIT(x)		BIT((x) & GENMASK(4, 0))
 
 /*
  * IMP-DEF sysregs that control FIQ sources
- * Note: sysreg-based IPIs are not supported yet.
  */
 
-/* Core PMC control register */
-#define SYS_IMP_APL_PMCR0_EL1		sys_reg(3, 1, 15, 0, 0)
-#define PMCR0_IMODE			GENMASK(10, 8)
-#define PMCR0_IMODE_OFF			0
-#define PMCR0_IMODE_PMI			1
-#define PMCR0_IMODE_AIC			2
-#define PMCR0_IMODE_HALT		3
-#define PMCR0_IMODE_FIQ			4
-#define PMCR0_IACT			BIT(11)
-
 /* IPI request registers */
 #define SYS_IMP_APL_IPI_RR_LOCAL_EL1	sys_reg(3, 5, 15, 0, 0)
 #define SYS_IMP_APL_IPI_RR_GLOBAL_EL1	sys_reg(3, 5, 15, 0, 1)
@@ -155,7 +199,18 @@
 #define SYS_IMP_APL_UPMSR_EL1		sys_reg(3, 7, 15, 6, 4)
 #define UPMSR_IACT			BIT(0)
 
-#define AIC_NR_FIQ		4
+/* MPIDR fields */
+#define MPIDR_CPU(x)			MPIDR_AFFINITY_LEVEL(x, 0)
+#define MPIDR_CLUSTER(x)		MPIDR_AFFINITY_LEVEL(x, 1)
+
+#define AIC_IRQ_HWIRQ(die, irq)	(FIELD_PREP(AIC_EVENT_DIE, die) | \
+				 FIELD_PREP(AIC_EVENT_TYPE, AIC_EVENT_TYPE_IRQ) | \
+				 FIELD_PREP(AIC_EVENT_NUM, irq))
+#define AIC_FIQ_HWIRQ(x)	(FIELD_PREP(AIC_EVENT_TYPE, AIC_EVENT_TYPE_FIQ) | \
+				 FIELD_PREP(AIC_EVENT_NUM, x))
+#define AIC_HWIRQ_IRQ(x)	FIELD_GET(AIC_EVENT_NUM, x)
+#define AIC_HWIRQ_DIE(x)	FIELD_GET(AIC_EVENT_DIE, x)
+#define AIC_NR_FIQ		6
 #define AIC_NR_SWIPI		32
 
 /*
@@ -173,11 +228,81 @@
 #define AIC_TMR_EL02_PHYS	AIC_TMR_GUEST_PHYS
 #define AIC_TMR_EL02_VIRT	AIC_TMR_GUEST_VIRT
 
+DEFINE_STATIC_KEY_TRUE(use_fast_ipi);
+
+struct aic_info {
+	int version;
+
+	/* Register offsets */
+	u32 event;
+	u32 target_cpu;
+	u32 irq_cfg;
+	u32 sw_set;
+	u32 sw_clr;
+	u32 mask_set;
+	u32 mask_clr;
+
+	u32 die_stride;
+
+	/* Features */
+	bool fast_ipi;
+};
+
+static const struct aic_info aic1_info = {
+	.version	= 1,
+
+	.event		= AIC_EVENT,
+	.target_cpu	= AIC_TARGET_CPU,
+};
+
+static const struct aic_info aic1_fipi_info = {
+	.version	= 1,
+
+	.event		= AIC_EVENT,
+	.target_cpu	= AIC_TARGET_CPU,
+
+	.fast_ipi	= true,
+};
+
+static const struct aic_info aic2_info = {
+	.version	= 2,
+
+	.irq_cfg	= AIC2_IRQ_CFG,
+
+	.fast_ipi	= true,
+};
+
+static const struct of_device_id aic_info_match[] = {
+	{
+		.compatible = "apple,t8103-aic",
+		.data = &aic1_fipi_info,
+	},
+	{
+		.compatible = "apple,aic",
+		.data = &aic1_info,
+	},
+	{
+		.compatible = "apple,aic2",
+		.data = &aic2_info,
+	},
+	{}
+};
+
 struct aic_irq_chip {
 	void __iomem *base;
+	void __iomem *event;
 	struct irq_domain *hw_domain;
 	struct irq_domain *ipi_domain;
-	int nr_hw;
+	struct {
+		cpumask_t aff;
+	} *fiq_aff[AIC_NR_FIQ];
+
+	int nr_irq;
+	int max_irq;
+	int nr_die;
+	int max_die;
+
+	struct aic_info info;
 };
 
 static DEFINE_PER_CPU(uint32_t, aic_fiq_unmasked);
@@ -205,18 +330,24 @@ static void aic_ic_write(struct aic_irq_chip *ic, u32 reg, u32 val)
 
 static void aic_irq_mask(struct irq_data *d)
 {
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
 	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
 
-	aic_ic_write(ic, AIC_MASK_SET + MASK_REG(irqd_to_hwirq(d)),
-		     MASK_BIT(irqd_to_hwirq(d)));
+	u32 off = AIC_HWIRQ_DIE(hwirq) * ic->info.die_stride;
+	u32 irq = AIC_HWIRQ_IRQ(hwirq);
+
+	aic_ic_write(ic, ic->info.mask_set + off + MASK_REG(irq), MASK_BIT(irq));
 }
 
 static void aic_irq_unmask(struct irq_data *d)
 {
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
 	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
 
-	aic_ic_write(ic, AIC_MASK_CLR + MASK_REG(d->hwirq),
-		     MASK_BIT(irqd_to_hwirq(d)));
+	u32 off = AIC_HWIRQ_DIE(hwirq) * ic->info.die_stride;
+	u32 irq = AIC_HWIRQ_IRQ(hwirq);
+
+	aic_ic_write(ic, ic->info.mask_clr + off + MASK_REG(irq), MASK_BIT(irq));
 }
 
 static void aic_irq_eoi(struct irq_data *d)
@@ -239,12 +370,12 @@ static void __exception_irq_entry aic_handle_irq(struct pt_regs *regs)
 		 * We cannot use a relaxed read here, as reads from DMA buffers
 		 * need to be ordered after the IRQ fires.
 		 */
-		event = readl(ic->base + AIC_EVENT);
+		event = readl(ic->event + ic->info.event);
 		type = FIELD_GET(AIC_EVENT_TYPE, event);
 		irq = FIELD_GET(AIC_EVENT_NUM, event);
 
-		if (type == AIC_EVENT_TYPE_HW)
-			generic_handle_domain_irq(aic_irqc->hw_domain, irq);
+		if (type == AIC_EVENT_TYPE_IRQ)
+			generic_handle_domain_irq(aic_irqc->hw_domain, event);
 		else if (type == AIC_EVENT_TYPE_IPI && irq == 1)
 			aic_handle_ipi(regs);
 		else if (event != 0)
@@ -271,12 +402,14 @@ static int aic_irq_set_affinity(struct irq_data *d,
 	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
 	int cpu;
 
+	BUG_ON(!ic->info.target_cpu);
+
 	if (force)
 		cpu = cpumask_first(mask_val);
 	else
 		cpu = cpumask_any_and(mask_val, cpu_online_mask);
 
-	aic_ic_write(ic, AIC_TARGET_CPU + hwirq * 4, BIT(cpu));
+	aic_ic_write(ic, ic->info.target_cpu + AIC_HWIRQ_IRQ(hwirq) * 4, BIT(cpu));
 	irq_data_update_effective_affinity(d, cpumask_of(cpu));
 
 	return IRQ_SET_MASK_OK;
@@ -300,15 +433,21 @@ static struct irq_chip aic_chip = {
 	.irq_set_type = aic_irq_set_type,
 };
 
+static struct irq_chip aic2_chip = {
+	.name = "AIC2",
+	.irq_mask = aic_irq_mask,
+	.irq_unmask = aic_irq_unmask,
+	.irq_eoi = aic_irq_eoi,
+	.irq_set_type = aic_irq_set_type,
+};
+
 /*
  * FIQ irqchip
  */
 
 static unsigned long aic_fiq_get_idx(struct irq_data *d)
 {
-	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
-
-	return irqd_to_hwirq(d) - ic->nr_hw;
+	return AIC_HWIRQ_IRQ(irqd_to_hwirq(d));
 }
 
 static void aic_fiq_set_mask(struct irq_data *d)
@@ -386,17 +525,21 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs)
 	 */
 
 	if (read_sysreg_s(SYS_IMP_APL_IPI_SR_EL1) & IPI_SR_PENDING) {
-		pr_err_ratelimited("Fast IPI fired. Acking.\n");
-		write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1);
+		if (static_branch_likely(&use_fast_ipi)) {
+			aic_handle_ipi(regs);
+		} else {
+			pr_err_ratelimited("Fast IPI fired. Acking.\n");
+			write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1);
+		}
 	}
 
 	if (TIMER_FIRING(read_sysreg(cntp_ctl_el0)))
 		generic_handle_domain_irq(aic_irqc->hw_domain,
-					  aic_irqc->nr_hw + AIC_TMR_EL0_PHYS);
+					  AIC_FIQ_HWIRQ(AIC_TMR_EL0_PHYS));
 
 	if (TIMER_FIRING(read_sysreg(cntv_ctl_el0)))
 		generic_handle_domain_irq(aic_irqc->hw_domain,
-					  aic_irqc->nr_hw + AIC_TMR_EL0_VIRT);
+					  AIC_FIQ_HWIRQ(AIC_TMR_EL0_VIRT));
 
 	if (is_kernel_in_hyp_mode()) {
 		uint64_t enabled = read_sysreg_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2);
@@ -404,24 +547,23 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs)
 		if ((enabled & VM_TMR_FIQ_ENABLE_P) &&
 		    TIMER_FIRING(read_sysreg_s(SYS_CNTP_CTL_EL02)))
 			generic_handle_domain_irq(aic_irqc->hw_domain,
-						  aic_irqc->nr_hw + AIC_TMR_EL02_PHYS);
+						  AIC_FIQ_HWIRQ(AIC_TMR_EL02_PHYS));
 
 		if ((enabled & VM_TMR_FIQ_ENABLE_V) &&
 		    TIMER_FIRING(read_sysreg_s(SYS_CNTV_CTL_EL02)))
 			generic_handle_domain_irq(aic_irqc->hw_domain,
-						  aic_irqc->nr_hw + AIC_TMR_EL02_VIRT);
+						  AIC_FIQ_HWIRQ(AIC_TMR_EL02_VIRT));
 	}
 
-	if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) ==
-			(FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) {
-		/*
-		 * Not supported yet, let's figure out how to handle this when
-		 * we implement these proprietary performance counters. For now,
-		 * just mask it and move on.
-		 */
-		pr_err_ratelimited("PMC FIQ fired. Masking.\n");
-		sysreg_clear_set_s(SYS_IMP_APL_PMCR0_EL1, PMCR0_IMODE | PMCR0_IACT,
-				   FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF));
+	if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & PMCR0_IACT) {
+		int irq;
+		if (cpumask_test_cpu(smp_processor_id(),
+				     &aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff))
+			irq = AIC_CPU_PMU_P;
+		else
+			irq = AIC_CPU_PMU_E;
+		generic_handle_domain_irq(aic_irqc->hw_domain,
+					  AIC_FIQ_HWIRQ(irq));
 	}
 
 	if (FIELD_GET(UPMCR0_IMODE, read_sysreg_s(SYS_IMP_APL_UPMCR0_EL1)) == UPMCR0_IMODE_FIQ &&
@@ -455,13 +597,29 @@ static int aic_irq_domain_map(struct irq_domain *id, unsigned int irq,
 			      irq_hw_number_t hw)
 {
 	struct aic_irq_chip *ic = id->host_data;
+	u32 type = FIELD_GET(AIC_EVENT_TYPE, hw);
+	struct irq_chip *chip = &aic_chip;
 
-	if (hw < ic->nr_hw) {
-		irq_domain_set_info(id, irq, hw, &aic_chip, id->host_data,
+	if (ic->info.version == 2)
+		chip = &aic2_chip;
+
+	if (type == AIC_EVENT_TYPE_IRQ) {
+		irq_domain_set_info(id, irq, hw, chip, id->host_data,
 				    handle_fasteoi_irq, NULL, NULL);
 		irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq)));
 	} else {
-		irq_set_percpu_devid(irq);
+		int fiq = FIELD_GET(AIC_EVENT_NUM, hw);
+
+		switch (fiq) {
+		case AIC_CPU_PMU_P:
+		case AIC_CPU_PMU_E:
+			irq_set_percpu_devid_partition(irq, &ic->fiq_aff[fiq]->aff);
+			break;
+		default:
+			irq_set_percpu_devid(irq);
+			break;
+		}
+
 		irq_domain_set_info(id, irq, hw, &fiq_chip, id->host_data,
 				    handle_percpu_devid_irq, NULL, NULL);
 	}
@@ -475,32 +633,46 @@ static int aic_irq_domain_translate(struct irq_domain *id,
 				    unsigned int *type)
 {
 	struct aic_irq_chip *ic = id->host_data;
+	u32 *args;
+	u32 die = 0;
 
-	if (fwspec->param_count != 3 || !is_of_node(fwspec->fwnode))
+	if (fwspec->param_count < 3 || fwspec->param_count > 4 ||
+	    !is_of_node(fwspec->fwnode))
 		return -EINVAL;
 
+	args = &fwspec->param[1];
+
+	if (fwspec->param_count == 4) {
+		die = args[0];
+		args++;
+	}
+
 	switch (fwspec->param[0]) {
 	case AIC_IRQ:
-		if (fwspec->param[1] >= ic->nr_hw)
+		if (die >= ic->nr_die)
 			return -EINVAL;
-		*hwirq = fwspec->param[1];
+		if (args[0] >= ic->nr_irq)
+			return -EINVAL;
+		*hwirq = AIC_IRQ_HWIRQ(die, args[0]);
 		break;
 	case AIC_FIQ:
-		if (fwspec->param[1] >= AIC_NR_FIQ)
+		if (die != 0)
+			return -EINVAL;
+		if (args[0] >= AIC_NR_FIQ)
 			return -EINVAL;
-		*hwirq = ic->nr_hw + fwspec->param[1];
+		*hwirq = AIC_FIQ_HWIRQ(args[0]);
 
 		/*
 		 * In EL1 the non-redirected registers are the guest's,
 		 * not EL2's, so remap the hwirqs to match.
 		 */
 		if (!is_kernel_in_hyp_mode()) {
-			switch (fwspec->param[1]) {
+			switch (args[0]) {
 			case AIC_TMR_GUEST_PHYS:
-				*hwirq = ic->nr_hw + AIC_TMR_EL0_PHYS;
+				*hwirq = AIC_FIQ_HWIRQ(AIC_TMR_EL0_PHYS);
 				break;
 			case AIC_TMR_GUEST_VIRT:
-				*hwirq = ic->nr_hw + AIC_TMR_EL0_VIRT;
+				*hwirq = AIC_FIQ_HWIRQ(AIC_TMR_EL0_VIRT);
 				break;
 			case AIC_TMR_HV_PHYS:
 			case AIC_TMR_HV_VIRT:
@@ -514,7 +686,7 @@ static int aic_irq_domain_translate(struct irq_domain *id,
 		return -EINVAL;
 	}
 
-	*type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
+	*type = args[1] & IRQ_TYPE_SENSE_MASK;
 
 	return 0;
 }
@@ -563,6 +735,22 @@ static const struct irq_domain_ops aic_irq_domain_ops = {
  * IPI irqchip
  */
 
+static void aic_ipi_send_fast(int cpu)
+{
+	u64 mpidr = cpu_logical_map(cpu);
+	u64 my_mpidr = read_cpuid_mpidr();
+	u64 cluster = MPIDR_CLUSTER(mpidr);
+	u64 idx = MPIDR_CPU(mpidr);
+
+	if (MPIDR_CLUSTER(my_mpidr) == cluster)
+		write_sysreg_s(FIELD_PREP(IPI_RR_CPU, idx),
+			       SYS_IMP_APL_IPI_RR_LOCAL_EL1);
+	else
+		write_sysreg_s(FIELD_PREP(IPI_RR_CPU, idx) | FIELD_PREP(IPI_RR_CLUSTER, cluster),
+			       SYS_IMP_APL_IPI_RR_GLOBAL_EL1);
+	isb();
+}
+
 static void aic_ipi_mask(struct irq_data *d)
 {
 	u32 irq_bit = BIT(irqd_to_hwirq(d));
@@ -588,8 +776,12 @@ static void aic_ipi_unmask(struct irq_data *d)
 	 * If a pending vIPI was unmasked, raise a HW IPI to ourselves.
 	 * No barriers needed here since this is a self-IPI.
 	 */
-	if (atomic_read(this_cpu_ptr(&aic_vipi_flag)) & irq_bit)
-		aic_ic_write(ic, AIC_IPI_SEND, AIC_IPI_SEND_CPU(smp_processor_id()));
+	if (atomic_read(this_cpu_ptr(&aic_vipi_flag)) & irq_bit) {
+		if (static_branch_likely(&use_fast_ipi))
+			aic_ipi_send_fast(smp_processor_id());
+		else
+			aic_ic_write(ic, AIC_IPI_SEND, AIC_IPI_SEND_CPU(smp_processor_id()));
+	}
 }
 
 static void aic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
@@ -617,8 +809,12 @@ static void aic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
 		smp_mb__after_atomic();
 
 		if (!(pending & irq_bit) &&
-		    (atomic_read(per_cpu_ptr(&aic_vipi_enable, cpu)) & irq_bit))
-			send |= AIC_IPI_SEND_CPU(cpu);
+		    (atomic_read(per_cpu_ptr(&aic_vipi_enable, cpu)) & irq_bit)) {
+			if (static_branch_likely(&use_fast_ipi))
+				aic_ipi_send_fast(cpu);
+			else
+				send |= AIC_IPI_SEND_CPU(cpu);
+		}
 	}
 
 	/*
@@ -650,8 +846,16 @@ static void aic_handle_ipi(struct pt_regs *regs)
 	/*
 	 * Ack the IPI. We need to order this after the AIC event read, but
 	 * that is enforced by normal MMIO ordering guarantees.
+	 *
+	 * For the Fast IPI case, this needs to be ordered before the vIPI
+	 * handling below, so we need to isb();
 	 */
-	aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_OTHER);
+	if (static_branch_likely(&use_fast_ipi)) {
+		write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1);
+		isb();
+	} else {
+		aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_OTHER);
+	}
 
 	/*
 	 * The mask read does not need to be ordered. Only we can change
@@ -679,7 +883,8 @@ static void aic_handle_ipi(struct pt_regs *regs)
 	 * No ordering needed here; at worst this just changes the timing of
 	 * when the next IPI will be delivered.
 	 */
-	aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
+	if (!static_branch_likely(&use_fast_ipi))
+		aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
 }
 
 static int aic_ipi_alloc(struct irq_domain *d, unsigned int virq,
@@ -766,20 +971,27 @@ static int aic_init_cpu(unsigned int cpu)
 	/* Commit all of the above */
 	isb();
 
-	/*
-	 * Make sure the kernel's idea of logical CPU order is the same as AIC's
-	 * If we ever end up with a mismatch here, we will have to introduce
-	 * a mapping table similar to what other irqchip drivers do.
-	 */
-	WARN_ON(aic_ic_read(aic_irqc, AIC_WHOAMI) != smp_processor_id());
+	if (aic_irqc->info.version == 1) {
+		/*
+		 * Make sure the kernel's idea of logical CPU order is the same as AIC's
+		 * If we ever end up with a mismatch here, we will have to introduce
+		 * a mapping table similar to what other irqchip drivers do.
+		 */
+		WARN_ON(aic_ic_read(aic_irqc, AIC_WHOAMI) != smp_processor_id());
 
-	/*
-	 * Always keep IPIs unmasked at the hardware level (except auto-masking
-	 * by AIC during processing). We manage masks at the vIPI level.
-	 */
-	aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_SELF | AIC_IPI_OTHER);
-	aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF);
-	aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
+		/*
+		 * Always keep IPIs unmasked at the hardware level (except auto-masking
+		 * by AIC during processing). We manage masks at the vIPI level.
+		 * These registers only exist on AICv1, AICv2 always uses fast IPIs.
+		 */
+		aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_SELF | AIC_IPI_OTHER);
+		if (static_branch_likely(&use_fast_ipi)) {
+			aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF | AIC_IPI_OTHER);
+		} else {
+			aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF);
+			aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
+		}
+	}
 
 	/* Initialize the local mask state */
 	__this_cpu_write(aic_fiq_unmasked, 0);
@@ -793,68 +1005,193 @@ static struct gic_kvm_info vgic_info __initdata = {
 	.no_hw_deactivation	= true,
 };
 
+static void build_fiq_affinity(struct aic_irq_chip *ic, struct device_node *aff)
+{
+	int i, n;
+	u32 fiq;
+
+	if (of_property_read_u32(aff, "apple,fiq-index", &fiq) ||
+	    WARN_ON(fiq >= AIC_NR_FIQ) || ic->fiq_aff[fiq])
+		return;
+
+	n = of_property_count_elems_of_size(aff, "cpus", sizeof(u32));
+	if (WARN_ON(n < 0))
+		return;
+
+	ic->fiq_aff[fiq] = kzalloc(sizeof(*ic->fiq_aff[fiq]), GFP_KERNEL);
+	if (!ic->fiq_aff[fiq])
+		return;
+
+	for (i = 0; i < n; i++) {
+		struct device_node *cpu_node;
+		u32 cpu_phandle;
+		int cpu;
+
+		if (of_property_read_u32_index(aff, "cpus", i, &cpu_phandle))
+			continue;
+
+		cpu_node = of_find_node_by_phandle(cpu_phandle);
+		if (WARN_ON(!cpu_node))
+			continue;
+
+		cpu = of_cpu_node_to_id(cpu_node);
+		if (WARN_ON(cpu < 0))
+			continue;
+
+		cpumask_set_cpu(cpu, &ic->fiq_aff[fiq]->aff);
+	}
+}
+
 static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent)
 {
-	int i;
+	int i, die;
+	u32 off, start_off;
 	void __iomem *regs;
-	u32 info;
 	struct aic_irq_chip *irqc;
+	struct device_node *affs;
+	const struct of_device_id *match;
 
 	regs = of_iomap(node, 0);
 	if (WARN_ON(!regs))
 		return -EIO;
 
 	irqc = kzalloc(sizeof(*irqc), GFP_KERNEL);
-	if (!irqc)
+	if (!irqc) {
+		iounmap(regs);
 		return -ENOMEM;
+	}
 
-	aic_irqc = irqc;
 	irqc->base = regs;
 
-	info = aic_ic_read(irqc, AIC_INFO);
-	irqc->nr_hw = FIELD_GET(AIC_INFO_NR_HW, info);
+	match = of_match_node(aic_info_match, node);
+	if (!match)
+		goto err_unmap;
 
-	irqc->hw_domain = irq_domain_create_linear(of_node_to_fwnode(node),
-						   irqc->nr_hw + AIC_NR_FIQ,
-						   &aic_irq_domain_ops, irqc);
-	if (WARN_ON(!irqc->hw_domain)) {
-		iounmap(irqc->base);
-		kfree(irqc);
-		return -ENODEV;
+	irqc->info = *(struct aic_info *)match->data;
+
+	aic_irqc = irqc;
+
+	switch (irqc->info.version) {
+	case 1: {
+		u32 info;
+
+		info = aic_ic_read(irqc, AIC_INFO);
+		irqc->nr_irq = FIELD_GET(AIC_INFO_NR_IRQ, info);
+		irqc->max_irq = AIC_MAX_IRQ;
+		irqc->nr_die = irqc->max_die = 1;
+
+		off = start_off = irqc->info.target_cpu;
+		off += sizeof(u32) * irqc->max_irq; /* TARGET_CPU */
+
+		irqc->event = irqc->base;
+
+		break;
 	}
+	case 2: {
+		u32 info1, info3;
+
+		info1 = aic_ic_read(irqc, AIC2_INFO1);
+		info3 = aic_ic_read(irqc, AIC2_INFO3);
+
+		irqc->nr_irq = FIELD_GET(AIC2_INFO1_NR_IRQ, info1);
+		irqc->max_irq = FIELD_GET(AIC2_INFO3_MAX_IRQ, info3);
+		irqc->nr_die = FIELD_GET(AIC2_INFO1_LAST_DIE, info1) + 1;
+		irqc->max_die = FIELD_GET(AIC2_INFO3_MAX_DIE, info3);
+
+		off = start_off = irqc->info.irq_cfg;
+		off += sizeof(u32) * irqc->max_irq; /* IRQ_CFG */
+
+		irqc->event = of_iomap(node, 1);
+		if (WARN_ON(!irqc->event))
+			goto err_unmap;
+
+		break;
+	}
+	}
+
+	irqc->info.sw_set = off;
+	off += sizeof(u32) * (irqc->max_irq >> 5); /* SW_SET */
+	irqc->info.sw_clr = off;
+	off += sizeof(u32) * (irqc->max_irq >> 5); /* SW_CLR */
+	irqc->info.mask_set = off;
+	off += sizeof(u32) * (irqc->max_irq >> 5); /* MASK_SET */
+	irqc->info.mask_clr = off;
+	off += sizeof(u32) * (irqc->max_irq >> 5); /* MASK_CLR */
+	off += sizeof(u32) * (irqc->max_irq >> 5); /* HW_STATE */
+
+	if (irqc->info.fast_ipi)
+		static_branch_enable(&use_fast_ipi);
+	else
+		static_branch_disable(&use_fast_ipi);
+
+	irqc->info.die_stride = off - start_off;
+
+	irqc->hw_domain = irq_domain_create_tree(of_node_to_fwnode(node),
+						 &aic_irq_domain_ops, irqc);
+	if (WARN_ON(!irqc->hw_domain))
+		goto err_unmap;
 
 	irq_domain_update_bus_token(irqc->hw_domain, DOMAIN_BUS_WIRED);
 
-	if (aic_init_smp(irqc, node)) {
-		irq_domain_remove(irqc->hw_domain);
-		iounmap(irqc->base);
-		kfree(irqc);
-		return -ENODEV;
+	if (aic_init_smp(irqc, node))
+		goto err_remove_domain;
+
+	affs = of_get_child_by_name(node, "affinities");
+	if (affs) {
+		struct device_node *chld;
+
+		for_each_child_of_node(affs, chld)
+			build_fiq_affinity(irqc, chld);
 	}
 
 	set_handle_irq(aic_handle_irq);
 	set_handle_fiq(aic_handle_fiq);
 
-	for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++)
-		aic_ic_write(irqc, AIC_MASK_SET + i * 4, U32_MAX);
-	for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++)
-		aic_ic_write(irqc, AIC_SW_CLR + i * 4, U32_MAX);
-	for (i = 0; i < irqc->nr_hw; i++)
-		aic_ic_write(irqc, AIC_TARGET_CPU + i * 4, 1);
+	off = 0;
+	for (die = 0; die < irqc->nr_die; die++) {
+		for (i = 0; i < BITS_TO_U32(irqc->nr_irq); i++)
+			aic_ic_write(irqc, irqc->info.mask_set + off + i * 4, U32_MAX);
+		for (i = 0; i < BITS_TO_U32(irqc->nr_irq); i++)
+			aic_ic_write(irqc, irqc->info.sw_clr + off + i * 4, U32_MAX);
+		if (irqc->info.target_cpu)
+			for (i = 0; i < irqc->nr_irq; i++)
+				aic_ic_write(irqc, irqc->info.target_cpu + off + i * 4, 1);
+		off += irqc->info.die_stride;
+	}
+
+	if (irqc->info.version == 2) {
+		u32 config = aic_ic_read(irqc, AIC2_CONFIG);
+
+		config |= AIC2_CONFIG_ENABLE;
+		aic_ic_write(irqc, AIC2_CONFIG, config);
+	}
 
 	if (!is_kernel_in_hyp_mode())
 		pr_info("Kernel running in EL1, mapping interrupts");
 
+	if (static_branch_likely(&use_fast_ipi))
+		pr_info("Using Fast IPIs");
+
 	cpuhp_setup_state(CPUHP_AP_IRQ_APPLE_AIC_STARTING,
 			  "irqchip/apple-aic/ipi:starting",
 			  aic_init_cpu, NULL);
 
 	vgic_set_kvm_info(&vgic_info);
 
-	pr_info("Initialized with %d IRQs, %d FIQs, %d vIPIs\n",
-		irqc->nr_hw, AIC_NR_FIQ, AIC_NR_SWIPI);
+	pr_info("Initialized with %d/%d IRQs * %d/%d die(s), %d FIQs, %d vIPIs",
+		irqc->nr_irq, irqc->max_irq, irqc->nr_die, irqc->max_die, AIC_NR_FIQ, AIC_NR_SWIPI);
 
 	return 0;
+
+err_remove_domain:
+	irq_domain_remove(irqc->hw_domain);
+err_unmap:
+	if (irqc->event && irqc->event != irqc->base)
+		iounmap(irqc->event);
+	iounmap(irqc->base);
+	kfree(irqc);
+	return -ENODEV;
 }
 
-IRQCHIP_DECLARE(apple_m1_aic, "apple,aic", aic_of_ic_init);
+IRQCHIP_DECLARE(apple_aic, "apple,aic", aic_of_ic_init);
+IRQCHIP_DECLARE(apple_aic2, "apple,aic2", aic_of_ic_init);
diff --git a/drivers/irqchip/irq-ftintc010.c b/drivers/irqchip/irq-ftintc010.c
index 5cc268880f8e..46a3aa60e50e 100644
--- a/drivers/irqchip/irq-ftintc010.c
+++ b/drivers/irqchip/irq-ftintc010.c
@@ -11,7 +11,6 @@
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/irqchip.h>
-#include <linux/irqchip/versatile-fpga.h>
 #include <linux/irqdomain.h>
 #include <linux/module.h>
 #include <linux/of.h>
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 5e935d97207d..0efe1a9a9f3b 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1211,7 +1211,7 @@ static void gic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
 	 * Ensure that stores to Normal memory are visible to the
 	 * other CPUs before issuing the IPI.
 	 */
-	wmb();
+	dsb(ishst);
 
 	for_each_cpu(cpu, mask) {
 		u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu));
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index b8bb46c65a97..58ba835bee1f 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -34,6 +34,7 @@
 #include <linux/irqdomain.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
+#include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/irqchip.h>
 #include <linux/irqchip/chained_irq.h>
@@ -66,7 +67,6 @@ union gic_base {
 };
 
 struct gic_chip_data {
-	struct irq_chip chip;
 	union gic_base dist_base;
 	union gic_base cpu_base;
 	void __iomem *raw_dist_base;
@@ -397,18 +397,15 @@ static void gic_handle_cascade_irq(struct irq_desc *desc)
 	chained_irq_exit(chip, desc);
 }
 
-static const struct irq_chip gic_chip = {
-	.irq_mask		= gic_mask_irq,
-	.irq_unmask		= gic_unmask_irq,
-	.irq_eoi		= gic_eoi_irq,
-	.irq_set_type		= gic_set_type,
-	.irq_retrigger          = gic_retrigger,
-	.irq_get_irqchip_state	= gic_irq_get_irqchip_state,
-	.irq_set_irqchip_state	= gic_irq_set_irqchip_state,
-	.flags			= IRQCHIP_SET_TYPE_MASKED |
-				  IRQCHIP_SKIP_SET_WAKE |
-				  IRQCHIP_MASK_ON_SUSPEND,
-};
+static void gic_irq_print_chip(struct irq_data *d, struct seq_file *p)
+{
+	struct gic_chip_data *gic = irq_data_get_irq_chip_data(d);
+
+	if (gic->domain->dev)
+		seq_printf(p, gic->domain->dev->of_node->name);
+	else
+		seq_printf(p, "GIC-%d", (int)(gic - &gic_data[0]));
+}
 
 void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
 {
@@ -799,8 +796,12 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 			    bool force)
 {
 	void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + gic_irq(d);
+	struct gic_chip_data *gic = irq_data_get_irq_chip_data(d);
 	unsigned int cpu;
 
+	if (unlikely(gic != &gic_data[0]))
+		return -EINVAL;
+
 	if (!force)
 		cpu = cpumask_any_and(mask_val, cpu_online_mask);
 	else
@@ -880,6 +881,39 @@ static __init void gic_smp_init(void)
 #define gic_ipi_send_mask	NULL
 #endif
 
+static const struct irq_chip gic_chip = {
+	.irq_mask		= gic_mask_irq,
+	.irq_unmask		= gic_unmask_irq,
+	.irq_eoi		= gic_eoi_irq,
+	.irq_set_type		= gic_set_type,
+	.irq_retrigger          = gic_retrigger,
+	.irq_set_affinity	= gic_set_affinity,
+	.ipi_send_mask		= gic_ipi_send_mask,
+	.irq_get_irqchip_state	= gic_irq_get_irqchip_state,
+	.irq_set_irqchip_state	= gic_irq_set_irqchip_state,
+	.irq_print_chip		= gic_irq_print_chip,
+	.flags			= IRQCHIP_SET_TYPE_MASKED |
+				  IRQCHIP_SKIP_SET_WAKE |
+				  IRQCHIP_MASK_ON_SUSPEND,
+};
+
+static const struct irq_chip gic_chip_mode1 = {
+	.name			= "GICv2",
+	.irq_mask		= gic_eoimode1_mask_irq,
+	.irq_unmask		= gic_unmask_irq,
+	.irq_eoi		= gic_eoimode1_eoi_irq,
+	.irq_set_type		= gic_set_type,
+	.irq_retrigger          = gic_retrigger,
+	.irq_set_affinity	= gic_set_affinity,
+	.ipi_send_mask		= gic_ipi_send_mask,
+	.irq_get_irqchip_state	= gic_irq_get_irqchip_state,
+	.irq_set_irqchip_state	= gic_irq_set_irqchip_state,
+	.irq_set_vcpu_affinity	= gic_irq_set_vcpu_affinity,
+	.flags			= IRQCHIP_SET_TYPE_MASKED |
+				  IRQCHIP_SKIP_SET_WAKE |
+				  IRQCHIP_MASK_ON_SUSPEND,
+};
+
 #ifdef CONFIG_BL_SWITCHER
 /*
  * gic_send_sgi - send a SGI directly to given CPU interface number
@@ -1024,15 +1058,19 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 {
 	struct gic_chip_data *gic = d->host_data;
 	struct irq_data *irqd = irq_desc_get_irq_data(irq_to_desc(irq));
+	const struct irq_chip *chip;
+
+	chip = (static_branch_likely(&supports_deactivate_key) &&
+		gic == &gic_data[0]) ? &gic_chip_mode1 : &gic_chip;
 
 	switch (hw) {
 	case 0 ... 31:
 		irq_set_percpu_devid(irq);
-		irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data,
+		irq_domain_set_info(d, irq, hw, chip, d->host_data,
 				    handle_percpu_devid_irq, NULL, NULL);
 		break;
 	default:
-		irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data,
+		irq_domain_set_info(d, irq, hw, chip, d->host_data,
 				    handle_fasteoi_irq, NULL, NULL);
 		irq_set_probe(irq);
 		irqd_set_single_target(irqd);
@@ -1127,26 +1165,6 @@ static const struct irq_domain_ops gic_irq_domain_ops = {
 	.unmap = gic_irq_domain_unmap,
 };
 
-static void gic_init_chip(struct gic_chip_data *gic, struct device *dev,
-			  const char *name, bool use_eoimode1)
-{
-	/* Initialize irq_chip */
-	gic->chip = gic_chip;
-	gic->chip.name = name;
-	gic->chip.parent_device = dev;
-
-	if (use_eoimode1) {
-		gic->chip.irq_mask = gic_eoimode1_mask_irq;
-		gic->chip.irq_eoi = gic_eoimode1_eoi_irq;
-		gic->chip.irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity;
-	}
-
-	if (gic == &gic_data[0]) {
-		gic->chip.irq_set_affinity = gic_set_affinity;
-		gic->chip.ipi_send_mask = gic_ipi_send_mask;
-	}
-}
-
 static int gic_init_bases(struct gic_chip_data *gic,
 			  struct fwnode_handle *handle)
 {
@@ -1246,7 +1264,6 @@ error:
 static int __init __gic_init_bases(struct gic_chip_data *gic,
 				   struct fwnode_handle *handle)
 {
-	char *name;
 	int i, ret;
 
 	if (WARN_ON(!gic || gic->domain))
@@ -1266,18 +1283,8 @@ static int __init __gic_init_bases(struct gic_chip_data *gic,
 			pr_info("GIC: Using split EOI/Deactivate mode\n");
 	}
 
-	if (static_branch_likely(&supports_deactivate_key) && gic == &gic_data[0]) {
-		name = kasprintf(GFP_KERNEL, "GICv2");
-		gic_init_chip(gic, NULL, name, true);
-	} else {
-		name = kasprintf(GFP_KERNEL, "GIC-%d", (int)(gic-&gic_data[0]));
-		gic_init_chip(gic, NULL, name, false);
-	}
-
 	ret = gic_init_bases(gic, handle);
-	if (ret)
-		kfree(name);
-	else if (gic == &gic_data[0])
+	if (gic == &gic_data[0])
 		gic_smp_init();
 
 	return ret;
@@ -1460,8 +1467,6 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq)
 	if (!*gic)
 		return -ENOMEM;
 
-	gic_init_chip(*gic, dev, dev->of_node->name, false);
-
 	ret = gic_of_setup(*gic, dev->of_node);
 	if (ret)
 		return ret;
@@ -1472,6 +1477,7 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq)
 		return ret;
 	}
 
+	irq_domain_set_pm_device((*gic)->domain, dev);
 	irq_set_chained_handler_and_data(irq, gic_handle_cascade_irq, *gic);
 
 	return 0;
diff --git a/drivers/irqchip/irq-imx-intmux.c b/drivers/irqchip/irq-imx-intmux.c
index e86ff743e98c..80aaea82468a 100644
--- a/drivers/irqchip/irq-imx-intmux.c
+++ b/drivers/irqchip/irq-imx-intmux.c
@@ -61,7 +61,6 @@
 #define CHAN_MAX_NUM		0x8
 
 struct intmux_irqchip_data {
-	struct irq_chip		chip;
 	u32			saved_reg;
 	int			chanidx;
 	int			irq;
@@ -114,7 +113,7 @@ static void imx_intmux_irq_unmask(struct irq_data *d)
 	raw_spin_unlock_irqrestore(&data->lock, flags);
 }
 
-static struct irq_chip imx_intmux_irq_chip = {
+static struct irq_chip imx_intmux_irq_chip __ro_after_init = {
 	.name		= "intmux",
 	.irq_mask	= imx_intmux_irq_mask,
 	.irq_unmask	= imx_intmux_irq_unmask,
@@ -126,7 +125,7 @@ static int imx_intmux_irq_map(struct irq_domain *h, unsigned int irq,
 	struct intmux_irqchip_data *data = h->host_data;
 
 	irq_set_chip_data(irq, data);
-	irq_set_chip_and_handler(irq, &data->chip, handle_level_irq);
+	irq_set_chip_and_handler(irq, &imx_intmux_irq_chip, handle_level_irq);
 
 	return 0;
 }
@@ -241,8 +240,6 @@ static int imx_intmux_probe(struct platform_device *pdev)
 	}
 
 	for (i = 0; i < channum; i++) {
-		data->irqchip_data[i].chip = imx_intmux_irq_chip;
-		data->irqchip_data[i].chip.parent_device = &pdev->dev;
 		data->irqchip_data[i].chanidx = i;
 
 		data->irqchip_data[i].irq = irq_of_parse_and_map(np, i);
@@ -260,6 +257,7 @@ static int imx_intmux_probe(struct platform_device *pdev)
 			goto out;
 		}
 		data->irqchip_data[i].domain = domain;
+		irq_domain_set_pm_device(domain, &pdev->dev);
 
 		/* disable all interrupt sources of this channel firstly */
 		writel_relaxed(0, data->regs + CHANIER(i));
diff --git a/drivers/irqchip/irq-lpc32xx.c b/drivers/irqchip/irq-lpc32xx.c
index a29357f39450..4d70a857133f 100644
--- a/drivers/irqchip/irq-lpc32xx.c
+++ b/drivers/irqchip/irq-lpc32xx.c
@@ -11,6 +11,7 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
+#include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <asm/exception.h>
 
@@ -25,8 +26,8 @@
 
 struct lpc32xx_irq_chip {
 	void __iomem *base;
+	phys_addr_t addr;
 	struct irq_domain *domain;
-	struct irq_chip chip;
 };
 
 static struct lpc32xx_irq_chip *lpc32xx_mic_irqc;
@@ -118,6 +119,24 @@ static int lpc32xx_irq_set_type(struct irq_data *d, unsigned int type)
 	return 0;
 }
 
+static void lpc32xx_irq_print_chip(struct irq_data *d, struct seq_file *p)
+{
+	struct lpc32xx_irq_chip *ic = irq_data_get_irq_chip_data(d);
+
+	if (ic == lpc32xx_mic_irqc)
+		seq_printf(p, "%08x.mic", ic->addr);
+	else
+		seq_printf(p, "%08x.sic", ic->addr);
+}
+
+static const struct irq_chip lpc32xx_chip = {
+	.irq_ack	= lpc32xx_irq_ack,
+	.irq_mask	= lpc32xx_irq_mask,
+	.irq_unmask	= lpc32xx_irq_unmask,
+	.irq_set_type	= lpc32xx_irq_set_type,
+	.irq_print_chip	= lpc32xx_irq_print_chip,
+};
+
 static void __exception_irq_entry lpc32xx_handle_irq(struct pt_regs *regs)
 {
 	struct lpc32xx_irq_chip *ic = lpc32xx_mic_irqc;
@@ -153,7 +172,7 @@ static int lpc32xx_irq_domain_map(struct irq_domain *id, unsigned int virq,
 	struct lpc32xx_irq_chip *ic = id->host_data;
 
 	irq_set_chip_data(virq, ic);
-	irq_set_chip_and_handler(virq, &ic->chip, handle_level_irq);
+	irq_set_chip_and_handler(virq, &lpc32xx_chip, handle_level_irq);
 	irq_set_status_flags(virq, IRQ_LEVEL);
 	irq_set_noprobe(virq);
 
@@ -183,6 +202,7 @@ static int __init lpc32xx_of_ic_init(struct device_node *node,
 	if (!irqc)
 		return -ENOMEM;
 
+	irqc->addr = addr;
 	irqc->base = of_iomap(node, 0);
 	if (!irqc->base) {
 		pr_err("%pOF: unable to map registers\n", node);
@@ -190,21 +210,11 @@ static int __init lpc32xx_of_ic_init(struct device_node *node,
 		return -EINVAL;
 	}
 
-	irqc->chip.irq_ack = lpc32xx_irq_ack;
-	irqc->chip.irq_mask = lpc32xx_irq_mask;
-	irqc->chip.irq_unmask = lpc32xx_irq_unmask;
-	irqc->chip.irq_set_type = lpc32xx_irq_set_type;
-	if (is_mic)
-		irqc->chip.name = kasprintf(GFP_KERNEL, "%08x.mic", addr);
-	else
-		irqc->chip.name = kasprintf(GFP_KERNEL, "%08x.sic", addr);
-
 	irqc->domain = irq_domain_add_linear(node, NR_LPC32XX_IC_IRQS,
 					     &lpc32xx_irq_domain_ops, irqc);
 	if (!irqc->domain) {
 		pr_err("unable to add irq domain\n");
 		iounmap(irqc->base);
-		kfree(irqc->chip.name);
 		kfree(irqc);
 		return -ENODEV;
 	}
diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c
index d90ff0b92480..2aaa9aad3e87 100644
--- a/drivers/irqchip/irq-meson-gpio.c
+++ b/drivers/irqchip/irq-meson-gpio.c
@@ -16,7 +16,7 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 
-#define NUM_CHANNEL 8
+#define MAX_NUM_CHANNEL 64
 #define MAX_INPUT_MUX 256
 
 #define REG_EDGE_POL	0x00
@@ -26,6 +26,8 @@
 
 /* use for A1 like chips */
 #define REG_PIN_A1_SEL	0x04
+/* Used for s4 chips */
+#define REG_EDGE_POL_S4	0x1c
 
 /*
  * Note: The S905X3 datasheet reports that BOTH_EDGE is controlled by
@@ -51,15 +53,22 @@ static void meson_a1_gpio_irq_sel_pin(struct meson_gpio_irq_controller *ctl,
 				      unsigned int channel,
 				      unsigned long hwirq);
 static void meson_a1_gpio_irq_init(struct meson_gpio_irq_controller *ctl);
+static int meson8_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl,
+				    unsigned int type, u32 *channel_hwirq);
+static int meson_s4_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl,
+				      unsigned int type, u32 *channel_hwirq);
 
 struct irq_ctl_ops {
 	void (*gpio_irq_sel_pin)(struct meson_gpio_irq_controller *ctl,
 				 unsigned int channel, unsigned long hwirq);
 	void (*gpio_irq_init)(struct meson_gpio_irq_controller *ctl);
+	int (*gpio_irq_set_type)(struct meson_gpio_irq_controller *ctl,
+				 unsigned int type, u32 *channel_hwirq);
 };
 
 struct meson_gpio_irq_params {
 	unsigned int nr_hwirq;
+	unsigned int nr_channels;
 	bool support_edge_both;
 	unsigned int edge_both_offset;
 	unsigned int edge_single_offset;
@@ -68,28 +77,44 @@ struct meson_gpio_irq_params {
 	struct irq_ctl_ops ops;
 };
 
-#define INIT_MESON_COMMON(irqs, init, sel)			\
+#define INIT_MESON_COMMON(irqs, init, sel, type)		\
 	.nr_hwirq = irqs,					\
 	.ops = {						\
 		.gpio_irq_init = init,				\
 		.gpio_irq_sel_pin = sel,			\
+		.gpio_irq_set_type = type,			\
 	},
 
 #define INIT_MESON8_COMMON_DATA(irqs)				\
 	INIT_MESON_COMMON(irqs, meson_gpio_irq_init_dummy,	\
-			  meson8_gpio_irq_sel_pin)		\
+			  meson8_gpio_irq_sel_pin,		\
+			  meson8_gpio_irq_set_type)		\
 	.edge_single_offset = 0,				\
 	.pol_low_offset = 16,					\
 	.pin_sel_mask = 0xff,					\
+	.nr_channels = 8,					\
 
 #define INIT_MESON_A1_COMMON_DATA(irqs)				\
 	INIT_MESON_COMMON(irqs, meson_a1_gpio_irq_init,		\
-			  meson_a1_gpio_irq_sel_pin)		\
+			  meson_a1_gpio_irq_sel_pin,		\
+			  meson8_gpio_irq_set_type)		\
 	.support_edge_both = true,				\
 	.edge_both_offset = 16,					\
 	.edge_single_offset = 8,				\
 	.pol_low_offset = 0,					\
 	.pin_sel_mask = 0x7f,					\
+	.nr_channels = 8,					\
+
+#define INIT_MESON_S4_COMMON_DATA(irqs)				\
+	INIT_MESON_COMMON(irqs, meson_a1_gpio_irq_init,		\
+			  meson_a1_gpio_irq_sel_pin,		\
+			  meson_s4_gpio_irq_set_type)		\
+	.support_edge_both = true,				\
+	.edge_both_offset = 0,					\
+	.edge_single_offset = 12,				\
+	.pol_low_offset = 0,					\
+	.pin_sel_mask = 0xff,					\
+	.nr_channels = 12,					\
 
 static const struct meson_gpio_irq_params meson8_params = {
 	INIT_MESON8_COMMON_DATA(134)
@@ -121,6 +146,10 @@ static const struct meson_gpio_irq_params a1_params = {
 	INIT_MESON_A1_COMMON_DATA(62)
 };
 
+static const struct meson_gpio_irq_params s4_params = {
+	INIT_MESON_S4_COMMON_DATA(82)
+};
+
 static const struct of_device_id meson_irq_gpio_matches[] = {
 	{ .compatible = "amlogic,meson8-gpio-intc", .data = &meson8_params },
 	{ .compatible = "amlogic,meson8b-gpio-intc", .data = &meson8b_params },
@@ -130,14 +159,15 @@ static const struct of_device_id meson_irq_gpio_matches[] = {
 	{ .compatible = "amlogic,meson-g12a-gpio-intc", .data = &axg_params },
 	{ .compatible = "amlogic,meson-sm1-gpio-intc", .data = &sm1_params },
 	{ .compatible = "amlogic,meson-a1-gpio-intc", .data = &a1_params },
+	{ .compatible = "amlogic,meson-s4-gpio-intc", .data = &s4_params },
 	{ }
 };
 
 struct meson_gpio_irq_controller {
 	const struct meson_gpio_irq_params *params;
 	void __iomem *base;
-	u32 channel_irqs[NUM_CHANNEL];
-	DECLARE_BITMAP(channel_map, NUM_CHANNEL);
+	u32 channel_irqs[MAX_NUM_CHANNEL];
+	DECLARE_BITMAP(channel_map, MAX_NUM_CHANNEL);
 	spinlock_t lock;
 };
 
@@ -207,8 +237,8 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl,
 	spin_lock_irqsave(&ctl->lock, flags);
 
 	/* Find a free channel */
-	idx = find_first_zero_bit(ctl->channel_map, NUM_CHANNEL);
-	if (idx >= NUM_CHANNEL) {
+	idx = find_first_zero_bit(ctl->channel_map, ctl->params->nr_channels);
+	if (idx >= ctl->params->nr_channels) {
 		spin_unlock_irqrestore(&ctl->lock, flags);
 		pr_err("No channel available\n");
 		return -ENOSPC;
@@ -256,9 +286,8 @@ meson_gpio_irq_release_channel(struct meson_gpio_irq_controller *ctl,
 	clear_bit(idx, ctl->channel_map);
 }
 
-static int meson_gpio_irq_type_setup(struct meson_gpio_irq_controller *ctl,
-				     unsigned int type,
-				     u32 *channel_hwirq)
+static int meson8_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl,
+				    unsigned int type, u32 *channel_hwirq)
 {
 	u32 val = 0;
 	unsigned int idx;
@@ -299,6 +328,51 @@ static int meson_gpio_irq_type_setup(struct meson_gpio_irq_controller *ctl,
 	return 0;
 }
 
+/*
+ * gpio irq relative registers for s4
+ * -PADCTRL_GPIO_IRQ_CTRL0
+ * bit[31]:    enable/disable all the irq lines
+ * bit[12-23]: single edge trigger
+ * bit[0-11]:  polarity trigger
+ *
+ * -PADCTRL_GPIO_IRQ_CTRL[X]
+ * bit[0-16]: 7 bits to choose gpio source for irq line 2*[X] - 2
+ * bit[16-22]:7 bits to choose gpio source for irq line 2*[X] - 1
+ * where X = 1-6
+ *
+ * -PADCTRL_GPIO_IRQ_CTRL[7]
+ * bit[0-11]: both edge trigger
+ */
+static int meson_s4_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl,
+				      unsigned int type, u32 *channel_hwirq)
+{
+	u32 val = 0;
+	unsigned int idx;
+
+	idx = meson_gpio_irq_get_channel_idx(ctl, channel_hwirq);
+
+	type &= IRQ_TYPE_SENSE_MASK;
+
+	meson_gpio_irq_update_bits(ctl, REG_EDGE_POL_S4, BIT(idx), 0);
+
+	if (type == IRQ_TYPE_EDGE_BOTH) {
+		val |= BIT(ctl->params->edge_both_offset + idx);
+		meson_gpio_irq_update_bits(ctl, REG_EDGE_POL_S4,
+					   BIT(ctl->params->edge_both_offset + idx), val);
+		return 0;
+	}
+
+	if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_EDGE_FALLING))
+		val |= BIT(ctl->params->pol_low_offset + idx);
+
+	if (type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING))
+		val |= BIT(ctl->params->edge_single_offset + idx);
+
+	meson_gpio_irq_update_bits(ctl, REG_EDGE_POL,
+				   BIT(idx) | BIT(12 + idx), val);
+	return 0;
+};
+
 static unsigned int meson_gpio_irq_type_output(unsigned int type)
 {
 	unsigned int sense = type & IRQ_TYPE_SENSE_MASK;
@@ -323,7 +397,7 @@ static int meson_gpio_irq_set_type(struct irq_data *data, unsigned int type)
 	u32 *channel_hwirq = irq_data_get_irq_chip_data(data);
 	int ret;
 
-	ret = meson_gpio_irq_type_setup(ctl, type, channel_hwirq);
+	ret = ctl->params->ops.gpio_irq_set_type(ctl, type, channel_hwirq);
 	if (ret)
 		return ret;
 
@@ -450,10 +524,10 @@ static int meson_gpio_irq_parse_dt(struct device_node *node, struct meson_gpio_i
 	ret = of_property_read_variable_u32_array(node,
 						  "amlogic,channel-interrupts",
 						  ctl->channel_irqs,
-						  NUM_CHANNEL,
-						  NUM_CHANNEL);
+						  ctl->params->nr_channels,
+						  ctl->params->nr_channels);
 	if (ret < 0) {
-		pr_err("can't get %d channel interrupts\n", NUM_CHANNEL);
+		pr_err("can't get %d channel interrupts\n", ctl->params->nr_channels);
 		return ret;
 	}
 
@@ -507,7 +581,7 @@ static int meson_gpio_irq_of_init(struct device_node *node, struct device_node *
 	}
 
 	pr_info("%d to %d gpio interrupt mux initialized\n",
-		ctl->params->nr_hwirq, NUM_CHANNEL);
+		ctl->params->nr_hwirq, ctl->params->nr_channels);
 
 	return 0;
 
diff --git a/drivers/irqchip/irq-mvebu-pic.c b/drivers/irqchip/irq-mvebu-pic.c
index 870f9866b8da..ef3d3646ccc2 100644
--- a/drivers/irqchip/irq-mvebu-pic.c
+++ b/drivers/irqchip/irq-mvebu-pic.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
+#include <linux/seq_file.h>
 
 #define PIC_CAUSE	       0x0
 #define PIC_MASK	       0x4
@@ -29,7 +30,7 @@ struct mvebu_pic {
 	void __iomem *base;
 	u32 parent_irq;
 	struct irq_domain *domain;
-	struct irq_chip irq_chip;
+	struct platform_device *pdev;
 };
 
 static void mvebu_pic_reset(struct mvebu_pic *pic)
@@ -66,6 +67,20 @@ static void mvebu_pic_unmask_irq(struct irq_data *d)
 	writel(reg, pic->base + PIC_MASK);
 }
 
+static void mvebu_pic_print_chip(struct irq_data *d, struct seq_file *p)
+{
+	struct mvebu_pic *pic = irq_data_get_irq_chip_data(d);
+
+	seq_printf(p, dev_name(&pic->pdev->dev));
+}
+
+static const struct irq_chip mvebu_pic_chip = {
+	.irq_mask	= mvebu_pic_mask_irq,
+	.irq_unmask	= mvebu_pic_unmask_irq,
+	.irq_eoi	= mvebu_pic_eoi_irq,
+	.irq_print_chip	= mvebu_pic_print_chip,
+};
+
 static int mvebu_pic_irq_map(struct irq_domain *domain, unsigned int virq,
 			     irq_hw_number_t hwirq)
 {
@@ -73,8 +88,7 @@ static int mvebu_pic_irq_map(struct irq_domain *domain, unsigned int virq,
 
 	irq_set_percpu_devid(virq);
 	irq_set_chip_data(virq, pic);
-	irq_set_chip_and_handler(virq, &pic->irq_chip,
-				 handle_percpu_devid_irq);
+	irq_set_chip_and_handler(virq, &mvebu_pic_chip, handle_percpu_devid_irq);
 	irq_set_status_flags(virq, IRQ_LEVEL);
 	irq_set_probe(virq);
 
@@ -120,22 +134,16 @@ static int mvebu_pic_probe(struct platform_device *pdev)
 {
 	struct device_node *node = pdev->dev.of_node;
 	struct mvebu_pic *pic;
-	struct irq_chip *irq_chip;
 
 	pic = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_pic), GFP_KERNEL);
 	if (!pic)
 		return -ENOMEM;
 
+	pic->pdev = pdev;
 	pic->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(pic->base))
 		return PTR_ERR(pic->base);
 
-	irq_chip = &pic->irq_chip;
-	irq_chip->name = dev_name(&pdev->dev);
-	irq_chip->irq_mask = mvebu_pic_mask_irq;
-	irq_chip->irq_unmask = mvebu_pic_unmask_irq;
-	irq_chip->irq_eoi = mvebu_pic_eoi_irq;
-
 	pic->parent_irq = irq_of_parse_and_map(node, 0);
 	if (pic->parent_irq <= 0) {
 		dev_err(&pdev->dev, "Failed to parse parent interrupt\n");
diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c
index ba4759b3e269..94230306e0ee 100644
--- a/drivers/irqchip/irq-nvic.c
+++ b/drivers/irqchip/irq-nvic.c
@@ -107,6 +107,7 @@ static int __init nvic_of_init(struct device_node *node,
 
 	if (!nvic_irq_domain) {
 		pr_warn("Failed to allocate irq domain\n");
+		iounmap(nvic_base);
 		return -ENOMEM;
 	}
 
@@ -116,6 +117,7 @@ static int __init nvic_of_init(struct device_node *node,
 	if (ret) {
 		pr_warn("Failed to allocate irq chips\n");
 		irq_domain_remove(nvic_irq_domain);
+		iounmap(nvic_base);
 		return ret;
 	}
 
diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c
new file mode 100644
index 000000000000..eea5a753618c
--- /dev/null
+++ b/drivers/irqchip/irq-qcom-mpm.c
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, Linaro Limited
+ * Copyright (c) 2010-2020, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/mailbox_client.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/slab.h>
+#include <linux/soc/qcom/irq.h>
+#include <linux/spinlock.h>
+
+/*
+ * This is the driver for Qualcomm MPM (MSM Power Manager) interrupt controller,
+ * which is commonly found on Qualcomm SoCs built on the RPM architecture.
+ * Sitting in always-on domain, MPM monitors the wakeup interrupts when SoC is
+ * asleep, and wakes up the AP when one of those interrupts occurs.  This driver
+ * doesn't directly access physical MPM registers though.  Instead, the access
+ * is bridged via a piece of internal memory (SRAM) that is accessible to both
+ * AP and RPM.  This piece of memory is called 'vMPM' in the driver.
+ *
+ * When SoC is awake, the vMPM is owned by AP and the register setup by this
+ * driver all happens on vMPM.  When AP is about to get power collapsed, the
+ * driver sends a mailbox notification to RPM, which will take over the vMPM
+ * ownership and dump vMPM into physical MPM registers.  On wakeup, AP is woken
+ * up by a MPM pin/interrupt, and RPM will copy STATUS registers into vMPM.
+ * Then AP start owning vMPM again.
+ *
+ * vMPM register map:
+ *
+ *    31                              0
+ *    +--------------------------------+
+ *    |            TIMER0              | 0x00
+ *    +--------------------------------+
+ *    |            TIMER1              | 0x04
+ *    +--------------------------------+
+ *    |            ENABLE0             | 0x08
+ *    +--------------------------------+
+ *    |              ...               | ...
+ *    +--------------------------------+
+ *    |            ENABLEn             |
+ *    +--------------------------------+
+ *    |          FALLING_EDGE0         |
+ *    +--------------------------------+
+ *    |              ...               |
+ *    +--------------------------------+
+ *    |            STATUSn             |
+ *    +--------------------------------+
+ *
+ *    n = DIV_ROUND_UP(pin_cnt, 32)
+ *
+ */
+
+#define MPM_REG_ENABLE		0
+#define MPM_REG_FALLING_EDGE	1
+#define MPM_REG_RISING_EDGE	2
+#define MPM_REG_POLARITY	3
+#define MPM_REG_STATUS		4
+
+/* MPM pin map to GIC hwirq */
+struct mpm_gic_map {
+	int pin;
+	irq_hw_number_t hwirq;
+};
+
+struct qcom_mpm_priv {
+	void __iomem *base;
+	raw_spinlock_t lock;
+	struct mbox_client mbox_client;
+	struct mbox_chan *mbox_chan;
+	struct mpm_gic_map *maps;
+	unsigned int map_cnt;
+	unsigned int reg_stride;
+	struct irq_domain *domain;
+	struct generic_pm_domain genpd;
+};
+
+static u32 qcom_mpm_read(struct qcom_mpm_priv *priv, unsigned int reg,
+			 unsigned int index)
+{
+	unsigned int offset = (reg * priv->reg_stride + index + 2) * 4;
+
+	return readl_relaxed(priv->base + offset);
+}
+
+static void qcom_mpm_write(struct qcom_mpm_priv *priv, unsigned int reg,
+			   unsigned int index, u32 val)
+{
+	unsigned int offset = (reg * priv->reg_stride + index + 2) * 4;
+
+	writel_relaxed(val, priv->base + offset);
+
+	/* Ensure the write is completed */
+	wmb();
+}
+
+static void qcom_mpm_enable_irq(struct irq_data *d, bool en)
+{
+	struct qcom_mpm_priv *priv = d->chip_data;
+	int pin = d->hwirq;
+	unsigned int index = pin / 32;
+	unsigned int shift = pin % 32;
+	unsigned long flags, val;
+
+	raw_spin_lock_irqsave(&priv->lock, flags);
+
+	val = qcom_mpm_read(priv, MPM_REG_ENABLE, index);
+	__assign_bit(shift, &val, en);
+	qcom_mpm_write(priv, MPM_REG_ENABLE, index, val);
+
+	raw_spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static void qcom_mpm_mask(struct irq_data *d)
+{
+	qcom_mpm_enable_irq(d, false);
+
+	if (d->parent_data)
+		irq_chip_mask_parent(d);
+}
+
+static void qcom_mpm_unmask(struct irq_data *d)
+{
+	qcom_mpm_enable_irq(d, true);
+
+	if (d->parent_data)
+		irq_chip_unmask_parent(d);
+}
+
+static void mpm_set_type(struct qcom_mpm_priv *priv, bool set, unsigned int reg,
+			 unsigned int index, unsigned int shift)
+{
+	unsigned long flags, val;
+
+	raw_spin_lock_irqsave(&priv->lock, flags);
+
+	val = qcom_mpm_read(priv, reg, index);
+	__assign_bit(shift, &val, set);
+	qcom_mpm_write(priv, reg, index, val);
+
+	raw_spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int qcom_mpm_set_type(struct irq_data *d, unsigned int type)
+{
+	struct qcom_mpm_priv *priv = d->chip_data;
+	int pin = d->hwirq;
+	unsigned int index = pin / 32;
+	unsigned int shift = pin % 32;
+
+	if (type & IRQ_TYPE_EDGE_RISING)
+		mpm_set_type(priv, true, MPM_REG_RISING_EDGE, index, shift);
+	else
+		mpm_set_type(priv, false, MPM_REG_RISING_EDGE, index, shift);
+
+	if (type & IRQ_TYPE_EDGE_FALLING)
+		mpm_set_type(priv, true, MPM_REG_FALLING_EDGE, index, shift);
+	else
+		mpm_set_type(priv, false, MPM_REG_FALLING_EDGE, index, shift);
+
+	if (type & IRQ_TYPE_LEVEL_HIGH)
+		mpm_set_type(priv, true, MPM_REG_POLARITY, index, shift);
+	else
+		mpm_set_type(priv, false, MPM_REG_POLARITY, index, shift);
+
+	if (!d->parent_data)
+		return 0;
+
+	if (type & IRQ_TYPE_EDGE_BOTH)
+		type = IRQ_TYPE_EDGE_RISING;
+
+	if (type & IRQ_TYPE_LEVEL_MASK)
+		type = IRQ_TYPE_LEVEL_HIGH;
+
+	return irq_chip_set_type_parent(d, type);
+}
+
+static struct irq_chip qcom_mpm_chip = {
+	.name			= "mpm",
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_mask		= qcom_mpm_mask,
+	.irq_unmask		= qcom_mpm_unmask,
+	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.irq_set_type		= qcom_mpm_set_type,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+	.flags			= IRQCHIP_MASK_ON_SUSPEND |
+				  IRQCHIP_SKIP_SET_WAKE,
+};
+
+static struct mpm_gic_map *get_mpm_gic_map(struct qcom_mpm_priv *priv, int pin)
+{
+	struct mpm_gic_map *maps = priv->maps;
+	int i;
+
+	for (i = 0; i < priv->map_cnt; i++) {
+		if (maps[i].pin == pin)
+			return &maps[i];
+	}
+
+	return NULL;
+}
+
+static int qcom_mpm_alloc(struct irq_domain *domain, unsigned int virq,
+			  unsigned int nr_irqs, void *data)
+{
+	struct qcom_mpm_priv *priv = domain->host_data;
+	struct irq_fwspec *fwspec = data;
+	struct irq_fwspec parent_fwspec;
+	struct mpm_gic_map *map;
+	irq_hw_number_t pin;
+	unsigned int type;
+	int  ret;
+
+	ret = irq_domain_translate_twocell(domain, fwspec, &pin, &type);
+	if (ret)
+		return ret;
+
+	ret = irq_domain_set_hwirq_and_chip(domain, virq, pin,
+					    &qcom_mpm_chip, priv);
+	if (ret)
+		return ret;
+
+	map = get_mpm_gic_map(priv, pin);
+	if (map == NULL)
+		return irq_domain_disconnect_hierarchy(domain->parent, virq);
+
+	if (type & IRQ_TYPE_EDGE_BOTH)
+		type = IRQ_TYPE_EDGE_RISING;
+
+	if (type & IRQ_TYPE_LEVEL_MASK)
+		type = IRQ_TYPE_LEVEL_HIGH;
+
+	parent_fwspec.fwnode = domain->parent->fwnode;
+	parent_fwspec.param_count = 3;
+	parent_fwspec.param[0] = 0;
+	parent_fwspec.param[1] = map->hwirq;
+	parent_fwspec.param[2] = type;
+
+	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs,
+					    &parent_fwspec);
+}
+
+static const struct irq_domain_ops qcom_mpm_ops = {
+	.alloc		= qcom_mpm_alloc,
+	.free		= irq_domain_free_irqs_common,
+	.translate	= irq_domain_translate_twocell,
+};
+
+/* Triggered by RPM when system resumes from deep sleep */
+static irqreturn_t qcom_mpm_handler(int irq, void *dev_id)
+{
+	struct qcom_mpm_priv *priv = dev_id;
+	unsigned long enable, pending;
+	irqreturn_t ret = IRQ_NONE;
+	unsigned long flags;
+	int i, j;
+
+	for (i = 0; i < priv->reg_stride; i++) {
+		raw_spin_lock_irqsave(&priv->lock, flags);
+		enable = qcom_mpm_read(priv, MPM_REG_ENABLE, i);
+		pending = qcom_mpm_read(priv, MPM_REG_STATUS, i);
+		pending &= enable;
+		raw_spin_unlock_irqrestore(&priv->lock, flags);
+
+		for_each_set_bit(j, &pending, 32) {
+			unsigned int pin = 32 * i + j;
+			struct irq_desc *desc = irq_resolve_mapping(priv->domain, pin);
+			struct irq_data *d = &desc->irq_data;
+
+			if (!irqd_is_level_type(d))
+				irq_set_irqchip_state(d->irq,
+						IRQCHIP_STATE_PENDING, true);
+			ret = IRQ_HANDLED;
+		}
+	}
+
+	return ret;
+}
+
+static int mpm_pd_power_off(struct generic_pm_domain *genpd)
+{
+	struct qcom_mpm_priv *priv = container_of(genpd, struct qcom_mpm_priv,
+						  genpd);
+	int i, ret;
+
+	for (i = 0; i < priv->reg_stride; i++)
+		qcom_mpm_write(priv, MPM_REG_STATUS, i, 0);
+
+	/* Notify RPM to write vMPM into HW */
+	ret = mbox_send_message(priv->mbox_chan, NULL);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static bool gic_hwirq_is_mapped(struct mpm_gic_map *maps, int cnt, u32 hwirq)
+{
+	int i;
+
+	for (i = 0; i < cnt; i++)
+		if (maps[i].hwirq == hwirq)
+			return true;
+
+	return false;
+}
+
+static int qcom_mpm_init(struct device_node *np, struct device_node *parent)
+{
+	struct platform_device *pdev = of_find_device_by_node(np);
+	struct device *dev = &pdev->dev;
+	struct irq_domain *parent_domain;
+	struct generic_pm_domain *genpd;
+	struct qcom_mpm_priv *priv;
+	unsigned int pin_cnt;
+	int i, irq;
+	int ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	ret = of_property_read_u32(np, "qcom,mpm-pin-count", &pin_cnt);
+	if (ret) {
+		dev_err(dev, "failed to read qcom,mpm-pin-count: %d\n", ret);
+		return ret;
+	}
+
+	priv->reg_stride = DIV_ROUND_UP(pin_cnt, 32);
+
+	ret = of_property_count_u32_elems(np, "qcom,mpm-pin-map");
+	if (ret < 0) {
+		dev_err(dev, "failed to read qcom,mpm-pin-map: %d\n", ret);
+		return ret;
+	}
+
+	if (ret % 2) {
+		dev_err(dev, "invalid qcom,mpm-pin-map\n");
+		return -EINVAL;
+	}
+
+	priv->map_cnt = ret / 2;
+	priv->maps = devm_kcalloc(dev, priv->map_cnt, sizeof(*priv->maps),
+				  GFP_KERNEL);
+	if (!priv->maps)
+		return -ENOMEM;
+
+	for (i = 0; i < priv->map_cnt; i++) {
+		u32 pin, hwirq;
+
+		of_property_read_u32_index(np, "qcom,mpm-pin-map", i * 2, &pin);
+		of_property_read_u32_index(np, "qcom,mpm-pin-map", i * 2 + 1, &hwirq);
+
+		if (gic_hwirq_is_mapped(priv->maps, i, hwirq)) {
+			dev_warn(dev, "failed to map pin %d as GIC hwirq %d is already mapped\n",
+				 pin, hwirq);
+			continue;
+		}
+
+		priv->maps[i].pin = pin;
+		priv->maps[i].hwirq = hwirq;
+	}
+
+	raw_spin_lock_init(&priv->lock);
+
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
+	if (!priv->base)
+		return PTR_ERR(priv->base);
+
+	for (i = 0; i < priv->reg_stride; i++) {
+		qcom_mpm_write(priv, MPM_REG_ENABLE, i, 0);
+		qcom_mpm_write(priv, MPM_REG_FALLING_EDGE, i, 0);
+		qcom_mpm_write(priv, MPM_REG_RISING_EDGE, i, 0);
+		qcom_mpm_write(priv, MPM_REG_POLARITY, i, 0);
+		qcom_mpm_write(priv, MPM_REG_STATUS, i, 0);
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	genpd = &priv->genpd;
+	genpd->flags = GENPD_FLAG_IRQ_SAFE;
+	genpd->power_off = mpm_pd_power_off;
+
+	genpd->name = devm_kasprintf(dev, GFP_KERNEL, "%s", dev_name(dev));
+	if (!genpd->name)
+		return -ENOMEM;
+
+	ret = pm_genpd_init(genpd, NULL, false);
+	if (ret) {
+		dev_err(dev, "failed to init genpd: %d\n", ret);
+		return ret;
+	}
+
+	ret = of_genpd_add_provider_simple(np, genpd);
+	if (ret) {
+		dev_err(dev, "failed to add genpd provider: %d\n", ret);
+		goto remove_genpd;
+	}
+
+	priv->mbox_client.dev = dev;
+	priv->mbox_chan = mbox_request_channel(&priv->mbox_client, 0);
+	if (IS_ERR(priv->mbox_chan)) {
+		ret = PTR_ERR(priv->mbox_chan);
+		dev_err(dev, "failed to acquire IPC channel: %d\n", ret);
+		return ret;
+	}
+
+	parent_domain = irq_find_host(parent);
+	if (!parent_domain) {
+		dev_err(dev, "failed to find MPM parent domain\n");
+		ret = -ENXIO;
+		goto free_mbox;
+	}
+
+	priv->domain = irq_domain_create_hierarchy(parent_domain,
+				IRQ_DOMAIN_FLAG_QCOM_MPM_WAKEUP, pin_cnt,
+				of_node_to_fwnode(np), &qcom_mpm_ops, priv);
+	if (!priv->domain) {
+		dev_err(dev, "failed to create MPM domain\n");
+		ret = -ENOMEM;
+		goto free_mbox;
+	}
+
+	irq_domain_update_bus_token(priv->domain, DOMAIN_BUS_WAKEUP);
+
+	ret = devm_request_irq(dev, irq, qcom_mpm_handler, IRQF_NO_SUSPEND,
+			       "qcom_mpm", priv);
+	if (ret) {
+		dev_err(dev, "failed to request irq: %d\n", ret);
+		goto remove_domain;
+	}
+
+	return 0;
+
+remove_domain:
+	irq_domain_remove(priv->domain);
+free_mbox:
+	mbox_free_channel(priv->mbox_chan);
+remove_genpd:
+	pm_genpd_remove(genpd);
+	return ret;
+}
+
+IRQCHIP_PLATFORM_DRIVER_BEGIN(qcom_mpm)
+IRQCHIP_MATCH("qcom,mpm", qcom_mpm_init)
+IRQCHIP_PLATFORM_DRIVER_END(qcom_mpm)
+MODULE_DESCRIPTION("Qualcomm Technologies, Inc. MSM Power Manager");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c
index 37f9a4499fdb..e83756aca14e 100644
--- a/drivers/irqchip/irq-renesas-intc-irqpin.c
+++ b/drivers/irqchip/irq-renesas-intc-irqpin.c
@@ -508,7 +508,6 @@ static int intc_irqpin_probe(struct platform_device *pdev)
 
 	irq_chip = &p->irq_chip;
 	irq_chip->name = "intc-irqpin";
-	irq_chip->parent_device = dev;
 	irq_chip->irq_mask = disable_fn;
 	irq_chip->irq_unmask = enable_fn;
 	irq_chip->irq_set_type = intc_irqpin_irq_set_type;
@@ -523,6 +522,8 @@ static int intc_irqpin_probe(struct platform_device *pdev)
 		goto err0;
 	}
 
+	irq_domain_set_pm_device(p->irq_domain, dev);
+
 	if (p->shared_irqs) {
 		/* request one shared interrupt */
 		if (devm_request_irq(dev, p->irq[0].requested_irq,
diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c
index 909325f88239..1ee5e9941f67 100644
--- a/drivers/irqchip/irq-renesas-irqc.c
+++ b/drivers/irqchip/irq-renesas-irqc.c
@@ -188,13 +188,14 @@ static int irqc_probe(struct platform_device *pdev)
 	p->gc->reg_base = p->cpu_int_base;
 	p->gc->chip_types[0].regs.enable = IRQC_EN_SET;
 	p->gc->chip_types[0].regs.disable = IRQC_EN_STS;
-	p->gc->chip_types[0].chip.parent_device = dev;
 	p->gc->chip_types[0].chip.irq_mask = irq_gc_mask_disable_reg;
 	p->gc->chip_types[0].chip.irq_unmask = irq_gc_unmask_enable_reg;
 	p->gc->chip_types[0].chip.irq_set_type	= irqc_irq_set_type;
 	p->gc->chip_types[0].chip.irq_set_wake	= irqc_irq_set_wake;
 	p->gc->chip_types[0].chip.flags	= IRQCHIP_MASK_ON_SUSPEND;
 
+	irq_domain_set_pm_device(p->irq_domain, dev);
+
 	/* request interrupts one by one */
 	for (k = 0; k < p->number_of_irqs; k++) {
 		if (devm_request_irq(dev, p->irq[k].requested_irq,
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 09cc98266d30..bb87e4c3b88e 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -44,8 +44,8 @@
  * Each hart context has a vector of interrupt enable bits associated with it.
  * There's one bit for each interrupt source.
  */
-#define ENABLE_BASE			0x2000
-#define     ENABLE_PER_HART		0x80
+#define CONTEXT_ENABLE_BASE		0x2000
+#define     CONTEXT_ENABLE_SIZE		0x80
 
 /*
  * Each hart context has a set of control registers associated with it.  Right
@@ -53,7 +53,7 @@
  * take an interrupt, and a register to claim interrupts.
  */
 #define CONTEXT_BASE			0x200000
-#define     CONTEXT_PER_HART		0x1000
+#define     CONTEXT_SIZE		0x1000
 #define     CONTEXT_THRESHOLD		0x00
 #define     CONTEXT_CLAIM		0x04
 
@@ -81,17 +81,21 @@ static int plic_parent_irq __ro_after_init;
 static bool plic_cpuhp_setup_done __ro_after_init;
 static DEFINE_PER_CPU(struct plic_handler, plic_handlers);
 
-static inline void plic_toggle(struct plic_handler *handler,
-				int hwirq, int enable)
+static void __plic_toggle(void __iomem *enable_base, int hwirq, int enable)
 {
-	u32 __iomem *reg = handler->enable_base + (hwirq / 32) * sizeof(u32);
+	u32 __iomem *reg = enable_base + (hwirq / 32) * sizeof(u32);
 	u32 hwirq_mask = 1 << (hwirq % 32);
 
-	raw_spin_lock(&handler->enable_lock);
 	if (enable)
 		writel(readl(reg) | hwirq_mask, reg);
 	else
 		writel(readl(reg) & ~hwirq_mask, reg);
+}
+
+static void plic_toggle(struct plic_handler *handler, int hwirq, int enable)
+{
+	raw_spin_lock(&handler->enable_lock);
+	__plic_toggle(handler->enable_base, hwirq, enable);
 	raw_spin_unlock(&handler->enable_lock);
 }
 
@@ -324,8 +328,18 @@ static int __init plic_init(struct device_node *node,
 		 * Skip contexts other than external interrupts for our
 		 * privilege level.
 		 */
-		if (parent.args[0] != RV_IRQ_EXT)
+		if (parent.args[0] != RV_IRQ_EXT) {
+			/* Disable S-mode enable bits if running in M-mode. */
+			if (IS_ENABLED(CONFIG_RISCV_M_MODE)) {
+				void __iomem *enable_base = priv->regs +
+					CONTEXT_ENABLE_BASE +
+					i * CONTEXT_ENABLE_SIZE;
+
+				for (hwirq = 1; hwirq <= nr_irqs; hwirq++)
+					__plic_toggle(enable_base, hwirq, 0);
+			}
 			continue;
+		}
 
 		hartid = riscv_of_parent_hartid(parent.np);
 		if (hartid < 0) {
@@ -361,11 +375,11 @@ static int __init plic_init(struct device_node *node,
 
 		cpumask_set_cpu(cpu, &priv->lmask);
 		handler->present = true;
-		handler->hart_base =
-			priv->regs + CONTEXT_BASE + i * CONTEXT_PER_HART;
+		handler->hart_base = priv->regs + CONTEXT_BASE +
+			i * CONTEXT_SIZE;
 		raw_spin_lock_init(&handler->enable_lock);
-		handler->enable_base =
-			priv->regs + ENABLE_BASE + i * ENABLE_PER_HART;
+		handler->enable_base = priv->regs + CONTEXT_ENABLE_BASE +
+			i * CONTEXT_ENABLE_SIZE;
 		handler->priv = priv;
 done:
 		for (hwirq = 1; hwirq <= nr_irqs; hwirq++)
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index b7cb2da71888..9d18f47040eb 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -214,6 +214,48 @@ static const struct stm32_desc_irq stm32mp1_desc_irq[] = {
 	{ .exti = 73, .irq_parent = 129, .chip = &stm32_exti_h_chip },
 };
 
+static const struct stm32_desc_irq stm32mp13_desc_irq[] = {
+	{ .exti = 0, .irq_parent = 6, .chip = &stm32_exti_h_chip },
+	{ .exti = 1, .irq_parent = 7, .chip = &stm32_exti_h_chip },
+	{ .exti = 2, .irq_parent = 8, .chip = &stm32_exti_h_chip },
+	{ .exti = 3, .irq_parent = 9, .chip = &stm32_exti_h_chip },
+	{ .exti = 4, .irq_parent = 10, .chip = &stm32_exti_h_chip },
+	{ .exti = 5, .irq_parent = 24, .chip = &stm32_exti_h_chip },
+	{ .exti = 6, .irq_parent = 65, .chip = &stm32_exti_h_chip },
+	{ .exti = 7, .irq_parent = 66, .chip = &stm32_exti_h_chip },
+	{ .exti = 8, .irq_parent = 67, .chip = &stm32_exti_h_chip },
+	{ .exti = 9, .irq_parent = 68, .chip = &stm32_exti_h_chip },
+	{ .exti = 10, .irq_parent = 41, .chip = &stm32_exti_h_chip },
+	{ .exti = 11, .irq_parent = 43, .chip = &stm32_exti_h_chip },
+	{ .exti = 12, .irq_parent = 77, .chip = &stm32_exti_h_chip },
+	{ .exti = 13, .irq_parent = 78, .chip = &stm32_exti_h_chip },
+	{ .exti = 14, .irq_parent = 106, .chip = &stm32_exti_h_chip },
+	{ .exti = 15, .irq_parent = 109, .chip = &stm32_exti_h_chip },
+	{ .exti = 16, .irq_parent = 1, .chip = &stm32_exti_h_chip },
+	{ .exti = 19, .irq_parent = 3, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 21, .irq_parent = 32, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 22, .irq_parent = 34, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 23, .irq_parent = 73, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 24, .irq_parent = 93, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 25, .irq_parent = 114, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 26, .irq_parent = 38, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 27, .irq_parent = 39, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 28, .irq_parent = 40, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 29, .irq_parent = 72, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 30, .irq_parent = 53, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 31, .irq_parent = 54, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 32, .irq_parent = 83, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 33, .irq_parent = 84, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 44, .irq_parent = 96, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 47, .irq_parent = 92, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 48, .irq_parent = 116, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 50, .irq_parent = 117, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 52, .irq_parent = 118, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 53, .irq_parent = 119, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 68, .irq_parent = 63, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 70, .irq_parent = 98, .chip = &stm32_exti_h_chip_direct },
+};
+
 static const struct stm32_exti_drv_data stm32mp1_drv_data = {
 	.exti_banks = stm32mp1_exti_banks,
 	.bank_nr = ARRAY_SIZE(stm32mp1_exti_banks),
@@ -221,6 +263,13 @@ static const struct stm32_exti_drv_data stm32mp1_drv_data = {
 	.irq_nr = ARRAY_SIZE(stm32mp1_desc_irq),
 };
 
+static const struct stm32_exti_drv_data stm32mp13_drv_data = {
+	.exti_banks = stm32mp1_exti_banks,
+	.bank_nr = ARRAY_SIZE(stm32mp1_exti_banks),
+	.desc_irqs = stm32mp13_desc_irq,
+	.irq_nr = ARRAY_SIZE(stm32mp13_desc_irq),
+};
+
 static const struct
 stm32_desc_irq *stm32_exti_get_desc(const struct stm32_exti_drv_data *drv_data,
 				    irq_hw_number_t hwirq)
@@ -922,6 +971,7 @@ static int stm32_exti_probe(struct platform_device *pdev)
 /* platform driver only for MP1 */
 static const struct of_device_id stm32_exti_ids[] = {
 	{ .compatible = "st,stm32mp1-exti", .data = &stm32mp1_drv_data},
+	{ .compatible = "st,stm32mp13-exti", .data = &stm32mp13_drv_data},
 	{},
 };
 MODULE_DEVICE_TABLE(of, stm32_exti_ids);
diff --git a/drivers/irqchip/irq-ts4800.c b/drivers/irqchip/irq-ts4800.c
index f032db23b30f..b2d61d4f6fe6 100644
--- a/drivers/irqchip/irq-ts4800.c
+++ b/drivers/irqchip/irq-ts4800.c
@@ -19,14 +19,15 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
+#include <linux/seq_file.h>
 
 #define IRQ_MASK        0x4
 #define IRQ_STATUS      0x8
 
 struct ts4800_irq_data {
 	void __iomem            *base;
+	struct platform_device	*pdev;
 	struct irq_domain       *domain;
-	struct irq_chip         irq_chip;
 };
 
 static void ts4800_irq_mask(struct irq_data *d)
@@ -47,12 +48,25 @@ static void ts4800_irq_unmask(struct irq_data *d)
 	writew(reg & ~mask, data->base + IRQ_MASK);
 }
 
+static void ts4800_irq_print_chip(struct irq_data *d, struct seq_file *p)
+{
+	struct ts4800_irq_data *data = irq_data_get_irq_chip_data(d);
+
+	seq_printf(p, "%s", dev_name(&data->pdev->dev));
+}
+
+static const struct irq_chip ts4800_chip = {
+	.irq_mask	= ts4800_irq_mask,
+	.irq_unmask	= ts4800_irq_unmask,
+	.irq_print_chip	= ts4800_irq_print_chip,
+};
+
 static int ts4800_irqdomain_map(struct irq_domain *d, unsigned int irq,
 				irq_hw_number_t hwirq)
 {
 	struct ts4800_irq_data *data = d->host_data;
 
-	irq_set_chip_and_handler(irq, &data->irq_chip, handle_simple_irq);
+	irq_set_chip_and_handler(irq, &ts4800_chip, handle_simple_irq);
 	irq_set_chip_data(irq, data);
 	irq_set_noprobe(irq);
 
@@ -92,13 +106,13 @@ static int ts4800_ic_probe(struct platform_device *pdev)
 {
 	struct device_node *node = pdev->dev.of_node;
 	struct ts4800_irq_data *data;
-	struct irq_chip *irq_chip;
 	int parent_irq;
 
 	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
+	data->pdev = pdev;
 	data->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(data->base))
 		return PTR_ERR(data->base);
@@ -111,11 +125,6 @@ static int ts4800_ic_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	irq_chip = &data->irq_chip;
-	irq_chip->name = dev_name(&pdev->dev);
-	irq_chip->irq_mask = ts4800_irq_mask;
-	irq_chip->irq_unmask = ts4800_irq_unmask;
-
 	data->domain = irq_domain_add_linear(node, 8, &ts4800_ic_ops, data);
 	if (!data->domain) {
 		dev_err(&pdev->dev, "cannot add IRQ domain\n");
diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c
index f2757b6aecc8..ba543ed9c154 100644
--- a/drivers/irqchip/irq-versatile-fpga.c
+++ b/drivers/irqchip/irq-versatile-fpga.c
@@ -7,12 +7,12 @@
 #include <linux/io.h>
 #include <linux/irqchip.h>
 #include <linux/irqchip/chained_irq.h>
-#include <linux/irqchip/versatile-fpga.h>
 #include <linux/irqdomain.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/seq_file.h>
 
 #include <asm/exception.h>
 #include <asm/mach/irq.h>
@@ -34,14 +34,12 @@
 /**
  * struct fpga_irq_data - irq data container for the FPGA IRQ controller
  * @base: memory offset in virtual memory
- * @chip: chip container for this instance
  * @domain: IRQ domain for this instance
  * @valid: mask for valid IRQs on this controller
  * @used_irqs: number of active IRQs on this controller
  */
 struct fpga_irq_data {
 	void __iomem *base;
-	struct irq_chip chip;
 	u32 valid;
 	struct irq_domain *domain;
 	u8 used_irqs;
@@ -67,6 +65,20 @@ static void fpga_irq_unmask(struct irq_data *d)
 	writel(mask, f->base + IRQ_ENABLE_SET);
 }
 
+static void fpga_irq_print_chip(struct irq_data *d, struct seq_file *p)
+{
+	struct fpga_irq_data *f = irq_data_get_irq_chip_data(d);
+
+	seq_printf(p, irq_domain_get_of_node(f->domain)->name);
+}
+
+static const struct irq_chip fpga_chip = {
+	.irq_ack	= fpga_irq_mask,
+	.irq_mask	= fpga_irq_mask,
+	.irq_unmask	= fpga_irq_unmask,
+	.irq_print_chip	= fpga_irq_print_chip,
+};
+
 static void fpga_irq_handle(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
@@ -116,7 +128,7 @@ static int handle_one_fpga(struct fpga_irq_data *f, struct pt_regs *regs)
  * Keep iterating over all registered FPGA IRQ controllers until there are
  * no pending interrupts.
  */
-asmlinkage void __exception_irq_entry fpga_handle_irq(struct pt_regs *regs)
+static asmlinkage void __exception_irq_entry fpga_handle_irq(struct pt_regs *regs)
 {
 	int i, handled;
 
@@ -135,8 +147,7 @@ static int fpga_irqdomain_map(struct irq_domain *d, unsigned int irq,
 	if (!(f->valid & BIT(hwirq)))
 		return -EPERM;
 	irq_set_chip_data(irq, f);
-	irq_set_chip_and_handler(irq, &f->chip,
-				handle_level_irq);
+	irq_set_chip_and_handler(irq, &fpga_chip, handle_level_irq);
 	irq_set_probe(irq);
 	return 0;
 }
@@ -146,8 +157,8 @@ static const struct irq_domain_ops fpga_irqdomain_ops = {
 	.xlate = irq_domain_xlate_onetwocell,
 };
 
-void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start,
-			  int parent_irq, u32 valid, struct device_node *node)
+static void __init fpga_irq_init(void __iomem *base, int parent_irq,
+				 u32 valid, struct device_node *node)
 {
 	struct fpga_irq_data *f;
 	int i;
@@ -158,10 +169,6 @@ void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start,
 	}
 	f = &fpga_irq_devices[fpga_irq_id];
 	f->base = base;
-	f->chip.name = name;
-	f->chip.irq_ack = fpga_irq_mask;
-	f->chip.irq_mask = fpga_irq_mask;
-	f->chip.irq_unmask = fpga_irq_unmask;
 	f->valid = valid;
 
 	if (parent_irq != -1) {
@@ -169,20 +176,19 @@ void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start,
 						 f);
 	}
 
-	/* This will also allocate irq descriptors */
-	f->domain = irq_domain_add_simple(node, fls(valid), irq_start,
+	f->domain = irq_domain_add_linear(node, fls(valid),
 					  &fpga_irqdomain_ops, f);
 
 	/* This will allocate all valid descriptors in the linear case */
 	for (i = 0; i < fls(valid); i++)
 		if (valid & BIT(i)) {
-			if (!irq_start)
-				irq_create_mapping(f->domain, i);
+			/* Is this still required? */
+			irq_create_mapping(f->domain, i);
 			f->used_irqs++;
 		}
 
 	pr_info("FPGA IRQ chip %d \"%s\" @ %p, %u irqs",
-		fpga_irq_id, name, base, f->used_irqs);
+		fpga_irq_id, node->name, base, f->used_irqs);
 	if (parent_irq != -1)
 		pr_cont(", parent IRQ: %d\n", parent_irq);
 	else
@@ -192,8 +198,8 @@ void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start,
 }
 
 #ifdef CONFIG_OF
-int __init fpga_irq_of_init(struct device_node *node,
-			    struct device_node *parent)
+static int __init fpga_irq_of_init(struct device_node *node,
+				   struct device_node *parent)
 {
 	void __iomem *base;
 	u32 clear_mask;
@@ -222,7 +228,7 @@ int __init fpga_irq_of_init(struct device_node *node,
 		parent_irq = -1;
 	}
 
-	fpga_irq_init(base, node->name, 0, parent_irq, valid_mask, node);
+	fpga_irq_init(base, parent_irq, valid_mask, node);
 
 	/*
 	 * On Versatile AB/PB, some secondary interrupts have a direct
diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c
index 356a59755d63..238d3d344949 100644
--- a/drivers/irqchip/irq-xilinx-intc.c
+++ b/drivers/irqchip/irq-xilinx-intc.c
@@ -32,6 +32,8 @@
 #define MER_ME (1<<0)
 #define MER_HIE (1<<1)
 
+#define SPURIOUS_IRQ	(-1U)
+
 static DEFINE_STATIC_KEY_FALSE(xintc_is_be);
 
 struct xintc_irq_chip {
@@ -110,20 +112,6 @@ static struct irq_chip intc_dev = {
 	.irq_mask_ack = intc_mask_ack,
 };
 
-unsigned int xintc_get_irq(void)
-{
-	unsigned int irq = -1;
-	u32 hwirq;
-
-	hwirq = xintc_read(primary_intc, IVR);
-	if (hwirq != -1U)
-		irq = irq_find_mapping(primary_intc->root_domain, hwirq);
-
-	pr_debug("irq-xilinx: hwirq=%d, irq=%d\n", hwirq, irq);
-
-	return irq;
-}
-
 static int xintc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
 {
 	struct xintc_irq_chip *irqc = d->host_data;
@@ -164,6 +152,19 @@ static void xil_intc_irq_handler(struct irq_desc *desc)
 	chained_irq_exit(chip, desc);
 }
 
+static void xil_intc_handle_irq(struct pt_regs *regs)
+{
+	u32 hwirq;
+
+	do {
+		hwirq = xintc_read(primary_intc, IVR);
+		if (unlikely(hwirq == SPURIOUS_IRQ))
+			break;
+
+		generic_handle_domain_irq(primary_intc->root_domain, hwirq);
+	} while (true);
+}
+
 static int __init xilinx_intc_of_init(struct device_node *intc,
 					     struct device_node *parent)
 {
@@ -233,6 +234,7 @@ static int __init xilinx_intc_of_init(struct device_node *intc,
 	} else {
 		primary_intc = irqc;
 		irq_set_default_host(primary_intc->root_domain);
+		set_handle_irq(xil_intc_handle_irq);
 	}
 
 	return 0;
diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
index 173e6520e06e..d96916cf6a41 100644
--- a/drivers/irqchip/qcom-pdc.c
+++ b/drivers/irqchip/qcom-pdc.c
@@ -21,23 +21,19 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 
-#define PDC_MAX_IRQS		168
 #define PDC_MAX_GPIO_IRQS	256
 
-#define CLEAR_INTR(reg, intr)	(reg & ~(1 << intr))
-#define ENABLE_INTR(reg, intr)	(reg | (1 << intr))
-
 #define IRQ_ENABLE_BANK		0x10
 #define IRQ_i_CFG		0x110
 
-#define PDC_NO_PARENT_IRQ	~0UL
-
 struct pdc_pin_region {
 	u32 pin_base;
 	u32 parent_base;
 	u32 cnt;
 };
 
+#define pin_to_hwirq(r, p)	((r)->parent_base + (p) - (r)->pin_base)
+
 static DEFINE_RAW_SPINLOCK(pdc_lock);
 static void __iomem *pdc_base;
 static struct pdc_pin_region *pdc_region;
@@ -56,17 +52,18 @@ static u32 pdc_reg_read(int reg, u32 i)
 static void pdc_enable_intr(struct irq_data *d, bool on)
 {
 	int pin_out = d->hwirq;
+	unsigned long enable;
+	unsigned long flags;
 	u32 index, mask;
-	u32 enable;
 
 	index = pin_out / 32;
 	mask = pin_out % 32;
 
-	raw_spin_lock(&pdc_lock);
+	raw_spin_lock_irqsave(&pdc_lock, flags);
 	enable = pdc_reg_read(IRQ_ENABLE_BANK, index);
-	enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask);
+	__assign_bit(mask, &enable, on);
 	pdc_reg_write(IRQ_ENABLE_BANK, index, enable);
-	raw_spin_unlock(&pdc_lock);
+	raw_spin_unlock_irqrestore(&pdc_lock, flags);
 }
 
 static void qcom_pdc_gic_disable(struct irq_data *d)
@@ -186,34 +183,17 @@ static struct irq_chip qcom_pdc_gic_chip = {
 	.irq_set_affinity	= irq_chip_set_affinity_parent,
 };
 
-static irq_hw_number_t get_parent_hwirq(int pin)
+static struct pdc_pin_region *get_pin_region(int pin)
 {
 	int i;
-	struct pdc_pin_region *region;
 
 	for (i = 0; i < pdc_region_cnt; i++) {
-		region = &pdc_region[i];
-		if (pin >= region->pin_base &&
-		    pin < region->pin_base + region->cnt)
-			return (region->parent_base + pin - region->pin_base);
+		if (pin >= pdc_region[i].pin_base &&
+		    pin < pdc_region[i].pin_base + pdc_region[i].cnt)
+			return &pdc_region[i];
 	}
 
-	return PDC_NO_PARENT_IRQ;
-}
-
-static int qcom_pdc_translate(struct irq_domain *d, struct irq_fwspec *fwspec,
-			      unsigned long *hwirq, unsigned int *type)
-{
-	if (is_of_node(fwspec->fwnode)) {
-		if (fwspec->param_count != 2)
-			return -EINVAL;
-
-		*hwirq = fwspec->param[0];
-		*type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK;
-		return 0;
-	}
-
-	return -EINVAL;
+	return NULL;
 }
 
 static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq,
@@ -221,55 +201,12 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq,
 {
 	struct irq_fwspec *fwspec = data;
 	struct irq_fwspec parent_fwspec;
-	irq_hw_number_t hwirq, parent_hwirq;
-	unsigned int type;
-	int ret;
-
-	ret = qcom_pdc_translate(domain, fwspec, &hwirq, &type);
-	if (ret)
-		return ret;
-
-	ret  = irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
-					     &qcom_pdc_gic_chip, NULL);
-	if (ret)
-		return ret;
-
-	parent_hwirq = get_parent_hwirq(hwirq);
-	if (parent_hwirq == PDC_NO_PARENT_IRQ)
-		return irq_domain_disconnect_hierarchy(domain->parent, virq);
-
-	if (type & IRQ_TYPE_EDGE_BOTH)
-		type = IRQ_TYPE_EDGE_RISING;
-
-	if (type & IRQ_TYPE_LEVEL_MASK)
-		type = IRQ_TYPE_LEVEL_HIGH;
-
-	parent_fwspec.fwnode      = domain->parent->fwnode;
-	parent_fwspec.param_count = 3;
-	parent_fwspec.param[0]    = 0;
-	parent_fwspec.param[1]    = parent_hwirq;
-	parent_fwspec.param[2]    = type;
-
-	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs,
-					    &parent_fwspec);
-}
-
-static const struct irq_domain_ops qcom_pdc_ops = {
-	.translate	= qcom_pdc_translate,
-	.alloc		= qcom_pdc_alloc,
-	.free		= irq_domain_free_irqs_common,
-};
-
-static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq,
-			       unsigned int nr_irqs, void *data)
-{
-	struct irq_fwspec *fwspec = data;
-	struct irq_fwspec parent_fwspec;
-	irq_hw_number_t hwirq, parent_hwirq;
+	struct pdc_pin_region *region;
+	irq_hw_number_t hwirq;
 	unsigned int type;
 	int ret;
 
-	ret = qcom_pdc_translate(domain, fwspec, &hwirq, &type);
+	ret = irq_domain_translate_twocell(domain, fwspec, &hwirq, &type);
 	if (ret)
 		return ret;
 
@@ -281,8 +218,8 @@ static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq,
 	if (ret)
 		return ret;
 
-	parent_hwirq = get_parent_hwirq(hwirq);
-	if (parent_hwirq == PDC_NO_PARENT_IRQ)
+	region = get_pin_region(hwirq);
+	if (!region)
 		return irq_domain_disconnect_hierarchy(domain->parent, virq);
 
 	if (type & IRQ_TYPE_EDGE_BOTH)
@@ -294,23 +231,16 @@ static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq,
 	parent_fwspec.fwnode      = domain->parent->fwnode;
 	parent_fwspec.param_count = 3;
 	parent_fwspec.param[0]    = 0;
-	parent_fwspec.param[1]    = parent_hwirq;
+	parent_fwspec.param[1]    = pin_to_hwirq(region, hwirq);
 	parent_fwspec.param[2]    = type;
 
 	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs,
 					    &parent_fwspec);
 }
 
-static int qcom_pdc_gpio_domain_select(struct irq_domain *d,
-				       struct irq_fwspec *fwspec,
-				       enum irq_domain_bus_token bus_token)
-{
-	return bus_token == DOMAIN_BUS_WAKEUP;
-}
-
-static const struct irq_domain_ops qcom_pdc_gpio_ops = {
-	.select		= qcom_pdc_gpio_domain_select,
-	.alloc		= qcom_pdc_gpio_alloc,
+static const struct irq_domain_ops qcom_pdc_ops = {
+	.translate	= irq_domain_translate_twocell,
+	.alloc		= qcom_pdc_alloc,
 	.free		= irq_domain_free_irqs_common,
 };
 
@@ -361,7 +291,7 @@ static int pdc_setup_pin_mapping(struct device_node *np)
 
 static int qcom_pdc_init(struct device_node *node, struct device_node *parent)
 {
-	struct irq_domain *parent_domain, *pdc_domain, *pdc_gpio_domain;
+	struct irq_domain *parent_domain, *pdc_domain;
 	int ret;
 
 	pdc_base = of_iomap(node, 0);
@@ -383,32 +313,21 @@ static int qcom_pdc_init(struct device_node *node, struct device_node *parent)
 		goto fail;
 	}
 
-	pdc_domain = irq_domain_create_hierarchy(parent_domain, 0, PDC_MAX_IRQS,
-						 of_fwnode_handle(node),
-						 &qcom_pdc_ops, NULL);
-	if (!pdc_domain) {
-		pr_err("%pOF: GIC domain add failed\n", node);
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	pdc_gpio_domain = irq_domain_create_hierarchy(parent_domain,
+	pdc_domain = irq_domain_create_hierarchy(parent_domain,
 					IRQ_DOMAIN_FLAG_QCOM_PDC_WAKEUP,
 					PDC_MAX_GPIO_IRQS,
 					of_fwnode_handle(node),
-					&qcom_pdc_gpio_ops, NULL);
-	if (!pdc_gpio_domain) {
-		pr_err("%pOF: PDC domain add failed for GPIO domain\n", node);
+					&qcom_pdc_ops, NULL);
+	if (!pdc_domain) {
+		pr_err("%pOF: PDC domain add failed\n", node);
 		ret = -ENOMEM;
-		goto remove;
+		goto fail;
 	}
 
-	irq_domain_update_bus_token(pdc_gpio_domain, DOMAIN_BUS_WAKEUP);
+	irq_domain_update_bus_token(pdc_domain, DOMAIN_BUS_WAKEUP);
 
 	return 0;
 
-remove:
-	irq_domain_remove(pdc_domain);
 fail:
 	kfree(pdc_region);
 	iounmap(pdc_base);
diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c
index bd087cca1c1d..af17459c1a5c 100644
--- a/drivers/isdn/hardware/mISDN/hfcpci.c
+++ b/drivers/isdn/hardware/mISDN/hfcpci.c
@@ -2005,7 +2005,11 @@ setup_hw(struct hfc_pci *hc)
 	}
 	/* Allocate memory for FIFOS */
 	/* the memory needs to be on a 32k boundary within the first 4G */
-	dma_set_mask(&hc->pdev->dev, 0xFFFF8000);
+	if (dma_set_mask(&hc->pdev->dev, 0xFFFF8000)) {
+		printk(KERN_WARNING
+		       "HFC-PCI: No usable DMA configuration!\n");
+		return -EIO;
+	}
 	buffer = dma_alloc_coherent(&hc->pdev->dev, 0x8000, &hc->hw.dmahandle,
 				    GFP_KERNEL);
 	/* We silently assume the address is okay if nonzero */
diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c
index e11ca6bbc7f4..c3b2c99b5cd5 100644
--- a/drivers/isdn/mISDN/dsp_pipeline.c
+++ b/drivers/isdn/mISDN/dsp_pipeline.c
@@ -192,7 +192,7 @@ void dsp_pipeline_destroy(struct dsp_pipeline *pipeline)
 int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg)
 {
 	int found = 0;
-	char *dup, *tok, *name, *args;
+	char *dup, *next, *tok, *name, *args;
 	struct dsp_element_entry *entry, *n;
 	struct dsp_pipeline_entry *pipeline_entry;
 	struct mISDN_dsp_element *elem;
@@ -203,10 +203,10 @@ int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg)
 	if (!list_empty(&pipeline->list))
 		_dsp_pipeline_destroy(pipeline);
 
-	dup = kstrdup(cfg, GFP_ATOMIC);
+	dup = next = kstrdup(cfg, GFP_ATOMIC);
 	if (!dup)
 		return 0;
-	while ((tok = strsep(&dup, "|"))) {
+	while ((tok = strsep(&next, "|"))) {
 		if (!strlen(tok))
 			continue;
 		name = strsep(&tok, "(");
diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
index 70fa18b04ad2..b14d3f98e1eb 100644
--- a/drivers/mfd/ezx-pcap.c
+++ b/drivers/mfd/ezx-pcap.c
@@ -193,13 +193,11 @@ static void pcap_isr_work(struct work_struct *work)
 		ezx_pcap_write(pcap, PCAP_REG_MSR, isr | msr);
 		ezx_pcap_write(pcap, PCAP_REG_ISR, isr);
 
-		local_irq_disable();
 		service = isr & ~msr;
 		for (irq = pcap->irq_base; service; service >>= 1, irq++) {
 			if (service & 1)
-				generic_handle_irq(irq);
+				generic_handle_irq_safe(irq);
 		}
-		local_irq_enable();
 		ezx_pcap_write(pcap, PCAP_REG_MSR, pcap->msr);
 	} while (gpio_get_value(pdata->gpio));
 }
diff --git a/drivers/misc/hi6421v600-irq.c b/drivers/misc/hi6421v600-irq.c
index 1c763796cf1f..caa3de37698b 100644
--- a/drivers/misc/hi6421v600-irq.c
+++ b/drivers/misc/hi6421v600-irq.c
@@ -117,8 +117,8 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv)
 			 * If both powerkey down and up IRQs are received,
 			 * handle them at the right order
 			 */
-			generic_handle_irq(priv->irqs[POWERKEY_DOWN]);
-			generic_handle_irq(priv->irqs[POWERKEY_UP]);
+			generic_handle_irq_safe(priv->irqs[POWERKEY_DOWN]);
+			generic_handle_irq_safe(priv->irqs[POWERKEY_UP]);
 			pending &= ~HISI_IRQ_POWERKEY_UP_DOWN;
 		}
 
@@ -126,7 +126,7 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv)
 			continue;
 
 		for_each_set_bit(offset, &pending, BITS_PER_BYTE) {
-			generic_handle_irq(priv->irqs[offset + i * BITS_PER_BYTE]);
+			generic_handle_irq_safe(priv->irqs[offset + i * BITS_PER_BYTE]);
 		}
 	}
 
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 8d718aa56d33..4e67c1403cc9 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1908,7 +1908,7 @@ static int mmc_blk_card_busy(struct mmc_card *card, struct request *req)
 
 	cb_data.card = card;
 	cb_data.status = 0;
-	err = __mmc_poll_for_busy(card->host, MMC_BLK_TIMEOUT_MS,
+	err = __mmc_poll_for_busy(card->host, 0, MMC_BLK_TIMEOUT_MS,
 				  &mmc_blk_busy_cb, &cb_data);
 
 	/*
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index bbbbcaf70a59..8421519c2a98 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1962,7 +1962,7 @@ static int mmc_sleep(struct mmc_host *host)
 		goto out_release;
 	}
 
-	err = __mmc_poll_for_busy(host, timeout_ms, &mmc_sleep_busy_cb, host);
+	err = __mmc_poll_for_busy(host, 0, timeout_ms, &mmc_sleep_busy_cb, host);
 
 out_release:
 	mmc_retune_release(host);
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index d63d1c735335..180d7e9d3400 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -21,6 +21,8 @@
 
 #define MMC_BKOPS_TIMEOUT_MS		(120 * 1000) /* 120s */
 #define MMC_SANITIZE_TIMEOUT_MS		(240 * 1000) /* 240s */
+#define MMC_OP_COND_PERIOD_US		(1 * 1000) /* 1ms */
+#define MMC_OP_COND_TIMEOUT_MS		1000 /* 1s */
 
 static const u8 tuning_blk_pattern_4bit[] = {
 	0xff, 0x0f, 0xff, 0x00, 0xff, 0xcc, 0xc3, 0xcc,
@@ -232,7 +234,9 @@ int mmc_send_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr)
 	cmd.arg = mmc_host_is_spi(host) ? 0 : ocr;
 	cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R3 | MMC_CMD_BCR;
 
-	err = __mmc_poll_for_busy(host, 1000, &__mmc_send_op_cond_cb, &cb_data);
+	err = __mmc_poll_for_busy(host, MMC_OP_COND_PERIOD_US,
+				  MMC_OP_COND_TIMEOUT_MS,
+				  &__mmc_send_op_cond_cb, &cb_data);
 	if (err)
 		return err;
 
@@ -495,13 +499,14 @@ static int mmc_busy_cb(void *cb_data, bool *busy)
 	return 0;
 }
 
-int __mmc_poll_for_busy(struct mmc_host *host, unsigned int timeout_ms,
+int __mmc_poll_for_busy(struct mmc_host *host, unsigned int period_us,
+			unsigned int timeout_ms,
 			int (*busy_cb)(void *cb_data, bool *busy),
 			void *cb_data)
 {
 	int err;
 	unsigned long timeout;
-	unsigned int udelay = 32, udelay_max = 32768;
+	unsigned int udelay = period_us ? period_us : 32, udelay_max = 32768;
 	bool expired = false;
 	bool busy = false;
 
@@ -546,7 +551,7 @@ int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
 	cb_data.retry_crc_err = retry_crc_err;
 	cb_data.busy_cmd = busy_cmd;
 
-	return __mmc_poll_for_busy(host, timeout_ms, &mmc_busy_cb, &cb_data);
+	return __mmc_poll_for_busy(host, 0, timeout_ms, &mmc_busy_cb, &cb_data);
 }
 EXPORT_SYMBOL_GPL(mmc_poll_for_busy);
 
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index 9c813b851d0b..09ffbc00908b 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -41,7 +41,8 @@ int mmc_can_ext_csd(struct mmc_card *card);
 int mmc_switch_status(struct mmc_card *card, bool crc_err_fatal);
 bool mmc_prepare_busy_cmd(struct mmc_host *host, struct mmc_command *cmd,
 			  unsigned int timeout_ms);
-int __mmc_poll_for_busy(struct mmc_host *host, unsigned int timeout_ms,
+int __mmc_poll_for_busy(struct mmc_host *host, unsigned int period_us,
+			unsigned int timeout_ms,
 			int (*busy_cb)(void *cb_data, bool *busy),
 			void *cb_data);
 int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index bd87012c220c..bfbfed30dc4d 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -1672,7 +1672,7 @@ static int sd_poweroff_notify(struct mmc_card *card)
 
 	cb_data.card = card;
 	cb_data.reg_buf = reg_buf;
-	err = __mmc_poll_for_busy(card->host, SD_POWEROFF_NOTIFY_TIMEOUT_MS,
+	err = __mmc_poll_for_busy(card->host, 0, SD_POWEROFF_NOTIFY_TIMEOUT_MS,
 				  &sd_busy_poweroff_notify_cb, &cb_data);
 
 out:
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 8f36536cb1b6..58ab9d90bc8b 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -173,6 +173,8 @@ struct meson_host {
 	int irq;
 
 	bool vqmmc_enabled;
+	bool needs_pre_post_req;
+
 };
 
 #define CMD_CFG_LENGTH_MASK GENMASK(8, 0)
@@ -663,6 +665,8 @@ static void meson_mmc_request_done(struct mmc_host *mmc,
 	struct meson_host *host = mmc_priv(mmc);
 
 	host->cmd = NULL;
+	if (host->needs_pre_post_req)
+		meson_mmc_post_req(mmc, mrq, 0);
 	mmc_request_done(host->mmc, mrq);
 }
 
@@ -880,7 +884,7 @@ static int meson_mmc_validate_dram_access(struct mmc_host *mmc, struct mmc_data
 static void meson_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
 {
 	struct meson_host *host = mmc_priv(mmc);
-	bool needs_pre_post_req = mrq->data &&
+	host->needs_pre_post_req = mrq->data &&
 			!(mrq->data->host_cookie & SD_EMMC_PRE_REQ_DONE);
 
 	/*
@@ -896,22 +900,19 @@ static void meson_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
 		}
 	}
 
-	if (needs_pre_post_req) {
+	if (host->needs_pre_post_req) {
 		meson_mmc_get_transfer_mode(mmc, mrq);
 		if (!meson_mmc_desc_chain_mode(mrq->data))
-			needs_pre_post_req = false;
+			host->needs_pre_post_req = false;
 	}
 
-	if (needs_pre_post_req)
+	if (host->needs_pre_post_req)
 		meson_mmc_pre_req(mmc, mrq);
 
 	/* Stop execution */
 	writel(0, host->regs + SD_EMMC_START);
 
 	meson_mmc_start_cmd(mmc, mrq->sbc ?: mrq->cmd);
-
-	if (needs_pre_post_req)
-		meson_mmc_post_req(mmc, mrq, 0);
 }
 
 static void meson_mmc_read_resp(struct mmc_host *mmc, struct mmc_command *cmd)
diff --git a/drivers/mtd/nand/raw/Kconfig b/drivers/mtd/nand/raw/Kconfig
index d986ab4e4c35..820e5dc3bc9b 100644
--- a/drivers/mtd/nand/raw/Kconfig
+++ b/drivers/mtd/nand/raw/Kconfig
@@ -42,8 +42,7 @@ config MTD_NAND_OMAP2
 	tristate "OMAP2, OMAP3, OMAP4 and Keystone NAND controller"
 	depends on ARCH_OMAP2PLUS || ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST
 	depends on HAS_IOMEM
-	select MEMORY
-	select OMAP_GPMC
+	depends on OMAP_GPMC
 	help
 	  Support for NAND flash on Texas Instruments OMAP2, OMAP3, OMAP4
 	  and Keystone platforms.
diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c
index 866767b70d65..b0a7dee27ffc 100644
--- a/drivers/net/dsa/microchip/ksz8795_spi.c
+++ b/drivers/net/dsa/microchip/ksz8795_spi.c
@@ -124,12 +124,23 @@ static const struct of_device_id ksz8795_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, ksz8795_dt_ids);
 
+static const struct spi_device_id ksz8795_spi_ids[] = {
+	{ "ksz8765" },
+	{ "ksz8794" },
+	{ "ksz8795" },
+	{ "ksz8863" },
+	{ "ksz8873" },
+	{ },
+};
+MODULE_DEVICE_TABLE(spi, ksz8795_spi_ids);
+
 static struct spi_driver ksz8795_spi_driver = {
 	.driver = {
 		.name	= "ksz8795-switch",
 		.owner	= THIS_MODULE,
 		.of_match_table = of_match_ptr(ksz8795_dt_ids),
 	},
+	.id_table = ksz8795_spi_ids,
 	.probe	= ksz8795_spi_probe,
 	.remove	= ksz8795_spi_remove,
 	.shutdown = ksz8795_spi_shutdown,
diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c
index e3cb0e6c9f6f..43addeabfc25 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -98,12 +98,24 @@ static const struct of_device_id ksz9477_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, ksz9477_dt_ids);
 
+static const struct spi_device_id ksz9477_spi_ids[] = {
+	{ "ksz9477" },
+	{ "ksz9897" },
+	{ "ksz9893" },
+	{ "ksz9563" },
+	{ "ksz8563" },
+	{ "ksz9567" },
+	{ },
+};
+MODULE_DEVICE_TABLE(spi, ksz9477_spi_ids);
+
 static struct spi_driver ksz9477_spi_driver = {
 	.driver = {
 		.name	= "ksz9477-switch",
 		.owner	= THIS_MODULE,
 		.of_match_table = of_match_ptr(ksz9477_dt_ids),
 	},
+	.id_table = ksz9477_spi_ids,
 	.probe	= ksz9477_spi_probe,
 	.remove	= ksz9477_spi_remove,
 	.shutdown = ksz9477_spi_shutdown,
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index ff3c267d0f26..a251bc55727f 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -2936,7 +2936,7 @@ mt753x_phylink_validate(struct dsa_switch *ds, int port,
 
 	phylink_set_port_modes(mask);
 
-	if (state->interface != PHY_INTERFACE_MODE_TRGMII ||
+	if (state->interface != PHY_INTERFACE_MODE_TRGMII &&
 	    !phy_interface_mode_is_8023z(state->interface)) {
 		phylink_set(mask, 10baseT_Half);
 		phylink_set(mask, 10baseT_Full);
diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c
index e320cccba61a..90cd7bdf06f5 100644
--- a/drivers/net/ethernet/8390/mcf8390.c
+++ b/drivers/net/ethernet/8390/mcf8390.c
@@ -405,12 +405,12 @@ static int mcf8390_init(struct net_device *dev)
 static int mcf8390_probe(struct platform_device *pdev)
 {
 	struct net_device *dev;
-	struct resource *mem, *irq;
+	struct resource *mem;
 	resource_size_t msize;
-	int ret;
+	int ret, irq;
 
-	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (irq == NULL) {
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
 		dev_err(&pdev->dev, "no IRQ specified?\n");
 		return -ENXIO;
 	}
@@ -433,7 +433,7 @@ static int mcf8390_probe(struct platform_device *pdev)
 	SET_NETDEV_DEV(dev, &pdev->dev);
 	platform_set_drvdata(pdev, dev);
 
-	dev->irq = irq->start;
+	dev->irq = irq;
 	dev->base_addr = mem->start;
 
 	ret = mcf8390_init(dev);
diff --git a/drivers/net/ethernet/arc/emac_mdio.c b/drivers/net/ethernet/arc/emac_mdio.c
index 9acf589b1178..87f40c2ba904 100644
--- a/drivers/net/ethernet/arc/emac_mdio.c
+++ b/drivers/net/ethernet/arc/emac_mdio.c
@@ -132,6 +132,7 @@ int arc_mdio_probe(struct arc_emac_priv *priv)
 {
 	struct arc_emac_mdio_bus_data *data = &priv->bus_data;
 	struct device_node *np = priv->dev->of_node;
+	const char *name = "Synopsys MII Bus";
 	struct mii_bus *bus;
 	int error;
 
@@ -142,7 +143,7 @@ int arc_mdio_probe(struct arc_emac_priv *priv)
 	priv->bus = bus;
 	bus->priv = priv;
 	bus->parent = priv->dev;
-	bus->name = "Synopsys MII Bus";
+	bus->name = name;
 	bus->read = &arc_mdio_read;
 	bus->write = &arc_mdio_write;
 	bus->reset = &arc_mdio_reset;
@@ -167,7 +168,7 @@ int arc_mdio_probe(struct arc_emac_priv *priv)
 	if (error) {
 		mdiobus_free(bus);
 		return dev_err_probe(priv->dev, error,
-				     "cannot register MDIO bus %s\n", bus->name);
+				     "cannot register MDIO bus %s\n", name);
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 4ad3fc72e74e..a89b93cb4e26 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -1181,8 +1181,11 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
 	alx->hw.mtu = mtu;
 	alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE);
 	netdev_update_features(netdev);
-	if (netif_running(netdev))
+	if (netif_running(netdev)) {
+		mutex_lock(&alx->mtx);
 		alx_reinit(alx);
+		mutex_unlock(&alx->mtx);
+	}
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index a19dd6797070..2209d99b3404 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -2533,6 +2533,4 @@ void bnx2x_register_phc(struct bnx2x *bp);
  * Meant for implicit re-load flows.
  */
 int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp);
-int bnx2x_init_firmware(struct bnx2x *bp);
-void bnx2x_release_firmware(struct bnx2x *bp);
 #endif /* bnx2x.h */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 8d36ebbf08e1..5729a5ab059d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -2364,24 +2364,30 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 load_code, bool print_err)
 	/* is another pf loaded on this engine? */
 	if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
 	    load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
-		/* build my FW version dword */
-		u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) +
-				(bp->fw_rev << 16) + (bp->fw_eng << 24);
+		u8 loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng;
+		u32 loaded_fw;
 
 		/* read loaded FW from chip */
-		u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
+		loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
 
-		DP(BNX2X_MSG_SP, "loaded fw %x, my fw %x\n",
-		   loaded_fw, my_fw);
+		loaded_fw_major = loaded_fw & 0xff;
+		loaded_fw_minor = (loaded_fw >> 8) & 0xff;
+		loaded_fw_rev = (loaded_fw >> 16) & 0xff;
+		loaded_fw_eng = (loaded_fw >> 24) & 0xff;
+
+		DP(BNX2X_MSG_SP, "loaded fw 0x%x major 0x%x minor 0x%x rev 0x%x eng 0x%x\n",
+		   loaded_fw, loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng);
 
 		/* abort nic load if version mismatch */
-		if (my_fw != loaded_fw) {
+		if (loaded_fw_major != BCM_5710_FW_MAJOR_VERSION ||
+		    loaded_fw_minor != BCM_5710_FW_MINOR_VERSION ||
+		    loaded_fw_eng != BCM_5710_FW_ENGINEERING_VERSION ||
+		    loaded_fw_rev < BCM_5710_FW_REVISION_VERSION_V15) {
 			if (print_err)
-				BNX2X_ERR("bnx2x with FW %x was already loaded which mismatches my %x FW. Aborting\n",
-					  loaded_fw, my_fw);
+				BNX2X_ERR("loaded FW incompatible. Aborting\n");
 			else
-				BNX2X_DEV_INFO("bnx2x with FW %x was already loaded which mismatches my %x FW, possibly due to MF UNDI\n",
-					       loaded_fw, my_fw);
+				BNX2X_DEV_INFO("loaded FW incompatible, possibly due to MF UNDI\n");
+
 			return -EBUSY;
 		}
 	}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index eedb48d945ed..c19b072f3a23 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12319,15 +12319,6 @@ static int bnx2x_init_bp(struct bnx2x *bp)
 
 	bnx2x_read_fwinfo(bp);
 
-	if (IS_PF(bp)) {
-		rc = bnx2x_init_firmware(bp);
-
-		if (rc) {
-			bnx2x_free_mem_bp(bp);
-			return rc;
-		}
-	}
-
 	func = BP_FUNC(bp);
 
 	/* need to reset chip if undi was active */
@@ -12340,7 +12331,6 @@ static int bnx2x_init_bp(struct bnx2x *bp)
 
 		rc = bnx2x_prev_unload(bp);
 		if (rc) {
-			bnx2x_release_firmware(bp);
 			bnx2x_free_mem_bp(bp);
 			return rc;
 		}
@@ -13409,7 +13399,7 @@ do {									\
 	     (u8 *)bp->arr, len);					\
 } while (0)
 
-int bnx2x_init_firmware(struct bnx2x *bp)
+static int bnx2x_init_firmware(struct bnx2x *bp)
 {
 	const char *fw_file_name, *fw_file_name_v15;
 	struct bnx2x_fw_file_hdr *fw_hdr;
@@ -13509,7 +13499,7 @@ request_firmware_exit:
 	return rc;
 }
 
-void bnx2x_release_firmware(struct bnx2x *bp)
+static void bnx2x_release_firmware(struct bnx2x *bp)
 {
 	kfree(bp->init_ops_offsets);
 	kfree(bp->init_ops);
@@ -14026,7 +14016,6 @@ static int bnx2x_init_one(struct pci_dev *pdev,
 	return 0;
 
 init_one_freemem:
-	bnx2x_release_firmware(bp);
 	bnx2x_free_mem_bp(bp);
 
 init_one_exit:
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 87f1056e29ff..2da804f84b48 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2287,8 +2287,10 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
 		dma_length_status = status->length_status;
 		if (dev->features & NETIF_F_RXCSUM) {
 			rx_csum = (__force __be16)(status->rx_csum & 0xffff);
-			skb->csum = (__force __wsum)ntohs(rx_csum);
-			skb->ip_summed = CHECKSUM_COMPLETE;
+			if (rx_csum) {
+				skb->csum = (__force __wsum)ntohs(rx_csum);
+				skb->ip_summed = CHECKSUM_COMPLETE;
+			}
 		}
 
 		/* DMA flags and length are still valid no matter how
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
index e31a5a397f11..f55d9d9c01a8 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
@@ -40,6 +40,13 @@
 void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
+	struct device *kdev = &priv->pdev->dev;
+
+	if (!device_can_wakeup(kdev)) {
+		wol->supported = 0;
+		wol->wolopts = 0;
+		return;
+	}
 
 	wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER;
 	wol->wolopts = priv->wolopts;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 98498a76ae16..d13f06cf0308 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -1573,7 +1573,14 @@ static int macb_poll(struct napi_struct *napi, int budget)
 	if (work_done < budget) {
 		napi_complete_done(napi, work_done);
 
-		/* Packets received while interrupts were disabled */
+		/* RSR bits only seem to propagate to raise interrupts when
+		 * interrupts are enabled at the time, so if bits are already
+		 * set due to packets received while interrupts were disabled,
+		 * they will not cause another interrupt to be generated when
+		 * interrupts are re-enabled.
+		 * Check for this case here. This has been seen to happen
+		 * around 30% of the time under heavy network load.
+		 */
 		status = macb_readl(bp, RSR);
 		if (status) {
 			if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
@@ -1581,6 +1588,22 @@ static int macb_poll(struct napi_struct *napi, int budget)
 			napi_reschedule(napi);
 		} else {
 			queue_writel(queue, IER, bp->rx_intr_mask);
+
+			/* In rare cases, packets could have been received in
+			 * the window between the check above and re-enabling
+			 * interrupts. Therefore, a double-check is required
+			 * to avoid losing a wakeup. This can potentially race
+			 * with the interrupt handler doing the same actions
+			 * if an interrupt is raised just after enabling them,
+			 * but this should be harmless.
+			 */
+			status = macb_readl(bp, RSR);
+			if (unlikely(status)) {
+				queue_writel(queue, IDR, bp->rx_intr_mask);
+				if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
+					queue_writel(queue, ISR, MACB_BIT(RCOMP));
+				napi_schedule(napi);
+			}
 		}
 	}
 
diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
index ff756265d58f..9a2c16d69e2c 100644
--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
+++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
@@ -1464,6 +1464,7 @@ static int gfar_get_ts_info(struct net_device *dev,
 	ptp_node = of_find_compatible_node(NULL, NULL, "fsl,etsec-ptp");
 	if (ptp_node) {
 		ptp_dev = of_find_device_by_node(ptp_node);
+		of_node_put(ptp_node);
 		if (ptp_dev)
 			ptp = platform_get_drvdata(ptp_dev);
 	}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 1e57cc8c47d7..9db5001297c7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -742,10 +742,8 @@ static void i40e_dbg_dump_vf(struct i40e_pf *pf, int vf_id)
 		vsi = pf->vsi[vf->lan_vsi_idx];
 		dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d, seid=%d, qps=%d\n",
 			 vf_id, vf->lan_vsi_id, vsi->seid, vf->num_queue_pairs);
-		dev_info(&pf->pdev->dev, "       num MDD=%lld, invalid msg=%lld, valid msg=%lld\n",
-			 vf->num_mdd_events,
-			 vf->num_invalid_msgs,
-			 vf->num_valid_msgs);
+		dev_info(&pf->pdev->dev, "       num MDD=%lld\n",
+			 vf->num_mdd_events);
 	} else {
 		dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id);
 	}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index dfdb6e786461..2606e8f0f19b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1917,19 +1917,17 @@ sriov_configure_out:
 /***********************virtual channel routines******************/
 
 /**
- * i40e_vc_send_msg_to_vf_ex
+ * i40e_vc_send_msg_to_vf
  * @vf: pointer to the VF info
  * @v_opcode: virtual channel opcode
  * @v_retval: virtual channel return value
  * @msg: pointer to the msg buffer
  * @msglen: msg length
- * @is_quiet: true for not printing unsuccessful return values, false otherwise
  *
  * send msg to VF
  **/
-static int i40e_vc_send_msg_to_vf_ex(struct i40e_vf *vf, u32 v_opcode,
-				     u32 v_retval, u8 *msg, u16 msglen,
-				     bool is_quiet)
+static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode,
+				  u32 v_retval, u8 *msg, u16 msglen)
 {
 	struct i40e_pf *pf;
 	struct i40e_hw *hw;
@@ -1944,25 +1942,6 @@ static int i40e_vc_send_msg_to_vf_ex(struct i40e_vf *vf, u32 v_opcode,
 	hw = &pf->hw;
 	abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
 
-	/* single place to detect unsuccessful return values */
-	if (v_retval && !is_quiet) {
-		vf->num_invalid_msgs++;
-		dev_info(&pf->pdev->dev, "VF %d failed opcode %d, retval: %d\n",
-			 vf->vf_id, v_opcode, v_retval);
-		if (vf->num_invalid_msgs >
-		    I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED) {
-			dev_err(&pf->pdev->dev,
-				"Number of invalid messages exceeded for VF %d\n",
-				vf->vf_id);
-			dev_err(&pf->pdev->dev, "Use PF Control I/F to enable the VF\n");
-			set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
-		}
-	} else {
-		vf->num_valid_msgs++;
-		/* reset the invalid counter, if a valid message is received. */
-		vf->num_invalid_msgs = 0;
-	}
-
 	aq_ret = i40e_aq_send_msg_to_vf(hw, abs_vf_id,	v_opcode, v_retval,
 					msg, msglen, NULL);
 	if (aq_ret) {
@@ -1976,23 +1955,6 @@ static int i40e_vc_send_msg_to_vf_ex(struct i40e_vf *vf, u32 v_opcode,
 }
 
 /**
- * i40e_vc_send_msg_to_vf
- * @vf: pointer to the VF info
- * @v_opcode: virtual channel opcode
- * @v_retval: virtual channel return value
- * @msg: pointer to the msg buffer
- * @msglen: msg length
- *
- * send msg to VF
- **/
-static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode,
-				  u32 v_retval, u8 *msg, u16 msglen)
-{
-	return i40e_vc_send_msg_to_vf_ex(vf, v_opcode, v_retval,
-					 msg, msglen, false);
-}
-
-/**
  * i40e_vc_send_resp_to_vf
  * @vf: pointer to the VF info
  * @opcode: operation code
@@ -2822,7 +2784,6 @@ error_param:
  * i40e_check_vf_permission
  * @vf: pointer to the VF info
  * @al: MAC address list from virtchnl
- * @is_quiet: set true for printing msg without opcode info, false otherwise
  *
  * Check that the given list of MAC addresses is allowed. Will return -EPERM
  * if any address in the list is not valid. Checks the following conditions:
@@ -2837,8 +2798,7 @@ error_param:
  * addresses might not be accurate.
  **/
 static inline int i40e_check_vf_permission(struct i40e_vf *vf,
-					   struct virtchnl_ether_addr_list *al,
-					   bool *is_quiet)
+					   struct virtchnl_ether_addr_list *al)
 {
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
@@ -2846,7 +2806,6 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
 	int mac2add_cnt = 0;
 	int i;
 
-	*is_quiet = false;
 	for (i = 0; i < al->num_elements; i++) {
 		struct i40e_mac_filter *f;
 		u8 *addr = al->list[i].addr;
@@ -2870,7 +2829,6 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
 		    !ether_addr_equal(addr, vf->default_lan_addr.addr)) {
 			dev_err(&pf->pdev->dev,
 				"VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
-			*is_quiet = true;
 			return -EPERM;
 		}
 
@@ -2921,7 +2879,6 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
 	    (struct virtchnl_ether_addr_list *)msg;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi = NULL;
-	bool is_quiet = false;
 	i40e_status ret = 0;
 	int i;
 
@@ -2938,7 +2895,7 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
 	 */
 	spin_lock_bh(&vsi->mac_filter_hash_lock);
 
-	ret = i40e_check_vf_permission(vf, al, &is_quiet);
+	ret = i40e_check_vf_permission(vf, al);
 	if (ret) {
 		spin_unlock_bh(&vsi->mac_filter_hash_lock);
 		goto error_param;
@@ -2976,8 +2933,8 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
 
 error_param:
 	/* send the response to the VF */
-	return i40e_vc_send_msg_to_vf_ex(vf, VIRTCHNL_OP_ADD_ETH_ADDR,
-				       ret, NULL, 0, is_quiet);
+	return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_ETH_ADDR,
+				      ret, NULL, 0);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 03c42fd0fea1..a554d0a0b09b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -10,8 +10,6 @@
 
 #define I40E_VIRTCHNL_SUPPORTED_QTYPES 2
 
-#define I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED	10
-
 #define I40E_VLAN_PRIORITY_SHIFT	13
 #define I40E_VLAN_MASK			0xFFF
 #define I40E_PRIORITY_MASK		0xE000
@@ -92,9 +90,6 @@ struct i40e_vf {
 	u8 num_queue_pairs;	/* num of qps assigned to VF vsis */
 	u8 num_req_queues;	/* num of requested qps */
 	u64 num_mdd_events;	/* num of mdd events detected */
-	/* num of continuous malformed or invalid msgs detected */
-	u64 num_invalid_msgs;
-	u64 num_valid_msgs;	/* num of valid msgs detected */
 
 	unsigned long vf_caps;	/* vf's adv. capabilities */
 	unsigned long vf_states;	/* vf's runtime states */
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index 89423947ee65..4babe4705a55 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -288,6 +288,7 @@ struct iavf_adapter {
 #define IAVF_FLAG_REINIT_ITR_NEEDED		BIT(16)
 #define IAVF_FLAG_QUEUES_DISABLED		BIT(17)
 #define IAVF_FLAG_SETUP_NETDEV_FEATURES		BIT(18)
+#define IAVF_FLAG_REINIT_MSIX_NEEDED		BIT(20)
 /* duplicates for common code */
 #define IAVF_FLAG_DCB_ENABLED			0
 	/* flags for admin queue service task */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index dcf24264c7ea..0e178a0a59c5 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -2120,7 +2120,7 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter)
 			"Requested %d queues, but PF only gave us %d.\n",
 			num_req_queues,
 			adapter->vsi_res->num_queue_pairs);
-		adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
+		adapter->flags |= IAVF_FLAG_REINIT_MSIX_NEEDED;
 		adapter->num_req_queues = adapter->vsi_res->num_queue_pairs;
 		iavf_schedule_reset(adapter);
 
@@ -2541,6 +2541,13 @@ restart_watchdog:
 		queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2);
 }
 
+/**
+ * iavf_disable_vf - disable VF
+ * @adapter: board private structure
+ *
+ * Set communication failed flag and free all resources.
+ * NOTE: This function is expected to be called with crit_lock being held.
+ **/
 static void iavf_disable_vf(struct iavf_adapter *adapter)
 {
 	struct iavf_mac_filter *f, *ftmp;
@@ -2595,7 +2602,6 @@ static void iavf_disable_vf(struct iavf_adapter *adapter)
 	memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE);
 	iavf_shutdown_adminq(&adapter->hw);
 	adapter->netdev->flags &= ~IFF_UP;
-	mutex_unlock(&adapter->crit_lock);
 	adapter->flags &= ~IAVF_FLAG_RESET_PENDING;
 	iavf_change_state(adapter, __IAVF_DOWN);
 	wake_up(&adapter->down_waitqueue);
@@ -2727,7 +2733,8 @@ continue_reset:
 			 err);
 	adapter->aq_required = 0;
 
-	if (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED) {
+	if ((adapter->flags & IAVF_FLAG_REINIT_MSIX_NEEDED) ||
+	    (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED)) {
 		err = iavf_reinit_interrupt_scheme(adapter);
 		if (err)
 			goto reset_err;
@@ -2799,12 +2806,13 @@ continue_reset:
 		if (err)
 			goto reset_err;
 
-		if (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED) {
+		if ((adapter->flags & IAVF_FLAG_REINIT_MSIX_NEEDED) ||
+		    (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED)) {
 			err = iavf_request_traffic_irqs(adapter, netdev->name);
 			if (err)
 				goto reset_err;
 
-			adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+			adapter->flags &= ~IAVF_FLAG_REINIT_MSIX_NEEDED;
 		}
 
 		iavf_configure(adapter);
@@ -2819,6 +2827,9 @@ continue_reset:
 		iavf_change_state(adapter, __IAVF_DOWN);
 		wake_up(&adapter->down_waitqueue);
 	}
+
+	adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+
 	mutex_unlock(&adapter->client_lock);
 	mutex_unlock(&adapter->crit_lock);
 
@@ -4609,6 +4620,13 @@ static void iavf_remove(struct pci_dev *pdev)
 	struct iavf_hw *hw = &adapter->hw;
 	int err;
 
+	/* When reboot/shutdown is in progress no need to do anything
+	 * as the adapter is already REMOVE state that was set during
+	 * iavf_shutdown() callback.
+	 */
+	if (adapter->state == __IAVF_REMOVE)
+		return;
+
 	set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section);
 	/* Wait until port initialization is complete.
 	 * There are flows where register/unregister netdev may race.
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 88844d68e150..5263cefe46f5 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -1835,6 +1835,22 @@ void iavf_request_reset(struct iavf_adapter *adapter)
 }
 
 /**
+ * iavf_netdev_features_vlan_strip_set - update vlan strip status
+ * @netdev: ptr to netdev being adjusted
+ * @enable: enable or disable vlan strip
+ *
+ * Helper function to change vlan strip status in netdev->features.
+ */
+static void iavf_netdev_features_vlan_strip_set(struct net_device *netdev,
+						const bool enable)
+{
+	if (enable)
+		netdev->features |= NETIF_F_HW_VLAN_CTAG_RX;
+	else
+		netdev->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
+}
+
+/**
  * iavf_virtchnl_completion
  * @adapter: adapter structure
  * @v_opcode: opcode sent by PF
@@ -2057,8 +2073,18 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 			}
 			break;
 		case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
+			dev_warn(&adapter->pdev->dev, "Changing VLAN Stripping is not allowed when Port VLAN is configured\n");
+			/* Vlan stripping could not be enabled by ethtool.
+			 * Disable it in netdev->features.
+			 */
+			iavf_netdev_features_vlan_strip_set(netdev, false);
+			break;
 		case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
 			dev_warn(&adapter->pdev->dev, "Changing VLAN Stripping is not allowed when Port VLAN is configured\n");
+			/* Vlan stripping could not be disabled by ethtool.
+			 * Enable it in netdev->features.
+			 */
+			iavf_netdev_features_vlan_strip_set(netdev, true);
 			break;
 		default:
 			dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
@@ -2312,6 +2338,20 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 		spin_unlock_bh(&adapter->adv_rss_lock);
 		}
 		break;
+	case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
+		/* PF enabled vlan strip on this VF.
+		 * Update netdev->features if needed to be in sync with ethtool.
+		 */
+		if (!v_retval)
+			iavf_netdev_features_vlan_strip_set(netdev, true);
+		break;
+	case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
+		/* PF disabled vlan strip on this VF.
+		 * Update netdev->features if needed to be in sync with ethtool.
+		 */
+		if (!v_retval)
+			iavf_netdev_features_vlan_strip_set(netdev, false);
+		break;
 	default:
 		if (adapter->current_op && (v_opcode != adapter->current_op))
 			dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 473b1f6be9de..bea1d1e39fa2 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -483,6 +483,7 @@ enum ice_pf_flags {
 	ICE_FLAG_MDD_AUTO_RESET_VF,
 	ICE_FLAG_LINK_LENIENT_MODE_ENA,
 	ICE_FLAG_PLUG_AUX_DEV,
+	ICE_FLAG_MTU_CHANGED,
 	ICE_PF_FLAGS_NBITS		/* must be last */
 };
 
@@ -897,7 +898,16 @@ static inline void ice_set_rdma_cap(struct ice_pf *pf)
  */
 static inline void ice_clear_rdma_cap(struct ice_pf *pf)
 {
-	ice_unplug_aux_dev(pf);
+	/* We can directly unplug aux device here only if the flag bit
+	 * ICE_FLAG_PLUG_AUX_DEV is not set because ice_unplug_aux_dev()
+	 * could race with ice_plug_aux_dev() called from
+	 * ice_service_task(). In this case we only clear that bit now and
+	 * aux device will be unplugged later once ice_plug_aux_device()
+	 * called from ice_service_task() finishes (see ice_service_task()).
+	 */
+	if (!test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags))
+		ice_unplug_aux_dev(pf);
+
 	clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
 	clear_bit(ICE_FLAG_AUX_ENA, pf->flags);
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index e2e3ef7fba7f..a5dc9824e255 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2298,7 +2298,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 	if (err)
 		goto done;
 
-	curr_link_speed = pi->phy.link_info.link_speed;
+	curr_link_speed = pi->phy.curr_user_speed_req;
 	adv_link_speed = ice_ksettings_find_adv_link_speed(ks);
 
 	/* If speed didn't get set, set it to what it currently is.
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index f3c346e13b7a..b7e8744b0c0a 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2255,9 +2255,30 @@ static void ice_service_task(struct work_struct *work)
 		return;
 	}
 
-	if (test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags))
+	if (test_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) {
+		/* Plug aux device per request */
 		ice_plug_aux_dev(pf);
 
+		/* Mark plugging as done but check whether unplug was
+		 * requested during ice_plug_aux_dev() call
+		 * (e.g. from ice_clear_rdma_cap()) and if so then
+		 * plug aux device.
+		 */
+		if (!test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags))
+			ice_unplug_aux_dev(pf);
+	}
+
+	if (test_and_clear_bit(ICE_FLAG_MTU_CHANGED, pf->flags)) {
+		struct iidc_event *event;
+
+		event = kzalloc(sizeof(*event), GFP_KERNEL);
+		if (event) {
+			set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type);
+			ice_send_event_to_aux(pf, event);
+			kfree(event);
+		}
+	}
+
 	ice_clean_adminq_subtask(pf);
 	ice_check_media_subtask(pf);
 	ice_check_for_hang_subtask(pf);
@@ -3023,7 +3044,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 		struct iidc_event *event;
 
 		ena_mask &= ~ICE_AUX_CRIT_ERR;
-		event = kzalloc(sizeof(*event), GFP_KERNEL);
+		event = kzalloc(sizeof(*event), GFP_ATOMIC);
 		if (event) {
 			set_bit(IIDC_EVENT_CRIT_ERR, event->type);
 			/* report the entire OICR value to AUX driver */
@@ -4859,7 +4880,6 @@ static void ice_remove(struct pci_dev *pdev)
 	ice_devlink_unregister_params(pf);
 	set_bit(ICE_DOWN, pf->state);
 
-	mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
 	ice_deinit_lag(pf);
 	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
 		ice_ptp_release(pf);
@@ -4867,6 +4887,7 @@ static void ice_remove(struct pci_dev *pdev)
 		ice_remove_arfs(pf);
 	ice_setup_mc_magic_wake(pf);
 	ice_vsi_release_all(pf);
+	mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
 	ice_set_wake(pf);
 	ice_free_irq_msix_misc(pf);
 	ice_for_each_vsi(pf, i) {
@@ -5941,8 +5962,9 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi,
 		u64 pkts = 0, bytes = 0;
 
 		ring = READ_ONCE(rings[i]);
-		if (ring)
-			ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes);
+		if (!ring)
+			continue;
+		ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes);
 		vsi_stats->tx_packets += pkts;
 		vsi_stats->tx_bytes += bytes;
 		vsi->tx_restart += ring->tx_stats.restart_q;
@@ -6822,7 +6844,6 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_pf *pf = vsi->back;
-	struct iidc_event *event;
 	u8 count = 0;
 	int err = 0;
 
@@ -6857,14 +6878,6 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 		return -EBUSY;
 	}
 
-	event = kzalloc(sizeof(*event), GFP_KERNEL);
-	if (!event)
-		return -ENOMEM;
-
-	set_bit(IIDC_EVENT_BEFORE_MTU_CHANGE, event->type);
-	ice_send_event_to_aux(pf, event);
-	clear_bit(IIDC_EVENT_BEFORE_MTU_CHANGE, event->type);
-
 	netdev->mtu = (unsigned int)new_mtu;
 
 	/* if VSI is up, bring it down and then back up */
@@ -6872,21 +6885,18 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 		err = ice_down(vsi);
 		if (err) {
 			netdev_err(netdev, "change MTU if_down err %d\n", err);
-			goto event_after;
+			return err;
 		}
 
 		err = ice_up(vsi);
 		if (err) {
 			netdev_err(netdev, "change MTU if_up err %d\n", err);
-			goto event_after;
+			return err;
 		}
 	}
 
 	netdev_dbg(netdev, "changed MTU to %d\n", new_mtu);
-event_after:
-	set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type);
-	ice_send_event_to_aux(pf, event);
-	kfree(event);
+	set_bit(ICE_FLAG_MTU_CHANGED, pf->flags);
 
 	return err;
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 408f78e3eb13..1be3cd4b2bef 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -2182,24 +2182,6 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
 
 	dev = ice_pf_to_dev(pf);
 
-	/* single place to detect unsuccessful return values */
-	if (v_retval) {
-		vf->num_inval_msgs++;
-		dev_info(dev, "VF %d failed opcode %d, retval: %d\n", vf->vf_id,
-			 v_opcode, v_retval);
-		if (vf->num_inval_msgs > ICE_DFLT_NUM_INVAL_MSGS_ALLOWED) {
-			dev_err(dev, "Number of invalid messages exceeded for VF %d\n",
-				vf->vf_id);
-			dev_err(dev, "Use PF Control I/F to enable the VF\n");
-			set_bit(ICE_VF_STATE_DIS, vf->vf_states);
-			return -EIO;
-		}
-	} else {
-		vf->num_valid_msgs++;
-		/* reset the invalid counter, if a valid message is received. */
-		vf->num_inval_msgs = 0;
-	}
-
 	aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval,
 				       msg, msglen, NULL);
 	if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) {
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 752487a1bdd6..8f27255cc0cc 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -14,7 +14,6 @@
 #define ICE_MAX_MACADDR_PER_VF		18
 
 /* Malicious Driver Detection */
-#define ICE_DFLT_NUM_INVAL_MSGS_ALLOWED		10
 #define ICE_MDD_EVENTS_THRESHOLD		30
 
 /* Static VF transaction/status register def */
@@ -134,8 +133,6 @@ struct ice_vf {
 	unsigned int max_tx_rate;	/* Maximum Tx bandwidth limit in Mbps */
 	DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS);	/* VF runtime states */
 
-	u64 num_inval_msgs;		/* number of continuous invalid msgs */
-	u64 num_valid_msgs;		/* number of valid msgs detected */
 	unsigned long vf_caps;		/* VF's adv. capabilities */
 	u8 num_req_qs;			/* num of queue pairs requested by VF */
 	u16 num_mac;
diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 41d11137cde0..5712c3e94be8 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -260,9 +260,9 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
 
 	if (ctl & LTQ_DMA_EOP) {
 		ch->skb_head->protocol = eth_type_trans(ch->skb_head, net_dev);
-		netif_receive_skb(ch->skb_head);
 		net_dev->stats.rx_packets++;
 		net_dev->stats.rx_bytes += ch->skb_head->len;
+		netif_receive_skb(ch->skb_head);
 		ch->skb_head = NULL;
 		ch->skb_tail = NULL;
 		ret = XRX200_DMA_PACKET_COMPLETE;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
index a73a8017e0ee..a79201a9a6f0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
@@ -605,6 +605,7 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req)
 	} else if (!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK)) {
 		/* Registers that can be accessed from PF */
 		switch (offset) {
+		case CPT_AF_DIAG:
 		case CPT_AF_CTL:
 		case CPT_AF_PF_FUNC:
 		case CPT_AF_BLK_RST:
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index cad93f747d0c..73cd0a4b7291 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -554,6 +554,7 @@ static int prestera_switch_set_base_mac_addr(struct prestera_switch *sw)
 		dev_info(prestera_dev(sw), "using random base mac address\n");
 	}
 	of_node_put(base_mac_np);
+	of_node_put(np);
 
 	return prestera_hw_switch_mac_set(sw, sw->base_mac);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 17fe05809653..3eacd8739929 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -131,11 +131,8 @@ static int cmd_alloc_index(struct mlx5_cmd *cmd)
 
 static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&cmd->alloc_lock, flags);
+	lockdep_assert_held(&cmd->alloc_lock);
 	set_bit(idx, &cmd->bitmask);
-	spin_unlock_irqrestore(&cmd->alloc_lock, flags);
 }
 
 static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
@@ -145,17 +142,21 @@ static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
 
 static void cmd_ent_put(struct mlx5_cmd_work_ent *ent)
 {
+	struct mlx5_cmd *cmd = ent->cmd;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cmd->alloc_lock, flags);
 	if (!refcount_dec_and_test(&ent->refcnt))
-		return;
+		goto out;
 
 	if (ent->idx >= 0) {
-		struct mlx5_cmd *cmd = ent->cmd;
-
 		cmd_free_index(cmd, ent->idx);
 		up(ent->page_queue ? &cmd->pages_sem : &cmd->sem);
 	}
 
 	cmd_free_ent(ent);
+out:
+	spin_unlock_irqrestore(&cmd->alloc_lock, flags);
 }
 
 static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
index da169b816665..d4239e3b3c88 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
@@ -88,9 +88,6 @@ void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
 			 (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
 		MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout);
 		break;
-	case MLX5E_PACKET_MERGE_SHAMPO:
-		MLX5_SET(tirc, tirc, packet_merge_mask, MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO);
-		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bf80fb612449..3667f5ef5990 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3616,8 +3616,7 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable)
 		goto out;
 	}
 
-	err = mlx5e_safe_switch_params(priv, &new_params,
-				       mlx5e_modify_tirs_packet_merge_ctx, NULL, reset);
+	err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
 out:
 	mutex_unlock(&priv->state_lock);
 	return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
index 1ca01a5b6cdd..626aa60b6099 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
@@ -126,6 +126,10 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
 		return;
 	}
 
+	/* Handle multipath entry with lower priority value */
+	if (mp->mfi && mp->mfi != fi && fi->fib_priority >= mp->mfi->fib_priority)
+		return;
+
 	/* Handle add/replace event */
 	nhs = fib_info_num_path(fi);
 	if (nhs == 1) {
@@ -135,12 +139,13 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
 			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
 
 			if (i < 0)
-				i = MLX5_LAG_NORMAL_AFFINITY;
-			else
-				++i;
+				return;
 
+			i++;
 			mlx5_lag_set_port_affinity(ldev, i);
 		}
+
+		mp->mfi = fi;
 		return;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
index 1e8ec4f236b2..df58cba37930 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -121,9 +121,6 @@ u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains)
 
 u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains)
 {
-	if (!mlx5_chains_prios_supported(chains))
-		return 1;
-
 	if (mlx5_chains_ignore_flow_level_supported(chains))
 		return UINT_MAX;
 
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index 949858891973..fdb4d7e7296c 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -60,6 +60,12 @@ static int ocelot_chain_to_block(int chain, bool ingress)
  */
 static int ocelot_chain_to_lookup(int chain)
 {
+	/* Backwards compatibility with older, single-chain tc-flower
+	 * offload support in Ocelot
+	 */
+	if (chain == 0)
+		return 0;
+
 	return (chain / VCAP_LOOKUP) % 10;
 }
 
@@ -68,7 +74,15 @@ static int ocelot_chain_to_lookup(int chain)
  */
 static int ocelot_chain_to_pag(int chain)
 {
-	int lookup = ocelot_chain_to_lookup(chain);
+	int lookup;
+
+	/* Backwards compatibility with older, single-chain tc-flower
+	 * offload support in Ocelot
+	 */
+	if (chain == 0)
+		return 0;
+
+	lookup = ocelot_chain_to_lookup(chain);
 
 	/* calculate PAG value as chain index relative to the first PAG */
 	return chain - VCAP_IS2_CHAIN(lookup, 0);
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index bc39558fe82b..756f97dce85b 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -1471,6 +1471,7 @@ static int lpc_eth_drv_resume(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct netdata_local *pldat;
+	int ret;
 
 	if (device_may_wakeup(&pdev->dev))
 		disable_irq_wake(ndev->irq);
@@ -1480,7 +1481,9 @@ static int lpc_eth_drv_resume(struct platform_device *pdev)
 			pldat = netdev_priv(ndev);
 
 			/* Enable interface clock */
-			clk_enable(pldat->clk);
+			ret = clk_enable(pldat->clk);
+			if (ret)
+				return ret;
 
 			/* Reset and initialize */
 			__lpc_eth_reset(pldat);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 8ac38828ba45..48cf4355bc47 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -3806,11 +3806,11 @@ bool qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *p_disabled_vfs)
 	return found;
 }
 
-static void qed_iov_get_link(struct qed_hwfn *p_hwfn,
-			     u16 vfid,
-			     struct qed_mcp_link_params *p_params,
-			     struct qed_mcp_link_state *p_link,
-			     struct qed_mcp_link_capabilities *p_caps)
+static int qed_iov_get_link(struct qed_hwfn *p_hwfn,
+			    u16 vfid,
+			    struct qed_mcp_link_params *p_params,
+			    struct qed_mcp_link_state *p_link,
+			    struct qed_mcp_link_capabilities *p_caps)
 {
 	struct qed_vf_info *p_vf = qed_iov_get_vf_info(p_hwfn,
 						       vfid,
@@ -3818,7 +3818,7 @@ static void qed_iov_get_link(struct qed_hwfn *p_hwfn,
 	struct qed_bulletin_content *p_bulletin;
 
 	if (!p_vf)
-		return;
+		return -EINVAL;
 
 	p_bulletin = p_vf->bulletin.p_virt;
 
@@ -3828,6 +3828,7 @@ static void qed_iov_get_link(struct qed_hwfn *p_hwfn,
 		__qed_vf_get_link_state(p_hwfn, p_link, p_bulletin);
 	if (p_caps)
 		__qed_vf_get_link_caps(p_hwfn, p_caps, p_bulletin);
+	return 0;
 }
 
 static int
@@ -4686,6 +4687,7 @@ static int qed_get_vf_config(struct qed_dev *cdev,
 	struct qed_public_vf_info *vf_info;
 	struct qed_mcp_link_state link;
 	u32 tx_rate;
+	int ret;
 
 	/* Sanitize request */
 	if (IS_VF(cdev))
@@ -4699,7 +4701,9 @@ static int qed_get_vf_config(struct qed_dev *cdev,
 
 	vf_info = qed_iov_get_public_vf_info(hwfn, vf_id, true);
 
-	qed_iov_get_link(hwfn, vf_id, NULL, &link, NULL);
+	ret = qed_iov_get_link(hwfn, vf_id, NULL, &link, NULL);
+	if (ret)
+		return ret;
 
 	/* Fill information about VF */
 	ivi->vf = vf_id;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index 597cd9cd57b5..7b0e390c0b07 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -513,6 +513,9 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn)
 						    p_iov->bulletin.size,
 						    &p_iov->bulletin.phys,
 						    GFP_KERNEL);
+	if (!p_iov->bulletin.p_virt)
+		goto free_pf2vf_reply;
+
 	DP_VERBOSE(p_hwfn, QED_MSG_IOV,
 		   "VF's bulletin Board [%p virt 0x%llx phys 0x%08x bytes]\n",
 		   p_iov->bulletin.p_virt,
@@ -552,6 +555,10 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn)
 
 	return rc;
 
+free_pf2vf_reply:
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  sizeof(union pfvf_tlvs),
+			  p_iov->pf2vf_reply, p_iov->pf2vf_reply_phys);
 free_vf2pf_request:
 	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
 			  sizeof(union vfpf_tlvs),
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index ad9029ae6848..77e5dffb558f 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -3146,7 +3146,7 @@ static int happy_meal_pci_probe(struct pci_dev *pdev,
 	if (err) {
 		printk(KERN_ERR "happymeal(PCI): Cannot register net device, "
 		       "aborting.\n");
-		goto err_out_iounmap;
+		goto err_out_free_coherent;
 	}
 
 	pci_set_drvdata(pdev, hp);
@@ -3179,6 +3179,10 @@ static int happy_meal_pci_probe(struct pci_dev *pdev,
 
 	return 0;
 
+err_out_free_coherent:
+	dma_free_coherent(hp->dma_dev, PAGE_SIZE,
+			  hp->happy_block, hp->hblock_dvma);
+
 err_out_iounmap:
 	iounmap(hp->gregs);
 
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index dc70a6bfaa6a..92ca739fac01 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -568,7 +568,9 @@ int cpts_register(struct cpts *cpts)
 	for (i = 0; i < CPTS_MAX_EVENTS; i++)
 		list_add(&cpts->pool_data[i].list, &cpts->pool);
 
-	clk_enable(cpts->refclk);
+	err = clk_enable(cpts->refclk);
+	if (err)
+		return err;
 
 	cpts_write32(cpts, CPTS_EN, control);
 	cpts_write32(cpts, TS_PEND_EN, int_enable);
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index 519599480b15..77fa2cb03aca 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1183,7 +1183,7 @@ static int xemaclite_of_probe(struct platform_device *ofdev)
 	if (rc) {
 		dev_err(dev,
 			"Cannot register network device, aborting\n");
-		goto error;
+		goto put_node;
 	}
 
 	dev_info(dev,
@@ -1191,6 +1191,8 @@ static int xemaclite_of_probe(struct platform_device *ofdev)
 		 (unsigned long __force)ndev->mem_start, lp->base_addr, ndev->irq);
 	return 0;
 
+put_node:
+	of_node_put(lp->phy_node);
 error:
 	free_netdev(ndev);
 	return rc;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 3646469433b1..fde1c492ca02 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1587,6 +1587,9 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 	pcpu_sum = kvmalloc_array(num_possible_cpus(),
 				  sizeof(struct netvsc_ethtool_pcpu_stats),
 				  GFP_KERNEL);
+	if (!pcpu_sum)
+		return;
+
 	netvsc_get_pcpu_stats(dev, pcpu_sum);
 	for_each_present_cpu(cpu) {
 		struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c
index 7d2abaf2b2c9..64fb76c1e395 100644
--- a/drivers/net/mdio/mdio-mscc-miim.c
+++ b/drivers/net/mdio/mdio-mscc-miim.c
@@ -187,6 +187,13 @@ static const struct regmap_config mscc_miim_regmap_config = {
 	.reg_stride	= 4,
 };
 
+static const struct regmap_config mscc_miim_phy_regmap_config = {
+	.reg_bits	= 32,
+	.val_bits	= 32,
+	.reg_stride	= 4,
+	.name		= "phy",
+};
+
 int mscc_miim_setup(struct device *dev, struct mii_bus **pbus, const char *name,
 		    struct regmap *mii_regmap, int status_offset)
 {
@@ -250,7 +257,7 @@ static int mscc_miim_probe(struct platform_device *pdev)
 		}
 
 		phy_regmap = devm_regmap_init_mmio(&pdev->dev, phy_regs,
-						   &mscc_miim_regmap_config);
+						   &mscc_miim_phy_regmap_config);
 		if (IS_ERR(phy_regmap)) {
 			dev_err(&pdev->dev, "Unable to create phy register regmap\n");
 			return PTR_ERR(phy_regmap);
diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
index 211b5476a6f5..ce17b2af3218 100644
--- a/drivers/net/phy/dp83822.c
+++ b/drivers/net/phy/dp83822.c
@@ -274,7 +274,7 @@ static int dp83822_config_intr(struct phy_device *phydev)
 		if (err < 0)
 			return err;
 
-		err = phy_write(phydev, MII_DP83822_MISR1, 0);
+		err = phy_write(phydev, MII_DP83822_MISR2, 0);
 		if (err < 0)
 			return err;
 
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 2429db614b59..2702faf7b0f6 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1687,8 +1687,8 @@ static int marvell_suspend(struct phy_device *phydev)
 	int err;
 
 	/* Suspend the fiber mode first */
-	if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
-			       phydev->supported)) {
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
+			      phydev->supported)) {
 		err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE);
 		if (err < 0)
 			goto error;
@@ -1722,8 +1722,8 @@ static int marvell_resume(struct phy_device *phydev)
 	int err;
 
 	/* Resume the fiber mode first */
-	if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
-			       phydev->supported)) {
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
+			      phydev->supported)) {
 		err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE);
 		if (err < 0)
 			goto error;
diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c
index 7e7904fee1d9..73f7962a37d3 100644
--- a/drivers/net/phy/meson-gxl.c
+++ b/drivers/net/phy/meson-gxl.c
@@ -30,8 +30,12 @@
 #define  INTSRC_LINK_DOWN	BIT(4)
 #define  INTSRC_REMOTE_FAULT	BIT(5)
 #define  INTSRC_ANEG_COMPLETE	BIT(6)
+#define  INTSRC_ENERGY_DETECT	BIT(7)
 #define INTSRC_MASK	30
 
+#define INT_SOURCES (INTSRC_LINK_DOWN | INTSRC_ANEG_COMPLETE | \
+		     INTSRC_ENERGY_DETECT)
+
 #define BANK_ANALOG_DSP		0
 #define BANK_WOL		1
 #define BANK_BIST		3
@@ -200,7 +204,6 @@ static int meson_gxl_ack_interrupt(struct phy_device *phydev)
 
 static int meson_gxl_config_intr(struct phy_device *phydev)
 {
-	u16 val;
 	int ret;
 
 	if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
@@ -209,16 +212,9 @@ static int meson_gxl_config_intr(struct phy_device *phydev)
 		if (ret)
 			return ret;
 
-		val = INTSRC_ANEG_PR
-			| INTSRC_PARALLEL_FAULT
-			| INTSRC_ANEG_LP_ACK
-			| INTSRC_LINK_DOWN
-			| INTSRC_REMOTE_FAULT
-			| INTSRC_ANEG_COMPLETE;
-		ret = phy_write(phydev, INTSRC_MASK, val);
+		ret = phy_write(phydev, INTSRC_MASK, INT_SOURCES);
 	} else {
-		val = 0;
-		ret = phy_write(phydev, INTSRC_MASK, val);
+		ret = phy_write(phydev, INTSRC_MASK, 0);
 
 		/* Ack any pending IRQ */
 		ret = meson_gxl_ack_interrupt(phydev);
@@ -237,10 +233,23 @@ static irqreturn_t meson_gxl_handle_interrupt(struct phy_device *phydev)
 		return IRQ_NONE;
 	}
 
+	irq_status &= INT_SOURCES;
+
 	if (irq_status == 0)
 		return IRQ_NONE;
 
-	phy_trigger_machine(phydev);
+	/* Aneg-complete interrupt is used for link-up detection */
+	if (phydev->autoneg == AUTONEG_ENABLE &&
+	    irq_status == INTSRC_ENERGY_DETECT)
+		return IRQ_HANDLED;
+
+	/* Give PHY some time before MAC starts sending data. This works
+	 * around an issue where network doesn't come up properly.
+	 */
+	if (!(irq_status & INTSRC_LINK_DOWN))
+		phy_queue_state_machine(phydev, msecs_to_jiffies(100));
+	else
+		phy_trigger_machine(phydev);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
index ebfeeb3c67c1..7e3017e7a1c0 100644
--- a/drivers/net/phy/mscc/mscc_main.c
+++ b/drivers/net/phy/mscc/mscc_main.c
@@ -2685,3 +2685,6 @@ MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl);
 MODULE_DESCRIPTION("Microsemi VSC85xx PHY driver");
 MODULE_AUTHOR("Nagaraju Lakkaraju");
 MODULE_LICENSE("Dual MIT/GPL");
+
+MODULE_FIRMWARE(MSCC_VSC8584_REVB_INT8051_FW);
+MODULE_FIRMWARE(MSCC_VSC8574_REVB_INT8051_FW);
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index b8e20a3f2b84..415f16662f88 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1537,11 +1537,8 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb)
 		netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
 		lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
 
-		if (dev->domain_data.phyirq > 0) {
-			local_irq_disable();
-			generic_handle_irq(dev->domain_data.phyirq);
-			local_irq_enable();
-		}
+		if (dev->domain_data.phyirq > 0)
+			generic_handle_irq_safe(dev->domain_data.phyirq);
 	} else {
 		netdev_warn(dev->net,
 			    "unexpected interrupt: 0x%08x\n", intdata);
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index bc1e3dd67c04..a0f29482294d 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -84,9 +84,10 @@ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
 	ret = fn(dev, USB_VENDOR_REQUEST_READ_REGISTER, USB_DIR_IN
 		 | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
 		 0, index, &buf, 4);
-	if (unlikely(ret < 0)) {
-		netdev_warn(dev->net, "Failed to read reg index 0x%08x: %d\n",
-			    index, ret);
+	if (ret < 0) {
+		if (ret != -ENODEV)
+			netdev_warn(dev->net, "Failed to read reg index 0x%08x: %d\n",
+				    index, ret);
 		return ret;
 	}
 
@@ -116,7 +117,7 @@ static int __must_check __smsc95xx_write_reg(struct usbnet *dev, u32 index,
 	ret = fn(dev, USB_VENDOR_REQUEST_WRITE_REGISTER, USB_DIR_OUT
 		 | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
 		 0, index, &buf, 4);
-	if (unlikely(ret < 0))
+	if (ret < 0 && ret != -ENODEV)
 		netdev_warn(dev->net, "Failed to write reg index 0x%08x: %d\n",
 			    index, ret);
 
@@ -159,6 +160,9 @@ static int __must_check __smsc95xx_phy_wait_not_busy(struct usbnet *dev,
 	do {
 		ret = __smsc95xx_read_reg(dev, MII_ADDR, &val, in_pm);
 		if (ret < 0) {
+			/* Ignore -ENODEV error during disconnect() */
+			if (ret == -ENODEV)
+				return 0;
 			netdev_warn(dev->net, "Error reading MII_ACCESS\n");
 			return ret;
 		}
@@ -194,7 +198,8 @@ static int __smsc95xx_mdio_read(struct usbnet *dev, int phy_id, int idx,
 	addr = mii_address_cmd(phy_id, idx, MII_READ_ | MII_BUSY_);
 	ret = __smsc95xx_write_reg(dev, MII_ADDR, addr, in_pm);
 	if (ret < 0) {
-		netdev_warn(dev->net, "Error writing MII_ADDR\n");
+		if (ret != -ENODEV)
+			netdev_warn(dev->net, "Error writing MII_ADDR\n");
 		goto done;
 	}
 
@@ -206,7 +211,8 @@ static int __smsc95xx_mdio_read(struct usbnet *dev, int phy_id, int idx,
 
 	ret = __smsc95xx_read_reg(dev, MII_DATA, &val, in_pm);
 	if (ret < 0) {
-		netdev_warn(dev->net, "Error reading MII_DATA\n");
+		if (ret != -ENODEV)
+			netdev_warn(dev->net, "Error reading MII_DATA\n");
 		goto done;
 	}
 
@@ -214,6 +220,10 @@ static int __smsc95xx_mdio_read(struct usbnet *dev, int phy_id, int idx,
 
 done:
 	mutex_unlock(&dev->phy_mutex);
+
+	/* Ignore -ENODEV error during disconnect() */
+	if (ret == -ENODEV)
+		return 0;
 	return ret;
 }
 
@@ -235,7 +245,8 @@ static void __smsc95xx_mdio_write(struct usbnet *dev, int phy_id,
 	val = regval;
 	ret = __smsc95xx_write_reg(dev, MII_DATA, val, in_pm);
 	if (ret < 0) {
-		netdev_warn(dev->net, "Error writing MII_DATA\n");
+		if (ret != -ENODEV)
+			netdev_warn(dev->net, "Error writing MII_DATA\n");
 		goto done;
 	}
 
@@ -243,7 +254,8 @@ static void __smsc95xx_mdio_write(struct usbnet *dev, int phy_id,
 	addr = mii_address_cmd(phy_id, idx, MII_WRITE_ | MII_BUSY_);
 	ret = __smsc95xx_write_reg(dev, MII_ADDR, addr, in_pm);
 	if (ret < 0) {
-		netdev_warn(dev->net, "Error writing MII_ADDR\n");
+		if (ret != -ENODEV)
+			netdev_warn(dev->net, "Error writing MII_ADDR\n");
 		goto done;
 	}
 
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
index a46067c38bf5..0fad1331303c 100644
--- a/drivers/net/wireguard/device.c
+++ b/drivers/net/wireguard/device.c
@@ -59,9 +59,7 @@ out:
 	return ret;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int wg_pm_notification(struct notifier_block *nb, unsigned long action,
-			      void *data)
+static int wg_pm_notification(struct notifier_block *nb, unsigned long action, void *data)
 {
 	struct wg_device *wg;
 	struct wg_peer *peer;
@@ -92,7 +90,24 @@ static int wg_pm_notification(struct notifier_block *nb, unsigned long action,
 }
 
 static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification };
-#endif
+
+static int wg_vm_notification(struct notifier_block *nb, unsigned long action, void *data)
+{
+	struct wg_device *wg;
+	struct wg_peer *peer;
+
+	rtnl_lock();
+	list_for_each_entry(wg, &device_list, device_list) {
+		mutex_lock(&wg->device_update_lock);
+		list_for_each_entry(peer, &wg->peer_list, peer_list)
+			wg_noise_expire_current_peer_keypairs(peer);
+		mutex_unlock(&wg->device_update_lock);
+	}
+	rtnl_unlock();
+	return 0;
+}
+
+static struct notifier_block vm_notifier = { .notifier_call = wg_vm_notification };
 
 static int wg_stop(struct net_device *dev)
 {
@@ -424,16 +439,18 @@ int __init wg_device_init(void)
 {
 	int ret;
 
-#ifdef CONFIG_PM_SLEEP
 	ret = register_pm_notifier(&pm_notifier);
 	if (ret)
 		return ret;
-#endif
 
-	ret = register_pernet_device(&pernet_ops);
+	ret = register_random_vmfork_notifier(&vm_notifier);
 	if (ret)
 		goto error_pm;
 
+	ret = register_pernet_device(&pernet_ops);
+	if (ret)
+		goto error_vm;
+
 	ret = rtnl_link_register(&link_ops);
 	if (ret)
 		goto error_pernet;
@@ -442,10 +459,10 @@ int __init wg_device_init(void)
 
 error_pernet:
 	unregister_pernet_device(&pernet_ops);
+error_vm:
+	unregister_random_vmfork_notifier(&vm_notifier);
 error_pm:
-#ifdef CONFIG_PM_SLEEP
 	unregister_pm_notifier(&pm_notifier);
-#endif
 	return ret;
 }
 
@@ -453,8 +470,7 @@ void wg_device_uninit(void)
 {
 	rtnl_link_unregister(&link_ops);
 	unregister_pernet_device(&pernet_ops);
-#ifdef CONFIG_PM_SLEEP
+	unregister_random_vmfork_notifier(&vm_notifier);
 	unregister_pm_notifier(&pm_notifier);
-#endif
 	rcu_barrier();
 }
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 62c453a21e49..7c1c2658cb5f 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -2611,36 +2611,9 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb)
 		ath10k_mac_handle_beacon(ar, skb);
 
 	if (ieee80211_is_beacon(hdr->frame_control) ||
-	    ieee80211_is_probe_resp(hdr->frame_control)) {
-		struct ieee80211_mgmt *mgmt = (void *)skb->data;
-		enum cfg80211_bss_frame_type ftype;
-		u8 *ies;
-		int ies_ch;
-
+	    ieee80211_is_probe_resp(hdr->frame_control))
 		status->boottime_ns = ktime_get_boottime_ns();
 
-		if (!ar->scan_channel)
-			goto drop;
-
-		ies = mgmt->u.beacon.variable;
-
-		if (ieee80211_is_beacon(mgmt->frame_control))
-			ftype = CFG80211_BSS_FTYPE_BEACON;
-		else
-			ftype = CFG80211_BSS_FTYPE_PRESP;
-
-		ies_ch = cfg80211_get_ies_channel_number(mgmt->u.beacon.variable,
-							 skb_tail_pointer(skb) - ies,
-							 sband->band, ftype);
-
-		if (ies_ch > 0 && ies_ch != channel) {
-			ath10k_dbg(ar, ATH10K_DBG_MGMT,
-				   "channel mismatched ds channel %d scan channel %d\n",
-				   ies_ch, channel);
-			goto drop;
-		}
-	}
-
 	ath10k_dbg(ar, ATH10K_DBG_MGMT,
 		   "event mgmt rx skb %pK len %d ftype %02x stype %02x\n",
 		   skb, skb->len,
@@ -2654,10 +2627,6 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb)
 	ieee80211_rx_ni(ar->hw, skb);
 
 	return 0;
-
-drop:
-	dev_kfree_skb(skb);
-	return 0;
 }
 
 static int freq_to_idx(struct ath10k *ar, int freq)
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 7748f07e2cf1..daa4e6106aac 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -424,14 +424,12 @@ static bool xennet_tx_buf_gc(struct netfront_queue *queue)
 			queue->tx_link[id] = TX_LINK_NONE;
 			skb = queue->tx_skbs[id];
 			queue->tx_skbs[id] = NULL;
-			if (unlikely(gnttab_query_foreign_access(
-				queue->grant_tx_ref[id]) != 0)) {
+			if (unlikely(!gnttab_end_foreign_access_ref(
+				queue->grant_tx_ref[id], GNTMAP_readonly))) {
 				dev_alert(dev,
 					  "Grant still in use by backend domain\n");
 				goto err;
 			}
-			gnttab_end_foreign_access_ref(
-				queue->grant_tx_ref[id], GNTMAP_readonly);
 			gnttab_release_grant_reference(
 				&queue->gref_tx_head, queue->grant_tx_ref[id]);
 			queue->grant_tx_ref[id] = GRANT_INVALID_REF;
@@ -990,7 +988,6 @@ static int xennet_get_responses(struct netfront_queue *queue,
 	struct device *dev = &queue->info->netdev->dev;
 	struct bpf_prog *xdp_prog;
 	struct xdp_buff xdp;
-	unsigned long ret;
 	int slots = 1;
 	int err = 0;
 	u32 verdict;
@@ -1032,8 +1029,13 @@ static int xennet_get_responses(struct netfront_queue *queue,
 			goto next;
 		}
 
-		ret = gnttab_end_foreign_access_ref(ref, 0);
-		BUG_ON(!ret);
+		if (!gnttab_end_foreign_access_ref(ref, 0)) {
+			dev_alert(dev,
+				  "Grant still in use by backend domain\n");
+			queue->info->broken = true;
+			dev_alert(dev, "Disabled for further use\n");
+			return -EINVAL;
+		}
 
 		gnttab_release_grant_reference(&queue->gref_rx_head, ref);
 
@@ -1254,6 +1256,10 @@ static int xennet_poll(struct napi_struct *napi, int budget)
 					   &need_xdp_flush);
 
 		if (unlikely(err)) {
+			if (queue->info->broken) {
+				spin_unlock(&queue->rx_lock);
+				return 0;
+			}
 err:
 			while ((skb = __skb_dequeue(&tmpq)))
 				__skb_queue_tail(&errq, skb);
@@ -1918,7 +1924,7 @@ static int setup_netfront(struct xenbus_device *dev,
 			struct netfront_queue *queue, unsigned int feature_split_evtchn)
 {
 	struct xen_netif_tx_sring *txs;
-	struct xen_netif_rx_sring *rxs;
+	struct xen_netif_rx_sring *rxs = NULL;
 	grant_ref_t gref;
 	int err;
 
@@ -1938,21 +1944,21 @@ static int setup_netfront(struct xenbus_device *dev,
 
 	err = xenbus_grant_ring(dev, txs, 1, &gref);
 	if (err < 0)
-		goto grant_tx_ring_fail;
+		goto fail;
 	queue->tx_ring_ref = gref;
 
 	rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
 	if (!rxs) {
 		err = -ENOMEM;
 		xenbus_dev_fatal(dev, err, "allocating rx ring page");
-		goto alloc_rx_ring_fail;
+		goto fail;
 	}
 	SHARED_RING_INIT(rxs);
 	FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
 
 	err = xenbus_grant_ring(dev, rxs, 1, &gref);
 	if (err < 0)
-		goto grant_rx_ring_fail;
+		goto fail;
 	queue->rx_ring_ref = gref;
 
 	if (feature_split_evtchn)
@@ -1965,22 +1971,28 @@ static int setup_netfront(struct xenbus_device *dev,
 		err = setup_netfront_single(queue);
 
 	if (err)
-		goto alloc_evtchn_fail;
+		goto fail;
 
 	return 0;
 
 	/* If we fail to setup netfront, it is safe to just revoke access to
 	 * granted pages because backend is not accessing it at this point.
 	 */
-alloc_evtchn_fail:
-	gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0);
-grant_rx_ring_fail:
-	free_page((unsigned long)rxs);
-alloc_rx_ring_fail:
-	gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0);
-grant_tx_ring_fail:
-	free_page((unsigned long)txs);
-fail:
+ fail:
+	if (queue->rx_ring_ref != GRANT_INVALID_REF) {
+		gnttab_end_foreign_access(queue->rx_ring_ref, 0,
+					  (unsigned long)rxs);
+		queue->rx_ring_ref = GRANT_INVALID_REF;
+	} else {
+		free_page((unsigned long)rxs);
+	}
+	if (queue->tx_ring_ref != GRANT_INVALID_REF) {
+		gnttab_end_foreign_access(queue->tx_ring_ref, 0,
+					  (unsigned long)txs);
+		queue->tx_ring_ref = GRANT_INVALID_REF;
+	} else {
+		free_page((unsigned long)txs);
+	}
 	return err;
 }
 
diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c
index d7db1a0e6be1..00d8ea6dcb5d 100644
--- a/drivers/nfc/port100.c
+++ b/drivers/nfc/port100.c
@@ -1612,7 +1612,9 @@ free_nfc_dev:
 	nfc_digital_free_device(dev->nfc_digital_dev);
 
 error:
+	usb_kill_urb(dev->in_urb);
 	usb_free_urb(dev->in_urb);
+	usb_kill_urb(dev->out_urb);
 	usb_free_urb(dev->out_urb);
 	usb_put_dev(dev->udev);
 
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 091a0ca16361..496d775c6770 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1233,44 +1233,6 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
 }
 CONFIGFS_ATTR(nvmet_subsys_, attr_model);
 
-static ssize_t nvmet_subsys_attr_discovery_nqn_show(struct config_item *item,
-			char *page)
-{
-	return snprintf(page, PAGE_SIZE, "%s\n",
-			nvmet_disc_subsys->subsysnqn);
-}
-
-static ssize_t nvmet_subsys_attr_discovery_nqn_store(struct config_item *item,
-			const char *page, size_t count)
-{
-	struct nvmet_subsys *subsys = to_subsys(item);
-	char *subsysnqn;
-	int len;
-
-	len = strcspn(page, "\n");
-	if (!len)
-		return -EINVAL;
-
-	subsysnqn = kmemdup_nul(page, len, GFP_KERNEL);
-	if (!subsysnqn)
-		return -ENOMEM;
-
-	/*
-	 * The discovery NQN must be different from subsystem NQN.
-	 */
-	if (!strcmp(subsysnqn, subsys->subsysnqn)) {
-		kfree(subsysnqn);
-		return -EBUSY;
-	}
-	down_write(&nvmet_config_sem);
-	kfree(nvmet_disc_subsys->subsysnqn);
-	nvmet_disc_subsys->subsysnqn = subsysnqn;
-	up_write(&nvmet_config_sem);
-
-	return count;
-}
-CONFIGFS_ATTR(nvmet_subsys_, attr_discovery_nqn);
-
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item,
 						char *page)
@@ -1300,7 +1262,6 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = {
 	&nvmet_subsys_attr_attr_cntlid_min,
 	&nvmet_subsys_attr_attr_cntlid_max,
 	&nvmet_subsys_attr_attr_model,
-	&nvmet_subsys_attr_attr_discovery_nqn,
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 	&nvmet_subsys_attr_attr_pi_enable,
 #endif
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 5119c687de68..626caf6f1e4b 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1493,8 +1493,7 @@ static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
 	if (!port)
 		return NULL;
 
-	if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn) ||
-	    !strcmp(nvmet_disc_subsys->subsysnqn, subsysnqn)) {
+	if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
 		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
 			return NULL;
 		return nvmet_disc_subsys;
diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c
index 854d95163112..a2c3c207a04b 100644
--- a/drivers/pci/controller/pcie-apple.c
+++ b/drivers/pci/controller/pcie-apple.c
@@ -219,7 +219,7 @@ static int apple_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
 	if (hwirq < 0)
 		return -ENOSPC;
 
-	fwspec.param[1] += hwirq;
+	fwspec.param[fwspec.param_count - 2] += hwirq;
 
 	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &fwspec);
 	if (ret)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 588588cfda48..415f7664b010 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -596,7 +596,7 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state)
 		int error;
 
 		error = drv->suspend(pci_dev, state);
-		suspend_report_result(drv->suspend, error);
+		suspend_report_result(dev, drv->suspend, error);
 		if (error)
 			return error;
 
@@ -775,7 +775,7 @@ static int pci_pm_suspend(struct device *dev)
 		int error;
 
 		error = pm->suspend(dev);
-		suspend_report_result(pm->suspend, error);
+		suspend_report_result(dev, pm->suspend, error);
 		if (error)
 			return error;
 
@@ -821,7 +821,7 @@ static int pci_pm_suspend_noirq(struct device *dev)
 		int error;
 
 		error = pm->suspend_noirq(dev);
-		suspend_report_result(pm->suspend_noirq, error);
+		suspend_report_result(dev, pm->suspend_noirq, error);
 		if (error)
 			return error;
 
@@ -1010,7 +1010,7 @@ static int pci_pm_freeze(struct device *dev)
 		int error;
 
 		error = pm->freeze(dev);
-		suspend_report_result(pm->freeze, error);
+		suspend_report_result(dev, pm->freeze, error);
 		if (error)
 			return error;
 	}
@@ -1030,7 +1030,7 @@ static int pci_pm_freeze_noirq(struct device *dev)
 		int error;
 
 		error = pm->freeze_noirq(dev);
-		suspend_report_result(pm->freeze_noirq, error);
+		suspend_report_result(dev, pm->freeze_noirq, error);
 		if (error)
 			return error;
 	}
@@ -1116,7 +1116,7 @@ static int pci_pm_poweroff(struct device *dev)
 		int error;
 
 		error = pm->poweroff(dev);
-		suspend_report_result(pm->poweroff, error);
+		suspend_report_result(dev, pm->poweroff, error);
 		if (error)
 			return error;
 	}
@@ -1154,7 +1154,7 @@ static int pci_pm_poweroff_noirq(struct device *dev)
 		int error;
 
 		error = pm->poweroff_noirq(dev);
-		suspend_report_result(pm->poweroff_noirq, error);
+		suspend_report_result(dev, pm->poweroff_noirq, error);
 		if (error)
 			return error;
 	}
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index e1a0c44bc686..d05ca6ebbb9d 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -141,11 +141,25 @@ config ARM_DMC620_PMU
 
 config MARVELL_CN10K_TAD_PMU
 	tristate "Marvell CN10K LLC-TAD PMU"
-	depends on ARM64 || (COMPILE_TEST && 64BIT)
+	depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT)
 	help
 	  Provides support for Last-Level cache Tag-and-data Units (LLC-TAD)
 	  performance monitors on CN10K family silicons.
 
+config APPLE_M1_CPU_PMU
+	bool "Apple M1 CPU PMU support"
+	depends on ARM_PMU && ARCH_APPLE
+	help
+	  Provides support for the non-architectural CPU PMUs present on
+	  the Apple M1 SoCs and derivatives.
+
 source "drivers/perf/hisilicon/Kconfig"
 
+config MARVELL_CN10K_DDR_PMU
+	tristate "Enable MARVELL CN10K DRAM Subsystem(DSS) PMU Support"
+	depends on ARM64 || (COMPILE_TEST && 64BIT)
+	help
+	  Enable perf support for Marvell DDR Performance monitoring
+	  event on CN10K platform.
+
 endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 2db5418d5b0a..4f43080ec54e 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -15,3 +15,5 @@ obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
 obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
 obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
+obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
+obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
new file mode 100644
index 000000000000..979a7c2b4f56
--- /dev/null
+++ b/drivers/perf/apple_m1_cpu_pmu.c
@@ -0,0 +1,584 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CPU PMU driver for the Apple M1 and derivatives
+ *
+ * Copyright (C) 2021 Google LLC
+ *
+ * Author: Marc Zyngier <[email protected]>
+ *
+ * Most of the information used in this driver was provided by the
+ * Asahi Linux project. The rest was experimentally discovered.
+ */
+
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+
+#include <asm/apple_m1_pmu.h>
+#include <asm/irq_regs.h>
+#include <asm/perf_event.h>
+
+#define M1_PMU_NR_COUNTERS		10
+
+#define M1_PMU_CFG_EVENT		GENMASK(7, 0)
+
+#define ANY_BUT_0_1			GENMASK(9, 2)
+#define ONLY_2_TO_7			GENMASK(7, 2)
+#define ONLY_2_4_6			(BIT(2) | BIT(4) | BIT(6))
+#define ONLY_5_6_7			(BIT(5) | BIT(6) | BIT(7))
+
+/*
+ * Description of the events we actually know about, as well as those with
+ * a specific counter affinity. Yes, this is a grand total of two known
+ * counters, and the rest is anybody's guess.
+ *
+ * Not all counters can count all events. Counters #0 and #1 are wired to
+ * count cycles and instructions respectively, and some events have
+ * bizarre mappings (every other counter, or even *one* counter). These
+ * restrictions equally apply to both P and E cores.
+ *
+ * It is worth noting that the PMUs attached to P and E cores are likely
+ * to be different because the underlying uarches are different. At the
+ * moment, we don't really need to distinguish between the two because we
+ * know next to nothing about the events themselves, and we already have
+ * per cpu-type PMU abstractions.
+ *
+ * If we eventually find out that the events are different across
+ * implementations, we'll have to introduce per cpu-type tables.
+ */
+enum m1_pmu_events {
+	M1_PMU_PERFCTR_UNKNOWN_01	= 0x01,
+	M1_PMU_PERFCTR_CPU_CYCLES	= 0x02,
+	M1_PMU_PERFCTR_INSTRUCTIONS	= 0x8c,
+	M1_PMU_PERFCTR_UNKNOWN_8d	= 0x8d,
+	M1_PMU_PERFCTR_UNKNOWN_8e	= 0x8e,
+	M1_PMU_PERFCTR_UNKNOWN_8f	= 0x8f,
+	M1_PMU_PERFCTR_UNKNOWN_90	= 0x90,
+	M1_PMU_PERFCTR_UNKNOWN_93	= 0x93,
+	M1_PMU_PERFCTR_UNKNOWN_94	= 0x94,
+	M1_PMU_PERFCTR_UNKNOWN_95	= 0x95,
+	M1_PMU_PERFCTR_UNKNOWN_96	= 0x96,
+	M1_PMU_PERFCTR_UNKNOWN_97	= 0x97,
+	M1_PMU_PERFCTR_UNKNOWN_98	= 0x98,
+	M1_PMU_PERFCTR_UNKNOWN_99	= 0x99,
+	M1_PMU_PERFCTR_UNKNOWN_9a	= 0x9a,
+	M1_PMU_PERFCTR_UNKNOWN_9b	= 0x9b,
+	M1_PMU_PERFCTR_UNKNOWN_9c	= 0x9c,
+	M1_PMU_PERFCTR_UNKNOWN_9f	= 0x9f,
+	M1_PMU_PERFCTR_UNKNOWN_bf	= 0xbf,
+	M1_PMU_PERFCTR_UNKNOWN_c0	= 0xc0,
+	M1_PMU_PERFCTR_UNKNOWN_c1	= 0xc1,
+	M1_PMU_PERFCTR_UNKNOWN_c4	= 0xc4,
+	M1_PMU_PERFCTR_UNKNOWN_c5	= 0xc5,
+	M1_PMU_PERFCTR_UNKNOWN_c6	= 0xc6,
+	M1_PMU_PERFCTR_UNKNOWN_c8	= 0xc8,
+	M1_PMU_PERFCTR_UNKNOWN_ca	= 0xca,
+	M1_PMU_PERFCTR_UNKNOWN_cb	= 0xcb,
+	M1_PMU_PERFCTR_UNKNOWN_f5	= 0xf5,
+	M1_PMU_PERFCTR_UNKNOWN_f6	= 0xf6,
+	M1_PMU_PERFCTR_UNKNOWN_f7	= 0xf7,
+	M1_PMU_PERFCTR_UNKNOWN_f8	= 0xf8,
+	M1_PMU_PERFCTR_UNKNOWN_fd	= 0xfd,
+	M1_PMU_PERFCTR_LAST		= M1_PMU_CFG_EVENT,
+
+	/*
+	 * From this point onwards, these are not actual HW events,
+	 * but attributes that get stored in hw->config_base.
+	 */
+	M1_PMU_CFG_COUNT_USER		= BIT(8),
+	M1_PMU_CFG_COUNT_KERNEL		= BIT(9),
+};
+
+/*
+ * Per-event affinity table. Most events can be installed on counter
+ * 2-9, but there are a number of exceptions. Note that this table
+ * has been created experimentally, and I wouldn't be surprised if more
+ * counters had strange affinities.
+ */
+static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = {
+	[0 ... M1_PMU_PERFCTR_LAST]	= ANY_BUT_0_1,
+	[M1_PMU_PERFCTR_UNKNOWN_01]	= BIT(7),
+	[M1_PMU_PERFCTR_CPU_CYCLES]	= ANY_BUT_0_1 | BIT(0),
+	[M1_PMU_PERFCTR_INSTRUCTIONS]	= BIT(7) | BIT(1),
+	[M1_PMU_PERFCTR_UNKNOWN_8d]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_8e]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_8f]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_90]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_93]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_94]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_95]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_96]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_97]	= BIT(7),
+	[M1_PMU_PERFCTR_UNKNOWN_98]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_99]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_9a]	= BIT(7),
+	[M1_PMU_PERFCTR_UNKNOWN_9b]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_9c]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_9f]	= BIT(7),
+	[M1_PMU_PERFCTR_UNKNOWN_bf]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c0]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c1]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c4]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c5]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c6]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c8]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_ca]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_cb]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_f5]	= ONLY_2_4_6,
+	[M1_PMU_PERFCTR_UNKNOWN_f6]	= ONLY_2_4_6,
+	[M1_PMU_PERFCTR_UNKNOWN_f7]	= ONLY_2_4_6,
+	[M1_PMU_PERFCTR_UNKNOWN_f8]	= ONLY_2_TO_7,
+	[M1_PMU_PERFCTR_UNKNOWN_fd]	= ONLY_2_4_6,
+};
+
+static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]	= M1_PMU_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	= M1_PMU_PERFCTR_INSTRUCTIONS,
+	/* No idea about the rest yet */
+};
+
+/* sysfs definitions */
+static ssize_t m1_pmu_events_sysfs_show(struct device *dev,
+					struct device_attribute *attr,
+					char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
+}
+
+#define M1_PMU_EVENT_ATTR(name, config)					\
+	PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config)
+
+static struct attribute *m1_pmu_event_attrs[] = {
+	M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES),
+	M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS),
+	NULL,
+};
+
+static const struct attribute_group m1_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = m1_pmu_event_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *m1_pmu_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static const struct attribute_group m1_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = m1_pmu_format_attrs,
+};
+
+/* Low level accessors. No synchronisation. */
+#define PMU_READ_COUNTER(_idx)						\
+	case _idx:	return read_sysreg_s(SYS_IMP_APL_PMC## _idx ##_EL1)
+
+#define PMU_WRITE_COUNTER(_val, _idx)					\
+	case _idx:							\
+		write_sysreg_s(_val, SYS_IMP_APL_PMC## _idx ##_EL1);	\
+		return
+
+static u64 m1_pmu_read_hw_counter(unsigned int index)
+{
+	switch (index) {
+		PMU_READ_COUNTER(0);
+		PMU_READ_COUNTER(1);
+		PMU_READ_COUNTER(2);
+		PMU_READ_COUNTER(3);
+		PMU_READ_COUNTER(4);
+		PMU_READ_COUNTER(5);
+		PMU_READ_COUNTER(6);
+		PMU_READ_COUNTER(7);
+		PMU_READ_COUNTER(8);
+		PMU_READ_COUNTER(9);
+	}
+
+	BUG();
+}
+
+static void m1_pmu_write_hw_counter(u64 val, unsigned int index)
+{
+	switch (index) {
+		PMU_WRITE_COUNTER(val, 0);
+		PMU_WRITE_COUNTER(val, 1);
+		PMU_WRITE_COUNTER(val, 2);
+		PMU_WRITE_COUNTER(val, 3);
+		PMU_WRITE_COUNTER(val, 4);
+		PMU_WRITE_COUNTER(val, 5);
+		PMU_WRITE_COUNTER(val, 6);
+		PMU_WRITE_COUNTER(val, 7);
+		PMU_WRITE_COUNTER(val, 8);
+		PMU_WRITE_COUNTER(val, 9);
+	}
+
+	BUG();
+}
+
+#define get_bit_offset(index, mask)	(__ffs(mask) + (index))
+
+static void __m1_pmu_enable_counter(unsigned int index, bool en)
+{
+	u64 val, bit;
+
+	switch (index) {
+	case 0 ... 7:
+		bit = BIT(get_bit_offset(index, PMCR0_CNT_ENABLE_0_7));
+		break;
+	case 8 ... 9:
+		bit = BIT(get_bit_offset(index - 8, PMCR0_CNT_ENABLE_8_9));
+		break;
+	default:
+		BUG();
+	}
+
+	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+
+	if (en)
+		val |= bit;
+	else
+		val &= ~bit;
+
+	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+}
+
+static void m1_pmu_enable_counter(unsigned int index)
+{
+	__m1_pmu_enable_counter(index, true);
+}
+
+static void m1_pmu_disable_counter(unsigned int index)
+{
+	__m1_pmu_enable_counter(index, false);
+}
+
+static void __m1_pmu_enable_counter_interrupt(unsigned int index, bool en)
+{
+	u64 val, bit;
+
+	switch (index) {
+	case 0 ... 7:
+		bit = BIT(get_bit_offset(index, PMCR0_PMI_ENABLE_0_7));
+		break;
+	case 8 ... 9:
+		bit = BIT(get_bit_offset(index - 8, PMCR0_PMI_ENABLE_8_9));
+		break;
+	default:
+		BUG();
+	}
+
+	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+
+	if (en)
+		val |= bit;
+	else
+		val &= ~bit;
+
+	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+}
+
+static void m1_pmu_enable_counter_interrupt(unsigned int index)
+{
+	__m1_pmu_enable_counter_interrupt(index, true);
+}
+
+static void m1_pmu_disable_counter_interrupt(unsigned int index)
+{
+	__m1_pmu_enable_counter_interrupt(index, false);
+}
+
+static void m1_pmu_configure_counter(unsigned int index, u8 event,
+				     bool user, bool kernel)
+{
+	u64 val, user_bit, kernel_bit;
+	int shift;
+
+	switch (index) {
+	case 0 ... 7:
+		user_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL0_0_7));
+		kernel_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL1_0_7));
+		break;
+	case 8 ... 9:
+		user_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL0_8_9));
+		kernel_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL1_8_9));
+		break;
+	default:
+		BUG();
+	}
+
+	val = read_sysreg_s(SYS_IMP_APL_PMCR1_EL1);
+
+	if (user)
+		val |= user_bit;
+	else
+		val &= ~user_bit;
+
+	if (kernel)
+		val |= kernel_bit;
+	else
+		val &= ~kernel_bit;
+
+	write_sysreg_s(val, SYS_IMP_APL_PMCR1_EL1);
+
+	/*
+	 * Counters 0 and 1 have fixed events. For anything else,
+	 * place the event at the expected location in the relevant
+	 * register (PMESR0 holds the event configuration for counters
+	 * 2-5, resp. PMESR1 for counters 6-9).
+	 */
+	switch (index) {
+	case 0 ... 1:
+		break;
+	case 2 ... 5:
+		shift = (index - 2) * 8;
+		val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1);
+		val &= ~((u64)0xff << shift);
+		val |= (u64)event << shift;
+		write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1);
+		break;
+	case 6 ... 9:
+		shift = (index - 6) * 8;
+		val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1);
+		val &= ~((u64)0xff << shift);
+		val |= (u64)event << shift;
+		write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1);
+		break;
+	}
+}
+
+/* arm_pmu backend */
+static void m1_pmu_enable_event(struct perf_event *event)
+{
+	bool user, kernel;
+	u8 evt;
+
+	evt = event->hw.config_base & M1_PMU_CFG_EVENT;
+	user = event->hw.config_base & M1_PMU_CFG_COUNT_USER;
+	kernel = event->hw.config_base & M1_PMU_CFG_COUNT_KERNEL;
+
+	m1_pmu_disable_counter_interrupt(event->hw.idx);
+	m1_pmu_disable_counter(event->hw.idx);
+	isb();
+
+	m1_pmu_configure_counter(event->hw.idx, evt, user, kernel);
+	m1_pmu_enable_counter(event->hw.idx);
+	m1_pmu_enable_counter_interrupt(event->hw.idx);
+	isb();
+}
+
+static void m1_pmu_disable_event(struct perf_event *event)
+{
+	m1_pmu_disable_counter_interrupt(event->hw.idx);
+	m1_pmu_disable_counter(event->hw.idx);
+	isb();
+}
+
+static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+	struct pt_regs *regs;
+	u64 overflow, state;
+	int idx;
+
+	overflow = read_sysreg_s(SYS_IMP_APL_PMSR_EL1);
+	if (!overflow) {
+		/* Spurious interrupt? */
+		state = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+		state &= ~PMCR0_IACT;
+		write_sysreg_s(state, SYS_IMP_APL_PMCR0_EL1);
+		isb();
+		return IRQ_NONE;
+	}
+
+	cpu_pmu->stop(cpu_pmu);
+
+	regs = get_irq_regs();
+
+	for (idx = 0; idx < cpu_pmu->num_events; idx++) {
+		struct perf_event *event = cpuc->events[idx];
+		struct perf_sample_data data;
+
+		if (!event)
+			continue;
+
+		armpmu_event_update(event);
+		perf_sample_data_init(&data, 0, event->hw.last_period);
+		if (!armpmu_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			m1_pmu_disable_event(event);
+	}
+
+	cpu_pmu->start(cpu_pmu);
+
+	return IRQ_HANDLED;
+}
+
+static u64 m1_pmu_read_counter(struct perf_event *event)
+{
+	return m1_pmu_read_hw_counter(event->hw.idx);
+}
+
+static void m1_pmu_write_counter(struct perf_event *event, u64 value)
+{
+	m1_pmu_write_hw_counter(value, event->hw.idx);
+	isb();
+}
+
+static int m1_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				struct perf_event *event)
+{
+	unsigned long evtype = event->hw.config_base & M1_PMU_CFG_EVENT;
+	unsigned long affinity = m1_pmu_event_affinity[evtype];
+	int idx;
+
+	/*
+	 * Place the event on the first free counter that can count
+	 * this event.
+	 *
+	 * We could do a better job if we had a view of all the events
+	 * counting on the PMU at any given time, and by placing the
+	 * most constraining events first.
+	 */
+	for_each_set_bit(idx, &affinity, M1_PMU_NR_COUNTERS) {
+		if (!test_and_set_bit(idx, cpuc->used_mask))
+			return idx;
+	}
+
+	return -EAGAIN;
+}
+
+static void m1_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+				   struct perf_event *event)
+{
+	clear_bit(event->hw.idx, cpuc->used_mask);
+}
+
+static void __m1_pmu_set_mode(u8 mode)
+{
+	u64 val;
+
+	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+	val &= ~(PMCR0_IMODE | PMCR0_IACT);
+	val |= FIELD_PREP(PMCR0_IMODE, mode);
+	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+	isb();
+}
+
+static void m1_pmu_start(struct arm_pmu *cpu_pmu)
+{
+	__m1_pmu_set_mode(PMCR0_IMODE_FIQ);
+}
+
+static void m1_pmu_stop(struct arm_pmu *cpu_pmu)
+{
+	__m1_pmu_set_mode(PMCR0_IMODE_OFF);
+}
+
+static int m1_pmu_map_event(struct perf_event *event)
+{
+	/*
+	 * Although the counters are 48bit wide, bit 47 is what
+	 * triggers the overflow interrupt. Advertise the counters
+	 * being 47bit wide to mimick the behaviour of the ARM PMU.
+	 */
+	event->hw.flags |= ARMPMU_EVT_47BIT;
+	return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT);
+}
+
+static void m1_pmu_reset(void *info)
+{
+	int i;
+
+	__m1_pmu_set_mode(PMCR0_IMODE_OFF);
+
+	for (i = 0; i < M1_PMU_NR_COUNTERS; i++) {
+		m1_pmu_disable_counter(i);
+		m1_pmu_disable_counter_interrupt(i);
+		m1_pmu_write_hw_counter(0, i);
+	}
+
+	isb();
+}
+
+static int m1_pmu_set_event_filter(struct hw_perf_event *event,
+				   struct perf_event_attr *attr)
+{
+	unsigned long config_base = 0;
+
+	if (!attr->exclude_guest)
+		return -EINVAL;
+	if (!attr->exclude_kernel)
+		config_base |= M1_PMU_CFG_COUNT_KERNEL;
+	if (!attr->exclude_user)
+		config_base |= M1_PMU_CFG_COUNT_USER;
+
+	event->config_base = config_base;
+
+	return 0;
+}
+
+static int m1_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->handle_irq	  = m1_pmu_handle_irq;
+	cpu_pmu->enable		  = m1_pmu_enable_event;
+	cpu_pmu->disable	  = m1_pmu_disable_event;
+	cpu_pmu->read_counter	  = m1_pmu_read_counter;
+	cpu_pmu->write_counter	  = m1_pmu_write_counter;
+	cpu_pmu->get_event_idx	  = m1_pmu_get_event_idx;
+	cpu_pmu->clear_event_idx  = m1_pmu_clear_event_idx;
+	cpu_pmu->start		  = m1_pmu_start;
+	cpu_pmu->stop		  = m1_pmu_stop;
+	cpu_pmu->map_event	  = m1_pmu_map_event;
+	cpu_pmu->reset		  = m1_pmu_reset;
+	cpu_pmu->set_event_filter = m1_pmu_set_event_filter;
+
+	cpu_pmu->num_events	  = M1_PMU_NR_COUNTERS;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &m1_pmu_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &m1_pmu_format_attr_group;
+	return 0;
+}
+
+/* Device driver gunk */
+static int m1_pmu_ice_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->name = "apple_icestorm_pmu";
+	return m1_pmu_init(cpu_pmu);
+}
+
+static int m1_pmu_fire_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->name = "apple_firestorm_pmu";
+	return m1_pmu_init(cpu_pmu);
+}
+
+static const struct of_device_id m1_pmu_of_device_ids[] = {
+	{ .compatible = "apple,icestorm-pmu",	.data = m1_pmu_ice_init, },
+	{ .compatible = "apple,firestorm-pmu",	.data = m1_pmu_fire_init, },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, m1_pmu_of_device_ids);
+
+static int m1_pmu_device_probe(struct platform_device *pdev)
+{
+	return arm_pmu_device_probe(pdev, m1_pmu_of_device_ids, NULL);
+}
+
+static struct platform_driver m1_pmu_driver = {
+	.driver		= {
+		.name			= "apple-m1-cpu-pmu",
+		.of_match_table		= m1_pmu_of_device_ids,
+		.suppress_bind_attrs	= true,
+	},
+	.probe		= m1_pmu_device_probe,
+};
+
+module_platform_driver(m1_pmu_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 54aca3a62814..96e09fa40909 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -1096,7 +1096,7 @@ static void cci_pmu_enable(struct pmu *pmu)
 {
 	struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
 	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
-	int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs);
+	bool enabled = !bitmap_empty(hw_events->used_mask, cci_pmu->num_cntrs);
 	unsigned long flags;
 
 	if (!enabled)
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index a96c31604545..40b352e8aa7f 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -1460,8 +1460,7 @@ static irqreturn_t arm_ccn_irq_handler(int irq, void *dev_id)
 static int arm_ccn_probe(struct platform_device *pdev)
 {
 	struct arm_ccn *ccn;
-	struct resource *res;
-	unsigned int irq;
+	int irq;
 	int err;
 
 	ccn = devm_kzalloc(&pdev->dev, sizeof(*ccn), GFP_KERNEL);
@@ -1474,10 +1473,9 @@ static int arm_ccn_probe(struct platform_device *pdev)
 	if (IS_ERR(ccn->base))
 		return PTR_ERR(ccn->base);
 
-	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!res)
-		return -EINVAL;
-	irq = res->start;
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
 
 	/* Check if we can use the interrupt */
 	writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLE,
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 0e48adce57ef..9c1d82be7a2f 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -71,9 +71,11 @@
 #define CMN_DTM_WPn(n)			(0x1A0 + (n) * 0x18)
 #define CMN_DTM_WPn_CONFIG(n)		(CMN_DTM_WPn(n) + 0x00)
 #define CMN_DTM_WPn_CONFIG_WP_DEV_SEL2	GENMASK_ULL(18,17)
-#define CMN_DTM_WPn_CONFIG_WP_COMBINE	BIT(6)
-#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE	BIT(5)
-#define CMN_DTM_WPn_CONFIG_WP_GRP	BIT(4)
+#define CMN_DTM_WPn_CONFIG_WP_COMBINE	BIT(9)
+#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE	BIT(8)
+#define CMN600_WPn_CONFIG_WP_COMBINE	BIT(6)
+#define CMN600_WPn_CONFIG_WP_EXCLUSIVE	BIT(5)
+#define CMN_DTM_WPn_CONFIG_WP_GRP	GENMASK_ULL(5, 4)
 #define CMN_DTM_WPn_CONFIG_WP_CHN_SEL	GENMASK_ULL(3, 1)
 #define CMN_DTM_WPn_CONFIG_WP_DEV_SEL	BIT(0)
 #define CMN_DTM_WPn_VAL(n)		(CMN_DTM_WPn(n) + 0x08)
@@ -155,6 +157,7 @@
 #define CMN_CONFIG_WP_COMBINE		GENMASK_ULL(27, 24)
 #define CMN_CONFIG_WP_DEV_SEL		GENMASK_ULL(50, 48)
 #define CMN_CONFIG_WP_CHN_SEL		GENMASK_ULL(55, 51)
+/* Note that we don't yet support the tertiary match group on newer IPs */
 #define CMN_CONFIG_WP_GRP		BIT_ULL(56)
 #define CMN_CONFIG_WP_EXCLUSIVE		BIT_ULL(57)
 #define CMN_CONFIG1_WP_VAL		GENMASK_ULL(63, 0)
@@ -353,7 +356,7 @@ static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
 	return NULL;
 }
 
-struct dentry *arm_cmn_debugfs;
+static struct dentry *arm_cmn_debugfs;
 
 #ifdef CONFIG_DEBUG_FS
 static const char *arm_cmn_device_type(u8 type)
@@ -595,6 +598,9 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 		if ((intf & 4) && !(cmn->ports_used & BIT(intf & 3)))
 			return 0;
 
+		if (chan == 4 && cmn->model == CMN600)
+			return 0;
+
 		if ((chan == 5 && cmn->rsp_vc_num < 2) ||
 		    (chan == 6 && cmn->dat_vc_num < 2))
 			return 0;
@@ -905,15 +911,18 @@ static u32 arm_cmn_wp_config(struct perf_event *event)
 	u32 grp = CMN_EVENT_WP_GRP(event);
 	u32 exc = CMN_EVENT_WP_EXCLUSIVE(event);
 	u32 combine = CMN_EVENT_WP_COMBINE(event);
+	bool is_cmn600 = to_cmn(event->pmu)->model == CMN600;
 
 	config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) |
 		 FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) |
 		 FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) |
-		 FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE, exc) |
 		 FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL2, dev >> 1);
+	if (exc)
+		config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_EXCLUSIVE :
+				      CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE;
 	if (combine && !grp)
-		config |= CMN_DTM_WPn_CONFIG_WP_COMBINE;
-
+		config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_COMBINE :
+				      CMN_DTM_WPn_CONFIG_WP_COMBINE;
 	return config;
 }
 
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 295cc7952d0e..9694370651fa 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -109,6 +109,8 @@ static inline u64 arm_pmu_event_max_period(struct perf_event *event)
 {
 	if (event->hw.flags & ARMPMU_EVT_64BIT)
 		return GENMASK_ULL(63, 0);
+	else if (event->hw.flags & ARMPMU_EVT_47BIT)
+		return GENMASK_ULL(46, 0);
 	else
 		return GENMASK_ULL(31, 0);
 }
@@ -524,7 +526,7 @@ static void armpmu_enable(struct pmu *pmu)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(pmu);
 	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
-	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+	bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events);
 
 	/* For task-bound events we may be called on other CPUs */
 	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
@@ -785,7 +787,7 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
 {
 	struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
 	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
-	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+	bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events);
 
 	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
 		return NOTIFY_DONE;
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index a738aeab5c04..358e4e284a62 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -393,7 +393,7 @@ EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read);
 void hisi_uncore_pmu_enable(struct pmu *pmu)
 {
 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
-	int enabled = bitmap_weight(hisi_pmu->pmu_events.used_mask,
+	bool enabled = !bitmap_empty(hisi_pmu->pmu_events.used_mask,
 				    hisi_pmu->num_counters);
 
 	if (!enabled)
diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c
new file mode 100644
index 000000000000..665b382a0ee3
--- /dev/null
+++ b/drivers/perf/marvell_cn10k_ddr_pmu.c
@@ -0,0 +1,758 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver
+ *
+ * Copyright (C) 2021 Marvell.
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/perf_event.h>
+#include <linux/hrtimer.h>
+
+/* Performance Counters Operating Mode Control Registers */
+#define DDRC_PERF_CNT_OP_MODE_CTRL	0x8020
+#define OP_MODE_CTRL_VAL_MANNUAL	0x1
+
+/* Performance Counters Start Operation Control Registers */
+#define DDRC_PERF_CNT_START_OP_CTRL	0x8028
+#define START_OP_CTRL_VAL_START		0x1ULL
+#define START_OP_CTRL_VAL_ACTIVE	0x2
+
+/* Performance Counters End Operation Control Registers */
+#define DDRC_PERF_CNT_END_OP_CTRL	0x8030
+#define END_OP_CTRL_VAL_END		0x1ULL
+
+/* Performance Counters End Status Registers */
+#define DDRC_PERF_CNT_END_STATUS		0x8038
+#define END_STATUS_VAL_END_TIMER_MODE_END	0x1
+
+/* Performance Counters Configuration Registers */
+#define DDRC_PERF_CFG_BASE		0x8040
+
+/* 8 Generic event counter + 2 fixed event counters */
+#define DDRC_PERF_NUM_GEN_COUNTERS	8
+#define DDRC_PERF_NUM_FIX_COUNTERS	2
+#define DDRC_PERF_READ_COUNTER_IDX	DDRC_PERF_NUM_GEN_COUNTERS
+#define DDRC_PERF_WRITE_COUNTER_IDX	(DDRC_PERF_NUM_GEN_COUNTERS + 1)
+#define DDRC_PERF_NUM_COUNTERS		(DDRC_PERF_NUM_GEN_COUNTERS + \
+					 DDRC_PERF_NUM_FIX_COUNTERS)
+
+/* Generic event counter registers */
+#define DDRC_PERF_CFG(n)		(DDRC_PERF_CFG_BASE + 8 * (n))
+#define EVENT_ENABLE			BIT_ULL(63)
+
+/* Two dedicated event counters for DDR reads and writes */
+#define EVENT_DDR_READS			101
+#define EVENT_DDR_WRITES		100
+
+/*
+ * programmable events IDs in programmable event counters.
+ * DO NOT change these event-id numbers, they are used to
+ * program event bitmap in h/w.
+ */
+#define EVENT_OP_IS_ZQLATCH			55
+#define EVENT_OP_IS_ZQSTART			54
+#define EVENT_OP_IS_TCR_MRR			53
+#define EVENT_OP_IS_DQSOSC_MRR			52
+#define EVENT_OP_IS_DQSOSC_MPC			51
+#define EVENT_VISIBLE_WIN_LIMIT_REACHED_WR	50
+#define EVENT_VISIBLE_WIN_LIMIT_REACHED_RD	49
+#define EVENT_BSM_STARVATION			48
+#define EVENT_BSM_ALLOC				47
+#define EVENT_LPR_REQ_WITH_NOCREDIT		46
+#define EVENT_HPR_REQ_WITH_NOCREDIT		45
+#define EVENT_OP_IS_ZQCS			44
+#define EVENT_OP_IS_ZQCL			43
+#define EVENT_OP_IS_LOAD_MODE			42
+#define EVENT_OP_IS_SPEC_REF			41
+#define EVENT_OP_IS_CRIT_REF			40
+#define EVENT_OP_IS_REFRESH			39
+#define EVENT_OP_IS_ENTER_MPSM			35
+#define EVENT_OP_IS_ENTER_POWERDOWN		31
+#define EVENT_OP_IS_ENTER_SELFREF		27
+#define EVENT_WAW_HAZARD			26
+#define EVENT_RAW_HAZARD			25
+#define EVENT_WAR_HAZARD			24
+#define EVENT_WRITE_COMBINE			23
+#define EVENT_RDWR_TRANSITIONS			22
+#define EVENT_PRECHARGE_FOR_OTHER		21
+#define EVENT_PRECHARGE_FOR_RDWR		20
+#define EVENT_OP_IS_PRECHARGE			19
+#define EVENT_OP_IS_MWR				18
+#define EVENT_OP_IS_WR				17
+#define EVENT_OP_IS_RD				16
+#define EVENT_OP_IS_RD_ACTIVATE			15
+#define EVENT_OP_IS_RD_OR_WR			14
+#define EVENT_OP_IS_ACTIVATE			13
+#define EVENT_WR_XACT_WHEN_CRITICAL		12
+#define EVENT_LPR_XACT_WHEN_CRITICAL		11
+#define EVENT_HPR_XACT_WHEN_CRITICAL		10
+#define EVENT_DFI_RD_DATA_CYCLES		9
+#define EVENT_DFI_WR_DATA_CYCLES		8
+#define EVENT_ACT_BYPASS			7
+#define EVENT_READ_BYPASS			6
+#define EVENT_HIF_HI_PRI_RD			5
+#define EVENT_HIF_RMW				4
+#define EVENT_HIF_RD				3
+#define EVENT_HIF_WR				2
+#define EVENT_HIF_RD_OR_WR			1
+
+/* Event counter value registers */
+#define DDRC_PERF_CNT_VALUE_BASE		0x8080
+#define DDRC_PERF_CNT_VALUE(n)	(DDRC_PERF_CNT_VALUE_BASE + 8 * (n))
+
+/* Fixed event counter enable/disable register */
+#define DDRC_PERF_CNT_FREERUN_EN	0x80C0
+#define DDRC_PERF_FREERUN_WRITE_EN	0x1
+#define DDRC_PERF_FREERUN_READ_EN	0x2
+
+/* Fixed event counter control register */
+#define DDRC_PERF_CNT_FREERUN_CTRL	0x80C8
+#define DDRC_FREERUN_WRITE_CNT_CLR	0x1
+#define DDRC_FREERUN_READ_CNT_CLR	0x2
+
+/* Fixed event counter value register */
+#define DDRC_PERF_CNT_VALUE_WR_OP	0x80D0
+#define DDRC_PERF_CNT_VALUE_RD_OP	0x80D8
+#define DDRC_PERF_CNT_VALUE_OVERFLOW	BIT_ULL(48)
+#define DDRC_PERF_CNT_MAX_VALUE		GENMASK_ULL(48, 0)
+
+struct cn10k_ddr_pmu {
+	struct pmu pmu;
+	void __iomem *base;
+	unsigned int cpu;
+	struct	device *dev;
+	int active_events;
+	struct perf_event *events[DDRC_PERF_NUM_COUNTERS];
+	struct hrtimer hrtimer;
+	struct hlist_node node;
+};
+
+#define to_cn10k_ddr_pmu(p)	container_of(p, struct cn10k_ddr_pmu, pmu)
+
+static ssize_t cn10k_ddr_pmu_event_show(struct device *dev,
+					struct device_attribute *attr,
+					char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+
+}
+
+#define CN10K_DDR_PMU_EVENT_ATTR(_name, _id)				     \
+	PMU_EVENT_ATTR_ID(_name, cn10k_ddr_pmu_event_show, _id)
+
+static struct attribute *cn10k_ddr_perf_events_attrs[] = {
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_or_wr_access, EVENT_HIF_RD_OR_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_wr_access, EVENT_HIF_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_access, EVENT_HIF_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rmw_access, EVENT_HIF_RMW),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_pri_rdaccess, EVENT_HIF_HI_PRI_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_bypass_access, EVENT_READ_BYPASS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_act_bypass_access, EVENT_ACT_BYPASS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dif_wr_data_access, EVENT_DFI_WR_DATA_CYCLES),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dif_rd_data_access, EVENT_DFI_RD_DATA_CYCLES),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hpri_sched_rd_crit_access,
+					EVENT_HPR_XACT_WHEN_CRITICAL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_lpri_sched_rd_crit_access,
+					EVENT_LPR_XACT_WHEN_CRITICAL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_trxn_crit_access,
+					EVENT_WR_XACT_WHEN_CRITICAL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_active_access, EVENT_OP_IS_ACTIVATE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_or_wr_access, EVENT_OP_IS_RD_OR_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_active_access, EVENT_OP_IS_RD_ACTIVATE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_read, EVENT_OP_IS_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_write, EVENT_OP_IS_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_mwr, EVENT_OP_IS_MWR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge, EVENT_OP_IS_PRECHARGE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_rdwr, EVENT_PRECHARGE_FOR_RDWR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_other,
+					EVENT_PRECHARGE_FOR_OTHER),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_rdwr_transitions, EVENT_RDWR_TRANSITIONS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_write_combine, EVENT_WRITE_COMBINE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_war_hazard, EVENT_WAR_HAZARD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_raw_hazard, EVENT_RAW_HAZARD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_waw_hazard, EVENT_WAW_HAZARD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_selfref, EVENT_OP_IS_ENTER_SELFREF),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_powerdown, EVENT_OP_IS_ENTER_POWERDOWN),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_mpsm, EVENT_OP_IS_ENTER_MPSM),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_refresh, EVENT_OP_IS_REFRESH),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_crit_ref, EVENT_OP_IS_CRIT_REF),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_spec_ref, EVENT_OP_IS_SPEC_REF),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_load_mode, EVENT_OP_IS_LOAD_MODE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqcl, EVENT_OP_IS_ZQCL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_wr_access, EVENT_OP_IS_ZQCS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hpr_req_with_nocredit,
+					EVENT_HPR_REQ_WITH_NOCREDIT),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_lpr_req_with_nocredit,
+					EVENT_LPR_REQ_WITH_NOCREDIT),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_alloc, EVENT_BSM_ALLOC),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_starvation, EVENT_BSM_STARVATION),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_rd,
+					EVENT_VISIBLE_WIN_LIMIT_REACHED_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_wr,
+					EVENT_VISIBLE_WIN_LIMIT_REACHED_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mpc, EVENT_OP_IS_DQSOSC_MPC),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mrr, EVENT_OP_IS_DQSOSC_MRR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_tcr_mrr, EVENT_OP_IS_TCR_MRR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqstart, EVENT_OP_IS_ZQSTART),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqlatch, EVENT_OP_IS_ZQLATCH),
+	/* Free run event counters */
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_reads, EVENT_DDR_READS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_writes, EVENT_DDR_WRITES),
+	NULL
+};
+
+static struct attribute_group cn10k_ddr_perf_events_attr_group = {
+	.name = "events",
+	.attrs = cn10k_ddr_perf_events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-8");
+
+static struct attribute *cn10k_ddr_perf_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group cn10k_ddr_perf_format_attr_group = {
+	.name = "format",
+	.attrs = cn10k_ddr_perf_format_attrs,
+};
+
+static ssize_t cn10k_ddr_perf_cpumask_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct cn10k_ddr_pmu *pmu = dev_get_drvdata(dev);
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute cn10k_ddr_perf_cpumask_attr =
+	__ATTR(cpumask, 0444, cn10k_ddr_perf_cpumask_show, NULL);
+
+static struct attribute *cn10k_ddr_perf_cpumask_attrs[] = {
+	&cn10k_ddr_perf_cpumask_attr.attr,
+	NULL,
+};
+
+static struct attribute_group cn10k_ddr_perf_cpumask_attr_group = {
+	.attrs = cn10k_ddr_perf_cpumask_attrs,
+};
+
+static const struct attribute_group *cn10k_attr_groups[] = {
+	&cn10k_ddr_perf_events_attr_group,
+	&cn10k_ddr_perf_format_attr_group,
+	&cn10k_ddr_perf_cpumask_attr_group,
+	NULL,
+};
+
+/* Default poll timeout is 100 sec, which is very sufficient for
+ * 48 bit counter incremented max at 5.6 GT/s, which may take many
+ * hours to overflow.
+ */
+static unsigned long cn10k_ddr_pmu_poll_period_sec = 100;
+module_param_named(poll_period_sec, cn10k_ddr_pmu_poll_period_sec, ulong, 0644);
+
+static ktime_t cn10k_ddr_pmu_timer_period(void)
+{
+	return ms_to_ktime((u64)cn10k_ddr_pmu_poll_period_sec * USEC_PER_SEC);
+}
+
+static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
+{
+	switch (eventid) {
+	case EVENT_HIF_RD_OR_WR ... EVENT_WAW_HAZARD:
+	case EVENT_OP_IS_REFRESH ... EVENT_OP_IS_ZQLATCH:
+		*event_bitmap = (1ULL << (eventid - 1));
+		break;
+	case EVENT_OP_IS_ENTER_SELFREF:
+	case EVENT_OP_IS_ENTER_POWERDOWN:
+	case EVENT_OP_IS_ENTER_MPSM:
+		*event_bitmap = (0xFULL << (eventid - 1));
+		break;
+	default:
+		pr_err("%s Invalid eventid %d\n", __func__, eventid);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int cn10k_ddr_perf_alloc_counter(struct cn10k_ddr_pmu *pmu,
+					struct perf_event *event)
+{
+	u8 config = event->attr.config;
+	int i;
+
+	/* DDR read free-run counter index */
+	if (config == EVENT_DDR_READS) {
+		pmu->events[DDRC_PERF_READ_COUNTER_IDX] = event;
+		return DDRC_PERF_READ_COUNTER_IDX;
+	}
+
+	/* DDR write free-run counter index */
+	if (config == EVENT_DDR_WRITES) {
+		pmu->events[DDRC_PERF_WRITE_COUNTER_IDX] = event;
+		return DDRC_PERF_WRITE_COUNTER_IDX;
+	}
+
+	/* Allocate DDR generic counters */
+	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+		if (pmu->events[i] == NULL) {
+			pmu->events[i] = event;
+			return i;
+		}
+	}
+
+	return -ENOENT;
+}
+
+static void cn10k_ddr_perf_free_counter(struct cn10k_ddr_pmu *pmu, int counter)
+{
+	pmu->events[counter] = NULL;
+}
+
+static int cn10k_ddr_perf_event_init(struct perf_event *event)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event)) {
+		dev_info(pmu->dev, "Sampling not supported!\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->cpu < 0) {
+		dev_warn(pmu->dev, "Can't provide per-task data!\n");
+		return -EOPNOTSUPP;
+	}
+
+	/*  We must NOT create groups containing mixed PMUs */
+	if (event->group_leader->pmu != event->pmu &&
+	    !is_software_event(event->group_leader))
+		return -EINVAL;
+
+	/* Set ownership of event to one CPU, same event can not be observed
+	 * on multiple cpus at same time.
+	 */
+	event->cpu = pmu->cpu;
+	hwc->idx = -1;
+	return 0;
+}
+
+static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu,
+					  int counter, bool enable)
+{
+	u32 reg;
+	u64 val;
+
+	if (counter > DDRC_PERF_NUM_COUNTERS) {
+		pr_err("Error: unsupported counter %d\n", counter);
+		return;
+	}
+
+	if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
+		reg = DDRC_PERF_CFG(counter);
+		val = readq_relaxed(pmu->base + reg);
+
+		if (enable)
+			val |= EVENT_ENABLE;
+		else
+			val &= ~EVENT_ENABLE;
+
+		writeq_relaxed(val, pmu->base + reg);
+	} else {
+		val = readq_relaxed(pmu->base + DDRC_PERF_CNT_FREERUN_EN);
+		if (enable) {
+			if (counter == DDRC_PERF_READ_COUNTER_IDX)
+				val |= DDRC_PERF_FREERUN_READ_EN;
+			else
+				val |= DDRC_PERF_FREERUN_WRITE_EN;
+		} else {
+			if (counter == DDRC_PERF_READ_COUNTER_IDX)
+				val &= ~DDRC_PERF_FREERUN_READ_EN;
+			else
+				val &= ~DDRC_PERF_FREERUN_WRITE_EN;
+		}
+		writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_EN);
+	}
+}
+
+static u64 cn10k_ddr_perf_read_counter(struct cn10k_ddr_pmu *pmu, int counter)
+{
+	u64 val;
+
+	if (counter == DDRC_PERF_READ_COUNTER_IDX)
+		return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_RD_OP);
+
+	if (counter == DDRC_PERF_WRITE_COUNTER_IDX)
+		return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_WR_OP);
+
+	val = readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE(counter));
+	return val;
+}
+
+static void cn10k_ddr_perf_event_update(struct perf_event *event)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 prev_count, new_count, mask;
+
+	do {
+		prev_count = local64_read(&hwc->prev_count);
+		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
+	} while (local64_xchg(&hwc->prev_count, new_count) != prev_count);
+
+	mask = DDRC_PERF_CNT_MAX_VALUE;
+
+	local64_add((new_count - prev_count) & mask, &event->count);
+}
+
+static void cn10k_ddr_perf_event_start(struct perf_event *event, int flags)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	local64_set(&hwc->prev_count, 0);
+
+	cn10k_ddr_perf_counter_enable(pmu, counter, true);
+
+	hwc->state = 0;
+}
+
+static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u8 config = event->attr.config;
+	int counter, ret;
+	u32 reg_offset;
+	u64 val;
+
+	counter = cn10k_ddr_perf_alloc_counter(pmu, event);
+	if (counter < 0)
+		return -EAGAIN;
+
+	pmu->active_events++;
+	hwc->idx = counter;
+
+	if (pmu->active_events == 1)
+		hrtimer_start(&pmu->hrtimer, cn10k_ddr_pmu_timer_period(),
+			      HRTIMER_MODE_REL_PINNED);
+
+	if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
+		/* Generic counters, configure event id */
+		reg_offset = DDRC_PERF_CFG(counter);
+		ret = ddr_perf_get_event_bitmap(config, &val);
+		if (ret)
+			return ret;
+
+		writeq_relaxed(val, pmu->base + reg_offset);
+	} else {
+		/* fixed event counter, clear counter value */
+		if (counter == DDRC_PERF_READ_COUNTER_IDX)
+			val = DDRC_FREERUN_READ_CNT_CLR;
+		else
+			val = DDRC_FREERUN_WRITE_CNT_CLR;
+
+		writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_CTRL);
+	}
+
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		cn10k_ddr_perf_event_start(event, flags);
+
+	return 0;
+}
+
+static void cn10k_ddr_perf_event_stop(struct perf_event *event, int flags)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	cn10k_ddr_perf_counter_enable(pmu, counter, false);
+
+	if (flags & PERF_EF_UPDATE)
+		cn10k_ddr_perf_event_update(event);
+
+	hwc->state |= PERF_HES_STOPPED;
+}
+
+static void cn10k_ddr_perf_event_del(struct perf_event *event, int flags)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	cn10k_ddr_perf_event_stop(event, PERF_EF_UPDATE);
+
+	cn10k_ddr_perf_free_counter(pmu, counter);
+	pmu->active_events--;
+	hwc->idx = -1;
+
+	/* Cancel timer when no events to capture */
+	if (pmu->active_events == 0)
+		hrtimer_cancel(&pmu->hrtimer);
+}
+
+static void cn10k_ddr_perf_pmu_enable(struct pmu *pmu)
+{
+	struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
+
+	writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base +
+		       DDRC_PERF_CNT_START_OP_CTRL);
+}
+
+static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu)
+{
+	struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
+
+	writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base +
+		       DDRC_PERF_CNT_END_OP_CTRL);
+}
+
+static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
+{
+	struct hw_perf_event *hwc;
+	int i;
+
+	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+		if (pmu->events[i] == NULL)
+			continue;
+
+		cn10k_ddr_perf_event_update(pmu->events[i]);
+	}
+
+	/* Reset previous count as h/w counter are reset */
+	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+		if (pmu->events[i] == NULL)
+			continue;
+
+		hwc = &pmu->events[i]->hw;
+		local64_set(&hwc->prev_count, 0);
+	}
+}
+
+static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
+{
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
+	u64 prev_count, new_count;
+	u64 value;
+	int i;
+
+	event = pmu->events[DDRC_PERF_READ_COUNTER_IDX];
+	if (event) {
+		hwc = &event->hw;
+		prev_count = local64_read(&hwc->prev_count);
+		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
+
+		/* Overflow condition is when new count less than
+		 * previous count
+		 */
+		if (new_count < prev_count)
+			cn10k_ddr_perf_event_update(event);
+	}
+
+	event = pmu->events[DDRC_PERF_WRITE_COUNTER_IDX];
+	if (event) {
+		hwc = &event->hw;
+		prev_count = local64_read(&hwc->prev_count);
+		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
+
+		/* Overflow condition is when new count less than
+		 * previous count
+		 */
+		if (new_count < prev_count)
+			cn10k_ddr_perf_event_update(event);
+	}
+
+	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+		if (pmu->events[i] == NULL)
+			continue;
+
+		value = cn10k_ddr_perf_read_counter(pmu, i);
+		if (value == DDRC_PERF_CNT_MAX_VALUE) {
+			pr_info("Counter-(%d) reached max value\n", i);
+			cn10k_ddr_perf_event_update_all(pmu);
+			cn10k_ddr_perf_pmu_disable(&pmu->pmu);
+			cn10k_ddr_perf_pmu_enable(&pmu->pmu);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart cn10k_ddr_pmu_timer_handler(struct hrtimer *hrtimer)
+{
+	struct cn10k_ddr_pmu *pmu = container_of(hrtimer, struct cn10k_ddr_pmu,
+						 hrtimer);
+	unsigned long flags;
+
+	local_irq_save(flags);
+	cn10k_ddr_pmu_overflow_handler(pmu);
+	local_irq_restore(flags);
+
+	hrtimer_forward_now(hrtimer, cn10k_ddr_pmu_timer_period());
+	return HRTIMER_RESTART;
+}
+
+static int cn10k_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct cn10k_ddr_pmu *pmu = hlist_entry_safe(node, struct cn10k_ddr_pmu,
+						     node);
+	unsigned int target;
+
+	if (cpu != pmu->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+	pmu->cpu = target;
+	return 0;
+}
+
+static int cn10k_ddr_perf_probe(struct platform_device *pdev)
+{
+	struct cn10k_ddr_pmu *ddr_pmu;
+	struct resource *res;
+	void __iomem *base;
+	char *name;
+	int ret;
+
+	ddr_pmu = devm_kzalloc(&pdev->dev, sizeof(*ddr_pmu), GFP_KERNEL);
+	if (!ddr_pmu)
+		return -ENOMEM;
+
+	ddr_pmu->dev = &pdev->dev;
+	platform_set_drvdata(pdev, ddr_pmu);
+
+	base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	ddr_pmu->base = base;
+
+	/* Setup the PMU counter to work in manual mode */
+	writeq_relaxed(OP_MODE_CTRL_VAL_MANNUAL, ddr_pmu->base +
+		       DDRC_PERF_CNT_OP_MODE_CTRL);
+
+	ddr_pmu->pmu = (struct pmu) {
+		.module	      = THIS_MODULE,
+		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+		.task_ctx_nr = perf_invalid_context,
+		.attr_groups = cn10k_attr_groups,
+		.event_init  = cn10k_ddr_perf_event_init,
+		.add	     = cn10k_ddr_perf_event_add,
+		.del	     = cn10k_ddr_perf_event_del,
+		.start	     = cn10k_ddr_perf_event_start,
+		.stop	     = cn10k_ddr_perf_event_stop,
+		.read	     = cn10k_ddr_perf_event_update,
+		.pmu_enable  = cn10k_ddr_perf_pmu_enable,
+		.pmu_disable = cn10k_ddr_perf_pmu_disable,
+	};
+
+	/* Choose this cpu to collect perf data */
+	ddr_pmu->cpu = raw_smp_processor_id();
+
+	name = devm_kasprintf(ddr_pmu->dev, GFP_KERNEL, "mrvl_ddr_pmu_%llx",
+			      res->start);
+	if (!name)
+		return -ENOMEM;
+
+	hrtimer_init(&ddr_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	ddr_pmu->hrtimer.function = cn10k_ddr_pmu_timer_handler;
+
+	cpuhp_state_add_instance_nocalls(
+				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+				&ddr_pmu->node);
+
+	ret = perf_pmu_register(&ddr_pmu->pmu, name, -1);
+	if (ret)
+		goto error;
+
+	pr_info("CN10K DDR PMU Driver for ddrc@%llx\n", res->start);
+	return 0;
+error:
+	cpuhp_state_remove_instance_nocalls(
+				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+				&ddr_pmu->node);
+	return ret;
+}
+
+static int cn10k_ddr_perf_remove(struct platform_device *pdev)
+{
+	struct cn10k_ddr_pmu *ddr_pmu = platform_get_drvdata(pdev);
+
+	cpuhp_state_remove_instance_nocalls(
+				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+				&ddr_pmu->node);
+
+	perf_pmu_unregister(&ddr_pmu->pmu);
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id cn10k_ddr_pmu_of_match[] = {
+	{ .compatible = "marvell,cn10k-ddr-pmu", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match);
+#endif
+
+static struct platform_driver cn10k_ddr_pmu_driver = {
+	.driver	= {
+		.name   = "cn10k-ddr-pmu",
+		.of_match_table = of_match_ptr(cn10k_ddr_pmu_of_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe		= cn10k_ddr_perf_probe,
+	.remove		= cn10k_ddr_perf_remove,
+};
+
+static int __init cn10k_ddr_pmu_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(
+				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+				"perf/marvell/cn10k/ddr:online", NULL,
+				cn10k_ddr_pmu_offline_cpu);
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&cn10k_ddr_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(
+				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE);
+	return ret;
+}
+
+static void __exit cn10k_ddr_pmu_exit(void)
+{
+	platform_driver_unregister(&cn10k_ddr_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE);
+}
+
+module_init(cn10k_ddr_pmu_init);
+module_exit(cn10k_ddr_pmu_exit);
+
+MODULE_AUTHOR("Bharat Bhushan <[email protected]>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
index 7f4d292658e3..ee67305f822d 100644
--- a/drivers/perf/marvell_cn10k_tad_pmu.c
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -368,10 +368,12 @@ static int tad_pmu_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_OF
 static const struct of_device_id tad_pmu_of_match[] = {
 	{ .compatible = "marvell,cn10k-tad-pmu", },
 	{},
 };
+#endif
 
 static struct platform_driver tad_pmu_driver = {
 	.driver         = {
diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
index 05378c0fd8f3..1edb9c03704f 100644
--- a/drivers/perf/thunderx2_pmu.c
+++ b/drivers/perf/thunderx2_pmu.c
@@ -887,13 +887,11 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev,
 static acpi_status tx2_uncore_pmu_add(acpi_handle handle, u32 level,
 				    void *data, void **return_value)
 {
+	struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
 	struct tx2_uncore_pmu *tx2_pmu;
-	struct acpi_device *adev;
 	enum tx2_uncore_type type;
 
-	if (acpi_bus_get_device(handle, &adev))
-		return AE_OK;
-	if (acpi_bus_get_status(adev) || !adev->status.present)
+	if (!adev || acpi_bus_get_status(adev) || !adev->status.present)
 		return AE_OK;
 
 	type = get_tx2_pmu_type(adev);
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 2b6d476bd213..0c32dffc7ede 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -867,7 +867,7 @@ static void xgene_perf_pmu_enable(struct pmu *pmu)
 {
 	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu);
 	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
-	int enabled = bitmap_weight(pmu_dev->cntr_assign_mask,
+	bool enabled = !bitmap_empty(pmu_dev->cntr_assign_mask,
 			pmu_dev->max_counters);
 
 	if (!enabled)
@@ -1549,14 +1549,12 @@ static const struct acpi_device_id *xgene_pmu_acpi_match_type(
 static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
 				    void *data, void **return_value)
 {
+	struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
 	const struct acpi_device_id *acpi_id;
 	struct xgene_pmu *xgene_pmu = data;
 	struct xgene_pmu_dev_ctx *ctx;
-	struct acpi_device *adev;
 
-	if (acpi_bus_get_device(handle, &adev))
-		return AE_OK;
-	if (acpi_bus_get_status(adev) || !adev->status.present)
+	if (!adev || acpi_bus_get_status(adev) || !adev->status.present)
 		return AE_OK;
 
 	acpi_id = xgene_pmu_acpi_match_type(xgene_pmu_acpi_type_match, adev);
diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c
index 4d81908d6725..ba536fd4d674 100644
--- a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c
+++ b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c
@@ -78,7 +78,6 @@ struct npcm7xx_gpio {
 	struct gpio_chip	gc;
 	int			irqbase;
 	int			irq;
-	void			*priv;
 	struct irq_chip		irq_chip;
 	u32			pinctrl_id;
 	int (*direction_input)(struct gpio_chip *chip, unsigned offset);
@@ -226,7 +225,7 @@ static void npcmgpio_irq_handler(struct irq_desc *desc)
 	chained_irq_enter(chip, desc);
 	sts = ioread32(bank->base + NPCM7XX_GP_N_EVST);
 	en  = ioread32(bank->base + NPCM7XX_GP_N_EVEN);
-	dev_dbg(chip->parent_device, "==> got irq sts %.8x %.8x\n", sts,
+	dev_dbg(bank->gc.parent, "==> got irq sts %.8x %.8x\n", sts,
 		en);
 
 	sts &= en;
@@ -241,33 +240,33 @@ static int npcmgpio_set_irq_type(struct irq_data *d, unsigned int type)
 		gpiochip_get_data(irq_data_get_irq_chip_data(d));
 	unsigned int gpio = BIT(d->hwirq);
 
-	dev_dbg(d->chip->parent_device, "setirqtype: %u.%u = %u\n", gpio,
+	dev_dbg(bank->gc.parent, "setirqtype: %u.%u = %u\n", gpio,
 		d->irq, type);
 	switch (type) {
 	case IRQ_TYPE_EDGE_RISING:
-		dev_dbg(d->chip->parent_device, "edge.rising\n");
+		dev_dbg(bank->gc.parent, "edge.rising\n");
 		npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio);
 		npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
 		break;
 	case IRQ_TYPE_EDGE_FALLING:
-		dev_dbg(d->chip->parent_device, "edge.falling\n");
+		dev_dbg(bank->gc.parent, "edge.falling\n");
 		npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio);
 		npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
 		break;
 	case IRQ_TYPE_EDGE_BOTH:
-		dev_dbg(d->chip->parent_device, "edge.both\n");
+		dev_dbg(bank->gc.parent, "edge.both\n");
 		npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio);
 		break;
 	case IRQ_TYPE_LEVEL_LOW:
-		dev_dbg(d->chip->parent_device, "level.low\n");
+		dev_dbg(bank->gc.parent, "level.low\n");
 		npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
 		break;
 	case IRQ_TYPE_LEVEL_HIGH:
-		dev_dbg(d->chip->parent_device, "level.high\n");
+		dev_dbg(bank->gc.parent, "level.high\n");
 		npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
 		break;
 	default:
-		dev_dbg(d->chip->parent_device, "invalid irq type\n");
+		dev_dbg(bank->gc.parent, "invalid irq type\n");
 		return -EINVAL;
 	}
 
@@ -289,7 +288,7 @@ static void npcmgpio_irq_ack(struct irq_data *d)
 		gpiochip_get_data(irq_data_get_irq_chip_data(d));
 	unsigned int gpio = d->hwirq;
 
-	dev_dbg(d->chip->parent_device, "irq_ack: %u.%u\n", gpio, d->irq);
+	dev_dbg(bank->gc.parent, "irq_ack: %u.%u\n", gpio, d->irq);
 	iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVST);
 }
 
@@ -301,7 +300,7 @@ static void npcmgpio_irq_mask(struct irq_data *d)
 	unsigned int gpio = d->hwirq;
 
 	/* Clear events */
-	dev_dbg(d->chip->parent_device, "irq_mask: %u.%u\n", gpio, d->irq);
+	dev_dbg(bank->gc.parent, "irq_mask: %u.%u\n", gpio, d->irq);
 	iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENC);
 }
 
@@ -313,7 +312,7 @@ static void npcmgpio_irq_unmask(struct irq_data *d)
 	unsigned int gpio = d->hwirq;
 
 	/* Enable events */
-	dev_dbg(d->chip->parent_device, "irq_unmask: %u.%u\n", gpio, d->irq);
+	dev_dbg(bank->gc.parent, "irq_unmask: %u.%u\n", gpio, d->irq);
 	iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENS);
 }
 
@@ -323,7 +322,7 @@ static unsigned int npcmgpio_irq_startup(struct irq_data *d)
 	unsigned int gpio = d->hwirq;
 
 	/* active-high, input, clear interrupt, enable interrupt */
-	dev_dbg(d->chip->parent_device, "startup: %u.%u\n", gpio, d->irq);
+	dev_dbg(gc->parent, "startup: %u.%u\n", gpio, d->irq);
 	npcmgpio_direction_input(gc, gpio);
 	npcmgpio_irq_ack(d);
 	npcmgpio_irq_unmask(d);
diff --git a/drivers/pinctrl/pinctrl-starfive.c b/drivers/pinctrl/pinctrl-starfive.c
index 266da41a6162..ab4b2ee9f217 100644
--- a/drivers/pinctrl/pinctrl-starfive.c
+++ b/drivers/pinctrl/pinctrl-starfive.c
@@ -1308,8 +1308,6 @@ static int starfive_probe(struct platform_device *pdev)
 	sfp->gc.base = -1;
 	sfp->gc.ngpio = NR_GPIOS;
 
-	starfive_irq_chip.parent_device = dev;
-
 	sfp->gc.irq.chip = &starfive_irq_chip;
 	sfp->gc.irq.parent_handler = starfive_gpio_irq_handler;
 	sfp->gc.irq.num_parents = 1;
@@ -1330,6 +1328,8 @@ static int starfive_probe(struct platform_device *pdev)
 	if (ret)
 		return dev_err_probe(dev, ret, "could not register gpiochip\n");
 
+	irq_domain_set_pm_device(sfp->gc.irq.domain, dev);
+
 out_pinctrl_enable:
 	return pinctrl_enable(sfp->pctl);
 }
diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c
index cc6757dfa3f1..c02e7bf643a6 100644
--- a/drivers/pnp/driver.c
+++ b/drivers/pnp/driver.c
@@ -171,7 +171,7 @@ static int __pnp_bus_suspend(struct device *dev, pm_message_t state)
 
 	if (pnp_drv->driver.pm && pnp_drv->driver.pm->suspend) {
 		error = pnp_drv->driver.pm->suspend(dev);
-		suspend_report_result(pnp_drv->driver.pm->suspend, error);
+		suspend_report_result(dev, pnp_drv->driver.pm->suspend, error);
 		if (error)
 			return error;
 	}
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index afaf30a3622c..38928ff7472b 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -287,9 +287,9 @@ static acpi_status __init pnpacpi_add_device_handler(acpi_handle handle,
 						     u32 lvl, void *context,
 						     void **rv)
 {
-	struct acpi_device *device;
+	struct acpi_device *device = acpi_fetch_acpi_dev(handle);
 
-	if (acpi_bus_get_device(handle, &device))
+	if (!device)
 		return AE_CTRL_DEPTH;
 	if (acpi_is_pnp_device(device))
 		pnpacpi_add_device(device);
diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index 8242e8c5ed77..515e3ceb3393 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -46,6 +46,7 @@ config IDLE_INJECT
 
 config DTPM
 	bool "Power capping for Dynamic Thermal Power Management (EXPERIMENTAL)"
+	depends on OF
 	help
 	  This enables support for the power capping for the dynamic
 	  thermal power management userspace engine.
@@ -56,4 +57,11 @@ config DTPM_CPU
 	help
 	  This enables support for CPU power limitation based on
 	  energy model.
+
+config DTPM_DEVFREQ
+	bool "Add device power capping based on the energy model"
+	depends on DTPM && ENERGY_MODEL
+	help
+	  This enables support for device power limitation based on
+	  energy model.
 endif
diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile
index fabcf388a8d3..494617cdad88 100644
--- a/drivers/powercap/Makefile
+++ b/drivers/powercap/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_DTPM) += dtpm.o
 obj-$(CONFIG_DTPM_CPU) += dtpm_cpu.o
+obj-$(CONFIG_DTPM_DEVFREQ) += dtpm_devfreq.o
 obj-$(CONFIG_POWERCAP)	+= powercap_sys.o
 obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o
 obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o
diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index 8cb45f2d3d78..ce920f17f45f 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -23,6 +23,9 @@
 #include <linux/powercap.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
+#include <linux/of.h>
+
+#include "dtpm_subsys.h"
 
 #define DTPM_POWER_LIMIT_FLAG 0
 
@@ -48,9 +51,7 @@ static int get_max_power_range_uw(struct powercap_zone *pcz, u64 *max_power_uw)
 {
 	struct dtpm *dtpm = to_dtpm(pcz);
 
-	mutex_lock(&dtpm_lock);
 	*max_power_uw = dtpm->power_max - dtpm->power_min;
-	mutex_unlock(&dtpm_lock);
 
 	return 0;
 }
@@ -80,14 +81,7 @@ static int __get_power_uw(struct dtpm *dtpm, u64 *power_uw)
 
 static int get_power_uw(struct powercap_zone *pcz, u64 *power_uw)
 {
-	struct dtpm *dtpm = to_dtpm(pcz);
-	int ret;
-
-	mutex_lock(&dtpm_lock);
-	ret = __get_power_uw(dtpm, power_uw);
-	mutex_unlock(&dtpm_lock);
-
-	return ret;
+	return __get_power_uw(to_dtpm(pcz), power_uw);
 }
 
 static void __dtpm_rebalance_weight(struct dtpm *dtpm)
@@ -130,7 +124,16 @@ static void __dtpm_add_power(struct dtpm *dtpm)
 	}
 }
 
-static int __dtpm_update_power(struct dtpm *dtpm)
+/**
+ * dtpm_update_power - Update the power on the dtpm
+ * @dtpm: a pointer to a dtpm structure to update
+ *
+ * Function to update the power values of the dtpm node specified in
+ * parameter. These new values will be propagated to the tree.
+ *
+ * Return: zero on success, -EINVAL if the values are inconsistent
+ */
+int dtpm_update_power(struct dtpm *dtpm)
 {
 	int ret;
 
@@ -153,26 +156,6 @@ static int __dtpm_update_power(struct dtpm *dtpm)
 }
 
 /**
- * dtpm_update_power - Update the power on the dtpm
- * @dtpm: a pointer to a dtpm structure to update
- *
- * Function to update the power values of the dtpm node specified in
- * parameter. These new values will be propagated to the tree.
- *
- * Return: zero on success, -EINVAL if the values are inconsistent
- */
-int dtpm_update_power(struct dtpm *dtpm)
-{
-	int ret;
-
-	mutex_lock(&dtpm_lock);
-	ret = __dtpm_update_power(dtpm);
-	mutex_unlock(&dtpm_lock);
-
-	return ret;
-}
-
-/**
  * dtpm_release_zone - Cleanup when the node is released
  * @pcz: a pointer to a powercap_zone structure
  *
@@ -188,48 +171,28 @@ int dtpm_release_zone(struct powercap_zone *pcz)
 	struct dtpm *dtpm = to_dtpm(pcz);
 	struct dtpm *parent = dtpm->parent;
 
-	mutex_lock(&dtpm_lock);
-
-	if (!list_empty(&dtpm->children)) {
-		mutex_unlock(&dtpm_lock);
+	if (!list_empty(&dtpm->children))
 		return -EBUSY;
-	}
 
 	if (parent)
 		list_del(&dtpm->sibling);
 
 	__dtpm_sub_power(dtpm);
 
-	mutex_unlock(&dtpm_lock);
-
 	if (dtpm->ops)
 		dtpm->ops->release(dtpm);
+	else
+		kfree(dtpm);
 
-	if (root == dtpm)
-		root = NULL;
-
-	kfree(dtpm);
-
-	return 0;
-}
-
-static int __get_power_limit_uw(struct dtpm *dtpm, int cid, u64 *power_limit)
-{
-	*power_limit = dtpm->power_limit;
 	return 0;
 }
 
 static int get_power_limit_uw(struct powercap_zone *pcz,
 			      int cid, u64 *power_limit)
 {
-	struct dtpm *dtpm = to_dtpm(pcz);
-	int ret;
-
-	mutex_lock(&dtpm_lock);
-	ret = __get_power_limit_uw(dtpm, cid, power_limit);
-	mutex_unlock(&dtpm_lock);
-
-	return ret;
+	*power_limit = to_dtpm(pcz)->power_limit;
+	
+	return 0;
 }
 
 /*
@@ -289,7 +252,7 @@ static int __set_power_limit_uw(struct dtpm *dtpm, int cid, u64 power_limit)
 
 			ret = __set_power_limit_uw(child, cid, power);
 			if (!ret)
-				ret = __get_power_limit_uw(child, cid, &power);
+				ret = get_power_limit_uw(&child->zone, cid, &power);
 
 			if (ret)
 				break;
@@ -307,8 +270,6 @@ static int set_power_limit_uw(struct powercap_zone *pcz,
 	struct dtpm *dtpm = to_dtpm(pcz);
 	int ret;
 
-	mutex_lock(&dtpm_lock);
-
 	/*
 	 * Don't allow values outside of the power range previously
 	 * set when initializing the power numbers.
@@ -320,8 +281,6 @@ static int set_power_limit_uw(struct powercap_zone *pcz,
 	pr_debug("%s: power limit: %llu uW, power max: %llu uW\n",
 		 dtpm->zone.name, dtpm->power_limit, dtpm->power_max);
 
-	mutex_unlock(&dtpm_lock);
-
 	return ret;
 }
 
@@ -332,11 +291,7 @@ static const char *get_constraint_name(struct powercap_zone *pcz, int cid)
 
 static int get_max_power_uw(struct powercap_zone *pcz, int id, u64 *max_power)
 {
-	struct dtpm *dtpm = to_dtpm(pcz);
-
-	mutex_lock(&dtpm_lock);
-	*max_power = dtpm->power_max;
-	mutex_unlock(&dtpm_lock);
+	*max_power = to_dtpm(pcz)->power_max;
 
 	return 0;
 }
@@ -439,8 +394,6 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 	if (IS_ERR(pcz))
 		return PTR_ERR(pcz);
 
-	mutex_lock(&dtpm_lock);
-
 	if (parent) {
 		list_add_tail(&dtpm->sibling, &parent->children);
 		dtpm->parent = parent;
@@ -456,19 +409,253 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 	pr_debug("Registered dtpm node '%s' / %llu-%llu uW, \n",
 		 dtpm->zone.name, dtpm->power_min, dtpm->power_max);
 
-	mutex_unlock(&dtpm_lock);
+	return 0;
+}
+
+static struct dtpm *dtpm_setup_virtual(const struct dtpm_node *hierarchy,
+				       struct dtpm *parent)
+{
+	struct dtpm *dtpm;
+	int ret;
+
+	dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL);
+	if (!dtpm)
+		return ERR_PTR(-ENOMEM);
+	dtpm_init(dtpm, NULL);
+
+	ret = dtpm_register(hierarchy->name, dtpm, parent);
+	if (ret) {
+		pr_err("Failed to register dtpm node '%s': %d\n",
+		       hierarchy->name, ret);
+		kfree(dtpm);
+		return ERR_PTR(ret);
+	}
+
+	return dtpm;
+}
+
+static struct dtpm *dtpm_setup_dt(const struct dtpm_node *hierarchy,
+				  struct dtpm *parent)
+{
+	struct device_node *np;
+	int i, ret;
+
+	np = of_find_node_by_path(hierarchy->name);
+	if (!np) {
+		pr_err("Failed to find '%s'\n", hierarchy->name);
+		return ERR_PTR(-ENXIO);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) {
+
+		if (!dtpm_subsys[i]->setup)
+			continue;
+
+		ret = dtpm_subsys[i]->setup(parent, np);
+		if (ret) {
+			pr_err("Failed to setup '%s': %d\n", dtpm_subsys[i]->name, ret);
+			of_node_put(np);
+			return ERR_PTR(ret);
+		}
+	}
+
+	of_node_put(np);
+
+	/*
+	 * By returning a NULL pointer, we let know the caller there
+	 * is no child for us as we are a leaf of the tree
+	 */
+	return NULL;
+}
+
+typedef struct dtpm * (*dtpm_node_callback_t)(const struct dtpm_node *, struct dtpm *);
+
+static dtpm_node_callback_t dtpm_node_callback[] = {
+	[DTPM_NODE_VIRTUAL] = dtpm_setup_virtual,
+	[DTPM_NODE_DT] = dtpm_setup_dt,
+};
+
+static int dtpm_for_each_child(const struct dtpm_node *hierarchy,
+			       const struct dtpm_node *it, struct dtpm *parent)
+{
+	struct dtpm *dtpm;
+	int i, ret;
+
+	for (i = 0; hierarchy[i].name; i++) {
+
+		if (hierarchy[i].parent != it)
+			continue;
+
+		dtpm = dtpm_node_callback[hierarchy[i].type](&hierarchy[i], parent);
+
+		/*
+		 * A NULL pointer means there is no children, hence we
+		 * continue without going deeper in the recursivity.
+		 */
+		if (!dtpm)
+			continue;
+
+		/*
+		 * There are multiple reasons why the callback could
+		 * fail. The generic glue is abstracting the backend
+		 * and therefore it is not possible to report back or
+		 * take a decision based on the error.  In any case,
+		 * if this call fails, it is not critical in the
+		 * hierarchy creation, we can assume the underlying
+		 * service is not found, so we continue without this
+		 * branch in the tree but with a warning to log the
+		 * information the node was not created.
+		 */
+		if (IS_ERR(dtpm)) {
+			pr_warn("Failed to create '%s' in the hierarchy\n",
+				hierarchy[i].name);
+			continue;
+		}
+
+		ret = dtpm_for_each_child(hierarchy, &hierarchy[i], dtpm);
+		if (ret)
+			return ret;
+	}
 
 	return 0;
 }
 
-static int __init init_dtpm(void)
+/**
+ * dtpm_create_hierarchy - Create the dtpm hierarchy
+ * @hierarchy: An array of struct dtpm_node describing the hierarchy
+ *
+ * The function is called by the platform specific code with the
+ * description of the different node in the hierarchy. It creates the
+ * tree in the sysfs filesystem under the powercap dtpm entry.
+ *
+ * The expected tree has the format:
+ *
+ * struct dtpm_node hierarchy[] = {
+ *	[0] { .name = "topmost", type =  DTPM_NODE_VIRTUAL },
+ *	[1] { .name = "package", .type = DTPM_NODE_VIRTUAL, .parent = &hierarchy[0] },
+ *	[2] { .name = "/cpus/cpu0", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ *	[3] { .name = "/cpus/cpu1", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ *	[4] { .name = "/cpus/cpu2", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ *	[5] { .name = "/cpus/cpu3", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ *	[6] { }
+ * };
+ *
+ * The last element is always an empty one and marks the end of the
+ * array.
+ *
+ * Return: zero on success, a negative value in case of error. Errors
+ * are reported back from the underlying functions.
+ */
+int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table)
 {
+	const struct of_device_id *match;
+	const struct dtpm_node *hierarchy;
+	struct device_node *np;
+	int i, ret;
+
+	mutex_lock(&dtpm_lock);
+
+	if (pct) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
 	pct = powercap_register_control_type(NULL, "dtpm", NULL);
 	if (IS_ERR(pct)) {
 		pr_err("Failed to register control type\n");
-		return PTR_ERR(pct);
+		ret = PTR_ERR(pct);
+		goto out_pct;
 	}
 
+	ret = -ENODEV;
+	np = of_find_node_by_path("/");
+	if (!np)
+		goto out_err;
+
+	match = of_match_node(dtpm_match_table, np);
+
+	of_node_put(np);
+
+	if (!match)
+		goto out_err;
+
+	hierarchy = match->data;
+	if (!hierarchy) {
+		ret = -EFAULT;
+		goto out_err;
+	}
+
+	ret = dtpm_for_each_child(hierarchy, NULL, NULL);
+	if (ret)
+		goto out_err;
+	
+	for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) {
+
+		if (!dtpm_subsys[i]->init)
+			continue;
+
+		ret = dtpm_subsys[i]->init();
+		if (ret)
+			pr_info("Failed to initialize '%s': %d",
+				dtpm_subsys[i]->name, ret);
+	}
+
+	mutex_unlock(&dtpm_lock);
+
 	return 0;
+
+out_err:
+	powercap_unregister_control_type(pct);
+out_pct:
+	pct = NULL;
+out_unlock:
+	mutex_unlock(&dtpm_lock);
+	
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dtpm_create_hierarchy);
+
+static void __dtpm_destroy_hierarchy(struct dtpm *dtpm)
+{
+	struct dtpm *child, *aux;
+
+	list_for_each_entry_safe(child, aux, &dtpm->children, sibling)
+		__dtpm_destroy_hierarchy(child);
+
+	/*
+	 * At this point, we know all children were removed from the
+	 * recursive call before
+	 */
+	dtpm_unregister(dtpm);
+}
+
+void dtpm_destroy_hierarchy(void)
+{
+	int i;
+
+	mutex_lock(&dtpm_lock);
+
+	if (!pct)
+		goto out_unlock;
+
+	__dtpm_destroy_hierarchy(root);
+	
+
+	for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) {
+
+		if (!dtpm_subsys[i]->exit)
+			continue;
+
+		dtpm_subsys[i]->exit();
+	}
+
+	powercap_unregister_control_type(pct);
+
+	pct = NULL;
+
+	root = NULL;
+
+out_unlock:
+	mutex_unlock(&dtpm_lock);
 }
-late_initcall(init_dtpm);
+EXPORT_SYMBOL_GPL(dtpm_destroy_hierarchy);
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index b740866b228d..bca2f912d349 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -21,6 +21,7 @@
 #include <linux/cpuhotplug.h>
 #include <linux/dtpm.h>
 #include <linux/energy_model.h>
+#include <linux/of.h>
 #include <linux/pm_qos.h>
 #include <linux/slab.h>
 #include <linux/units.h>
@@ -150,10 +151,17 @@ static int update_pd_power_uw(struct dtpm *dtpm)
 static void pd_release(struct dtpm *dtpm)
 {
 	struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
+	struct cpufreq_policy *policy;
 
 	if (freq_qos_request_active(&dtpm_cpu->qos_req))
 		freq_qos_remove_request(&dtpm_cpu->qos_req);
 
+	policy = cpufreq_cpu_get(dtpm_cpu->cpu);
+	if (policy) {
+		for_each_cpu(dtpm_cpu->cpu, policy->related_cpus)
+			per_cpu(dtpm_per_cpu, dtpm_cpu->cpu) = NULL;
+	}
+	
 	kfree(dtpm_cpu);
 }
 
@@ -178,11 +186,26 @@ static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
 static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 {
 	struct dtpm_cpu *dtpm_cpu;
+
+	dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
+	if (dtpm_cpu)
+		return dtpm_update_power(&dtpm_cpu->dtpm);
+
+	return 0;
+}
+
+static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
+{
+	struct dtpm_cpu *dtpm_cpu;
 	struct cpufreq_policy *policy;
 	struct em_perf_domain *pd;
 	char name[CPUFREQ_NAME_LEN];
 	int ret = -ENOMEM;
 
+	dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
+	if (dtpm_cpu)
+		return 0;
+
 	policy = cpufreq_cpu_get(cpu);
 	if (!policy)
 		return 0;
@@ -191,10 +214,6 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 	if (!pd)
 		return -EINVAL;
 
-	dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
-	if (dtpm_cpu)
-		return dtpm_update_power(&dtpm_cpu->dtpm);
-
 	dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
 	if (!dtpm_cpu)
 		return -ENOMEM;
@@ -207,7 +226,7 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 
 	snprintf(name, sizeof(name), "cpu%d-cpufreq", dtpm_cpu->cpu);
 
-	ret = dtpm_register(name, &dtpm_cpu->dtpm, NULL);
+	ret = dtpm_register(name, &dtpm_cpu->dtpm, parent);
 	if (ret)
 		goto out_kfree_dtpm_cpu;
 
@@ -231,7 +250,18 @@ out_kfree_dtpm_cpu:
 	return ret;
 }
 
-static int __init dtpm_cpu_init(void)
+static int dtpm_cpu_setup(struct dtpm *dtpm, struct device_node *np)
+{
+	int cpu;
+
+	cpu = of_cpu_node_to_id(np);
+	if (cpu < 0)
+		return 0;
+
+	return __dtpm_cpu_setup(cpu, dtpm);
+}
+
+static int dtpm_cpu_init(void)
 {
 	int ret;
 
@@ -269,4 +299,15 @@ static int __init dtpm_cpu_init(void)
 	return 0;
 }
 
-DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init);
+static void dtpm_cpu_exit(void)
+{
+	cpuhp_remove_state_nocalls(CPUHP_AP_ONLINE_DYN);
+	cpuhp_remove_state_nocalls(CPUHP_AP_DTPM_CPU_DEAD);
+}
+
+struct dtpm_subsys_ops dtpm_cpu_ops = {
+	.name = KBUILD_MODNAME,
+	.init = dtpm_cpu_init,
+	.exit = dtpm_cpu_exit,
+	.setup = dtpm_cpu_setup,
+};
diff --git a/drivers/powercap/dtpm_devfreq.c b/drivers/powercap/dtpm_devfreq.c
new file mode 100644
index 000000000000..91276761a31d
--- /dev/null
+++ b/drivers/powercap/dtpm_devfreq.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021 Linaro Limited
+ *
+ * Author: Daniel Lezcano <[email protected]>
+ *
+ * The devfreq device combined with the energy model and the load can
+ * give an estimation of the power consumption as well as limiting the
+ * power.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpumask.h>
+#include <linux/devfreq.h>
+#include <linux/dtpm.h>
+#include <linux/energy_model.h>
+#include <linux/of.h>
+#include <linux/pm_qos.h>
+#include <linux/slab.h>
+#include <linux/units.h>
+
+struct dtpm_devfreq {
+	struct dtpm dtpm;
+	struct dev_pm_qos_request qos_req;
+	struct devfreq *devfreq;
+};
+
+static struct dtpm_devfreq *to_dtpm_devfreq(struct dtpm *dtpm)
+{
+	return container_of(dtpm, struct dtpm_devfreq, dtpm);
+}
+
+static int update_pd_power_uw(struct dtpm *dtpm)
+{
+	struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+	struct devfreq *devfreq = dtpm_devfreq->devfreq;
+	struct device *dev = devfreq->dev.parent;
+	struct em_perf_domain *pd = em_pd_get(dev);
+
+	dtpm->power_min = pd->table[0].power;
+	dtpm->power_min *= MICROWATT_PER_MILLIWATT;
+
+	dtpm->power_max = pd->table[pd->nr_perf_states - 1].power;
+	dtpm->power_max *= MICROWATT_PER_MILLIWATT;
+
+	return 0;
+}
+
+static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
+{
+	struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+	struct devfreq *devfreq = dtpm_devfreq->devfreq;
+	struct device *dev = devfreq->dev.parent;
+	struct em_perf_domain *pd = em_pd_get(dev);
+	unsigned long freq;
+	u64 power;
+	int i;
+
+	for (i = 0; i < pd->nr_perf_states; i++) {
+
+		power = pd->table[i].power * MICROWATT_PER_MILLIWATT;
+		if (power > power_limit)
+			break;
+	}
+
+	freq = pd->table[i - 1].frequency;
+
+	dev_pm_qos_update_request(&dtpm_devfreq->qos_req, freq);
+
+	power_limit = pd->table[i - 1].power * MICROWATT_PER_MILLIWATT;
+
+	return power_limit;
+}
+
+static void _normalize_load(struct devfreq_dev_status *status)
+{
+	if (status->total_time > 0xfffff) {
+		status->total_time >>= 10;
+		status->busy_time >>= 10;
+	}
+
+	status->busy_time <<= 10;
+	status->busy_time /= status->total_time ? : 1;
+
+	status->busy_time = status->busy_time ? : 1;
+	status->total_time = 1024;
+}
+
+static u64 get_pd_power_uw(struct dtpm *dtpm)
+{
+	struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+	struct devfreq *devfreq = dtpm_devfreq->devfreq;
+	struct device *dev = devfreq->dev.parent;
+	struct em_perf_domain *pd = em_pd_get(dev);
+	struct devfreq_dev_status status;
+	unsigned long freq;
+	u64 power;
+	int i;
+
+	mutex_lock(&devfreq->lock);
+	status = devfreq->last_status;
+	mutex_unlock(&devfreq->lock);
+
+	freq = DIV_ROUND_UP(status.current_frequency, HZ_PER_KHZ);
+	_normalize_load(&status);
+
+	for (i = 0; i < pd->nr_perf_states; i++) {
+
+		if (pd->table[i].frequency < freq)
+			continue;
+
+		power = pd->table[i].power * MICROWATT_PER_MILLIWATT;
+		power *= status.busy_time;
+		power >>= 10;
+
+		return power;
+	}
+
+	return 0;
+}
+
+static void pd_release(struct dtpm *dtpm)
+{
+	struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+
+	if (dev_pm_qos_request_active(&dtpm_devfreq->qos_req))
+		dev_pm_qos_remove_request(&dtpm_devfreq->qos_req);
+
+	kfree(dtpm_devfreq);
+}
+
+static struct dtpm_ops dtpm_ops = {
+	.set_power_uw = set_pd_power_limit,
+	.get_power_uw = get_pd_power_uw,
+	.update_power_uw = update_pd_power_uw,
+	.release = pd_release,
+};
+
+static int __dtpm_devfreq_setup(struct devfreq *devfreq, struct dtpm *parent)
+{
+	struct device *dev = devfreq->dev.parent;
+	struct dtpm_devfreq *dtpm_devfreq;
+	struct em_perf_domain *pd;
+	int ret = -ENOMEM;
+
+	pd = em_pd_get(dev);
+	if (!pd) {
+		ret = dev_pm_opp_of_register_em(dev, NULL);
+		if (ret) {
+			pr_err("No energy model available for '%s'\n", dev_name(dev));
+			return -EINVAL;
+		}
+	}
+
+	dtpm_devfreq = kzalloc(sizeof(*dtpm_devfreq), GFP_KERNEL);
+	if (!dtpm_devfreq)
+		return -ENOMEM;
+
+	dtpm_init(&dtpm_devfreq->dtpm, &dtpm_ops);
+
+	dtpm_devfreq->devfreq = devfreq;
+
+	ret = dtpm_register(dev_name(dev), &dtpm_devfreq->dtpm, parent);
+	if (ret) {
+		pr_err("Failed to register '%s': %d\n", dev_name(dev), ret);
+		kfree(dtpm_devfreq);
+		return ret;
+	}
+
+	ret = dev_pm_qos_add_request(dev, &dtpm_devfreq->qos_req,
+				     DEV_PM_QOS_MAX_FREQUENCY,
+				     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
+	if (ret) {
+		pr_err("Failed to add QoS request: %d\n", ret);
+		goto out_dtpm_unregister;
+	}
+
+	dtpm_update_power(&dtpm_devfreq->dtpm);
+
+	return 0;
+
+out_dtpm_unregister:
+	dtpm_unregister(&dtpm_devfreq->dtpm);
+
+	return ret;
+}
+
+static int dtpm_devfreq_setup(struct dtpm *dtpm, struct device_node *np)
+{
+	struct devfreq *devfreq;
+
+	devfreq = devfreq_get_devfreq_by_node(np);
+	if (IS_ERR(devfreq))
+		return 0;
+
+	return __dtpm_devfreq_setup(devfreq, dtpm);
+}
+
+struct dtpm_subsys_ops dtpm_devfreq_ops = {
+	.name = KBUILD_MODNAME,
+	.setup = dtpm_devfreq_setup,
+};
diff --git a/drivers/powercap/dtpm_subsys.h b/drivers/powercap/dtpm_subsys.h
new file mode 100644
index 000000000000..db1712938a96
--- /dev/null
+++ b/drivers/powercap/dtpm_subsys.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 Linaro Ltd
+ *
+ * Author: Daniel Lezcano <[email protected]>
+ */
+#ifndef ___DTPM_SUBSYS_H__
+#define ___DTPM_SUBSYS_H__
+
+extern struct dtpm_subsys_ops dtpm_cpu_ops;
+extern struct dtpm_subsys_ops dtpm_devfreq_ops;
+
+struct dtpm_subsys_ops *dtpm_subsys[] = {
+#ifdef CONFIG_DTPM_CPU
+	&dtpm_cpu_ops,
+#endif
+#ifdef CONFIG_DTPM_DEVFREQ
+	&dtpm_devfreq_ops,
+#endif
+};
+
+#endif
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 88c549f257db..40a52feb315d 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -986,8 +986,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
 	CMD_SP(sc) = NULL;
 	CMD_FLAGS(sc) |= FNIC_IO_DONE;
 
-	spin_unlock_irqrestore(io_lock, flags);
-
 	if (hdr_status != FCPIO_SUCCESS) {
 		atomic64_inc(&fnic_stats->io_stats.io_failures);
 		shost_printk(KERN_ERR, fnic->lport->host, "hdr status = %s\n",
@@ -996,8 +994,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
 
 	fnic_release_ioreq_buf(fnic, io_req, sc);
 
-	mempool_free(io_req, fnic->io_req_pool);
-
 	cmd_trace = ((u64)hdr_status << 56) |
 		  (u64)icmnd_cmpl->scsi_status << 48 |
 		  (u64)icmnd_cmpl->flags << 40 | (u64)sc->cmnd[0] << 32 |
@@ -1021,6 +1017,12 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
 	} else
 		fnic->lport->host_stats.fcp_control_requests++;
 
+	/* Call SCSI completion function to complete the IO */
+	scsi_done(sc);
+	spin_unlock_irqrestore(io_lock, flags);
+
+	mempool_free(io_req, fnic->io_req_pool);
+
 	atomic64_dec(&fnic_stats->io_stats.active_ios);
 	if (atomic64_read(&fnic->io_cmpl_skip))
 		atomic64_dec(&fnic->io_cmpl_skip);
@@ -1049,9 +1051,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
 		if(io_duration_time > atomic64_read(&fnic_stats->io_stats.current_max_io_time))
 			atomic64_set(&fnic_stats->io_stats.current_max_io_time, io_duration_time);
 	}
-
-	/* Call SCSI completion function to complete the IO */
-	scsi_done(sc);
 }
 
 /* fnic_fcpio_itmf_cmpl_handler
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 511726f92d9a..76229b839560 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -2011,9 +2011,10 @@ mpt3sas_base_sync_reply_irqs(struct MPT3SAS_ADAPTER *ioc, u8 poll)
 				enable_irq(reply_q->os_irq);
 			}
 		}
+
+		if (poll)
+			_base_process_reply_queue(reply_q);
 	}
-	if (poll)
-		_base_process_reply_queue(reply_q);
 }
 
 /**
diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c
index 12c10a5e3d93..7f421600cb66 100644
--- a/drivers/scsi/xen-scsifront.c
+++ b/drivers/scsi/xen-scsifront.c
@@ -233,12 +233,11 @@ static void scsifront_gnttab_done(struct vscsifrnt_info *info,
 		return;
 
 	for (i = 0; i < shadow->nr_grants; i++) {
-		if (unlikely(gnttab_query_foreign_access(shadow->gref[i]))) {
+		if (unlikely(!gnttab_try_end_foreign_access(shadow->gref[i]))) {
 			shost_printk(KERN_ALERT, info->host, KBUILD_MODNAME
 				     "grant still in use by backend\n");
 			BUG();
 		}
-		gnttab_end_foreign_access(shadow->gref[i], 0, 0UL);
 	}
 
 	kfree(shadow->sg);
diff --git a/drivers/soc/mediatek/mt8192-mmsys.h b/drivers/soc/mediatek/mt8192-mmsys.h
index 6f0a57044a7b..6aae0b12b6ff 100644
--- a/drivers/soc/mediatek/mt8192-mmsys.h
+++ b/drivers/soc/mediatek/mt8192-mmsys.h
@@ -53,7 +53,8 @@ static const struct mtk_mmsys_routes mmsys_mt8192_routing_table[] = {
 		MT8192_AAL0_SEL_IN_CCORR0
 	}, {
 		DDP_COMPONENT_DITHER, DDP_COMPONENT_DSI0,
-		MT8192_DISP_DSI0_SEL_IN, MT8192_DSI0_SEL_IN_DITHER0
+		MT8192_DISP_DSI0_SEL_IN, MT8192_DSI0_SEL_IN_DITHER0,
+		MT8192_DSI0_SEL_IN_DITHER0
 	}, {
 		DDP_COMPONENT_RDMA0, DDP_COMPONENT_COLOR0,
 		MT8192_DISP_RDMA0_SOUT_SEL, MT8192_RDMA0_SOUT_COLOR0,
diff --git a/drivers/soc/rockchip/Kconfig b/drivers/soc/rockchip/Kconfig
index 25eb2c1e31bb..156ac0e0c8fe 100644
--- a/drivers/soc/rockchip/Kconfig
+++ b/drivers/soc/rockchip/Kconfig
@@ -34,4 +34,12 @@ config ROCKCHIP_PM_DOMAINS
 
           If unsure, say N.
 
+config ROCKCHIP_DTPM
+	tristate "Rockchip DTPM hierarchy"
+	depends on DTPM && m
+	help
+	 Describe the hierarchy for the Dynamic Thermal Power
+	 Management tree on this platform. That will create all the
+	 power capping capable devices.
+
 endif
diff --git a/drivers/soc/rockchip/Makefile b/drivers/soc/rockchip/Makefile
index 875032f7344e..05f31a4e743c 100644
--- a/drivers/soc/rockchip/Makefile
+++ b/drivers/soc/rockchip/Makefile
@@ -5,3 +5,4 @@
 obj-$(CONFIG_ROCKCHIP_GRF) += grf.o
 obj-$(CONFIG_ROCKCHIP_IODOMAIN) += io-domain.o
 obj-$(CONFIG_ROCKCHIP_PM_DOMAINS) += pm_domains.o
+obj-$(CONFIG_ROCKCHIP_DTPM) += dtpm.o
diff --git a/drivers/soc/rockchip/dtpm.c b/drivers/soc/rockchip/dtpm.c
new file mode 100644
index 000000000000..5a23784b5221
--- /dev/null
+++ b/drivers/soc/rockchip/dtpm.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021 Linaro Limited
+ *
+ * Author: Daniel Lezcano <[email protected]>
+ *
+ * DTPM hierarchy description
+ */
+#include <linux/dtpm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+static struct dtpm_node __initdata rk3399_hierarchy[] = {
+	[0]{ .name = "rk3399",
+	     .type = DTPM_NODE_VIRTUAL },
+	[1]{ .name = "package",
+	     .type = DTPM_NODE_VIRTUAL,
+	     .parent = &rk3399_hierarchy[0] },
+	[2]{ .name = "/cpus/cpu@0",
+	     .type = DTPM_NODE_DT,
+	     .parent = &rk3399_hierarchy[1] },
+	[3]{ .name = "/cpus/cpu@1",
+	     .type = DTPM_NODE_DT,
+	     .parent = &rk3399_hierarchy[1] },
+	[4]{ .name = "/cpus/cpu@2",
+	     .type = DTPM_NODE_DT,
+	     .parent = &rk3399_hierarchy[1] },
+	[5]{ .name = "/cpus/cpu@3",
+	     .type = DTPM_NODE_DT,
+	     .parent = &rk3399_hierarchy[1] },
+	[6]{ .name = "/cpus/cpu@100",
+	     .type = DTPM_NODE_DT,
+	     .parent = &rk3399_hierarchy[1] },
+	[7]{ .name = "/cpus/cpu@101",
+	     .type = DTPM_NODE_DT,
+	     .parent = &rk3399_hierarchy[1] },
+	[8]{ .name = "/gpu@ff9a0000",
+	     .type = DTPM_NODE_DT,
+	     .parent = &rk3399_hierarchy[1] },
+	[9]{ /* sentinel */ }
+};
+
+static struct of_device_id __initdata rockchip_dtpm_match_table[] = {
+        { .compatible = "rockchip,rk3399", .data = rk3399_hierarchy },
+        {},
+};
+
+static int __init rockchip_dtpm_init(void)
+{
+	return dtpm_create_hierarchy(rockchip_dtpm_match_table);
+}
+module_init(rockchip_dtpm_init);
+
+static void __exit rockchip_dtpm_exit(void)
+{
+	return dtpm_destroy_hierarchy();
+}
+module_exit(rockchip_dtpm_exit);
+
+MODULE_SOFTDEP("pre: panfrost cpufreq-dt");
+MODULE_DESCRIPTION("Rockchip DTPM driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:dtpm");
+MODULE_AUTHOR("Daniel Lezcano <[email protected]");
diff --git a/drivers/soc/samsung/exynos-chipid.c b/drivers/soc/samsung/exynos-chipid.c
index 2746d05936d3..0fb3631e7346 100644
--- a/drivers/soc/samsung/exynos-chipid.c
+++ b/drivers/soc/samsung/exynos-chipid.c
@@ -204,7 +204,7 @@ module_platform_driver(exynos_chipid_driver);
 
 MODULE_DESCRIPTION("Samsung Exynos ChipID controller and ASV driver");
 MODULE_AUTHOR("Bartlomiej Zolnierkiewicz <[email protected]>");
-MODULE_AUTHOR("Krzysztof Kozlowski <[email protected]>");
+MODULE_AUTHOR("Krzysztof Kozlowski <[email protected]>");
 MODULE_AUTHOR("Pankaj Dubey <[email protected]>");
 MODULE_AUTHOR("Sylwester Nawrocki <[email protected]>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 4599b121d744..d96082dc3340 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1019,10 +1019,10 @@ int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
 	int i, ret;
 
 	if (vmalloced_buf || kmap_buf) {
-		desc_len = min_t(int, max_seg_size, PAGE_SIZE);
+		desc_len = min_t(unsigned int, max_seg_size, PAGE_SIZE);
 		sgs = DIV_ROUND_UP(len + offset_in_page(buf), desc_len);
 	} else if (virt_addr_valid(buf)) {
-		desc_len = min_t(int, max_seg_size, ctlr->max_dma_len);
+		desc_len = min_t(unsigned int, max_seg_size, ctlr->max_dma_len);
 		sgs = DIV_ROUND_UP(len, desc_len);
 	} else {
 		return -EINVAL;
diff --git a/drivers/staging/gdm724x/gdm_lte.c b/drivers/staging/gdm724x/gdm_lte.c
index 493ed4821515..0d8d8fed283d 100644
--- a/drivers/staging/gdm724x/gdm_lte.c
+++ b/drivers/staging/gdm724x/gdm_lte.c
@@ -76,14 +76,15 @@ static void tx_complete(void *arg)
 
 static int gdm_lte_rx(struct sk_buff *skb, struct nic *nic, int nic_type)
 {
-	int ret;
+	int ret, len;
 
+	len = skb->len + ETH_HLEN;
 	ret = netif_rx_ni(skb);
 	if (ret == NET_RX_DROP) {
 		nic->stats.rx_dropped++;
 	} else {
 		nic->stats.rx_packets++;
-		nic->stats.rx_bytes += skb->len + ETH_HLEN;
+		nic->stats.rx_bytes += len;
 	}
 
 	return 0;
diff --git a/drivers/staging/greybus/gpio.c b/drivers/staging/greybus/gpio.c
index 7e6347fe93f9..8a7cf1d0e968 100644
--- a/drivers/staging/greybus/gpio.c
+++ b/drivers/staging/greybus/gpio.c
@@ -391,10 +391,7 @@ static int gb_gpio_request_handler(struct gb_operation *op)
 		return -EINVAL;
 	}
 
-	local_irq_disable();
-	ret = generic_handle_irq(irq);
-	local_irq_enable();
-
+	ret = generic_handle_irq_safe(irq);
 	if (ret)
 		dev_err(dev, "failed to invoke irq handler\n");
 
diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
index 0f82f5031c43..49a3f45cb771 100644
--- a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
+++ b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
@@ -5907,6 +5907,7 @@ u8 chk_bmc_sleepq_hdl(struct adapter *padapter, unsigned char *pbuf)
 	struct sta_info *psta_bmc;
 	struct list_head *xmitframe_plist, *xmitframe_phead, *tmp;
 	struct xmit_frame *pxmitframe = NULL;
+	struct xmit_priv *pxmitpriv = &padapter->xmitpriv;
 	struct sta_priv  *pstapriv = &padapter->stapriv;
 
 	/* for BC/MC Frames */
@@ -5917,7 +5918,8 @@ u8 chk_bmc_sleepq_hdl(struct adapter *padapter, unsigned char *pbuf)
 	if ((pstapriv->tim_bitmap&BIT(0)) && (psta_bmc->sleepq_len > 0)) {
 		msleep(10);/*  10ms, ATIM(HIQ) Windows */
 
-		spin_lock_bh(&psta_bmc->sleep_q.lock);
+		/* spin_lock_bh(&psta_bmc->sleep_q.lock); */
+		spin_lock_bh(&pxmitpriv->lock);
 
 		xmitframe_phead = get_list_head(&psta_bmc->sleep_q);
 		list_for_each_safe(xmitframe_plist, tmp, xmitframe_phead) {
@@ -5940,7 +5942,8 @@ u8 chk_bmc_sleepq_hdl(struct adapter *padapter, unsigned char *pbuf)
 			rtw_hal_xmitframe_enqueue(padapter, pxmitframe);
 		}
 
-		spin_unlock_bh(&psta_bmc->sleep_q.lock);
+		/* spin_unlock_bh(&psta_bmc->sleep_q.lock); */
+		spin_unlock_bh(&pxmitpriv->lock);
 
 		/* check hi queue and bmc_sleepq */
 		rtw_chk_hi_queue_cmd(padapter);
diff --git a/drivers/staging/rtl8723bs/core/rtw_recv.c b/drivers/staging/rtl8723bs/core/rtw_recv.c
index 41bfca549c64..105fe0e3482a 100644
--- a/drivers/staging/rtl8723bs/core/rtw_recv.c
+++ b/drivers/staging/rtl8723bs/core/rtw_recv.c
@@ -957,8 +957,10 @@ static signed int validate_recv_ctrl_frame(struct adapter *padapter, union recv_
 		if ((psta->state&WIFI_SLEEP_STATE) && (pstapriv->sta_dz_bitmap&BIT(psta->aid))) {
 			struct list_head	*xmitframe_plist, *xmitframe_phead;
 			struct xmit_frame *pxmitframe = NULL;
+			struct xmit_priv *pxmitpriv = &padapter->xmitpriv;
 
-			spin_lock_bh(&psta->sleep_q.lock);
+			/* spin_lock_bh(&psta->sleep_q.lock); */
+			spin_lock_bh(&pxmitpriv->lock);
 
 			xmitframe_phead = get_list_head(&psta->sleep_q);
 			xmitframe_plist = get_next(xmitframe_phead);
@@ -989,10 +991,12 @@ static signed int validate_recv_ctrl_frame(struct adapter *padapter, union recv_
 					update_beacon(padapter, WLAN_EID_TIM, NULL, true);
 				}
 
-				spin_unlock_bh(&psta->sleep_q.lock);
+				/* spin_unlock_bh(&psta->sleep_q.lock); */
+				spin_unlock_bh(&pxmitpriv->lock);
 
 			} else {
-				spin_unlock_bh(&psta->sleep_q.lock);
+				/* spin_unlock_bh(&psta->sleep_q.lock); */
+				spin_unlock_bh(&pxmitpriv->lock);
 
 				if (pstapriv->tim_bitmap&BIT(psta->aid)) {
 					if (psta->sleepq_len == 0) {
diff --git a/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c b/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c
index 0c9ea1520fd0..beb11d89db18 100644
--- a/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c
+++ b/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c
@@ -293,48 +293,46 @@ u32 rtw_free_stainfo(struct adapter *padapter, struct sta_info *psta)
 
 	/* list_del_init(&psta->wakeup_list); */
 
-	spin_lock_bh(&psta->sleep_q.lock);
+	spin_lock_bh(&pxmitpriv->lock);
+
 	rtw_free_xmitframe_queue(pxmitpriv, &psta->sleep_q);
 	psta->sleepq_len = 0;
-	spin_unlock_bh(&psta->sleep_q.lock);
-
-	spin_lock_bh(&pxmitpriv->lock);
 
 	/* vo */
-	spin_lock_bh(&pstaxmitpriv->vo_q.sta_pending.lock);
+	/* spin_lock_bh(&(pxmitpriv->vo_pending.lock)); */
 	rtw_free_xmitframe_queue(pxmitpriv, &pstaxmitpriv->vo_q.sta_pending);
 	list_del_init(&(pstaxmitpriv->vo_q.tx_pending));
 	phwxmit = pxmitpriv->hwxmits;
 	phwxmit->accnt -= pstaxmitpriv->vo_q.qcnt;
 	pstaxmitpriv->vo_q.qcnt = 0;
-	spin_unlock_bh(&pstaxmitpriv->vo_q.sta_pending.lock);
+	/* spin_unlock_bh(&(pxmitpriv->vo_pending.lock)); */
 
 	/* vi */
-	spin_lock_bh(&pstaxmitpriv->vi_q.sta_pending.lock);
+	/* spin_lock_bh(&(pxmitpriv->vi_pending.lock)); */
 	rtw_free_xmitframe_queue(pxmitpriv, &pstaxmitpriv->vi_q.sta_pending);
 	list_del_init(&(pstaxmitpriv->vi_q.tx_pending));
 	phwxmit = pxmitpriv->hwxmits+1;
 	phwxmit->accnt -= pstaxmitpriv->vi_q.qcnt;
 	pstaxmitpriv->vi_q.qcnt = 0;
-	spin_unlock_bh(&pstaxmitpriv->vi_q.sta_pending.lock);
+	/* spin_unlock_bh(&(pxmitpriv->vi_pending.lock)); */
 
 	/* be */
-	spin_lock_bh(&pstaxmitpriv->be_q.sta_pending.lock);
+	/* spin_lock_bh(&(pxmitpriv->be_pending.lock)); */
 	rtw_free_xmitframe_queue(pxmitpriv, &pstaxmitpriv->be_q.sta_pending);
 	list_del_init(&(pstaxmitpriv->be_q.tx_pending));
 	phwxmit = pxmitpriv->hwxmits+2;
 	phwxmit->accnt -= pstaxmitpriv->be_q.qcnt;
 	pstaxmitpriv->be_q.qcnt = 0;
-	spin_unlock_bh(&pstaxmitpriv->be_q.sta_pending.lock);
+	/* spin_unlock_bh(&(pxmitpriv->be_pending.lock)); */
 
 	/* bk */
-	spin_lock_bh(&pstaxmitpriv->bk_q.sta_pending.lock);
+	/* spin_lock_bh(&(pxmitpriv->bk_pending.lock)); */
 	rtw_free_xmitframe_queue(pxmitpriv, &pstaxmitpriv->bk_q.sta_pending);
 	list_del_init(&(pstaxmitpriv->bk_q.tx_pending));
 	phwxmit = pxmitpriv->hwxmits+3;
 	phwxmit->accnt -= pstaxmitpriv->bk_q.qcnt;
 	pstaxmitpriv->bk_q.qcnt = 0;
-	spin_unlock_bh(&pstaxmitpriv->bk_q.sta_pending.lock);
+	/* spin_unlock_bh(&(pxmitpriv->bk_pending.lock)); */
 
 	spin_unlock_bh(&pxmitpriv->lock);
 
diff --git a/drivers/staging/rtl8723bs/core/rtw_xmit.c b/drivers/staging/rtl8723bs/core/rtw_xmit.c
index 13b8bd5ffabc..f466bfd248fb 100644
--- a/drivers/staging/rtl8723bs/core/rtw_xmit.c
+++ b/drivers/staging/rtl8723bs/core/rtw_xmit.c
@@ -1734,12 +1734,15 @@ void rtw_free_xmitframe_queue(struct xmit_priv *pxmitpriv, struct __queue *pfram
 	struct list_head *plist, *phead, *tmp;
 	struct	xmit_frame	*pxmitframe;
 
+	spin_lock_bh(&pframequeue->lock);
+
 	phead = get_list_head(pframequeue);
 	list_for_each_safe(plist, tmp, phead) {
 		pxmitframe = list_entry(plist, struct xmit_frame, list);
 
 		rtw_free_xmitframe(pxmitpriv, pxmitframe);
 	}
+	spin_unlock_bh(&pframequeue->lock);
 }
 
 s32 rtw_xmitframe_enqueue(struct adapter *padapter, struct xmit_frame *pxmitframe)
@@ -1794,7 +1797,6 @@ s32 rtw_xmit_classifier(struct adapter *padapter, struct xmit_frame *pxmitframe)
 	struct sta_info *psta;
 	struct tx_servq	*ptxservq;
 	struct pkt_attrib	*pattrib = &pxmitframe->attrib;
-	struct xmit_priv *xmit_priv = &padapter->xmitpriv;
 	struct hw_xmit	*phwxmits =  padapter->xmitpriv.hwxmits;
 	signed int res = _SUCCESS;
 
@@ -1812,14 +1814,12 @@ s32 rtw_xmit_classifier(struct adapter *padapter, struct xmit_frame *pxmitframe)
 
 	ptxservq = rtw_get_sta_pending(padapter, psta, pattrib->priority, (u8 *)(&ac_index));
 
-	spin_lock_bh(&xmit_priv->lock);
 	if (list_empty(&ptxservq->tx_pending))
 		list_add_tail(&ptxservq->tx_pending, get_list_head(phwxmits[ac_index].sta_queue));
 
 	list_add_tail(&pxmitframe->list, get_list_head(&ptxservq->sta_pending));
 	ptxservq->qcnt++;
 	phwxmits[ac_index].accnt++;
-	spin_unlock_bh(&xmit_priv->lock);
 
 exit:
 
@@ -2202,10 +2202,11 @@ void wakeup_sta_to_xmit(struct adapter *padapter, struct sta_info *psta)
 	struct list_head *xmitframe_plist, *xmitframe_phead, *tmp;
 	struct xmit_frame *pxmitframe = NULL;
 	struct sta_priv *pstapriv = &padapter->stapriv;
+	struct xmit_priv *pxmitpriv = &padapter->xmitpriv;
 
 	psta_bmc = rtw_get_bcmc_stainfo(padapter);
 
-	spin_lock_bh(&psta->sleep_q.lock);
+	spin_lock_bh(&pxmitpriv->lock);
 
 	xmitframe_phead = get_list_head(&psta->sleep_q);
 	list_for_each_safe(xmitframe_plist, tmp, xmitframe_phead) {
@@ -2306,7 +2307,7 @@ void wakeup_sta_to_xmit(struct adapter *padapter, struct sta_info *psta)
 
 _exit:
 
-	spin_unlock_bh(&psta->sleep_q.lock);
+	spin_unlock_bh(&pxmitpriv->lock);
 
 	if (update_mask)
 		update_beacon(padapter, WLAN_EID_TIM, NULL, true);
@@ -2318,8 +2319,9 @@ void xmit_delivery_enabled_frames(struct adapter *padapter, struct sta_info *pst
 	struct list_head *xmitframe_plist, *xmitframe_phead, *tmp;
 	struct xmit_frame *pxmitframe = NULL;
 	struct sta_priv *pstapriv = &padapter->stapriv;
+	struct xmit_priv *pxmitpriv = &padapter->xmitpriv;
 
-	spin_lock_bh(&psta->sleep_q.lock);
+	spin_lock_bh(&pxmitpriv->lock);
 
 	xmitframe_phead = get_list_head(&psta->sleep_q);
 	list_for_each_safe(xmitframe_plist, tmp, xmitframe_phead) {
@@ -2372,7 +2374,7 @@ void xmit_delivery_enabled_frames(struct adapter *padapter, struct sta_info *pst
 		}
 	}
 
-	spin_unlock_bh(&psta->sleep_q.lock);
+	spin_unlock_bh(&pxmitpriv->lock);
 }
 
 void enqueue_pending_xmitbuf(struct xmit_priv *pxmitpriv, struct xmit_buf *pxmitbuf)
diff --git a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c
index b5d5e922231c..15810438a472 100644
--- a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c
+++ b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c
@@ -502,7 +502,9 @@ s32 rtl8723bs_hal_xmit(
 			rtw_issue_addbareq_cmd(padapter, pxmitframe);
 	}
 
+	spin_lock_bh(&pxmitpriv->lock);
 	err = rtw_xmitframe_enqueue(padapter, pxmitframe);
+	spin_unlock_bh(&pxmitpriv->lock);
 	if (err != _SUCCESS) {
 		rtw_free_xmitframe(pxmitpriv, pxmitframe);
 
diff --git a/drivers/staging/rtl8723bs/include/rtw_mlme.h b/drivers/staging/rtl8723bs/include/rtw_mlme.h
index c94fa7d8d5a9..1b343b434f4d 100644
--- a/drivers/staging/rtl8723bs/include/rtw_mlme.h
+++ b/drivers/staging/rtl8723bs/include/rtw_mlme.h
@@ -102,13 +102,17 @@ there are several "locks" in mlme_priv,
 since mlme_priv is a shared resource between many threads,
 like ISR/Call-Back functions, the OID handlers, and even timer functions.
 
-
 Each struct __queue has its own locks, already.
-Other items are protected by mlme_priv.lock.
+Other items in mlme_priv are protected by mlme_priv.lock, while items in
+xmit_priv are protected by xmit_priv.lock.
 
 To avoid possible dead lock, any thread trying to modifiying mlme_priv
 SHALL not lock up more than one locks at a time!
 
+The only exception is that queue functions which take the __queue.lock
+may be called with the xmit_priv.lock held. In this case the order
+MUST always be first lock xmit_priv.lock and then call any queue functions
+which take __queue.lock.
 */
 
 
diff --git a/drivers/thermal/broadcom/brcmstb_thermal.c b/drivers/thermal/broadcom/brcmstb_thermal.c
index 8df5edef1ded..0cedb8b4f00a 100644
--- a/drivers/thermal/broadcom/brcmstb_thermal.c
+++ b/drivers/thermal/broadcom/brcmstb_thermal.c
@@ -351,7 +351,7 @@ static int brcmstb_thermal_probe(struct platform_device *pdev)
 
 	priv->thermal = thermal;
 
-	irq = platform_get_irq(pdev, 0);
+	irq = platform_get_irq_optional(pdev, 0);
 	if (irq >= 0) {
 		ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
 						brcmstb_tmon_irq_thread,
diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig
index c83ea5d04a1d..f0c845679250 100644
--- a/drivers/thermal/intel/Kconfig
+++ b/drivers/thermal/intel/Kconfig
@@ -99,3 +99,17 @@ config INTEL_MENLOW
 	  Intel Menlow platform.
 
 	  If unsure, say N.
+
+config INTEL_HFI_THERMAL
+	bool "Intel Hardware Feedback Interface"
+	depends on NET
+	depends on CPU_SUP_INTEL
+	depends on X86_THERMAL_VECTOR
+	select THERMAL_NETLINK
+	help
+	  Select this option to enable the Hardware Feedback Interface. If
+	  selected, hardware provides guidance to the operating system on
+	  the performance and energy efficiency capabilities of each CPU.
+	  These capabilities may change as a result of changes in the operating
+	  conditions of the system such power and thermal limits. If selected,
+	  the kernel relays updates in CPUs' capabilities to userspace.
diff --git a/drivers/thermal/intel/Makefile b/drivers/thermal/intel/Makefile
index 960b56268b4a..9a8d8054f316 100644
--- a/drivers/thermal/intel/Makefile
+++ b/drivers/thermal/intel/Makefile
@@ -13,3 +13,4 @@ obj-$(CONFIG_INTEL_PCH_THERMAL)	+= intel_pch_thermal.o
 obj-$(CONFIG_INTEL_TCC_COOLING)	+= intel_tcc_cooling.o
 obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
 obj-$(CONFIG_INTEL_MENLOW)	+= intel_menlow.o
+obj-$(CONFIG_INTEL_HFI_THERMAL) += intel_hfi.o
diff --git a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
index e90690a234c4..01b80331eab6 100644
--- a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
+++ b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
@@ -72,7 +72,6 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
 	int i;
 	int nr_bad_entries = 0;
 	struct trt *trts;
-	struct acpi_device *adev;
 	union acpi_object *p;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	struct acpi_buffer element = { 0, NULL };
@@ -112,12 +111,10 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
 		if (!create_dev)
 			continue;
 
-		result = acpi_bus_get_device(trt->source, &adev);
-		if (result)
+		if (!acpi_fetch_acpi_dev(trt->source))
 			pr_warn("Failed to get source ACPI device\n");
 
-		result = acpi_bus_get_device(trt->target, &adev);
-		if (result)
+		if (!acpi_fetch_acpi_dev(trt->target))
 			pr_warn("Failed to get target ACPI device\n");
 	}
 
@@ -149,7 +146,6 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
 	int i;
 	int nr_bad_entries = 0;
 	struct art *arts;
-	struct acpi_device *adev;
 	union acpi_object *p;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	struct acpi_buffer element = { 0, NULL };
@@ -191,16 +187,11 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
 		if (!create_dev)
 			continue;
 
-		if (art->source) {
-			result = acpi_bus_get_device(art->source, &adev);
-			if (result)
-				pr_warn("Failed to get source ACPI device\n");
-		}
-		if (art->target) {
-			result = acpi_bus_get_device(art->target, &adev);
-			if (result)
-				pr_warn("Failed to get target ACPI device\n");
-		}
+		if (!acpi_fetch_acpi_dev(art->source))
+			pr_warn("Failed to get source ACPI device\n");
+
+		if (!acpi_fetch_acpi_dev(art->target))
+			pr_warn("Failed to get target ACPI device\n");
 	}
 
 	*artp = arts;
diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
index 4f478812cb51..4954800b9850 100644
--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
@@ -17,8 +17,8 @@
 #define INT3400_KEEP_ALIVE 0xA0
 
 enum int3400_thermal_uuid {
+	INT3400_THERMAL_ACTIVE = 0,
 	INT3400_THERMAL_PASSIVE_1,
-	INT3400_THERMAL_ACTIVE,
 	INT3400_THERMAL_CRITICAL,
 	INT3400_THERMAL_ADAPTIVE_PERFORMANCE,
 	INT3400_THERMAL_EMERGENCY_CALL_MODE,
@@ -31,8 +31,8 @@ enum int3400_thermal_uuid {
 };
 
 static char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = {
-	"42A441D6-AE6A-462b-A84B-4A8CE79027D3",
 	"3A95C389-E4B8-4629-A526-C52C88626BAE",
+	"42A441D6-AE6A-462b-A84B-4A8CE79027D3",
 	"97C68AE7-15FA-499c-B8C9-5DA81D606E0A",
 	"63BE270F-1C11-48FD-A6F7-3AF253FF3E2D",
 	"5349962F-71E6-431D-9AE8-0A635B710AEE",
@@ -53,12 +53,13 @@ struct int3400_thermal_priv {
 	struct art *arts;
 	int trt_count;
 	struct trt *trts;
-	u8 uuid_bitmap;
+	u32 uuid_bitmap;
 	int rel_misc_dev_res;
 	int current_uuid_index;
 	char *data_vault;
 	int odvp_count;
 	int *odvp;
+	u32 os_uuid_mask;
 	struct odvp_attr *odvp_attrs;
 };
 
@@ -142,12 +143,55 @@ static ssize_t current_uuid_show(struct device *dev,
 				 struct device_attribute *devattr, char *buf)
 {
 	struct int3400_thermal_priv *priv = dev_get_drvdata(dev);
+	int i, length = 0;
 
-	if (priv->current_uuid_index == -1)
-		return sprintf(buf, "INVALID\n");
+	if (priv->current_uuid_index > 0)
+		return sprintf(buf, "%s\n",
+			       int3400_thermal_uuids[priv->current_uuid_index]);
 
-	return sprintf(buf, "%s\n",
-		       int3400_thermal_uuids[priv->current_uuid_index]);
+	for (i = 0; i <= INT3400_THERMAL_CRITICAL; i++) {
+		if (priv->os_uuid_mask & BIT(i))
+			length += scnprintf(&buf[length],
+					    PAGE_SIZE - length,
+					    "%s\n",
+					    int3400_thermal_uuids[i]);
+	}
+
+	if (length)
+		return length;
+
+	return sprintf(buf, "INVALID\n");
+}
+
+static int int3400_thermal_run_osc(acpi_handle handle, char *uuid_str, int *enable)
+{
+	u32 ret, buf[2];
+	acpi_status status;
+	int result = 0;
+	struct acpi_osc_context context = {
+		.uuid_str = NULL,
+		.rev = 1,
+		.cap.length = 8,
+	};
+
+	context.uuid_str = uuid_str;
+
+	buf[OSC_QUERY_DWORD] = 0;
+	buf[OSC_SUPPORT_DWORD] = *enable;
+
+	context.cap.pointer = buf;
+
+	status = acpi_run_osc(handle, &context);
+	if (ACPI_SUCCESS(status)) {
+		ret = *((u32 *)(context.ret.pointer + 4));
+		if (ret != *enable)
+			result = -EPERM;
+	} else
+		result = -EPERM;
+
+	kfree(context.ret.pointer);
+
+	return result;
 }
 
 static ssize_t current_uuid_store(struct device *dev,
@@ -164,16 +208,47 @@ static ssize_t current_uuid_store(struct device *dev,
 			 * If we have a list of supported UUIDs, make sure
 			 * this one is supported.
 			 */
-			if (priv->uuid_bitmap &&
-			    !(priv->uuid_bitmap & (1 << i)))
+			if (priv->uuid_bitmap & BIT(i)) {
+				priv->current_uuid_index = i;
+				return count;
+			}
+
+			/*
+			 * There is support of only 3 policies via the new
+			 * _OSC to inform OS capability:
+			 * INT3400_THERMAL_ACTIVE
+			 * INT3400_THERMAL_PASSIVE_1
+			 * INT3400_THERMAL_CRITICAL
+			 */
+
+			if (i > INT3400_THERMAL_CRITICAL)
 				return -EINVAL;
 
-			priv->current_uuid_index = i;
-			return count;
+			priv->os_uuid_mask |= BIT(i);
+
+			break;
 		}
 	}
 
-	return -EINVAL;
+	if (priv->os_uuid_mask) {
+		int cap, ret;
+
+		/*
+		 * Capability bits:
+		 * Bit 0: set to 1 to indicate DPTF is active
+		 * Bi1 1: set to 1 to active cooling is supported by user space daemon
+		 * Bit 2: set to 1 to passive cooling is supported by user space daemon
+		 * Bit 3: set to 1 to critical trip is handled by user space daemon
+		 */
+		cap = ((priv->os_uuid_mask << 1) | 0x01);
+		ret = int3400_thermal_run_osc(priv->adev->handle,
+					      "b23ba85d-c8b7-3542-88de-8de2ffcfd698",
+					      &cap);
+		if (ret)
+			return ret;
+	}
+
+	return count;
 }
 
 static DEVICE_ATTR_RW(current_uuid);
@@ -236,41 +311,6 @@ end:
 	return result;
 }
 
-static int int3400_thermal_run_osc(acpi_handle handle,
-				enum int3400_thermal_uuid uuid, bool enable)
-{
-	u32 ret, buf[2];
-	acpi_status status;
-	int result = 0;
-	struct acpi_osc_context context = {
-		.uuid_str = NULL,
-		.rev = 1,
-		.cap.length = 8,
-	};
-
-	if (uuid < 0 || uuid >= INT3400_THERMAL_MAXIMUM_UUID)
-		return -EINVAL;
-
-	context.uuid_str = int3400_thermal_uuids[uuid];
-
-	buf[OSC_QUERY_DWORD] = 0;
-	buf[OSC_SUPPORT_DWORD] = enable;
-
-	context.cap.pointer = buf;
-
-	status = acpi_run_osc(handle, &context);
-	if (ACPI_SUCCESS(status)) {
-		ret = *((u32 *)(context.ret.pointer + 4));
-		if (ret != enable)
-			result = -EPERM;
-	} else
-		result = -EPERM;
-
-	kfree(context.ret.pointer);
-
-	return result;
-}
-
 static ssize_t odvp_show(struct kobject *kobj, struct kobj_attribute *attr,
 			 char *buf)
 {
@@ -426,10 +466,18 @@ static int int3400_thermal_change_mode(struct thermal_zone_device *thermal,
 	if (!priv)
 		return -EINVAL;
 
-	if (mode != thermal->mode)
+	if (mode != thermal->mode) {
+		int enabled;
+
+		if (priv->current_uuid_index < 0 ||
+		    priv->current_uuid_index >= INT3400_THERMAL_MAXIMUM_UUID)
+			return -EINVAL;
+
+		enabled = (mode == THERMAL_DEVICE_ENABLED);
 		result = int3400_thermal_run_osc(priv->adev->handle,
-						priv->current_uuid_index,
-						mode == THERMAL_DEVICE_ENABLED);
+						 int3400_thermal_uuids[priv->current_uuid_index],
+						 &enabled);
+	}
 
 
 	evaluate_odvp(priv);
@@ -468,6 +516,11 @@ static void int3400_setup_gddv(struct int3400_thermal_priv *priv)
 	priv->data_vault = kmemdup(obj->package.elements[0].buffer.pointer,
 				   obj->package.elements[0].buffer.length,
 				   GFP_KERNEL);
+	if (!priv->data_vault) {
+		kfree(buffer.pointer);
+		return;
+	}
+
 	bin_attr_data_vault.private = priv->data_vault;
 	bin_attr_data_vault.size = obj->package.elements[0].buffer.length;
 	kfree(buffer.pointer);
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
new file mode 100644
index 000000000000..730fd121df6e
--- /dev/null
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -0,0 +1,569 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Hardware Feedback Interface Driver
+ *
+ * Copyright (c) 2021, Intel Corporation.
+ *
+ * Authors: Aubrey Li <[email protected]>
+ *          Ricardo Neri <[email protected]>
+ *
+ *
+ * The Hardware Feedback Interface provides a performance and energy efficiency
+ * capability information for each CPU in the system. Depending on the processor
+ * model, hardware may periodically update these capabilities as a result of
+ * changes in the operating conditions (e.g., power limits or thermal
+ * constraints). On other processor models, there is a single HFI update
+ * at boot.
+ *
+ * This file provides functionality to process HFI updates and relay these
+ * updates to userspace.
+ */
+
+#define pr_fmt(fmt)  "intel-hfi: " fmt
+
+#include <linux/bitops.h>
+#include <linux/cpufeature.h>
+#include <linux/cpumask.h>
+#include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/math.h>
+#include <linux/mutex.h>
+#include <linux/percpu-defs.h>
+#include <linux/printk.h>
+#include <linux/processor.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/topology.h>
+#include <linux/workqueue.h>
+
+#include <asm/msr.h>
+
+#include "../thermal_core.h"
+#include "intel_hfi.h"
+
+#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | \
+				     BIT(9) | BIT(11) | BIT(26))
+
+/* Hardware Feedback Interface MSR configuration bits */
+#define HW_FEEDBACK_PTR_VALID_BIT		BIT(0)
+#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT	BIT(0)
+
+/* CPUID detection and enumeration definitions for HFI */
+
+#define CPUID_HFI_LEAF 6
+
+union hfi_capabilities {
+	struct {
+		u8	performance:1;
+		u8	energy_efficiency:1;
+		u8	__reserved:6;
+	} split;
+	u8 bits;
+};
+
+union cpuid6_edx {
+	struct {
+		union hfi_capabilities	capabilities;
+		u32			table_pages:4;
+		u32			__reserved:4;
+		s32			index:16;
+	} split;
+	u32 full;
+};
+
+/**
+ * struct hfi_cpu_data - HFI capabilities per CPU
+ * @perf_cap:		Performance capability
+ * @ee_cap:		Energy efficiency capability
+ *
+ * Capabilities of a logical processor in the HFI table. These capabilities are
+ * unitless.
+ */
+struct hfi_cpu_data {
+	u8	perf_cap;
+	u8	ee_cap;
+} __packed;
+
+/**
+ * struct hfi_hdr - Header of the HFI table
+ * @perf_updated:	Hardware updated performance capabilities
+ * @ee_updated:		Hardware updated energy efficiency capabilities
+ *
+ * Properties of the data in an HFI table.
+ */
+struct hfi_hdr {
+	u8	perf_updated;
+	u8	ee_updated;
+} __packed;
+
+/**
+ * struct hfi_instance - Representation of an HFI instance (i.e., a table)
+ * @local_table:	Base of the local copy of the HFI table
+ * @timestamp:		Timestamp of the last update of the local table.
+ *			Located at the base of the local table.
+ * @hdr:		Base address of the header of the local table
+ * @data:		Base address of the data of the local table
+ * @cpus:		CPUs represented in this HFI table instance
+ * @hw_table:		Pointer to the HFI table of this instance
+ * @update_work:	Delayed work to process HFI updates
+ * @table_lock:		Lock to protect acceses to the table of this instance
+ * @event_lock:		Lock to process HFI interrupts
+ *
+ * A set of parameters to parse and navigate a specific HFI table.
+ */
+struct hfi_instance {
+	union {
+		void			*local_table;
+		u64			*timestamp;
+	};
+	void			*hdr;
+	void			*data;
+	cpumask_var_t		cpus;
+	void			*hw_table;
+	struct delayed_work	update_work;
+	raw_spinlock_t		table_lock;
+	raw_spinlock_t		event_lock;
+};
+
+/**
+ * struct hfi_features - Supported HFI features
+ * @nr_table_pages:	Size of the HFI table in 4KB pages
+ * @cpu_stride:		Stride size to locate the capability data of a logical
+ *			processor within the table (i.e., row stride)
+ * @hdr_size:		Size of the table header
+ *
+ * Parameters and supported features that are common to all HFI instances
+ */
+struct hfi_features {
+	unsigned int	nr_table_pages;
+	unsigned int	cpu_stride;
+	unsigned int	hdr_size;
+};
+
+/**
+ * struct hfi_cpu_info - Per-CPU attributes to consume HFI data
+ * @index:		Row of this CPU in its HFI table
+ * @hfi_instance:	Attributes of the HFI table to which this CPU belongs
+ *
+ * Parameters to link a logical processor to an HFI table and a row within it.
+ */
+struct hfi_cpu_info {
+	s16			index;
+	struct hfi_instance	*hfi_instance;
+};
+
+static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
+
+static int max_hfi_instances;
+static struct hfi_instance *hfi_instances;
+
+static struct hfi_features hfi_features;
+static DEFINE_MUTEX(hfi_instance_lock);
+
+static struct workqueue_struct *hfi_updates_wq;
+#define HFI_UPDATE_INTERVAL		HZ
+#define HFI_MAX_THERM_NOTIFY_COUNT	16
+
+static void get_hfi_caps(struct hfi_instance *hfi_instance,
+			 struct thermal_genl_cpu_caps *cpu_caps)
+{
+	int cpu, i = 0;
+
+	raw_spin_lock_irq(&hfi_instance->table_lock);
+	for_each_cpu(cpu, hfi_instance->cpus) {
+		struct hfi_cpu_data *caps;
+		s16 index;
+
+		index = per_cpu(hfi_cpu_info, cpu).index;
+		caps = hfi_instance->data + index * hfi_features.cpu_stride;
+		cpu_caps[i].cpu = cpu;
+
+		/*
+		 * Scale performance and energy efficiency to
+		 * the [0, 1023] interval that thermal netlink uses.
+		 */
+		cpu_caps[i].performance = caps->perf_cap << 2;
+		cpu_caps[i].efficiency = caps->ee_cap << 2;
+
+		++i;
+	}
+	raw_spin_unlock_irq(&hfi_instance->table_lock);
+}
+
+/*
+ * Call update_capabilities() when there are changes in the HFI table.
+ */
+static void update_capabilities(struct hfi_instance *hfi_instance)
+{
+	struct thermal_genl_cpu_caps *cpu_caps;
+	int i = 0, cpu_count;
+
+	/* CPUs may come online/offline while processing an HFI update. */
+	mutex_lock(&hfi_instance_lock);
+
+	cpu_count = cpumask_weight(hfi_instance->cpus);
+
+	/* No CPUs to report in this hfi_instance. */
+	if (!cpu_count)
+		goto out;
+
+	cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL);
+	if (!cpu_caps)
+		goto out;
+
+	get_hfi_caps(hfi_instance, cpu_caps);
+
+	if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT)
+		goto last_cmd;
+
+	/* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */
+	for (i = 0;
+	     (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count;
+	     i += HFI_MAX_THERM_NOTIFY_COUNT)
+		thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT,
+						  &cpu_caps[i]);
+
+	cpu_count = cpu_count - i;
+
+last_cmd:
+	/* Process the remaining capabilities if any. */
+	if (cpu_count)
+		thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]);
+
+	kfree(cpu_caps);
+out:
+	mutex_unlock(&hfi_instance_lock);
+}
+
+static void hfi_update_work_fn(struct work_struct *work)
+{
+	struct hfi_instance *hfi_instance;
+
+	hfi_instance = container_of(to_delayed_work(work), struct hfi_instance,
+				    update_work);
+	if (!hfi_instance)
+		return;
+
+	update_capabilities(hfi_instance);
+}
+
+void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
+{
+	struct hfi_instance *hfi_instance;
+	int cpu = smp_processor_id();
+	struct hfi_cpu_info *info;
+	u64 new_timestamp;
+
+	if (!pkg_therm_status_msr_val)
+		return;
+
+	info = &per_cpu(hfi_cpu_info, cpu);
+	if (!info)
+		return;
+
+	/*
+	 * A CPU is linked to its HFI instance before the thermal vector in the
+	 * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL
+	 * when receiving an HFI event.
+	 */
+	hfi_instance = info->hfi_instance;
+	if (unlikely(!hfi_instance)) {
+		pr_debug("Received event on CPU %d but instance was null", cpu);
+		return;
+	}
+
+	/*
+	 * On most systems, all CPUs in the package receive a package-level
+	 * thermal interrupt when there is an HFI update. It is sufficient to
+	 * let a single CPU to acknowledge the update and queue work to
+	 * process it. The remaining CPUs can resume their work.
+	 */
+	if (!raw_spin_trylock(&hfi_instance->event_lock))
+		return;
+
+	/* Skip duplicated updates. */
+	new_timestamp = *(u64 *)hfi_instance->hw_table;
+	if (*hfi_instance->timestamp == new_timestamp) {
+		raw_spin_unlock(&hfi_instance->event_lock);
+		return;
+	}
+
+	raw_spin_lock(&hfi_instance->table_lock);
+
+	/*
+	 * Copy the updated table into our local copy. This includes the new
+	 * timestamp.
+	 */
+	memcpy(hfi_instance->local_table, hfi_instance->hw_table,
+	       hfi_features.nr_table_pages << PAGE_SHIFT);
+
+	raw_spin_unlock(&hfi_instance->table_lock);
+	raw_spin_unlock(&hfi_instance->event_lock);
+
+	/*
+	 * Let hardware know that we are done reading the HFI table and it is
+	 * free to update it again.
+	 */
+	pkg_therm_status_msr_val &= THERM_STATUS_CLEAR_PKG_MASK &
+				    ~PACKAGE_THERM_STATUS_HFI_UPDATED;
+	wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, pkg_therm_status_msr_val);
+
+	queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
+			   HFI_UPDATE_INTERVAL);
+}
+
+static void init_hfi_cpu_index(struct hfi_cpu_info *info)
+{
+	union cpuid6_edx edx;
+
+	/* Do not re-read @cpu's index if it has already been initialized. */
+	if (info->index > -1)
+		return;
+
+	edx.full = cpuid_edx(CPUID_HFI_LEAF);
+	info->index = edx.split.index;
+}
+
+/*
+ * The format of the HFI table depends on the number of capabilities that the
+ * hardware supports. Keep a data structure to navigate the table.
+ */
+static void init_hfi_instance(struct hfi_instance *hfi_instance)
+{
+	/* The HFI header is below the time-stamp. */
+	hfi_instance->hdr = hfi_instance->local_table +
+			    sizeof(*hfi_instance->timestamp);
+
+	/* The HFI data starts below the header. */
+	hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
+}
+
+/**
+ * intel_hfi_online() - Enable HFI on @cpu
+ * @cpu:	CPU in which the HFI will be enabled
+ *
+ * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package
+ * level. The first CPU in the die/package to come online does the full HFI
+ * initialization. Subsequent CPUs will just link themselves to the HFI
+ * instance of their die/package.
+ *
+ * This function is called before enabling the thermal vector in the local APIC
+ * in order to ensure that @cpu has an associated HFI instance when it receives
+ * an HFI event.
+ */
+void intel_hfi_online(unsigned int cpu)
+{
+	struct hfi_instance *hfi_instance;
+	struct hfi_cpu_info *info;
+	phys_addr_t hw_table_pa;
+	u64 msr_val;
+	u16 die_id;
+
+	/* Nothing to do if hfi_instances are missing. */
+	if (!hfi_instances)
+		return;
+
+	/*
+	 * Link @cpu to the HFI instance of its package/die. It does not
+	 * matter whether the instance has been initialized.
+	 */
+	info = &per_cpu(hfi_cpu_info, cpu);
+	die_id = topology_logical_die_id(cpu);
+	hfi_instance = info->hfi_instance;
+	if (!hfi_instance) {
+		if (die_id < 0 || die_id >= max_hfi_instances)
+			return;
+
+		hfi_instance = &hfi_instances[die_id];
+		info->hfi_instance = hfi_instance;
+	}
+
+	init_hfi_cpu_index(info);
+
+	/*
+	 * Now check if the HFI instance of the package/die of @cpu has been
+	 * initialized (by checking its header). In such case, all we have to
+	 * do is to add @cpu to this instance's cpumask.
+	 */
+	mutex_lock(&hfi_instance_lock);
+	if (hfi_instance->hdr) {
+		cpumask_set_cpu(cpu, hfi_instance->cpus);
+		goto unlock;
+	}
+
+	/*
+	 * Hardware is programmed with the physical address of the first page
+	 * frame of the table. Hence, the allocated memory must be page-aligned.
+	 */
+	hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
+						   GFP_KERNEL | __GFP_ZERO);
+	if (!hfi_instance->hw_table)
+		goto unlock;
+
+	hw_table_pa = virt_to_phys(hfi_instance->hw_table);
+
+	/*
+	 * Allocate memory to keep a local copy of the table that
+	 * hardware generates.
+	 */
+	hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT,
+					    GFP_KERNEL);
+	if (!hfi_instance->local_table)
+		goto free_hw_table;
+
+	/*
+	 * Program the address of the feedback table of this die/package. On
+	 * some processors, hardware remembers the old address of the HFI table
+	 * even after having been reprogrammed and re-enabled. Thus, do not free
+	 * the pages allocated for the table or reprogram the hardware with a
+	 * new base address. Namely, program the hardware only once.
+	 */
+	msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
+	wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
+
+	init_hfi_instance(hfi_instance);
+
+	INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
+	raw_spin_lock_init(&hfi_instance->table_lock);
+	raw_spin_lock_init(&hfi_instance->event_lock);
+
+	cpumask_set_cpu(cpu, hfi_instance->cpus);
+
+	/*
+	 * Enable the hardware feedback interface and never disable it. See
+	 * comment on programming the address of the table.
+	 */
+	rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+	msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
+	wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+
+unlock:
+	mutex_unlock(&hfi_instance_lock);
+	return;
+
+free_hw_table:
+	free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages);
+	goto unlock;
+}
+
+/**
+ * intel_hfi_offline() - Disable HFI on @cpu
+ * @cpu:	CPU in which the HFI will be disabled
+ *
+ * Remove @cpu from those covered by its HFI instance.
+ *
+ * On some processors, hardware remembers previous programming settings even
+ * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the
+ * die/package of @cpu are offline. See note in intel_hfi_online().
+ */
+void intel_hfi_offline(unsigned int cpu)
+{
+	struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
+	struct hfi_instance *hfi_instance;
+
+	/*
+	 * Check if @cpu as an associated, initialized (i.e., with a non-NULL
+	 * header). Also, HFI instances are only initialized if X86_FEATURE_HFI
+	 * is present.
+	 */
+	hfi_instance = info->hfi_instance;
+	if (!hfi_instance)
+		return;
+
+	if (!hfi_instance->hdr)
+		return;
+
+	mutex_lock(&hfi_instance_lock);
+	cpumask_clear_cpu(cpu, hfi_instance->cpus);
+	mutex_unlock(&hfi_instance_lock);
+}
+
+static __init int hfi_parse_features(void)
+{
+	unsigned int nr_capabilities;
+	union cpuid6_edx edx;
+
+	if (!boot_cpu_has(X86_FEATURE_HFI))
+		return -ENODEV;
+
+	/*
+	 * If we are here we know that CPUID_HFI_LEAF exists. Parse the
+	 * supported capabilities and the size of the HFI table.
+	 */
+	edx.full = cpuid_edx(CPUID_HFI_LEAF);
+
+	if (!edx.split.capabilities.split.performance) {
+		pr_debug("Performance reporting not supported! Not using HFI\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * The number of supported capabilities determines the number of
+	 * columns in the HFI table. Exclude the reserved bits.
+	 */
+	edx.split.capabilities.split.__reserved = 0;
+	nr_capabilities = hweight8(edx.split.capabilities.bits);
+
+	/* The number of 4KB pages required by the table */
+	hfi_features.nr_table_pages = edx.split.table_pages + 1;
+
+	/*
+	 * The header contains change indications for each supported feature.
+	 * The size of the table header is rounded up to be a multiple of 8
+	 * bytes.
+	 */
+	hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
+
+	/*
+	 * Data of each logical processor is also rounded up to be a multiple
+	 * of 8 bytes.
+	 */
+	hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
+
+	return 0;
+}
+
+void __init intel_hfi_init(void)
+{
+	struct hfi_instance *hfi_instance;
+	int i, j;
+
+	if (hfi_parse_features())
+		return;
+
+	/* There is one HFI instance per die/package. */
+	max_hfi_instances = topology_max_packages() *
+			    topology_max_die_per_package();
+
+	/*
+	 * This allocation may fail. CPU hotplug callbacks must check
+	 * for a null pointer.
+	 */
+	hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances),
+				GFP_KERNEL);
+	if (!hfi_instances)
+		return;
+
+	for (i = 0; i < max_hfi_instances; i++) {
+		hfi_instance = &hfi_instances[i];
+		if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL))
+			goto err_nomem;
+	}
+
+	hfi_updates_wq = create_singlethread_workqueue("hfi-updates");
+	if (!hfi_updates_wq)
+		goto err_nomem;
+
+	return;
+
+err_nomem:
+	for (j = 0; j < i; ++j) {
+		hfi_instance = &hfi_instances[j];
+		free_cpumask_var(hfi_instance->cpus);
+	}
+
+	kfree(hfi_instances);
+	hfi_instances = NULL;
+}
diff --git a/drivers/thermal/intel/intel_hfi.h b/drivers/thermal/intel/intel_hfi.h
new file mode 100644
index 000000000000..325aa78b745c
--- /dev/null
+++ b/drivers/thermal/intel/intel_hfi.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _INTEL_HFI_H
+#define _INTEL_HFI_H
+
+#if defined(CONFIG_INTEL_HFI_THERMAL)
+void __init intel_hfi_init(void);
+void intel_hfi_online(unsigned int cpu);
+void intel_hfi_offline(unsigned int cpu);
+void intel_hfi_process_event(__u64 pkg_therm_status_msr_val);
+#else
+static inline void intel_hfi_init(void) { }
+static inline void intel_hfi_online(unsigned int cpu) { }
+static inline void intel_hfi_offline(unsigned int cpu) { }
+static inline void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) { }
+#endif /* CONFIG_INTEL_HFI_THERMAL */
+
+#endif /* _INTEL_HFI_H */
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index 14256421d98c..c841ab37e7c6 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -556,12 +556,9 @@ static void end_power_clamp(void)
 	 * stop faster.
 	 */
 	clamping = false;
-	if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
-		for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
-			pr_debug("clamping worker for cpu %d alive, destroy\n",
-				 i);
-			stop_power_clamp_worker(i);
-		}
+	for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
+		pr_debug("clamping worker for cpu %d alive, destroy\n", i);
+		stop_power_clamp_worker(i);
 	}
 }
 
diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
index dab7e8fb1059..8352083b87c7 100644
--- a/drivers/thermal/intel/therm_throt.c
+++ b/drivers/thermal/intel/therm_throt.c
@@ -32,6 +32,7 @@
 #include <asm/irq.h>
 #include <asm/msr.h>
 
+#include "intel_hfi.h"
 #include "thermal_interrupt.h"
 
 /* How long to wait between reporting thermal events */
@@ -475,6 +476,13 @@ static int thermal_throttle_online(unsigned int cpu)
 	INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work);
 	INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work);
 
+	/*
+	 * The first CPU coming online will enable the HFI. Usually this causes
+	 * hardware to issue an HFI thermal interrupt. Such interrupt will reach
+	 * the CPU once we enable the thermal vector in the local APIC.
+	 */
+	intel_hfi_online(cpu);
+
 	/* Unmask the thermal vector after the above workqueues are initialized. */
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
@@ -492,6 +500,8 @@ static int thermal_throttle_offline(unsigned int cpu)
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
 
+	intel_hfi_offline(cpu);
+
 	cancel_delayed_work_sync(&state->package_throttle.therm_work);
 	cancel_delayed_work_sync(&state->core_throttle.therm_work);
 
@@ -509,6 +519,8 @@ static __init int thermal_throttle_init_device(void)
 	if (!atomic_read(&therm_throt_en))
 		return 0;
 
+	intel_hfi_init();
+
 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",
 				thermal_throttle_online,
 				thermal_throttle_offline);
@@ -608,6 +620,10 @@ void intel_thermal_interrupt(void)
 					PACKAGE_THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
 					PACKAGE_LEVEL);
+
+		if (this_cpu_has(X86_FEATURE_HFI))
+			intel_hfi_process_event(msr_val &
+						PACKAGE_THERM_STATUS_HFI_UPDATED);
 	}
 }
 
@@ -717,6 +733,12 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
 			      l | (PACKAGE_THERM_INT_LOW_ENABLE
 				| PACKAGE_THERM_INT_HIGH_ENABLE), h);
+
+		if (cpu_has(c, X86_FEATURE_HFI)) {
+			rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+			      l | PACKAGE_THERM_INT_HFI_ENABLE, h);
+		}
 	}
 
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c
index eafa7526eb8b..c7f91cbdccc7 100644
--- a/drivers/thermal/qcom/lmh.c
+++ b/drivers/thermal/qcom/lmh.c
@@ -28,6 +28,8 @@
 
 #define LMH_REG_DCVS_INTR_CLR		0x8
 
+#define LMH_ENABLE_ALGOS		1
+
 struct lmh_hw_data {
 	void __iomem *base;
 	struct irq_domain *domain;
@@ -90,6 +92,7 @@ static int lmh_probe(struct platform_device *pdev)
 	struct device_node *cpu_node;
 	struct lmh_hw_data *lmh_data;
 	int temp_low, temp_high, temp_arm, cpu_id, ret;
+	unsigned int enable_alg;
 	u32 node_id;
 
 	lmh_data = devm_kzalloc(dev, sizeof(*lmh_data), GFP_KERNEL);
@@ -141,32 +144,36 @@ static int lmh_probe(struct platform_device *pdev)
 	if (!qcom_scm_lmh_dcvsh_available())
 		return -EINVAL;
 
-	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1,
-				 LMH_NODE_DCVS, node_id, 0);
-	if (ret)
-		dev_err(dev, "Error %d enabling current subfunction\n", ret);
-
-	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1,
-				 LMH_NODE_DCVS, node_id, 0);
-	if (ret)
-		dev_err(dev, "Error %d enabling reliability subfunction\n", ret);
-
-	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1,
-				 LMH_NODE_DCVS, node_id, 0);
-	if (ret)
-		dev_err(dev, "Error %d enabling BCL subfunction\n", ret);
-
-	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1,
-				 LMH_NODE_DCVS, node_id, 0);
-	if (ret) {
-		dev_err(dev, "Error %d enabling thermal subfunction\n", ret);
-		return ret;
-	}
-
-	ret = qcom_scm_lmh_profile_change(0x1);
-	if (ret) {
-		dev_err(dev, "Error %d changing profile\n", ret);
-		return ret;
+	enable_alg = (uintptr_t)of_device_get_match_data(dev);
+
+	if (enable_alg) {
+		ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1,
+					 LMH_NODE_DCVS, node_id, 0);
+		if (ret)
+			dev_err(dev, "Error %d enabling current subfunction\n", ret);
+
+		ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1,
+					 LMH_NODE_DCVS, node_id, 0);
+		if (ret)
+			dev_err(dev, "Error %d enabling reliability subfunction\n", ret);
+
+		ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1,
+					 LMH_NODE_DCVS, node_id, 0);
+		if (ret)
+			dev_err(dev, "Error %d enabling BCL subfunction\n", ret);
+
+		ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1,
+					 LMH_NODE_DCVS, node_id, 0);
+		if (ret) {
+			dev_err(dev, "Error %d enabling thermal subfunction\n", ret);
+			return ret;
+		}
+
+		ret = qcom_scm_lmh_profile_change(0x1);
+		if (ret) {
+			dev_err(dev, "Error %d changing profile\n", ret);
+			return ret;
+		}
 	}
 
 	/* Set default thermal trips */
@@ -213,7 +220,8 @@ static int lmh_probe(struct platform_device *pdev)
 }
 
 static const struct of_device_id lmh_table[] = {
-	{ .compatible = "qcom,sdm845-lmh", },
+	{ .compatible = "qcom,sdm845-lmh", .data = (void *)LMH_ENABLE_ALGOS},
+	{ .compatible = "qcom,sm8150-lmh", },
 	{}
 };
 MODULE_DEVICE_TABLE(of, lmh_table);
diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
index 99a8d9f3e03c..154d3cb19c88 100644
--- a/drivers/thermal/qcom/tsens.c
+++ b/drivers/thermal/qcom/tsens.c
@@ -18,6 +18,7 @@
 #include <linux/regmap.h>
 #include <linux/slab.h>
 #include <linux/thermal.h>
+#include "../thermal_hwmon.h"
 #include "tsens.h"
 
 /**
@@ -1060,6 +1061,10 @@ static int tsens_register(struct tsens_priv *priv)
 		priv->sensor[i].tzd = tzd;
 		if (priv->ops->enable)
 			priv->ops->enable(priv, i);
+
+		if (devm_thermal_add_hwmon_sysfs(tzd))
+			dev_warn(priv->dev,
+				 "Failed to add hwmon sysfs attributes\n");
 	}
 
 	/* VER_0 require to set MIN and MAX THRESH
diff --git a/drivers/thermal/tegra/tegra-bpmp-thermal.c b/drivers/thermal/tegra/tegra-bpmp-thermal.c
index 94f1da1dcd69..5affc3d196be 100644
--- a/drivers/thermal/tegra/tegra-bpmp-thermal.c
+++ b/drivers/thermal/tegra/tegra-bpmp-thermal.c
@@ -52,6 +52,8 @@ static int tegra_bpmp_thermal_get_temp(void *data, int *out_temp)
 	err = tegra_bpmp_transfer(zone->tegra->bpmp, &msg);
 	if (err)
 		return err;
+	if (msg.rx.ret)
+		return -EINVAL;
 
 	*out_temp = reply.get_temp.temp;
 
@@ -63,6 +65,7 @@ static int tegra_bpmp_thermal_set_trips(void *data, int low, int high)
 	struct tegra_bpmp_thermal_zone *zone = data;
 	struct mrq_thermal_host_to_bpmp_request req;
 	struct tegra_bpmp_message msg;
+	int err;
 
 	memset(&req, 0, sizeof(req));
 	req.type = CMD_THERMAL_SET_TRIP;
@@ -76,7 +79,13 @@ static int tegra_bpmp_thermal_set_trips(void *data, int low, int high)
 	msg.tx.data = &req;
 	msg.tx.size = sizeof(req);
 
-	return tegra_bpmp_transfer(zone->tegra->bpmp, &msg);
+	err = tegra_bpmp_transfer(zone->tegra->bpmp, &msg);
+	if (err)
+		return err;
+	if (msg.rx.ret)
+		return -EINVAL;
+
+	return 0;
 }
 
 static void tz_device_update_work_fn(struct work_struct *work)
@@ -140,6 +149,8 @@ static int tegra_bpmp_thermal_get_num_zones(struct tegra_bpmp *bpmp,
 	err = tegra_bpmp_transfer(bpmp, &msg);
 	if (err)
 		return err;
+	if (msg.rx.ret)
+		return -EINVAL;
 
 	*num_zones = reply.get_num_zones.num;
 
diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c
index 73e68cce292e..32fea5174cc0 100644
--- a/drivers/thermal/thermal_netlink.c
+++ b/drivers/thermal/thermal_netlink.c
@@ -43,6 +43,11 @@ static const struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] =
 	[THERMAL_GENL_ATTR_CDEV_MAX_STATE]	= { .type = NLA_U32 },
 	[THERMAL_GENL_ATTR_CDEV_NAME]		= { .type = NLA_STRING,
 						    .len = THERMAL_NAME_LENGTH },
+	/* CPU capabilities */
+	[THERMAL_GENL_ATTR_CPU_CAPABILITY]		= { .type = NLA_NESTED },
+	[THERMAL_GENL_ATTR_CPU_CAPABILITY_ID]		= { .type = NLA_U32 },
+	[THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE]	= { .type = NLA_U32 },
+	[THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY]	= { .type = NLA_U32 },
 };
 
 struct param {
@@ -58,6 +63,8 @@ struct param {
 	int temp;
 	int cdev_state;
 	int cdev_max_state;
+	struct thermal_genl_cpu_caps *cpu_capabilities;
+	int cpu_capabilities_count;
 };
 
 typedef int (*cb_t)(struct param *);
@@ -190,6 +197,42 @@ static int thermal_genl_event_gov_change(struct param *p)
 	return 0;
 }
 
+static int thermal_genl_event_cpu_capability_change(struct param *p)
+{
+	struct thermal_genl_cpu_caps *cpu_cap = p->cpu_capabilities;
+	struct sk_buff *msg = p->msg;
+	struct nlattr *start_cap;
+	int i;
+
+	start_cap = nla_nest_start(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY);
+	if (!start_cap)
+		return -EMSGSIZE;
+
+	for (i = 0; i < p->cpu_capabilities_count; ++i) {
+		if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_ID,
+				cpu_cap->cpu))
+			goto out_cancel_nest;
+
+		if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE,
+				cpu_cap->performance))
+			goto out_cancel_nest;
+
+		if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY,
+				cpu_cap->efficiency))
+			goto out_cancel_nest;
+
+		++cpu_cap;
+	}
+
+	nla_nest_end(msg, start_cap);
+
+	return 0;
+out_cancel_nest:
+	nla_nest_cancel(msg, start_cap);
+
+	return -EMSGSIZE;
+}
+
 int thermal_genl_event_tz_delete(struct param *p)
 	__attribute__((alias("thermal_genl_event_tz")));
 
@@ -219,6 +262,7 @@ static cb_t event_cb[] = {
 	[THERMAL_GENL_EVENT_CDEV_DELETE]	= thermal_genl_event_cdev_delete,
 	[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE]	= thermal_genl_event_cdev_state_update,
 	[THERMAL_GENL_EVENT_TZ_GOV_CHANGE]	= thermal_genl_event_gov_change,
+	[THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE] = thermal_genl_event_cpu_capability_change,
 };
 
 /*
@@ -356,6 +400,15 @@ int thermal_notify_tz_gov_change(int tz_id, const char *name)
 	return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_GOV_CHANGE, &p);
 }
 
+int thermal_genl_cpu_capability_event(int count,
+				      struct thermal_genl_cpu_caps *caps)
+{
+	struct param p = { .cpu_capabilities_count = count, .cpu_capabilities = caps };
+
+	return thermal_genl_send_event(THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, &p);
+}
+EXPORT_SYMBOL_GPL(thermal_genl_cpu_capability_event);
+
 /*************************** Command encoding ********************************/
 
 static int __thermal_genl_cmd_tz_get_id(struct thermal_zone_device *tz,
diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h
index e554f76291f4..1052f523188d 100644
--- a/drivers/thermal/thermal_netlink.h
+++ b/drivers/thermal/thermal_netlink.h
@@ -4,6 +4,12 @@
  *  Author: Daniel Lezcano <[email protected]>
  */
 
+struct thermal_genl_cpu_caps {
+	int cpu;
+	int performance;
+	int efficiency;
+};
+
 /* Netlink notification function */
 #ifdef CONFIG_THERMAL_NETLINK
 int __init thermal_netlink_init(void);
@@ -23,6 +29,8 @@ int thermal_notify_cdev_add(int cdev_id, const char *name, int max_state);
 int thermal_notify_cdev_delete(int cdev_id);
 int thermal_notify_tz_gov_change(int tz_id, const char *name);
 int thermal_genl_sampling_temp(int id, int temp);
+int thermal_genl_cpu_capability_event(int count,
+				      struct thermal_genl_cpu_caps *caps);
 #else
 static inline int thermal_netlink_init(void)
 {
@@ -101,4 +109,10 @@ static inline int thermal_genl_sampling_temp(int id, int temp)
 {
 	return 0;
 }
+
+static inline int thermal_genl_cpu_capability_event(int count, struct thermal_genl_cpu_caps *caps)
+{
+	return 0;
+}
+
 #endif /* CONFIG_THERMAL_NETLINK */
diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index f84375865c97..703039d8b937 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -21,6 +21,7 @@
 
 #include "ti-thermal.h"
 #include "ti-bandgap.h"
+#include "../thermal_hwmon.h"
 
 /* common data structures */
 struct ti_thermal_data {
@@ -106,14 +107,6 @@ static inline int __ti_thermal_get_temp(void *devdata, int *temp)
 	return ret;
 }
 
-static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal,
-				      int *temp)
-{
-	struct ti_thermal_data *data = thermal->devdata;
-
-	return __ti_thermal_get_temp(data, temp);
-}
-
 static int __ti_thermal_get_trend(void *p, int trip, enum thermal_trend *trend)
 {
 	struct ti_thermal_data *data = p;
@@ -189,6 +182,9 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id,
 	ti_bandgap_set_sensor_data(bgp, id, data);
 	ti_bandgap_write_update_interval(bgp, data->sensor_id, interval);
 
+	if (devm_thermal_add_hwmon_sysfs(data->ti_thermal))
+		dev_warn(bgp->dev, "failed to add hwmon sysfs attributes\n");
+
 	return 0;
 }
 
diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 73f419adce61..4bb6d304eb4b 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -1919,6 +1919,7 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
 	struct usbtmc_ctrlrequest request;
 	u8 *buffer = NULL;
 	int rv;
+	unsigned int is_in, pipe;
 	unsigned long res;
 
 	res = copy_from_user(&request, arg, sizeof(struct usbtmc_ctrlrequest));
@@ -1928,12 +1929,14 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
 	if (request.req.wLength > USBTMC_BUFSIZE)
 		return -EMSGSIZE;
 
+	is_in = request.req.bRequestType & USB_DIR_IN;
+
 	if (request.req.wLength) {
 		buffer = kmalloc(request.req.wLength, GFP_KERNEL);
 		if (!buffer)
 			return -ENOMEM;
 
-		if ((request.req.bRequestType & USB_DIR_IN) == 0) {
+		if (!is_in) {
 			/* Send control data to device */
 			res = copy_from_user(buffer, request.data,
 					     request.req.wLength);
@@ -1944,8 +1947,12 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
 		}
 	}
 
+	if (is_in)
+		pipe = usb_rcvctrlpipe(data->usb_dev, 0);
+	else
+		pipe = usb_sndctrlpipe(data->usb_dev, 0);
 	rv = usb_control_msg(data->usb_dev,
-			usb_rcvctrlpipe(data->usb_dev, 0),
+			pipe,
 			request.req.bRequest,
 			request.req.bRequestType,
 			request.req.wValue,
@@ -1957,7 +1964,7 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
 		goto exit;
 	}
 
-	if (rv && (request.req.bRequestType & USB_DIR_IN)) {
+	if (rv && is_in) {
 		/* Read control data from device */
 		res = copy_to_user(request.data, buffer, rv);
 		if (res)
diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index d630cccd2e6e..dd44e37a454a 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -446,7 +446,7 @@ static int suspend_common(struct device *dev, bool do_wakeup)
 				HCD_WAKEUP_PENDING(hcd->shared_hcd))
 			return -EBUSY;
 		retval = hcd->driver->pci_suspend(hcd, do_wakeup);
-		suspend_report_result(hcd->driver->pci_suspend, retval);
+		suspend_report_result(dev, hcd->driver->pci_suspend, retval);
 
 		/* Check again in case wakeup raced with pci_suspend */
 		if ((retval == 0 && do_wakeup && HCD_WAKEUP_PENDING(hcd)) ||
@@ -556,7 +556,7 @@ static int hcd_pci_suspend_noirq(struct device *dev)
 		dev_dbg(dev, "--> PCI %s\n",
 				pci_power_name(pci_dev->current_state));
 	} else {
-		suspend_report_result(pci_prepare_to_sleep, retval);
+		suspend_report_result(dev, pci_prepare_to_sleep, retval);
 		return retval;
 	}
 
diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c
index 00b3f6b3bb31..713efd9aefde 100644
--- a/drivers/usb/gadget/function/rndis.c
+++ b/drivers/usb/gadget/function/rndis.c
@@ -640,6 +640,7 @@ static int rndis_set_response(struct rndis_params *params,
 	BufLength = le32_to_cpu(buf->InformationBufferLength);
 	BufOffset = le32_to_cpu(buf->InformationBufferOffset);
 	if ((BufLength > RNDIS_MAX_TOTAL_SIZE) ||
+	    (BufOffset > RNDIS_MAX_TOTAL_SIZE) ||
 	    (BufOffset + 8 >= RNDIS_MAX_TOTAL_SIZE))
 		    return -EINVAL;
 
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 568534a0d17c..c109b069f511 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1436,7 +1436,6 @@ static void usb_gadget_remove_driver(struct usb_udc *udc)
 	usb_gadget_udc_stop(udc);
 
 	udc->driver = NULL;
-	udc->dev.driver = NULL;
 	udc->gadget->dev.driver = NULL;
 }
 
@@ -1498,7 +1497,6 @@ static int udc_bind_to_driver(struct usb_udc *udc, struct usb_gadget_driver *dri
 			driver->function);
 
 	udc->driver = driver;
-	udc->dev.driver = &driver->driver;
 	udc->gadget->dev.driver = &driver->driver;
 
 	usb_gadget_udc_set_speed(udc, driver->max_speed);
@@ -1521,7 +1519,6 @@ err1:
 		dev_err(&udc->dev, "failed to start %s: %d\n",
 			udc->driver->function, ret);
 	udc->driver = NULL;
-	udc->dev.driver = NULL;
 	udc->gadget->dev.driver = NULL;
 	return ret;
 }
diff --git a/drivers/usb/host/xen-hcd.c b/drivers/usb/host/xen-hcd.c
index be09fd9bac58..19b8c7ed74cb 100644
--- a/drivers/usb/host/xen-hcd.c
+++ b/drivers/usb/host/xen-hcd.c
@@ -716,8 +716,9 @@ static int xenhcd_map_urb_for_request(struct xenhcd_info *info, struct urb *urb,
 	return 0;
 }
 
-static void xenhcd_gnttab_done(struct usb_shadow *shadow)
+static void xenhcd_gnttab_done(struct xenhcd_info *info, unsigned int id)
 {
+	struct usb_shadow *shadow = info->shadow + id;
 	int nr_segs = 0;
 	int i;
 
@@ -726,8 +727,10 @@ static void xenhcd_gnttab_done(struct usb_shadow *shadow)
 	if (xenusb_pipeisoc(shadow->req.pipe))
 		nr_segs += shadow->req.u.isoc.nr_frame_desc_segs;
 
-	for (i = 0; i < nr_segs; i++)
-		gnttab_end_foreign_access(shadow->req.seg[i].gref, 0, 0UL);
+	for (i = 0; i < nr_segs; i++) {
+		if (!gnttab_try_end_foreign_access(shadow->req.seg[i].gref))
+			xenhcd_set_error(info, "backend didn't release grant");
+	}
 
 	shadow->req.nr_buffer_segs = 0;
 	shadow->req.u.isoc.nr_frame_desc_segs = 0;
@@ -841,7 +844,9 @@ static void xenhcd_cancel_all_enqueued_urbs(struct xenhcd_info *info)
 	list_for_each_entry_safe(urbp, tmp, &info->in_progress_list, list) {
 		req_id = urbp->req_id;
 		if (!urbp->unlinked) {
-			xenhcd_gnttab_done(&info->shadow[req_id]);
+			xenhcd_gnttab_done(info, req_id);
+			if (info->error)
+				return;
 			if (urbp->urb->status == -EINPROGRESS)
 				/* not dequeued */
 				xenhcd_giveback_urb(info, urbp->urb,
@@ -942,8 +947,7 @@ static int xenhcd_urb_request_done(struct xenhcd_info *info)
 	rp = info->urb_ring.sring->rsp_prod;
 	if (RING_RESPONSE_PROD_OVERFLOW(&info->urb_ring, rp)) {
 		xenhcd_set_error(info, "Illegal index on urb-ring");
-		spin_unlock_irqrestore(&info->lock, flags);
-		return 0;
+		goto err;
 	}
 	rmb(); /* ensure we see queued responses up to "rp" */
 
@@ -952,11 +956,13 @@ static int xenhcd_urb_request_done(struct xenhcd_info *info)
 		id = res.id;
 		if (id >= XENUSB_URB_RING_SIZE) {
 			xenhcd_set_error(info, "Illegal data on urb-ring");
-			continue;
+			goto err;
 		}
 
 		if (likely(xenusb_pipesubmit(info->shadow[id].req.pipe))) {
-			xenhcd_gnttab_done(&info->shadow[id]);
+			xenhcd_gnttab_done(info, id);
+			if (info->error)
+				goto err;
 			urb = info->shadow[id].urb;
 			if (likely(urb)) {
 				urb->actual_length = res.actual_length;
@@ -978,6 +984,10 @@ static int xenhcd_urb_request_done(struct xenhcd_info *info)
 	spin_unlock_irqrestore(&info->lock, flags);
 
 	return more_to_do;
+
+ err:
+	spin_unlock_irqrestore(&info->lock, flags);
+	return 0;
 }
 
 static int xenhcd_conn_notify(struct xenhcd_info *info)
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index 7d4d0713f4f0..d2b7e613eb34 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -327,7 +327,6 @@ static int omap2430_probe(struct platform_device *pdev)
 	musb->dev.parent		= &pdev->dev;
 	musb->dev.dma_mask		= &omap2430_dmamask;
 	musb->dev.coherent_dma_mask	= omap2430_dmamask;
-	device_set_of_node_from_dev(&musb->dev, &pdev->dev);
 
 	glue->dev			= &pdev->dev;
 	glue->musb			= musb;
diff --git a/drivers/usb/typec/port-mapper.c b/drivers/usb/typec/port-mapper.c
index a7d507802509..a929e000d0e2 100644
--- a/drivers/usb/typec/port-mapper.c
+++ b/drivers/usb/typec/port-mapper.c
@@ -59,7 +59,7 @@ int typec_link_ports(struct typec_port *con)
 	if (!has_acpi_companion(&con->dev))
 		return 0;
 
-	bus_for_each_dev(&acpi_bus_type, NULL, &arg, typec_port_match);
+	acpi_bus_for_each_dev(typec_port_match, &arg);
 	if (!arg.match)
 		return 0;
 
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index f648f1c54a0f..d0f91078600e 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -1563,11 +1563,27 @@ static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
 
 	switch (cmd) {
 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
+		/* This mq feature check aligns with pre-existing userspace
+		 * implementation.
+		 *
+		 * Without it, an untrusted driver could fake a multiqueue config
+		 * request down to a non-mq device that may cause kernel to
+		 * panic due to uninitialized resources for extra vqs. Even with
+		 * a well behaving guest driver, it is not expected to allow
+		 * changing the number of vqs on a non-mq device.
+		 */
+		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
+			break;
+
 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
 		if (read != sizeof(mq))
 			break;
 
 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
+		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
+		    newqps > mlx5_vdpa_max_qps(mvdev->max_vqs))
+			break;
+
 		if (ndev->cur_num_vqs == 2 * newqps) {
 			status = VIRTIO_NET_OK;
 			break;
@@ -1897,11 +1913,25 @@ static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
 	return ndev->mvdev.mlx_features;
 }
 
-static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
+static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
 {
+	/* Minimum features to expect */
 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
 		return -EOPNOTSUPP;
 
+	/* Double check features combination sent down by the driver.
+	 * Fail invalid features due to absence of the depended feature.
+	 *
+	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
+	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
+	 * By failing the invalid features sent down by untrusted drivers,
+	 * we're assured the assumption made upon is_index_valid() and
+	 * is_ctrl_vq_idx() will not be compromised.
+	 */
+	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
+            BIT_ULL(VIRTIO_NET_F_MQ))
+		return -EINVAL;
+
 	return 0;
 }
 
@@ -1977,7 +2007,7 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
 
 	print_features(mvdev, features, true);
 
-	err = verify_min_features(mvdev, features);
+	err = verify_driver_features(mvdev, features);
 	if (err)
 		return err;
 
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index 9846c9de4bfa..1ea525433a5c 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -393,7 +393,7 @@ static void vdpa_get_config_unlocked(struct vdpa_device *vdev,
 	 * If it does happen we assume a legacy guest.
 	 */
 	if (!vdev->features_valid)
-		vdpa_set_features(vdev, 0, true);
+		vdpa_set_features_unlocked(vdev, 0);
 	ops->get_config(vdev, offset, buf, len);
 }
 
diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c
index 2b1143f11d8f..0a4d93edc4c0 100644
--- a/drivers/vdpa/vdpa_user/iova_domain.c
+++ b/drivers/vdpa/vdpa_user/iova_domain.c
@@ -294,7 +294,7 @@ vduse_domain_alloc_iova(struct iova_domain *iovad,
 
 	iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);
 
-	return iova_pfn << shift;
+	return (dma_addr_t)iova_pfn << shift;
 }
 
 static void vduse_domain_free_iova(struct iova_domain *iovad,
diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
index a57e381e830b..cce101e6a940 100644
--- a/drivers/vdpa/virtio_pci/vp_vdpa.c
+++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
@@ -533,8 +533,8 @@ static void vp_vdpa_remove(struct pci_dev *pdev)
 {
 	struct vp_vdpa *vp_vdpa = pci_get_drvdata(pdev);
 
-	vdpa_unregister_device(&vp_vdpa->vdpa);
 	vp_modern_remove(&vp_vdpa->mdev);
+	vdpa_unregister_device(&vp_vdpa->vdpa);
 }
 
 static struct pci_driver vp_vdpa_driver = {
diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c
index 670d56c879e5..40b098320b2a 100644
--- a/drivers/vhost/iotlb.c
+++ b/drivers/vhost/iotlb.c
@@ -57,6 +57,17 @@ int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb,
 	if (last < start)
 		return -EFAULT;
 
+	/* If the range being mapped is [0, ULONG_MAX], split it into two entries
+	 * otherwise its size would overflow u64.
+	 */
+	if (start == 0 && last == ULONG_MAX) {
+		u64 mid = last / 2;
+
+		vhost_iotlb_add_range_ctx(iotlb, start, mid, addr, perm, opaque);
+		addr += mid + 1;
+		start = mid + 1;
+	}
+
 	if (iotlb->limit &&
 	    iotlb->nmaps == iotlb->limit &&
 	    iotlb->flags & VHOST_IOTLB_FLAG_RETIRE) {
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 851539807bc9..ec5249e8c32d 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -286,7 +286,7 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 	if (copy_from_user(&features, featurep, sizeof(features)))
 		return -EFAULT;
 
-	if (vdpa_set_features(vdpa, features, false))
+	if (vdpa_set_features(vdpa, features))
 		return -EINVAL;
 
 	return 0;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 59edb5a1ffe2..1768362115c6 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1170,6 +1170,13 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
 		goto done;
 	}
 
+	if ((msg.type == VHOST_IOTLB_UPDATE ||
+	     msg.type == VHOST_IOTLB_INVALIDATE) &&
+	     msg.size == 0) {
+		ret = -EINVAL;
+		goto done;
+	}
+
 	if (dev->msg_handler)
 		ret = dev->msg_handler(dev, &msg);
 	else
@@ -1981,7 +1988,7 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq)
 	return 0;
 }
 
-static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
+static int vhost_update_avail_event(struct vhost_virtqueue *vq)
 {
 	if (vhost_put_avail_event(vq))
 		return -EFAULT;
@@ -2527,7 +2534,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 			return false;
 		}
 	} else {
-		r = vhost_update_avail_event(vq, vq->avail_idx);
+		r = vhost_update_avail_event(vq);
 		if (r) {
 			vq_err(vq, "Failed to update avail event index at %p: %d\n",
 			       vhost_avail_event(vq), r);
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 37f0b4274113..e6c9d41db1de 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -753,7 +753,8 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
 
 	/* Iterating over all connections for all CIDs to find orphans is
 	 * inefficient.  Room for improvement here. */
-	vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
+	vsock_for_each_connected_socket(&vhost_transport.transport,
+					vhost_vsock_reset_orphans);
 
 	/* Don't check the owner, because we are in the release path, so we
 	 * need to stop the vsock device in any case.
diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
index 265865610edc..bebb2eea6448 100644
--- a/drivers/video/fbdev/stifb.c
+++ b/drivers/video/fbdev/stifb.c
@@ -1041,6 +1041,47 @@ stifb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
 	SETUP_FB(fb);
 }
 
+#define ARTIST_VRAM_SIZE			0x000804
+#define ARTIST_VRAM_SRC				0x000808
+#define ARTIST_VRAM_SIZE_TRIGGER_WINFILL	0x000a04
+#define ARTIST_VRAM_DEST_TRIGGER_BLOCKMOVE	0x000b00
+#define ARTIST_SRC_BM_ACCESS			0x018008
+#define ARTIST_FGCOLOR				0x018010
+#define ARTIST_BGCOLOR				0x018014
+#define ARTIST_BITMAP_OP			0x01801c
+
+static void
+stifb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
+{
+	struct stifb_info *fb = container_of(info, struct stifb_info, info);
+
+	if (rect->rop != ROP_COPY)
+		return cfb_fillrect(info, rect);
+
+	SETUP_HW(fb);
+
+	if (fb->info.var.bits_per_pixel == 32) {
+		WRITE_WORD(0xBBA0A000, fb, REG_10);
+
+		NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xffffffff);
+	} else {
+		WRITE_WORD(fb->id == S9000_ID_HCRX ? 0x13a02000 : 0x13a01000, fb, REG_10);
+
+		NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xff);
+	}
+
+	WRITE_WORD(0x03000300, fb, ARTIST_BITMAP_OP);
+	WRITE_WORD(0x2ea01000, fb, ARTIST_SRC_BM_ACCESS);
+	NGLE_QUICK_SET_DST_BM_ACCESS(fb, 0x2ea01000);
+	NGLE_REALLY_SET_IMAGE_FG_COLOR(fb, rect->color);
+	WRITE_WORD(0, fb, ARTIST_BGCOLOR);
+
+	NGLE_SET_DSTXY(fb, (rect->dx << 16) | (rect->dy));
+	SET_LENXY_START_RECFILL(fb, (rect->width << 16) | (rect->height));
+
+	SETUP_FB(fb);
+}
+
 static void __init
 stifb_init_display(struct stifb_info *fb)
 {
@@ -1105,7 +1146,7 @@ static const struct fb_ops stifb_ops = {
 	.owner		= THIS_MODULE,
 	.fb_setcolreg	= stifb_setcolreg,
 	.fb_blank	= stifb_blank,
-	.fb_fillrect	= cfb_fillrect,
+	.fb_fillrect	= stifb_fillrect,
 	.fb_copyarea	= stifb_copyarea,
 	.fb_imageblit	= cfb_imageblit,
 };
@@ -1297,7 +1338,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
 		goto out_err0;
 	}
 	info->screen_size = fix->smem_len;
-	info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA;
+	info->flags = FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT;
 	info->pseudo_palette = &fb->pseudo_palette;
 
 	/* This has to be done !!! */
diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig
index 8061e8ef449f..121b9293c737 100644
--- a/drivers/virt/Kconfig
+++ b/drivers/virt/Kconfig
@@ -13,6 +13,17 @@ menuconfig VIRT_DRIVERS
 
 if VIRT_DRIVERS
 
+config VMGENID
+	tristate "Virtual Machine Generation ID driver"
+	default y
+	depends on ACPI
+	help
+	  Say Y here to use the hypervisor-provided Virtual Machine Generation ID
+	  to reseed the RNG when the VM is cloned. This is highly recommended if
+	  you intend to do any rollback / cloning / snapshotting of VMs.
+
+	  Prefer Y to M so that this protection is activated very early.
+
 config FSL_HV_MANAGER
 	tristate "Freescale hypervisor management driver"
 	depends on FSL_SOC
diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile
index 3e272ea60cd9..108d0ffcc9aa 100644
--- a/drivers/virt/Makefile
+++ b/drivers/virt/Makefile
@@ -4,6 +4,7 @@
 #
 
 obj-$(CONFIG_FSL_HV_MANAGER)	+= fsl_hypervisor.o
+obj-$(CONFIG_VMGENID)		+= vmgenid.o
 obj-y				+= vboxguest/
 
 obj-$(CONFIG_NITRO_ENCLAVES)	+= nitro_enclaves/
diff --git a/drivers/virt/vmgenid.c b/drivers/virt/vmgenid.c
new file mode 100644
index 000000000000..0ae1a39f2e28
--- /dev/null
+++ b/drivers/virt/vmgenid.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ *
+ * The "Virtual Machine Generation ID" is exposed via ACPI and changes when a
+ * virtual machine forks or is cloned. This driver exists for shepherding that
+ * information to random.c.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/random.h>
+
+ACPI_MODULE_NAME("vmgenid");
+
+enum { VMGENID_SIZE = 16 };
+
+struct vmgenid_state {
+	u8 *next_id;
+	u8 this_id[VMGENID_SIZE];
+};
+
+static int vmgenid_add(struct acpi_device *device)
+{
+	struct acpi_buffer parsed = { ACPI_ALLOCATE_BUFFER };
+	struct vmgenid_state *state;
+	union acpi_object *obj;
+	phys_addr_t phys_addr;
+	acpi_status status;
+	int ret = 0;
+
+	state = devm_kmalloc(&device->dev, sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	status = acpi_evaluate_object(device->handle, "ADDR", NULL, &parsed);
+	if (ACPI_FAILURE(status)) {
+		ACPI_EXCEPTION((AE_INFO, status, "Evaluating ADDR"));
+		return -ENODEV;
+	}
+	obj = parsed.pointer;
+	if (!obj || obj->type != ACPI_TYPE_PACKAGE || obj->package.count != 2 ||
+	    obj->package.elements[0].type != ACPI_TYPE_INTEGER ||
+	    obj->package.elements[1].type != ACPI_TYPE_INTEGER) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	phys_addr = (obj->package.elements[0].integer.value << 0) |
+		    (obj->package.elements[1].integer.value << 32);
+	state->next_id = devm_memremap(&device->dev, phys_addr, VMGENID_SIZE, MEMREMAP_WB);
+	if (IS_ERR(state->next_id)) {
+		ret = PTR_ERR(state->next_id);
+		goto out;
+	}
+
+	memcpy(state->this_id, state->next_id, sizeof(state->this_id));
+	add_device_randomness(state->this_id, sizeof(state->this_id));
+
+	device->driver_data = state;
+
+out:
+	ACPI_FREE(parsed.pointer);
+	return ret;
+}
+
+static void vmgenid_notify(struct acpi_device *device, u32 event)
+{
+	struct vmgenid_state *state = acpi_driver_data(device);
+	u8 old_id[VMGENID_SIZE];
+
+	memcpy(old_id, state->this_id, sizeof(old_id));
+	memcpy(state->this_id, state->next_id, sizeof(state->this_id));
+	if (!memcmp(old_id, state->this_id, sizeof(old_id)))
+		return;
+	add_vmfork_randomness(state->this_id, sizeof(state->this_id));
+}
+
+static const struct acpi_device_id vmgenid_ids[] = {
+	{ "VM_GEN_COUNTER", 0 },
+	{ }
+};
+
+static struct acpi_driver vmgenid_driver = {
+	.name = "vmgenid",
+	.ids = vmgenid_ids,
+	.owner = THIS_MODULE,
+	.ops = {
+		.add = vmgenid_add,
+		.notify = vmgenid_notify
+	}
+};
+
+module_acpi_driver(vmgenid_driver);
+
+MODULE_DEVICE_TABLE(acpi, vmgenid_ids);
+MODULE_DESCRIPTION("Virtual Machine Generation ID");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jason A. Donenfeld <[email protected]>");
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 34f80b7a8a64..492fc26f0b65 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -105,7 +105,6 @@ config VIRTIO_BALLOON
 
 config VIRTIO_MEM
 	tristate "Virtio mem driver"
-	default m
 	depends on X86_64
 	depends on VIRTIO
 	depends on MEMORY_HOTPLUG
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 00ac9db792a4..22f15f444f75 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -166,14 +166,13 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status)
 }
 EXPORT_SYMBOL_GPL(virtio_add_status);
 
-int virtio_finalize_features(struct virtio_device *dev)
+/* Do some validation, then set FEATURES_OK */
+static int virtio_features_ok(struct virtio_device *dev)
 {
-	int ret = dev->config->finalize_features(dev);
 	unsigned status;
+	int ret;
 
 	might_sleep();
-	if (ret)
-		return ret;
 
 	ret = arch_has_restricted_virtio_memory_access();
 	if (ret) {
@@ -202,8 +201,23 @@ int virtio_finalize_features(struct virtio_device *dev)
 	}
 	return 0;
 }
-EXPORT_SYMBOL_GPL(virtio_finalize_features);
 
+/**
+ * virtio_reset_device - quiesce device for removal
+ * @dev: the device to reset
+ *
+ * Prevents device from sending interrupts and accessing memory.
+ *
+ * Generally used for cleanup during driver / device removal.
+ *
+ * Once this has been invoked, caller must ensure that
+ * virtqueue_notify / virtqueue_kick are not in progress.
+ *
+ * Note: this guarantees that vq callbacks are not in progress, however caller
+ * is responsible for preventing access from other contexts, such as a system
+ * call/workqueue/bh.  Invoking virtio_break_device then flushing any such
+ * contexts is one way to handle that.
+ * */
 void virtio_reset_device(struct virtio_device *dev)
 {
 	dev->config->reset(dev);
@@ -245,17 +259,6 @@ static int virtio_dev_probe(struct device *_d)
 		driver_features_legacy = driver_features;
 	}
 
-	/*
-	 * Some devices detect legacy solely via F_VERSION_1. Write
-	 * F_VERSION_1 to force LE config space accesses before FEATURES_OK for
-	 * these when needed.
-	 */
-	if (drv->validate && !virtio_legacy_is_little_endian()
-			  && device_features & BIT_ULL(VIRTIO_F_VERSION_1)) {
-		dev->features = BIT_ULL(VIRTIO_F_VERSION_1);
-		dev->config->finalize_features(dev);
-	}
-
 	if (device_features & (1ULL << VIRTIO_F_VERSION_1))
 		dev->features = driver_features & device_features;
 	else
@@ -266,13 +269,26 @@ static int virtio_dev_probe(struct device *_d)
 		if (device_features & (1ULL << i))
 			__virtio_set_bit(dev, i);
 
+	err = dev->config->finalize_features(dev);
+	if (err)
+		goto err;
+
 	if (drv->validate) {
+		u64 features = dev->features;
+
 		err = drv->validate(dev);
 		if (err)
 			goto err;
+
+		/* Did validation change any features? Then write them again. */
+		if (features != dev->features) {
+			err = dev->config->finalize_features(dev);
+			if (err)
+				goto err;
+		}
 	}
 
-	err = virtio_finalize_features(dev);
+	err = virtio_features_ok(dev);
 	if (err)
 		goto err;
 
@@ -496,7 +512,11 @@ int virtio_device_restore(struct virtio_device *dev)
 	/* We have a driver! */
 	virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
 
-	ret = virtio_finalize_features(dev);
+	ret = dev->config->finalize_features(dev);
+	if (ret)
+		goto err;
+
+	ret = virtio_features_ok(dev);
 	if (ret)
 		goto err;
 
diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index 7767a7f0119b..76504559bc25 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -317,7 +317,7 @@ static int virtio_vdpa_finalize_features(struct virtio_device *vdev)
 	/* Give virtio_ring a chance to accept features. */
 	vring_transport_features(vdev);
 
-	return vdpa_set_features(vdpa, vdev->features, false);
+	return vdpa_set_features(vdpa, vdev->features);
 }
 
 static const char *virtio_vdpa_bus_name(struct virtio_device *vdev)
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index 3fa40c723e8e..edb0acd0b832 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -169,20 +169,14 @@ undo:
 		__del_gref(gref);
 	}
 
-	/* It's possible for the target domain to map the just-allocated grant
-	 * references by blindly guessing their IDs; if this is done, then
-	 * __del_gref will leave them in the queue_gref list. They need to be
-	 * added to the global list so that we can free them when they are no
-	 * longer referenced.
-	 */
-	if (unlikely(!list_empty(&queue_gref)))
-		list_splice_tail(&queue_gref, &gref_list);
 	mutex_unlock(&gref_mutex);
 	return rc;
 }
 
 static void __del_gref(struct gntalloc_gref *gref)
 {
+	unsigned long addr;
+
 	if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
 		uint8_t *tmp = kmap(gref->page);
 		tmp[gref->notify.pgoff] = 0;
@@ -196,21 +190,16 @@ static void __del_gref(struct gntalloc_gref *gref)
 	gref->notify.flags = 0;
 
 	if (gref->gref_id) {
-		if (gnttab_query_foreign_access(gref->gref_id))
-			return;
-
-		if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
-			return;
-
-		gnttab_free_grant_reference(gref->gref_id);
+		if (gref->page) {
+			addr = (unsigned long)page_to_virt(gref->page);
+			gnttab_end_foreign_access(gref->gref_id, 0, addr);
+		} else
+			gnttab_free_grant_reference(gref->gref_id);
 	}
 
 	gref_size--;
 	list_del(&gref->next_gref);
 
-	if (gref->page)
-		__free_page(gref->page);
-
 	kfree(gref);
 }
 
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 3729bea0c989..5c83d41766c8 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -134,12 +134,9 @@ struct gnttab_ops {
 	 */
 	unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref);
 	/*
-	 * Query the status of a grant entry. Ref parameter is reference of
-	 * queried grant entry, return value is the status of queried entry.
-	 * Detailed status(writing/reading) can be gotten from the return value
-	 * by bit operations.
+	 * Read the frame number related to a given grant reference.
 	 */
-	int (*query_foreign_access)(grant_ref_t ref);
+	unsigned long (*read_frame)(grant_ref_t ref);
 };
 
 struct unmap_refs_callback_data {
@@ -284,22 +281,6 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
 }
 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
 
-static int gnttab_query_foreign_access_v1(grant_ref_t ref)
-{
-	return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
-}
-
-static int gnttab_query_foreign_access_v2(grant_ref_t ref)
-{
-	return grstatus[ref] & (GTF_reading|GTF_writing);
-}
-
-int gnttab_query_foreign_access(grant_ref_t ref)
-{
-	return gnttab_interface->query_foreign_access(ref);
-}
-EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
-
 static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
 {
 	u16 flags, nflags;
@@ -353,6 +334,16 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
 }
 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
 
+static unsigned long gnttab_read_frame_v1(grant_ref_t ref)
+{
+	return gnttab_shared.v1[ref].frame;
+}
+
+static unsigned long gnttab_read_frame_v2(grant_ref_t ref)
+{
+	return gnttab_shared.v2[ref].full_page.frame;
+}
+
 struct deferred_entry {
 	struct list_head list;
 	grant_ref_t ref;
@@ -382,12 +373,9 @@ static void gnttab_handle_deferred(struct timer_list *unused)
 		spin_unlock_irqrestore(&gnttab_list_lock, flags);
 		if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) {
 			put_free_entry(entry->ref);
-			if (entry->page) {
-				pr_debug("freeing g.e. %#x (pfn %#lx)\n",
-					 entry->ref, page_to_pfn(entry->page));
-				put_page(entry->page);
-			} else
-				pr_info("freeing g.e. %#x\n", entry->ref);
+			pr_debug("freeing g.e. %#x (pfn %#lx)\n",
+				 entry->ref, page_to_pfn(entry->page));
+			put_page(entry->page);
 			kfree(entry);
 			entry = NULL;
 		} else {
@@ -412,9 +400,18 @@ static void gnttab_handle_deferred(struct timer_list *unused)
 static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
 				struct page *page)
 {
-	struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+	struct deferred_entry *entry;
+	gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
 	const char *what = KERN_WARNING "leaking";
 
+	entry = kmalloc(sizeof(*entry), gfp);
+	if (!page) {
+		unsigned long gfn = gnttab_interface->read_frame(ref);
+
+		page = pfn_to_page(gfn_to_pfn(gfn));
+		get_page(page);
+	}
+
 	if (entry) {
 		unsigned long flags;
 
@@ -435,11 +432,21 @@ static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
 	       what, ref, page ? page_to_pfn(page) : -1);
 }
 
+int gnttab_try_end_foreign_access(grant_ref_t ref)
+{
+	int ret = _gnttab_end_foreign_access_ref(ref, 0);
+
+	if (ret)
+		put_free_entry(ref);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gnttab_try_end_foreign_access);
+
 void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
 			       unsigned long page)
 {
-	if (gnttab_end_foreign_access_ref(ref, readonly)) {
-		put_free_entry(ref);
+	if (gnttab_try_end_foreign_access(ref)) {
 		if (page != 0)
 			put_page(virt_to_page(page));
 	} else
@@ -1417,7 +1424,7 @@ static const struct gnttab_ops gnttab_v1_ops = {
 	.update_entry			= gnttab_update_entry_v1,
 	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v1,
 	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v1,
-	.query_foreign_access		= gnttab_query_foreign_access_v1,
+	.read_frame			= gnttab_read_frame_v1,
 };
 
 static const struct gnttab_ops gnttab_v2_ops = {
@@ -1429,7 +1436,7 @@ static const struct gnttab_ops gnttab_v2_ops = {
 	.update_entry			= gnttab_update_entry_v2,
 	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v2,
 	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v2,
-	.query_foreign_access		= gnttab_query_foreign_access_v2,
+	.read_frame			= gnttab_read_frame_v2,
 };
 
 static bool gnttab_need_v2(void)
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 3c9ae156b597..0ca351f30a6d 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -337,8 +337,8 @@ static void free_active_ring(struct sock_mapping *map)
 	if (!map->active.ring)
 		return;
 
-	free_pages((unsigned long)map->active.data.in,
-			map->active.ring->ring_order);
+	free_pages_exact(map->active.data.in,
+			 PAGE_SIZE << map->active.ring->ring_order);
 	free_page((unsigned long)map->active.ring);
 }
 
@@ -352,8 +352,8 @@ static int alloc_active_ring(struct sock_mapping *map)
 		goto out;
 
 	map->active.ring->ring_order = PVCALLS_RING_ORDER;
-	bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-					PVCALLS_RING_ORDER);
+	bytes = alloc_pages_exact(PAGE_SIZE << PVCALLS_RING_ORDER,
+				  GFP_KERNEL | __GFP_ZERO);
 	if (!bytes)
 		goto out;
 
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index e8bed1cb76ba..df6890681231 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -379,7 +379,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
 		      unsigned int nr_pages, grant_ref_t *grefs)
 {
 	int err;
-	int i, j;
+	unsigned int i;
+	grant_ref_t gref_head;
+
+	err = gnttab_alloc_grant_references(nr_pages, &gref_head);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "granting access to ring page");
+		return err;
+	}
 
 	for (i = 0; i < nr_pages; i++) {
 		unsigned long gfn;
@@ -389,23 +396,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
 		else
 			gfn = virt_to_gfn(vaddr);
 
-		err = gnttab_grant_foreign_access(dev->otherend_id, gfn, 0);
-		if (err < 0) {
-			xenbus_dev_fatal(dev, err,
-					 "granting access to ring page");
-			goto fail;
-		}
-		grefs[i] = err;
+		grefs[i] = gnttab_claim_grant_reference(&gref_head);
+		gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id,
+						gfn, 0);
 
 		vaddr = vaddr + XEN_PAGE_SIZE;
 	}
 
 	return 0;
-
-fail:
-	for (j = 0; j < i; j++)
-		gnttab_end_foreign_access_ref(grefs[j], 0);
-	return err;
 }
 EXPORT_SYMBOL_GPL(xenbus_grant_ring);
 
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 4d5ae61580aa..68e586283764 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -36,6 +36,9 @@ config COMPAT_BINFMT_ELF
 config ARCH_BINFMT_ELF_STATE
 	bool
 
+config ARCH_BINFMT_ELF_EXTRA_PHDRS
+	bool
+
 config ARCH_HAVE_ELF_PROT
 	bool
 
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 5e9157d0da29..f447c902318d 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -703,7 +703,7 @@ static int afs_writepages_region(struct address_space *mapping,
 	struct folio *folio;
 	struct page *head_page;
 	ssize_t ret;
-	int n;
+	int n, skips = 0;
 
 	_enter("%llx,%llx,", start, end);
 
@@ -754,8 +754,15 @@ static int afs_writepages_region(struct address_space *mapping,
 #ifdef CONFIG_AFS_FSCACHE
 				folio_wait_fscache(folio);
 #endif
+			} else {
+				start += folio_size(folio);
 			}
 			folio_put(folio);
+			if (wbc->sync_mode == WB_SYNC_NONE) {
+				if (skips >= 5 || need_resched())
+					break;
+				skips++;
+			}
 			continue;
 		}
 
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index 83f41bd0c3a9..35465109d9c4 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -28,6 +28,11 @@ struct cachefiles_xattr {
 static const char cachefiles_xattr_cache[] =
 	XATTR_USER_PREFIX "CacheFiles.cache";
 
+struct cachefiles_vol_xattr {
+	__be32	reserved;	/* Reserved, should be 0 */
+	__u8	data[];		/* netfs volume coherency data */
+} __packed;
+
 /*
  * set the state xattr on a cache file
  */
@@ -185,6 +190,7 @@ void cachefiles_prepare_to_write(struct fscache_cookie *cookie)
  */
 bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume)
 {
+	struct cachefiles_vol_xattr *buf;
 	unsigned int len = volume->vcookie->coherency_len;
 	const void *p = volume->vcookie->coherency;
 	struct dentry *dentry = volume->dentry;
@@ -192,10 +198,17 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume)
 
 	_enter("%x,#%d", volume->vcookie->debug_id, len);
 
+	len += sizeof(*buf);
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf)
+		return false;
+	buf->reserved = cpu_to_be32(0);
+	memcpy(buf->data, p, len);
+
 	ret = cachefiles_inject_write_error();
 	if (ret == 0)
 		ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
-				   p, len, 0);
+				   buf, len, 0);
 	if (ret < 0) {
 		trace_cachefiles_vfs_error(NULL, d_inode(dentry), ret,
 					   cachefiles_trace_setxattr_error);
@@ -209,6 +222,7 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume)
 					       cachefiles_coherency_vol_set_ok);
 	}
 
+	kfree(buf);
 	_leave(" = %d", ret);
 	return ret == 0;
 }
@@ -218,7 +232,7 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume)
  */
 int cachefiles_check_volume_xattr(struct cachefiles_volume *volume)
 {
-	struct cachefiles_xattr *buf;
+	struct cachefiles_vol_xattr *buf;
 	struct dentry *dentry = volume->dentry;
 	unsigned int len = volume->vcookie->coherency_len;
 	const void *p = volume->vcookie->coherency;
@@ -228,6 +242,7 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume)
 
 	_enter("");
 
+	len += sizeof(*buf);
 	buf = kmalloc(len, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
@@ -245,7 +260,9 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume)
 					"Failed to read xattr with error %zd", xlen);
 		}
 		why = cachefiles_coherency_vol_check_xattr;
-	} else if (memcmp(buf->data, p, len) != 0) {
+	} else if (buf->reserved != cpu_to_be32(0)) {
+		why = cachefiles_coherency_vol_check_resv;
+	} else if (memcmp(buf->data, p, len - sizeof(*buf)) != 0) {
 		why = cachefiles_coherency_vol_check_cmp;
 	} else {
 		why = cachefiles_coherency_vol_check_ok;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 053cb449eb16..d3020abfe404 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3924,7 +3924,8 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
 
 	/* only send once per connect */
 	spin_lock(&cifs_tcp_ses_lock);
-	if (server->tcpStatus != CifsNeedSessSetup) {
+	if ((server->tcpStatus != CifsNeedSessSetup) &&
+	    (ses->status == CifsGood)) {
 		spin_unlock(&cifs_tcp_ses_lock);
 		return 0;
 	}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cd54a529460d..592730fd6e42 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -941,7 +941,17 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
 
 	while (count) {
 		if (cs->write && cs->pipebufs && page) {
-			return fuse_ref_page(cs, page, offset, count);
+			/*
+			 * Can't control lifetime of pipe buffers, so always
+			 * copy user pages.
+			 */
+			if (cs->req->args->user_pages) {
+				err = fuse_copy_fill(cs);
+				if (err)
+					return err;
+			} else {
+				return fuse_ref_page(cs, page, offset, count);
+			}
 		} else if (!cs->len) {
 			if (cs->move_pages && page &&
 			    offset == 0 && count == PAGE_SIZE) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 829094451774..0fc150c1c50b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1413,6 +1413,7 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
 			(PAGE_SIZE - ret) & (PAGE_SIZE - 1);
 	}
 
+	ap->args.user_pages = true;
 	if (write)
 		ap->args.in_pages = true;
 	else
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e8e59fbdefeb..eac4984cc753 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -256,6 +256,7 @@ struct fuse_args {
 	bool nocreds:1;
 	bool in_pages:1;
 	bool out_pages:1;
+	bool user_pages:1;
 	bool out_argvar:1;
 	bool page_zeroing:1;
 	bool page_replace:1;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ee846ce371d8..9ee36aa73251 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -23,6 +23,7 @@
 #include <linux/exportfs.h>
 #include <linux/posix_acl.h>
 #include <linux/pid_namespace.h>
+#include <uapi/linux/magic.h>
 
 MODULE_AUTHOR("Miklos Szeredi <[email protected]>");
 MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -50,8 +51,6 @@ MODULE_PARM_DESC(max_user_congthresh,
  "Global limit for the maximum congestion threshold an "
  "unprivileged user can set");
 
-#define FUSE_SUPER_MAGIC 0x65735546
-
 #define FUSE_DEFAULT_BLKSIZE 512
 
 /** Maximum number of outstanding background requests */
diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c
index fbc09dab1f85..df58966bc874 100644
--- a/fs/fuse/ioctl.c
+++ b/fs/fuse/ioctl.c
@@ -394,9 +394,12 @@ static int fuse_priv_ioctl(struct inode *inode, struct fuse_file *ff,
 	args.out_args[1].value = ptr;
 
 	err = fuse_simple_request(fm, &args);
-	if (!err && outarg.flags & FUSE_IOCTL_RETRY)
-		err = -EIO;
-
+	if (!err) {
+		if (outarg.result < 0)
+			err = outarg.result;
+		else if (outarg.flags & FUSE_IOCTL_RETRY)
+			err = -EIO;
+	}
 	return err;
 }
 
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 257d659fc25f..496a2af7d12c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -264,11 +264,18 @@ struct io_rsrc_data {
 	bool				quiesce;
 };
 
+struct io_buffer_list {
+	struct list_head list;
+	struct list_head buf_list;
+	__u16 bgid;
+};
+
 struct io_buffer {
 	struct list_head list;
 	__u64 addr;
 	__u32 len;
 	__u16 bid;
+	__u16 bgid;
 };
 
 struct io_restriction {
@@ -333,6 +340,8 @@ struct io_ev_fd {
 	struct rcu_head		rcu;
 };
 
+#define IO_BUFFERS_HASH_BITS	5
+
 struct io_ring_ctx {
 	/* const or read-mostly hot data */
 	struct {
@@ -346,6 +355,7 @@ struct io_ring_ctx {
 		unsigned int		off_timeout_used: 1;
 		unsigned int		drain_active: 1;
 		unsigned int		drain_disabled: 1;
+		unsigned int		has_evfd: 1;
 	} ____cacheline_aligned_in_smp;
 
 	/* submission data */
@@ -384,8 +394,9 @@ struct io_ring_ctx {
 		struct list_head	timeout_list;
 		struct list_head	ltimeout_list;
 		struct list_head	cq_overflow_list;
-		struct xarray		io_buffers;
+		struct list_head	*io_buffers;
 		struct list_head	io_buffers_cache;
+		struct list_head	apoll_cache;
 		struct xarray		personalities;
 		u32			pers_next;
 		unsigned		sq_thread_idle;
@@ -769,6 +780,8 @@ enum {
 	REQ_F_ARM_LTIMEOUT_BIT,
 	REQ_F_ASYNC_DATA_BIT,
 	REQ_F_SKIP_LINK_CQES_BIT,
+	REQ_F_SINGLE_POLL_BIT,
+	REQ_F_DOUBLE_POLL_BIT,
 	/* keep async read/write and isreg together and in order */
 	REQ_F_SUPPORT_NOWAIT_BIT,
 	REQ_F_ISREG_BIT,
@@ -827,6 +840,10 @@ enum {
 	REQ_F_ASYNC_DATA	= BIT(REQ_F_ASYNC_DATA_BIT),
 	/* don't post CQEs while failing linked requests */
 	REQ_F_SKIP_LINK_CQES	= BIT(REQ_F_SKIP_LINK_CQES_BIT),
+	/* single poll may be active */
+	REQ_F_SINGLE_POLL	= BIT(REQ_F_SINGLE_POLL_BIT),
+	/* double poll may active */
+	REQ_F_DOUBLE_POLL	= BIT(REQ_F_DOUBLE_POLL_BIT),
 };
 
 struct async_poll {
@@ -906,6 +923,7 @@ struct io_kiocb {
 	/* used by request caches, completion batching and iopoll */
 	struct io_wq_work_node		comp_list;
 	atomic_t			refs;
+	atomic_t			poll_refs;
 	struct io_kiocb			*link;
 	struct io_task_work		io_task_work;
 	/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
@@ -914,12 +932,11 @@ struct io_kiocb {
 	struct async_poll		*apoll;
 	/* opcode allocated if it needs to store data for async defer */
 	void				*async_data;
-	struct io_wq_work		work;
 	/* custom credentials, valid IFF REQ_F_CREDS is set */
-	const struct cred		*creds;
 	/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
 	struct io_buffer		*kbuf;
-	atomic_t			poll_refs;
+	const struct cred		*creds;
+	struct io_wq_work		work;
 };
 
 struct io_tctx_node {
@@ -1173,6 +1190,7 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
 static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
 
 static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
+static void io_eventfd_signal(struct io_ring_ctx *ctx);
 
 static struct kmem_cache *req_cachep;
 
@@ -1351,6 +1369,38 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
 	return cflags;
 }
 
+static struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
+						 unsigned int bgid)
+{
+	struct list_head *hash_list;
+	struct io_buffer_list *bl;
+
+	hash_list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
+	list_for_each_entry(bl, hash_list, list)
+		if (bl->bgid == bgid || bgid == -1U)
+			return bl;
+
+	return NULL;
+}
+
+static void io_kbuf_recycle(struct io_kiocb *req)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_buffer_list *bl;
+	struct io_buffer *buf;
+
+	if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+		return;
+
+	lockdep_assert_held(&ctx->uring_lock);
+
+	buf = req->kbuf;
+	bl = io_buffer_get_list(ctx, buf->bgid);
+	list_add(&buf->list, &bl->buf_list);
+	req->flags &= ~REQ_F_BUFFER_SELECTED;
+	req->kbuf = NULL;
+}
+
 static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
 			  bool cancel_all)
 	__must_hold(&req->ctx->timeout_lock)
@@ -1461,7 +1511,7 @@ static __cold void io_fallback_req_func(struct work_struct *work)
 static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 {
 	struct io_ring_ctx *ctx;
-	int hash_bits;
+	int i, hash_bits;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -1488,6 +1538,13 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	/* set invalid range, so io_import_fixed() fails meeting it */
 	ctx->dummy_ubuf->ubuf = -1UL;
 
+	ctx->io_buffers = kcalloc(1U << IO_BUFFERS_HASH_BITS,
+					sizeof(struct list_head), GFP_KERNEL);
+	if (!ctx->io_buffers)
+		goto err;
+	for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++)
+		INIT_LIST_HEAD(&ctx->io_buffers[i]);
+
 	if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
 			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
 		goto err;
@@ -1497,8 +1554,8 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->sqd_list);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
 	INIT_LIST_HEAD(&ctx->io_buffers_cache);
+	INIT_LIST_HEAD(&ctx->apoll_cache);
 	init_completion(&ctx->ref_comp);
-	xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1);
 	xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
 	mutex_init(&ctx->uring_lock);
 	init_waitqueue_head(&ctx->cq_wait);
@@ -1527,6 +1584,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 err:
 	kfree(ctx->dummy_ubuf);
 	kfree(ctx->cancel_hash);
+	kfree(ctx->io_buffers);
 	kfree(ctx);
 	return NULL;
 }
@@ -1740,22 +1798,27 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
 	spin_unlock_irq(&ctx->timeout_lock);
 }
 
-static __cold void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
-{
-	if (ctx->off_timeout_used)
-		io_flush_timeouts(ctx);
-	if (ctx->drain_active)
-		io_queue_deferred(ctx);
-}
-
 static inline void io_commit_cqring(struct io_ring_ctx *ctx)
 {
-	if (unlikely(ctx->off_timeout_used || ctx->drain_active))
-		__io_commit_cqring_flush(ctx);
 	/* order cqe stores with ring update */
 	smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
 }
 
+static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
+{
+	if (ctx->off_timeout_used || ctx->drain_active) {
+		spin_lock(&ctx->completion_lock);
+		if (ctx->off_timeout_used)
+			io_flush_timeouts(ctx);
+		if (ctx->drain_active)
+			io_queue_deferred(ctx);
+		io_commit_cqring(ctx);
+		spin_unlock(&ctx->completion_lock);
+	}
+	if (ctx->has_evfd)
+		io_eventfd_signal(ctx);
+}
+
 static inline bool io_sqring_full(struct io_ring_ctx *ctx)
 {
 	struct io_rings *r = ctx->rings;
@@ -1789,10 +1852,6 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx)
 {
 	struct io_ev_fd *ev_fd;
 
-	/* Return quickly if ctx->io_ev_fd doesn't exist */
-	if (likely(!rcu_dereference_raw(ctx->io_ev_fd)))
-		return;
-
 	rcu_read_lock();
 	/*
 	 * rcu_dereference ctx->io_ev_fd once and use it for both for checking
@@ -1812,19 +1871,11 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx)
 
 	if (!ev_fd->eventfd_async || io_wq_current_is_worker())
 		eventfd_signal(ev_fd->cq_ev_fd, 1);
-
 out:
 	rcu_read_unlock();
 }
 
-/*
- * This should only get called when at least one event has been posted.
- * Some applications rely on the eventfd notification count only changing
- * IFF a new CQE has been added to the CQ ring. There's no depedency on
- * 1:1 relationship between how many times this function is called (and
- * hence the eventfd count) and number of CQEs posted to the CQ ring.
- */
-static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+static inline void io_cqring_wake(struct io_ring_ctx *ctx)
 {
 	/*
 	 * wake_up_all() may seem excessive, but io_wake_function() and
@@ -1833,19 +1884,32 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 	 */
 	if (wq_has_sleeper(&ctx->cq_wait))
 		wake_up_all(&ctx->cq_wait);
-	io_eventfd_signal(ctx);
+}
+
+/*
+ * This should only get called when at least one event has been posted.
+ * Some applications rely on the eventfd notification count only changing
+ * IFF a new CQE has been added to the CQ ring. There's no depedency on
+ * 1:1 relationship between how many times this function is called (and
+ * hence the eventfd count) and number of CQEs posted to the CQ ring.
+ */
+static inline void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+{
+	if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
+		     ctx->has_evfd))
+		__io_commit_cqring_flush(ctx);
+
+	io_cqring_wake(ctx);
 }
 
 static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
 {
-	/* see waitqueue_active() comment */
-	smp_mb();
+	if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
+		     ctx->has_evfd))
+		__io_commit_cqring_flush(ctx);
 
-	if (ctx->flags & IORING_SETUP_SQPOLL) {
-		if (waitqueue_active(&ctx->cq_wait))
-			wake_up_all(&ctx->cq_wait);
-	}
-	io_eventfd_signal(ctx);
+	if (ctx->flags & IORING_SETUP_SQPOLL)
+		io_cqring_wake(ctx);
 }
 
 /* Returns true if there are no backlogged entries after the flush */
@@ -1980,7 +2044,7 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
 	return true;
 }
 
-static inline bool __fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
+static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
 				 s32 res, u32 cflags)
 {
 	struct io_uring_cqe *cqe;
@@ -2000,16 +2064,16 @@ static inline bool __fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
 	return io_cqring_event_overflow(ctx, user_data, res, cflags);
 }
 
-static inline bool __io_fill_cqe(struct io_kiocb *req, s32 res, u32 cflags)
+static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
 {
 	trace_io_uring_complete(req->ctx, req, req->user_data, res, cflags);
-	return __fill_cqe(req->ctx, req->user_data, res, cflags);
+	return __io_fill_cqe(req->ctx, req->user_data, res, cflags);
 }
 
 static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
 {
 	if (!(req->flags & REQ_F_CQE_SKIP))
-		__io_fill_cqe(req, res, cflags);
+		__io_fill_cqe_req(req, res, cflags);
 }
 
 static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
@@ -2017,7 +2081,7 @@ static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
 {
 	ctx->cq_extra++;
 	trace_io_uring_complete(ctx, NULL, user_data, res, cflags);
-	return __fill_cqe(ctx, user_data, res, cflags);
+	return __io_fill_cqe(ctx, user_data, res, cflags);
 }
 
 static void __io_req_complete_post(struct io_kiocb *req, s32 res,
@@ -2026,7 +2090,7 @@ static void __io_req_complete_post(struct io_kiocb *req, s32 res,
 	struct io_ring_ctx *ctx = req->ctx;
 
 	if (!(req->flags & REQ_F_CQE_SKIP))
-		__io_fill_cqe(req, res, cflags);
+		__io_fill_cqe_req(req, res, cflags);
 	/*
 	 * If we're the last reference to this request, add to our locked
 	 * free_list cache.
@@ -2085,7 +2149,7 @@ static inline void io_req_complete(struct io_kiocb *req, s32 res)
 static void io_req_complete_failed(struct io_kiocb *req, s32 res)
 {
 	req_set_fail(req);
-	io_req_complete_post(req, res, 0);
+	io_req_complete_post(req, res, io_put_kbuf(req, 0));
 }
 
 static void io_req_complete_fail_submit(struct io_kiocb *req)
@@ -2618,7 +2682,16 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 						    comp_list);
 
 			if (!(req->flags & REQ_F_CQE_SKIP))
-				__io_fill_cqe(req, req->result, req->cflags);
+				__io_fill_cqe_req(req, req->result, req->cflags);
+			if ((req->flags & REQ_F_POLLED) && req->apoll) {
+				struct async_poll *apoll = req->apoll;
+
+				if (apoll->double_poll)
+					kfree(apoll->double_poll);
+				list_add(&apoll->poll.wait.entry,
+						&ctx->apoll_cache);
+				req->flags &= ~REQ_F_POLLED;
+			}
 		}
 
 		io_commit_cqring(ctx);
@@ -2740,7 +2813,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 		if (unlikely(req->flags & REQ_F_CQE_SKIP))
 			continue;
 
-		__io_fill_cqe(req, req->result, io_put_kbuf(req, 0));
+		__io_fill_cqe_req(req, req->result, io_put_kbuf(req, 0));
 		nr_events++;
 	}
 
@@ -3194,14 +3267,10 @@ static void kiocb_done(struct io_kiocb *req, ssize_t ret,
 
 	if (req->flags & REQ_F_REISSUE) {
 		req->flags &= ~REQ_F_REISSUE;
-		if (io_resubmit_prep(req)) {
+		if (io_resubmit_prep(req))
 			io_req_task_queue_reissue(req);
-		} else {
-			req_set_fail(req);
-			req->result = ret;
-			req->io_task_work.func = io_req_task_complete;
-			io_req_task_work_add(req, false);
-		}
+		else
+			io_req_task_queue_fail(req, ret);
 	}
 }
 
@@ -3299,30 +3368,36 @@ static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
 		mutex_lock(&ctx->uring_lock);
 }
 
+static void io_buffer_add_list(struct io_ring_ctx *ctx,
+			       struct io_buffer_list *bl, unsigned int bgid)
+{
+	struct list_head *list;
+
+	list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
+	INIT_LIST_HEAD(&bl->buf_list);
+	bl->bgid = bgid;
+	list_add(&bl->list, list);
+}
+
 static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
 					  int bgid, unsigned int issue_flags)
 {
 	struct io_buffer *kbuf = req->kbuf;
-	struct io_buffer *head;
 	bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_buffer_list *bl;
 
 	if (req->flags & REQ_F_BUFFER_SELECTED)
 		return kbuf;
 
-	io_ring_submit_lock(req->ctx, needs_lock);
+	io_ring_submit_lock(ctx, needs_lock);
 
-	lockdep_assert_held(&req->ctx->uring_lock);
+	lockdep_assert_held(&ctx->uring_lock);
 
-	head = xa_load(&req->ctx->io_buffers, bgid);
-	if (head) {
-		if (!list_empty(&head->list)) {
-			kbuf = list_last_entry(&head->list, struct io_buffer,
-							list);
-			list_del(&kbuf->list);
-		} else {
-			kbuf = head;
-			xa_erase(&req->ctx->io_buffers, bgid);
-		}
+	bl = io_buffer_get_list(ctx, bgid);
+	if (bl && !list_empty(&bl->buf_list)) {
+		kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
+		list_del(&kbuf->list);
 		if (*len > kbuf->len)
 			*len = kbuf->len;
 		req->flags |= REQ_F_BUFFER_SELECTED;
@@ -3537,13 +3612,15 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
 				ret = nr;
 			break;
 		}
+		ret += nr;
 		if (!iov_iter_is_bvec(iter)) {
 			iov_iter_advance(iter, nr);
 		} else {
-			req->rw.len -= nr;
 			req->rw.addr += nr;
+			req->rw.len -= nr;
+			if (!req->rw.len)
+				break;
 		}
-		ret += nr;
 		if (nr != iovec.iov_len)
 			break;
 	}
@@ -3737,6 +3814,16 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		if (unlikely(ret < 0))
 			return ret;
 	} else {
+		/*
+		 * Safe and required to re-import if we're using provided
+		 * buffers, as we dropped the selected one before retry.
+		 */
+		if (req->flags & REQ_F_BUFFER_SELECT) {
+			ret = io_import_iovec(READ, req, &iovec, s, issue_flags);
+			if (unlikely(ret < 0))
+				return ret;
+		}
+
 		rw = req->async_data;
 		s = &rw->s;
 		/*
@@ -3773,6 +3860,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) {
 		req->flags &= ~REQ_F_REISSUE;
+		/* if we can poll, just do that */
+		if (req->opcode == IORING_OP_READ && file_can_poll(req->file))
+			return -EAGAIN;
 		/* IOPOLL retry should happen for io-wq threads */
 		if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
 			goto done;
@@ -4345,9 +4435,8 @@ static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
 static int io_msg_ring_prep(struct io_kiocb *req,
 			    const struct io_uring_sqe *sqe)
 {
-	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
-		     sqe->rw_flags || sqe->splice_fd_in || sqe->buf_index ||
-		     sqe->personality))
+	if (unlikely(sqe->addr || sqe->ioprio || sqe->rw_flags ||
+		     sqe->splice_fd_in || sqe->buf_index || sqe->personality))
 		return -EINVAL;
 
 	if (req->file->f_op != &io_uring_fops)
@@ -4605,8 +4694,8 @@ static int io_remove_buffers_prep(struct io_kiocb *req,
 	return 0;
 }
 
-static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
-			       int bgid, unsigned nbufs)
+static int __io_remove_buffers(struct io_ring_ctx *ctx,
+			       struct io_buffer_list *bl, unsigned nbufs)
 {
 	unsigned i = 0;
 
@@ -4615,17 +4704,16 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
 		return 0;
 
 	/* the head kbuf is the list itself */
-	while (!list_empty(&buf->list)) {
+	while (!list_empty(&bl->buf_list)) {
 		struct io_buffer *nxt;
 
-		nxt = list_first_entry(&buf->list, struct io_buffer, list);
+		nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
 		list_del(&nxt->list);
 		if (++i == nbufs)
 			return i;
 		cond_resched();
 	}
 	i++;
-	xa_erase(&ctx->io_buffers, bgid);
 
 	return i;
 }
@@ -4634,7 +4722,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_provide_buf *p = &req->pbuf;
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_buffer *head;
+	struct io_buffer_list *bl;
 	int ret = 0;
 	bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
 
@@ -4643,9 +4731,9 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
 	lockdep_assert_held(&ctx->uring_lock);
 
 	ret = -ENOENT;
-	head = xa_load(&ctx->io_buffers, p->bgid);
-	if (head)
-		ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
+	bl = io_buffer_get_list(ctx, p->bgid);
+	if (bl)
+		ret = __io_remove_buffers(ctx, bl, p->nbufs);
 	if (ret < 0)
 		req_set_fail(req);
 
@@ -4734,7 +4822,7 @@ static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
 }
 
 static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
-			  struct io_buffer **head)
+			  struct io_buffer_list *bl)
 {
 	struct io_buffer *buf;
 	u64 addr = pbuf->addr;
@@ -4746,29 +4834,24 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
 			break;
 		buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer,
 					list);
-		list_del(&buf->list);
+		list_move_tail(&buf->list, &bl->buf_list);
 		buf->addr = addr;
 		buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
 		buf->bid = bid;
+		buf->bgid = pbuf->bgid;
 		addr += pbuf->len;
 		bid++;
-		if (!*head) {
-			INIT_LIST_HEAD(&buf->list);
-			*head = buf;
-		} else {
-			list_add_tail(&buf->list, &(*head)->list);
-		}
 		cond_resched();
 	}
 
-	return i ? i : -ENOMEM;
+	return i ? 0 : -ENOMEM;
 }
 
 static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_provide_buf *p = &req->pbuf;
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_buffer *head, *list;
+	struct io_buffer_list *bl;
 	int ret = 0;
 	bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
 
@@ -4776,14 +4859,18 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 
 	lockdep_assert_held(&ctx->uring_lock);
 
-	list = head = xa_load(&ctx->io_buffers, p->bgid);
-
-	ret = io_add_buffers(ctx, p, &head);
-	if (ret >= 0 && !list) {
-		ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
-		if (ret < 0)
-			__io_remove_buffers(ctx, head, p->bgid, -1U);
+	bl = io_buffer_get_list(ctx, p->bgid);
+	if (unlikely(!bl)) {
+		bl = kmalloc(sizeof(*bl), GFP_KERNEL);
+		if (!bl) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		io_buffer_add_list(ctx, bl, p->bgid);
 	}
+
+	ret = io_add_buffers(ctx, p, bl);
+err:
 	if (ret < 0)
 		req_set_fail(req);
 	/* complete before unlock, IOPOLL may need the lock */
@@ -5464,8 +5551,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	accept->nofile = rlimit(RLIMIT_NOFILE);
 
 	accept->file_slot = READ_ONCE(sqe->file_index);
-	if (accept->file_slot && ((req->open.how.flags & O_CLOEXEC) ||
-				  (accept->flags & SOCK_CLOEXEC)))
+	if (accept->file_slot && (accept->flags & SOCK_CLOEXEC))
 		return -EINVAL;
 	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return -EINVAL;
@@ -5782,8 +5868,12 @@ static inline void io_poll_remove_entry(struct io_poll_iocb *poll)
 
 static void io_poll_remove_entries(struct io_kiocb *req)
 {
-	struct io_poll_iocb *poll = io_poll_get_single(req);
-	struct io_poll_iocb *poll_double = io_poll_get_double(req);
+	/*
+	 * Nothing to do if neither of those flags are set. Avoid dipping
+	 * into the poll/apoll/double cachelines if we can.
+	 */
+	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
+		return;
 
 	/*
 	 * While we hold the waitqueue lock and the waitqueue is nonempty,
@@ -5801,9 +5891,10 @@ static void io_poll_remove_entries(struct io_kiocb *req)
 	 * In that case, only RCU prevents the queue memory from being freed.
 	 */
 	rcu_read_lock();
-	io_poll_remove_entry(poll);
-	if (poll_double)
-		io_poll_remove_entry(poll_double);
+	if (req->flags & REQ_F_SINGLE_POLL)
+		io_poll_remove_entry(io_poll_get_single(req));
+	if (req->flags & REQ_F_DOUBLE_POLL)
+		io_poll_remove_entry(io_poll_get_double(req));
 	rcu_read_unlock();
 }
 
@@ -5835,13 +5926,13 @@ static int io_poll_check_events(struct io_kiocb *req)
 			return -ECANCELED;
 
 		if (!req->result) {
-			struct poll_table_struct pt = { ._key = poll->events };
+			struct poll_table_struct pt = { ._key = req->cflags };
 
-			req->result = vfs_poll(req->file, &pt) & poll->events;
+			req->result = vfs_poll(req->file, &pt) & req->cflags;
 		}
 
 		/* multishot, just fill an CQE and proceed */
-		if (req->result && !(poll->events & EPOLLONESHOT)) {
+		if (req->result && !(req->cflags & EPOLLONESHOT)) {
 			__poll_t mask = mangle_poll(req->result & poll->events);
 			bool filled;
 
@@ -5912,9 +6003,16 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
 		io_req_complete_failed(req, ret);
 }
 
-static void __io_poll_execute(struct io_kiocb *req, int mask)
+static void __io_poll_execute(struct io_kiocb *req, int mask, int events)
 {
 	req->result = mask;
+	/*
+	 * This is useful for poll that is armed on behalf of another
+	 * request, and where the wakeup path could be on a different
+	 * CPU. We want to avoid pulling in req->apoll->events for that
+	 * case.
+	 */
+	req->cflags = events;
 	if (req->opcode == IORING_OP_POLL_ADD)
 		req->io_task_work.func = io_poll_task_func;
 	else
@@ -5924,17 +6022,17 @@ static void __io_poll_execute(struct io_kiocb *req, int mask)
 	io_req_task_work_add(req, false);
 }
 
-static inline void io_poll_execute(struct io_kiocb *req, int res)
+static inline void io_poll_execute(struct io_kiocb *req, int res, int events)
 {
 	if (io_poll_get_ownership(req))
-		__io_poll_execute(req, res);
+		__io_poll_execute(req, res, events);
 }
 
 static void io_poll_cancel_req(struct io_kiocb *req)
 {
 	io_poll_mark_cancelled(req);
 	/* kick tw, which should complete the request */
-	io_poll_execute(req, 0);
+	io_poll_execute(req, 0, 0);
 }
 
 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
@@ -5948,7 +6046,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	if (unlikely(mask & POLLFREE)) {
 		io_poll_mark_cancelled(req);
 		/* we have to kick tw in case it's not already */
-		io_poll_execute(req, 0);
+		io_poll_execute(req, 0, poll->events);
 
 		/*
 		 * If the waitqueue is being freed early but someone is already
@@ -5978,8 +6076,9 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 		if (mask && poll->events & EPOLLONESHOT) {
 			list_del_init(&poll->wait.entry);
 			poll->head = NULL;
+			req->flags &= ~REQ_F_SINGLE_POLL;
 		}
-		__io_poll_execute(req, mask);
+		__io_poll_execute(req, mask, poll->events);
 	}
 	return 1;
 }
@@ -6014,12 +6113,14 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 			pt->error = -ENOMEM;
 			return;
 		}
+		req->flags |= REQ_F_DOUBLE_POLL;
 		io_init_poll_iocb(poll, first->events, first->wait.func);
 		*poll_ptr = poll;
 		if (req->opcode == IORING_OP_POLL_ADD)
 			req->flags |= REQ_F_ASYNC_DATA;
 	}
 
+	req->flags |= REQ_F_SINGLE_POLL;
 	pt->nr_entries++;
 	poll->head = head;
 	poll->wait.private = req;
@@ -6083,7 +6184,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
 		/* can't multishot if failed, just queue the event we've got */
 		if (unlikely(ipt->error || !ipt->nr_entries))
 			poll->events |= EPOLLONESHOT;
-		__io_poll_execute(req, mask);
+		__io_poll_execute(req, mask, poll->events);
 		return 0;
 	}
 	io_add_napi(req->file, req->ctx);
@@ -6094,7 +6195,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
 	 */
 	v = atomic_dec_return(&req->poll_refs);
 	if (unlikely(v & IO_POLL_REF_MASK))
-		__io_poll_execute(req, 0);
+		__io_poll_execute(req, 0, poll->events);
 	return 0;
 }
 
@@ -6113,7 +6214,7 @@ enum {
 	IO_APOLL_READY
 };
 
-static int io_arm_poll_handler(struct io_kiocb *req)
+static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
 {
 	const struct io_op_def *def = &io_op_defs[req->opcode];
 	struct io_ring_ctx *ctx = req->ctx;
@@ -6138,9 +6239,16 @@ static int io_arm_poll_handler(struct io_kiocb *req)
 		mask |= POLLOUT | POLLWRNORM;
 	}
 
-	apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
-	if (unlikely(!apoll))
-		return IO_APOLL_ABORTED;
+	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
+	    !list_empty(&ctx->apoll_cache)) {
+		apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
+						poll.wait.entry);
+		list_del_init(&apoll->poll.wait.entry);
+	} else {
+		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
+		if (unlikely(!apoll))
+			return IO_APOLL_ABORTED;
+	}
 	apoll->double_poll = NULL;
 	req->apoll = apoll;
 	req->flags |= REQ_F_POLLED;
@@ -6285,7 +6393,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 		return -EINVAL;
 
 	io_req_set_refcount(req);
-	poll->events = io_poll_parse_events(sqe, flags);
+	req->cflags = poll->events = io_poll_parse_events(sqe, flags);
 	return 0;
 }
 
@@ -6402,10 +6510,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
 
 	if (IS_ERR(req))
 		return PTR_ERR(req);
-
-	req_set_fail(req);
-	io_fill_cqe_req(req, -ECANCELED, 0);
-	io_put_req_deferred(req);
+	io_req_task_queue_fail(req, -ECANCELED);
 	return 0;
 }
 
@@ -7245,7 +7350,7 @@ static void io_wq_submit_work(struct io_wq_work *work)
 			continue;
 		}
 
-		if (io_arm_poll_handler(req) == IO_APOLL_OK)
+		if (io_arm_poll_handler(req, issue_flags) == IO_APOLL_OK)
 			return;
 		/* aborted or ready, in either case retry blocking */
 		needs_poll = false;
@@ -7391,7 +7496,7 @@ static void io_queue_sqe_arm_apoll(struct io_kiocb *req)
 {
 	struct io_kiocb *linked_timeout = io_prep_linked_timeout(req);
 
-	switch (io_arm_poll_handler(req)) {
+	switch (io_arm_poll_handler(req, 0)) {
 	case IO_APOLL_READY:
 		io_req_task_queue(req);
 		break;
@@ -7400,8 +7505,12 @@ static void io_queue_sqe_arm_apoll(struct io_kiocb *req)
 		 * Queued up for async execution, worker will release
 		 * submit reference when the iocb is actually submitted.
 		 */
+		io_kbuf_recycle(req);
 		io_queue_async_work(req, NULL);
 		break;
+	case IO_APOLL_OK:
+		io_kbuf_recycle(req);
+		break;
 	}
 
 	if (linked_timeout)
@@ -7770,8 +7879,14 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 		}
 		/* will complete beyond this point, count as submitted */
 		submitted++;
-		if (io_submit_sqe(ctx, req, sqe))
-			break;
+		if (io_submit_sqe(ctx, req, sqe)) {
+			/*
+			 * Continue submitting even for sqe failure if the
+			 * ring was setup with IORING_SETUP_SUBMIT_ALL
+			 */
+			if (!(ctx->flags & IORING_SETUP_SUBMIT_ALL))
+				break;
+		}
 	} while (submitted < nr);
 
 	if (unlikely(submitted != nr)) {
@@ -9829,7 +9944,7 @@ static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
 		return ret;
 	}
 	ev_fd->eventfd_async = eventfd_async;
-
+	ctx->has_evfd = true;
 	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
 	return 0;
 }
@@ -9849,6 +9964,7 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
 	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
 					lockdep_is_held(&ctx->uring_lock));
 	if (ev_fd) {
+		ctx->has_evfd = false;
 		rcu_assign_pointer(ctx->io_ev_fd, NULL);
 		call_rcu(&ev_fd->rcu, io_eventfd_put);
 		return 0;
@@ -9859,11 +9975,20 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
 
 static void io_destroy_buffers(struct io_ring_ctx *ctx)
 {
-	struct io_buffer *buf;
-	unsigned long index;
+	int i;
 
-	xa_for_each(&ctx->io_buffers, index, buf)
-		__io_remove_buffers(ctx, buf, index, -1U);
+	for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++) {
+		struct list_head *list = &ctx->io_buffers[i];
+
+		while (!list_empty(list)) {
+			struct io_buffer_list *bl;
+
+			bl = list_first_entry(list, struct io_buffer_list, list);
+			__io_remove_buffers(ctx, bl, -1U);
+			list_del(&bl->list);
+			kfree(bl);
+		}
+	}
 
 	while (!list_empty(&ctx->io_buffers_pages)) {
 		struct page *page;
@@ -9902,6 +10027,18 @@ static void io_wait_rsrc_data(struct io_rsrc_data *data)
 		wait_for_completion(&data->done);
 }
 
+static void io_flush_apoll_cache(struct io_ring_ctx *ctx)
+{
+	struct async_poll *apoll;
+
+	while (!list_empty(&ctx->apoll_cache)) {
+		apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
+						poll.wait.entry);
+		list_del(&apoll->poll.wait.entry);
+		kfree(apoll);
+	}
+}
+
 static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 {
 	io_sq_thread_finish(ctx);
@@ -9924,6 +10061,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	if (ctx->rings)
 		__io_cqring_overflow_flush(ctx, true);
 	io_eventfd_unregister(ctx);
+	io_flush_apoll_cache(ctx);
 	mutex_unlock(&ctx->uring_lock);
 	io_destroy_buffers(ctx);
 	if (ctx->sq_creds)
@@ -9959,6 +10097,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	io_free_napi_list(ctx);
 	kfree(ctx->cancel_hash);
 	kfree(ctx->dummy_ubuf);
+	kfree(ctx->io_buffers);
 	kfree(ctx);
 }
 
@@ -11234,7 +11373,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 	if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
 			IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
 			IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
-			IORING_SETUP_R_DISABLED))
+			IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL))
 		return -EINVAL;
 
 	return  io_uring_create(entries, &p, params);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2772dec9dcea..8bde30fa5387 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1105,17 +1105,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 		goto read_super_error;
 	}
 
-	root = d_make_root(inode);
-	if (!root) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto read_super_error;
-	}
-
-	sb->s_root = root;
-
-	ocfs2_complete_mount_recovery(osb);
-
 	osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL,
 						&ocfs2_kset->kobj);
 	if (!osb->osb_dev_kset) {
@@ -1133,6 +1122,17 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 		goto read_super_error;
 	}
 
+	root = d_make_root(inode);
+	if (!root) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto read_super_error;
+	}
+
+	sb->s_root = root;
+
+	ocfs2_complete_mount_recovery(osb);
+
 	if (ocfs2_mount_local(osb))
 		snprintf(nodestr, sizeof(nodestr), "local");
 	else
diff --git a/fs/pipe.c b/fs/pipe.c
index cc28623a67b6..2667db9506e2 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -253,7 +253,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 	 */
 	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
 	for (;;) {
-		unsigned int head = pipe->head;
+		/* Read ->head with a barrier vs post_one_notification() */
+		unsigned int head = smp_load_acquire(&pipe->head);
 		unsigned int tail = pipe->tail;
 		unsigned int mask = pipe->ring_size - 1;
 
@@ -831,10 +832,8 @@ void free_pipe_info(struct pipe_inode_info *pipe)
 	int i;
 
 #ifdef CONFIG_WATCH_QUEUE
-	if (pipe->watch_queue) {
+	if (pipe->watch_queue)
 		watch_queue_clear(pipe->watch_queue);
-		put_watch_queue(pipe->watch_queue);
-	}
 #endif
 
 	(void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
@@ -844,6 +843,10 @@ void free_pipe_info(struct pipe_inode_info *pipe)
 		if (buf->ops)
 			pipe_buf_release(pipe, buf);
 	}
+#ifdef CONFIG_WATCH_QUEUE
+	if (pipe->watch_queue)
+		put_watch_queue(pipe->watch_queue);
+#endif
 	if (pipe->tmp_page)
 		__free_page(pipe->tmp_page);
 	kfree(pipe->bufs);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index ca88c4706f2b..3f7f01f03869 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -480,6 +480,8 @@ void acpi_initialize_hp_context(struct acpi_device *adev,
 /* acpi_device.dev.bus == &acpi_bus_type */
 extern struct bus_type acpi_bus_type;
 
+int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data);
+
 /*
  * Events
  * ------
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 69e89d572b9e..02c1fa16e638 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -507,8 +507,12 @@ typedef u64 acpi_integer;
 /* Pointer/Integer type conversions */
 
 #define ACPI_TO_POINTER(i)              ACPI_CAST_PTR (void, (acpi_size) (i))
+#ifndef ACPI_TO_INTEGER
 #define ACPI_TO_INTEGER(p)              ACPI_PTR_DIFF (p, (void *) 0)
+#endif
+#ifndef ACPI_OFFSET
 #define ACPI_OFFSET(d, f)               ACPI_PTR_DIFF (&(((d *) 0)->f), (void *) 0)
+#endif
 #define ACPI_PTR_TO_PHYSADDR(i)         ACPI_TO_INTEGER(i)
 
 /* Optimizations for 4-character (32-bit) acpi_name manipulation */
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index ece0a8af2bae..afaca3a075e8 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -27,14 +27,16 @@ extern int hest_disable;
 extern int erst_disable;
 #ifdef CONFIG_ACPI_APEI_GHES
 extern bool ghes_disable;
+void __init acpi_ghes_init(void);
 #else
 #define ghes_disable 1
+static inline void acpi_ghes_init(void) { }
 #endif
 
 #ifdef CONFIG_ACPI_APEI
 void __init acpi_hest_init(void);
 #else
-static inline void acpi_hest_init(void) { return; }
+static inline void acpi_hest_init(void) { }
 #endif
 
 int erst_write(const struct cper_record_header *record);
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index b3ffb9bbf664..cec41e004ecf 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -114,6 +114,11 @@
 #define acpi_raw_spinlock                   raw_spinlock_t *
 #define acpi_cpu_flags                      unsigned long
 
+#define acpi_uintptr_t                      uintptr_t
+
+#define ACPI_TO_INTEGER(p)                  ((uintptr_t)(p))
+#define ACPI_OFFSET(d, f)                   offsetof(d, f)
+
 /* Use native linux version of acpi_os_allocate_zeroed */
 
 #define USE_NATIVE_ALLOCATE_ZEROED
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 42f3866bca69..2a10db2f0bc5 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -321,16 +321,6 @@
 #define THERMAL_TABLE(name)
 #endif
 
-#ifdef CONFIG_DTPM
-#define DTPM_TABLE()							\
-	. = ALIGN(8);							\
-	__dtpm_table = .;						\
-	KEEP(*(__dtpm_table))						\
-	__dtpm_table_end = .;
-#else
-#define DTPM_TABLE()
-#endif
-
 #define KERNEL_DTB()							\
 	STRUCT_ALIGN();							\
 	__dtb_start = .;						\
@@ -723,7 +713,6 @@
 	ACPI_PROBE_TABLE(irqchip)					\
 	ACPI_PROBE_TABLE(timer)						\
 	THERMAL_TABLE(governor)						\
-	DTPM_TABLE()							\
 	EARLYCON_TABLE()						\
 	LSM_TABLE()							\
 	EARLY_LSM_TABLE()						\
diff --git a/include/asm-generic/xor.h b/include/asm-generic/xor.h
index b62a2a56a4d4..44509d48fca2 100644
--- a/include/asm-generic/xor.h
+++ b/include/asm-generic/xor.h
@@ -8,7 +8,8 @@
 #include <linux/prefetch.h>
 
 static void
-xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -27,8 +28,9 @@ xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3)
+xor_8regs_3(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2,
+	    const unsigned long * __restrict p3)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -48,8 +50,10 @@ xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4)
+xor_8regs_4(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2,
+	    const unsigned long * __restrict p3,
+	    const unsigned long * __restrict p4)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -70,8 +74,11 @@ xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2,
+	    const unsigned long * __restrict p3,
+	    const unsigned long * __restrict p4,
+	    const unsigned long * __restrict p5)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -93,7 +100,8 @@ xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -129,8 +137,9 @@ xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3)
+xor_32regs_3(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2,
+	     const unsigned long * __restrict p3)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -175,8 +184,10 @@ xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4)
+xor_32regs_4(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2,
+	     const unsigned long * __restrict p3,
+	     const unsigned long * __restrict p4)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -230,8 +241,11 @@ xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2,
+	     const unsigned long * __restrict p3,
+	     const unsigned long * __restrict p4,
+	     const unsigned long * __restrict p5)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -294,7 +308,8 @@ xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 	prefetchw(p1);
@@ -320,8 +335,9 @@ xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3)
+xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2,
+	      const unsigned long * __restrict p3)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 	prefetchw(p1);
@@ -350,8 +366,10 @@ xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4)
+xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2,
+	      const unsigned long * __restrict p3,
+	      const unsigned long * __restrict p4)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 
@@ -384,8 +402,11 @@ xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2,
+	      const unsigned long * __restrict p3,
+	      const unsigned long * __restrict p4,
+	      const unsigned long * __restrict p5)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 
@@ -421,7 +442,8 @@ xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 
@@ -466,8 +488,9 @@ xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3)
+xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 
@@ -523,8 +546,10 @@ xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4)
+xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 
@@ -591,8 +616,11 @@ xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4,
+	       const unsigned long * __restrict p5)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index e715bdb720d5..057c8964aefb 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -56,6 +56,7 @@ enum arch_timer_spi_nr {
 #define ARCH_TIMER_EVT_TRIGGER_MASK	(0xF << ARCH_TIMER_EVT_TRIGGER_SHIFT)
 #define ARCH_TIMER_USR_VT_ACCESS_EN	(1 << 8) /* virtual timer registers */
 #define ARCH_TIMER_USR_PT_ACCESS_EN	(1 << 9) /* physical timer registers */
+#define ARCH_TIMER_EVT_INTERVAL_SCALE	(1 << 17) /* EVNTIS in the ARMv8 ARM */
 
 #define ARCH_TIMER_EVT_STREAM_PERIOD_US	100
 #define ARCH_TIMER_EVT_STREAM_FREQ				\
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index f76ec723ceae..f50c5d1725da 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -13,6 +13,8 @@
 #include <linux/list.h>
 #include <linux/types.h>
 
+#include <asm/unaligned.h>
+
 /*
  * Maximum values for blocksize and alignmask, used to allocate
  * static buffers that are big enough for any combination of
@@ -154,9 +156,11 @@ static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
 	    (size % sizeof(unsigned long)) == 0) {
 		unsigned long *d = (unsigned long *)dst;
 		unsigned long *s = (unsigned long *)src;
+		unsigned long l;
 
 		while (size > 0) {
-			*d++ ^= *s++;
+			l = get_unaligned(d) ^ get_unaligned(s++);
+			put_unaligned(l, d++);
 			size -= sizeof(unsigned long);
 		}
 	} else {
@@ -173,9 +177,11 @@ static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2,
 		unsigned long *d = (unsigned long *)dst;
 		unsigned long *s1 = (unsigned long *)src1;
 		unsigned long *s2 = (unsigned long *)src2;
+		unsigned long l;
 
 		while (size > 0) {
-			*d++ = *s1++ ^ *s2++;
+			l = get_unaligned(s1++) ^ get_unaligned(s2++);
+			put_unaligned(l, d++);
 			size -= sizeof(unsigned long);
 		}
 	} else {
diff --git a/include/crypto/asym_tpm_subtype.h b/include/crypto/asym_tpm_subtype.h
deleted file mode 100644
index 48198c36d6b9..000000000000
--- a/include/crypto/asym_tpm_subtype.h
+++ /dev/null
@@ -1,19 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#ifndef _LINUX_ASYM_TPM_SUBTYPE_H
-#define _LINUX_ASYM_TPM_SUBTYPE_H
-
-#include <linux/keyctl.h>
-
-struct tpm_key {
-	void *blob;
-	u32 blob_len;
-	uint16_t key_len; /* Size in bits of the key */
-	const void *pub_key; /* pointer inside blob to the public key bytes */
-	uint16_t pub_key_len; /* length of the public key */
-};
-
-struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len);
-
-extern struct asymmetric_key_subtype asym_tpm_subtype;
-
-#endif /* _LINUX_ASYM_TPM_SUBTYPE_H */
diff --git a/include/crypto/dh.h b/include/crypto/dh.h
index d71e9858ab86..7b863e911cb4 100644
--- a/include/crypto/dh.h
+++ b/include/crypto/dh.h
@@ -24,21 +24,17 @@
  *
  * @key:	Private DH key
  * @p:		Diffie-Hellman parameter P
- * @q:		Diffie-Hellman parameter Q
  * @g:		Diffie-Hellman generator G
  * @key_size:	Size of the private DH key
  * @p_size:	Size of DH parameter P
- * @q_size:	Size of DH parameter Q
  * @g_size:	Size of DH generator G
  */
 struct dh {
-	void *key;
-	void *p;
-	void *q;
-	void *g;
+	const void *key;
+	const void *p;
+	const void *g;
 	unsigned int key_size;
 	unsigned int p_size;
-	unsigned int q_size;
 	unsigned int g_size;
 };
 
@@ -83,4 +79,20 @@ int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params);
  */
 int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params);
 
+/**
+ * __crypto_dh_decode_key() - decode a private key without parameter checks
+ * @buf:	Buffer holding a packet key that should be decoded
+ * @len:	Length of the packet private key buffer
+ * @params:	Buffer allocated by the caller that is filled with the
+ *		unpacked DH private key.
+ *
+ * Internal function providing the same services as the exported
+ * crypto_dh_decode_key(), but without any of those basic parameter
+ * checks conducted by the latter.
+ *
+ * Return:	-EINVAL if buffer has insufficient size, 0 on success
+ */
+int __crypto_dh_decode_key(const char *buf, unsigned int len,
+			   struct dh *params);
+
 #endif
diff --git a/include/crypto/internal/kpp.h b/include/crypto/internal/kpp.h
index 659b642efada..9cb0662ebe87 100644
--- a/include/crypto/internal/kpp.h
+++ b/include/crypto/internal/kpp.h
@@ -10,6 +10,38 @@
 #include <crypto/kpp.h>
 #include <crypto/algapi.h>
 
+/**
+ * struct kpp_instance - KPP template instance
+ * @free: Callback getting invoked upon instance destruction. Must be set.
+ * @s: Internal. Generic crypto core instance state properly layout
+ *     to alias with @alg as needed.
+ * @alg: The &struct kpp_alg implementation provided by the instance.
+ */
+struct kpp_instance {
+	void (*free)(struct kpp_instance *inst);
+	union {
+		struct {
+			char head[offsetof(struct kpp_alg, base)];
+			struct crypto_instance base;
+		} s;
+		struct kpp_alg alg;
+	};
+};
+
+/**
+ * struct crypto_kpp_spawn - KPP algorithm spawn
+ * @base: Internal. Generic crypto core spawn state.
+ *
+ * Template instances can get a hold on some inner KPP algorithm by
+ * binding a &struct crypto_kpp_spawn via
+ * crypto_grab_kpp(). Transforms may subsequently get instantiated
+ * from the referenced inner &struct kpp_alg by means of
+ * crypto_spawn_kpp().
+ */
+struct crypto_kpp_spawn {
+	struct crypto_spawn base;
+};
+
 /*
  * Transform internal helpers.
  */
@@ -33,6 +65,62 @@ static inline const char *kpp_alg_name(struct crypto_kpp *tfm)
 	return crypto_kpp_tfm(tfm)->__crt_alg->cra_name;
 }
 
+/*
+ * Template instance internal helpers.
+ */
+/**
+ * kpp_crypto_instance() - Cast a &struct kpp_instance to the corresponding
+ *                         generic &struct crypto_instance.
+ * @inst: Pointer to the &struct kpp_instance to be cast.
+ * Return: A pointer to the &struct crypto_instance embedded in @inst.
+ */
+static inline struct crypto_instance *kpp_crypto_instance(
+	struct kpp_instance *inst)
+{
+	return &inst->s.base;
+}
+
+/**
+ * kpp_instance() - Cast a generic &struct crypto_instance to the corresponding
+ *                  &struct kpp_instance.
+ * @inst: Pointer to the &struct crypto_instance to be cast.
+ * Return: A pointer to the &struct kpp_instance @inst is embedded in.
+ */
+static inline struct kpp_instance *kpp_instance(struct crypto_instance *inst)
+{
+	return container_of(inst, struct kpp_instance, s.base);
+}
+
+/**
+ * kpp_alg_instance() - Get the &struct kpp_instance a given KPP transform has
+ *                      been instantiated from.
+ * @kpp: The KPP transform instantiated from some &struct kpp_instance.
+ * Return: The &struct kpp_instance associated with @kpp.
+ */
+static inline struct kpp_instance *kpp_alg_instance(struct crypto_kpp *kpp)
+{
+	return kpp_instance(crypto_tfm_alg_instance(&kpp->base));
+}
+
+/**
+ * kpp_instance_ctx() - Get a pointer to a &struct kpp_instance's implementation
+ *                      specific context data.
+ * @inst: The &struct kpp_instance whose context data to access.
+ *
+ * A KPP template implementation may allocate extra memory beyond the
+ * end of a &struct kpp_instance instantiated from &crypto_template.create().
+ * This function provides a means to obtain a pointer to this area.
+ *
+ * Return: A pointer to the implementation specific context data.
+ */
+static inline void *kpp_instance_ctx(struct kpp_instance *inst)
+{
+	return crypto_instance_ctx(kpp_crypto_instance(inst));
+}
+
+/*
+ * KPP algorithm (un)registration functions.
+ */
 /**
  * crypto_register_kpp() -- Register key-agreement protocol primitives algorithm
  *
@@ -56,4 +144,74 @@ int crypto_register_kpp(struct kpp_alg *alg);
  */
 void crypto_unregister_kpp(struct kpp_alg *alg);
 
+/**
+ * kpp_register_instance() - Register a KPP template instance.
+ * @tmpl: The instantiating template.
+ * @inst: The KPP template instance to be registered.
+ * Return: %0 on success, negative error code otherwise.
+ */
+int kpp_register_instance(struct crypto_template *tmpl,
+			  struct kpp_instance *inst);
+
+/*
+ * KPP spawn related functions.
+ */
+/**
+ * crypto_grab_kpp() - Look up a KPP algorithm and bind a spawn to it.
+ * @spawn: The KPP spawn to bind.
+ * @inst: The template instance owning @spawn.
+ * @name: The KPP algorithm name to look up.
+ * @type: The type bitset to pass on to the lookup.
+ * @mask: The mask bismask to pass on to the lookup.
+ * Return: %0 on success, a negative error code otherwise.
+ */
+int crypto_grab_kpp(struct crypto_kpp_spawn *spawn,
+		    struct crypto_instance *inst,
+		    const char *name, u32 type, u32 mask);
+
+/**
+ * crypto_drop_kpp() - Release a spawn previously bound via crypto_grab_kpp().
+ * @spawn: The spawn to release.
+ */
+static inline void crypto_drop_kpp(struct crypto_kpp_spawn *spawn)
+{
+	crypto_drop_spawn(&spawn->base);
+}
+
+/**
+ * crypto_spawn_kpp_alg() - Get the algorithm a KPP spawn has been bound to.
+ * @spawn: The spawn to get the referenced &struct kpp_alg for.
+ *
+ * This function as well as the returned result are safe to use only
+ * after @spawn has been successfully bound via crypto_grab_kpp() and
+ * up to until the template instance owning @spawn has either been
+ * registered successfully or the spawn has been released again via
+ * crypto_drop_spawn().
+ *
+ * Return: A pointer to the &struct kpp_alg referenced from the spawn.
+ */
+static inline struct kpp_alg *crypto_spawn_kpp_alg(
+	struct crypto_kpp_spawn *spawn)
+{
+	return container_of(spawn->base.alg, struct kpp_alg, base);
+}
+
+/**
+ * crypto_spawn_kpp() - Create a transform from a KPP spawn.
+ * @spawn: The spawn previously bound to some &struct kpp_alg via
+ *         crypto_grab_kpp().
+ *
+ * Once a &struct crypto_kpp_spawn has been successfully bound to a
+ * &struct kpp_alg via crypto_grab_kpp(), transforms for the latter
+ * may get instantiated from the former by means of this function.
+ *
+ * Return: A pointer to the freshly created KPP transform on success
+ * or an ``ERR_PTR()`` otherwise.
+ */
+static inline struct crypto_kpp *crypto_spawn_kpp(
+	struct crypto_kpp_spawn *spawn)
+{
+	return crypto_spawn_tfm2(&spawn->base);
+}
+
 #endif
diff --git a/include/crypto/sm3.h b/include/crypto/sm3.h
index 42ea21289ba9..1f021ad0533f 100644
--- a/include/crypto/sm3.h
+++ b/include/crypto/sm3.h
@@ -1,5 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Common values for SM3 algorithm
+ *
+ * Copyright (C) 2017 ARM Limited or its affiliates.
+ * Copyright (C) 2017 Gilad Ben-Yossef <[email protected]>
+ * Copyright (C) 2021 Tianjia Zhang <[email protected]>
  */
 
 #ifndef _CRYPTO_SM3_H
@@ -30,13 +35,30 @@ struct sm3_state {
 	u8 buffer[SM3_BLOCK_SIZE];
 };
 
-struct shash_desc;
+/*
+ * Stand-alone implementation of the SM3 algorithm. It is designed to
+ * have as little dependencies as possible so it can be used in the
+ * kexec_file purgatory. In other cases you should generally use the
+ * hash APIs from include/crypto/hash.h. Especially when hashing large
+ * amounts of data as those APIs may be hw-accelerated.
+ *
+ * For details see lib/crypto/sm3.c
+ */
 
-extern int crypto_sm3_update(struct shash_desc *desc, const u8 *data,
-			      unsigned int len);
+static inline void sm3_init(struct sm3_state *sctx)
+{
+	sctx->state[0] = SM3_IVA;
+	sctx->state[1] = SM3_IVB;
+	sctx->state[2] = SM3_IVC;
+	sctx->state[3] = SM3_IVD;
+	sctx->state[4] = SM3_IVE;
+	sctx->state[5] = SM3_IVF;
+	sctx->state[6] = SM3_IVG;
+	sctx->state[7] = SM3_IVH;
+	sctx->count = 0;
+}
 
-extern int crypto_sm3_final(struct shash_desc *desc, u8 *out);
+void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len);
+void sm3_final(struct sm3_state *sctx, u8 *out);
 
-extern int crypto_sm3_finup(struct shash_desc *desc, const u8 *data,
-			     unsigned int len, u8 *hash);
 #endif
diff --git a/include/dt-bindings/interrupt-controller/apple-aic.h b/include/dt-bindings/interrupt-controller/apple-aic.h
index 604f2bb30ac0..bf3aac0e5491 100644
--- a/include/dt-bindings/interrupt-controller/apple-aic.h
+++ b/include/dt-bindings/interrupt-controller/apple-aic.h
@@ -11,5 +11,7 @@
 #define AIC_TMR_HV_VIRT		1
 #define AIC_TMR_GUEST_PHYS	2
 #define AIC_TMR_GUEST_VIRT	3
+#define AIC_CPU_PMU_E		4
+#define AIC_CPU_PMU_P		5
 
 #endif
diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h
index 6acd3cf13a18..2419a735420f 100644
--- a/include/keys/system_keyring.h
+++ b/include/keys/system_keyring.h
@@ -38,6 +38,20 @@ extern int restrict_link_by_builtin_and_secondary_trusted(
 #define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted
 #endif
 
+#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
+extern int restrict_link_by_builtin_secondary_and_machine(
+	struct key *dest_keyring,
+	const struct key_type *type,
+	const union key_payload *payload,
+	struct key *restrict_key);
+extern void __init set_machine_trusted_keys(struct key *keyring);
+#else
+#define restrict_link_by_builtin_secondary_and_machine restrict_link_by_builtin_trusted
+static inline void __init set_machine_trusted_keys(struct key *keyring)
+{
+}
+#endif
+
 extern struct pkcs7_message *pkcs7;
 #ifdef CONFIG_SYSTEM_BLACKLIST_KEYRING
 extern int mark_hash_blacklisted(const char *hash);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6274758648e3..35413793a4d4 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -526,7 +526,7 @@ acpi_status acpi_release_memory(acpi_handle handle, struct resource *res,
 int acpi_resources_are_enforced(void);
 
 #ifdef CONFIG_HIBERNATION
-void __init acpi_check_s4_hw_signature(int check);
+extern int acpi_check_s4_hw_signature;
 #endif
 
 #ifdef CONFIG_PM_SLEEP
@@ -580,6 +580,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 extern bool osc_sb_apei_support_acked;
 extern bool osc_pc_lpi_support_confirmed;
 extern bool osc_sb_native_usb4_support_confirmed;
+extern bool osc_sb_cppc_not_supported;
 
 /* USB4 Capabilities */
 #define OSC_USB_USB3_TUNNELING			0x00000001
@@ -691,7 +692,7 @@ int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *);
 int acpi_device_modalias(struct device *, char *, int);
 
 struct platform_device *acpi_create_platform_device(struct acpi_device *,
-						    struct property_entry *);
+						    const struct property_entry *);
 #define ACPI_PTR(_ptr)	(_ptr)
 
 static inline void acpi_device_set_enumerated(struct acpi_device *adev)
@@ -930,7 +931,7 @@ static inline int acpi_device_modalias(struct device *dev,
 
 static inline struct platform_device *
 acpi_create_platform_device(struct acpi_device *adev,
-			    struct property_entry *properties)
+			    const struct property_entry *properties)
 {
 	return NULL;
 }
diff --git a/include/linux/acpi_agdi.h b/include/linux/acpi_agdi.h
new file mode 100644
index 000000000000..f477f0b452fa
--- /dev/null
+++ b/include/linux/acpi_agdi.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ACPI_AGDI_H__
+#define __ACPI_AGDI_H__
+
+#include <linux/acpi.h>
+
+#ifdef CONFIG_ACPI_AGDI
+void __init acpi_agdi_init(void);
+#else
+static inline void acpi_agdi_init(void) {}
+#endif
+#endif /* __ACPI_AGDI_H__ */
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 6c7f47846971..6562f543c3e0 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -117,30 +117,9 @@ void amba_device_put(struct amba_device *);
 int amba_device_add(struct amba_device *, struct resource *);
 int amba_device_register(struct amba_device *, struct resource *);
 void amba_device_unregister(struct amba_device *);
-struct amba_device *amba_find_device(const char *, struct device *, unsigned int, unsigned int);
 int amba_request_regions(struct amba_device *, const char *);
 void amba_release_regions(struct amba_device *);
 
-static inline int amba_pclk_enable(struct amba_device *dev)
-{
-	return clk_enable(dev->pclk);
-}
-
-static inline void amba_pclk_disable(struct amba_device *dev)
-{
-	clk_disable(dev->pclk);
-}
-
-static inline int amba_pclk_prepare(struct amba_device *dev)
-{
-	return clk_prepare(dev->pclk);
-}
-
-static inline void amba_pclk_unprepare(struct amba_device *dev)
-{
-	clk_unprepare(dev->pclk);
-}
-
 /* Some drivers don't use the struct amba_device */
 #define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff)
 #define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f)
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index cce6136b300a..58cbe18d825c 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -11,6 +11,10 @@
 void topology_normalize_cpu_scale(void);
 int topology_update_cpu_topology(void);
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+void topology_init_cpu_capacity_cppc(void);
+#endif
+
 struct device_node;
 bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu);
 
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 63ccb5252190..220c8c60e021 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -92,6 +92,11 @@
 			   ARM_SMCCC_SMC_32,				\
 			   0, 0x7fff)
 
+#define ARM_SMCCC_ARCH_WORKAROUND_3					\
+	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,				\
+			   ARM_SMCCC_SMC_32,				\
+			   0, 0x3fff)
+
 #define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID				\
 	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,				\
 			   ARM_SMCCC_SMC_32,				\
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 0a241c5c911d..14dc461b0e82 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
 /* For use by arch code when CPU hotplug notifiers are not appropriate. */
 int sdei_mask_local_cpu(void);
 int sdei_unmask_local_cpu(void);
+void __init sdei_init(void);
 #else
 static inline int sdei_mask_local_cpu(void) { return 0; }
 static inline int sdei_unmask_local_cpu(void) { return 0; }
+static inline void sdei_init(void) { }
 #endif /* CONFIG_ARM_SDE_INTERFACE */
 
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d0ad379d1e62..3121d1fc8e75 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1792,6 +1792,11 @@ struct bpf_core_ctx {
 int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
 		   int relo_idx, void *insn);
 
+static inline bool unprivileged_ebpf_enabled(void)
+{
+	return !sysctl_unprivileged_bpf_disabled;
+}
+
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -2011,6 +2016,12 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
 {
 	return NULL;
 }
+
+static inline bool unprivileged_ebpf_enabled(void)
+{
+	return false;
+}
+
 #endif /* CONFIG_BPF_SYSCALL */
 
 void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 3522a272b74d..35c7d6db4139 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -661,6 +661,11 @@ struct gov_attr_set {
 /* sysfs ops for cpufreq governors */
 extern const struct sysfs_ops governor_sysfs_ops;
 
+static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj)
+{
+	return container_of(kobj, struct gov_attr_set, kobj);
+}
+
 void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node);
 void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node);
 unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 411a428ace4d..c7dce7883179 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -100,6 +100,7 @@ enum cpuhp_state {
 	CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
 	CPUHP_PADATA_DEAD,
 	CPUHP_AP_DTPM_CPU_DEAD,
+	CPUHP_RANDOM_PREPARE,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
@@ -231,6 +232,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
 	CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
 	CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE,
+	CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
 	CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
@@ -240,6 +242,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_CSKY_ONLINE,
 	CPUHP_AP_WATCHDOG_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
+	CPUHP_AP_RANDOM_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
 	CPUHP_AP_BASE_CACHEINFO_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 855869e1fd32..2324ab6f1846 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -133,6 +133,15 @@
 #define CRYPTO_ALG_ALLOCATES_MEMORY	0x00010000
 
 /*
+ * Mark an algorithm as a service implementation only usable by a
+ * template and never by a normal user of the kernel crypto API.
+ * This is intended to be used by algorithms that are themselves
+ * not FIPS-approved but may instead be used to implement parts of
+ * a FIPS-approved algorithm (e.g., dh vs. ffdhe2048(dh)).
+ */
+#define CRYPTO_ALG_FIPS_INTERNAL	0x00020000
+
+/*
  * Transform masks and values (for crt_flags).
  */
 #define CRYPTO_TFM_NEED_KEY		0x00000001
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 6150d11a607e..dca2b1355bb1 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -62,14 +62,6 @@
 #define DMA_ATTR_PRIVILEGED		(1UL << 9)
 
 /*
- * This is a hint to the DMA-mapping subsystem that the device is expected
- * to overwrite the entire mapped size, thus the caller does not require any
- * of the previous buffer contents to be preserved. This allows
- * bounce-buffering implementations to optimise DMA_FROM_DEVICE transfers.
- */
-#define DMA_ATTR_OVERWRITE		(1UL << 10)
-
-/*
  * A dma_addr_t can hold any valid DMA or bus address for the platform.  It can
  * be given to a device to use as a DMA source or target.  It is specific to a
  * given device and there may be a translation between the CPU physical address
diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
index d37e5d06a357..a4a13514b730 100644
--- a/include/linux/dtpm.h
+++ b/include/linux/dtpm.h
@@ -32,28 +32,25 @@ struct dtpm_ops {
 	void (*release)(struct dtpm *);
 };
 
-typedef int (*dtpm_init_t)(void);
+struct device_node;
 
-struct dtpm_descr {
-	dtpm_init_t init;
+struct dtpm_subsys_ops {
+	const char *name;
+	int (*init)(void);
+	void (*exit)(void);
+	int (*setup)(struct dtpm *, struct device_node *);
 };
 
-/* Init section thermal table */
-extern struct dtpm_descr __dtpm_table[];
-extern struct dtpm_descr __dtpm_table_end[];
-
-#define DTPM_TABLE_ENTRY(name, __init)				\
-	static struct dtpm_descr __dtpm_table_entry_##name	\
-	__used __section("__dtpm_table") = {			\
-		.init = __init,					\
-	}
-
-#define DTPM_DECLARE(name, init)	DTPM_TABLE_ENTRY(name, init)
+enum DTPM_NODE_TYPE {
+	DTPM_NODE_VIRTUAL = 0,
+	DTPM_NODE_DT,
+};
 
-#define for_each_dtpm_table(__dtpm)	\
-	for (__dtpm = __dtpm_table;	\
-	     __dtpm < __dtpm_table_end;	\
-	     __dtpm++)
+struct dtpm_node {
+	enum DTPM_NODE_TYPE type;
+	const char *name;
+	struct dtpm_node *parent;
+};
 
 static inline struct dtpm *to_dtpm(struct powercap_zone *zone)
 {
@@ -70,4 +67,7 @@ void dtpm_unregister(struct dtpm *dtpm);
 
 int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent);
 
+int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table);
+
+void dtpm_destroy_hierarchy(void);
 #endif
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 746e081879a5..f8e206e82476 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -114,7 +114,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg
 #endif
 }
 
-#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64)
+#ifdef CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS
 /*
  * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out
  * extra segments containing the gate DSO contents.  Dumping its
@@ -149,6 +149,6 @@ static inline size_t elf_core_extra_data_size(void)
 {
 	return 0;
 }
-#endif
+#endif /* CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS */
 
 #endif /* _LINUX_ELFCORE_H */
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 907cb01890cf..f6783f58c64a 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -93,6 +93,7 @@ enum pm_api_id {
 	PM_FPGA_LOAD = 22,
 	PM_FPGA_GET_STATUS = 23,
 	PM_GET_CHIPID = 24,
+	PM_SECURE_SHA = 26,
 	PM_PINCTRL_REQUEST = 28,
 	PM_PINCTRL_RELEASE = 29,
 	PM_PINCTRL_GET_FUNCTION = 30,
@@ -427,6 +428,7 @@ int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities,
 			      const u32 qos,
 			      const enum zynqmp_pm_request_ack ack);
 int zynqmp_pm_aes_engine(const u64 address, u32 *out);
+int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags);
 int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags);
 int zynqmp_pm_fpga_get_status(u32 *value);
 int zynqmp_pm_write_ggs(u32 index, u32 value);
@@ -601,6 +603,12 @@ static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out)
 	return -ENODEV;
 }
 
+static inline int zynqmp_pm_sha_hash(const u64 address, const u32 size,
+				     const u32 flags)
+{
+	return -ENODEV;
+}
+
 static inline int zynqmp_pm_fpga_load(const u64 address, const u32 size,
 				      const u32 flags)
 {
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 8e6dd908da21..aa1d4da03538 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -60,7 +60,5 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng);
 /** Unregister a Hardware Random Number Generator driver. */
 extern void hwrng_unregister(struct hwrng *rng);
 extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng);
-/** Feed random bits into the pool. */
-extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy);
 
 #endif /* LINUX_HWRANDOM_H_ */
diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index b712217f7030..1ed52441972f 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -52,6 +52,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev)
 	case ARPHRD_VOID:
 	case ARPHRD_NONE:
 	case ARPHRD_RAWIP:
+	case ARPHRD_PIMREG:
 		return false;
 	default:
 		return true;
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9367f1cb2e3c..f40754caaefa 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -579,7 +579,16 @@ enum
 	NR_SOFTIRQS
 };
 
-#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ))
+/*
+ * The following vectors can be safely ignored after ksoftirqd is parked:
+ *
+ * _ RCU:
+ * 	1) rcutree_migrate_callbacks() migrates the queue.
+ * 	2) rcu_report_dead() reports the final quiescent states.
+ *
+ * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue
+ */
+#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ))
 
 /* map softirq index to softirq name. update 'softirq_to_name' in
  * kernel/softirq.c when adding a new softirq.
diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h
index e9dacd4b9f6b..af1c9d62e642 100644
--- a/include/linux/ioasid.h
+++ b/include/linux/ioasid.h
@@ -34,13 +34,16 @@ struct ioasid_allocator_ops {
 #if IS_ENABLED(CONFIG_IOASID)
 ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
 		      void *private);
-void ioasid_get(ioasid_t ioasid);
-bool ioasid_put(ioasid_t ioasid);
+void ioasid_free(ioasid_t ioasid);
 void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
 		  bool (*getter)(void *));
 int ioasid_register_allocator(struct ioasid_allocator_ops *allocator);
 void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator);
 int ioasid_set_data(ioasid_t ioasid, void *data);
+static inline bool pasid_valid(ioasid_t ioasid)
+{
+	return ioasid != INVALID_IOASID;
+}
 
 #else /* !CONFIG_IOASID */
 static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min,
@@ -49,14 +52,7 @@ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min,
 	return INVALID_IOASID;
 }
 
-static inline void ioasid_get(ioasid_t ioasid)
-{
-}
-
-static inline bool ioasid_put(ioasid_t ioasid)
-{
-	return false;
-}
+static inline void ioasid_free(ioasid_t ioasid) { }
 
 static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
 				bool (*getter)(void *))
@@ -78,5 +74,10 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data)
 	return -ENOTSUPP;
 }
 
+static inline bool pasid_valid(ioasid_t ioasid)
+{
+	return false;
+}
+
 #endif /* CONFIG_IOASID */
 #endif /* __LINUX_IOASID_H */
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 848e1e12c5c6..f92788ccdba2 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -456,7 +456,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
 /**
  * struct irq_chip - hardware interrupt chip descriptor
  *
- * @parent_device:	pointer to parent device for irqchip
  * @name:		name for /proc/interrupts
  * @irq_startup:	start up the interrupt (defaults to ->enable if NULL)
  * @irq_shutdown:	shut down the interrupt (defaults to ->disable if NULL)
@@ -503,7 +502,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
  * @flags:		chip specific flags
  */
 struct irq_chip {
-	struct device	*parent_device;
 	const char	*name;
 	unsigned int	(*irq_startup)(struct irq_data *data);
 	void		(*irq_shutdown)(struct irq_data *data);
@@ -712,10 +710,11 @@ extern struct irq_chip no_irq_chip;
 extern struct irq_chip dummy_irq_chip;
 
 extern void
-irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip,
 			      irq_flow_handler_t handle, const char *name);
 
-static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *chip,
+static inline void irq_set_chip_and_handler(unsigned int irq,
+					    const struct irq_chip *chip,
 					    irq_flow_handler_t handle)
 {
 	irq_set_chip_and_handler_name(irq, chip, handle, NULL);
@@ -805,7 +804,7 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq)
 }
 
 /* Set/get chip/data for an IRQ: */
-extern int irq_set_chip(unsigned int irq, struct irq_chip *chip);
+extern int irq_set_chip(unsigned int irq, const struct irq_chip *chip);
 extern int irq_set_handler_data(unsigned int irq, void *data);
 extern int irq_set_chip_data(unsigned int irq, void *data);
 extern int irq_set_irq_type(unsigned int irq, unsigned int type);
diff --git a/include/linux/irqchip/versatile-fpga.h b/include/linux/irqchip/versatile-fpga.h
deleted file mode 100644
index a978fc8c7996..000000000000
--- a/include/linux/irqchip/versatile-fpga.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef PLAT_FPGA_IRQ_H
-#define PLAT_FPGA_IRQ_H
-
-struct device_node;
-struct pt_regs;
-
-void fpga_handle_irq(struct pt_regs *regs);
-void fpga_irq_init(void __iomem *, const char *, int, int, u32,
-		struct device_node *node);
-int fpga_irq_of_init(struct device_node *node,
-		     struct device_node *parent);
-
-#endif
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 93d270ca0c56..a77584593f7d 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -160,6 +160,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc)
 
 int handle_irq_desc(struct irq_desc *desc);
 int generic_handle_irq(unsigned int irq);
+int generic_handle_irq_safe(unsigned int irq);
 
 #ifdef CONFIG_IRQ_DOMAIN
 /*
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index d476405802e9..00d577f90883 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -151,6 +151,8 @@ struct irq_domain_chip_generic;
  * @gc: Pointer to a list of generic chips. There is a helper function for
  *      setting up one or more generic chips for interrupt controllers
  *      drivers using the generic chip library which uses this pointer.
+ * @dev: Pointer to a device that the domain represent, and that will be
+ *       used for power management purposes.
  * @parent: Pointer to parent irq_domain to support hierarchy irq_domains
  *
  * Revmap data, used internally by irq_domain
@@ -171,6 +173,7 @@ struct irq_domain {
 	struct fwnode_handle *fwnode;
 	enum irq_domain_bus_token bus_token;
 	struct irq_domain_chip_generic *gc;
+	struct device *dev;
 #ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
 	struct irq_domain *parent;
 #endif
@@ -226,6 +229,13 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d)
 	return to_of_node(d->fwnode);
 }
 
+static inline void irq_domain_set_pm_device(struct irq_domain *d,
+					    struct device *dev)
+{
+	if (d)
+		d->dev = dev;
+}
+
 #ifdef CONFIG_IRQ_DOMAIN
 struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id,
 						const char *name, phys_addr_t *pa);
@@ -469,7 +479,8 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest);
 extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
 						unsigned int virq);
 extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
-				irq_hw_number_t hwirq, struct irq_chip *chip,
+				irq_hw_number_t hwirq,
+				const struct irq_chip *chip,
 				void *chip_data, irq_flow_handler_t handler,
 				void *handler_data, const char *handler_name);
 extern void irq_domain_reset_irq_data(struct irq_data *irq_data);
@@ -512,7 +523,7 @@ extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
 extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain,
 					 unsigned int virq,
 					 irq_hw_number_t hwirq,
-					 struct irq_chip *chip,
+					 const struct irq_chip *chip,
 					 void *chip_data);
 extern void irq_domain_free_irqs_common(struct irq_domain *domain,
 					unsigned int virq,
diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h
new file mode 100644
index 000000000000..6f612d69ea0c
--- /dev/null
+++ b/include/linux/kasan-enabled.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KASAN_ENABLED_H
+#define _LINUX_KASAN_ENABLED_H
+
+#include <linux/static_key.h>
+
+#ifdef CONFIG_KASAN_HW_TAGS
+
+DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled);
+
+static __always_inline bool kasan_enabled(void)
+{
+	return static_branch_likely(&kasan_flag_enabled);
+}
+
+static inline bool kasan_hw_tags_enabled(void)
+{
+	return kasan_enabled();
+}
+
+#else /* CONFIG_KASAN_HW_TAGS */
+
+static inline bool kasan_enabled(void)
+{
+	return IS_ENABLED(CONFIG_KASAN);
+}
+
+static inline bool kasan_hw_tags_enabled(void)
+{
+	return false;
+}
+
+#endif /* CONFIG_KASAN_HW_TAGS */
+
+#endif /* LINUX_KASAN_ENABLED_H */
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 4a45562d8893..b6a93261c92a 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -3,6 +3,7 @@
 #define _LINUX_KASAN_H
 
 #include <linux/bug.h>
+#include <linux/kasan-enabled.h>
 #include <linux/kernel.h>
 #include <linux/static_key.h>
 #include <linux/types.h>
@@ -83,33 +84,11 @@ static inline void kasan_disable_current(void) {}
 
 #ifdef CONFIG_KASAN_HW_TAGS
 
-DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled);
-
-static __always_inline bool kasan_enabled(void)
-{
-	return static_branch_likely(&kasan_flag_enabled);
-}
-
-static inline bool kasan_hw_tags_enabled(void)
-{
-	return kasan_enabled();
-}
-
 void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags);
 void kasan_free_pages(struct page *page, unsigned int order);
 
 #else /* CONFIG_KASAN_HW_TAGS */
 
-static inline bool kasan_enabled(void)
-{
-	return IS_ENABLED(CONFIG_KASAN);
-}
-
-static inline bool kasan_hw_tags_enabled(void)
-{
-	return false;
-}
-
 static __always_inline void kasan_alloc_pages(struct page *page,
 					      unsigned int order, gfp_t flags)
 {
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index dbf8506decca..acb1ad2356f1 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -165,7 +165,18 @@
 #ifndef SYM_END
 #define SYM_END(name, sym_type)				\
 	.type name sym_type ASM_NL			\
-	.size name, .-name
+	.set .L__sym_size_##name, .-name ASM_NL		\
+	.size name, .L__sym_size_##name
+#endif
+
+/* SYM_ALIAS -- use only if you have to */
+#ifndef SYM_ALIAS
+#define SYM_ALIAS(alias, name, sym_type, linkage)			\
+	linkage(alias) ASM_NL						\
+	.set alias, name ASM_NL						\
+	.type alias sym_type ASM_NL					\
+	.set .L__sym_size_##alias, .L__sym_size_##name ASM_NL		\
+	.size alias, .L__sym_size_##alias
 #endif
 
 /* === code annotations === */
@@ -200,30 +211,8 @@
 	SYM_ENTRY(name, linkage, SYM_A_NONE)
 #endif
 
-/*
- * SYM_FUNC_START_LOCAL_ALIAS -- use where there are two local names for one
- * function
- */
-#ifndef SYM_FUNC_START_LOCAL_ALIAS
-#define SYM_FUNC_START_LOCAL_ALIAS(name)		\
-	SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)
-#endif
-
-/*
- * SYM_FUNC_START_ALIAS -- use where there are two global names for one
- * function
- */
-#ifndef SYM_FUNC_START_ALIAS
-#define SYM_FUNC_START_ALIAS(name)			\
-	SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
-#endif
-
 /* SYM_FUNC_START -- use for global functions */
 #ifndef SYM_FUNC_START
-/*
- * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two
- * later.
- */
 #define SYM_FUNC_START(name)				\
 	SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
 #endif
@@ -236,7 +225,6 @@
 
 /* SYM_FUNC_START_LOCAL -- use for local functions */
 #ifndef SYM_FUNC_START_LOCAL
-/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */
 #define SYM_FUNC_START_LOCAL(name)			\
 	SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)
 #endif
@@ -259,22 +247,39 @@
 	SYM_START(name, SYM_L_WEAK, SYM_A_NONE)
 #endif
 
-/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */
-#ifndef SYM_FUNC_END_ALIAS
-#define SYM_FUNC_END_ALIAS(name)			\
-	SYM_END(name, SYM_T_FUNC)
-#endif
-
 /*
  * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START,
  * SYM_FUNC_START_WEAK, ...
  */
 #ifndef SYM_FUNC_END
-/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */
 #define SYM_FUNC_END(name)				\
 	SYM_END(name, SYM_T_FUNC)
 #endif
 
+/*
+ * SYM_FUNC_ALIAS -- define a global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS
+#define SYM_FUNC_ALIAS(alias, name)					\
+	SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_LOCAL
+#define SYM_FUNC_ALIAS_LOCAL(alias, name)				\
+	SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_WEAK
+#define SYM_FUNC_ALIAS_WEAK(alias, name)				\
+	SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK)
+#endif
+
 /* SYM_CODE_START -- use for non-C (special) functions */
 #ifndef SYM_CODE_START
 #define SYM_CODE_START(name)				\
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 598ac3bcc901..49a48d7709ac 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -3434,7 +3434,6 @@ enum {
 enum {
 	MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO  = BIT(0),
 	MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO  = BIT(1),
-	MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO    = BIT(2),
 };
 
 enum {
@@ -9900,8 +9899,8 @@ struct mlx5_ifc_bufferx_reg_bits {
 	u8         reserved_at_0[0x6];
 	u8         lossy[0x1];
 	u8         epsb[0x1];
-	u8         reserved_at_8[0xc];
-	u8         size[0xc];
+	u8         reserved_at_8[0x8];
+	u8         size[0x10];
 
 	u8         xoff_threshold[0x10];
 	u8         xon_threshold[0x10];
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0f549870da6a..5f7a33890b0f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -634,7 +634,7 @@ struct mm_struct {
 #endif
 		struct work_struct async_put_work;
 
-#ifdef CONFIG_IOMMU_SUPPORT
+#ifdef CONFIG_IOMMU_SVA
 		u32 pasid;
 #endif
 	} __randomize_layout;
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 4bb71979a8fd..5da5d990ff58 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -211,7 +211,7 @@ struct css_device_id {
 	kernel_ulong_t driver_data;
 };
 
-#define ACPI_ID_LEN	9
+#define ACPI_ID_LEN	16
 
 struct acpi_device_id {
 	__u8 id[ACPI_ID_LEN];
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8b5a314db167..f53ea7038441 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4602,6 +4602,8 @@ int skb_csum_hwoffload_help(struct sk_buff *skb,
 
 struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 				  netdev_features_t features, bool tx_path);
+struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
+				    netdev_features_t features, __be16 type);
 struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 				    netdev_features_t features);
 
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 2512e2f9cd4e..0407a38b470a 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -26,6 +26,8 @@
  */
 /* Event uses a 64bit counter */
 #define ARMPMU_EVT_64BIT		1
+/* Event uses a 47bit counter */
+#define ARMPMU_EVT_47BIT		2
 
 #define HW_OP_UNSUPPORTED		0xFFFF
 #define C(_x)				PERF_COUNT_HW_CACHE_##_x
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 6de8d7a90d78..8fa70ba063a5 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -87,8 +87,8 @@ extern const int phy_10gbit_features_array[1];
  *
  * @PHY_INTERFACE_MODE_NA: Not Applicable - don't touch
  * @PHY_INTERFACE_MODE_INTERNAL: No interface, MAC and PHY combined
- * @PHY_INTERFACE_MODE_MII: Median-independent interface
- * @PHY_INTERFACE_MODE_GMII: Gigabit median-independent interface
+ * @PHY_INTERFACE_MODE_MII: Media-independent interface
+ * @PHY_INTERFACE_MODE_GMII: Gigabit media-independent interface
  * @PHY_INTERFACE_MODE_SGMII: Serial gigabit media-independent interface
  * @PHY_INTERFACE_MODE_TBI: Ten Bit Interface
  * @PHY_INTERFACE_MODE_REVMII: Reverse Media Independent Interface
diff --git a/include/linux/pm.h b/include/linux/pm.h
index f7d2be686359..e65b3ab28377 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -770,11 +770,11 @@ extern int dpm_suspend_late(pm_message_t state);
 extern int dpm_suspend(pm_message_t state);
 extern int dpm_prepare(pm_message_t state);
 
-extern void __suspend_report_result(const char *function, void *fn, int ret);
+extern void __suspend_report_result(const char *function, struct device *dev, void *fn, int ret);
 
-#define suspend_report_result(fn, ret)					\
+#define suspend_report_result(dev, fn, ret)				\
 	do {								\
-		__suspend_report_result(__func__, fn, ret);		\
+		__suspend_report_result(__func__, dev, fn, ret);	\
 	} while (0)
 
 extern int device_pm_wait_for_dev(struct device *sub, struct device *dev);
@@ -814,7 +814,7 @@ static inline int dpm_suspend_start(pm_message_t state)
 	return 0;
 }
 
-#define suspend_report_result(fn, ret)		do {} while (0)
+#define suspend_report_result(dev, fn, ret)	do {} while (0)
 
 static inline int device_pm_wait_for_dev(struct device *a, struct device *b)
 {
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 9f09601c465a..2bff6a10095d 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -567,6 +567,10 @@ static inline void pm_runtime_disable(struct device *dev)
  * Allow the runtime PM autosuspend mechanism to be used for @dev whenever
  * requested (or "autosuspend" will be handled as direct runtime-suspend for
  * it).
+ *
+ * NOTE: It's important to undo this with pm_runtime_dont_use_autosuspend()
+ * at driver exit time unless your driver initially enabled pm_runtime
+ * with devm_pm_runtime_enable() (which handles it for you).
  */
 static inline void pm_runtime_use_autosuspend(struct device *dev)
 {
diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h
index 2a9fee8ddae3..51b811b62322 100644
--- a/include/linux/raid/xor.h
+++ b/include/linux/raid/xor.h
@@ -11,13 +11,20 @@ struct xor_block_template {
         struct xor_block_template *next;
         const char *name;
         int speed;
-	void (*do_2)(unsigned long, unsigned long *, unsigned long *);
-	void (*do_3)(unsigned long, unsigned long *, unsigned long *,
-		     unsigned long *);
-	void (*do_4)(unsigned long, unsigned long *, unsigned long *,
-		     unsigned long *, unsigned long *);
-	void (*do_5)(unsigned long, unsigned long *, unsigned long *,
-		     unsigned long *, unsigned long *, unsigned long *);
+	void (*do_2)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict);
+	void (*do_3)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict);
+	void (*do_4)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict);
+	void (*do_5)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict);
 };
 
 #endif
diff --git a/include/linux/random.h b/include/linux/random.h
index c45b2693e51f..c0baffe7afb1 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -1,9 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*
- * include/linux/random.h
- *
- * Include file for the random number generator.
- */
+
 #ifndef _LINUX_RANDOM_H
 #define _LINUX_RANDOM_H
 
@@ -14,14 +10,10 @@
 
 #include <uapi/linux/random.h>
 
-struct random_ready_callback {
-	struct list_head list;
-	void (*func)(struct random_ready_callback *rdy);
-	struct module *owner;
-};
+struct notifier_block;
 
-extern void add_device_randomness(const void *, unsigned int);
-extern void add_bootloader_randomness(const void *, unsigned int);
+extern void add_device_randomness(const void *, size_t);
+extern void add_bootloader_randomness(const void *, size_t);
 
 #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
 static inline void add_latent_entropy(void)
@@ -36,17 +28,27 @@ static inline void add_latent_entropy(void) {}
 extern void add_input_randomness(unsigned int type, unsigned int code,
 				 unsigned int value) __latent_entropy;
 extern void add_interrupt_randomness(int irq) __latent_entropy;
+extern void add_hwgenerator_randomness(const void *buffer, size_t count,
+				       size_t entropy);
+#if IS_ENABLED(CONFIG_VMGENID)
+extern void add_vmfork_randomness(const void *unique_vm_id, size_t size);
+extern int register_random_vmfork_notifier(struct notifier_block *nb);
+extern int unregister_random_vmfork_notifier(struct notifier_block *nb);
+#else
+static inline int register_random_vmfork_notifier(struct notifier_block *nb) { return 0; }
+static inline int unregister_random_vmfork_notifier(struct notifier_block *nb) { return 0; }
+#endif
 
-extern void get_random_bytes(void *buf, int nbytes);
+extern void get_random_bytes(void *buf, size_t nbytes);
 extern int wait_for_random_bytes(void);
 extern int __init rand_initialize(void);
 extern bool rng_is_initialized(void);
-extern int add_random_ready_callback(struct random_ready_callback *rdy);
-extern void del_random_ready_callback(struct random_ready_callback *rdy);
-extern int __must_check get_random_bytes_arch(void *buf, int nbytes);
+extern int register_random_ready_notifier(struct notifier_block *nb);
+extern int unregister_random_ready_notifier(struct notifier_block *nb);
+extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes);
 
 #ifndef MODULE
-extern const struct file_operations random_fops, urandom_fops;
+extern const struct file_operations random_fops;
 #endif
 
 u32 get_random_u32(void);
@@ -87,7 +89,7 @@ static inline unsigned long get_random_canary(void)
 
 /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes).
  * Returns the result of the call to wait_for_random_bytes. */
-static inline int get_random_bytes_wait(void *buf, int nbytes)
+static inline int get_random_bytes_wait(void *buf, size_t nbytes)
 {
 	int ret = wait_for_random_bytes();
 	get_random_bytes(buf, nbytes);
@@ -158,4 +160,9 @@ static inline bool __init arch_get_random_long_early(unsigned long *v)
 }
 #endif
 
+#ifdef CONFIG_SMP
+extern int random_prepare_cpu(unsigned int cpu);
+extern int random_online_cpu(unsigned int cpu);
+#endif
+
 #endif /* _LINUX_RANDOM_H */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 88b42eb46406..e7c39c200e2b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -84,7 +84,7 @@ static inline int rcu_preempt_depth(void)
 
 /* Internal to kernel */
 void rcu_init(void);
-extern int rcu_scheduler_active __read_mostly;
+extern int rcu_scheduler_active;
 void rcu_sched_clock_irq(int user);
 void rcu_report_dead(unsigned int cpu);
 void rcutree_migrate_callbacks(int cpu);
@@ -924,7 +924,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
  *
  *     kvfree_rcu(ptr);
  *
- * where @ptr is a pointer to kvfree().
+ * where @ptr is the pointer to be freed by kvfree().
  *
  * Please note, head-less way of freeing is permitted to
  * use from a context that has to follow might_sleep()
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 858f4d429946..5fed476f977f 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -64,9 +64,8 @@ static inline void rcu_softirq_qs(void)
 		rcu_tasks_qs(current, (preempt)); \
 	} while (0)
 
-static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
+static inline int rcu_needs_cpu(void)
 {
-	*nextevt = KTIME_MAX;
 	return 0;
 }
 
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 53209d669400..9c6cfb742504 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -19,7 +19,7 @@
 
 void rcu_softirq_qs(void);
 void rcu_note_context_switch(bool preempt);
-int rcu_needs_cpu(u64 basem, u64 *nextevt);
+int rcu_needs_cpu(void);
 void rcu_cpu_stall_reset(void);
 
 /*
@@ -62,7 +62,7 @@ static inline void rcu_irq_exit_check_preempt(void) { }
 void exit_rcu(void);
 
 void rcu_scheduler_starting(void);
-extern int rcu_scheduler_active __read_mostly;
+extern int rcu_scheduler_active;
 void rcu_end_inkernel_boot(void);
 bool rcu_inkernel_boot_has_ended(void);
 bool rcu_is_watching(void);
diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 61c56cca95c4..8052d34da782 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -47,11 +47,7 @@ static inline void prepare_to_rcuwait(struct rcuwait *w)
 	rcu_assign_pointer(w->task, current);
 }
 
-static inline void finish_rcuwait(struct rcuwait *w)
-{
-        rcu_assign_pointer(w->task, NULL);
-	__set_current_state(TASK_RUNNING);
-}
+extern void finish_rcuwait(struct rcuwait *w);
 
 #define rcuwait_wait_event(w, condition, state)				\
 ({									\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75ba8aa60248..aab75966396f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -938,6 +938,9 @@ struct task_struct {
 	/* Recursion prevention for eventfd_signal() */
 	unsigned			in_eventfd_signal:1;
 #endif
+#ifdef CONFIG_IOMMU_SVA
+	unsigned			pasid_activated:1;
+#endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
@@ -1087,6 +1090,9 @@ struct task_struct {
 	/* Restored if set_restore_sigmask() was used: */
 	sigset_t			saved_sigmask;
 	struct sigpending		pending;
+#ifdef CONFIG_RT_DELAYED_SIGNALS
+	struct kernel_siginfo		forced_info;
+#endif
 	unsigned long			sas_ss_sp;
 	size_t				sas_ss_size;
 	unsigned int			sas_ss_flags;
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index aa5f09ca5bcf..a80356e9dc69 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -8,6 +8,7 @@
 #include <linux/mm_types.h>
 #include <linux/gfp.h>
 #include <linux/sync_core.h>
+#include <linux/ioasid.h>
 
 /*
  * Routines for handling mm_structs
@@ -433,4 +434,29 @@ static inline void membarrier_update_current_mm(struct mm_struct *next_mm)
 }
 #endif
 
+#ifdef CONFIG_IOMMU_SVA
+static inline void mm_pasid_init(struct mm_struct *mm)
+{
+	mm->pasid = INVALID_IOASID;
+}
+
+/* Associate a PASID with an mm_struct: */
+static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
+{
+	mm->pasid = pasid;
+}
+
+static inline void mm_pasid_drop(struct mm_struct *mm)
+{
+	if (pasid_valid(mm->pasid)) {
+		ioasid_free(mm->pasid);
+		mm->pasid = INVALID_IOASID;
+	}
+}
+#else
+static inline void mm_pasid_init(struct mm_struct *mm) {}
+static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {}
+static inline void mm_pasid_drop(struct mm_struct *mm) {}
+#endif
+
 #endif /* _LINUX_SCHED_MM_H */
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index d10150587d81..892562ebbd3a 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -79,6 +79,8 @@ static inline void *try_get_task_stack(struct task_struct *tsk)
 static inline void put_task_stack(struct task_struct *tsk) {}
 #endif
 
+void exit_task_stack_account(struct task_struct *tsk);
+
 #define task_stack_end_corrupted(task) \
 		(*(end_of_stack(task)) != STACK_END_MAGIC)
 
diff --git a/include/linux/topology.h b/include/linux/topology.h
index a6e201758ae9..f19bc3626297 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -211,6 +211,9 @@ static inline int cpu_to_mem(int cpu)
 #ifndef topology_drawer_id
 #define topology_drawer_id(cpu)			((void)(cpu), -1)
 #endif
+#ifndef topology_ppin
+#define topology_ppin(cpu)			((void)(cpu), 0ull)
+#endif
 #ifndef topology_sibling_cpumask
 #define topology_sibling_cpumask(cpu)		cpumask_of(cpu)
 #endif
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 2de442ececae..721089bb4c84 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -401,18 +401,24 @@ static inline int vdpa_reset(struct vdpa_device *vdev)
 	return ret;
 }
 
-static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features, bool locked)
+static inline int vdpa_set_features_unlocked(struct vdpa_device *vdev, u64 features)
 {
 	const struct vdpa_config_ops *ops = vdev->config;
 	int ret;
 
-	if (!locked)
-		mutex_lock(&vdev->cf_mutex);
-
 	vdev->features_valid = true;
 	ret = ops->set_driver_features(vdev, features);
-	if (!locked)
-		mutex_unlock(&vdev->cf_mutex);
+
+	return ret;
+}
+
+static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
+{
+	int ret;
+
+	mutex_lock(&vdev->cf_mutex);
+	ret = vdpa_set_features_unlocked(vdev, features);
+	mutex_unlock(&vdev->cf_mutex);
 
 	return ret;
 }
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 72292a62cd90..5464f398912a 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -133,7 +133,6 @@ bool is_virtio_device(struct device *dev);
 void virtio_break_device(struct virtio_device *dev);
 
 void virtio_config_changed(struct virtio_device *dev);
-int virtio_finalize_features(struct virtio_device *dev);
 #ifdef CONFIG_PM_SLEEP
 int virtio_device_freeze(struct virtio_device *dev);
 int virtio_device_restore(struct virtio_device *dev);
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 4d107ad31149..dafdc7f48c01 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -64,8 +64,9 @@ struct virtio_shm_region {
  *	Returns the first 64 feature bits (all we currently need).
  * @finalize_features: confirm what device features we'll be using.
  *	vdev: the virtio_device
- *	This gives the final feature bits for the device: it can change
+ *	This sends the driver feature bits to the device: it can change
  *	the dev->feature bits if it wants.
+ * Note: despite the name this can be called any number of times.
  *	Returns 0 on success or error status
  * @bus_name: return the bus name associated with the device (optional)
  *	vdev: the virtio_device
diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h
index c994d1b2cdba..3b9a40ae8bdb 100644
--- a/include/linux/watch_queue.h
+++ b/include/linux/watch_queue.h
@@ -28,7 +28,8 @@ struct watch_type_filter {
 struct watch_filter {
 	union {
 		struct rcu_head	rcu;
-		unsigned long	type_filter[2];	/* Bitmask of accepted types */
+		/* Bitmask of accepted types */
+		DECLARE_BITMAP(type_filter, WATCH_TYPE__NR);
 	};
 	u32			nr_filters;	/* Number of filters */
 	struct watch_type_filter filters[];
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index ab207677e0a8..f742e50207fb 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -205,7 +205,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
 struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
 					 struct sockaddr_vm *dst);
 void vsock_remove_sock(struct vsock_sock *vsk);
-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
+void vsock_for_each_connected_socket(struct vsock_transport *transport,
+				     void (*fn)(struct sock *sk));
 int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
 bool vsock_find_cid(unsigned int cid);
 
diff --git a/include/net/esp.h b/include/net/esp.h
index 9c5637d41d95..90cd02ff77ef 100644
--- a/include/net/esp.h
+++ b/include/net/esp.h
@@ -4,6 +4,8 @@
 
 #include <linux/skbuff.h>
 
+#define ESP_SKB_FRAG_MAXSIZE (PAGE_SIZE << SKB_FRAG_PAGE_ORDER)
+
 struct ip_esp_hdr;
 
 static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 8731d5bcb47d..b08b70989d2c 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -97,7 +97,6 @@ struct nf_conn {
 	unsigned long status;
 
 	u16		cpu;
-	u16		local_origin:1;
 	possible_net_t ct_net;
 
 #if IS_ENABLED(CONFIG_NF_NAT)
diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index c6f5aa74db89..2c530637e10a 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -56,6 +56,7 @@ enum cachefiles_coherency_trace {
 	cachefiles_coherency_set_ok,
 	cachefiles_coherency_vol_check_cmp,
 	cachefiles_coherency_vol_check_ok,
+	cachefiles_coherency_vol_check_resv,
 	cachefiles_coherency_vol_check_xattr,
 	cachefiles_coherency_vol_set_fail,
 	cachefiles_coherency_vol_set_ok,
@@ -139,6 +140,7 @@ enum cachefiles_error_trace {
 	EM(cachefiles_coherency_set_ok,		"SET ok  ")		\
 	EM(cachefiles_coherency_vol_check_cmp,	"VOL BAD cmp ")		\
 	EM(cachefiles_coherency_vol_check_ok,	"VOL OK      ")		\
+	EM(cachefiles_coherency_vol_check_resv,	"VOL BAD resv")	\
 	EM(cachefiles_coherency_vol_check_xattr,"VOL BAD xatt")		\
 	EM(cachefiles_coherency_vol_set_fail,	"VOL SET fail")		\
 	E_(cachefiles_coherency_vol_set_ok,	"VOL SET ok  ")
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 18d4341c581c..cddf5b6fbeb4 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -44,7 +44,7 @@ TRACE_EVENT(io_uring_create,
 		__entry->flags		= flags;
 	),
 
-	TP_printk("ring %p, fd %d sq size %d, cq size %d, flags %d",
+	TP_printk("ring %p, fd %d sq size %d, cq size %d, flags 0x%x",
 			  __entry->ctx, __entry->fd, __entry->sq_entries,
 			  __entry->cq_entries, __entry->flags)
 );
@@ -125,7 +125,7 @@ TRACE_EVENT(io_uring_file_get,
 		__entry->fd		= fd;
 	),
 
-	TP_printk("ring %p, req %p, user_data %llu, fd %d",
+	TP_printk("ring %p, req %p, user_data 0x%llx, fd %d",
 		__entry->ctx, __entry->req, __entry->user_data, __entry->fd)
 );
 
@@ -169,7 +169,7 @@ TRACE_EVENT(io_uring_queue_async_work,
 		__entry->rw		= rw;
 	),
 
-	TP_printk("ring %p, request %p, user_data %llu, opcode %d, flags %d, %s queue, work %p",
+	TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, flags 0x%x, %s queue, work %p",
 		__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
 		__entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
 );
@@ -205,7 +205,7 @@ TRACE_EVENT(io_uring_defer,
 		__entry->opcode	= opcode;
 	),
 
-	TP_printk("ring %p, request %p, user_data %llu, opcode %d",
+	TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d",
 		__entry->ctx, __entry->req, __entry->data, __entry->opcode)
 );
 
@@ -305,7 +305,7 @@ TRACE_EVENT(io_uring_fail_link,
 		__entry->link		= link;
 	),
 
-	TP_printk("ring %p, request %p, user_data %llu, opcode %d, link %p",
+	TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, link %p",
 		__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
 		__entry->link)
 );
@@ -342,9 +342,9 @@ TRACE_EVENT(io_uring_complete,
 		__entry->cflags		= cflags;
 	),
 
-	TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags %x",
+	TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x",
 		__entry->ctx, __entry->req,
-		(unsigned long long)__entry->user_data,
+		__entry->user_data,
 		__entry->res, __entry->cflags)
 );
 
@@ -389,7 +389,7 @@ TRACE_EVENT(io_uring_submit_sqe,
 		__entry->sq_thread	= sq_thread;
 	),
 
-	TP_printk("ring %p, req %p, user_data %llu, opcode %d, flags %u, "
+	TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, flags 0x%x, "
 		  "non block %d, sq_thread %d", __entry->ctx, __entry->req,
 		  __entry->user_data, __entry->opcode,
 		  __entry->flags, __entry->force_nonblock, __entry->sq_thread)
@@ -433,7 +433,7 @@ TRACE_EVENT(io_uring_poll_arm,
 		__entry->events		= events;
 	),
 
-	TP_printk("ring %p, req %p, user_data %llu, opcode %d, mask 0x%x, events 0x%x",
+	TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask 0x%x, events 0x%x",
 		  __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
 		  __entry->mask, __entry->events)
 );
@@ -470,7 +470,7 @@ TRACE_EVENT(io_uring_task_add,
 		__entry->mask		= mask;
 	),
 
-	TP_printk("ring %p, req %p, user_data %llu, opcode %d, mask %x",
+	TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask %x",
 		__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
 		__entry->mask)
 );
@@ -529,8 +529,8 @@ TRACE_EVENT(io_uring_req_failed,
 		__entry->error		= error;
 	),
 
-	TP_printk("ring %p, req %p, user_data %llu, "
-		"op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, "
+	TP_printk("ring %p, req %p, user_data 0x%llx, "
+		"op %d, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
 		  "len=%u, rw_flags=0x%x, buf_index=%d, "
 		  "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d",
 		  __entry->ctx, __entry->req, __entry->user_data,
diff --git a/include/trace/events/random.h b/include/trace/events/random.h
deleted file mode 100644
index a2d9aa16a5d7..000000000000
--- a/include/trace/events/random.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM random
-
-#if !defined(_TRACE_RANDOM_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_RANDOM_H
-
-#include <linux/writeback.h>
-#include <linux/tracepoint.h>
-
-TRACE_EVENT(add_device_randomness,
-	TP_PROTO(int bytes, unsigned long IP),
-
-	TP_ARGS(bytes, IP),
-
-	TP_STRUCT__entry(
-		__field(	  int,	bytes			)
-		__field(unsigned long,	IP			)
-	),
-
-	TP_fast_assign(
-		__entry->bytes		= bytes;
-		__entry->IP		= IP;
-	),
-
-	TP_printk("bytes %d caller %pS",
-		__entry->bytes, (void *)__entry->IP)
-);
-
-DECLARE_EVENT_CLASS(random__mix_pool_bytes,
-	TP_PROTO(int bytes, unsigned long IP),
-
-	TP_ARGS(bytes, IP),
-
-	TP_STRUCT__entry(
-		__field(	  int,	bytes			)
-		__field(unsigned long,	IP			)
-	),
-
-	TP_fast_assign(
-		__entry->bytes		= bytes;
-		__entry->IP		= IP;
-	),
-
-	TP_printk("input pool: bytes %d caller %pS",
-		  __entry->bytes, (void *)__entry->IP)
-);
-
-DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes,
-	TP_PROTO(int bytes, unsigned long IP),
-
-	TP_ARGS(bytes, IP)
-);
-
-DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock,
-	TP_PROTO(int bytes, unsigned long IP),
-
-	TP_ARGS(bytes, IP)
-);
-
-TRACE_EVENT(credit_entropy_bits,
-	TP_PROTO(int bits, int entropy_count, unsigned long IP),
-
-	TP_ARGS(bits, entropy_count, IP),
-
-	TP_STRUCT__entry(
-		__field(	  int,	bits			)
-		__field(	  int,	entropy_count		)
-		__field(unsigned long,	IP			)
-	),
-
-	TP_fast_assign(
-		__entry->bits		= bits;
-		__entry->entropy_count	= entropy_count;
-		__entry->IP		= IP;
-	),
-
-	TP_printk("input pool: bits %d entropy_count %d caller %pS",
-		  __entry->bits, __entry->entropy_count, (void *)__entry->IP)
-);
-
-TRACE_EVENT(debit_entropy,
-	TP_PROTO(int debit_bits),
-
-	TP_ARGS( debit_bits),
-
-	TP_STRUCT__entry(
-		__field(	  int,	debit_bits		)
-	),
-
-	TP_fast_assign(
-		__entry->debit_bits	= debit_bits;
-	),
-
-	TP_printk("input pool: debit_bits %d", __entry->debit_bits)
-);
-
-TRACE_EVENT(add_input_randomness,
-	TP_PROTO(int input_bits),
-
-	TP_ARGS(input_bits),
-
-	TP_STRUCT__entry(
-		__field(	  int,	input_bits		)
-	),
-
-	TP_fast_assign(
-		__entry->input_bits	= input_bits;
-	),
-
-	TP_printk("input_pool_bits %d", __entry->input_bits)
-);
-
-TRACE_EVENT(add_disk_randomness,
-	TP_PROTO(dev_t dev, int input_bits),
-
-	TP_ARGS(dev, input_bits),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	  int,	input_bits		)
-	),
-
-	TP_fast_assign(
-		__entry->dev		= dev;
-		__entry->input_bits	= input_bits;
-	),
-
-	TP_printk("dev %d,%d input_pool_bits %d", MAJOR(__entry->dev),
-		  MINOR(__entry->dev), __entry->input_bits)
-);
-
-DECLARE_EVENT_CLASS(random__get_random_bytes,
-	TP_PROTO(int nbytes, unsigned long IP),
-
-	TP_ARGS(nbytes, IP),
-
-	TP_STRUCT__entry(
-		__field(	  int,	nbytes			)
-		__field(unsigned long,	IP			)
-	),
-
-	TP_fast_assign(
-		__entry->nbytes		= nbytes;
-		__entry->IP		= IP;
-	),
-
-	TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP)
-);
-
-DEFINE_EVENT(random__get_random_bytes, get_random_bytes,
-	TP_PROTO(int nbytes, unsigned long IP),
-
-	TP_ARGS(nbytes, IP)
-);
-
-DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch,
-	TP_PROTO(int nbytes, unsigned long IP),
-
-	TP_ARGS(nbytes, IP)
-);
-
-DECLARE_EVENT_CLASS(random__extract_entropy,
-	TP_PROTO(int nbytes, int entropy_count, unsigned long IP),
-
-	TP_ARGS(nbytes, entropy_count, IP),
-
-	TP_STRUCT__entry(
-		__field(	  int,	nbytes			)
-		__field(	  int,	entropy_count		)
-		__field(unsigned long,	IP			)
-	),
-
-	TP_fast_assign(
-		__entry->nbytes		= nbytes;
-		__entry->entropy_count	= entropy_count;
-		__entry->IP		= IP;
-	),
-
-	TP_printk("input pool: nbytes %d entropy_count %d caller %pS",
-		  __entry->nbytes, __entry->entropy_count, (void *)__entry->IP)
-);
-
-
-DEFINE_EVENT(random__extract_entropy, extract_entropy,
-	TP_PROTO(int nbytes, int entropy_count, unsigned long IP),
-
-	TP_ARGS(nbytes, entropy_count, IP)
-);
-
-TRACE_EVENT(urandom_read,
-	TP_PROTO(int got_bits, int pool_left, int input_left),
-
-	TP_ARGS(got_bits, pool_left, input_left),
-
-	TP_STRUCT__entry(
-		__field(	  int,	got_bits		)
-		__field(	  int,	pool_left		)
-		__field(	  int,	input_left		)
-	),
-
-	TP_fast_assign(
-		__entry->got_bits	= got_bits;
-		__entry->pool_left	= pool_left;
-		__entry->input_left	= input_left;
-	),
-
-	TP_printk("got_bits %d nonblocking_pool_entropy_left %d "
-		  "input_entropy_left %d", __entry->got_bits,
-		  __entry->pool_left, __entry->input_left)
-);
-
-TRACE_EVENT(prandom_u32,
-
-	TP_PROTO(unsigned int ret),
-
-	TP_ARGS(ret),
-
-	TP_STRUCT__entry(
-		__field(   unsigned int, ret)
-	),
-
-	TP_fast_assign(
-		__entry->ret = ret;
-	),
-
-	TP_printk("ret=%u" , __entry->ret)
-);
-
-#endif /* _TRACE_RANDOM_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 670e41783edd..90b2fb0292cb 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -794,16 +794,15 @@ TRACE_EVENT_RCU(rcu_torture_read,
  * Tracepoint for rcu_barrier() execution.  The string "s" describes
  * the rcu_barrier phase:
  *	"Begin": rcu_barrier() started.
+ *	"CB": An rcu_barrier_callback() invoked a callback, not the last.
  *	"EarlyExit": rcu_barrier() piggybacked, thus early exit.
  *	"Inc1": rcu_barrier() piggyback check counter incremented.
- *	"OfflineNoCBQ": rcu_barrier() found offline no-CBs CPU with callbacks.
- *	"OnlineQ": rcu_barrier() found online CPU with callbacks.
- *	"OnlineNQ": rcu_barrier() found online CPU, no callbacks.
+ *	"Inc2": rcu_barrier() piggyback check counter incremented.
  *	"IRQ": An rcu_barrier_callback() callback posted on remote CPU.
  *	"IRQNQ": An rcu_barrier_callback() callback found no callbacks.
- *	"CB": An rcu_barrier_callback() invoked a callback, not the last.
  *	"LastCB": An rcu_barrier_callback() invoked the last callback.
- *	"Inc2": rcu_barrier() piggyback check counter incremented.
+ *	"NQ": rcu_barrier() found a CPU with no callbacks.
+ *	"OnlineQ": rcu_barrier() found online CPU with callbacks.
  * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument
  * is the count of remaining callbacks, and "done" is the piggybacking count.
  */
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 61bf4774b8f2..fe8e5b74cb39 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -40,6 +40,9 @@ typedef __s64	Elf64_Sxword;
 
 #define PT_GNU_STACK	(PT_LOOS + 0x474e551)
 
+/* ARM MTE memory tag segment type */
+#define PT_ARM_MEMTAG_MTE	(PT_LOPROC + 0x1)
+
 /*
  * Extended Numbering
  *
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 8bd4bfdd9a89..d2be4eb22008 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -101,6 +101,7 @@ enum {
 #define IORING_SETUP_CLAMP	(1U << 4)	/* clamp SQ/CQ ring sizes */
 #define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
 #define IORING_SETUP_R_DISABLED	(1U << 6)	/* start with ring disabled */
+#define IORING_SETUP_SUBMIT_ALL	(1U << 7)	/* continue submit on error */
 
 enum {
 	IORING_OP_NOP,
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 0425cd79af9a..f724129c0425 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -36,6 +36,7 @@
 #define EFIVARFS_MAGIC		0xde5e81e4
 #define HOSTFS_SUPER_MAGIC	0x00c0ffee
 #define OVERLAYFS_SUPER_MAGIC	0x794c7630
+#define FUSE_SUPER_MAGIC	0x65735546
 
 #define MINIX_SUPER_MAGIC	0x137F		/* minix v1 fs, 14 char names */
 #define MINIX_SUPER_MAGIC2	0x138F		/* minix v1 fs, 30 char names */
diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h
index 9aa2fedfa309..fc78bf3aead7 100644
--- a/include/uapi/linux/thermal.h
+++ b/include/uapi/linux/thermal.h
@@ -44,7 +44,10 @@ enum thermal_genl_attr {
 	THERMAL_GENL_ATTR_CDEV_MAX_STATE,
 	THERMAL_GENL_ATTR_CDEV_NAME,
 	THERMAL_GENL_ATTR_GOV_NAME,
-
+	THERMAL_GENL_ATTR_CPU_CAPABILITY,
+	THERMAL_GENL_ATTR_CPU_CAPABILITY_ID,
+	THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE,
+	THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY,
 	__THERMAL_GENL_ATTR_MAX,
 };
 #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1)
@@ -71,6 +74,7 @@ enum thermal_genl_event {
 	THERMAL_GENL_EVENT_CDEV_DELETE,		/* Cdev unbound */
 	THERMAL_GENL_EVENT_CDEV_STATE_UPDATE,	/* Cdev state updated */
 	THERMAL_GENL_EVENT_TZ_GOV_CHANGE,	/* Governor policy changed  */
+	THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE,	/* CPU capability changed */
 	__THERMAL_GENL_EVENT_MAX,
 };
 #define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1)
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index cb854df031ce..c9fea9389ebe 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -104,17 +104,32 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly);
  * access has been ended, free the given page too.  Access will be ended
  * immediately iff the grant entry is not in use, otherwise it will happen
  * some time later.  page may be 0, in which case no freeing will occur.
+ * Note that the granted page might still be accessed (read or write) by the
+ * other side after gnttab_end_foreign_access() returns, so even if page was
+ * specified as 0 it is not allowed to just reuse the page for other
+ * purposes immediately. gnttab_end_foreign_access() will take an additional
+ * reference to the granted page in this case, which is dropped only after
+ * the grant is no longer in use.
+ * This requires that multi page allocations for areas subject to
+ * gnttab_end_foreign_access() are done via alloc_pages_exact() (and freeing
+ * via free_pages_exact()) in order to avoid high order pages.
  */
 void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
 			       unsigned long page);
 
+/*
+ * End access through the given grant reference, iff the grant entry is
+ * no longer in use.  In case of success ending foreign access, the
+ * grant reference is deallocated.
+ * Return 1 if the grant entry was freed, 0 if it is still in use.
+ */
+int gnttab_try_end_foreign_access(grant_ref_t ref);
+
 int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn);
 
 unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref);
 unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);
 
-int gnttab_query_foreign_access(grant_ref_t ref);
-
 /*
  * operations on reserved batches of grant references
  */
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index ce77f0265660..5644abd5f8a8 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -132,4 +132,14 @@ config SCHED_CORE
 	  which is the likely usage by Linux distributions, there should
 	  be no measurable impact on performance.
 
-
+config ARCH_WANTS_RT_DELAYED_SIGNALS
+	bool
+	help
+	  This option is selected by architectures where raising signals
+	  can happen in atomic contexts on PREEMPT_RT enabled kernels. This
+	  option delays raising the signal until the return to user space
+	  loop where it is also delivered. X86 requires this to deliver
+	  signals from trap handlers which run on IST stacks.
+
+config RT_DELAYED_SIGNALS
+	def_bool PREEMPT_RT && ARCH_WANTS_RT_DELAYED_SIGNALS
diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config
index 07df6d93c4df..e8db8d938661 100644
--- a/kernel/configs/debug.config
+++ b/kernel/configs/debug.config
@@ -16,6 +16,7 @@ CONFIG_SYMBOLIC_ERRNAME=y
 #
 # Compile-time checks and compiler options
 #
+CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_FRAME_WARN=2048
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 407a2568f35e..238cba15449f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -34,6 +34,7 @@
 #include <linux/scs.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/cpuset.h>
+#include <linux/random.h>
 
 #include <trace/events/power.h>
 #define CREATE_TRACE_POINTS
@@ -1659,6 +1660,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.startup.single		= perf_event_init_cpu,
 		.teardown.single	= perf_event_exit_cpu,
 	},
+	[CPUHP_RANDOM_PREPARE] = {
+		.name			= "random:prepare",
+		.startup.single		= random_prepare_cpu,
+		.teardown.single	= NULL,
+	},
 	[CPUHP_WORKQUEUE_PREP] = {
 		.name			= "workqueue:prepare",
 		.startup.single		= workqueue_prepare_cpu,
@@ -1782,6 +1788,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.startup.single		= workqueue_online_cpu,
 		.teardown.single	= workqueue_offline_cpu,
 	},
+	[CPUHP_AP_RANDOM_ONLINE] = {
+		.name			= "random:online",
+		.startup.single		= random_online_cpu,
+		.teardown.single	= NULL,
+	},
 	[CPUHP_AP_RCUTREE_ONLINE] = {
 		.name			= "RCU/tree:online",
 		.startup.single		= rcutree_online_cpu,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index bfc56cb21705..6db1c475ec82 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -627,10 +627,14 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 	for (i = 0; i < nr_slots(alloc_size + offset); i++)
 		mem->slots[index + i].orig_addr = slot_addr(orig_addr, i);
 	tlb_addr = slot_addr(mem->start, index) + offset;
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-	    (!(attrs & DMA_ATTR_OVERWRITE) || dir == DMA_TO_DEVICE ||
-	    dir == DMA_BIDIRECTIONAL))
-		swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
+	/*
+	 * When dir == DMA_FROM_DEVICE we could omit the copy from the orig
+	 * to the tlb buffer, if we knew for sure the device will
+	 * overwirte the entire current content. But we don't. Thus
+	 * unconditional bounce may prevent leaking swiotlb content (i.e.
+	 * kernel memory) to user-space.
+	 */
+	swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
 	return tlb_addr;
 }
 
@@ -697,10 +701,13 @@ void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
 void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
 		size_t size, enum dma_data_direction dir)
 {
-	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-		swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE);
-	else
-		BUG_ON(dir != DMA_FROM_DEVICE);
+	/*
+	 * Unconditional bounce is necessary to avoid corruption on
+	 * sync_*_for_cpu or dma_ummap_* when the device didn't overwrite
+	 * the whole lengt of the bounce buffer.
+	 */
+	swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE);
+	BUG_ON(!valid_dma_direction(dir));
 }
 
 void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index bad713684c2e..0543a2c92f20 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -148,6 +148,18 @@ static void handle_signal_work(struct pt_regs *regs, unsigned long ti_work)
 	arch_do_signal_or_restart(regs, ti_work & _TIF_SIGPENDING);
 }
 
+#ifdef CONFIG_RT_DELAYED_SIGNALS
+static inline void raise_delayed_signal(void)
+{
+	if (unlikely(current->forced_info.si_signo)) {
+		force_sig_info(&current->forced_info);
+		current->forced_info.si_signo = 0;
+	}
+}
+#else
+static inline void raise_delayed_signal(void) { }
+#endif
+
 static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
 					    unsigned long ti_work)
 {
@@ -162,6 +174,8 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
 		if (ti_work & _TIF_NEED_RESCHED)
 			schedule();
 
+		raise_delayed_signal();
+
 		if (ti_work & _TIF_UPROBE)
 			uprobe_notify_resume(regs);
 
diff --git a/kernel/exit.c b/kernel/exit.c
index b00a25bb4ab9..c303cffe7fdb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -845,6 +845,7 @@ void __noreturn do_exit(long code)
 		put_page(tsk->task_frag.page);
 
 	validate_creds_for_do_exit(tsk);
+	exit_task_stack_account(tsk);
 
 	check_stack_usage();
 	preempt_disable();
diff --git a/kernel/fork.c b/kernel/fork.c
index f1e89007f228..d74c30b6733b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -97,6 +97,7 @@
 #include <linux/scs.h>
 #include <linux/io_uring.h>
 #include <linux/bpf.h>
+#include <linux/sched/mm.h>
 
 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
@@ -185,7 +186,7 @@ static inline void free_task_struct(struct task_struct *tsk)
  */
 # if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
 
-#ifdef CONFIG_VMAP_STACK
+#  ifdef CONFIG_VMAP_STACK
 /*
  * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
  * flush.  Try to minimize the number of calls by caching stacks.
@@ -193,6 +194,41 @@ static inline void free_task_struct(struct task_struct *tsk)
 #define NR_CACHED_STACKS 2
 static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
 
+struct vm_stack {
+	struct rcu_head rcu;
+	struct vm_struct *stack_vm_area;
+};
+
+static bool try_release_thread_stack_to_cache(struct vm_struct *vm)
+{
+	unsigned int i;
+
+	for (i = 0; i < NR_CACHED_STACKS; i++) {
+		if (this_cpu_cmpxchg(cached_stacks[i], NULL, vm) != NULL)
+			continue;
+		return true;
+	}
+	return false;
+}
+
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+	struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu);
+
+	if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area))
+		return;
+
+	vfree(vm_stack);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+	struct vm_stack *vm_stack = tsk->stack;
+
+	vm_stack->stack_vm_area = tsk->stack_vm_area;
+	call_rcu(&vm_stack->rcu, thread_stack_free_rcu);
+}
+
 static int free_vm_stack_cache(unsigned int cpu)
 {
 	struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu);
@@ -210,11 +246,35 @@ static int free_vm_stack_cache(unsigned int cpu)
 
 	return 0;
 }
-#endif
 
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+static int memcg_charge_kernel_stack(struct vm_struct *vm)
 {
-#ifdef CONFIG_VMAP_STACK
+	int i;
+	int ret;
+
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
+	BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+
+	for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+		ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0);
+		if (ret)
+			goto err;
+	}
+	return 0;
+err:
+	/*
+	 * If memcg_kmem_charge_page() fails, page's memory cgroup pointer is
+	 * NULL, and memcg_kmem_uncharge_page() in free_thread_stack() will
+	 * ignore this page.
+	 */
+	for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+		memcg_kmem_uncharge_page(vm->pages[i], 0);
+	return ret;
+}
+
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
+{
+	struct vm_struct *vm;
 	void *stack;
 	int i;
 
@@ -232,9 +292,14 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 		/* Clear stale pointers from reused stack. */
 		memset(s->addr, 0, THREAD_SIZE);
 
+		if (memcg_charge_kernel_stack(s)) {
+			vfree(s->addr);
+			return -ENOMEM;
+		}
+
 		tsk->stack_vm_area = s;
 		tsk->stack = s->addr;
-		return s->addr;
+		return 0;
 	}
 
 	/*
@@ -247,71 +312,95 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 				     THREADINFO_GFP & ~__GFP_ACCOUNT,
 				     PAGE_KERNEL,
 				     0, node, __builtin_return_address(0));
+	if (!stack)
+		return -ENOMEM;
 
+	vm = find_vm_area(stack);
+	if (memcg_charge_kernel_stack(vm)) {
+		vfree(stack);
+		return -ENOMEM;
+	}
 	/*
 	 * We can't call find_vm_area() in interrupt context, and
 	 * free_thread_stack() can be called in interrupt context,
 	 * so cache the vm_struct.
 	 */
-	if (stack) {
-		tsk->stack_vm_area = find_vm_area(stack);
-		tsk->stack = stack;
-	}
-	return stack;
-#else
+	tsk->stack_vm_area = vm;
+	tsk->stack = stack;
+	return 0;
+}
+
+static void free_thread_stack(struct task_struct *tsk)
+{
+	if (!try_release_thread_stack_to_cache(tsk->stack_vm_area))
+		thread_stack_delayed_free(tsk);
+
+	tsk->stack = NULL;
+	tsk->stack_vm_area = NULL;
+}
+
+#  else /* !CONFIG_VMAP_STACK */
+
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+	__free_pages(virt_to_page(rh), THREAD_SIZE_ORDER);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+	struct rcu_head *rh = tsk->stack;
+
+	call_rcu(rh, thread_stack_free_rcu);
+}
+
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
+{
 	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
 					     THREAD_SIZE_ORDER);
 
 	if (likely(page)) {
 		tsk->stack = kasan_reset_tag(page_address(page));
-		return tsk->stack;
+		return 0;
 	}
-	return NULL;
-#endif
+	return -ENOMEM;
 }
 
-static inline void free_thread_stack(struct task_struct *tsk)
+static void free_thread_stack(struct task_struct *tsk)
 {
-#ifdef CONFIG_VMAP_STACK
-	struct vm_struct *vm = task_stack_vm_area(tsk);
+	thread_stack_delayed_free(tsk);
+	tsk->stack = NULL;
+}
 
-	if (vm) {
-		int i;
+#  endif /* CONFIG_VMAP_STACK */
+# else /* !(THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)) */
 
-		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
-			memcg_kmem_uncharge_page(vm->pages[i], 0);
-
-		for (i = 0; i < NR_CACHED_STACKS; i++) {
-			if (this_cpu_cmpxchg(cached_stacks[i],
-					NULL, tsk->stack_vm_area) != NULL)
-				continue;
+static struct kmem_cache *thread_stack_cache;
 
-			return;
-		}
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+	kmem_cache_free(thread_stack_cache, rh);
+}
 
-		vfree_atomic(tsk->stack);
-		return;
-	}
-#endif
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+	struct rcu_head *rh = tsk->stack;
 
-	__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
+	call_rcu(rh, thread_stack_free_rcu);
 }
-# else
-static struct kmem_cache *thread_stack_cache;
 
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
-						  int node)
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
 	unsigned long *stack;
 	stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
 	stack = kasan_reset_tag(stack);
 	tsk->stack = stack;
-	return stack;
+	return stack ? 0 : -ENOMEM;
 }
 
 static void free_thread_stack(struct task_struct *tsk)
 {
-	kmem_cache_free(thread_stack_cache, tsk->stack);
+	thread_stack_delayed_free(tsk);
+	tsk->stack = NULL;
 }
 
 void thread_stack_cache_init(void)
@@ -321,8 +410,26 @@ void thread_stack_cache_init(void)
 					THREAD_SIZE, NULL);
 	BUG_ON(thread_stack_cache == NULL);
 }
-# endif
-#endif
+
+# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
+#else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
+
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
+{
+	unsigned long *stack;
+
+	stack = arch_alloc_thread_stack_node(tsk, node);
+	tsk->stack = stack;
+	return stack ? 0 : -ENOMEM;
+}
+
+static void free_thread_stack(struct task_struct *tsk)
+{
+	arch_free_thread_stack(tsk);
+	tsk->stack = NULL;
+}
+
+#endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
 
 /* SLAB cache for signal_struct structures (tsk->signal) */
 static struct kmem_cache *signal_cachep;
@@ -379,50 +486,34 @@ void vm_area_free(struct vm_area_struct *vma)
 
 static void account_kernel_stack(struct task_struct *tsk, int account)
 {
-	void *stack = task_stack_page(tsk);
-	struct vm_struct *vm = task_stack_vm_area(tsk);
-
-	if (vm) {
+	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+		struct vm_struct *vm = task_stack_vm_area(tsk);
 		int i;
 
 		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
 			mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB,
 					      account * (PAGE_SIZE / 1024));
 	} else {
+		void *stack = task_stack_page(tsk);
+
 		/* All stack pages are in the same node. */
 		mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB,
 				      account * (THREAD_SIZE / 1024));
 	}
 }
 
-static int memcg_charge_kernel_stack(struct task_struct *tsk)
+void exit_task_stack_account(struct task_struct *tsk)
 {
-#ifdef CONFIG_VMAP_STACK
-	struct vm_struct *vm = task_stack_vm_area(tsk);
-	int ret;
-
-	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
+	account_kernel_stack(tsk, -1);
 
-	if (vm) {
+	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+		struct vm_struct *vm;
 		int i;
 
-		BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
-
-		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
-			/*
-			 * If memcg_kmem_charge_page() fails, page's
-			 * memory cgroup pointer is NULL, and
-			 * memcg_kmem_uncharge_page() in free_thread_stack()
-			 * will ignore this page.
-			 */
-			ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL,
-						     0);
-			if (ret)
-				return ret;
-		}
+		vm = task_stack_vm_area(tsk);
+		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+			memcg_kmem_uncharge_page(vm->pages[i], 0);
 	}
-#endif
-	return 0;
 }
 
 static void release_task_stack(struct task_struct *tsk)
@@ -430,12 +521,7 @@ static void release_task_stack(struct task_struct *tsk)
 	if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
 		return;  /* Better to leak the stack than to free prematurely */
 
-	account_kernel_stack(tsk, -1);
 	free_thread_stack(tsk);
-	tsk->stack = NULL;
-#ifdef CONFIG_VMAP_STACK
-	tsk->stack_vm_area = NULL;
-#endif
 }
 
 #ifdef CONFIG_THREAD_INFO_IN_TASK
@@ -874,8 +960,6 @@ void set_task_stack_end_magic(struct task_struct *tsk)
 static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 {
 	struct task_struct *tsk;
-	unsigned long *stack;
-	struct vm_struct *stack_vm_area __maybe_unused;
 	int err;
 
 	if (node == NUMA_NO_NODE)
@@ -884,32 +968,18 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	if (!tsk)
 		return NULL;
 
-	stack = alloc_thread_stack_node(tsk, node);
-	if (!stack)
+	err = arch_dup_task_struct(tsk, orig);
+	if (err)
 		goto free_tsk;
 
-	if (memcg_charge_kernel_stack(tsk))
-		goto free_stack;
-
-	stack_vm_area = task_stack_vm_area(tsk);
-
-	err = arch_dup_task_struct(tsk, orig);
+	err = alloc_thread_stack_node(tsk, node);
+	if (err)
+		goto free_tsk;
 
-	/*
-	 * arch_dup_task_struct() clobbers the stack-related fields.  Make
-	 * sure they're properly initialized before using any stack-related
-	 * functions again.
-	 */
-	tsk->stack = stack;
-#ifdef CONFIG_VMAP_STACK
-	tsk->stack_vm_area = stack_vm_area;
-#endif
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	refcount_set(&tsk->stack_refcount, 1);
 #endif
-
-	if (err)
-		goto free_stack;
+	account_kernel_stack(tsk, 1);
 
 	err = scs_prepare(tsk, node);
 	if (err)
@@ -953,8 +1023,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	tsk->wake_q.next = NULL;
 	tsk->worker_private = NULL;
 
-	account_kernel_stack(tsk, 1);
-
 	kcov_task_init(tsk);
 	kmap_local_fork(tsk);
 
@@ -967,12 +1035,17 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	tsk->use_memdelay = 0;
 #endif
 
+#ifdef CONFIG_IOMMU_SVA
+	tsk->pasid_activated = 0;
+#endif
+
 #ifdef CONFIG_MEMCG
 	tsk->active_memcg = NULL;
 #endif
 	return tsk;
 
 free_stack:
+	exit_task_stack_account(tsk);
 	free_thread_stack(tsk);
 free_tsk:
 	free_task_struct(tsk);
@@ -1019,13 +1092,6 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 #endif
 }
 
-static void mm_init_pasid(struct mm_struct *mm)
-{
-#ifdef CONFIG_IOMMU_SUPPORT
-	mm->pasid = INIT_PASID;
-#endif
-}
-
 static void mm_init_uprobes_state(struct mm_struct *mm)
 {
 #ifdef CONFIG_UPROBES
@@ -1054,7 +1120,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_init_cpumask(mm);
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
-	mm_init_pasid(mm);
+	mm_pasid_init(mm);
 	RCU_INIT_POINTER(mm->exe_file, NULL);
 	mmu_notifier_subscriptions_init(mm);
 	init_tlb_flush_pending(mm);
@@ -1121,6 +1187,7 @@ static inline void __mmput(struct mm_struct *mm)
 	}
 	if (mm->binfmt)
 		module_put(mm->binfmt->module);
+	mm_pasid_drop(mm);
 	mmdrop(mm);
 }
 
@@ -2451,6 +2518,7 @@ bad_fork_cleanup_count:
 	exit_creds(p);
 bad_fork_free:
 	WRITE_ONCE(p->__state, TASK_DEAD);
+	exit_task_stack_account(p);
 	put_task_stack(p);
 	delayed_free_task(p);
 fork_out:
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index c09324663088..54af0deb239b 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -38,7 +38,7 @@ struct irqaction chained_action = {
  *	@irq:	irq number
  *	@chip:	pointer to irq chip description structure
  */
-int irq_set_chip(unsigned int irq, struct irq_chip *chip)
+int irq_set_chip(unsigned int irq, const struct irq_chip *chip)
 {
 	unsigned long flags;
 	struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
@@ -46,10 +46,7 @@ int irq_set_chip(unsigned int irq, struct irq_chip *chip)
 	if (!desc)
 		return -EINVAL;
 
-	if (!chip)
-		chip = &no_irq_chip;
-
-	desc->irq_data.chip = chip;
+	desc->irq_data.chip = (struct irq_chip *)(chip ?: &no_irq_chip);
 	irq_put_desc_unlock(desc, flags);
 	/*
 	 * For !CONFIG_SPARSE_IRQ make the irq show up in
@@ -1073,7 +1070,7 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
 EXPORT_SYMBOL_GPL(irq_set_chained_handler_and_data);
 
 void
-irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip,
 			      irq_flow_handler_t handle, const char *name)
 {
 	irq_set_chip(irq, chip);
@@ -1558,6 +1555,14 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 	return 0;
 }
 
+static struct device *irq_get_parent_device(struct irq_data *data)
+{
+	if (data->domain)
+		return data->domain->dev;
+
+	return NULL;
+}
+
 /**
  * irq_chip_pm_get - Enable power for an IRQ chip
  * @data:	Pointer to interrupt specific data
@@ -1567,12 +1572,13 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
  */
 int irq_chip_pm_get(struct irq_data *data)
 {
+	struct device *dev = irq_get_parent_device(data);
 	int retval;
 
-	if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) {
-		retval = pm_runtime_get_sync(data->chip->parent_device);
+	if (IS_ENABLED(CONFIG_PM) && dev) {
+		retval = pm_runtime_get_sync(dev);
 		if (retval < 0) {
-			pm_runtime_put_noidle(data->chip->parent_device);
+			pm_runtime_put_noidle(dev);
 			return retval;
 		}
 	}
@@ -1590,10 +1596,11 @@ int irq_chip_pm_get(struct irq_data *data)
  */
 int irq_chip_pm_put(struct irq_data *data)
 {
+	struct device *dev = irq_get_parent_device(data);
 	int retval = 0;
 
-	if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device)
-		retval = pm_runtime_put(data->chip->parent_device);
+	if (IS_ENABLED(CONFIG_PM) && dev)
+		retval = pm_runtime_put(dev);
 
 	return (retval < 0) ? retval : 0;
 }
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index e4cff358b437..2b43f5f5033d 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -69,8 +69,12 @@ irq_debug_show_chip(struct seq_file *m, struct irq_data *data, int ind)
 		seq_printf(m, "chip: None\n");
 		return;
 	}
-	seq_printf(m, "%*schip:    %s\n", ind, "", chip->name);
-	seq_printf(m, "%*sflags:   0x%lx\n", ind + 1, "", chip->flags);
+	seq_printf(m, "%*schip:    ", ind, "");
+	if (chip->irq_print_chip)
+		chip->irq_print_chip(data, m);
+	else
+		seq_printf(m, "%s", chip->name);
+	seq_printf(m, "\n%*sflags:   0x%lx\n", ind + 1, "", chip->flags);
 	irq_debug_show_bits(m, ind, chip->flags, irqchip_flags,
 			    ARRAY_SIZE(irqchip_flags));
 }
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 2267e6527db3..939d21cd55c3 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -640,7 +640,7 @@ int handle_irq_desc(struct irq_desc *desc)
 		return -EINVAL;
 
 	data = irq_desc_get_irq_data(desc);
-	if (WARN_ON_ONCE(!in_irq() && handle_enforce_irqctx(data)))
+	if (WARN_ON_ONCE(!in_hardirq() && handle_enforce_irqctx(data)))
 		return -EPERM;
 
 	generic_handle_irq_desc(desc);
@@ -662,6 +662,29 @@ int generic_handle_irq(unsigned int irq)
 }
 EXPORT_SYMBOL_GPL(generic_handle_irq);
 
+/**
+ * generic_handle_irq_safe - Invoke the handler for a particular irq from any
+ *			     context.
+ * @irq:	The irq number to handle
+ *
+ * Returns:	0 on success, a negative value on error.
+ *
+ * This function can be called from any context (IRQ or process context). It
+ * will report an error if not invoked from IRQ context and the irq has been
+ * marked to enforce IRQ-context only.
+ */
+int generic_handle_irq_safe(unsigned int irq)
+{
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+	ret = handle_irq_desc(irq_to_desc(irq));
+	local_irq_restore(flags);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(generic_handle_irq_safe);
+
 #ifdef CONFIG_IRQ_DOMAIN
 /**
  * generic_handle_domain_irq - Invoke the handler for a HW irq belonging
@@ -676,7 +699,7 @@ EXPORT_SYMBOL_GPL(generic_handle_irq);
  */
 int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq)
 {
-	WARN_ON_ONCE(!in_irq());
+	WARN_ON_ONCE(!in_hardirq());
 	return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
 }
 EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index bf38c546aa25..d5ce96510549 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1319,7 +1319,8 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data);
  * @chip_data:	The associated chip data
  */
 int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq,
-				  irq_hw_number_t hwirq, struct irq_chip *chip,
+				  irq_hw_number_t hwirq,
+				  const struct irq_chip *chip,
 				  void *chip_data)
 {
 	struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
@@ -1328,7 +1329,7 @@ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq,
 		return -ENOENT;
 
 	irq_data->hwirq = hwirq;
-	irq_data->chip = chip ? chip : &no_irq_chip;
+	irq_data->chip = (struct irq_chip *)(chip ? chip : &no_irq_chip);
 	irq_data->chip_data = chip_data;
 
 	return 0;
@@ -1347,7 +1348,7 @@ EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip);
  * @handler_name:	The interrupt handler name
  */
 void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
-			 irq_hw_number_t hwirq, struct irq_chip *chip,
+			 irq_hw_number_t hwirq, const struct irq_chip *chip,
 			 void *chip_data, irq_flow_handler_t handler,
 			 void *handler_data, const char *handler_name)
 {
@@ -1853,7 +1854,7 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data);
  * @handler_name:	The interrupt handler name
  */
 void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
-			 irq_hw_number_t hwirq, struct irq_chip *chip,
+			 irq_hw_number_t hwirq, const struct irq_chip *chip,
 			 void *chip_data, irq_flow_handler_t handler,
 			 void *handler_data, const char *handler_name)
 {
diff --git a/kernel/padata.c b/kernel/padata.c
index 18d3a5c699d8..e5819bb8bd1d 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -181,7 +181,7 @@ int padata_do_parallel(struct padata_shell *ps,
 		goto out;
 
 	if (!cpumask_test_cpu(*cb_cpu, pd->cpumask.cbcpu)) {
-		if (!cpumask_weight(pd->cpumask.cbcpu))
+		if (cpumask_empty(pd->cpumask.cbcpu))
 			goto out;
 
 		/* Select an alternate fallback CPU and notify the caller. */
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index e6af502c2fd7..0ac805b753e5 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -689,8 +689,10 @@ static int load_image_and_restore(void)
 
 	lock_device_hotplug();
 	error = create_basic_memory_bitmaps();
-	if (error)
+	if (error) {
+		swsusp_close(FMODE_READ | FMODE_EXCL);
 		goto Unlock;
+	}
 
 	error = swsusp_read(&flags);
 	swsusp_close(FMODE_READ | FMODE_EXCL);
@@ -1328,7 +1330,7 @@ static int __init resumedelay_setup(char *str)
 	int rc = kstrtouint(str, 0, &resume_delay);
 
 	if (rc)
-		return rc;
+		pr_warn("resumedelay: bad option string '%s'\n", str);
 	return 1;
 }
 
diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
index d20526c5be15..b663a97f5867 100644
--- a/kernel/power/suspend_test.c
+++ b/kernel/power/suspend_test.c
@@ -157,22 +157,22 @@ static int __init setup_test_suspend(char *value)
 	value++;
 	suspend_type = strsep(&value, ",");
 	if (!suspend_type)
-		return 0;
+		return 1;
 
 	repeat = strsep(&value, ",");
 	if (repeat) {
 		if (kstrtou32(repeat, 0, &test_repeat_count_max))
-			return 0;
+			return 1;
 	}
 
 	for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++)
 		if (!strcmp(pm_labels[i], suspend_type)) {
 			test_state_label = pm_labels[i];
-			return 0;
+			return 1;
 		}
 
 	printk(warn_bad_state, suspend_type);
-	return 0;
+	return 1;
 }
 __setup("test_suspend", setup_test_suspend);
 
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index ad10359030a4..c51f5507b34f 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -89,7 +89,7 @@ struct swap_map_page_list {
 	struct swap_map_page_list *next;
 };
 
-/**
+/*
  *	The swap_map_handle structure is used for handling swap in
  *	a file-alike way
  */
@@ -117,7 +117,7 @@ struct swsusp_header {
 
 static struct swsusp_header *swsusp_header;
 
-/**
+/*
  *	The following functions are used for tracing the allocated
  *	swap pages, so that they can be freed in case of an error.
  */
@@ -171,7 +171,7 @@ static int swsusp_extents_insert(unsigned long swap_offset)
 	return 0;
 }
 
-/**
+/*
  *	alloc_swapdev_block - allocate a swap page and register that it has
  *	been allocated, so that it can be freed in case of an error.
  */
@@ -190,7 +190,7 @@ sector_t alloc_swapdev_block(int swap)
 	return 0;
 }
 
-/**
+/*
  *	free_all_swap_pages - free swap pages allocated for saving image data.
  *	It also frees the extents used to register which swap entries had been
  *	allocated.
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index e373fbe44da5..431cee212467 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -56,13 +56,13 @@ static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
 static inline void rcu_segcblist_set_flags(struct rcu_segcblist *rsclp,
 					   int flags)
 {
-	rsclp->flags |= flags;
+	WRITE_ONCE(rsclp->flags, rsclp->flags | flags);
 }
 
 static inline void rcu_segcblist_clear_flags(struct rcu_segcblist *rsclp,
 					     int flags)
 {
-	rsclp->flags &= ~flags;
+	WRITE_ONCE(rsclp->flags, rsclp->flags & ~flags);
 }
 
 static inline bool rcu_segcblist_test_flags(struct rcu_segcblist *rsclp,
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 422f7e4cc08d..55d049c39608 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -284,7 +284,7 @@ static atomic_t barrier_cbs_invoked;	/* Barrier callbacks invoked. */
 static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
 static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
 
-static bool rcu_fwd_cb_nodelay;		/* Short rcu_torture_delay() delays. */
+static atomic_t rcu_fwd_cb_nodelay;	/* Short rcu_torture_delay() delays. */
 
 /*
  * Allocate an element from the rcu_tortures pool.
@@ -387,7 +387,7 @@ rcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
 	 * period, and we want a long delay occasionally to trigger
 	 * force_quiescent_state. */
 
-	if (!READ_ONCE(rcu_fwd_cb_nodelay) &&
+	if (!atomic_read(&rcu_fwd_cb_nodelay) &&
 	    !(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) {
 		started = cur_ops->get_gp_seq();
 		ts = rcu_trace_clock_local();
@@ -674,6 +674,7 @@ static struct rcu_torture_ops srcu_ops = {
 	.call		= srcu_torture_call,
 	.cb_barrier	= srcu_torture_barrier,
 	.stats		= srcu_torture_stats,
+	.cbflood_max	= 50000,
 	.irq_capable	= 1,
 	.no_pi_lock	= IS_ENABLED(CONFIG_TINY_SRCU),
 	.name		= "srcu"
@@ -708,6 +709,7 @@ static struct rcu_torture_ops srcud_ops = {
 	.call		= srcu_torture_call,
 	.cb_barrier	= srcu_torture_barrier,
 	.stats		= srcu_torture_stats,
+	.cbflood_max	= 50000,
 	.irq_capable	= 1,
 	.no_pi_lock	= IS_ENABLED(CONFIG_TINY_SRCU),
 	.name		= "srcud"
@@ -997,7 +999,7 @@ static int rcu_torture_boost(void *arg)
 			goto checkwait;
 
 		/* Wait for the next test interval. */
-		oldstarttime = boost_starttime;
+		oldstarttime = READ_ONCE(boost_starttime);
 		while (time_before(jiffies, oldstarttime)) {
 			schedule_timeout_interruptible(oldstarttime - jiffies);
 			if (stutter_wait("rcu_torture_boost"))
@@ -1041,10 +1043,11 @@ static int rcu_torture_boost(void *arg)
 		 * interval.  Besides, we are running at RT priority,
 		 * so delays should be relatively rare.
 		 */
-		while (oldstarttime == boost_starttime && !kthread_should_stop()) {
+		while (oldstarttime == READ_ONCE(boost_starttime) && !kthread_should_stop()) {
 			if (mutex_trylock(&boost_mutex)) {
 				if (oldstarttime == boost_starttime) {
-					boost_starttime = jiffies + test_boost_interval * HZ;
+					WRITE_ONCE(boost_starttime,
+						   jiffies + test_boost_interval * HZ);
 					n_rcu_torture_boosts++;
 				}
 				mutex_unlock(&boost_mutex);
@@ -1276,7 +1279,7 @@ rcu_torture_writer(void *arg)
 		boot_ended = rcu_inkernel_boot_has_ended();
 		stutter_waited = stutter_wait("rcu_torture_writer");
 		if (stutter_waited &&
-		    !READ_ONCE(rcu_fwd_cb_nodelay) &&
+		    !atomic_read(&rcu_fwd_cb_nodelay) &&
 		    !cur_ops->slow_gps &&
 		    !torture_must_stop() &&
 		    boot_ended)
@@ -2180,7 +2183,6 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
 	for (i = ARRAY_SIZE(rfp->n_launders_hist) - 1; i > 0; i--)
 		if (rfp->n_launders_hist[i].n_launders > 0)
 			break;
-	mutex_lock(&rcu_fwd_mutex); // Serialize histograms.
 	pr_alert("%s: Callback-invocation histogram %d (duration %lu jiffies):",
 		 __func__, rfp->rcu_fwd_id, jiffies - rfp->rcu_fwd_startat);
 	gps_old = rfp->rcu_launder_gp_seq_start;
@@ -2193,7 +2195,6 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
 		gps_old = gps;
 	}
 	pr_cont("\n");
-	mutex_unlock(&rcu_fwd_mutex);
 }
 
 /* Callback function for continuous-flood RCU callbacks. */
@@ -2281,6 +2282,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
 	unsigned long stopat;
 	static DEFINE_TORTURE_RANDOM(trs);
 
+	pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
 	if (!cur_ops->sync)
 		return; // Cannot do need_resched() forward progress testing without ->sync.
 	if (cur_ops->call && cur_ops->cb_barrier) {
@@ -2289,7 +2291,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
 	}
 
 	/* Tight loop containing cond_resched(). */
-	WRITE_ONCE(rcu_fwd_cb_nodelay, true);
+	atomic_inc(&rcu_fwd_cb_nodelay);
 	cur_ops->sync(); /* Later readers see above write. */
 	if  (selfpropcb) {
 		WRITE_ONCE(fcs.stop, 0);
@@ -2325,6 +2327,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
 	if (selfpropcb) {
 		WRITE_ONCE(fcs.stop, 1);
 		cur_ops->sync(); /* Wait for running CB to complete. */
+		pr_alert("%s: Waiting for CBs: %pS() %d\n", __func__, cur_ops->cb_barrier, rfp->rcu_fwd_id);
 		cur_ops->cb_barrier(); /* Wait for queued callbacks. */
 	}
 
@@ -2333,7 +2336,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
 		destroy_rcu_head_on_stack(&fcs.rh);
 	}
 	schedule_timeout_uninterruptible(HZ / 10); /* Let kthreads recover. */
-	WRITE_ONCE(rcu_fwd_cb_nodelay, false);
+	atomic_dec(&rcu_fwd_cb_nodelay);
 }
 
 /* Carry out call_rcu() forward-progress testing. */
@@ -2353,13 +2356,14 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
 	unsigned long stopat;
 	unsigned long stoppedat;
 
+	pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
 	if (READ_ONCE(rcu_fwd_emergency_stop))
 		return; /* Get out of the way quickly, no GP wait! */
 	if (!cur_ops->call)
 		return; /* Can't do call_rcu() fwd prog without ->call. */
 
 	/* Loop continuously posting RCU callbacks. */
-	WRITE_ONCE(rcu_fwd_cb_nodelay, true);
+	atomic_inc(&rcu_fwd_cb_nodelay);
 	cur_ops->sync(); /* Later readers see above write. */
 	WRITE_ONCE(rfp->rcu_fwd_startat, jiffies);
 	stopat = rfp->rcu_fwd_startat + MAX_FWD_CB_JIFFIES;
@@ -2414,6 +2418,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
 	n_launders_cb_snap = READ_ONCE(rfp->n_launders_cb);
 	cver = READ_ONCE(rcu_torture_current_version) - cver;
 	gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps);
+	pr_alert("%s: Waiting for CBs: %pS() %d\n", __func__, cur_ops->cb_barrier, rfp->rcu_fwd_id);
 	cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. */
 	(void)rcu_torture_fwd_prog_cbfree(rfp);
 
@@ -2427,11 +2432,13 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
 			 n_launders, n_launders_sa,
 			 n_max_gps, n_max_cbs, cver, gps);
 		atomic_long_add(n_max_cbs, &rcu_fwd_max_cbs);
+		mutex_lock(&rcu_fwd_mutex); // Serialize histograms.
 		rcu_torture_fwd_cb_hist(rfp);
+		mutex_unlock(&rcu_fwd_mutex);
 	}
 	schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */
 	tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
-	WRITE_ONCE(rcu_fwd_cb_nodelay, false);
+	atomic_dec(&rcu_fwd_cb_nodelay);
 }
 
 
@@ -2511,7 +2518,7 @@ static int rcu_torture_fwd_prog(void *args)
 			firsttime = false;
 			WRITE_ONCE(rcu_fwd_seq, rcu_fwd_seq + 1);
 		} else {
-			while (READ_ONCE(rcu_fwd_seq) == oldseq)
+			while (READ_ONCE(rcu_fwd_seq) == oldseq && !torture_must_stop())
 				schedule_timeout_interruptible(1);
 			oldseq = READ_ONCE(rcu_fwd_seq);
 		}
@@ -2905,8 +2912,10 @@ rcu_torture_cleanup(void)
 	int i;
 
 	if (torture_cleanup_begin()) {
-		if (cur_ops->cb_barrier != NULL)
+		if (cur_ops->cb_barrier != NULL) {
+			pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier);
 			cur_ops->cb_barrier();
+		}
 		return;
 	}
 	if (!cur_ops) {
@@ -2961,8 +2970,10 @@ rcu_torture_cleanup(void)
 	 * Wait for all RCU callbacks to fire, then do torture-type-specific
 	 * cleanup operations.
 	 */
-	if (cur_ops->cb_barrier != NULL)
+	if (cur_ops->cb_barrier != NULL) {
+		pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier);
 		cur_ops->cb_barrier();
+	}
 	if (cur_ops->cleanup != NULL)
 		cur_ops->cleanup();
 
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index d64f0b1d8cd3..ac17348187e4 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -123,7 +123,7 @@ static struct rcu_tasks rt_name =							\
 	.call_func = call,								\
 	.rtpcpu = &rt_name ## __percpu,							\
 	.name = n,									\
-	.percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS) + 1,				\
+	.percpu_enqueue_shift = order_base_2(CONFIG_NR_CPUS),				\
 	.percpu_enqueue_lim = 1,							\
 	.percpu_dequeue_lim = 1,							\
 	.barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex),		\
@@ -302,7 +302,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
 	if (unlikely(needadjust)) {
 		raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
 		if (rtp->percpu_enqueue_lim != nr_cpu_ids) {
-			WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1);
+			WRITE_ONCE(rtp->percpu_enqueue_shift, 0);
 			WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids);
 			smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids);
 			pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
@@ -417,7 +417,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
 	if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) {
 		raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
 		if (rtp->percpu_enqueue_lim > 1) {
-			WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1);
+			WRITE_ONCE(rtp->percpu_enqueue_shift, order_base_2(nr_cpu_ids));
 			smp_store_release(&rtp->percpu_enqueue_lim, 1);
 			rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu();
 			pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a4c25a6283b0..a4b8189455d5 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -87,11 +87,12 @@ static struct rcu_state rcu_state = {
 	.gp_state = RCU_GP_IDLE,
 	.gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT,
 	.barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex),
+	.barrier_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.barrier_lock),
 	.name = RCU_NAME,
 	.abbr = RCU_ABBR,
 	.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex),
 	.exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex),
-	.ofl_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock),
+	.ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED,
 };
 
 /* Dump rcu_node combining tree at boot to verify correct setup. */
@@ -153,7 +154,7 @@ static void sync_sched_exp_online_cleanup(int cpu);
 static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp);
 static bool rcu_rdp_is_offloaded(struct rcu_data *rdp);
 
-/* rcuc/rcub kthread realtime priority */
+/* rcuc/rcub/rcuop kthread realtime priority */
 static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
 module_param(kthread_prio, int, 0444);
 
@@ -222,6 +223,16 @@ static unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
 }
 
 /*
+ * Is the CPU corresponding to the specified rcu_data structure online
+ * from RCU's perspective?  This perspective is given by that structure's
+ * ->qsmaskinitnext field rather than by the global cpu_online_mask.
+ */
+static bool rcu_rdp_cpu_online(struct rcu_data *rdp)
+{
+	return !!(rdp->grpmask & rcu_rnp_online_cpus(rdp->mynode));
+}
+
+/*
  * Return true if an RCU grace period is in progress.  The READ_ONCE()s
  * permit this function to be invoked without holding the root rcu_node
  * structure's ->lock, but of course results can be subject to change.
@@ -1086,9 +1097,8 @@ void rcu_irq_enter_irqson(void)
  * Just check whether or not this CPU has non-offloaded RCU callbacks
  * queued.
  */
-int rcu_needs_cpu(u64 basemono, u64 *nextevt)
+int rcu_needs_cpu(void)
 {
-	*nextevt = KTIME_MAX;
 	return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
 		!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
 }
@@ -1167,15 +1177,20 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
 bool rcu_lockdep_current_cpu_online(void)
 {
 	struct rcu_data *rdp;
-	struct rcu_node *rnp;
 	bool ret = false;
 
 	if (in_nmi() || !rcu_scheduler_fully_active)
 		return true;
 	preempt_disable_notrace();
 	rdp = this_cpu_ptr(&rcu_data);
-	rnp = rdp->mynode;
-	if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1)
+	/*
+	 * Strictly, we care here about the case where the current CPU is
+	 * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask
+	 * not being up to date. So arch_spin_is_locked() might have a
+	 * false positive if it's held by some *other* CPU, but that's
+	 * OK because that just means a false *negative* on the warning.
+	 */
+	if (rcu_rdp_cpu_online(rdp) || arch_spin_is_locked(&rcu_state.ofl_lock))
 		ret = true;
 	preempt_enable_notrace();
 	return ret;
@@ -1260,8 +1275,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	 * For more detail, please refer to the "Hotplug CPU" section
 	 * of RCU's Requirements documentation.
 	 */
-	if (WARN_ON_ONCE(!(rdp->grpmask & rcu_rnp_online_cpus(rnp)))) {
-		bool onl;
+	if (WARN_ON_ONCE(!rcu_rdp_cpu_online(rdp))) {
 		struct rcu_node *rnp1;
 
 		pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
@@ -1270,9 +1284,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 		for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
 			pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx ->rcu_gp_init_mask %#lx\n",
 				__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rcu_gp_init_mask);
-		onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
 		pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n",
-			__func__, rdp->cpu, ".o"[onl],
+			__func__, rdp->cpu, ".o"[rcu_rdp_cpu_online(rdp)],
 			(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
 			(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
 		return 1; /* Break things loose after complaining. */
@@ -1739,7 +1752,6 @@ static void rcu_strict_gp_boundary(void *unused)
  */
 static noinline_for_stack bool rcu_gp_init(void)
 {
-	unsigned long firstseq;
 	unsigned long flags;
 	unsigned long oldmask;
 	unsigned long mask;
@@ -1782,22 +1794,17 @@ static noinline_for_stack bool rcu_gp_init(void)
 	 * of RCU's Requirements documentation.
 	 */
 	WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF);
+	/* Exclude CPU hotplug operations. */
 	rcu_for_each_leaf_node(rnp) {
-		// Wait for CPU-hotplug operations that might have
-		// started before this grace period did.
-		smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
-		firstseq = READ_ONCE(rnp->ofl_seq);
-		if (firstseq & 0x1)
-			while (firstseq == READ_ONCE(rnp->ofl_seq))
-				schedule_timeout_idle(1);  // Can't wake unless RCU is watching.
-		smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
-		raw_spin_lock(&rcu_state.ofl_lock);
-		raw_spin_lock_irq_rcu_node(rnp);
+		local_irq_save(flags);
+		arch_spin_lock(&rcu_state.ofl_lock);
+		raw_spin_lock_rcu_node(rnp);
 		if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
 		    !rnp->wait_blkd_tasks) {
 			/* Nothing to do on this leaf rcu_node structure. */
-			raw_spin_unlock_irq_rcu_node(rnp);
-			raw_spin_unlock(&rcu_state.ofl_lock);
+			raw_spin_unlock_rcu_node(rnp);
+			arch_spin_unlock(&rcu_state.ofl_lock);
+			local_irq_restore(flags);
 			continue;
 		}
 
@@ -1832,8 +1839,9 @@ static noinline_for_stack bool rcu_gp_init(void)
 				rcu_cleanup_dead_rnp(rnp);
 		}
 
-		raw_spin_unlock_irq_rcu_node(rnp);
-		raw_spin_unlock(&rcu_state.ofl_lock);
+		raw_spin_unlock_rcu_node(rnp);
+		arch_spin_unlock(&rcu_state.ofl_lock);
+		local_irq_restore(flags);
 	}
 	rcu_gp_slow(gp_preinit_delay); /* Races with CPU hotplug. */
 
@@ -2850,10 +2858,12 @@ static void rcu_cpu_kthread(unsigned int cpu)
 {
 	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
 	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
+	unsigned long *j = this_cpu_ptr(&rcu_data.rcuc_activity);
 	int spincnt;
 
 	trace_rcu_utilization(TPS("Start CPU kthread@rcu_run"));
 	for (spincnt = 0; spincnt < 10; spincnt++) {
+		WRITE_ONCE(*j, jiffies);
 		local_bh_disable();
 		*statusp = RCU_KTHREAD_RUNNING;
 		local_irq_disable();
@@ -2874,6 +2884,7 @@ static void rcu_cpu_kthread(unsigned int cpu)
 	schedule_timeout_idle(2);
 	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
 	*statusp = RCU_KTHREAD_WAITING;
+	WRITE_ONCE(*j, jiffies);
 }
 
 static struct smp_hotplug_thread rcu_cpu_thread_spec = {
@@ -2894,7 +2905,7 @@ static int __init rcu_spawn_core_kthreads(void)
 
 	for_each_possible_cpu(cpu)
 		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
-	if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq)
+	if (use_softirq)
 		return 0;
 	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
 		  "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
@@ -2995,9 +3006,47 @@ static void check_cb_ovld(struct rcu_data *rdp)
 	raw_spin_unlock_rcu_node(rnp);
 }
 
-/* Helper function for call_rcu() and friends.  */
-static void
-__call_rcu(struct rcu_head *head, rcu_callback_t func)
+/**
+ * call_rcu() - Queue an RCU callback for invocation after a grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed.  However, the callback function
+ * might well execute concurrently with RCU read-side critical sections
+ * that started after call_rcu() was invoked.
+ *
+ * RCU read-side critical sections are delimited by rcu_read_lock()
+ * and rcu_read_unlock(), and may be nested.  In addition, but only in
+ * v5.0 and later, regions of code across which interrupts, preemption,
+ * or softirqs have been disabled also serve as RCU read-side critical
+ * sections.  This includes hardware interrupt handlers, softirq handlers,
+ * and NMI handlers.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical section.  On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu().  It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section.  Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
+ *
+ * Implementation of these memory-ordering guarantees is described here:
+ * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
+ */
+void call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
 	static atomic_t doublefrees;
 	unsigned long flags;
@@ -3011,7 +3060,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
 		/*
 		 * Probable double call_rcu(), so leak the callback.
 		 * Use rcu:rcu_callback trace event to find the previous
-		 * time callback was passed to __call_rcu().
+		 * time callback was passed to call_rcu().
 		 */
 		if (atomic_inc_return(&doublefrees) < 4) {
 			pr_err("%s(): Double-freed CB %p->%pS()!!!  ", __func__, head, head->func);
@@ -3022,8 +3071,8 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
 	}
 	head->func = func;
 	head->next = NULL;
-	local_irq_save(flags);
 	kasan_record_aux_stack_noalloc(head);
+	local_irq_save(flags);
 	rdp = this_cpu_ptr(&rcu_data);
 
 	/* Add the callback to our list. */
@@ -3060,51 +3109,6 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
 		local_irq_restore(flags);
 	}
 }
-
-/**
- * call_rcu() - Queue an RCU callback for invocation after a grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all pre-existing RCU read-side
- * critical sections have completed.  However, the callback function
- * might well execute concurrently with RCU read-side critical sections
- * that started after call_rcu() was invoked.
- *
- * RCU read-side critical sections are delimited by rcu_read_lock()
- * and rcu_read_unlock(), and may be nested.  In addition, but only in
- * v5.0 and later, regions of code across which interrupts, preemption,
- * or softirqs have been disabled also serve as RCU read-side critical
- * sections.  This includes hardware interrupt handlers, softirq handlers,
- * and NMI handlers.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing RCU read-side critical section.  On systems with more
- * than one CPU, this means that when "func()" is invoked, each CPU is
- * guaranteed to have executed a full memory barrier since the end of its
- * last RCU read-side critical section whose beginning preceded the call
- * to call_rcu().  It also means that each CPU executing an RCU read-side
- * critical section that continues beyond the start of "func()" must have
- * executed a memory barrier after the call_rcu() but before the beginning
- * of that RCU read-side critical section.  Note that these guarantees
- * include CPUs that are offline, idle, or executing in user mode, as
- * well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting RCU callback function "func()", then both CPU A and CPU B are
- * guaranteed to execute a full memory barrier during the time interval
- * between the call to call_rcu() and the invocation of "func()" -- even
- * if CPU A and CPU B are the same CPU (but again only if the system has
- * more than one CPU).
- *
- * Implementation of these memory-ordering guarantees is described here:
- * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
- */
-void call_rcu(struct rcu_head *head, rcu_callback_t func)
-{
-	__call_rcu(head, func);
-}
 EXPORT_SYMBOL_GPL(call_rcu);
 
 
@@ -3984,13 +3988,16 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
 }
 
 /*
- * Called with preemption disabled, and from cross-cpu IRQ context.
+ * If needed, entrain an rcu_barrier() callback on rdp->cblist.
  */
-static void rcu_barrier_func(void *cpu_in)
+static void rcu_barrier_entrain(struct rcu_data *rdp)
 {
-	uintptr_t cpu = (uintptr_t)cpu_in;
-	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+	unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
+	unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
 
+	lockdep_assert_held(&rcu_state.barrier_lock);
+	if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
+		return;
 	rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);
 	rdp->barrier_head.func = rcu_barrier_callback;
 	debug_rcu_head_queue(&rdp->barrier_head);
@@ -4000,10 +4007,26 @@ static void rcu_barrier_func(void *cpu_in)
 		atomic_inc(&rcu_state.barrier_cpu_count);
 	} else {
 		debug_rcu_head_unqueue(&rdp->barrier_head);
-		rcu_barrier_trace(TPS("IRQNQ"), -1,
-				  rcu_state.barrier_sequence);
+		rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
 	}
 	rcu_nocb_unlock(rdp);
+	smp_store_release(&rdp->barrier_seq_snap, gseq);
+}
+
+/*
+ * Called with preemption disabled, and from cross-cpu IRQ context.
+ */
+static void rcu_barrier_handler(void *cpu_in)
+{
+	uintptr_t cpu = (uintptr_t)cpu_in;
+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+	lockdep_assert_irqs_disabled();
+	WARN_ON_ONCE(cpu != rdp->cpu);
+	WARN_ON_ONCE(cpu != smp_processor_id());
+	raw_spin_lock(&rcu_state.barrier_lock);
+	rcu_barrier_entrain(rdp);
+	raw_spin_unlock(&rcu_state.barrier_lock);
 }
 
 /**
@@ -4017,6 +4040,8 @@ static void rcu_barrier_func(void *cpu_in)
 void rcu_barrier(void)
 {
 	uintptr_t cpu;
+	unsigned long flags;
+	unsigned long gseq;
 	struct rcu_data *rdp;
 	unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence);
 
@@ -4027,15 +4052,16 @@ void rcu_barrier(void)
 
 	/* Did someone else do our work for us? */
 	if (rcu_seq_done(&rcu_state.barrier_sequence, s)) {
-		rcu_barrier_trace(TPS("EarlyExit"), -1,
-				  rcu_state.barrier_sequence);
+		rcu_barrier_trace(TPS("EarlyExit"), -1, rcu_state.barrier_sequence);
 		smp_mb(); /* caller's subsequent code after above check. */
 		mutex_unlock(&rcu_state.barrier_mutex);
 		return;
 	}
 
 	/* Mark the start of the barrier operation. */
+	raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
 	rcu_seq_start(&rcu_state.barrier_sequence);
+	gseq = rcu_state.barrier_sequence;
 	rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence);
 
 	/*
@@ -4047,7 +4073,7 @@ void rcu_barrier(void)
 	 */
 	init_completion(&rcu_state.barrier_completion);
 	atomic_set(&rcu_state.barrier_cpu_count, 2);
-	cpus_read_lock();
+	raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
 
 	/*
 	 * Force each CPU with callbacks to register a new callback.
@@ -4056,29 +4082,31 @@ void rcu_barrier(void)
 	 */
 	for_each_possible_cpu(cpu) {
 		rdp = per_cpu_ptr(&rcu_data, cpu);
-		if (cpu_is_offline(cpu) &&
-		    !rcu_rdp_is_offloaded(rdp))
+retry:
+		if (smp_load_acquire(&rdp->barrier_seq_snap) == gseq)
 			continue;
-		if (rcu_segcblist_n_cbs(&rdp->cblist) && cpu_online(cpu)) {
-			rcu_barrier_trace(TPS("OnlineQ"), cpu,
-					  rcu_state.barrier_sequence);
-			smp_call_function_single(cpu, rcu_barrier_func, (void *)cpu, 1);
-		} else if (rcu_segcblist_n_cbs(&rdp->cblist) &&
-			   cpu_is_offline(cpu)) {
-			rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu,
-					  rcu_state.barrier_sequence);
-			local_irq_disable();
-			rcu_barrier_func((void *)cpu);
-			local_irq_enable();
-		} else if (cpu_is_offline(cpu)) {
-			rcu_barrier_trace(TPS("OfflineNoCBNoQ"), cpu,
-					  rcu_state.barrier_sequence);
-		} else {
-			rcu_barrier_trace(TPS("OnlineNQ"), cpu,
-					  rcu_state.barrier_sequence);
+		raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
+		if (!rcu_segcblist_n_cbs(&rdp->cblist)) {
+			WRITE_ONCE(rdp->barrier_seq_snap, gseq);
+			raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
+			rcu_barrier_trace(TPS("NQ"), cpu, rcu_state.barrier_sequence);
+			continue;
+		}
+		if (!rcu_rdp_cpu_online(rdp)) {
+			rcu_barrier_entrain(rdp);
+			WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
+			raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
+			rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, rcu_state.barrier_sequence);
+			continue;
+		}
+		raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
+		if (smp_call_function_single(cpu, rcu_barrier_handler, (void *)cpu, 1)) {
+			schedule_timeout_uninterruptible(1);
+			goto retry;
 		}
+		WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
+		rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence);
 	}
-	cpus_read_unlock();
 
 	/*
 	 * Now that we have an rcu_barrier_callback() callback on each
@@ -4093,6 +4121,12 @@ void rcu_barrier(void)
 	/* Mark the end of the barrier operation. */
 	rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence);
 	rcu_seq_end(&rcu_state.barrier_sequence);
+	gseq = rcu_state.barrier_sequence;
+	for_each_possible_cpu(cpu) {
+		rdp = per_cpu_ptr(&rcu_data, cpu);
+
+		WRITE_ONCE(rdp->barrier_seq_snap, gseq);
+	}
 
 	/* Other rcu_barrier() invocations can now safely proceed. */
 	mutex_unlock(&rcu_state.barrier_mutex);
@@ -4140,6 +4174,7 @@ rcu_boot_init_percpu_data(int cpu)
 	INIT_WORK(&rdp->strict_work, strict_work_handler);
 	WARN_ON_ONCE(rdp->dynticks_nesting != 1);
 	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
+	rdp->barrier_seq_snap = rcu_state.barrier_sequence;
 	rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
 	rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
 	rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
@@ -4287,12 +4322,13 @@ void rcu_cpu_starting(unsigned int cpu)
 
 	rnp = rdp->mynode;
 	mask = rdp->grpmask;
-	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
-	WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+	local_irq_save(flags);
+	arch_spin_lock(&rcu_state.ofl_lock);
 	rcu_dynticks_eqs_online();
-	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
-	raw_spin_lock_irqsave_rcu_node(rnp, flags);
+	raw_spin_lock(&rcu_state.barrier_lock);
+	raw_spin_lock_rcu_node(rnp);
 	WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
+	raw_spin_unlock(&rcu_state.barrier_lock);
 	newcpu = !(rnp->expmaskinitnext & mask);
 	rnp->expmaskinitnext |= mask;
 	/* Allow lockless access for expedited grace periods. */
@@ -4304,15 +4340,18 @@ void rcu_cpu_starting(unsigned int cpu)
 
 	/* An incoming CPU should never be blocking a grace period. */
 	if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */
+		/* rcu_report_qs_rnp() *really* wants some flags to restore */
+		unsigned long flags2;
+
+		local_irq_save(flags2);
 		rcu_disable_urgency_upon_qs(rdp);
 		/* Report QS -after- changing ->qsmaskinitnext! */
-		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
+		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags2);
 	} else {
-		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+		raw_spin_unlock_rcu_node(rnp);
 	}
-	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
-	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
-	WARN_ON_ONCE(rnp->ofl_seq & 0x1);
+	arch_spin_unlock(&rcu_state.ofl_lock);
+	local_irq_restore(flags);
 	smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
 }
 
@@ -4326,7 +4365,7 @@ void rcu_cpu_starting(unsigned int cpu)
  */
 void rcu_report_dead(unsigned int cpu)
 {
-	unsigned long flags;
+	unsigned long flags, seq_flags;
 	unsigned long mask;
 	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
@@ -4340,10 +4379,8 @@ void rcu_report_dead(unsigned int cpu)
 
 	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
 	mask = rdp->grpmask;
-	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
-	WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
-	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
-	raw_spin_lock(&rcu_state.ofl_lock);
+	local_irq_save(seq_flags);
+	arch_spin_lock(&rcu_state.ofl_lock);
 	raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
 	rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
 	rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags);
@@ -4354,10 +4391,8 @@ void rcu_report_dead(unsigned int cpu)
 	}
 	WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-	raw_spin_unlock(&rcu_state.ofl_lock);
-	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
-	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
-	WARN_ON_ONCE(rnp->ofl_seq & 0x1);
+	arch_spin_unlock(&rcu_state.ofl_lock);
+	local_irq_restore(seq_flags);
 
 	rdp->cpu_started = false;
 }
@@ -4380,7 +4415,9 @@ void rcutree_migrate_callbacks(int cpu)
 	    rcu_segcblist_empty(&rdp->cblist))
 		return;  /* No callbacks to migrate. */
 
-	local_irq_save(flags);
+	raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
+	WARN_ON_ONCE(rcu_rdp_cpu_online(rdp));
+	rcu_barrier_entrain(rdp);
 	my_rdp = this_cpu_ptr(&rcu_data);
 	my_rnp = my_rdp->mynode;
 	rcu_nocb_lock(my_rdp); /* irqs already disabled. */
@@ -4390,10 +4427,10 @@ void rcutree_migrate_callbacks(int cpu)
 	needwake = rcu_advance_cbs(my_rnp, rdp) ||
 		   rcu_advance_cbs(my_rnp, my_rdp);
 	rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
+	raw_spin_unlock(&rcu_state.barrier_lock); /* irqs remain disabled. */
 	needwake = needwake || rcu_advance_cbs(my_rnp, my_rdp);
 	rcu_segcblist_disable(&rdp->cblist);
-	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
-		     !rcu_segcblist_n_cbs(&my_rdp->cblist));
+	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != !rcu_segcblist_n_cbs(&my_rdp->cblist));
 	if (rcu_rdp_is_offloaded(my_rdp)) {
 		raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */
 		__call_rcu_nocb_wake(my_rdp, true, flags);
@@ -4440,26 +4477,10 @@ static int rcu_pm_notify(struct notifier_block *self,
 static int __init rcu_spawn_gp_kthread(void)
 {
 	unsigned long flags;
-	int kthread_prio_in = kthread_prio;
 	struct rcu_node *rnp;
 	struct sched_param sp;
 	struct task_struct *t;
 
-	/* Force priority into range. */
-	if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2
-	    && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST))
-		kthread_prio = 2;
-	else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
-		kthread_prio = 1;
-	else if (kthread_prio < 0)
-		kthread_prio = 0;
-	else if (kthread_prio > 99)
-		kthread_prio = 99;
-
-	if (kthread_prio != kthread_prio_in)
-		pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n",
-			 kthread_prio, kthread_prio_in);
-
 	rcu_scheduler_fully_active = 1;
 	t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name);
 	if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n", __func__))
@@ -4570,6 +4591,7 @@ static void __init rcu_init_one(void)
 			init_waitqueue_head(&rnp->exp_wq[2]);
 			init_waitqueue_head(&rnp->exp_wq[3]);
 			spin_lock_init(&rnp->exp_lock);
+			mutex_init(&rnp->boost_kthread_mutex);
 		}
 	}
 
@@ -4585,6 +4607,28 @@ static void __init rcu_init_one(void)
 }
 
 /*
+ * Force priority from the kernel command-line into range.
+ */
+static void __init sanitize_kthread_prio(void)
+{
+	int kthread_prio_in = kthread_prio;
+
+	if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2
+	    && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST))
+		kthread_prio = 2;
+	else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
+		kthread_prio = 1;
+	else if (kthread_prio < 0)
+		kthread_prio = 0;
+	else if (kthread_prio > 99)
+		kthread_prio = 99;
+
+	if (kthread_prio != kthread_prio_in)
+		pr_alert("%s: Limited prio to %d from %d\n",
+			 __func__, kthread_prio, kthread_prio_in);
+}
+
+/*
  * Compute the rcu_node tree geometry from kernel parameters.  This cannot
  * replace the definitions in tree.h because those are needed to size
  * the ->node array in the rcu_state structure.
@@ -4744,6 +4788,7 @@ void __init rcu_init(void)
 
 	kfree_rcu_batch_init();
 	rcu_bootup_announce();
+	sanitize_kthread_prio();
 	rcu_init_geometry();
 	rcu_init_one();
 	if (dump_tree)
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 486fc901bd08..926673ebe355 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -56,8 +56,6 @@ struct rcu_node {
 				/*  Initialized from ->qsmaskinitnext at the */
 				/*  beginning of each grace period. */
 	unsigned long qsmaskinitnext;
-	unsigned long ofl_seq;	/* CPU-hotplug operation sequence count. */
-				/* Online CPUs for next grace period. */
 	unsigned long expmask;	/* CPUs or groups that need to check in */
 				/*  to allow the current expedited GP */
 				/*  to complete. */
@@ -110,6 +108,9 @@ struct rcu_node {
 				/*  side effect, not as a lock. */
 	unsigned long boost_time;
 				/* When to start boosting (jiffies). */
+	struct mutex boost_kthread_mutex;
+				/* Exclusion for thread spawning and affinity */
+				/*  manipulation. */
 	struct task_struct *boost_kthread_task;
 				/* kthread that takes care of priority */
 				/*  boosting for this rcu_node structure. */
@@ -190,6 +191,7 @@ struct rcu_data {
 	bool rcu_forced_tick_exp;	/*   ... provide QS to expedited GP. */
 
 	/* 4) rcu_barrier(), OOM callbacks, and expediting. */
+	unsigned long barrier_seq_snap;	/* Snap of rcu_state.barrier_sequence. */
 	struct rcu_head barrier_head;
 	int exp_dynticks_snap;		/* Double-check need for IPI. */
 
@@ -203,6 +205,8 @@ struct rcu_data {
 	int nocb_defer_wakeup;		/* Defer wakeup of nocb_kthread. */
 	struct timer_list nocb_timer;	/* Enforce finite deferral. */
 	unsigned long nocb_gp_adv_time;	/* Last call_rcu() CB adv (jiffies). */
+	struct mutex nocb_gp_kthread_mutex; /* Exclusion for nocb gp kthread */
+					    /* spawning */
 
 	/* The following fields are used by call_rcu, hence own cacheline. */
 	raw_spinlock_t nocb_bypass_lock ____cacheline_internodealigned_in_smp;
@@ -237,6 +241,7 @@ struct rcu_data {
 					/* rcuc per-CPU kthread or NULL. */
 	unsigned int rcu_cpu_kthread_status;
 	char rcu_cpu_has_work;
+	unsigned long rcuc_activity;
 
 	/* 7) Diagnostic data, including RCU CPU stall warnings. */
 	unsigned int softirq_snap;	/* Snapshot of softirq activity. */
@@ -302,9 +307,8 @@ struct rcu_state {
 
 	/* The following fields are guarded by the root rcu_node's lock. */
 
-	u8	boost ____cacheline_internodealigned_in_smp;
-						/* Subject to priority boost. */
-	unsigned long gp_seq;			/* Grace-period sequence #. */
+	unsigned long gp_seq ____cacheline_internodealigned_in_smp;
+						/* Grace-period sequence #. */
 	unsigned long gp_max;			/* Maximum GP duration in */
 						/*  jiffies. */
 	struct task_struct *gp_kthread;		/* Task for grace periods. */
@@ -323,6 +327,8 @@ struct rcu_state {
 						/*  rcu_barrier(). */
 	/* End of fields guarded by barrier_mutex. */
 
+	raw_spinlock_t barrier_lock;		/* Protects ->barrier_seq_snap. */
+
 	struct mutex exp_mutex;			/* Serialize expedited GP. */
 	struct mutex exp_wake_mutex;		/* Serialize wakeup. */
 	unsigned long expedited_sequence;	/* Take a ticket. */
@@ -355,7 +361,7 @@ struct rcu_state {
 	const char *name;			/* Name of structure. */
 	char abbr;				/* Abbreviated name. */
 
-	raw_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
+	arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
 						/* Synchronize offline with */
 						/*  GP pre-initialization. */
 };
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 237a79989aba..60197ea24ceb 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -502,7 +502,8 @@ static void synchronize_rcu_expedited_wait(void)
 		if (synchronize_rcu_expedited_wait_once(1))
 			return;
 		rcu_for_each_leaf_node(rnp) {
-			for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+			mask = READ_ONCE(rnp->expmask);
+			for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
 				rdp = per_cpu_ptr(&rcu_data, cpu);
 				if (rdp->rcu_forced_tick_exp)
 					continue;
@@ -656,7 +657,7 @@ static void rcu_exp_handler(void *unused)
 	 */
 	if (!depth) {
 		if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
-		    rcu_dynticks_curr_cpu_in_eqs()) {
+		    rcu_is_cpu_rrupt_from_idle()) {
 			rcu_report_exp_rdp(rdp);
 		} else {
 			WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index eeafb546a7a0..636d0546a4e9 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1169,7 +1169,7 @@ void __init rcu_init_nohz(void)
 	struct rcu_data *rdp;
 
 #if defined(CONFIG_NO_HZ_FULL)
-	if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
+	if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask))
 		need_rcu_nocb_mask = true;
 #endif /* #if defined(CONFIG_NO_HZ_FULL) */
 
@@ -1226,6 +1226,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 	raw_spin_lock_init(&rdp->nocb_gp_lock);
 	timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
 	rcu_cblist_init(&rdp->nocb_bypass);
+	mutex_init(&rdp->nocb_gp_kthread_mutex);
 }
 
 /*
@@ -1238,6 +1239,7 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
 	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 	struct rcu_data *rdp_gp;
 	struct task_struct *t;
+	struct sched_param sp;
 
 	if (!rcu_scheduler_fully_active || !rcu_nocb_is_setup)
 		return;
@@ -1247,20 +1249,30 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
 		return;
 
 	/* If we didn't spawn the GP kthread first, reorganize! */
+	sp.sched_priority = kthread_prio;
 	rdp_gp = rdp->nocb_gp_rdp;
+	mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
 	if (!rdp_gp->nocb_gp_kthread) {
 		t = kthread_run(rcu_nocb_gp_kthread, rdp_gp,
 				"rcuog/%d", rdp_gp->cpu);
-		if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__))
+		if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__)) {
+			mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
 			return;
+		}
 		WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);
+		if (kthread_prio)
+			sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	}
+	mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
 
 	/* Spawn the kthread for this CPU. */
 	t = kthread_run(rcu_nocb_cb_kthread, rdp,
 			"rcuo%c/%d", rcu_state.abbr, cpu);
 	if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
 		return;
+
+	if (kthread_prio)
+		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	WRITE_ONCE(rdp->nocb_cb_kthread, t);
 	WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
 }
@@ -1348,7 +1360,7 @@ static void __init rcu_organize_nocb_kthreads(void)
  */
 void rcu_bind_current_to_nocb(void)
 {
-	if (cpumask_available(rcu_nocb_mask) && cpumask_weight(rcu_nocb_mask))
+	if (cpumask_available(rcu_nocb_mask) && !cpumask_empty(rcu_nocb_mask))
 		WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask));
 }
 EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index c5b45c2f68a1..6082dd23408f 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -330,7 +330,7 @@ void rcu_note_context_switch(bool preempt)
 		 * then queue the task as required based on the states
 		 * of any ongoing and expedited grace periods.
 		 */
-		WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0);
+		WARN_ON_ONCE(!rcu_rdp_cpu_online(rdp));
 		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
 		trace_rcu_preempt_task(rcu_state.name,
 				       t->pid,
@@ -556,16 +556,16 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		}
 
-		/* Unboost if we were boosted. */
-		if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
-			rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex);
-
 		/*
 		 * If this was the last task on the expedited lists,
 		 * then we need to report up the rcu_node hierarchy.
 		 */
 		if (!empty_exp && empty_exp_now)
 			rcu_report_exp_rnp(rnp, true);
+
+		/* Unboost if we were boosted. */
+		if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
+			rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex);
 	} else {
 		local_irq_restore(flags);
 	}
@@ -773,7 +773,6 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
 	int cpu;
 	int i;
 	struct list_head *lhp;
-	bool onl;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp1;
 
@@ -797,9 +796,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
 	pr_cont("\n");
 	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
 		rdp = per_cpu_ptr(&rcu_data, cpu);
-		onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
 		pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n",
-			cpu, ".o"[onl],
+			cpu, ".o"[rcu_rdp_cpu_online(rdp)],
 			(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
 			(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
 	}
@@ -996,12 +994,15 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
  */
 static void rcu_cpu_kthread_setup(unsigned int cpu)
 {
+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 #ifdef CONFIG_RCU_BOOST
 	struct sched_param sp;
 
 	sp.sched_priority = kthread_prio;
 	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
 #endif /* #ifdef CONFIG_RCU_BOOST */
+
+	WRITE_ONCE(rdp->rcuc_activity, jiffies);
 }
 
 #ifdef CONFIG_RCU_BOOST
@@ -1172,15 +1173,14 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
 	struct sched_param sp;
 	struct task_struct *t;
 
+	mutex_lock(&rnp->boost_kthread_mutex);
 	if (rnp->boost_kthread_task || !rcu_scheduler_fully_active)
-		return;
-
-	rcu_state.boost = 1;
+		goto out;
 
 	t = kthread_create(rcu_boost_kthread, (void *)rnp,
 			   "rcub/%d", rnp_index);
 	if (WARN_ON_ONCE(IS_ERR(t)))
-		return;
+		goto out;
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	rnp->boost_kthread_task = t;
@@ -1188,6 +1188,9 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
 	sp.sched_priority = kthread_prio;
 	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
+
+ out:
+	mutex_unlock(&rnp->boost_kthread_mutex);
 }
 
 /*
@@ -1210,14 +1213,16 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 		return;
 	if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
 		return;
+	mutex_lock(&rnp->boost_kthread_mutex);
 	for_each_leaf_node_possible_cpu(rnp, cpu)
 		if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
 		    cpu != outgoingcpu)
 			cpumask_set_cpu(cpu, cm);
 	cpumask_and(cm, cm, housekeeping_cpumask(HK_FLAG_RCU));
-	if (cpumask_weight(cm) == 0)
+	if (cpumask_empty(cm))
 		cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU));
 	set_cpus_allowed_ptr(t, cm);
+	mutex_unlock(&rnp->boost_kthread_mutex);
 	free_cpumask_var(cm);
 }
 
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 21bebf7c9030..0c5d8516516a 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -379,6 +379,15 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp)
 	return j > 2 * HZ;
 }
 
+static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp)
+{
+	unsigned long j = jiffies - READ_ONCE(rdp->rcuc_activity);
+
+	if (jp)
+		*jp = j;
+	return j > 2 * HZ;
+}
+
 /*
  * Print out diagnostic information for the specified stalled CPU.
  *
@@ -430,6 +439,29 @@ static void print_cpu_stall_info(int cpu)
 	       falsepositive ? " (false positive?)" : "");
 }
 
+static void rcuc_kthread_dump(struct rcu_data *rdp)
+{
+	int cpu;
+	unsigned long j;
+	struct task_struct *rcuc;
+
+	rcuc = rdp->rcu_cpu_kthread_task;
+	if (!rcuc)
+		return;
+
+	cpu = task_cpu(rcuc);
+	if (cpu_is_offline(cpu) || idle_cpu(cpu))
+		return;
+
+	if (!rcu_is_rcuc_kthread_starving(rdp, &j))
+		return;
+
+	pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
+	sched_show_task(rcuc);
+	if (!trigger_single_cpu_backtrace(cpu))
+		dump_cpu_task(cpu);
+}
+
 /* Complain about starvation of grace-period kthread.  */
 static void rcu_check_gp_kthread_starvation(void)
 {
@@ -601,6 +633,9 @@ static void print_cpu_stall(unsigned long gps)
 	rcu_check_gp_kthread_expired_fqs_timer();
 	rcu_check_gp_kthread_starvation();
 
+	if (!use_softirq)
+		rcuc_kthread_dump(rdp);
+
 	rcu_dump_cpu_stacks();
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 156892c22bb5..180ff9c41fa8 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -407,6 +407,13 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
 }
 EXPORT_SYMBOL_GPL(__wait_rcu_gp);
 
+void finish_rcuwait(struct rcuwait *w)
+{
+	rcu_assign_pointer(w->task, NULL);
+	__set_current_state(TASK_RUNNING);
+}
+EXPORT_SYMBOL_GPL(finish_rcuwait);
+
 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
 void init_rcu_head(struct rcu_head *head)
 {
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 26778884d9ab..cffcd08f4ec8 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -539,7 +539,7 @@ ATTRIBUTE_GROUPS(sugov);
 
 static void sugov_tunables_free(struct kobject *kobj)
 {
-	struct gov_attr_set *attr_set = container_of(kobj, struct gov_attr_set, kobj);
+	struct gov_attr_set *attr_set = to_gov_attr_set(kobj);
 
 	kfree(to_sugov_tunables(attr_set));
 }
diff --git a/kernel/signal.c b/kernel/signal.c
index 9b04631acde8..e93de6daa188 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1308,6 +1308,43 @@ enum sig_handler {
 };
 
 /*
+ * On some archictectures, PREEMPT_RT has to delay sending a signal from a
+ * trap since it cannot enable preemption, and the signal code's
+ * spin_locks turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME
+ * which will send the signal on exit of the trap.
+ */
+#ifdef CONFIG_RT_DELAYED_SIGNALS
+static inline bool force_sig_delayed(struct kernel_siginfo *info,
+				     struct task_struct *t)
+{
+	if (!in_atomic())
+		return false;
+
+	if (WARN_ON_ONCE(t->forced_info.si_signo))
+		return true;
+
+	if (is_si_special(info)) {
+		WARN_ON_ONCE(info != SEND_SIG_PRIV);
+		t->forced_info.si_signo = info->si_signo;
+		t->forced_info.si_errno = 0;
+		t->forced_info.si_code = SI_KERNEL;
+		t->forced_info.si_pid = 0;
+		t->forced_info.si_uid = 0;
+	} else {
+		t->forced_info = *info;
+	}
+	set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+	return true;
+}
+#else
+static inline bool force_sig_delayed(struct kernel_siginfo *info,
+				     struct task_struct *t)
+{
+	return false;
+}
+#endif
+
+/*
  * Force a signal that the process can't ignore: if necessary
  * we unblock the signal and change any SIG_IGN to SIG_DFL.
  *
@@ -1327,6 +1364,9 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t,
 	struct k_sigaction *action;
 	int sig = info->si_signo;
 
+	if (force_sig_delayed(info, t))
+		return 0;
+
 	spin_lock_irqsave(&t->sighand->siglock, flags);
 	action = &t->sighand->action[sig-1];
 	ignored = action->sa.sa_handler == SIG_IGN;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 41f470929e99..fac801815554 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -222,7 +222,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
 	u32 pending;
 	int curcnt;
 
-	WARN_ON_ONCE(in_irq());
+	WARN_ON_ONCE(in_hardirq());
 	lockdep_assert_irqs_enabled();
 
 	local_irq_save(flags);
@@ -305,7 +305,7 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 {
 	unsigned long flags;
 
-	WARN_ON_ONCE(in_irq());
+	WARN_ON_ONCE(in_hardirq());
 
 	raw_local_irq_save(flags);
 	/*
@@ -352,14 +352,14 @@ static void __local_bh_enable(unsigned int cnt)
  */
 void _local_bh_enable(void)
 {
-	WARN_ON_ONCE(in_irq());
+	WARN_ON_ONCE(in_hardirq());
 	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
 }
 EXPORT_SYMBOL(_local_bh_enable);
 
 void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
 {
-	WARN_ON_ONCE(in_irq());
+	WARN_ON_ONCE(in_hardirq());
 	lockdep_assert_irqs_enabled();
 #ifdef CONFIG_TRACE_IRQFLAGS
 	local_irq_disable();
@@ -618,7 +618,7 @@ static inline void tick_irq_exit(void)
 
 	/* Make sure that timer wheel updates are propagated */
 	if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
-		if (!in_irq())
+		if (!in_hardirq())
 			tick_nohz_irq_exit();
 	}
 #endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5ae443b2882e..730ab56d9e92 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -180,6 +180,10 @@ static int bpf_stats_handler(struct ctl_table *table, int write,
 	return ret;
 }
 
+void __weak unpriv_ebpf_notify(int new_state)
+{
+}
+
 static int bpf_unpriv_handler(struct ctl_table *table, int write,
 			      void *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -197,6 +201,9 @@ static int bpf_unpriv_handler(struct ctl_table *table, int write,
 			return -EPERM;
 		*(int *)table->data = unpriv_enable;
 	}
+
+	unpriv_ebpf_notify(unpriv_enable);
+
 	return ret;
 }
 #endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 04bfd62f5e5c..27b7868b5c30 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -181,5 +181,14 @@ config HIGH_RES_TIMERS
 	  hardware is not capable then this option only increases
 	  the size of the kernel image.
 
+config CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
+	int "Clocksource watchdog maximum allowable skew (in μs)"
+	depends on CLOCKSOURCE_WATCHDOG
+	range 50 1000
+	default 100
+	help
+	  Specify the maximum amount of allowable watchdog skew in
+	  microseconds before reporting the clocksource to be unstable.
+
 endmenu
 endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 1cf73807b450..95d7ca35bdf2 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -107,7 +107,13 @@ static u64 suspend_start;
  * This delay could be due to SMIs, NMIs, or to VCPU preemptions.  Used as
  * a lower bound for cs->uncertainty_margin values when registering clocks.
  */
-#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)
+#ifdef CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
+#define MAX_SKEW_USEC	CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
+#else
+#define MAX_SKEW_USEC	100
+#endif
+
+#define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC)
 
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 static void clocksource_watchdog_work(struct work_struct *work);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 17a283ce2b20..2d76c91b85de 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -169,6 +169,8 @@ static ktime_t tick_init_jiffy_update(void)
 	return period;
 }
 
+#define MAX_STALLED_JIFFIES 5
+
 static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
 {
 	int cpu = smp_processor_id();
@@ -196,6 +198,21 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
 	if (tick_do_timer_cpu == cpu)
 		tick_do_update_jiffies64(now);
 
+	/*
+	 * If jiffies update stalled for too long (timekeeper in stop_machine()
+	 * or VMEXIT'ed for several msecs), force an update.
+	 */
+	if (ts->last_tick_jiffies != jiffies) {
+		ts->stalled_jiffies = 0;
+		ts->last_tick_jiffies = READ_ONCE(jiffies);
+	} else {
+		if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
+			tick_do_update_jiffies64(now);
+			ts->stalled_jiffies = 0;
+			ts->last_tick_jiffies = READ_ONCE(jiffies);
+		}
+	}
+
 	if (ts->inidle)
 		ts->got_idle_tick = 1;
 }
@@ -768,7 +785,7 @@ static inline bool local_timer_softirq_pending(void)
 
 static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 {
-	u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
+	u64 basemono, next_tick, delta, expires;
 	unsigned long basejiff;
 	unsigned int seq;
 
@@ -791,7 +808,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 	 * minimal delta which brings us back to this place
 	 * immediately. Lather, rinse and repeat...
 	 */
-	if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
+	if (rcu_needs_cpu() || arch_needs_cpu() ||
 	    irq_work_needs_cpu() || local_timer_softirq_pending()) {
 		next_tick = basemono + TICK_NSEC;
 	} else {
@@ -802,10 +819,8 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 		 * disabled this also looks at the next expiring
 		 * hrtimer.
 		 */
-		next_tmr = get_next_timer_interrupt(basejiff, basemono);
-		ts->next_timer = next_tmr;
-		/* Take the next rcu event into account */
-		next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
+		next_tick = get_next_timer_interrupt(basejiff, basemono);
+		ts->next_timer = next_tick;
 	}
 
 	/*
@@ -984,6 +999,45 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
 	__tick_nohz_full_update_tick(ts, ktime_get());
 }
 
+/*
+ * A pending softirq outside an IRQ (or softirq disabled section) context
+ * should be waiting for ksoftirqd to handle it. Therefore we shouldn't
+ * reach here due to the need_resched() early check in can_stop_idle_tick().
+ *
+ * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the
+ * cpu_down() process, softirqs can still be raised while ksoftirqd is parked,
+ * triggering the below since wakep_softirqd() is ignored.
+ *
+ */
+static bool report_idle_softirq(void)
+{
+	static int ratelimit;
+	unsigned int pending = local_softirq_pending();
+
+	if (likely(!pending))
+		return false;
+
+	/* Some softirqs claim to be safe against hotplug and ksoftirqd parking */
+	if (!cpu_active(smp_processor_id())) {
+		pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK;
+		if (!pending)
+			return false;
+	}
+
+	if (ratelimit < 10)
+		return false;
+
+	/* On RT, softirqs handling may be waiting on some lock */
+	if (!local_bh_blocked())
+		return false;
+
+	pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n",
+		pending);
+	ratelimit++;
+
+	return true;
+}
+
 static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 {
 	/*
@@ -1010,17 +1064,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 	if (need_resched())
 		return false;
 
-	if (unlikely(local_softirq_pending())) {
-		static int ratelimit;
-
-		if (ratelimit < 10 && !local_bh_blocked() &&
-		    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
-			pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n",
-				(unsigned int) local_softirq_pending());
-			ratelimit++;
-		}
+	if (unlikely(report_idle_softirq()))
 		return false;
-	}
 
 	if (tick_nohz_full_enabled()) {
 		/*
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index d952ae393423..504649513399 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -49,6 +49,8 @@ enum tick_nohz_mode {
  * @timer_expires_base:	Base time clock monotonic for @timer_expires
  * @next_timer:		Expiry time of next expiring timer for debugging purpose only
  * @tick_dep_mask:	Tick dependency mask - is set, if someone needs the tick
+ * @last_tick_jiffies:	Value of jiffies seen on last tick
+ * @stalled_jiffies:	Number of stalled jiffies detected across ticks
  */
 struct tick_sched {
 	struct hrtimer			sched_timer;
@@ -77,6 +79,8 @@ struct tick_sched {
 	u64				next_timer;
 	ktime_t				idle_expires;
 	atomic_t			tick_dep_mask;
+	unsigned long			last_tick_jiffies;
+	unsigned int			stalled_jiffies;
 };
 
 extern struct tick_sched *tick_get_tick_sched(int cpu);
diff --git a/kernel/torture.c b/kernel/torture.c
index ef27a6c82451..789aeb0e1159 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -911,7 +911,7 @@ void torture_kthread_stopping(char *title)
 {
 	char buf[128];
 
-	snprintf(buf, sizeof(buf), "Stopping %s", title);
+	snprintf(buf, sizeof(buf), "%s is stopping", title);
 	VERBOSE_TOROUT_STRING(buf);
 	while (!kthread_should_stop()) {
 		torture_shutdown_absorb(title);
@@ -931,12 +931,14 @@ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
 	int ret = 0;
 
 	VERBOSE_TOROUT_STRING(m);
-	*tp = kthread_run(fn, arg, "%s", s);
+	*tp = kthread_create(fn, arg, "%s", s);
 	if (IS_ERR(*tp)) {
 		ret = PTR_ERR(*tp);
 		TOROUT_ERRSTRING(f);
 		*tp = NULL;
+		return ret;
 	}
+	wake_up_process(*tp);  // Process is sleeping, so ordering provided.
 	torture_shuffle_task_register(*tp);
 	return ret;
 }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a4b462b6f944..6105b7036482 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -7790,7 +7790,7 @@ int ftrace_is_dead(void)
 
 /**
  * register_ftrace_function - register a function for profiling
- * @ops - ops structure that holds the function for profiling.
+ * @ops:	ops structure that holds the function for profiling.
  *
  * Register a function to be called by all functions in the
  * kernel.
@@ -7817,7 +7817,7 @@ EXPORT_SYMBOL_GPL(register_ftrace_function);
 
 /**
  * unregister_ftrace_function - unregister a function for profiling.
- * @ops - ops structure that holds the function to unregister
+ * @ops:	ops structure that holds the function to unregister
  *
  * Unregister a function that was added to be called by ftrace profiling.
  */
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index cfddb30e65ab..5e3c62a08fc0 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1387,6 +1387,26 @@ static int run_osnoise(void)
 		}
 
 		/*
+		 * In some cases, notably when running on a nohz_full CPU with
+		 * a stopped tick PREEMPT_RCU has no way to account for QSs.
+		 * This will eventually cause unwarranted noise as PREEMPT_RCU
+		 * will force preemption as the means of ending the current
+		 * grace period. We avoid this problem by calling
+		 * rcu_momentary_dyntick_idle(), which performs a zero duration
+		 * EQS allowing PREEMPT_RCU to end the current grace period.
+		 * This call shouldn't be wrapped inside an RCU critical
+		 * section.
+		 *
+		 * Note that in non PREEMPT_RCU kernels QSs are handled through
+		 * cond_resched()
+		 */
+		if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
+			local_irq_disable();
+			rcu_momentary_dyntick_idle();
+			local_irq_enable();
+		}
+
+		/*
 		 * For the non-preemptive kernel config: let threads runs, if
 		 * they so wish.
 		 */
@@ -2200,6 +2220,17 @@ static void osnoise_workload_stop(void)
 	if (osnoise_has_registered_instances())
 		return;
 
+	/*
+	 * If callbacks were already disabled in a previous stop
+	 * call, there is no need to disable then again.
+	 *
+	 * For instance, this happens when tracing is stopped via:
+	 * echo 0 > tracing_on
+	 * echo nop > current_tracer.
+	 */
+	if (!trace_osnoise_callback_enabled)
+		return;
+
 	trace_osnoise_callback_enabled = false;
 	/*
 	 * Make sure that ftrace_nmi_enter/exit() see
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index 9c9eb20dd2c5..00703444a219 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -54,6 +54,7 @@ static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe,
 	bit += page->index;
 
 	set_bit(bit, wqueue->notes_bitmap);
+	generic_pipe_buf_release(pipe, buf);
 }
 
 // No try_steal function => no stealing
@@ -112,7 +113,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
 	buf->offset = offset;
 	buf->len = len;
 	buf->flags = PIPE_BUF_FLAG_WHOLE;
-	pipe->head = head + 1;
+	smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
 
 	if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
 		spin_unlock_irq(&pipe->rd_wait.lock);
@@ -219,7 +220,6 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
 	struct page **pages;
 	unsigned long *bitmap;
 	unsigned long user_bufs;
-	unsigned int bmsize;
 	int ret, i, nr_pages;
 
 	if (!wqueue)
@@ -243,7 +243,8 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
 		goto error;
 	}
 
-	ret = pipe_resize_ring(pipe, nr_notes);
+	nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE;
+	ret = pipe_resize_ring(pipe, roundup_pow_of_two(nr_notes));
 	if (ret < 0)
 		goto error;
 
@@ -258,17 +259,15 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
 		pages[i]->index = i * WATCH_QUEUE_NOTES_PER_PAGE;
 	}
 
-	bmsize = (nr_notes + BITS_PER_LONG - 1) / BITS_PER_LONG;
-	bmsize *= sizeof(unsigned long);
-	bitmap = kmalloc(bmsize, GFP_KERNEL);
+	bitmap = bitmap_alloc(nr_notes, GFP_KERNEL);
 	if (!bitmap)
 		goto error_p;
 
-	memset(bitmap, 0xff, bmsize);
+	bitmap_fill(bitmap, nr_notes);
 	wqueue->notes = pages;
 	wqueue->notes_bitmap = bitmap;
 	wqueue->nr_pages = nr_pages;
-	wqueue->nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE;
+	wqueue->nr_notes = nr_notes;
 	return 0;
 
 error_p:
@@ -320,7 +319,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe,
 		    tf[i].info_mask & WATCH_INFO_LENGTH)
 			goto err_filter;
 		/* Ignore any unknown types */
-		if (tf[i].type >= sizeof(wfilter->type_filter) * 8)
+		if (tf[i].type >= WATCH_TYPE__NR)
 			continue;
 		nr_filter++;
 	}
@@ -336,7 +335,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe,
 
 	q = wfilter->filters;
 	for (i = 0; i < filter.nr_filters; i++) {
-		if (tf[i].type >= sizeof(wfilter->type_filter) * BITS_PER_LONG)
+		if (tf[i].type >= WATCH_TYPE__NR)
 			continue;
 
 		q->type			= tf[i].type;
@@ -371,6 +370,7 @@ static void __put_watch_queue(struct kref *kref)
 
 	for (i = 0; i < wqueue->nr_pages; i++)
 		__free_page(wqueue->notes[i]);
+	bitmap_free(wqueue->notes_bitmap);
 
 	wfilter = rcu_access_pointer(wqueue->filter);
 	if (wfilter)
@@ -566,7 +566,7 @@ void watch_queue_clear(struct watch_queue *wqueue)
 	rcu_read_lock();
 	spin_lock_bh(&wqueue->lock);
 
-	/* Prevent new additions and prevent notifications from happening */
+	/* Prevent new notifications from being stored. */
 	wqueue->defunct = true;
 
 	while (!hlist_empty(&wqueue->watches)) {
diff --git a/lib/crc32.c b/lib/crc32.c
index 2a68dfd3b96c..5649847d0a8d 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -194,13 +194,11 @@ u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
 #else
 u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
 {
-	return crc32_le_generic(crc, p, len,
-			(const u32 (*)[256])crc32table_le, CRC32_POLY_LE);
+	return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE);
 }
 u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
 {
-	return crc32_le_generic(crc, p, len,
-			(const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
+	return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE);
 }
 #endif
 EXPORT_SYMBOL(crc32_le);
@@ -208,6 +206,7 @@ EXPORT_SYMBOL(__crc32c_le);
 
 u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le);
 u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);
+u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be);
 
 /*
  * This multiplies the polynomials x and y modulo the given modulus.
@@ -332,15 +331,14 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
 }
 
 #if CRC_BE_BITS == 1
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len)
 {
 	return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE);
 }
 #else
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len)
 {
-	return crc32_be_generic(crc, p, len,
-			(const u32 (*)[256])crc32table_be, CRC32_POLY_BE);
+	return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE);
 }
 #endif
 EXPORT_SYMBOL(crc32_be);
diff --git a/lib/crc32test.c b/lib/crc32test.c
index 61ddce2cff77..9b4af79412c4 100644
--- a/lib/crc32test.c
+++ b/lib/crc32test.c
@@ -675,7 +675,7 @@ static int __init crc32c_test(void)
 
 	/* pre-warm the cache */
 	for (i = 0; i < 100; i++) {
-		bytes += 2*test[i].length;
+		bytes += test[i].length;
 
 		crc ^= __crc32c_le(test[i].crc, test_buf +
 		    test[i].start, test[i].length);
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index e8e525650cf2..379a66d7f504 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -123,6 +123,9 @@ config CRYPTO_LIB_CHACHA20POLY1305
 config CRYPTO_LIB_SHA256
 	tristate
 
+config CRYPTO_LIB_SM3
+	tristate
+
 config CRYPTO_LIB_SM4
 	tristate
 
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index ed43a41f2dcc..6c872d05d1e6 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -37,6 +37,9 @@ libpoly1305-y					+= poly1305.o
 obj-$(CONFIG_CRYPTO_LIB_SHA256)			+= libsha256.o
 libsha256-y					:= sha256.o
 
+obj-$(CONFIG_CRYPTO_LIB_SM3)			+= libsm3.o
+libsm3-y					:= sm3.o
+
 obj-$(CONFIG_CRYPTO_LIB_SM4)			+= libsm4.o
 libsm4-y					:= sm4.o
 
diff --git a/lib/crypto/sm3.c b/lib/crypto/sm3.c
new file mode 100644
index 000000000000..d473e358a873
--- /dev/null
+++ b/lib/crypto/sm3.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * SM3 secure hash, as specified by OSCCA GM/T 0004-2012 SM3 and described
+ * at https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02
+ *
+ * Copyright (C) 2017 ARM Limited or its affiliates.
+ * Copyright (C) 2017 Gilad Ben-Yossef <[email protected]>
+ * Copyright (C) 2021 Tianjia Zhang <[email protected]>
+ */
+
+#include <linux/module.h>
+#include <asm/unaligned.h>
+#include <crypto/sm3.h>
+
+static const u32 ____cacheline_aligned K[64] = {
+	0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb,
+	0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc,
+	0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce,
+	0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6,
+	0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c,
+	0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce,
+	0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec,
+	0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5,
+	0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53,
+	0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d,
+	0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4,
+	0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43,
+	0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c,
+	0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce,
+	0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec,
+	0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
+};
+
+/*
+ * Transform the message X which consists of 16 32-bit-words. See
+ * GM/T 004-2012 for details.
+ */
+#define R(i, a, b, c, d, e, f, g, h, t, w1, w2)			\
+	do {							\
+		ss1 = rol32((rol32((a), 12) + (e) + (t)), 7);	\
+		ss2 = ss1 ^ rol32((a), 12);			\
+		d += FF ## i(a, b, c) + ss2 + ((w1) ^ (w2));	\
+		h += GG ## i(e, f, g) + ss1 + (w1);		\
+		b = rol32((b), 9);				\
+		f = rol32((f), 19);				\
+		h = P0((h));					\
+	} while (0)
+
+#define R1(a, b, c, d, e, f, g, h, t, w1, w2) \
+	R(1, a, b, c, d, e, f, g, h, t, w1, w2)
+#define R2(a, b, c, d, e, f, g, h, t, w1, w2) \
+	R(2, a, b, c, d, e, f, g, h, t, w1, w2)
+
+#define FF1(x, y, z)  (x ^ y ^ z)
+#define FF2(x, y, z)  ((x & y) | (x & z) | (y & z))
+
+#define GG1(x, y, z)  FF1(x, y, z)
+#define GG2(x, y, z)  ((x & y) | (~x & z))
+
+/* Message expansion */
+#define P0(x) ((x) ^ rol32((x), 9) ^ rol32((x), 17))
+#define P1(x) ((x) ^ rol32((x), 15) ^ rol32((x), 23))
+#define I(i)  (W[i] = get_unaligned_be32(data + i * 4))
+#define W1(i) (W[i & 0x0f])
+#define W2(i) (W[i & 0x0f] =				\
+		P1(W[i & 0x0f]				\
+			^ W[(i-9) & 0x0f]		\
+			^ rol32(W[(i-3) & 0x0f], 15))	\
+		^ rol32(W[(i-13) & 0x0f], 7)		\
+		^ W[(i-6) & 0x0f])
+
+static void sm3_transform(struct sm3_state *sctx, u8 const *data, u32 W[16])
+{
+	u32 a, b, c, d, e, f, g, h, ss1, ss2;
+
+	a = sctx->state[0];
+	b = sctx->state[1];
+	c = sctx->state[2];
+	d = sctx->state[3];
+	e = sctx->state[4];
+	f = sctx->state[5];
+	g = sctx->state[6];
+	h = sctx->state[7];
+
+	R1(a, b, c, d, e, f, g, h, K[0], I(0), I(4));
+	R1(d, a, b, c, h, e, f, g, K[1], I(1), I(5));
+	R1(c, d, a, b, g, h, e, f, K[2], I(2), I(6));
+	R1(b, c, d, a, f, g, h, e, K[3], I(3), I(7));
+	R1(a, b, c, d, e, f, g, h, K[4], W1(4), I(8));
+	R1(d, a, b, c, h, e, f, g, K[5], W1(5), I(9));
+	R1(c, d, a, b, g, h, e, f, K[6], W1(6), I(10));
+	R1(b, c, d, a, f, g, h, e, K[7], W1(7), I(11));
+	R1(a, b, c, d, e, f, g, h, K[8], W1(8), I(12));
+	R1(d, a, b, c, h, e, f, g, K[9], W1(9), I(13));
+	R1(c, d, a, b, g, h, e, f, K[10], W1(10), I(14));
+	R1(b, c, d, a, f, g, h, e, K[11], W1(11), I(15));
+	R1(a, b, c, d, e, f, g, h, K[12], W1(12), W2(16));
+	R1(d, a, b, c, h, e, f, g, K[13], W1(13), W2(17));
+	R1(c, d, a, b, g, h, e, f, K[14], W1(14), W2(18));
+	R1(b, c, d, a, f, g, h, e, K[15], W1(15), W2(19));
+
+	R2(a, b, c, d, e, f, g, h, K[16], W1(16), W2(20));
+	R2(d, a, b, c, h, e, f, g, K[17], W1(17), W2(21));
+	R2(c, d, a, b, g, h, e, f, K[18], W1(18), W2(22));
+	R2(b, c, d, a, f, g, h, e, K[19], W1(19), W2(23));
+	R2(a, b, c, d, e, f, g, h, K[20], W1(20), W2(24));
+	R2(d, a, b, c, h, e, f, g, K[21], W1(21), W2(25));
+	R2(c, d, a, b, g, h, e, f, K[22], W1(22), W2(26));
+	R2(b, c, d, a, f, g, h, e, K[23], W1(23), W2(27));
+	R2(a, b, c, d, e, f, g, h, K[24], W1(24), W2(28));
+	R2(d, a, b, c, h, e, f, g, K[25], W1(25), W2(29));
+	R2(c, d, a, b, g, h, e, f, K[26], W1(26), W2(30));
+	R2(b, c, d, a, f, g, h, e, K[27], W1(27), W2(31));
+	R2(a, b, c, d, e, f, g, h, K[28], W1(28), W2(32));
+	R2(d, a, b, c, h, e, f, g, K[29], W1(29), W2(33));
+	R2(c, d, a, b, g, h, e, f, K[30], W1(30), W2(34));
+	R2(b, c, d, a, f, g, h, e, K[31], W1(31), W2(35));
+
+	R2(a, b, c, d, e, f, g, h, K[32], W1(32), W2(36));
+	R2(d, a, b, c, h, e, f, g, K[33], W1(33), W2(37));
+	R2(c, d, a, b, g, h, e, f, K[34], W1(34), W2(38));
+	R2(b, c, d, a, f, g, h, e, K[35], W1(35), W2(39));
+	R2(a, b, c, d, e, f, g, h, K[36], W1(36), W2(40));
+	R2(d, a, b, c, h, e, f, g, K[37], W1(37), W2(41));
+	R2(c, d, a, b, g, h, e, f, K[38], W1(38), W2(42));
+	R2(b, c, d, a, f, g, h, e, K[39], W1(39), W2(43));
+	R2(a, b, c, d, e, f, g, h, K[40], W1(40), W2(44));
+	R2(d, a, b, c, h, e, f, g, K[41], W1(41), W2(45));
+	R2(c, d, a, b, g, h, e, f, K[42], W1(42), W2(46));
+	R2(b, c, d, a, f, g, h, e, K[43], W1(43), W2(47));
+	R2(a, b, c, d, e, f, g, h, K[44], W1(44), W2(48));
+	R2(d, a, b, c, h, e, f, g, K[45], W1(45), W2(49));
+	R2(c, d, a, b, g, h, e, f, K[46], W1(46), W2(50));
+	R2(b, c, d, a, f, g, h, e, K[47], W1(47), W2(51));
+
+	R2(a, b, c, d, e, f, g, h, K[48], W1(48), W2(52));
+	R2(d, a, b, c, h, e, f, g, K[49], W1(49), W2(53));
+	R2(c, d, a, b, g, h, e, f, K[50], W1(50), W2(54));
+	R2(b, c, d, a, f, g, h, e, K[51], W1(51), W2(55));
+	R2(a, b, c, d, e, f, g, h, K[52], W1(52), W2(56));
+	R2(d, a, b, c, h, e, f, g, K[53], W1(53), W2(57));
+	R2(c, d, a, b, g, h, e, f, K[54], W1(54), W2(58));
+	R2(b, c, d, a, f, g, h, e, K[55], W1(55), W2(59));
+	R2(a, b, c, d, e, f, g, h, K[56], W1(56), W2(60));
+	R2(d, a, b, c, h, e, f, g, K[57], W1(57), W2(61));
+	R2(c, d, a, b, g, h, e, f, K[58], W1(58), W2(62));
+	R2(b, c, d, a, f, g, h, e, K[59], W1(59), W2(63));
+	R2(a, b, c, d, e, f, g, h, K[60], W1(60), W2(64));
+	R2(d, a, b, c, h, e, f, g, K[61], W1(61), W2(65));
+	R2(c, d, a, b, g, h, e, f, K[62], W1(62), W2(66));
+	R2(b, c, d, a, f, g, h, e, K[63], W1(63), W2(67));
+
+	sctx->state[0] ^= a;
+	sctx->state[1] ^= b;
+	sctx->state[2] ^= c;
+	sctx->state[3] ^= d;
+	sctx->state[4] ^= e;
+	sctx->state[5] ^= f;
+	sctx->state[6] ^= g;
+	sctx->state[7] ^= h;
+}
+#undef R
+#undef R1
+#undef R2
+#undef I
+#undef W1
+#undef W2
+
+static inline void sm3_block(struct sm3_state *sctx,
+		u8 const *data, int blocks, u32 W[16])
+{
+	while (blocks--) {
+		sm3_transform(sctx, data, W);
+		data += SM3_BLOCK_SIZE;
+	}
+}
+
+void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len)
+{
+	unsigned int partial = sctx->count % SM3_BLOCK_SIZE;
+	u32 W[16];
+
+	sctx->count += len;
+
+	if ((partial + len) >= SM3_BLOCK_SIZE) {
+		int blocks;
+
+		if (partial) {
+			int p = SM3_BLOCK_SIZE - partial;
+
+			memcpy(sctx->buffer + partial, data, p);
+			data += p;
+			len -= p;
+
+			sm3_block(sctx, sctx->buffer, 1, W);
+		}
+
+		blocks = len / SM3_BLOCK_SIZE;
+		len %= SM3_BLOCK_SIZE;
+
+		if (blocks) {
+			sm3_block(sctx, data, blocks, W);
+			data += blocks * SM3_BLOCK_SIZE;
+		}
+
+		memzero_explicit(W, sizeof(W));
+
+		partial = 0;
+	}
+	if (len)
+		memcpy(sctx->buffer + partial, data, len);
+}
+EXPORT_SYMBOL_GPL(sm3_update);
+
+void sm3_final(struct sm3_state *sctx, u8 *out)
+{
+	const int bit_offset = SM3_BLOCK_SIZE - sizeof(u64);
+	__be64 *bits = (__be64 *)(sctx->buffer + bit_offset);
+	__be32 *digest = (__be32 *)out;
+	unsigned int partial = sctx->count % SM3_BLOCK_SIZE;
+	u32 W[16];
+	int i;
+
+	sctx->buffer[partial++] = 0x80;
+	if (partial > bit_offset) {
+		memset(sctx->buffer + partial, 0, SM3_BLOCK_SIZE - partial);
+		partial = 0;
+
+		sm3_block(sctx, sctx->buffer, 1, W);
+	}
+
+	memset(sctx->buffer + partial, 0, bit_offset - partial);
+	*bits = cpu_to_be64(sctx->count << 3);
+	sm3_block(sctx, sctx->buffer, 1, W);
+
+	for (i = 0; i < 8; i++)
+		put_unaligned_be32(sctx->state[i], digest++);
+
+	/* Zeroize sensitive information. */
+	memzero_explicit(W, sizeof(W));
+	memzero_explicit(sctx, sizeof(*sctx));
+}
+EXPORT_SYMBOL_GPL(sm3_final);
+
+MODULE_DESCRIPTION("Generic SM3 library");
+MODULE_LICENSE("GPL v2");
diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c
index 142b680835df..070ba784c9f1 100644
--- a/lib/mpi/mpi-bit.c
+++ b/lib/mpi/mpi-bit.c
@@ -242,6 +242,7 @@ void mpi_rshift(MPI x, MPI a, unsigned int n)
 	}
 	MPN_NORMALIZE(x->d, x->nlimbs);
 }
+EXPORT_SYMBOL_GPL(mpi_rshift);
 
 /****************
  * Shift A by COUNT limbs to the left
diff --git a/lib/random32.c b/lib/random32.c
index a57a0e18819d..976632003ec6 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -41,7 +41,6 @@
 #include <linux/bitops.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
-#include <trace/events/random.h>
 
 /**
  *	prandom_u32_state - seeded pseudo-random number generator.
@@ -387,7 +386,6 @@ u32 prandom_u32(void)
 	struct siprand_state *state = get_cpu_ptr(&net_rand_state);
 	u32 res = siprand_u32(state);
 
-	trace_prandom_u32(res);
 	put_cpu_ptr(&net_rand_state);
 	return res;
 }
@@ -553,9 +551,11 @@ static void prandom_reseed(struct timer_list *unused)
  * To avoid worrying about whether it's safe to delay that interrupt
  * long enough to seed all CPUs, just schedule an immediate timer event.
  */
-static void prandom_timer_start(struct random_ready_callback *unused)
+static int prandom_timer_start(struct notifier_block *nb,
+			       unsigned long action, void *data)
 {
 	mod_timer(&seed_timer, jiffies);
+	return 0;
 }
 
 #ifdef CONFIG_RANDOM32_SELFTEST
@@ -619,13 +619,13 @@ core_initcall(prandom32_state_selftest);
  */
 static int __init prandom_init_late(void)
 {
-	static struct random_ready_callback random_ready = {
-		.func = prandom_timer_start
+	static struct notifier_block random_ready = {
+		.notifier_call = prandom_timer_start
 	};
-	int ret = add_random_ready_callback(&random_ready);
+	int ret = register_random_ready_notifier(&random_ready);
 
 	if (ret == -EALREADY) {
-		prandom_timer_start(&random_ready);
+		prandom_timer_start(&random_ready, 0, NULL);
 		ret = 0;
 	}
 	return ret;
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 3b8129dd374c..36574a806a81 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -757,14 +757,16 @@ static void enable_ptr_key_workfn(struct work_struct *work)
 
 static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
 
-static void fill_random_ptr_key(struct random_ready_callback *unused)
+static int fill_random_ptr_key(struct notifier_block *nb,
+			       unsigned long action, void *data)
 {
 	/* This may be in an interrupt handler. */
 	queue_work(system_unbound_wq, &enable_ptr_key_work);
+	return 0;
 }
 
-static struct random_ready_callback random_ready = {
-	.func = fill_random_ptr_key
+static struct notifier_block random_ready = {
+	.notifier_call = fill_random_ptr_key
 };
 
 static int __init initialize_ptr_random(void)
@@ -778,7 +780,7 @@ static int __init initialize_ptr_random(void)
 		return 0;
 	}
 
-	ret = add_random_ready_callback(&random_ready);
+	ret = register_random_ready_notifier(&random_ready);
 	if (!ret) {
 		return 0;
 	} else if (ret == -EALREADY) {
diff --git a/mm/gup.c b/mm/gup.c
index a9d4d724aef7..7bc1ba9ce440 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1729,11 +1729,11 @@ EXPORT_SYMBOL(fault_in_writeable);
  * @uaddr: start of address range
  * @size: length of address range
  *
- * Faults in an address range using get_user_pages, i.e., without triggering
- * hardware page faults.  This is primarily useful when we already know that
- * some or all of the pages in the address range aren't in memory.
+ * Faults in an address range for writing.  This is primarily useful when we
+ * already know that some or all of the pages in the address range aren't in
+ * memory.
  *
- * Other than fault_in_writeable(), this function is non-destructive.
+ * Unlike fault_in_writeable(), this function is non-destructive.
  *
  * Note that we don't pin or otherwise hold the pages referenced that we fault
  * in.  There's no guarantee that they'll stay in memory for any duration of
@@ -1744,46 +1744,27 @@ EXPORT_SYMBOL(fault_in_writeable);
  */
 size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
 {
-	unsigned long start = (unsigned long)untagged_addr(uaddr);
-	unsigned long end, nstart, nend;
+	unsigned long start = (unsigned long)uaddr, end;
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma = NULL;
-	int locked = 0;
+	bool unlocked = false;
 
-	nstart = start & PAGE_MASK;
+	if (unlikely(size == 0))
+		return 0;
 	end = PAGE_ALIGN(start + size);
-	if (end < nstart)
+	if (end < start)
 		end = 0;
-	for (; nstart != end; nstart = nend) {
-		unsigned long nr_pages;
-		long ret;
 
-		if (!locked) {
-			locked = 1;
-			mmap_read_lock(mm);
-			vma = find_vma(mm, nstart);
-		} else if (nstart >= vma->vm_end)
-			vma = vma->vm_next;
-		if (!vma || vma->vm_start >= end)
-			break;
-		nend = end ? min(end, vma->vm_end) : vma->vm_end;
-		if (vma->vm_flags & (VM_IO | VM_PFNMAP))
-			continue;
-		if (nstart < vma->vm_start)
-			nstart = vma->vm_start;
-		nr_pages = (nend - nstart) / PAGE_SIZE;
-		ret = __get_user_pages_locked(mm, nstart, nr_pages,
-					      NULL, NULL, &locked,
-					      FOLL_TOUCH | FOLL_WRITE);
-		if (ret <= 0)
+	mmap_read_lock(mm);
+	do {
+		if (fixup_user_fault(mm, start, FAULT_FLAG_WRITE, &unlocked))
 			break;
-		nend = nstart + ret * PAGE_SIZE;
-	}
-	if (locked)
-		mmap_read_unlock(mm);
-	if (nstart == end)
-		return 0;
-	return size - min_t(size_t, nstart - start, size);
+		start = (start + PAGE_SIZE) & PAGE_MASK;
+	} while (start != end);
+	mmap_read_unlock(mm);
+
+	if (size > (unsigned long)uaddr - start)
+		return size - ((unsigned long)uaddr - start);
+	return 0;
 }
 EXPORT_SYMBOL(fault_in_safe_writeable);
 
diff --git a/mm/init-mm.c b/mm/init-mm.c
index b4a6f38fb51d..fbe7844d0912 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -10,6 +10,7 @@
 
 #include <linux/atomic.h>
 #include <linux/user_namespace.h>
+#include <linux/ioasid.h>
 #include <asm/mmu.h>
 
 #ifndef INIT_MM_CONTEXT
@@ -38,6 +39,9 @@ struct mm_struct init_mm = {
 	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
 	.user_ns	= &init_user_ns,
 	.cpu_bitmap	= CPU_BITS_NONE,
+#ifdef CONFIG_IOMMU_SVA
+	.pasid		= INVALID_IOASID,
+#endif
 	INIT_MM_CONTEXT(init_mm)
 };
 
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 8d4104242100..ee67164531c0 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -478,7 +478,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * __read_swap_cache_async(), which has set SWAP_HAS_CACHE
 		 * in swap_map, but not yet added its page to swap cache.
 		 */
-		cond_resched();
+		schedule_timeout_uninterruptible(1);
 	}
 
 	/*
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index eb9fb55280ef..01f8067994d6 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -281,9 +281,9 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
 				ref = priv->rings[i].intf->ref[j];
 				gnttab_end_foreign_access(ref, 0, 0);
 			}
-			free_pages((unsigned long)priv->rings[i].data.in,
-				   priv->rings[i].intf->ring_order -
-				   (PAGE_SHIFT - XEN_PAGE_SHIFT));
+			free_pages_exact(priv->rings[i].data.in,
+				   1UL << (priv->rings[i].intf->ring_order +
+					   XEN_PAGE_SHIFT));
 		}
 		gnttab_end_foreign_access(priv->rings[i].ref, 0, 0);
 		free_page((unsigned long)priv->rings[i].intf);
@@ -322,8 +322,8 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
 	if (ret < 0)
 		goto out;
 	ring->ref = ret;
-	bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-			order - (PAGE_SHIFT - XEN_PAGE_SHIFT));
+	bytes = alloc_pages_exact(1UL << (order + XEN_PAGE_SHIFT),
+				  GFP_KERNEL | __GFP_ZERO);
 	if (!bytes) {
 		ret = -ENOMEM;
 		goto out;
@@ -354,9 +354,7 @@ out:
 	if (bytes) {
 		for (i--; i >= 0; i--)
 			gnttab_end_foreign_access(ring->intf->ref[i], 0, 0);
-		free_pages((unsigned long)bytes,
-			   ring->intf->ring_order -
-			   (PAGE_SHIFT - XEN_PAGE_SHIFT));
+		free_pages_exact(bytes, 1UL << (order + XEN_PAGE_SHIFT));
 	}
 	gnttab_end_foreign_access(ring->ref, 0, 0);
 	free_page((unsigned long)ring->intf);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index d53cbb4e2503..6bd097180772 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -87,6 +87,13 @@ again:
 	ax25_for_each(s, &ax25_list) {
 		if (s->ax25_dev == ax25_dev) {
 			sk = s->sk;
+			if (!sk) {
+				spin_unlock_bh(&ax25_list_lock);
+				s->ax25_dev = NULL;
+				ax25_disconnect(s, ENETUNREACH);
+				spin_lock_bh(&ax25_list_lock);
+				goto again;
+			}
 			sock_hold(sk);
 			spin_unlock_bh(&ax25_list_lock);
 			lock_sock(sk);
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 9ba2a1a7d481..ab9aa700b6b3 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -276,40 +276,37 @@ EXPORT_SYMBOL(__hci_cmd_sync_status);
 static void hci_cmd_sync_work(struct work_struct *work)
 {
 	struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_sync_work);
-	struct hci_cmd_sync_work_entry *entry;
-	hci_cmd_sync_work_func_t func;
-	hci_cmd_sync_work_destroy_t destroy;
-	void *data;
 
 	bt_dev_dbg(hdev, "");
 
-	mutex_lock(&hdev->cmd_sync_work_lock);
-	entry = list_first_entry(&hdev->cmd_sync_work_list,
-				 struct hci_cmd_sync_work_entry, list);
-	if (entry) {
-		list_del(&entry->list);
-		func = entry->func;
-		data = entry->data;
-		destroy = entry->destroy;
-		kfree(entry);
-	} else {
-		func = NULL;
-		data = NULL;
-		destroy = NULL;
-	}
-	mutex_unlock(&hdev->cmd_sync_work_lock);
+	/* Dequeue all entries and run them */
+	while (1) {
+		struct hci_cmd_sync_work_entry *entry;
 
-	if (func) {
-		int err;
+		mutex_lock(&hdev->cmd_sync_work_lock);
+		entry = list_first_entry_or_null(&hdev->cmd_sync_work_list,
+						 struct hci_cmd_sync_work_entry,
+						 list);
+		if (entry)
+			list_del(&entry->list);
+		mutex_unlock(&hdev->cmd_sync_work_lock);
+
+		if (!entry)
+			break;
 
-		hci_req_sync_lock(hdev);
+		bt_dev_dbg(hdev, "entry %p", entry);
 
-		err = func(hdev, data);
+		if (entry->func) {
+			int err;
 
-		if (destroy)
-			destroy(hdev, data, err);
+			hci_req_sync_lock(hdev);
+			err = entry->func(hdev, entry->data);
+			if (entry->destroy)
+				entry->destroy(hdev, entry->data, err);
+			hci_req_sync_unlock(hdev);
+		}
 
-		hci_req_sync_unlock(hdev);
+		kfree(entry);
 	}
 }
 
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 533cf60673a3..230a7a8196c0 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -4541,9 +4541,9 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
 		}
 	}
 
-done:
 	hci_dev_unlock(hdev);
 
+done:
 	if (status == MGMT_STATUS_SUCCESS)
 		device_flags_changed(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
 				     supported_flags, current_flags);
diff --git a/net/core/gro.c b/net/core/gro.c
index a11b286d1495..b7d2b0dc59a2 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -93,6 +93,31 @@ void dev_remove_offload(struct packet_offload *po)
 EXPORT_SYMBOL(dev_remove_offload);
 
 /**
+ *	skb_eth_gso_segment - segmentation handler for ethernet protocols.
+ *	@skb: buffer to segment
+ *	@features: features for the output path (see dev->features)
+ *	@type: Ethernet Protocol ID
+ */
+struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
+				    netdev_features_t features, __be16 type)
+{
+	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+	struct packet_offload *ptype;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ptype, &offload_base, list) {
+		if (ptype->type == type && ptype->callbacks.gso_segment) {
+			segs = ptype->callbacks.gso_segment(skb, features);
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return segs;
+}
+EXPORT_SYMBOL(skb_eth_gso_segment);
+
+/**
  *	skb_mac_gso_segment - mac layer segmentation handler.
  *	@skb: buffer to segment
  *	@features: features for the output path (see dev->features)
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 7aba35504986..73fae16264e1 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -357,7 +357,8 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 	if (IS_ERR(xdp_alloc))
 		return PTR_ERR(xdp_alloc);
 
-	trace_mem_connect(xdp_alloc, xdp_rxq);
+	if (trace_mem_connect_enabled() && xdp_alloc)
+		trace_mem_connect(xdp_alloc, xdp_rxq);
 	return 0;
 }
 
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index b4e67758e104..88e2808019b4 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -1058,7 +1058,7 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
 static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
 {
 	struct dsa_port *dp;
-	int err;
+	int err = 0;
 
 	rtnl_lock();
 
@@ -1066,13 +1066,13 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
 		if (dsa_port_is_cpu(dp)) {
 			err = dsa_master_setup(dp->master, dp);
 			if (err)
-				return err;
+				break;
 		}
 	}
 
 	rtnl_unlock();
 
-	return 0;
+	return err;
 }
 
 static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
@@ -1436,6 +1436,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
 		const char *user_protocol;
 
 		master = of_find_net_device_by_node(ethernet);
+		of_node_put(ethernet);
 		if (!master)
 			return -EPROBE_DEFER;
 
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index e1b1d080e908..70e6c87fbe3d 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -446,6 +446,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 	struct page *page;
 	struct sk_buff *trailer;
 	int tailen = esp->tailen;
+	unsigned int allocsz;
 
 	/* this is non-NULL only with TCP/UDP Encapsulation */
 	if (x->encap) {
@@ -455,6 +456,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 			return err;
 	}
 
+	allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES);
+	if (allocsz > ESP_SKB_FRAG_MAXSIZE)
+		goto cow;
+
 	if (!skb_cloned(skb)) {
 		if (tailen <= skb_tailroom(skb)) {
 			nfrags = 1;
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index d87f02a6e934..935026f4c807 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -110,8 +110,7 @@ static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x,
 						struct sk_buff *skb,
 						netdev_features_t features)
 {
-	__skb_push(skb, skb->mac_len);
-	return skb_mac_gso_segment(skb, features);
+	return skb_eth_gso_segment(skb, features, htons(ETH_P_IP));
 }
 
 static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
@@ -160,6 +159,9 @@ static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x,
 			skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
 	}
 
+	if (proto == IPPROTO_IPV6)
+		skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4;
+
 	__skb_pull(skb, skb_transport_offset(skb));
 	ops = rcu_dereference(inet_offloads[proto]);
 	if (likely(ops && ops->callbacks.gso_segment))
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 7591160edce1..55d604c9b3b3 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -482,6 +482,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 	struct page *page;
 	struct sk_buff *trailer;
 	int tailen = esp->tailen;
+	unsigned int allocsz;
 
 	if (x->encap) {
 		int err = esp6_output_encap(x, skb, esp);
@@ -490,6 +491,10 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 			return err;
 	}
 
+	allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES);
+	if (allocsz > ESP_SKB_FRAG_MAXSIZE)
+		goto cow;
+
 	if (!skb_cloned(skb)) {
 		if (tailen <= skb_tailroom(skb)) {
 			nfrags = 1;
@@ -807,8 +812,7 @@ int esp6_input_done2(struct sk_buff *skb, int err)
 		struct tcphdr *th;
 
 		offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
-
-		if (offset < 0) {
+		if (offset == -1) {
 			err = -EINVAL;
 			goto out;
 		}
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index ba5e81cd569c..3a293838a91d 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -145,8 +145,7 @@ static struct sk_buff *xfrm6_tunnel_gso_segment(struct xfrm_state *x,
 						struct sk_buff *skb,
 						netdev_features_t features)
 {
-	__skb_push(skb, skb->mac_len);
-	return skb_mac_gso_segment(skb, features);
+	return skb_eth_gso_segment(skb, features, htons(ETH_P_IPV6));
 }
 
 static struct sk_buff *xfrm6_transport_gso_segment(struct xfrm_state *x,
@@ -199,6 +198,9 @@ static struct sk_buff *xfrm6_beet_gso_segment(struct xfrm_state *x,
 			ipv6_skip_exthdr(skb, 0, &proto, &frag);
 	}
 
+	if (proto == IPPROTO_IPIP)
+		skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6;
+
 	__skb_pull(skb, skb_transport_offset(skb));
 	ops = rcu_dereference(inet6_offloads[proto]);
 	if (likely(ops && ops->callbacks.gso_segment))
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4788f6b37053..194832663d85 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1476,8 +1476,8 @@ static int __ip6_append_data(struct sock *sk,
 		      sizeof(struct frag_hdr) : 0) +
 		     rt->rt6i_nfheader_len;
 
-	if (mtu < fragheaderlen ||
-	    ((mtu - fragheaderlen) & ~7) + fragheaderlen < sizeof(struct frag_hdr))
+	if (mtu <= fragheaderlen ||
+	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
 		goto emsgsize;
 
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index d0d280077721..ad07904642ca 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -45,6 +45,19 @@ static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buf
 	return xfrm_output(sk, skb);
 }
 
+static int xfrm6_noneed_fragment(struct sk_buff *skb)
+{
+	struct frag_hdr *fh;
+	u8 prevhdr = ipv6_hdr(skb)->nexthdr;
+
+	if (prevhdr != NEXTHDR_FRAGMENT)
+		return 0;
+	fh = (struct frag_hdr *)(skb->data + sizeof(struct ipv6hdr));
+	if (fh->nexthdr == NEXTHDR_ESP || fh->nexthdr == NEXTHDR_AUTH)
+		return 1;
+	return 0;
+}
+
 static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
@@ -73,6 +86,9 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		xfrm6_local_rxpmtu(skb, mtu);
 		kfree_skb(skb);
 		return -EMSGSIZE;
+	} else if (toobig && xfrm6_noneed_fragment(skb)) {
+		skb->ignore_df = 1;
+		goto skip_frag;
 	} else if (!skb->ignore_df && toobig && skb->sk) {
 		xfrm_local_error(skb, mtu);
 		kfree_skb(skb);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9bf52a09b5ff..fd51db3be91c 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1699,7 +1699,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad
 
 	xfrm_probe_algs();
 
-	supp_skb = compose_sadb_supported(hdr, GFP_KERNEL);
+	supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO);
 	if (!supp_skb) {
 		if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC)
 			pfk->registered &= ~(1<<hdr->sadb_msg_satype);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d6aa5b47031e..bf1e17c678f1 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1748,9 +1748,6 @@ resolve_normal_ct(struct nf_conn *tmpl,
 			return 0;
 		if (IS_ERR(h))
 			return PTR_ERR(h);
-
-		ct = nf_ct_tuplehash_to_ctrack(h);
-		ct->local_origin = state->hook == NF_INET_LOCAL_OUT;
 	}
 	ct = nf_ct_tuplehash_to_ctrack(h);
 
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 2d06a66899b2..ffcf6529afc3 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -494,38 +494,6 @@ another_round:
 	goto another_round;
 }
 
-static bool tuple_force_port_remap(const struct nf_conntrack_tuple *tuple)
-{
-	u16 sp, dp;
-
-	switch (tuple->dst.protonum) {
-	case IPPROTO_TCP:
-		sp = ntohs(tuple->src.u.tcp.port);
-		dp = ntohs(tuple->dst.u.tcp.port);
-		break;
-	case IPPROTO_UDP:
-	case IPPROTO_UDPLITE:
-		sp = ntohs(tuple->src.u.udp.port);
-		dp = ntohs(tuple->dst.u.udp.port);
-		break;
-	default:
-		return false;
-	}
-
-	/* IANA: System port range: 1-1023,
-	 *         user port range: 1024-49151,
-	 *      private port range: 49152-65535.
-	 *
-	 * Linux default ephemeral port range is 32768-60999.
-	 *
-	 * Enforce port remapping if sport is significantly lower
-	 * than dport to prevent NAT port shadowing, i.e.
-	 * accidental match of 'new' inbound connection vs.
-	 * existing outbound one.
-	 */
-	return sp < 16384 && dp >= 32768;
-}
-
 /* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
  * we change the source to map into the range. For NF_INET_PRE_ROUTING
  * and NF_INET_LOCAL_OUT, we change the destination to map into the
@@ -539,17 +507,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 struct nf_conn *ct,
 		 enum nf_nat_manip_type maniptype)
 {
-	bool random_port = range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL;
 	const struct nf_conntrack_zone *zone;
 	struct net *net = nf_ct_net(ct);
 
 	zone = nf_ct_zone(ct);
 
-	if (maniptype == NF_NAT_MANIP_SRC &&
-	    !random_port &&
-	    !ct->local_origin)
-		random_port = tuple_force_port_remap(orig_tuple);
-
 	/* 1) If this srcip/proto/src-proto-part is currently mapped,
 	 * and that same mapping gives a unique tuple within the given
 	 * range, use that.
@@ -558,7 +520,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	 * So far, we don't do local source mappings, so multiple
 	 * manips not an issue.
 	 */
-	if (maniptype == NF_NAT_MANIP_SRC && !random_port) {
+	if (maniptype == NF_NAT_MANIP_SRC &&
+	    !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
 		/* try the original tuple first */
 		if (in_range(orig_tuple, range)) {
 			if (!nf_nat_used_tuple(orig_tuple, ct)) {
@@ -582,7 +545,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	 */
 
 	/* Only bother mapping if it's not already in range and unique */
-	if (!random_port) {
+	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
 		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
 			    l4proto_in_range(tuple, maniptype,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c86748b3873b..d71a33ae39b3 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8260,6 +8260,12 @@ void nf_tables_trans_destroy_flush_work(void)
 }
 EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
 
+static bool nft_expr_reduce(struct nft_regs_track *track,
+			    const struct nft_expr *expr)
+{
+	return false;
+}
+
 static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
 {
 	const struct nft_expr *expr, *last;
@@ -8307,8 +8313,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
 		nft_rule_for_each_expr(expr, last, rule) {
 			track.cur = expr;
 
-			if (expr->ops->reduce &&
-			    expr->ops->reduce(&track, expr)) {
+			if (nft_expr_reduce(&track, expr)) {
 				expr = track.cur;
 				continue;
 			}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ab87f22cc7ec..a7273af2d900 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2317,8 +2317,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 					copy_skb = skb_get(skb);
 					skb_head = skb->data;
 				}
-				if (copy_skb)
+				if (copy_skb) {
+					memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
+					       sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
 					skb_set_owner_r(copy_skb, sk);
+				}
 			}
 			snaplen = po->rx_ring.frame_size - macoff;
 			if ((int)snaplen < 0) {
@@ -3462,6 +3465,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (msg->msg_name) {
+		const size_t max_len = min(sizeof(skb->cb),
+					   sizeof(struct sockaddr_storage));
 		int copy_len;
 
 		/* If the address length field is there to be filled
@@ -3484,6 +3489,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 				msg->msg_namelen = sizeof(struct sockaddr_ll);
 			}
 		}
+		if (WARN_ON_ONCE(copy_len > max_len)) {
+			copy_len = max_len;
+			msg->msg_namelen = copy_len;
+		}
 		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
 	}
 
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 034e2c74497d..d9c6d8f30f09 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -61,10 +61,6 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
 		r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
 		r->idiag_retrans = asoc->rtx_data_chunks;
 		r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies);
-	} else {
-		r->idiag_timer = 0;
-		r->idiag_retrans = 0;
-		r->idiag_expires = 0;
 	}
 }
 
@@ -144,13 +140,14 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
 	r = nlmsg_data(nlh);
 	BUG_ON(!sk_fullsock(sk));
 
+	r->idiag_timer = 0;
+	r->idiag_retrans = 0;
+	r->idiag_expires = 0;
 	if (asoc) {
 		inet_diag_msg_sctpasoc_fill(r, sk, asoc);
 	} else {
 		inet_diag_msg_common_fill(r, sk);
 		r->idiag_state = sk->sk_state;
-		r->idiag_timer = 0;
-		r->idiag_retrans = 0;
 	}
 
 	if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 473a790f5894..a2f9c9640716 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -352,16 +352,18 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 		goto rejected;
 	}
 
-	test_and_set_bit_lock(0, &b->up);
-	rcu_assign_pointer(tn->bearer_list[bearer_id], b);
-	if (skb)
-		tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
-
+	/* Create monitoring data before accepting activate messages */
 	if (tipc_mon_create(net, bearer_id)) {
 		bearer_disable(net, b);
+		kfree_skb(skb);
 		return -ENOMEM;
 	}
 
+	test_and_set_bit_lock(0, &b->up);
+	rcu_assign_pointer(tn->bearer_list[bearer_id], b);
+	if (skb)
+		tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
+
 	pr_info("Enabled bearer <%s>, priority %u\n", name, prio);
 
 	return res;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1e14d7f8f28f..e260c0d557f5 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2286,6 +2286,11 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 		break;
 
 	case STATE_MSG:
+		/* Validate Gap ACK blocks, drop if invalid */
+		glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
+		if (glen > dlen)
+			break;
+
 		l->rcv_nxt_state = msg_seqno(hdr) + 1;
 
 		/* Update own tolerance if peer indicates a non-zero value */
@@ -2311,10 +2316,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 			break;
 		}
 
-		/* Receive Gap ACK blocks from peer if any */
-		glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
-		if(glen > dlen)
-			break;
 		tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr,
 			     &l->mon_state, l->bearer_id);
 
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 38baeb189d4e..f04abf662ec6 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -334,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk)
 }
 EXPORT_SYMBOL_GPL(vsock_remove_sock);
 
-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
+void vsock_for_each_connected_socket(struct vsock_transport *transport,
+				     void (*fn)(struct sock *sk))
 {
 	int i;
 
@@ -343,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
 	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
 		struct vsock_sock *vsk;
 		list_for_each_entry(vsk, &vsock_connected_table[i],
-				    connected_table)
+				    connected_table) {
+			if (vsk->transport != transport)
+				continue;
+
 			fn(sk_vsock(vsk));
+		}
 	}
 
 	spin_unlock_bh(&vsock_table_lock);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index fb3302fff627..5afc194a58bb 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -24,6 +24,7 @@
 static struct workqueue_struct *virtio_vsock_workqueue;
 static struct virtio_vsock __rcu *the_virtio_vsock;
 static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
+static struct virtio_transport virtio_transport; /* forward declaration */
 
 struct virtio_vsock {
 	struct virtio_device *vdev;
@@ -384,7 +385,8 @@ static void virtio_vsock_event_handle(struct virtio_vsock *vsock,
 	switch (le32_to_cpu(event->id)) {
 	case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET:
 		virtio_vsock_update_guest_cid(vsock);
-		vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+		vsock_for_each_connected_socket(&virtio_transport.transport,
+						virtio_vsock_reset_sock);
 		break;
 	}
 }
@@ -662,7 +664,8 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
 	synchronize_rcu();
 
 	/* Reset all connected sockets when the device disappear */
-	vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+	vsock_for_each_connected_socket(&virtio_transport.transport,
+					virtio_vsock_reset_sock);
 
 	/* Stop all work handlers to make sure no one is accessing the device,
 	 * so we can safely call virtio_reset_device().
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 7aef34e32bdf..b17dc9745188 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -75,6 +75,8 @@ static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
 
 static int PROTOCOL_OVERRIDE = -1;
 
+static struct vsock_transport vmci_transport; /* forward declaration */
+
 /* Helper function to convert from a VMCI error code to a VSock error code. */
 
 static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
@@ -882,7 +884,8 @@ static void vmci_transport_qp_resumed_cb(u32 sub_id,
 					 const struct vmci_event_data *e_data,
 					 void *client_data)
 {
-	vsock_for_each_connected_socket(vmci_transport_handle_detach);
+	vsock_for_each_connected_socket(&vmci_transport,
+					vmci_transport_handle_detach);
 }
 
 static void vmci_transport_recv_pkt_work(struct work_struct *work)
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 4e3c62d1ad9e..1e8b26eecb3f 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -304,7 +304,10 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 			if (mtu < IPV6_MIN_MTU)
 				mtu = IPV6_MIN_MTU;
 
-			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+			if (skb->len > 1280)
+				icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+			else
+				goto xmit;
 		} else {
 			if (!(ip_hdr(skb)->frag_off & htons(IP_DF)))
 				goto xmit;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index a4fb596e87af..72b2f173aac8 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -630,13 +630,8 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	xfrm_smark_init(attrs, &x->props.smark);
 
-	if (attrs[XFRMA_IF_ID]) {
+	if (attrs[XFRMA_IF_ID])
 		x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
-		if (!x->if_id) {
-			err = -EINVAL;
-			goto error;
-		}
-	}
 
 	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
 	if (err)
@@ -1432,13 +1427,8 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	mark = xfrm_mark_get(attrs, &m);
 
-	if (attrs[XFRMA_IF_ID]) {
+	if (attrs[XFRMA_IF_ID])
 		if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
-		if (!if_id) {
-			err = -EINVAL;
-			goto out_noput;
-		}
-	}
 
 	if (p->info.seq) {
 		x = xfrm_find_acq_byseq(net, mark, p->info.seq);
@@ -1751,13 +1741,8 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us
 
 	xfrm_mark_get(attrs, &xp->mark);
 
-	if (attrs[XFRMA_IF_ID]) {
+	if (attrs[XFRMA_IF_ID])
 		xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
-		if (!xp->if_id) {
-			err = -EINVAL;
-			goto error;
-		}
-	}
 
 	return xp;
  error:
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 3106b7536b89..9c084a2ba3b0 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -12,202 +12,46 @@ use strict;
 ## 								 ##
 ## #define enhancements by Armin Kuster <[email protected]>	 ##
 ## Copyright (c) 2000 MontaVista Software, Inc.			 ##
-## 								 ##
-## This software falls under the GNU General Public License.     ##
-## Please read the COPYING file for more information             ##
+#
+# Copyright (C) 2022 Tomasz Warniełło (POD)
 
-# 18/01/2001 - 	Cleanups
-# 		Functions prototyped as foo(void) same as foo()
-# 		Stop eval'ing where we don't need to.
-# -- [email protected]
+use Pod::Usage qw/pod2usage/;
 
-# 27/06/2001 -  Allowed whitespace after initial "/**" and
-#               allowed comments before function declarations.
-# -- Christian Kreibich <[email protected]>
+=head1 NAME
 
-# Still to do:
-# 	- add perldoc documentation
-# 	- Look more closely at some of the scarier bits :)
+kernel-doc - Print formatted kernel documentation to stdout
 
-# 26/05/2001 - 	Support for separate source and object trees.
-#		Return error code.
-# 		Keith Owens <[email protected]>
+=head1 SYNOPSIS
 
-# 23/09/2001 - Added support for typedefs, structs, enums and unions
-#              Support for Context section; can be terminated using empty line
-#              Small fixes (like spaces vs. \s in regex)
-# -- Tim Jansen <[email protected]>
+ kernel-doc [-h] [-v] [-Werror]
+   [ -man |
+     -rst [-sphinx-version VERSION] [-enable-lineno] |
+     -none
+   ]
+   [
+     -export |
+     -internal |
+     [-function NAME] ... |
+     [-nosymbol NAME] ...
+   ]
+   [-no-doc-sections]
+   [-export-file FILE] ...
+   FILE ...
 
-# 25/07/2012 - Added support for HTML5
-# -- Dan Luedtke <[email protected]>
+Run `kernel-doc -h` for details.
 
-sub usage {
-    my $message = <<"EOF";
-Usage: $0 [OPTION ...] FILE ...
+=head1 DESCRIPTION
 
 Read C language source or header FILEs, extract embedded documentation comments,
 and print formatted documentation to standard output.
 
-The documentation comments are identified by "/**" opening comment mark. See
-Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax.
-
-Output format selection (mutually exclusive):
-  -man			Output troff manual page format. This is the default.
-  -rst			Output reStructuredText format.
-  -none			Do not output documentation, only warnings.
-
-Output format selection modifier (affects only ReST output):
-
-  -sphinx-version	Use the ReST C domain dialect compatible with an
-			specific Sphinx Version.
-			If not specified, kernel-doc will auto-detect using
-			the sphinx-build version found on PATH.
-
-Output selection (mutually exclusive):
-  -export		Only output documentation for symbols that have been
-			exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL()
-                        in any input FILE or -export-file FILE.
-  -internal		Only output documentation for symbols that have NOT been
-			exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL()
-                        in any input FILE or -export-file FILE.
-  -function NAME	Only output documentation for the given function(s)
-			or DOC: section title(s). All other functions and DOC:
-			sections are ignored. May be specified multiple times.
-  -nosymbol NAME	Exclude the specified symbols from the output
-		        documentation. May be specified multiple times.
-
-Output selection modifiers:
-  -no-doc-sections	Do not output DOC: sections.
-  -enable-lineno        Enable output of #define LINENO lines. Only works with
-                        reStructuredText format.
-  -export-file FILE     Specify an additional FILE in which to look for
-                        EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(). To be used with
-                        -export or -internal. May be specified multiple times.
-
-Other parameters:
-  -v			Verbose output, more warnings and other information.
-  -h			Print this help.
-  -Werror		Treat warnings as errors.
-
-EOF
-    print $message;
-    exit 1;
-}
+The documentation comments are identified by the "/**" opening comment mark.
 
-#
-# format of comments.
-# In the following table, (...)? signifies optional structure.
-#                         (...)* signifies 0 or more structure elements
-# /**
-#  * function_name(:)? (- short description)?
-# (* @parameterx: (description of parameter x)?)*
-# (* a blank line)?
-#  * (Description:)? (Description of function)?
-#  * (section header: (section description)? )*
-#  (*)?*/
-#
-# So .. the trivial example would be:
-#
-# /**
-#  * my_function
-#  */
-#
-# If the Description: header tag is omitted, then there must be a blank line
-# after the last parameter specification.
-# e.g.
-# /**
-#  * my_function - does my stuff
-#  * @my_arg: its mine damnit
-#  *
-#  * Does my stuff explained.
-#  */
-#
-#  or, could also use:
-# /**
-#  * my_function - does my stuff
-#  * @my_arg: its mine damnit
-#  * Description: Does my stuff explained.
-#  */
-# etc.
-#
-# Besides functions you can also write documentation for structs, unions,
-# enums and typedefs. Instead of the function name you must write the name
-# of the declaration;  the struct/union/enum/typedef must always precede
-# the name. Nesting of declarations is not supported.
-# Use the argument mechanism to document members or constants.
-# e.g.
-# /**
-#  * struct my_struct - short description
-#  * @a: first member
-#  * @b: second member
-#  *
-#  * Longer description
-#  */
-# struct my_struct {
-#     int a;
-#     int b;
-# /* private: */
-#     int c;
-# };
-#
-# All descriptions can be multiline, except the short function description.
-#
-# For really longs structs, you can also describe arguments inside the
-# body of the struct.
-# eg.
-# /**
-#  * struct my_struct - short description
-#  * @a: first member
-#  * @b: second member
-#  *
-#  * Longer description
-#  */
-# struct my_struct {
-#     int a;
-#     int b;
-#     /**
-#      * @c: This is longer description of C
-#      *
-#      * You can use paragraphs to describe arguments
-#      * using this method.
-#      */
-#     int c;
-# };
-#
-# This should be use only for struct/enum members.
-#
-# You can also add additional sections. When documenting kernel functions you
-# should document the "Context:" of the function, e.g. whether the functions
-# can be called form interrupts. Unlike other sections you can end it with an
-# empty line.
-# A non-void function should have a "Return:" section describing the return
-# value(s).
-# Example-sections should contain the string EXAMPLE so that they are marked
-# appropriately in DocBook.
-#
-# Example:
-# /**
-#  * user_function - function that can only be called in user context
-#  * @a: some argument
-#  * Context: !in_interrupt()
-#  *
-#  * Some description
-#  * Example:
-#  *    user_function(22);
-#  */
-# ...
-#
-#
-# All descriptive text is further processed, scanning for the following special
-# patterns, which are highlighted appropriately.
-#
-# 'funcname()' - function
-# '$ENVVAR' - environmental variable
-# '&struct_name' - name of a structure (up to two words including 'struct')
-# '&struct_name.member' - name of a structure member
-# '@parameter' - name of a parameter
-# '%CONST' - name of a constant.
-# '``LITERAL``' - literal string without any spaces on it.
+See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax.
+
+=cut
+
+# more perldoc at the end of the file
 
 ## init lots of data
 
@@ -273,7 +117,13 @@ my $blankline_rst = "\n";
 
 # read arguments
 if ($#ARGV == -1) {
-    usage();
+	pod2usage(
+		-message => "No arguments!\n",
+		-exitval => 1,
+		-verbose => 99,
+		-sections => 'SYNOPSIS',
+		-output => \*STDERR,
+	);
 }
 
 my $kernelversion;
@@ -468,7 +318,7 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
     } elsif ($cmd eq "Werror") {
 	$Werror = 1;
     } elsif (($cmd eq "h") || ($cmd eq "help")) {
-	usage();
+		pod2usage(-exitval => 0, -verbose => 2);
     } elsif ($cmd eq 'no-doc-sections') {
 	    $no_doc_sections = 1;
     } elsif ($cmd eq 'enable-lineno') {
@@ -494,7 +344,22 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
 	}
     } else {
 	# Unknown argument
-        usage();
+	pod2usage(
+	    -message => "Argument unknown!\n",
+	    -exitval => 1,
+	    -verbose => 99,
+	    -sections => 'SYNOPSIS',
+	    -output => \*STDERR,
+	    );
+    }
+    if ($#ARGV < 0){
+	pod2usage(
+	    -message => "FILE argument missing\n",
+	    -exitval => 1,
+	    -verbose => 99,
+	    -sections => 'SYNOPSIS',
+	    -output => \*STDERR,
+	    );
     }
 }
 
@@ -2521,3 +2386,118 @@ if ($Werror && $warnings) {
 } else {
     exit($output_mode eq "none" ? 0 : $errors)
 }
+
+__END__
+
+=head1 OPTIONS
+
+=head2 Output format selection (mutually exclusive):
+
+=over 8
+
+=item -man
+
+Output troff manual page format.
+
+=item -rst
+
+Output reStructuredText format. This is the default.
+
+=item -none
+
+Do not output documentation, only warnings.
+
+=back
+
+=head2 Output format modifiers
+
+=head3 reStructuredText only
+
+=over 8
+
+=item -sphinx-version VERSION
+
+Use the ReST C domain dialect compatible with a specific Sphinx Version.
+
+If not specified, kernel-doc will auto-detect using the sphinx-build version
+found on PATH.
+
+=back
+
+=head2 Output selection (mutually exclusive):
+
+=over 8
+
+=item -export
+
+Only output documentation for the symbols that have been exported using
+EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE.
+
+=item -internal
+
+Only output documentation for the symbols that have NOT been exported using
+EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE.
+
+=item -function NAME
+
+Only output documentation for the given function or DOC: section title.
+All other functions and DOC: sections are ignored.
+
+May be specified multiple times.
+
+=item -nosymbol NAME
+
+Exclude the specified symbol from the output documentation.
+
+May be specified multiple times.
+
+=back
+
+=head2 Output selection modifiers:
+
+=over 8
+
+=item -no-doc-sections
+
+Do not output DOC: sections.
+
+=item -export-file FILE
+
+Specify an additional FILE in which to look for EXPORT_SYMBOL() and
+EXPORT_SYMBOL_GPL().
+
+To be used with -export or -internal.
+
+May be specified multiple times.
+
+=back
+
+=head3 reStructuredText only
+
+=over 8
+
+=item -enable-lineno
+
+Enable output of #define LINENO lines.
+
+=back
+
+=head2 Other parameters:
+
+=over 8
+
+=item -h, -help
+
+Print this help.
+
+=item -v
+
+Verbose output, more warnings and other information.
+
+=item -Werror
+
+Treat warnings as errors.
+
+=back
+
+=cut
diff --git a/security/integrity/Kconfig b/security/integrity/Kconfig
index 71f0177e8716..599429f99f99 100644
--- a/security/integrity/Kconfig
+++ b/security/integrity/Kconfig
@@ -62,6 +62,19 @@ config INTEGRITY_PLATFORM_KEYRING
          provided by the platform for verifying the kexec'ed kerned image
          and, possibly, the initramfs signature.
 
+config INTEGRITY_MACHINE_KEYRING
+	bool "Provide a keyring to which Machine Owner Keys may be added"
+	depends on SECONDARY_TRUSTED_KEYRING
+	depends on INTEGRITY_ASYMMETRIC_KEYS
+	depends on SYSTEM_BLACKLIST_KEYRING
+	depends on LOAD_UEFI_KEYS
+	depends on !IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY
+	help
+	 If set, provide a keyring to which Machine Owner Keys (MOK) may
+	 be added. This keyring shall contain just MOK keys.  Unlike keys
+	 in the platform keyring, keys contained in the .machine keyring will
+	 be trusted within the kernel.
+
 config LOAD_UEFI_KEYS
        depends on INTEGRITY_PLATFORM_KEYRING
        depends on EFI
diff --git a/security/integrity/Makefile b/security/integrity/Makefile
index 7ee39d66cf16..d0ffe37dc1d6 100644
--- a/security/integrity/Makefile
+++ b/security/integrity/Makefile
@@ -10,6 +10,7 @@ integrity-$(CONFIG_INTEGRITY_AUDIT) += integrity_audit.o
 integrity-$(CONFIG_INTEGRITY_SIGNATURE) += digsig.o
 integrity-$(CONFIG_INTEGRITY_ASYMMETRIC_KEYS) += digsig_asymmetric.o
 integrity-$(CONFIG_INTEGRITY_PLATFORM_KEYRING) += platform_certs/platform_keyring.o
+integrity-$(CONFIG_INTEGRITY_MACHINE_KEYRING) += platform_certs/machine_keyring.o
 integrity-$(CONFIG_LOAD_UEFI_KEYS) += platform_certs/efi_parser.o \
 				      platform_certs/load_uefi.o \
 				      platform_certs/keyring_handler.o
diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index 3b06a01bd0fd..c8c8a4a4e7a0 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -30,6 +30,7 @@ static const char * const keyring_name[INTEGRITY_KEYRING_MAX] = {
 	".ima",
 #endif
 	".platform",
+	".machine",
 };
 
 #ifdef CONFIG_IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY
@@ -111,6 +112,8 @@ static int __init __integrity_init_keyring(const unsigned int id,
 	} else {
 		if (id == INTEGRITY_KEYRING_PLATFORM)
 			set_platform_trusted_keys(keyring[id]);
+		if (id == INTEGRITY_KEYRING_MACHINE && trust_moklist())
+			set_machine_trusted_keys(keyring[id]);
 		if (id == INTEGRITY_KEYRING_IMA)
 			load_module_cert(keyring[id]);
 	}
@@ -126,7 +129,8 @@ int __init integrity_init_keyring(const unsigned int id)
 	perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW
 		| KEY_USR_READ | KEY_USR_SEARCH;
 
-	if (id == INTEGRITY_KEYRING_PLATFORM) {
+	if (id == INTEGRITY_KEYRING_PLATFORM ||
+	    id == INTEGRITY_KEYRING_MACHINE) {
 		restriction = NULL;
 		goto out;
 	}
@@ -139,7 +143,14 @@ int __init integrity_init_keyring(const unsigned int id)
 		return -ENOMEM;
 
 	restriction->check = restrict_link_to_ima;
-	perm |= KEY_USR_WRITE;
+
+	/*
+	 * MOK keys can only be added through a read-only runtime services
+	 * UEFI variable during boot. No additional keys shall be allowed to
+	 * load into the machine keyring following init from userspace.
+	 */
+	if (id != INTEGRITY_KEYRING_MACHINE)
+		perm |= KEY_USR_WRITE;
 
 out:
 	return __integrity_init_keyring(id, perm, restriction);
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index 547425c20e11..2e214c761158 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -151,7 +151,8 @@ int integrity_kernel_read(struct file *file, loff_t offset,
 #define INTEGRITY_KEYRING_EVM		0
 #define INTEGRITY_KEYRING_IMA		1
 #define INTEGRITY_KEYRING_PLATFORM	2
-#define INTEGRITY_KEYRING_MAX		3
+#define INTEGRITY_KEYRING_MACHINE	3
+#define INTEGRITY_KEYRING_MAX		4
 
 extern struct dentry *integrity_dir;
 
@@ -283,3 +284,17 @@ static inline void __init add_to_platform_keyring(const char *source,
 {
 }
 #endif
+
+#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
+void __init add_to_machine_keyring(const char *source, const void *data, size_t len);
+bool __init trust_moklist(void);
+#else
+static inline void __init add_to_machine_keyring(const char *source,
+						  const void *data, size_t len)
+{
+}
+static inline bool __init trust_moklist(void)
+{
+	return false;
+}
+#endif
diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c
index 5604bd57c990..1db4d3b4356d 100644
--- a/security/integrity/platform_certs/keyring_handler.c
+++ b/security/integrity/platform_certs/keyring_handler.c
@@ -9,6 +9,7 @@
 #include <keys/asymmetric-type.h>
 #include <keys/system_keyring.h>
 #include "../integrity.h"
+#include "keyring_handler.h"
 
 static efi_guid_t efi_cert_x509_guid __initdata = EFI_CERT_X509_GUID;
 static efi_guid_t efi_cert_x509_sha256_guid __initdata =
@@ -66,7 +67,7 @@ static __init void uefi_revocation_list_x509(const char *source,
 
 /*
  * Return the appropriate handler for particular signature list types found in
- * the UEFI db and MokListRT tables.
+ * the UEFI db tables.
  */
 __init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type)
 {
@@ -77,6 +78,21 @@ __init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type)
 
 /*
  * Return the appropriate handler for particular signature list types found in
+ * the MokListRT tables.
+ */
+__init efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type)
+{
+	if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0) {
+		if (IS_ENABLED(CONFIG_INTEGRITY_MACHINE_KEYRING) && trust_moklist())
+			return add_to_machine_keyring;
+		else
+			return add_to_platform_keyring;
+	}
+	return 0;
+}
+
+/*
+ * Return the appropriate handler for particular signature list types found in
  * the UEFI dbx and MokListXRT tables.
  */
 __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type)
diff --git a/security/integrity/platform_certs/keyring_handler.h b/security/integrity/platform_certs/keyring_handler.h
index 2462bfa08fe3..284558f30411 100644
--- a/security/integrity/platform_certs/keyring_handler.h
+++ b/security/integrity/platform_certs/keyring_handler.h
@@ -25,6 +25,11 @@ void blacklist_binary(const char *source, const void *data, size_t len);
 efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type);
 
 /*
+ * Return the handler for particular signature list types found in the mok.
+ */
+efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type);
+
+/*
  * Return the handler for particular signature list types found in the dbx.
  */
 efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type);
diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c
index 08b6d12f99b4..5f45c3c07dbd 100644
--- a/security/integrity/platform_certs/load_uefi.c
+++ b/security/integrity/platform_certs/load_uefi.c
@@ -95,7 +95,7 @@ static int __init load_moklist_certs(void)
 		rc = parse_efi_signature_list("UEFI:MokListRT (MOKvar table)",
 					      mokvar_entry->data,
 					      mokvar_entry->data_size,
-					      get_handler_for_db);
+					      get_handler_for_mok);
 		/* All done if that worked. */
 		if (!rc)
 			return rc;
@@ -110,7 +110,7 @@ static int __init load_moklist_certs(void)
 	mok = get_cert_list(L"MokListRT", &mok_var, &moksize, &status);
 	if (mok) {
 		rc = parse_efi_signature_list("UEFI:MokListRT",
-					      mok, moksize, get_handler_for_db);
+					      mok, moksize, get_handler_for_mok);
 		kfree(mok);
 		if (rc)
 			pr_err("Couldn't parse MokListRT signatures: %d\n", rc);
diff --git a/security/integrity/platform_certs/machine_keyring.c b/security/integrity/platform_certs/machine_keyring.c
new file mode 100644
index 000000000000..7aaed7950b6e
--- /dev/null
+++ b/security/integrity/platform_certs/machine_keyring.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Machine keyring routines.
+ *
+ * Copyright (c) 2021, Oracle and/or its affiliates.
+ */
+
+#include <linux/efi.h>
+#include "../integrity.h"
+
+static bool trust_mok;
+
+static __init int machine_keyring_init(void)
+{
+	int rc;
+
+	rc = integrity_init_keyring(INTEGRITY_KEYRING_MACHINE);
+	if (rc)
+		return rc;
+
+	pr_notice("Machine keyring initialized\n");
+	return 0;
+}
+device_initcall(machine_keyring_init);
+
+void __init add_to_machine_keyring(const char *source, const void *data, size_t len)
+{
+	key_perm_t perm;
+	int rc;
+
+	perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW;
+	rc = integrity_load_cert(INTEGRITY_KEYRING_MACHINE, source, data, len, perm);
+
+	/*
+	 * Some MOKList keys may not pass the machine keyring restrictions.
+	 * If the restriction check does not pass and the platform keyring
+	 * is configured, try to add it into that keyring instead.
+	 */
+	if (rc && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING))
+		rc = integrity_load_cert(INTEGRITY_KEYRING_PLATFORM, source,
+					 data, len, perm);
+
+	if (rc)
+		pr_info("Error adding keys to machine keyring %s\n", source);
+}
+
+/*
+ * Try to load the MokListTrustedRT MOK variable to see if we should trust
+ * the MOK keys within the kernel. It is not an error if this variable
+ * does not exist.  If it does not exist, MOK keys should not be trusted
+ * within the machine keyring.
+ */
+static __init bool uefi_check_trust_mok_keys(void)
+{
+	struct efi_mokvar_table_entry *mokvar_entry;
+
+	mokvar_entry = efi_mokvar_entry_find("MokListTrustedRT");
+
+	if (mokvar_entry)
+		return true;
+
+	return false;
+}
+
+bool __init trust_moklist(void)
+{
+	static bool initialized;
+
+	if (!initialized) {
+		initialized = true;
+
+		if (uefi_check_trust_mok_keys())
+			trust_mok = true;
+	}
+
+	return trust_mok;
+}
diff --git a/security/keys/dh.c b/security/keys/dh.c
index 4573fc15617d..b339760a31dd 100644
--- a/security/keys/dh.c
+++ b/security/keys/dh.c
@@ -15,7 +15,7 @@
 #include <keys/user-type.h>
 #include "internal.h"
 
-static ssize_t dh_data_from_key(key_serial_t keyid, void **data)
+static ssize_t dh_data_from_key(key_serial_t keyid, const void **data)
 {
 	struct key *key;
 	key_ref_t key_ref;
diff --git a/security/keys/keyctl_pkey.c b/security/keys/keyctl_pkey.c
index 5de0d599a274..97bc27bbf079 100644
--- a/security/keys/keyctl_pkey.c
+++ b/security/keys/keyctl_pkey.c
@@ -135,15 +135,23 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par
 
 	switch (op) {
 	case KEYCTL_PKEY_ENCRYPT:
+		if (uparams.in_len  > info.max_dec_size ||
+		    uparams.out_len > info.max_enc_size)
+			return -EINVAL;
+		break;
 	case KEYCTL_PKEY_DECRYPT:
 		if (uparams.in_len  > info.max_enc_size ||
 		    uparams.out_len > info.max_dec_size)
 			return -EINVAL;
 		break;
 	case KEYCTL_PKEY_SIGN:
+		if (uparams.in_len  > info.max_data_size ||
+		    uparams.out_len > info.max_sig_size)
+			return -EINVAL;
+		break;
 	case KEYCTL_PKEY_VERIFY:
-		if (uparams.in_len  > info.max_sig_size ||
-		    uparams.out_len > info.max_data_size)
+		if (uparams.in_len  > info.max_data_size ||
+		    uparams.in2_len > info.max_sig_size)
 			return -EINVAL;
 		break;
 	default:
@@ -151,7 +159,7 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par
 	}
 
 	params->in_len  = uparams.in_len;
-	params->out_len = uparams.out_len;
+	params->out_len = uparams.out_len; /* Note: same as in2_len */
 	return 0;
 }
 
diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c
index d5c891d8d353..9b9d3ef79cbe 100644
--- a/security/keys/trusted-keys/trusted_core.c
+++ b/security/keys/trusted-keys/trusted_core.c
@@ -27,10 +27,10 @@ module_param_named(source, trusted_key_source, charp, 0);
 MODULE_PARM_DESC(source, "Select trusted keys source (tpm or tee)");
 
 static const struct trusted_key_source trusted_key_sources[] = {
-#if defined(CONFIG_TCG_TPM)
+#if IS_REACHABLE(CONFIG_TCG_TPM)
 	{ "tpm", &trusted_key_tpm_ops },
 #endif
-#if defined(CONFIG_TEE)
+#if IS_REACHABLE(CONFIG_TEE)
 	{ "tee", &trusted_key_tee_ops },
 #endif
 };
@@ -351,7 +351,7 @@ static int __init init_trusted(void)
 
 static void __exit cleanup_trusted(void)
 {
-	static_call(trusted_key_exit)();
+	static_call_cond(trusted_key_exit)();
 }
 
 late_initcall(init_trusted);
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index b3edde68bc3e..323e251ed37b 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -281,6 +281,11 @@ struct kvm_arm_copy_mte_tags {
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED	3
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED     	(1U << 4)
 
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3	KVM_REG_ARM_FW_REG(3)
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL		0
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL		1
+#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED	2
+
 /* SVE registers */
 #define KVM_REG_ARM64_SVE		(0x15 << KVM_REG_ARM_COPROC_SHIFT)
 
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 6db4e2932b3d..65d147974f8d 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -204,7 +204,7 @@
 /* FREE!                                ( 7*32+10) */
 #define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
 #define X86_FEATURE_RETPOLINE		( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_AMD	( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE	( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_CDP_L2		( 7*32+15) /* Code and Data Prioritization L2 */
 #define X86_FEATURE_MSR_SPEC_CTRL	( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h
index dc632b41f135..65c0d9ce1e29 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -124,7 +124,7 @@ struct insn {
 #define X86_VEX_B(vex)	((vex) & 0x20)	/* VEX3 Byte1 */
 #define X86_VEX_L(vex)	((vex) & 0x04)	/* VEX3 Byte2, VEX2 Byte1 */
 /* VEX bit fields */
-#define X86_EVEX_M(vex)	((vex) & 0x03)		/* EVEX Byte1 */
+#define X86_EVEX_M(vex)	((vex) & 0x07)		/* EVEX Byte1 */
 #define X86_VEX3_M(vex)	((vex) & 0x1f)		/* VEX3 Byte1 */
 #define X86_VEX2_M	1			/* VEX2.M always 1 */
 #define X86_VEX_V(vex)	(((vex) & 0x78) >> 3)	/* VEX3 Byte2, VEX2 Byte1 */
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 59cf2343f3d9..d0d7b9bc6cad 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -27,8 +27,7 @@
  * Output:
  * rax original destination
  */
-SYM_FUNC_START_ALIAS(__memcpy)
-SYM_FUNC_START_WEAK(memcpy)
+SYM_FUNC_START(__memcpy)
 	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
 		      "jmp memcpy_erms", X86_FEATURE_ERMS
 
@@ -40,11 +39,12 @@ SYM_FUNC_START_WEAK(memcpy)
 	movl %edx, %ecx
 	rep movsb
 	RET
-SYM_FUNC_END(memcpy)
-SYM_FUNC_END_ALIAS(__memcpy)
-EXPORT_SYMBOL(memcpy)
+SYM_FUNC_END(__memcpy)
 EXPORT_SYMBOL(__memcpy)
 
+SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
+EXPORT_SYMBOL(memcpy)
+
 /*
  * memcpy_erms() - enhanced fast string memcpy. This is faster and
  * simpler than memcpy. Use memcpy_erms when possible.
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index d624f2bc42f1..fc9ffd3ff3b2 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -17,7 +17,6 @@
  *
  * rax   original destination
  */
-SYM_FUNC_START_WEAK(memset)
 SYM_FUNC_START(__memset)
 	/*
 	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
@@ -42,10 +41,11 @@ SYM_FUNC_START(__memset)
 	movq %r9,%rax
 	RET
 SYM_FUNC_END(__memset)
-SYM_FUNC_END_ALIAS(memset)
-EXPORT_SYMBOL(memset)
 EXPORT_SYMBOL(__memset)
 
+SYM_FUNC_ALIAS_WEAK(memset, __memset)
+EXPORT_SYMBOL(memset)
+
 /*
  * ISO C memset - set a memory block to a byte value. This function uses
  * enhanced rep stosb to override the fast string function.
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index ec31f5b60323..d12d1358f96d 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -690,7 +690,10 @@ AVXcode: 2
 45: vpsrlvd/q Vx,Hx,Wx (66),(v)
 46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
 47: vpsllvd/q Vx,Hx,Wx (66),(v)
-# Skip 0x48-0x4b
+# Skip 0x48
+49: TILERELEASE (v1),(000),(11B) | LDTILECFG Mtc (v1)(000) | STTILECFG Mtc (66),(v1),(000) | TILEZERO Vt (F2),(v1),(11B)
+# Skip 0x4a
+4b: TILELOADD Vt,Wsm (F2),(v1) | TILELOADDT1 Vt,Wsm (66),(v1) | TILESTORED Wsm,Vt (F3),(v)
 4c: vrcp14ps/d Vpd,Wpd (66),(ev)
 4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
 4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
@@ -705,7 +708,10 @@ AVXcode: 2
 59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
 5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
 5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
-# Skip 0x5c-0x61
+5c: TDPBF16PS Vt,Wt,Ht (F3),(v1)
+# Skip 0x5d
+5e: TDPBSSD Vt,Wt,Ht (F2),(v1) | TDPBSUD Vt,Wt,Ht (F3),(v1) | TDPBUSD Vt,Wt,Ht (66),(v1) | TDPBUUD Vt,Wt,Ht (v1)
+# Skip 0x5f-0x61
 62: vpexpandb/w Vx,Wx (66),(ev)
 63: vpcompressb/w Wx,Vx (66),(ev)
 64: vpblendmd/q Vx,Hx,Wx (66),(ev)
@@ -822,9 +828,9 @@ AVXcode: 3
 05: vpermilpd Vx,Wx,Ib (66),(v)
 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
 07:
-08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) | vrndscaleph Vx,Wx,Ib (evo)
 09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
-0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) | vrndscalesh Vx,Hx,Wx,Ib (evo)
 0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
 0c: vblendps Vx,Hx,Wx,Ib (66)
 0d: vblendpd Vx,Hx,Wx,Ib (66)
@@ -846,8 +852,8 @@ AVXcode: 3
 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
 23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
 25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
-26: vgetmantps/d Vx,Wx,Ib (66),(ev)
-27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
+26: vgetmantps/d Vx,Wx,Ib (66),(ev) | vgetmantph Vx,Wx,Ib (ev)
+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) | vgetmantsh Vx,Hx,Wx,Ib (ev)
 30: kshiftrb/w Vk,Uk,Ib (66),(v)
 31: kshiftrd/q Vk,Uk,Ib (66),(v)
 32: kshiftlb/w Vk,Uk,Ib (66),(v)
@@ -871,23 +877,102 @@ AVXcode: 3
 51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
 54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
 55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
-56: vreduceps/d Vx,Wx,Ib (66),(ev)
-57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
+56: vreduceps/d Vx,Wx,Ib (66),(ev) | vreduceph Vx,Wx,Ib (ev)
+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) | vreducesh Vx,Hx,Wx,Ib (ev)
 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
 61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
 63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
-66: vfpclassps/d Vk,Wx,Ib (66),(ev)
-67: vfpclassss/d Vk,Wx,Ib (66),(ev)
+66: vfpclassps/d Vk,Wx,Ib (66),(ev) | vfpclassph Vx,Wx,Ib (ev)
+67: vfpclassss/d Vk,Wx,Ib (66),(ev) | vfpclasssh Vx,Wx,Ib (ev)
 70: vpshldw Vx,Hx,Wx,Ib (66),(ev)
 71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev)
 72: vpshrdw Vx,Hx,Wx,Ib (66),(ev)
 73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev)
+c2: vcmpph Vx,Hx,Wx,Ib (ev) | vcmpsh Vx,Hx,Wx,Ib (F3),(ev)
 cc: sha1rnds4 Vdq,Wdq,Ib
 ce: vgf2p8affineqb Vx,Wx,Ib (66)
 cf: vgf2p8affineinvqb Vx,Wx,Ib (66)
 df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
-f0: RORX Gy,Ey,Ib (F2),(v)
+f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B)
+EndTable
+
+Table: EVEX map 5
+Referrer:
+AVXcode: 5
+10: vmovsh Vx,Hx,Wx (F3),(ev) | vmovsh Vx,Wx (F3),(ev)
+11: vmovsh Wx,Hx,Vx (F3),(ev) | vmovsh Wx,Vx (F3),(ev)
+1d: vcvtps2phx Vx,Wx (66),(ev) | vcvtss2sh Vx,Hx,Wx (ev)
+2a: vcvtsi2sh Vx,Hx,Wx (F3),(ev)
+2c: vcvttsh2si Vx,Wx (F3),(ev)
+2d: vcvtsh2si Vx,Wx (F3),(ev)
+2e: vucomish Vx,Wx (ev)
+2f: vcomish Vx,Wx (ev)
+51: vsqrtph Vx,Wx (ev) | vsqrtsh Vx,Hx,Wx (F3),(ev)
+58: vaddph Vx,Hx,Wx (ev) | vaddsh Vx,Hx,Wx (F3),(ev)
+59: vmulph Vx,Hx,Wx (ev) | vmulsh Vx,Hx,Wx (F3),(ev)
+5a: vcvtpd2ph Vx,Wx (66),(ev) | vcvtph2pd Vx,Wx (ev) | vcvtsd2sh Vx,Hx,Wx (F2),(ev) | vcvtsh2sd Vx,Hx,Wx (F3),(ev)
+5b: vcvtdq2ph Vx,Wx (ev) | vcvtph2dq Vx,Wx (66),(ev) | vcvtqq2ph Vx,Wx (ev) | vcvttph2dq Vx,Wx (F3),(ev)
+5c: vsubph Vx,Hx,Wx (ev) | vsubsh Vx,Hx,Wx (F3),(ev)
+5d: vminph Vx,Hx,Wx (ev) | vminsh Vx,Hx,Wx (F3),(ev)
+5e: vdivph Vx,Hx,Wx (ev) | vdivsh Vx,Hx,Wx (F3),(ev)
+5f: vmaxph Vx,Hx,Wx (ev) | vmaxsh Vx,Hx,Wx (F3),(ev)
+6e: vmovw Vx,Wx (66),(ev)
+78: vcvttph2udq Vx,Wx (ev) | vcvttph2uqq Vx,Wx (66),(ev) | vcvttsh2usi Vx,Wx (F3),(ev)
+79: vcvtph2udq Vx,Wx (ev) | vcvtph2uqq Vx,Wx (66),(ev) | vcvtsh2usi Vx,Wx (F3),(ev)
+7a: vcvttph2qq Vx,Wx (66),(ev) | vcvtudq2ph Vx,Wx (F2),(ev) | vcvtuqq2ph Vx,Wx (F2),(ev)
+7b: vcvtph2qq Vx,Wx (66),(ev) | vcvtusi2sh Vx,Hx,Wx (F3),(ev)
+7c: vcvttph2uw Vx,Wx (ev) | vcvttph2w Vx,Wx (66),(ev)
+7d: vcvtph2uw Vx,Wx (ev) | vcvtph2w Vx,Wx (66),(ev) | vcvtuw2ph Vx,Wx (F2),(ev) | vcvtw2ph Vx,Wx (F3),(ev)
+7e: vmovw Wx,Vx (66),(ev)
+EndTable
+
+Table: EVEX map 6
+Referrer:
+AVXcode: 6
+13: vcvtph2psx Vx,Wx (66),(ev) | vcvtsh2ss Vx,Hx,Wx (ev)
+2c: vscalefph Vx,Hx,Wx (66),(ev)
+2d: vscalefsh Vx,Hx,Wx (66),(ev)
+42: vgetexpph Vx,Wx (66),(ev)
+43: vgetexpsh Vx,Hx,Wx (66),(ev)
+4c: vrcpph Vx,Wx (66),(ev)
+4d: vrcpsh Vx,Hx,Wx (66),(ev)
+4e: vrsqrtph Vx,Wx (66),(ev)
+4f: vrsqrtsh Vx,Hx,Wx (66),(ev)
+56: vfcmaddcph Vx,Hx,Wx (F2),(ev) | vfmaddcph Vx,Hx,Wx (F3),(ev)
+57: vfcmaddcsh Vx,Hx,Wx (F2),(ev) | vfmaddcsh Vx,Hx,Wx (F3),(ev)
+96: vfmaddsub132ph Vx,Hx,Wx (66),(ev)
+97: vfmsubadd132ph Vx,Hx,Wx (66),(ev)
+98: vfmadd132ph Vx,Hx,Wx (66),(ev)
+99: vfmadd132sh Vx,Hx,Wx (66),(ev)
+9a: vfmsub132ph Vx,Hx,Wx (66),(ev)
+9b: vfmsub132sh Vx,Hx,Wx (66),(ev)
+9c: vfnmadd132ph Vx,Hx,Wx (66),(ev)
+9d: vfnmadd132sh Vx,Hx,Wx (66),(ev)
+9e: vfnmsub132ph Vx,Hx,Wx (66),(ev)
+9f: vfnmsub132sh Vx,Hx,Wx (66),(ev)
+a6: vfmaddsub213ph Vx,Hx,Wx (66),(ev)
+a7: vfmsubadd213ph Vx,Hx,Wx (66),(ev)
+a8: vfmadd213ph Vx,Hx,Wx (66),(ev)
+a9: vfmadd213sh Vx,Hx,Wx (66),(ev)
+aa: vfmsub213ph Vx,Hx,Wx (66),(ev)
+ab: vfmsub213sh Vx,Hx,Wx (66),(ev)
+ac: vfnmadd213ph Vx,Hx,Wx (66),(ev)
+ad: vfnmadd213sh Vx,Hx,Wx (66),(ev)
+ae: vfnmsub213ph Vx,Hx,Wx (66),(ev)
+af: vfnmsub213sh Vx,Hx,Wx (66),(ev)
+b6: vfmaddsub231ph Vx,Hx,Wx (66),(ev)
+b7: vfmsubadd231ph Vx,Hx,Wx (66),(ev)
+b8: vfmadd231ph Vx,Hx,Wx (66),(ev)
+b9: vfmadd231sh Vx,Hx,Wx (66),(ev)
+ba: vfmsub231ph Vx,Hx,Wx (66),(ev)
+bb: vfmsub231sh Vx,Hx,Wx (66),(ev)
+bc: vfnmadd231ph Vx,Hx,Wx (66),(ev)
+bd: vfnmadd231sh Vx,Hx,Wx (66),(ev)
+be: vfnmsub231ph Vx,Hx,Wx (66),(ev)
+bf: vfnmsub231sh Vx,Hx,Wx (66),(ev)
+d6: vfcmulcph Vx,Hx,Wx (F2),(ev) | vfmulcph Vx,Hx,Wx (F3),(ev)
+d7: vfcmulcsh Vx,Hx,Wx (F2),(ev) | vfmulcsh Vx,Hx,Wx (F3),(ev)
 EndTable
 
 GrpTable: Grp1
@@ -970,7 +1055,7 @@ GrpTable: Grp7
 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
 3: LIDT Ms
 4: SMSW Mw/Rv
-5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B)
+5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) | CLUI (F3),(110),(11B) | SERIALIZE (000),(11B) | STUI (F3),(111),(11B) | TESTUI (F3)(101)(11B) | UIRET (F3),(100),(11B) | XRESLDTRK (F2),(000),(11B) | XSUSLDTRK (F2),(001),(11B)
 6: LMSW Ew
 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
 EndTable
@@ -987,7 +1072,7 @@ GrpTable: Grp9
 3: xrstors
 4: xsavec
 5: xsaves
-6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | SENDUIPI Gq (F3)
 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
 EndTable
 
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index 394ee57d58f2..ee819a402b69 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -485,6 +485,57 @@ have R ->po X.  It wouldn't make sense for a computation to depend
 somehow on a value that doesn't get loaded from shared memory until
 later in the code!
 
+Here's a trick question: When is a dependency not a dependency?  Answer:
+When it is purely syntactic rather than semantic.  We say a dependency
+between two accesses is purely syntactic if the second access doesn't
+actually depend on the result of the first.  Here is a trivial example:
+
+	r1 = READ_ONCE(x);
+	WRITE_ONCE(y, r1 * 0);
+
+There appears to be a data dependency from the load of x to the store
+of y, since the value to be stored is computed from the value that was
+loaded.  But in fact, the value stored does not really depend on
+anything since it will always be 0.  Thus the data dependency is only
+syntactic (it appears to exist in the code) but not semantic (the
+second access will always be the same, regardless of the value of the
+first access).  Given code like this, a compiler could simply discard
+the value returned by the load from x, which would certainly destroy
+any dependency.  (The compiler is not permitted to eliminate entirely
+the load generated for a READ_ONCE() -- that's one of the nice
+properties of READ_ONCE() -- but it is allowed to ignore the load's
+value.)
+
+It's natural to object that no one in their right mind would write
+code like the above.  However, macro expansions can easily give rise
+to this sort of thing, in ways that often are not apparent to the
+programmer.
+
+Another mechanism that can lead to purely syntactic dependencies is
+related to the notion of "undefined behavior".  Certain program
+behaviors are called "undefined" in the C language specification,
+which means that when they occur there are no guarantees at all about
+the outcome.  Consider the following example:
+
+	int a[1];
+	int i;
+
+	r1 = READ_ONCE(i);
+	r2 = READ_ONCE(a[r1]);
+
+Access beyond the end or before the beginning of an array is one kind
+of undefined behavior.  Therefore the compiler doesn't have to worry
+about what will happen if r1 is nonzero, and it can assume that r1
+will always be zero regardless of the value actually loaded from i.
+(If the assumption turns out to be wrong the resulting behavior will
+be undefined anyway, so the compiler doesn't care!)  Thus the value
+from the load can be discarded, breaking the address dependency.
+
+The LKMM is unaware that purely syntactic dependencies are different
+from semantic dependencies and therefore mistakenly predicts that the
+accesses in the two examples above will be ordered.  This is another
+example of how the compiler can undermine the memory model.  Be warned.
+
 
 THE READS-FROM RELATION: rf, rfi, and rfe
 -----------------------------------------
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index c10ef78df050..479e769ca324 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -112,7 +112,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 	const struct elf *elf = file->elf;
 	struct insn insn;
 	int x86_64, ret;
-	unsigned char op1, op2,
+	unsigned char op1, op2, op3,
 		      rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
 		      modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
 		      sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0;
@@ -139,6 +139,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 
 	op1 = insn.opcode.bytes[0];
 	op2 = insn.opcode.bytes[1];
+	op3 = insn.opcode.bytes[2];
 
 	if (insn.rex_prefix.nbytes) {
 		rex = insn.rex_prefix.bytes[0];
@@ -491,6 +492,14 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 			/* nopl/nopw */
 			*type = INSN_NOP;
 
+		} else if (op2 == 0x38 && op3 == 0xf8) {
+			if (insn.prefixes.nbytes == 1 &&
+			    insn.prefixes.bytes[0] == 0xf2) {
+				/* ENQCMD cannot be used in the kernel. */
+				WARN("ENQCMD instruction at %s:%lx", sec->name,
+				     offset);
+			}
+
 		} else if (op2 == 0xa0 || op2 == 0xa8) {
 
 			/* push fs/gs */
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
index 9708ae892061..ba429cadb18f 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
@@ -2197,6 +2197,924 @@
 "3e f2 ff 25 78 56 34 12 \tnotrack bnd jmp *0x12345678",},
 {{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
 "3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmp *0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 58 cb    \tvaddph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 58 cb    \tvaddph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 58 cb    \tvaddph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 58 cb    \tvaddsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 48 c2 eb 12 \tvcmple_oqph %zmm3,%zmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%zmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 c2 eb 12 \tvcmple_oqph %xmm3,%xmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%xmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 28 c2 eb 12 \tvcmple_oqph %ymm3,%ymm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%ymm2,%k5",},
+{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6e 08 c2 eb 12 \tvcmple_oqsh %xmm3,%xmm2,%k5",},
+{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%eax,%ecx,8),%xmm2,%k5",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 2f ca    \tvcomish %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 5b ca    \tvcvtdq2ph %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 5b ca    \tvcvtdq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 5b ca    \tvcvtdq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x48, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 48 5a ca    \tvcvtpd2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x08, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 08 5a ca    \tvcvtpd2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x28, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 28 5a ca    \tvcvtpd2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 5b ca    \tvcvtph2dq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 5b ca    \tvcvtph2dq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 5b ca    \tvcvtph2dq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 5a ca    \tvcvtph2pd %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 5a ca    \tvcvtph2pd %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 5a ca    \tvcvtph2pd %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "",
+"62 f2 7d 48 13 ca    \tvcvtph2ps %ymm2,%zmm1",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 79 13 ca       \tvcvtph2ps %xmm2,%xmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 7d 13 ca       \tvcvtph2ps %xmm2,%ymm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 79 13 ca       \tvcvtph2ps %xmm2,%xmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 7d 13 ca       \tvcvtph2ps %xmm2,%ymm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 13 ca    \tvcvtph2psx %ymm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 13 ca    \tvcvtph2psx %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 13 ca    \tvcvtph2psx %xmm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7b ca    \tvcvtph2qq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7b ca    \tvcvtph2qq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7b ca    \tvcvtph2qq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 79 ca    \tvcvtph2udq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 79 ca    \tvcvtph2udq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 79 ca    \tvcvtph2udq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 79 ca    \tvcvtph2uqq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 79 ca    \tvcvtph2uqq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 79 ca    \tvcvtph2uqq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 7d ca    \tvcvtph2uw %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 7d ca    \tvcvtph2uw %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 7d ca    \tvcvtph2uw %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7d ca    \tvcvtph2w %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7d ca    \tvcvtph2w %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7d ca    \tvcvtph2w %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0xd1, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 1d d1 12 \tvcvtps2ph $0x12,%zmm2,%ymm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 79 1d d1 12    \tvcvtps2ph $0x12,%xmm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 7d 1d d1 12    \tvcvtps2ph $0x12,%ymm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 7d 1d d1 12    \tvcvtps2ph $0x12,%ymm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 79 1d d1 12    \tvcvtps2ph $0x12,%xmm2,%xmm1",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 1d ca    \tvcvtps2phx %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 1d ca    \tvcvtps2phx %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 1d ca    \tvcvtps2phx %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 48 5b ca    \tvcvtqq2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 08 5b ca    \tvcvtqq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 28 5b ca    \tvcvtqq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xef, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 ef 08 5a 8c c8 78 56 34 12 \tvcvtsd2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5a 8c c8 78 56 34 12 \tvcvtsh2sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf6, 0x6c, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6c 08 13 8c c8 78 56 34 12 \tvcvtsh2ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 79 c1    \tvcvtsh2usi %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0xc8, }, 6, 0, "", "",
+"62 f5 6e 08 2a c8    \tvcvtsi2sh %eax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 1d cb    \tvcvtss2sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 48 5b ca    \tvcvttph2dq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 08 5b ca    \tvcvttph2dq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 28 5b ca    \tvcvttph2dq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7a ca    \tvcvttph2qq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7a ca    \tvcvttph2qq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7a ca    \tvcvttph2qq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 78 ca    \tvcvttph2udq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 78 ca    \tvcvttph2udq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 78 ca    \tvcvttph2udq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 78 ca    \tvcvttph2uqq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 78 ca    \tvcvttph2uqq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 78 ca    \tvcvttph2uqq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 7c ca    \tvcvttph2uw %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 7c ca    \tvcvttph2uw %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 7c ca    \tvcvttph2uw %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7c ca    \tvcvttph2w %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7c ca    \tvcvttph2w %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7c ca    \tvcvttph2w %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 2c c1    \tvcvttsh2si %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 78 c1    \tvcvttsh2usi %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 48 7a ca    \tvcvtudq2ph %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 08 7a ca    \tvcvtudq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 28 7a ca    \tvcvtudq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 48 7a ca    \tvcvtuqq2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 08 7a ca    \tvcvtuqq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 28 7a ca    \tvcvtuqq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0xc8, }, 6, 0, "", "",
+"62 f5 6e 08 7b c8    \tvcvtusi2sh %eax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 48 7d ca    \tvcvtuw2ph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 08 7d ca    \tvcvtuw2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 28 7d ca    \tvcvtuw2ph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 48 7d ca    \tvcvtw2ph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 08 7d ca    \tvcvtw2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 28 7d ca    \tvcvtw2ph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5e cb    \tvdivph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5e cb    \tvdivph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5e cb    \tvdivph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5e cb    \tvdivsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 48 56 cb    \tvfcmaddcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 56 cb    \tvfcmaddcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 28 56 cb    \tvfcmaddcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 57 cb    \tvfcmaddcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 48 d6 cb    \tvfcmulcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 d6 cb    \tvfcmulcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 28 d6 cb    \tvfcmulcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 d7 cb    \tvfcmulcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 98 cb    \tvfmadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 98 cb    \tvfmadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 98 cb    \tvfmadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 99 cb    \tvfmadd132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a8 cb    \tvfmadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a8 cb    \tvfmadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a8 cb    \tvfmadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a9 cb    \tvfmadd213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b8 cb    \tvfmadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b8 cb    \tvfmadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b8 cb    \tvfmadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b9 cb    \tvfmadd231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 48 56 cb    \tvfmaddcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 56 cb    \tvfmaddcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 28 56 cb    \tvfmaddcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 57 cb    \tvfmaddcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 96 cb    \tvfmaddsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 96 cb    \tvfmaddsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 96 cb    \tvfmaddsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a6 cb    \tvfmaddsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a6 cb    \tvfmaddsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a6 cb    \tvfmaddsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b6 cb    \tvfmaddsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b6 cb    \tvfmaddsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b6 cb    \tvfmaddsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9a cb    \tvfmsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9a cb    \tvfmsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9a cb    \tvfmsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9b cb    \tvfmsub132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 aa cb    \tvfmsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 aa cb    \tvfmsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 aa cb    \tvfmsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ab cb    \tvfmsub213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ba cb    \tvfmsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ba cb    \tvfmsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ba cb    \tvfmsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bb cb    \tvfmsub231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 97 cb    \tvfmsubadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 97 cb    \tvfmsubadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 97 cb    \tvfmsubadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a7 cb    \tvfmsubadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a7 cb    \tvfmsubadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a7 cb    \tvfmsubadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b7 cb    \tvfmsubadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b7 cb    \tvfmsubadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b7 cb    \tvfmsubadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 48 d6 cb    \tvfmulcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 d6 cb    \tvfmulcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 28 d6 cb    \tvfmulcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 d7 cb    \tvfmulcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9c cb    \tvfnmadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9c cb    \tvfnmadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9c cb    \tvfnmadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9d cb    \tvfnmadd132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ac cb    \tvfnmadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ac cb    \tvfnmadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ac cb    \tvfnmadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ad cb    \tvfnmadd213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 bc cb    \tvfnmadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bc cb    \tvfnmadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 bc cb    \tvfnmadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bd cb    \tvfnmadd231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9e cb    \tvfnmsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9e cb    \tvfnmsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9e cb    \tvfnmsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9f cb    \tvfnmsub132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ae cb    \tvfnmsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ae cb    \tvfnmsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ae cb    \tvfnmsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 af cb    \tvfnmsub213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 be cb    \tvfnmsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 be cb    \tvfnmsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 be cb    \tvfnmsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bf cb    \tvfnmsub231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 66 e9 12 \tvfpclassph $0x12,%zmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 66 e9 12 \tvfpclassph $0x12,%xmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 66 e9 12 \tvfpclassph $0x12,%ymm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 67 e9 12 \tvfpclasssh $0x12,%xmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%eax,%ecx,8),%k5",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 42 ca    \tvgetexpph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 42 ca    \tvgetexpph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 42 ca    \tvgetexpph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 43 cb    \tvgetexpsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 26 ca 12 \tvgetmantph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 26 ca 12 \tvgetmantph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 26 ca 12 \tvgetmantph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 27 cb 12 \tvgetmantsh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5f cb    \tvmaxph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5f cb    \tvmaxph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5f cb    \tvmaxph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5f cb    \tvmaxsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5d cb    \tvminph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5d cb    \tvminph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5d cb    \tvminph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5d cb    \tvminsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x10, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 10 cb    \tvmovsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8, }, 6, 0, "", "",
+"62 f5 7d 08 7e c8    \tvmovw  %xmm1,%eax",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw  %xmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8, }, 6, 0, "", "",
+"62 f5 7d 08 6e c8    \tvmovw  %eax,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw  0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 59 cb    \tvmulph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 59 cb    \tvmulph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 59 cb    \tvmulph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 59 cb    \tvmulsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 4c ca    \tvrcpph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 4c ca    \tvrcpph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 4c ca    \tvrcpph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 4d cb    \tvrcpsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 56 ca 12 \tvreduceph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 56 ca 12 \tvreduceph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 56 ca 12 \tvreduceph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 57 cb 12 \tvreducesh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 08 ca 12 \tvrndscaleph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 08 ca 12 \tvrndscaleph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 08 ca 12 \tvrndscaleph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 0a cb 12 \tvrndscalesh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 4e ca    \tvrsqrtph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 4e ca    \tvrsqrtph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 4e ca    \tvrsqrtph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 4f cb    \tvrsqrtsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 2c cb    \tvscalefph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 2c cb    \tvscalefph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 2c cb    \tvscalefph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 2d cb    \tvscalefsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 51 ca    \tvsqrtph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 51 ca    \tvsqrtph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 51 ca    \tvsqrtph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 51 cb    \tvsqrtsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5c cb    \tvsubph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5c cb    \tvsubph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5c cb    \tvsubph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5c cb    \tvsubsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 2e ca    \tvucomish %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "",
+"f3 0f 3a f0 c0 00    \threset $0x0",},
+{{0x0f, 0x01, 0xe8, }, 3, 0, "", "",
+"0f 01 e8             \tserialize ",},
+{{0xf2, 0x0f, 0x01, 0xe9, }, 4, 0, "", "",
+"f2 0f 01 e9          \txresldtrk ",},
+{{0xf2, 0x0f, 0x01, 0xe8, }, 4, 0, "", "",
+"f2 0f 01 e8          \txsusldtrk ",},
 {{0x0f, 0x01, 0xcf, }, 3, 0, "", "",
 "0f 01 cf             \tencls  ",},
 {{0x0f, 0x01, 0xd7, }, 3, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
index 5da17d41d302..3a47e98fec33 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
@@ -2459,6 +2459,1432 @@
 "3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%rax,%rcx,8)",},
 {{0x3e, 0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "jmp", "indirect",
 "3e f2 41 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%r8,%rcx,8)",},
+{{0xc4, 0xe2, 0x78, 0x49, 0x04, 0xc8, }, 6, 0, "", "",
+"c4 e2 78 49 04 c8    \tldtilecfg (%rax,%rcx,8)",},
+{{0xc4, 0xc2, 0x78, 0x49, 0x04, 0xc8, }, 6, 0, "", "",
+"c4 c2 78 49 04 c8    \tldtilecfg (%r8,%rcx,8)",},
+{{0xc4, 0xe2, 0x79, 0x49, 0x04, 0xc8, }, 6, 0, "", "",
+"c4 e2 79 49 04 c8    \tsttilecfg (%rax,%rcx,8)",},
+{{0xc4, 0xc2, 0x79, 0x49, 0x04, 0xc8, }, 6, 0, "", "",
+"c4 c2 79 49 04 c8    \tsttilecfg (%r8,%rcx,8)",},
+{{0xc4, 0xe2, 0x7a, 0x5c, 0xd1, }, 5, 0, "", "",
+"c4 e2 7a 5c d1       \ttdpbf16ps %tmm0,%tmm1,%tmm2",},
+{{0xc4, 0xe2, 0x7b, 0x5e, 0xd1, }, 5, 0, "", "",
+"c4 e2 7b 5e d1       \ttdpbssd %tmm0,%tmm1,%tmm2",},
+{{0xc4, 0xe2, 0x7a, 0x5e, 0xd1, }, 5, 0, "", "",
+"c4 e2 7a 5e d1       \ttdpbsud %tmm0,%tmm1,%tmm2",},
+{{0xc4, 0xe2, 0x79, 0x5e, 0xd1, }, 5, 0, "", "",
+"c4 e2 79 5e d1       \ttdpbusd %tmm0,%tmm1,%tmm2",},
+{{0xc4, 0xe2, 0x78, 0x5e, 0xd1, }, 5, 0, "", "",
+"c4 e2 78 5e d1       \ttdpbuud %tmm0,%tmm1,%tmm2",},
+{{0xc4, 0xe2, 0x7b, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "",
+"c4 e2 7b 4b 0c c8    \ttileloadd (%rax,%rcx,8),%tmm1",},
+{{0xc4, 0xc2, 0x7b, 0x4b, 0x14, 0xc8, }, 6, 0, "", "",
+"c4 c2 7b 4b 14 c8    \ttileloadd (%r8,%rcx,8),%tmm2",},
+{{0xc4, 0xe2, 0x79, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "",
+"c4 e2 79 4b 0c c8    \ttileloaddt1 (%rax,%rcx,8),%tmm1",},
+{{0xc4, 0xc2, 0x79, 0x4b, 0x14, 0xc8, }, 6, 0, "", "",
+"c4 c2 79 4b 14 c8    \ttileloaddt1 (%r8,%rcx,8),%tmm2",},
+{{0xc4, 0xe2, 0x78, 0x49, 0xc0, }, 5, 0, "", "",
+"c4 e2 78 49 c0       \ttilerelease ",},
+{{0xc4, 0xe2, 0x7a, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "",
+"c4 e2 7a 4b 0c c8    \ttilestored %tmm1,(%rax,%rcx,8)",},
+{{0xc4, 0xc2, 0x7a, 0x4b, 0x14, 0xc8, }, 6, 0, "", "",
+"c4 c2 7a 4b 14 c8    \ttilestored %tmm2,(%r8,%rcx,8)",},
+{{0xc4, 0xe2, 0x7b, 0x49, 0xc0, }, 5, 0, "", "",
+"c4 e2 7b 49 c0       \ttilezero %tmm0",},
+{{0xc4, 0xe2, 0x7b, 0x49, 0xf8, }, 5, 0, "", "",
+"c4 e2 7b 49 f8       \ttilezero %tmm7",},
+{{0xf3, 0x0f, 0x01, 0xee, }, 4, 0, "", "",
+"f3 0f 01 ee          \tclui   ",},
+{{0xf3, 0x0f, 0xc7, 0xf0, }, 4, 0, "", "",
+"f3 0f c7 f0          \tsenduipi %rax",},
+{{0xf3, 0x41, 0x0f, 0xc7, 0xf0, }, 5, 0, "", "",
+"f3 41 0f c7 f0       \tsenduipi %r8",},
+{{0xf3, 0x0f, 0x01, 0xef, }, 4, 0, "", "",
+"f3 0f 01 ef          \tstui   ",},
+{{0xf3, 0x0f, 0x01, 0xed, }, 4, 0, "", "",
+"f3 0f 01 ed          \ttestui ",},
+{{0xf3, 0x0f, 0x01, 0xec, }, 4, 0, "", "",
+"f3 0f 01 ec          \tuiret  ",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 58 cb    \tvaddph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 58 cb    \tvaddph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 58 cb    \tvaddph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 58 cb    \tvaddsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 48 c2 eb 12 \tvcmple_oqph %zmm3,%zmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%zmm2,%k5",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%zmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 c2 eb 12 \tvcmple_oqph %xmm3,%xmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%xmm2,%k5",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%xmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 28 c2 eb 12 \tvcmple_oqph %ymm3,%ymm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%ymm2,%k5",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%ymm2,%k5",},
+{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6e 08 c2 eb 12 \tvcmple_oqsh %xmm3,%xmm2,%k5",},
+{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%rax,%rcx,8),%xmm2,%k5",},
+{{0x67, 0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%eax,%ecx,8),%xmm2,%k5",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 2f ca    \tvcomish %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 5b ca    \tvcvtdq2ph %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 5b ca    \tvcvtdq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 5b ca    \tvcvtdq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x48, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 48 5a ca    \tvcvtpd2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x08, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 08 5a ca    \tvcvtpd2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x28, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 28 5a ca    \tvcvtpd2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 5b ca    \tvcvtph2dq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 5b ca    \tvcvtph2dq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 5b ca    \tvcvtph2dq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 5a ca    \tvcvtph2pd %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 5a ca    \tvcvtph2pd %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 5a ca    \tvcvtph2pd %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "",
+"62 f2 7d 48 13 ca    \tvcvtph2ps %ymm2,%zmm1",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 79 13 ca       \tvcvtph2ps %xmm2,%xmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"67 c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 7d 13 ca       \tvcvtph2ps %xmm2,%ymm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"67 c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 79 13 ca       \tvcvtph2ps %xmm2,%xmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"67 c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 7d 13 ca       \tvcvtph2ps %xmm2,%ymm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"67 c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 13 ca    \tvcvtph2psx %ymm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 13 ca    \tvcvtph2psx %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 13 ca    \tvcvtph2psx %xmm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7b ca    \tvcvtph2qq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7b ca    \tvcvtph2qq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7b ca    \tvcvtph2qq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 79 ca    \tvcvtph2udq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 79 ca    \tvcvtph2udq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 79 ca    \tvcvtph2udq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 79 ca    \tvcvtph2uqq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 79 ca    \tvcvtph2uqq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 79 ca    \tvcvtph2uqq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 7d ca    \tvcvtph2uw %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 7d ca    \tvcvtph2uw %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 7d ca    \tvcvtph2uw %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7d ca    \tvcvtph2w %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7d ca    \tvcvtph2w %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7d ca    \tvcvtph2w %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0xd1, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 1d d1 12 \tvcvtps2ph $0x12,%zmm2,%ymm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"67 c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"67 c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 79 1d d1 12    \tvcvtps2ph $0x12,%xmm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 7d 1d d1 12    \tvcvtps2ph $0x12,%ymm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 7d 1d d1 12    \tvcvtps2ph $0x12,%ymm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"67 c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 79 1d d1 12    \tvcvtps2ph $0x12,%xmm2,%xmm1",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"67 c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 1d ca    \tvcvtps2phx %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 1d ca    \tvcvtps2phx %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 1d ca    \tvcvtps2phx %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 48 5b ca    \tvcvtqq2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 08 5b ca    \tvcvtqq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 28 5b ca    \tvcvtqq2ph %ymm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0xef, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 ef 08 5a 8c c8 78 56 34 12 \tvcvtsd2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 5a 8c c8 78 56 34 12 \tvcvtsh2sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%eax",},
+{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 fe 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%rax",},
+{{0x67, 0x62, 0xf6, 0x6c, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6c 08 13 8c c8 78 56 34 12 \tvcvtsh2ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 79 c1    \tvcvtsh2usi %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%rax,%rcx,8),%eax",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x79, 0xc1, }, 6, 0, "", "",
+"62 f5 fe 08 79 c1    \tvcvtsh2usi %xmm1,%rax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 fe 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%rax,%rcx,8),%rax",},
+{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 fe 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%rax",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0xc8, }, 6, 0, "", "",
+"62 f5 6e 08 2a c8    \tvcvtsi2sh %eax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0xee, 0x08, 0x2a, 0xc8, }, 6, 0, "", "",
+"62 f5 ee 08 2a c8    \tvcvtsi2sh %rax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 1d cb    \tvcvtss2sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 48 5b ca    \tvcvttph2dq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 08 5b ca    \tvcvttph2dq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 28 5b ca    \tvcvttph2dq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7a ca    \tvcvttph2qq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7a ca    \tvcvttph2qq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7a ca    \tvcvttph2qq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 78 ca    \tvcvttph2udq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 78 ca    \tvcvttph2udq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 78 ca    \tvcvttph2udq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 78 ca    \tvcvttph2uqq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 78 ca    \tvcvttph2uqq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 78 ca    \tvcvttph2uqq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 7c ca    \tvcvttph2uw %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 7c ca    \tvcvttph2uw %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 7c ca    \tvcvttph2uw %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7c ca    \tvcvttph2w %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7c ca    \tvcvttph2w %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7c ca    \tvcvttph2w %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 2c c1    \tvcvttsh2si %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%rax,%rcx,8),%eax",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x2c, 0xc1, }, 6, 0, "", "",
+"62 f5 fe 08 2c c1    \tvcvttsh2si %xmm1,%rax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 fe 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%rax,%rcx,8),%rax",},
+{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 fe 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%rax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 78 c1    \tvcvttsh2usi %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%rax,%rcx,8),%eax",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x78, 0xc1, }, 6, 0, "", "",
+"62 f5 fe 08 78 c1    \tvcvttsh2usi %xmm1,%rax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 fe 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%rax,%rcx,8),%rax",},
+{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 fe 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%rax",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 48 7a ca    \tvcvtudq2ph %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 08 7a ca    \tvcvtudq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 28 7a ca    \tvcvtudq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 48 7a ca    \tvcvtuqq2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 08 7a ca    \tvcvtuqq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 28 7a ca    \tvcvtuqq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0xc8, }, 6, 0, "", "",
+"62 f5 6e 08 7b c8    \tvcvtusi2sh %eax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0xee, 0x08, 0x7b, 0xc8, }, 6, 0, "", "",
+"62 f5 ee 08 7b c8    \tvcvtusi2sh %rax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 48 7d ca    \tvcvtuw2ph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 08 7d ca    \tvcvtuw2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 28 7d ca    \tvcvtuw2ph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 48 7d ca    \tvcvtw2ph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 08 7d ca    \tvcvtw2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 28 7d ca    \tvcvtw2ph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5e cb    \tvdivph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5e cb    \tvdivph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5e cb    \tvdivph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5e cb    \tvdivsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 48 56 cb    \tvfcmaddcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 56 cb    \tvfcmaddcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 28 56 cb    \tvfcmaddcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 57 cb    \tvfcmaddcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 48 d6 cb    \tvfcmulcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 d6 cb    \tvfcmulcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 28 d6 cb    \tvfcmulcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 d7 cb    \tvfcmulcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 98 cb    \tvfmadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 98 cb    \tvfmadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 98 cb    \tvfmadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 99 cb    \tvfmadd132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a8 cb    \tvfmadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a8 cb    \tvfmadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a8 cb    \tvfmadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a9 cb    \tvfmadd213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b8 cb    \tvfmadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b8 cb    \tvfmadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b8 cb    \tvfmadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b9 cb    \tvfmadd231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 48 56 cb    \tvfmaddcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 56 cb    \tvfmaddcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 28 56 cb    \tvfmaddcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 57 cb    \tvfmaddcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 96 cb    \tvfmaddsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 96 cb    \tvfmaddsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 96 cb    \tvfmaddsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a6 cb    \tvfmaddsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a6 cb    \tvfmaddsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a6 cb    \tvfmaddsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b6 cb    \tvfmaddsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b6 cb    \tvfmaddsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b6 cb    \tvfmaddsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9a cb    \tvfmsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9a cb    \tvfmsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9a cb    \tvfmsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9b cb    \tvfmsub132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 aa cb    \tvfmsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 aa cb    \tvfmsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 aa cb    \tvfmsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ab cb    \tvfmsub213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ba cb    \tvfmsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ba cb    \tvfmsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ba cb    \tvfmsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bb cb    \tvfmsub231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 97 cb    \tvfmsubadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 97 cb    \tvfmsubadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 97 cb    \tvfmsubadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a7 cb    \tvfmsubadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a7 cb    \tvfmsubadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a7 cb    \tvfmsubadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b7 cb    \tvfmsubadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b7 cb    \tvfmsubadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b7 cb    \tvfmsubadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 48 d6 cb    \tvfmulcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 d6 cb    \tvfmulcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 28 d6 cb    \tvfmulcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 d7 cb    \tvfmulcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9c cb    \tvfnmadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9c cb    \tvfnmadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9c cb    \tvfnmadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9d cb    \tvfnmadd132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ac cb    \tvfnmadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ac cb    \tvfnmadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ac cb    \tvfnmadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ad cb    \tvfnmadd213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 bc cb    \tvfnmadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bc cb    \tvfnmadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 bc cb    \tvfnmadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bd cb    \tvfnmadd231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9e cb    \tvfnmsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9e cb    \tvfnmsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9e cb    \tvfnmsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9f cb    \tvfnmsub132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ae cb    \tvfnmsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ae cb    \tvfnmsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ae cb    \tvfnmsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 af cb    \tvfnmsub213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 be cb    \tvfnmsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 be cb    \tvfnmsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 be cb    \tvfnmsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bf cb    \tvfnmsub231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 66 e9 12 \tvfpclassph $0x12,%zmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 66 e9 12 \tvfpclassph $0x12,%xmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 66 e9 12 \tvfpclassph $0x12,%ymm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 67 e9 12 \tvfpclasssh $0x12,%xmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%rax,%rcx,8),%k5",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%eax,%ecx,8),%k5",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 42 ca    \tvgetexpph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 42 ca    \tvgetexpph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 42 ca    \tvgetexpph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 43 cb    \tvgetexpsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 26 ca 12 \tvgetmantph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 26 ca 12 \tvgetmantph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 26 ca 12 \tvgetmantph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 27 cb 12 \tvgetmantsh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5f cb    \tvmaxph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5f cb    \tvmaxph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5f cb    \tvmaxph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5f cb    \tvmaxsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5d cb    \tvminph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5d cb    \tvminph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5d cb    \tvminph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5d cb    \tvminsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x10, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 10 cb    \tvmovsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8, }, 6, 0, "", "",
+"62 f5 7d 08 7e c8    \tvmovw  %xmm1,%eax",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw  %xmm1,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw  %xmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8, }, 6, 0, "", "",
+"62 f5 7d 08 6e c8    \tvmovw  %eax,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw  0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw  0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 59 cb    \tvmulph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 59 cb    \tvmulph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 59 cb    \tvmulph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 59 cb    \tvmulsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 4c ca    \tvrcpph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 4c ca    \tvrcpph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 4c ca    \tvrcpph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 4d cb    \tvrcpsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 56 ca 12 \tvreduceph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 56 ca 12 \tvreduceph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 56 ca 12 \tvreduceph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 57 cb 12 \tvreducesh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 08 ca 12 \tvrndscaleph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 08 ca 12 \tvrndscaleph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 08 ca 12 \tvrndscaleph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 0a cb 12 \tvrndscalesh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 4e ca    \tvrsqrtph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 4e ca    \tvrsqrtph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 4e ca    \tvrsqrtph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 4f cb    \tvrsqrtsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 2c cb    \tvscalefph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 2c cb    \tvscalefph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 2c cb    \tvscalefph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 2d cb    \tvscalefsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 51 ca    \tvsqrtph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 51 ca    \tvsqrtph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 51 ca    \tvsqrtph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 51 cb    \tvsqrtsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5c cb    \tvsubph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5c cb    \tvsubph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5c cb    \tvsubph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5c cb    \tvsubsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 2e ca    \tvucomish %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "",
+"f3 0f 3a f0 c0 00    \threset $0x0",},
+{{0x0f, 0x01, 0xe8, }, 3, 0, "", "",
+"0f 01 e8             \tserialize ",},
+{{0xf2, 0x0f, 0x01, 0xe9, }, 4, 0, "", "",
+"f2 0f 01 e9          \txresldtrk ",},
+{{0xf2, 0x0f, 0x01, 0xe8, }, 4, 0, "", "",
+"f2 0f 01 e8          \txsusldtrk ",},
 {{0x0f, 0x01, 0xcf, }, 3, 0, "", "",
 "0f 01 cf             \tencls  ",},
 {{0x0f, 0x01, 0xd7, }, 3, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
index c3808e94c46e..a391464c8dee 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
@@ -1910,6 +1910,724 @@ int main(void)
 	asm volatile("notrack bnd jmpq *0x12345678(%rax,%rcx,8)");	/* Expecting: jmp indirect 0 */
 	asm volatile("notrack bnd jmpq *0x12345678(%r8,%rcx,8)");	/* Expecting: jmp indirect 0 */
 
+	/* AMX */
+
+	asm volatile("ldtilecfg (%rax,%rcx,8)");
+	asm volatile("ldtilecfg (%r8,%rcx,8)");
+	asm volatile("sttilecfg (%rax,%rcx,8)");
+	asm volatile("sttilecfg (%r8,%rcx,8)");
+	asm volatile("tdpbf16ps %tmm0, %tmm1, %tmm2");
+	asm volatile("tdpbssd %tmm0, %tmm1, %tmm2");
+	asm volatile("tdpbsud %tmm0, %tmm1, %tmm2");
+	asm volatile("tdpbusd %tmm0, %tmm1, %tmm2");
+	asm volatile("tdpbuud %tmm0, %tmm1, %tmm2");
+	asm volatile("tileloadd (%rax,%rcx,8), %tmm1");
+	asm volatile("tileloadd (%r8,%rcx,8), %tmm2");
+	asm volatile("tileloaddt1 (%rax,%rcx,8), %tmm1");
+	asm volatile("tileloaddt1 (%r8,%rcx,8), %tmm2");
+	asm volatile("tilerelease");
+	asm volatile("tilestored %tmm1, (%rax,%rcx,8)");
+	asm volatile("tilestored %tmm2, (%r8,%rcx,8)");
+	asm volatile("tilezero %tmm0");
+	asm volatile("tilezero %tmm7");
+
+	/* User Interrupt */
+
+	asm volatile("clui");
+	asm volatile("senduipi %rax");
+	asm volatile("senduipi %r8");
+	asm volatile("stui");
+	asm volatile("testui");
+	asm volatile("uiret");
+
+	/* AVX512-FP16 */
+
+	asm volatile("vaddph %zmm3, %zmm2, %zmm1");
+	asm volatile("vaddph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vaddph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vaddph %xmm3, %xmm2, %xmm1");
+	asm volatile("vaddph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vaddph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vaddph %ymm3, %ymm2, %ymm1");
+	asm volatile("vaddph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vaddph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vaddsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vaddsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vaddsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcmpph $0x12, %zmm3, %zmm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %zmm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %zmm2, %k5");
+	asm volatile("vcmpph $0x12, %xmm3, %xmm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5");
+	asm volatile("vcmpph $0x12, %ymm3, %ymm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %ymm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %ymm2, %k5");
+	asm volatile("vcmpsh $0x12, %xmm3, %xmm2, %k5");
+	asm volatile("vcmpsh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %k5");
+	asm volatile("vcmpsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5");
+	asm volatile("vcomish %xmm2, %xmm1");
+	asm volatile("vcomish 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcomish 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtdq2ph %zmm2, %ymm1");
+	asm volatile("vcvtdq2ph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtdq2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtdq2ph %xmm2, %xmm1");
+	asm volatile("vcvtdq2ph %ymm2, %xmm1");
+	asm volatile("vcvtpd2ph %zmm2, %xmm1");
+	asm volatile("vcvtpd2ph %xmm2, %xmm1");
+	asm volatile("vcvtpd2ph %ymm2, %xmm1");
+	asm volatile("vcvtph2dq %ymm2, %zmm1");
+	asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2dq %xmm2, %xmm1");
+	asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2dq %xmm2, %ymm1");
+	asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2pd %xmm2, %zmm1");
+	asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2pd %xmm2, %xmm1");
+	asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2pd %xmm2, %ymm1");
+	asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2ps %ymm2, %zmm1");
+	asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2ps %xmm2, %xmm1");
+	asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2ps %xmm2, %ymm1");
+	asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2ps %xmm2, %xmm1");
+	asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2ps %xmm2, %ymm1");
+	asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2psx %ymm2, %zmm1");
+	asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2psx %xmm2, %xmm1");
+	asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2psx %xmm2, %ymm1");
+	asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2qq %xmm2, %zmm1");
+	asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2qq %xmm2, %xmm1");
+	asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2qq %xmm2, %ymm1");
+	asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2udq %ymm2, %zmm1");
+	asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2udq %xmm2, %xmm1");
+	asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2udq %xmm2, %ymm1");
+	asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2uqq %xmm2, %zmm1");
+	asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2uqq %xmm2, %xmm1");
+	asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2uqq %xmm2, %ymm1");
+	asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2uw %zmm2, %zmm1");
+	asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2uw %xmm2, %xmm1");
+	asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2uw %ymm2, %ymm1");
+	asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2w %zmm2, %zmm1");
+	asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2w %xmm2, %xmm1");
+	asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2w %ymm2, %ymm1");
+	asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%rax,%rcx,8)");
+	asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %zmm2, %ymm1");
+	asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%rax,%rcx,8)");
+	asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%rax,%rcx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%rax,%rcx,8)");
+	asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%rax,%rcx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2phx %zmm2, %ymm1");
+	asm volatile("vcvtps2phx 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtps2phx 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtps2phx %xmm2, %xmm1");
+	asm volatile("vcvtps2phx %ymm2, %xmm1");
+	asm volatile("vcvtqq2ph %zmm2, %xmm1");
+	asm volatile("vcvtqq2ph %xmm2, %xmm1");
+	asm volatile("vcvtqq2ph %ymm2, %xmm1");
+	asm volatile("vcvtsd2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsh2sd 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %rax");
+	asm volatile("vcvtsh2ss 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsh2usi %xmm1, %eax");
+	asm volatile("vcvtsh2usi 0x12345678(%rax,%rcx,8), %eax");
+	asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvtsh2usi %xmm1, %rax");
+	asm volatile("vcvtsh2usi 0x12345678(%rax,%rcx,8), %rax");
+	asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %rax");
+	asm volatile("vcvtsi2sh %eax, %xmm2, %xmm1");
+	asm volatile("vcvtsi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsi2sh %rax, %xmm2, %xmm1");
+	asm volatile("vcvtsi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtss2sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vcvtss2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vcvtss2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvttph2dq %ymm2, %zmm1");
+	asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2dq %xmm2, %xmm1");
+	asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2dq %xmm2, %ymm1");
+	asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2qq %xmm2, %zmm1");
+	asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2qq %xmm2, %xmm1");
+	asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2qq %xmm2, %ymm1");
+	asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2udq %ymm2, %zmm1");
+	asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2udq %xmm2, %xmm1");
+	asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2udq %xmm2, %ymm1");
+	asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2uqq %xmm2, %zmm1");
+	asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2uqq %xmm2, %xmm1");
+	asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2uqq %xmm2, %ymm1");
+	asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2uw %zmm2, %zmm1");
+	asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2uw %xmm2, %xmm1");
+	asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2uw %ymm2, %ymm1");
+	asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2w %zmm2, %zmm1");
+	asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2w %xmm2, %xmm1");
+	asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2w %ymm2, %ymm1");
+	asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttsh2si %xmm1, %eax");
+	asm volatile("vcvttsh2si 0x12345678(%rax,%rcx,8), %eax");
+	asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvttsh2si %xmm1, %rax");
+	asm volatile("vcvttsh2si 0x12345678(%rax,%rcx,8), %rax");
+	asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %rax");
+	asm volatile("vcvttsh2usi %xmm1, %eax");
+	asm volatile("vcvttsh2usi 0x12345678(%rax,%rcx,8), %eax");
+	asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvttsh2usi %xmm1, %rax");
+	asm volatile("vcvttsh2usi 0x12345678(%rax,%rcx,8), %rax");
+	asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %rax");
+	asm volatile("vcvtudq2ph %zmm2, %ymm1");
+	asm volatile("vcvtudq2ph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtudq2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtudq2ph %xmm2, %xmm1");
+	asm volatile("vcvtudq2ph %ymm2, %xmm1");
+	asm volatile("vcvtuqq2ph %zmm2, %xmm1");
+	asm volatile("vcvtuqq2ph %xmm2, %xmm1");
+	asm volatile("vcvtuqq2ph %ymm2, %xmm1");
+	asm volatile("vcvtusi2sh %eax, %xmm2, %xmm1");
+	asm volatile("vcvtusi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtusi2sh %rax, %xmm2, %xmm1");
+	asm volatile("vcvtusi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtuw2ph %zmm2, %zmm1");
+	asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtuw2ph %xmm2, %xmm1");
+	asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtuw2ph %ymm2, %ymm1");
+	asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtw2ph %zmm2, %zmm1");
+	asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtw2ph %xmm2, %xmm1");
+	asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtw2ph %ymm2, %ymm1");
+	asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vdivph %zmm3, %zmm2, %zmm1");
+	asm volatile("vdivph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vdivph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vdivph %xmm3, %xmm2, %xmm1");
+	asm volatile("vdivph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vdivph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vdivph %ymm3, %ymm2, %ymm1");
+	asm volatile("vdivph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vdivph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vdivsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vdivsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vdivsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmaddcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfcmaddcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmaddcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfcmaddcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmaddcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfcmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmulcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfcmulcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmulcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfcmulcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmulcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfcmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddsub213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddsub231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsubadd213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsubadd231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmulcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmulcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmulcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmulcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmulcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfpclassph $0x12, %zmm1, %k5");
+	asm volatile("vfpclassph $0x12, %xmm1, %k5");
+	asm volatile("vfpclassph $0x12, %ymm1, %k5");
+	asm volatile("vfpclasssh $0x12, %xmm1, %k5");
+	asm volatile("vfpclasssh $0x12, 0x12345678(%rax,%rcx,8), %k5");
+	asm volatile("vfpclasssh $0x12, 0x12345678(%eax,%ecx,8), %k5");
+	asm volatile("vgetexpph %zmm2, %zmm1");
+	asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vgetexpph %xmm2, %xmm1");
+	asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vgetexpph %ymm2, %ymm1");
+	asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vgetexpsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vgetexpsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vgetexpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vgetmantph $0x12, %zmm2, %zmm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vgetmantph $0x12, %xmm2, %xmm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vgetmantph $0x12, %ymm2, %ymm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vgetmantsh $0x12, %xmm3, %xmm2, %xmm1");
+	asm volatile("vgetmantsh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vgetmantsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmaxph %zmm3, %zmm2, %zmm1");
+	asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vmaxph %xmm3, %xmm2, %xmm1");
+	asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmaxph %ymm3, %ymm2, %ymm1");
+	asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vmaxsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vmaxsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vmaxsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vminph %zmm3, %zmm2, %zmm1");
+	asm volatile("vminph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vminph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vminph %xmm3, %xmm2, %xmm1");
+	asm volatile("vminph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vminph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vminph %ymm3, %ymm2, %ymm1");
+	asm volatile("vminph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vminph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vminsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vminsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vminsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmovsh %xmm1, 0x12345678(%rax,%rcx,8)");
+	asm volatile("vmovsh %xmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vmovsh 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vmovsh 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vmovsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vmovw %xmm1, %eax");
+	asm volatile("vmovw %xmm1, 0x12345678(%rax,%rcx,8)");
+	asm volatile("vmovw %xmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vmovw %eax, %xmm1");
+	asm volatile("vmovw 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vmovw 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vmulph %zmm3, %zmm2, %zmm1");
+	asm volatile("vmulph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vmulph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vmulph %xmm3, %xmm2, %xmm1");
+	asm volatile("vmulph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vmulph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmulph %ymm3, %ymm2, %ymm1");
+	asm volatile("vmulph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vmulph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vmulsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vmulsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vmulsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vrcpph %zmm2, %zmm1");
+	asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vrcpph %xmm2, %xmm1");
+	asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vrcpph %ymm2, %ymm1");
+	asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vrcpsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vrcpsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vrcpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vreduceph $0x12, %zmm2, %zmm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vreduceph $0x12, %xmm2, %xmm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vreduceph $0x12, %ymm2, %ymm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vreducesh $0x12, %xmm3, %xmm2, %xmm1");
+	asm volatile("vreducesh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vreducesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vrndscaleph $0x12, %zmm2, %zmm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vrndscaleph $0x12, %xmm2, %xmm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vrndscaleph $0x12, %ymm2, %ymm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vrndscalesh $0x12, %xmm3, %xmm2, %xmm1");
+	asm volatile("vrndscalesh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vrndscalesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vrsqrtph %zmm2, %zmm1");
+	asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vrsqrtph %xmm2, %xmm1");
+	asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vrsqrtph %ymm2, %ymm1");
+	asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vrsqrtsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vrsqrtsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vrsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vscalefph %zmm3, %zmm2, %zmm1");
+	asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vscalefph %xmm3, %xmm2, %xmm1");
+	asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vscalefph %ymm3, %ymm2, %ymm1");
+	asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vscalefsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vscalefsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vscalefsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vsqrtph %zmm2, %zmm1");
+	asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vsqrtph %xmm2, %xmm1");
+	asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vsqrtph %ymm2, %ymm1");
+	asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vsqrtsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vsqrtsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vsubph %zmm3, %zmm2, %zmm1");
+	asm volatile("vsubph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vsubph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vsubph %xmm3, %xmm2, %xmm1");
+	asm volatile("vsubph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vsubph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vsubph %ymm3, %ymm2, %ymm1");
+	asm volatile("vsubph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vsubph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vsubsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vsubsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vsubsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vucomish %xmm2, %xmm1");
+	asm volatile("vucomish 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vucomish 0x12345678(%eax,%ecx,8), %xmm1");
+
 #else  /* #ifdef __x86_64__ */
 
 	/* bound r32, mem (same op code as EVEX prefix) */
@@ -3670,8 +4388,479 @@ int main(void)
 	asm volatile("notrack bnd jmp *(0x12345678)");		/* Expecting: jmp indirect 0 */
 	asm volatile("notrack bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
 
+	/* AVX512-FP16 */
+
+	asm volatile("vaddph %zmm3, %zmm2, %zmm1");
+	asm volatile("vaddph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vaddph %xmm3, %xmm2, %xmm1");
+	asm volatile("vaddph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vaddph %ymm3, %ymm2, %ymm1");
+	asm volatile("vaddph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vaddsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vaddsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcmpph $0x12, %zmm3, %zmm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %zmm2, %k5");
+	asm volatile("vcmpph $0x12, %xmm3, %xmm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5");
+	asm volatile("vcmpph $0x12, %ymm3, %ymm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %ymm2, %k5");
+	asm volatile("vcmpsh $0x12, %xmm3, %xmm2, %k5");
+	asm volatile("vcmpsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5");
+	asm volatile("vcomish %xmm2, %xmm1");
+	asm volatile("vcomish 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtdq2ph %zmm2, %ymm1");
+	asm volatile("vcvtdq2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtdq2ph %xmm2, %xmm1");
+	asm volatile("vcvtdq2ph %ymm2, %xmm1");
+	asm volatile("vcvtpd2ph %zmm2, %xmm1");
+	asm volatile("vcvtpd2ph %xmm2, %xmm1");
+	asm volatile("vcvtpd2ph %ymm2, %xmm1");
+	asm volatile("vcvtph2dq %ymm2, %zmm1");
+	asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2dq %xmm2, %xmm1");
+	asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2dq %xmm2, %ymm1");
+	asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2pd %xmm2, %zmm1");
+	asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2pd %xmm2, %xmm1");
+	asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2pd %xmm2, %ymm1");
+	asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2ps %ymm2, %zmm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2ps %xmm2, %xmm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2ps %xmm2, %ymm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2ps %xmm2, %xmm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2ps %xmm2, %ymm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2psx %ymm2, %zmm1");
+	asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2psx %xmm2, %xmm1");
+	asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2psx %xmm2, %ymm1");
+	asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2qq %xmm2, %zmm1");
+	asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2qq %xmm2, %xmm1");
+	asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2qq %xmm2, %ymm1");
+	asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2udq %ymm2, %zmm1");
+	asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2udq %xmm2, %xmm1");
+	asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2udq %xmm2, %ymm1");
+	asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2uqq %xmm2, %zmm1");
+	asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2uqq %xmm2, %xmm1");
+	asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2uqq %xmm2, %ymm1");
+	asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2uw %zmm2, %zmm1");
+	asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2uw %xmm2, %xmm1");
+	asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2uw %ymm2, %ymm1");
+	asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2w %zmm2, %zmm1");
+	asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2w %xmm2, %xmm1");
+	asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2w %ymm2, %ymm1");
+	asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %zmm2, %ymm1");
+	asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2phx %zmm2, %ymm1");
+	asm volatile("vcvtps2phx 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtps2phx %xmm2, %xmm1");
+	asm volatile("vcvtps2phx %ymm2, %xmm1");
+	asm volatile("vcvtqq2ph %zmm2, %xmm1");
+	asm volatile("vcvtqq2ph %xmm2, %xmm1");
+	asm volatile("vcvtqq2ph %ymm2, %xmm1");
+	asm volatile("vcvtsd2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsh2sd 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvtsh2ss 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsh2usi %xmm1, %eax");
+	asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvtsi2sh %eax, %xmm2, %xmm1");
+	asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtss2sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vcvtss2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvttph2dq %ymm2, %zmm1");
+	asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2dq %xmm2, %xmm1");
+	asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2dq %xmm2, %ymm1");
+	asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2qq %xmm2, %zmm1");
+	asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2qq %xmm2, %xmm1");
+	asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2qq %xmm2, %ymm1");
+	asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2udq %ymm2, %zmm1");
+	asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2udq %xmm2, %xmm1");
+	asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2udq %xmm2, %ymm1");
+	asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2uqq %xmm2, %zmm1");
+	asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2uqq %xmm2, %xmm1");
+	asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2uqq %xmm2, %ymm1");
+	asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2uw %zmm2, %zmm1");
+	asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2uw %xmm2, %xmm1");
+	asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2uw %ymm2, %ymm1");
+	asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2w %zmm2, %zmm1");
+	asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2w %xmm2, %xmm1");
+	asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2w %ymm2, %ymm1");
+	asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttsh2si %xmm1, %eax");
+	asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvttsh2usi %xmm1, %eax");
+	asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvtudq2ph %zmm2, %ymm1");
+	asm volatile("vcvtudq2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtudq2ph %xmm2, %xmm1");
+	asm volatile("vcvtudq2ph %ymm2, %xmm1");
+	asm volatile("vcvtuqq2ph %zmm2, %xmm1");
+	asm volatile("vcvtuqq2ph %xmm2, %xmm1");
+	asm volatile("vcvtuqq2ph %ymm2, %xmm1");
+	asm volatile("vcvtusi2sh %eax, %xmm2, %xmm1");
+	asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtuw2ph %zmm2, %zmm1");
+	asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtuw2ph %xmm2, %xmm1");
+	asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtuw2ph %ymm2, %ymm1");
+	asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtw2ph %zmm2, %zmm1");
+	asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtw2ph %xmm2, %xmm1");
+	asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtw2ph %ymm2, %ymm1");
+	asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vdivph %zmm3, %zmm2, %zmm1");
+	asm volatile("vdivph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vdivph %xmm3, %xmm2, %xmm1");
+	asm volatile("vdivph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vdivph %ymm3, %ymm2, %ymm1");
+	asm volatile("vdivph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vdivsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vdivsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmaddcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfcmaddcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmaddcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfcmaddcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmulcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfcmulcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmulcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfcmulcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddsub213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddsub231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsubadd213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsubadd231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmulcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmulcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmulcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmulcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfpclassph $0x12, %zmm1, %k5");
+	asm volatile("vfpclassph $0x12, %xmm1, %k5");
+	asm volatile("vfpclassph $0x12, %ymm1, %k5");
+	asm volatile("vfpclasssh $0x12, %xmm1, %k5");
+	asm volatile("vfpclasssh $0x12, 0x12345678(%eax,%ecx,8), %k5");
+	asm volatile("vgetexpph %zmm2, %zmm1");
+	asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vgetexpph %xmm2, %xmm1");
+	asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vgetexpph %ymm2, %ymm1");
+	asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vgetexpsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vgetexpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vgetmantph $0x12, %zmm2, %zmm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vgetmantph $0x12, %xmm2, %xmm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vgetmantph $0x12, %ymm2, %ymm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vgetmantsh $0x12, %xmm3, %xmm2, %xmm1");
+	asm volatile("vgetmantsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmaxph %zmm3, %zmm2, %zmm1");
+	asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vmaxph %xmm3, %xmm2, %xmm1");
+	asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmaxph %ymm3, %ymm2, %ymm1");
+	asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vmaxsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vmaxsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vminph %zmm3, %zmm2, %zmm1");
+	asm volatile("vminph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vminph %xmm3, %xmm2, %xmm1");
+	asm volatile("vminph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vminph %ymm3, %ymm2, %ymm1");
+	asm volatile("vminph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vminsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vminsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmovsh %xmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vmovsh 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vmovsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vmovw %xmm1, %eax");
+	asm volatile("vmovw %xmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vmovw %eax, %xmm1");
+	asm volatile("vmovw 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vmulph %zmm3, %zmm2, %zmm1");
+	asm volatile("vmulph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vmulph %xmm3, %xmm2, %xmm1");
+	asm volatile("vmulph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmulph %ymm3, %ymm2, %ymm1");
+	asm volatile("vmulph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vmulsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vmulsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vrcpph %zmm2, %zmm1");
+	asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vrcpph %xmm2, %xmm1");
+	asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vrcpph %ymm2, %ymm1");
+	asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vrcpsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vrcpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vreduceph $0x12, %zmm2, %zmm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vreduceph $0x12, %xmm2, %xmm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vreduceph $0x12, %ymm2, %ymm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vreducesh $0x12, %xmm3, %xmm2, %xmm1");
+	asm volatile("vreducesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vrndscaleph $0x12, %zmm2, %zmm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vrndscaleph $0x12, %xmm2, %xmm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vrndscaleph $0x12, %ymm2, %ymm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vrndscalesh $0x12, %xmm3, %xmm2, %xmm1");
+	asm volatile("vrndscalesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vrsqrtph %zmm2, %zmm1");
+	asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vrsqrtph %xmm2, %xmm1");
+	asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vrsqrtph %ymm2, %ymm1");
+	asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vrsqrtsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vrsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vscalefph %zmm3, %zmm2, %zmm1");
+	asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vscalefph %xmm3, %xmm2, %xmm1");
+	asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vscalefph %ymm3, %ymm2, %ymm1");
+	asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vscalefsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vscalefsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vsqrtph %zmm2, %zmm1");
+	asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vsqrtph %xmm2, %xmm1");
+	asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vsqrtph %ymm2, %ymm1");
+	asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vsqrtsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vsubph %zmm3, %zmm2, %zmm1");
+	asm volatile("vsubph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vsubph %xmm3, %xmm2, %xmm1");
+	asm volatile("vsubph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vsubph %ymm3, %ymm2, %ymm1");
+	asm volatile("vsubph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vsubsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vsubsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vucomish %xmm2, %xmm1");
+	asm volatile("vucomish 0x12345678(%eax,%ecx,8), %xmm1");
+
 #endif /* #ifndef __x86_64__ */
 
+	/* Prediction history reset */
+
+	asm volatile("hreset $0");
+
+	/* Serialize instruction execution */
+
+	asm volatile("serialize");
+
+	/* TSX suspend load address tracking */
+
+	asm volatile("xresldtrk");
+	asm volatile("xsusldtrk");
+
 	/* SGX */
 
 	asm volatile("encls");
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index f924246eff78..8d9b55959256 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -29,7 +29,7 @@ struct evsel *arch_evlist__leader(struct list_head *list)
 
 	__evlist__for_each_entry(list, evsel) {
 		if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") &&
-			evsel->name && strstr(evsel->name, "slots"))
+			evsel->name && strcasestr(evsel->name, "slots"))
 			return evsel;
 	}
 	return first;
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index 740ae764537e..134612bde0cb 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -106,7 +106,7 @@ static void nest_epollfd(void)
 	printinfo("Nesting level(s): %d\n", nested);
 
 	epollfdp = calloc(nested, sizeof(int));
-	if (!epollfd)
+	if (!epollfdp)
 		err(EXIT_FAILURE, "calloc");
 
 	for (i = 0; i < nested; i++) {
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index eaad04e1672a..41a66a48cbdf 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -346,7 +346,7 @@ struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affin
 {
 	struct evlist_cpu_iterator itr = {
 		.container = evlist,
-		.evsel = evlist__first(evlist),
+		.evsel = NULL,
 		.cpu_map_idx = 0,
 		.evlist_cpu_map_idx = 0,
 		.evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus),
@@ -354,16 +354,22 @@ struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affin
 		.affinity = affinity,
 	};
 
-	if (itr.affinity) {
-		itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0);
-		affinity__set(itr.affinity, itr.cpu.cpu);
-		itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu);
-		/*
-		 * If this CPU isn't in the evsel's cpu map then advance through
-		 * the list.
-		 */
-		if (itr.cpu_map_idx == -1)
-			evlist_cpu_iterator__next(&itr);
+	if (evlist__empty(evlist)) {
+		/* Ensure the empty list doesn't iterate. */
+		itr.evlist_cpu_map_idx = itr.evlist_cpu_map_nr;
+	} else {
+		itr.evsel = evlist__first(evlist);
+		if (itr.affinity) {
+			itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0);
+			affinity__set(itr.affinity, itr.cpu.cpu);
+			itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu);
+			/*
+			 * If this CPU isn't in the evsel's cpu map then advance
+			 * through the list.
+			 */
+			if (itr.cpu_map_idx == -1)
+				evlist_cpu_iterator__next(&itr);
+		}
 	}
 	return itr;
 }
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
index 5acf053fca7d..aa0c5179836d 100644
--- a/tools/perf/util/include/linux/linkage.h
+++ b/tools/perf/util/include/linux/linkage.h
@@ -50,41 +50,32 @@
 #ifndef SYM_END
 #define SYM_END(name, sym_type)				\
 	.type name sym_type ASM_NL			\
+	.set .L__sym_size_##name, .-name ASM_NL		\
 	.size name, .-name
 #endif
 
-/*
- * SYM_FUNC_START_ALIAS -- use where there are two global names for one
- * function
- */
-#ifndef SYM_FUNC_START_ALIAS
-#define SYM_FUNC_START_ALIAS(name)			\
-	SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+/* SYM_ALIAS -- use only if you have to */
+#ifndef SYM_ALIAS
+#define SYM_ALIAS(alias, name, sym_type, linkage)			\
+	linkage(alias) ASM_NL						\
+	.set alias, name ASM_NL						\
+	.type alias sym_type ASM_NL					\
+	.set .L__sym_size_##alias, .L__sym_size_##name ASM_NL		\
+	.size alias, .L__sym_size_##alias
 #endif
 
 /* SYM_FUNC_START -- use for global functions */
 #ifndef SYM_FUNC_START
-/*
- * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two
- * later.
- */
 #define SYM_FUNC_START(name)				\
 	SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
 #endif
 
 /* SYM_FUNC_START_LOCAL -- use for local functions */
 #ifndef SYM_FUNC_START_LOCAL
-/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */
 #define SYM_FUNC_START_LOCAL(name)			\
 	SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)
 #endif
 
-/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */
-#ifndef SYM_FUNC_END_ALIAS
-#define SYM_FUNC_END_ALIAS(name)			\
-	SYM_END(name, SYM_T_FUNC)
-#endif
-
 /* SYM_FUNC_START_WEAK -- use for weak functions */
 #ifndef SYM_FUNC_START_WEAK
 #define SYM_FUNC_START_WEAK(name)			\
@@ -96,9 +87,32 @@
  * SYM_FUNC_START_WEAK, ...
  */
 #ifndef SYM_FUNC_END
-/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */
 #define SYM_FUNC_END(name)				\
 	SYM_END(name, SYM_T_FUNC)
 #endif
 
+/*
+ * SYM_FUNC_ALIAS -- define a global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS
+#define SYM_FUNC_ALIAS(alias, name)					\
+	SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_LOCAL
+#define SYM_FUNC_ALIAS_LOCAL(alias, name)				\
+	SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_WEAK
+#define SYM_FUNC_ALIAS_WEAK(alias, name)				\
+	SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK)
+#endif
+
 #endif	/* PERF_LINUX_LINKAGE_H_ */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 9739b05b999e..24997925ae00 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1648,6 +1648,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
 {
 	struct parse_events_term *term;
 	struct list_head *list = NULL;
+	struct list_head *orig_head = NULL;
 	struct perf_pmu *pmu = NULL;
 	int ok = 0;
 	char *config;
@@ -1674,7 +1675,6 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
 	}
 	list_add_tail(&term->list, head);
 
-
 	/* Add it for all PMUs that support the alias */
 	list = malloc(sizeof(struct list_head));
 	if (!list)
@@ -1687,13 +1687,15 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
 
 		list_for_each_entry(alias, &pmu->aliases, list) {
 			if (!strcasecmp(alias->name, str)) {
+				parse_events_copy_term_list(head, &orig_head);
 				if (!parse_events_add_pmu(parse_state, list,
-							  pmu->name, head,
+							  pmu->name, orig_head,
 							  true, true)) {
 					pr_debug("%s -> %s/%s/\n", str,
 						 pmu->name, alias->str);
 					ok++;
 				}
+				parse_events_terms__delete(orig_head);
 			}
 		}
 	}
@@ -2193,7 +2195,7 @@ int perf_pmu__test_parse_init(void)
 	for (i = 0; i < ARRAY_SIZE(symbols); i++, tmp++) {
 		tmp->type = symbols[i].type;
 		tmp->symbol = strdup(symbols[i].symbol);
-		if (!list->symbol)
+		if (!tmp->symbol)
 			goto err_free;
 	}
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b2ed3140a1fa..dfde9eada224 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -231,7 +231,7 @@ void symbols__fixup_end(struct rb_root_cached *symbols)
 		prev = curr;
 		curr = rb_entry(nd, struct symbol, rb_node);
 
-		if (prev->end == prev->start && prev->end != curr->start)
+		if (prev->end == prev->start || prev->end != curr->start)
 			arch__symbols__fixup_end(prev, curr);
 	}
 
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 3b1594447f29..e9b6de314654 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -143,9 +143,9 @@ UTIL_HEADERS = utils/helpers/helpers.h utils/idle_monitor/cpupower-monitor.h \
 	utils/helpers/bitmask.h \
 	utils/idle_monitor/idle_monitors.h utils/idle_monitor/idle_monitors.def
 
-LIB_HEADERS = 	lib/cpufreq.h lib/cpupower.h lib/cpuidle.h
-LIB_SRC = 	lib/cpufreq.c lib/cpupower.c lib/cpuidle.c
-LIB_OBJS = 	lib/cpufreq.o lib/cpupower.o lib/cpuidle.o
+LIB_HEADERS = 	lib/cpufreq.h lib/cpupower.h lib/cpuidle.h lib/acpi_cppc.h
+LIB_SRC = 	lib/cpufreq.c lib/cpupower.c lib/cpuidle.c lib/acpi_cppc.c
+LIB_OBJS = 	lib/cpufreq.o lib/cpupower.o lib/cpuidle.o lib/acpi_cppc.o
 LIB_OBJS :=	$(addprefix $(OUTPUT),$(LIB_OBJS))
 
 override CFLAGS +=	-pipe
diff --git a/tools/power/cpupower/ToDo b/tools/power/cpupower/TODO
index b196a139a3e4..b196a139a3e4 100644
--- a/tools/power/cpupower/ToDo
+++ b/tools/power/cpupower/TODO
diff --git a/tools/power/cpupower/lib/acpi_cppc.c b/tools/power/cpupower/lib/acpi_cppc.c
new file mode 100644
index 000000000000..c401ac331e9f
--- /dev/null
+++ b/tools/power/cpupower/lib/acpi_cppc.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "cpupower_intern.h"
+#include "acpi_cppc.h"
+
+/* ACPI CPPC sysfs access ***********************************************/
+
+static int acpi_cppc_read_file(unsigned int cpu, const char *fname,
+			       char *buf, size_t buflen)
+{
+	char path[SYSFS_PATH_MAX];
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/acpi_cppc/%s",
+		 cpu, fname);
+	return cpupower_read_sysfs(path, buf, buflen);
+}
+
+static const char * const acpi_cppc_value_files[] = {
+	[HIGHEST_PERF] = "highest_perf",
+	[LOWEST_PERF] = "lowest_perf",
+	[NOMINAL_PERF] = "nominal_perf",
+	[LOWEST_NONLINEAR_PERF] = "lowest_nonlinear_perf",
+	[LOWEST_FREQ] = "lowest_freq",
+	[NOMINAL_FREQ] = "nominal_freq",
+	[REFERENCE_PERF] = "reference_perf",
+	[WRAPAROUND_TIME] = "wraparound_time"
+};
+
+unsigned long acpi_cppc_get_data(unsigned int cpu, enum acpi_cppc_value which)
+{
+	unsigned long long value;
+	unsigned int len;
+	char linebuf[MAX_LINE_LEN];
+	char *endp;
+
+	if (which >= MAX_CPPC_VALUE_FILES)
+		return 0;
+
+	len = acpi_cppc_read_file(cpu, acpi_cppc_value_files[which],
+				  linebuf, sizeof(linebuf));
+	if (len == 0)
+		return 0;
+
+	value = strtoull(linebuf, &endp, 0);
+
+	if (endp == linebuf || errno == ERANGE)
+		return 0;
+
+	return value;
+}
diff --git a/tools/power/cpupower/lib/acpi_cppc.h b/tools/power/cpupower/lib/acpi_cppc.h
new file mode 100644
index 000000000000..85ca83080316
--- /dev/null
+++ b/tools/power/cpupower/lib/acpi_cppc.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ACPI_CPPC_H__
+#define __ACPI_CPPC_H__
+
+enum acpi_cppc_value {
+	HIGHEST_PERF,
+	LOWEST_PERF,
+	NOMINAL_PERF,
+	LOWEST_NONLINEAR_PERF,
+	LOWEST_FREQ,
+	NOMINAL_FREQ,
+	REFERENCE_PERF,
+	WRAPAROUND_TIME,
+	MAX_CPPC_VALUE_FILES
+};
+
+unsigned long acpi_cppc_get_data(unsigned int cpu,
+				 enum acpi_cppc_value which);
+
+#endif /* _ACPI_CPPC_H */
diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c
index c3b56db8b921..1516d23c17c9 100644
--- a/tools/power/cpupower/lib/cpufreq.c
+++ b/tools/power/cpupower/lib/cpufreq.c
@@ -83,20 +83,21 @@ static const char *cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = {
 	[STATS_NUM_TRANSITIONS] = "stats/total_trans"
 };
 
-
-static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
-						 enum cpufreq_value which)
+unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu,
+						 const char **table,
+						 unsigned int index,
+						 unsigned int size)
 {
 	unsigned long value;
 	unsigned int len;
 	char linebuf[MAX_LINE_LEN];
 	char *endp;
 
-	if (which >= MAX_CPUFREQ_VALUE_READ_FILES)
+	if (!table || index >= size || !table[index])
 		return 0;
 
-	len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which],
-				linebuf, sizeof(linebuf));
+	len = sysfs_cpufreq_read_file(cpu, table[index], linebuf,
+				      sizeof(linebuf));
 
 	if (len == 0)
 		return 0;
@@ -109,6 +110,14 @@ static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
 	return value;
 }
 
+static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
+						 enum cpufreq_value which)
+{
+	return cpufreq_get_sysfs_value_from_table(cpu, cpufreq_value_files,
+						  which,
+						  MAX_CPUFREQ_VALUE_READ_FILES);
+}
+
 /* read access to files which contain one string */
 
 enum cpufreq_string {
@@ -124,7 +133,7 @@ static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = {
 
 
 static char *sysfs_cpufreq_get_one_string(unsigned int cpu,
-					   enum cpufreq_string which)
+					  enum cpufreq_string which)
 {
 	char linebuf[MAX_LINE_LEN];
 	char *result;
diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h
index 95f4fd9e2656..2f3c84035806 100644
--- a/tools/power/cpupower/lib/cpufreq.h
+++ b/tools/power/cpupower/lib/cpufreq.h
@@ -203,6 +203,18 @@ int cpufreq_modify_policy_governor(unsigned int cpu, char *governor);
 int cpufreq_set_frequency(unsigned int cpu,
 				unsigned long target_frequency);
 
+/*
+ * get the sysfs value from specific table
+ *
+ * Read the value with the sysfs file name from specific table. Does
+ * only work if the cpufreq driver has the specific sysfs interfaces.
+ */
+
+unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu,
+						 const char **table,
+						 unsigned int index,
+						 unsigned int size);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1
index 6aa8d239dff9..dd545b499480 100644
--- a/tools/power/cpupower/man/cpupower-frequency-info.1
+++ b/tools/power/cpupower/man/cpupower-frequency-info.1
@@ -53,6 +53,9 @@ human\-readable output for the \-f, \-w, \-s and \-y parameters.
 \fB\-n\fR \fB\-\-no-rounding\fR
 Output frequencies and latencies without rounding off values.
 .TP  
+\fB\-c\fR \fB\-\-perf\fR
+Get performances and frequencies capabilities of CPPC, by reading it from hardware (only available on the hardware with CPPC).
+.TP
 .SH "REMARKS"
 .LP 
 By default only values of core zero are displayed. How to display settings of
diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1
index 21916cff7516..8cef3c71e19e 100644
--- a/tools/power/cpupower/man/cpupower-idle-set.1
+++ b/tools/power/cpupower/man/cpupower-idle-set.1
@@ -4,7 +4,7 @@
 cpupower\-idle\-set \- Utility to set cpu idle state specific kernel options
 .SH "SYNTAX"
 .LP
-cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP]
+cpupower [ \-c cpulist ] idle\-set [\fIoptions\fP]
 .SH "DESCRIPTION"
 .LP
 The cpupower idle\-set subcommand allows to set cpu idle, also called cpu
diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
index f9895e31ff5a..0646f615fe2d 100644
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -84,43 +84,6 @@ static void proc_cpufreq_output(void)
 }
 
 static int no_rounding;
-static void print_speed(unsigned long speed)
-{
-	unsigned long tmp;
-
-	if (no_rounding) {
-		if (speed > 1000000)
-			printf("%u.%06u GHz", ((unsigned int) speed/1000000),
-				((unsigned int) speed%1000000));
-		else if (speed > 1000)
-			printf("%u.%03u MHz", ((unsigned int) speed/1000),
-				(unsigned int) (speed%1000));
-		else
-			printf("%lu kHz", speed);
-	} else {
-		if (speed > 1000000) {
-			tmp = speed%10000;
-			if (tmp >= 5000)
-				speed += 10000;
-			printf("%u.%02u GHz", ((unsigned int) speed/1000000),
-				((unsigned int) (speed%1000000)/10000));
-		} else if (speed > 100000) {
-			tmp = speed%1000;
-			if (tmp >= 500)
-				speed += 1000;
-			printf("%u MHz", ((unsigned int) speed/1000));
-		} else if (speed > 1000) {
-			tmp = speed%100;
-			if (tmp >= 50)
-				speed += 100;
-			printf("%u.%01u MHz", ((unsigned int) speed/1000),
-				((unsigned int) (speed%1000)/100));
-		}
-	}
-
-	return;
-}
-
 static void print_duration(unsigned long duration)
 {
 	unsigned long tmp;
@@ -183,9 +146,12 @@ static int get_boost_mode_x86(unsigned int cpu)
 	printf(_("    Supported: %s\n"), support ? _("yes") : _("no"));
 	printf(_("    Active: %s\n"), active ? _("yes") : _("no"));
 
-	if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
-	     cpupower_cpu_info.family >= 0x10) ||
-	     cpupower_cpu_info.vendor == X86_VENDOR_HYGON) {
+	if (cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
+	    cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) {
+		return 0;
+	} else if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
+		    cpupower_cpu_info.family >= 0x10) ||
+		   cpupower_cpu_info.vendor == X86_VENDOR_HYGON) {
 		ret = decode_pstates(cpu, b_states, pstates, &pstate_no);
 		if (ret)
 			return ret;
@@ -254,11 +220,11 @@ static int get_boost_mode(unsigned int cpu)
 	if (freqs) {
 		printf(_("  boost frequency steps: "));
 		while (freqs->next) {
-			print_speed(freqs->frequency);
+			print_speed(freqs->frequency, no_rounding);
 			printf(", ");
 			freqs = freqs->next;
 		}
-		print_speed(freqs->frequency);
+		print_speed(freqs->frequency, no_rounding);
 		printf("\n");
 		cpufreq_put_available_frequencies(freqs);
 	}
@@ -277,7 +243,7 @@ static int get_freq_kernel(unsigned int cpu, unsigned int human)
 		return -EINVAL;
 	}
 	if (human) {
-		print_speed(freq);
+		print_speed(freq, no_rounding);
 	} else
 		printf("%lu", freq);
 	printf(_(" (asserted by call to kernel)\n"));
@@ -296,7 +262,7 @@ static int get_freq_hardware(unsigned int cpu, unsigned int human)
 		return -EINVAL;
 	}
 	if (human) {
-		print_speed(freq);
+		print_speed(freq, no_rounding);
 	} else
 		printf("%lu", freq);
 	printf(_(" (asserted by call to hardware)\n"));
@@ -316,9 +282,9 @@ static int get_hardware_limits(unsigned int cpu, unsigned int human)
 
 	if (human) {
 		printf(_("  hardware limits: "));
-		print_speed(min);
+		print_speed(min, no_rounding);
 		printf(" - ");
-		print_speed(max);
+		print_speed(max, no_rounding);
 		printf("\n");
 	} else {
 		printf("%lu %lu\n", min, max);
@@ -350,9 +316,9 @@ static int get_policy(unsigned int cpu)
 		return -EINVAL;
 	}
 	printf(_("  current policy: frequency should be within "));
-	print_speed(policy->min);
+	print_speed(policy->min, no_rounding);
 	printf(_(" and "));
-	print_speed(policy->max);
+	print_speed(policy->max, no_rounding);
 
 	printf(".\n                  ");
 	printf(_("The governor \"%s\" may decide which speed to use\n"
@@ -436,7 +402,7 @@ static int get_freq_stats(unsigned int cpu, unsigned int human)
 	struct cpufreq_stats *stats = cpufreq_get_stats(cpu, &total_time);
 	while (stats) {
 		if (human) {
-			print_speed(stats->frequency);
+			print_speed(stats->frequency, no_rounding);
 			printf(":%.2f%%",
 				(100.0 * stats->time_in_state) / total_time);
 		} else
@@ -472,6 +438,17 @@ static int get_latency(unsigned int cpu, unsigned int human)
 	return 0;
 }
 
+/* --performance / -c */
+
+static int get_perf_cap(unsigned int cpu)
+{
+	if (cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
+	    cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE)
+		amd_pstate_show_perf_and_freq(cpu, no_rounding);
+
+	return 0;
+}
+
 static void debug_output_one(unsigned int cpu)
 {
 	struct cpufreq_available_frequencies *freqs;
@@ -486,11 +463,11 @@ static void debug_output_one(unsigned int cpu)
 	if (freqs) {
 		printf(_("  available frequency steps:  "));
 		while (freqs->next) {
-			print_speed(freqs->frequency);
+			print_speed(freqs->frequency, no_rounding);
 			printf(", ");
 			freqs = freqs->next;
 		}
-		print_speed(freqs->frequency);
+		print_speed(freqs->frequency, no_rounding);
 		printf("\n");
 		cpufreq_put_available_frequencies(freqs);
 	}
@@ -500,6 +477,7 @@ static void debug_output_one(unsigned int cpu)
 	if (get_freq_hardware(cpu, 1) < 0)
 		get_freq_kernel(cpu, 1);
 	get_boost_mode(cpu);
+	get_perf_cap(cpu);
 }
 
 static struct option info_opts[] = {
@@ -518,6 +496,7 @@ static struct option info_opts[] = {
 	{"proc",	 no_argument,		 NULL,	 'o'},
 	{"human",	 no_argument,		 NULL,	 'm'},
 	{"no-rounding", no_argument,	 NULL,	 'n'},
+	{"performance", no_argument,	 NULL,	 'c'},
 	{ },
 };
 
@@ -531,7 +510,7 @@ int cmd_freq_info(int argc, char **argv)
 	int output_param = 0;
 
 	do {
-		ret = getopt_long(argc, argv, "oefwldpgrasmybn", info_opts,
+		ret = getopt_long(argc, argv, "oefwldpgrasmybnc", info_opts,
 				  NULL);
 		switch (ret) {
 		case '?':
@@ -554,6 +533,7 @@ int cmd_freq_info(int argc, char **argv)
 		case 'e':
 		case 's':
 		case 'y':
+		case 'c':
 			if (output_param) {
 				output_param = -1;
 				cont = 0;
@@ -660,6 +640,9 @@ int cmd_freq_info(int argc, char **argv)
 		case 'y':
 			ret = get_latency(cpu, human);
 			break;
+		case 'c':
+			ret = get_perf_cap(cpu);
+			break;
 		}
 		if (ret)
 			return ret;
diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c
index 97f2c857048e..c519cc89c97f 100644
--- a/tools/power/cpupower/utils/helpers/amd.c
+++ b/tools/power/cpupower/utils/helpers/amd.c
@@ -8,7 +8,10 @@
 #include <pci/pci.h>
 
 #include "helpers/helpers.h"
+#include "cpufreq.h"
+#include "acpi_cppc.h"
 
+/* ACPI P-States Helper Functions for AMD Processors ***************/
 #define MSR_AMD_PSTATE_STATUS	0xc0010063
 #define MSR_AMD_PSTATE		0xc0010064
 #define MSR_AMD_PSTATE_LIMIT	0xc0010061
@@ -146,4 +149,78 @@ int amd_pci_get_num_boost_states(int *active, int *states)
 	pci_cleanup(pci_acc);
 	return 0;
 }
+
+/* ACPI P-States Helper Functions for AMD Processors ***************/
+
+/* AMD P-State Helper Functions ************************************/
+enum amd_pstate_value {
+	AMD_PSTATE_HIGHEST_PERF,
+	AMD_PSTATE_MAX_FREQ,
+	AMD_PSTATE_LOWEST_NONLINEAR_FREQ,
+	MAX_AMD_PSTATE_VALUE_READ_FILES,
+};
+
+static const char *amd_pstate_value_files[MAX_AMD_PSTATE_VALUE_READ_FILES] = {
+	[AMD_PSTATE_HIGHEST_PERF] = "amd_pstate_highest_perf",
+	[AMD_PSTATE_MAX_FREQ] = "amd_pstate_max_freq",
+	[AMD_PSTATE_LOWEST_NONLINEAR_FREQ] = "amd_pstate_lowest_nonlinear_freq",
+};
+
+static unsigned long amd_pstate_get_data(unsigned int cpu,
+					 enum amd_pstate_value value)
+{
+	return cpufreq_get_sysfs_value_from_table(cpu,
+						  amd_pstate_value_files,
+						  value,
+						  MAX_AMD_PSTATE_VALUE_READ_FILES);
+}
+
+void amd_pstate_boost_init(unsigned int cpu, int *support, int *active)
+{
+	unsigned long highest_perf, nominal_perf, cpuinfo_min,
+		      cpuinfo_max, amd_pstate_max;
+
+	highest_perf = amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF);
+	nominal_perf = acpi_cppc_get_data(cpu, NOMINAL_PERF);
+
+	*support = highest_perf > nominal_perf ? 1 : 0;
+	if (!(*support))
+		return;
+
+	cpufreq_get_hardware_limits(cpu, &cpuinfo_min, &cpuinfo_max);
+	amd_pstate_max = amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ);
+
+	*active = cpuinfo_max == amd_pstate_max ? 1 : 0;
+}
+
+void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding)
+{
+	printf(_("    AMD PSTATE Highest Performance: %lu. Maximum Frequency: "),
+	       amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF));
+	/*
+	 * If boost isn't active, the cpuinfo_max doesn't indicate real max
+	 * frequency. So we read it back from amd-pstate sysfs entry.
+	 */
+	print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ), no_rounding);
+	printf(".\n");
+
+	printf(_("    AMD PSTATE Nominal Performance: %lu. Nominal Frequency: "),
+	       acpi_cppc_get_data(cpu, NOMINAL_PERF));
+	print_speed(acpi_cppc_get_data(cpu, NOMINAL_FREQ) * 1000,
+		    no_rounding);
+	printf(".\n");
+
+	printf(_("    AMD PSTATE Lowest Non-linear Performance: %lu. Lowest Non-linear Frequency: "),
+	       acpi_cppc_get_data(cpu, LOWEST_NONLINEAR_PERF));
+	print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_LOWEST_NONLINEAR_FREQ),
+		    no_rounding);
+	printf(".\n");
+
+	printf(_("    AMD PSTATE Lowest Performance: %lu. Lowest Frequency: "),
+	       acpi_cppc_get_data(cpu, LOWEST_PERF));
+	print_speed(acpi_cppc_get_data(cpu, LOWEST_FREQ) * 1000, no_rounding);
+	printf(".\n");
+}
+
+/* AMD P-State Helper Functions ************************************/
 #endif /* defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c
index 72eb43593180..eae91f11d187 100644
--- a/tools/power/cpupower/utils/helpers/cpuid.c
+++ b/tools/power/cpupower/utils/helpers/cpuid.c
@@ -149,6 +149,19 @@ out:
 		if (ext_cpuid_level >= 0x80000008 &&
 		    cpuid_ebx(0x80000008) & (1 << 4))
 			cpu_info->caps |= CPUPOWER_CAP_AMD_RDPRU;
+
+		if (cpupower_amd_pstate_enabled()) {
+			cpu_info->caps |= CPUPOWER_CAP_AMD_PSTATE;
+
+			/*
+			 * If AMD P-State is enabled, the firmware will treat
+			 * AMD P-State function as high priority.
+			 */
+			cpu_info->caps &= ~CPUPOWER_CAP_AMD_CPB;
+			cpu_info->caps &= ~CPUPOWER_CAP_AMD_CPB_MSR;
+			cpu_info->caps &= ~CPUPOWER_CAP_AMD_HW_PSTATE;
+			cpu_info->caps &= ~CPUPOWER_CAP_AMD_PSTATEDEF;
+		}
 	}
 
 	if (cpu_info->vendor == X86_VENDOR_INTEL) {
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 33ffacee7fcb..96e4bede078b 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -11,6 +11,7 @@
 
 #include <libintl.h>
 #include <locale.h>
+#include <stdbool.h>
 
 #include "helpers/bitmask.h"
 #include <cpupower.h>
@@ -73,6 +74,7 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL,
 #define CPUPOWER_CAP_AMD_HW_PSTATE	0x00000100
 #define CPUPOWER_CAP_AMD_PSTATEDEF	0x00000200
 #define CPUPOWER_CAP_AMD_CPB_MSR	0x00000400
+#define CPUPOWER_CAP_AMD_PSTATE		0x00000800
 
 #define CPUPOWER_AMD_CPBDIS		0x02000000
 
@@ -135,6 +137,16 @@ extern int decode_pstates(unsigned int cpu, int boost_states,
 
 extern int cpufreq_has_boost_support(unsigned int cpu, int *support,
 				     int *active, int * states);
+
+/* AMD P-State stuff **************************/
+bool cpupower_amd_pstate_enabled(void);
+void amd_pstate_boost_init(unsigned int cpu,
+			   int *support, int *active);
+void amd_pstate_show_perf_and_freq(unsigned int cpu,
+				   int no_rounding);
+
+/* AMD P-State stuff **************************/
+
 /*
  * CPUID functions returning a single datum
  */
@@ -167,6 +179,15 @@ static inline int cpufreq_has_boost_support(unsigned int cpu, int *support,
 					    int *active, int * states)
 { return -1; }
 
+static inline bool cpupower_amd_pstate_enabled(void)
+{ return false; }
+static inline void amd_pstate_boost_init(unsigned int cpu, int *support,
+					 int *active)
+{}
+static inline void amd_pstate_show_perf_and_freq(unsigned int cpu,
+						 int no_rounding)
+{}
+
 /* cpuid and cpuinfo helpers  **************************/
 
 static inline unsigned int cpuid_eax(unsigned int op) { return 0; };
@@ -184,5 +205,6 @@ extern struct bitmask *offline_cpus;
 void get_cpustate(void);
 void print_online_cpus(void);
 void print_offline_cpus(void);
+void print_speed(unsigned long speed, int no_rounding);
 
 #endif /* __CPUPOWERUTILS_HELPERS__ */
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index fc6e34511721..9547b29254a7 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -3,9 +3,11 @@
 #include <stdio.h>
 #include <errno.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include "helpers/helpers.h"
 #include "helpers/sysfs.h"
+#include "cpufreq.h"
 
 #if defined(__i386__) || defined(__x86_64__)
 
@@ -39,6 +41,8 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
 			if (ret)
 				return ret;
 		}
+	} else if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) {
+		amd_pstate_boost_init(cpu, support, active);
 	} else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA)
 		*support = *active = 1;
 	return 0;
@@ -83,6 +87,22 @@ int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val)
 	return 0;
 }
 
+bool cpupower_amd_pstate_enabled(void)
+{
+	char *driver = cpufreq_get_driver(0);
+	bool ret = false;
+
+	if (!driver)
+		return ret;
+
+	if (!strcmp(driver, "amd-pstate"))
+		ret = true;
+
+	cpufreq_put_driver(driver);
+
+	return ret;
+}
+
 #endif /* #if defined(__i386__) || defined(__x86_64__) */
 
 /* get_cpustate
@@ -144,3 +164,43 @@ void print_offline_cpus(void)
 		printf(_("cpupower set operation was not performed on them\n"));
 	}
 }
+
+/*
+ * print_speed
+ *
+ * Print the exact CPU frequency with appropriate unit
+ */
+void print_speed(unsigned long speed, int no_rounding)
+{
+	unsigned long tmp;
+
+	if (no_rounding) {
+		if (speed > 1000000)
+			printf("%u.%06u GHz", ((unsigned int)speed / 1000000),
+			       ((unsigned int)speed % 1000000));
+		else if (speed > 1000)
+			printf("%u.%03u MHz", ((unsigned int)speed / 1000),
+			       (unsigned int)(speed % 1000));
+		else
+			printf("%lu kHz", speed);
+	} else {
+		if (speed > 1000000) {
+			tmp = speed % 10000;
+			if (tmp >= 5000)
+				speed += 10000;
+			printf("%u.%02u GHz", ((unsigned int)speed / 1000000),
+			       ((unsigned int)(speed % 1000000) / 10000));
+		} else if (speed > 100000) {
+			tmp = speed % 1000;
+			if (tmp >= 500)
+				speed += 1000;
+			printf("%u MHz", ((unsigned int)speed / 1000));
+		} else if (speed > 1000) {
+			tmp = speed % 100;
+			if (tmp >= 50)
+				speed += 100;
+			printf("%u.%01u MHz", ((unsigned int)speed / 1000),
+			       ((unsigned int)(speed % 1000) / 100));
+		}
+	}
+}
diff --git a/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
new file mode 100755
index 000000000000..2dea4032ac56
--- /dev/null
+++ b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
@@ -0,0 +1,354 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+# -*- coding: utf-8 -*-
+#
+""" This utility can be used to debug and tune the performance of the
+AMD P-State driver. It imports intel_pstate_tracer to analyze AMD P-State
+trace event.
+
+Prerequisites:
+    Python version 2.7.x or higher
+    gnuplot 5.0 or higher
+    gnuplot-py 1.8 or higher
+    (Most of the distributions have these required packages. They may be called
+     gnuplot-py, phython-gnuplot or phython3-gnuplot, gnuplot-nox, ... )
+
+    Kernel config for Linux trace is enabled
+
+    see print_help(): for Usage and Output details
+
+"""
+from __future__ import print_function
+from datetime import datetime
+import subprocess
+import os
+import time
+import re
+import signal
+import sys
+import getopt
+import Gnuplot
+from numpy import *
+from decimal import *
+sys.path.append('../intel_pstate_tracer')
+#import intel_pstate_tracer
+import intel_pstate_tracer as ipt
+
+__license__ = "GPL version 2"
+
+MAX_CPUS = 256
+# Define the csv file columns
+C_COMM = 15
+C_ELAPSED = 14
+C_SAMPLE = 13
+C_DURATION = 12
+C_LOAD = 11
+C_TSC = 10
+C_APERF = 9
+C_MPERF = 8
+C_FREQ = 7
+C_MAX_PERF = 6
+C_DES_PERF = 5
+C_MIN_PERF = 4
+C_USEC = 3
+C_SEC = 2
+C_CPU = 1
+
+global sample_num, last_sec_cpu, last_usec_cpu, start_time, test_name, trace_file
+
+getcontext().prec = 11
+
+sample_num =0
+last_sec_cpu = [0] * MAX_CPUS
+last_usec_cpu = [0] * MAX_CPUS
+
+def plot_per_cpu_freq(cpu_index):
+    """ Plot per cpu frequency """
+
+    file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+    if os.path.exists(file_name):
+        output_png = "cpu%03d_frequency.png" % cpu_index
+        g_plot = ipt.common_gnuplot_settings()
+        g_plot('set output "' + output_png + '"')
+        g_plot('set yrange [0:7]')
+        g_plot('set ytics 0, 1')
+        g_plot('set ylabel "CPU Frequency (GHz)"')
+        g_plot('set title "{} : frequency : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now()))
+        g_plot('set ylabel "CPU frequency"')
+        g_plot('set key off')
+        ipt.set_4_plot_linestyles(g_plot)
+        g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_FREQ))
+
+def plot_per_cpu_des_perf(cpu_index):
+    """ Plot per cpu desired perf """
+
+    file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+    if os.path.exists(file_name):
+        output_png = "cpu%03d_des_perf.png" % cpu_index
+        g_plot = ipt.common_gnuplot_settings()
+        g_plot('set output "' + output_png + '"')
+        g_plot('set yrange [0:255]')
+        g_plot('set ylabel "des perf"')
+        g_plot('set title "{} : cpu des perf : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now()))
+        g_plot('set key off')
+        ipt.set_4_plot_linestyles(g_plot)
+        g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_DES_PERF))
+
+def plot_per_cpu_load(cpu_index):
+    """ Plot per cpu load """
+
+    file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+    if os.path.exists(file_name):
+        output_png = "cpu%03d_load.png" % cpu_index
+        g_plot = ipt.common_gnuplot_settings()
+        g_plot('set output "' + output_png + '"')
+        g_plot('set yrange [0:100]')
+        g_plot('set ytics 0, 10')
+        g_plot('set ylabel "CPU load (percent)"')
+        g_plot('set title "{} : cpu load : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now()))
+        g_plot('set key off')
+        ipt.set_4_plot_linestyles(g_plot)
+        g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_LOAD))
+
+def plot_all_cpu_frequency():
+    """ Plot all cpu frequencies """
+
+    output_png = 'all_cpu_frequencies.png'
+    g_plot = ipt.common_gnuplot_settings()
+    g_plot('set output "' + output_png + '"')
+    g_plot('set ylabel "CPU Frequency (GHz)"')
+    g_plot('set title "{} : cpu frequencies : {:%F %H:%M}"'.format(test_name, datetime.now()))
+
+    title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+    plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_FREQ)
+    g_plot('title_list = "{}"'.format(title_list))
+    g_plot(plot_str)
+
+def plot_all_cpu_des_perf():
+    """ Plot all cpu desired perf """
+
+    output_png = 'all_cpu_des_perf.png'
+    g_plot = ipt.common_gnuplot_settings()
+    g_plot('set output "' + output_png + '"')
+    g_plot('set ylabel "des perf"')
+    g_plot('set title "{} : cpu des perf : {:%F %H:%M}"'.format(test_name, datetime.now()))
+
+    title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+    plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 255 ps 1 title i".format(C_ELAPSED, C_DES_PERF)
+    g_plot('title_list = "{}"'.format(title_list))
+    g_plot(plot_str)
+
+def plot_all_cpu_load():
+    """ Plot all cpu load  """
+
+    output_png = 'all_cpu_load.png'
+    g_plot = ipt.common_gnuplot_settings()
+    g_plot('set output "' + output_png + '"')
+    g_plot('set yrange [0:100]')
+    g_plot('set ylabel "CPU load (percent)"')
+    g_plot('set title "{} : cpu load : {:%F %H:%M}"'.format(test_name, datetime.now()))
+
+    title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+    plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 255 ps 1 title i".format(C_ELAPSED, C_LOAD)
+    g_plot('title_list = "{}"'.format(title_list))
+    g_plot(plot_str)
+
+def store_csv(cpu_int, time_pre_dec, time_post_dec, min_perf, des_perf, max_perf, freq_ghz, mperf, aperf, tsc, common_comm, load, duration_ms, sample_num, elapsed_time, cpu_mask):
+    """ Store master csv file information """
+
+    global graph_data_present
+
+    if cpu_mask[cpu_int] == 0:
+        return
+
+    try:
+        f_handle = open('cpu.csv', 'a')
+        string_buffer = "CPU_%03u, %05u, %06u, %u, %u, %u, %.4f, %u, %u, %u, %.2f, %.3f, %u, %.3f, %s\n" % (cpu_int, int(time_pre_dec), int(time_post_dec), int(min_perf), int(des_perf), int(max_perf), freq_ghz, int(mperf), int(aperf), int(tsc), load, duration_ms, sample_num, elapsed_time, common_comm)
+        f_handle.write(string_buffer)
+        f_handle.close()
+    except:
+        print('IO error cpu.csv')
+        return
+
+    graph_data_present = True;
+
+
+def cleanup_data_files():
+    """ clean up existing data files """
+
+    if os.path.exists('cpu.csv'):
+        os.remove('cpu.csv')
+    f_handle = open('cpu.csv', 'a')
+    f_handle.write('common_cpu, common_secs, common_usecs, min_perf, des_perf, max_perf, freq, mperf, aperf, tsc, load, duration_ms, sample_num, elapsed_time, common_comm')
+    f_handle.write('\n')
+    f_handle.close()
+
+def read_trace_data(file_name, cpu_mask):
+    """ Read and parse trace data """
+
+    global current_max_cpu
+    global sample_num, last_sec_cpu, last_usec_cpu, start_time
+
+    try:
+        data = open(file_name, 'r').read()
+    except:
+        print('Error opening ', file_name)
+        sys.exit(2)
+
+    for line in data.splitlines():
+        search_obj = \
+            re.search(r'(^(.*?)\[)((\d+)[^\]])(.*?)(\d+)([.])(\d+)(.*?amd_min_perf=)(\d+)(.*?amd_des_perf=)(\d+)(.*?amd_max_perf=)(\d+)(.*?freq=)(\d+)(.*?mperf=)(\d+)(.*?aperf=)(\d+)(.*?tsc=)(\d+)'
+                      , line)
+
+        if search_obj:
+            cpu = search_obj.group(3)
+            cpu_int = int(cpu)
+            cpu = str(cpu_int)
+
+            time_pre_dec = search_obj.group(6)
+            time_post_dec = search_obj.group(8)
+            min_perf = search_obj.group(10)
+            des_perf = search_obj.group(12)
+            max_perf = search_obj.group(14)
+            freq = search_obj.group(16)
+            mperf = search_obj.group(18)
+            aperf = search_obj.group(20)
+            tsc = search_obj.group(22)
+
+            common_comm = search_obj.group(2).replace(' ', '')
+
+            if sample_num == 0 :
+                start_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000)
+            sample_num += 1
+
+            if last_sec_cpu[cpu_int] == 0 :
+                last_sec_cpu[cpu_int] = time_pre_dec
+                last_usec_cpu[cpu_int] = time_post_dec
+            else :
+                duration_us = (int(time_pre_dec) - int(last_sec_cpu[cpu_int])) * 1000000 + (int(time_post_dec) - int(last_usec_cpu[cpu_int]))
+                duration_ms = Decimal(duration_us) / Decimal(1000)
+                last_sec_cpu[cpu_int] = time_pre_dec
+                last_usec_cpu[cpu_int] = time_post_dec
+                elapsed_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000) - start_time
+                load = Decimal(int(mperf)*100)/ Decimal(tsc)
+                freq_ghz = Decimal(freq)/Decimal(1000000)
+                store_csv(cpu_int, time_pre_dec, time_post_dec, min_perf, des_perf, max_perf, freq_ghz, mperf, aperf, tsc, common_comm, load, duration_ms, sample_num, elapsed_time, cpu_mask)
+
+            if cpu_int > current_max_cpu:
+                current_max_cpu = cpu_int
+# Now separate the main overall csv file into per CPU csv files.
+    ipt.split_csv(current_max_cpu, cpu_mask)
+
+
+def signal_handler(signal, frame):
+    print(' SIGINT: Forcing cleanup before exit.')
+    if interval:
+        ipt.disable_trace(trace_file)
+        ipt.clear_trace_file()
+        ipt.free_trace_buffer()
+        sys.exit(0)
+
+trace_file = "/sys/kernel/debug/tracing/events/amd_cpu/enable"
+signal.signal(signal.SIGINT, signal_handler)
+
+interval = ""
+file_name = ""
+cpu_list = ""
+test_name = ""
+memory = "10240"
+graph_data_present = False;
+
+valid1 = False
+valid2 = False
+
+cpu_mask = zeros((MAX_CPUS,), dtype=int)
+
+
+try:
+    opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
+except getopt.GetoptError:
+    ipt.print_help('amd_pstate')
+    sys.exit(2)
+for opt, arg in opts:
+    if opt == '-h':
+        print()
+        sys.exit()
+    elif opt in ("-t", "--trace_file"):
+        valid1 = True
+        location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+        file_name = os.path.join(location, arg)
+    elif opt in ("-i", "--interval"):
+        valid1 = True
+        interval = arg
+    elif opt in ("-c", "--cpu"):
+        cpu_list = arg
+    elif opt in ("-n", "--name"):
+        valid2 = True
+        test_name = arg
+    elif opt in ("-m", "--memory"):
+        memory = arg
+
+if not (valid1 and valid2):
+    ipt.print_help('amd_pstate')
+    sys.exit()
+
+if cpu_list:
+    for p in re.split("[,]", cpu_list):
+        if int(p) < MAX_CPUS :
+            cpu_mask[int(p)] = 1
+else:
+    for i in range (0, MAX_CPUS):
+        cpu_mask[i] = 1
+
+if not os.path.exists('results'):
+    os.mkdir('results')
+    ipt.fix_ownership('results')
+
+os.chdir('results')
+if os.path.exists(test_name):
+    print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
+    sys.exit()
+os.mkdir(test_name)
+ipt.fix_ownership(test_name)
+os.chdir(test_name)
+
+cur_version = sys.version_info
+print('python version (should be >= 2.7):')
+print(cur_version)
+
+cleanup_data_files()
+
+if interval:
+    file_name = "/sys/kernel/debug/tracing/trace"
+    ipt.clear_trace_file()
+    ipt.set_trace_buffer_size(memory)
+    ipt.enable_trace(trace_file)
+    time.sleep(int(interval))
+    ipt.disable_trace(trace_file)
+
+current_max_cpu = 0
+
+read_trace_data(file_name, cpu_mask)
+
+if interval:
+    ipt.clear_trace_file()
+    ipt.free_trace_buffer()
+
+if graph_data_present == False:
+    print('No valid data to plot')
+    sys.exit(2)
+
+for cpu_no in range(0, current_max_cpu + 1):
+    plot_per_cpu_freq(cpu_no)
+    plot_per_cpu_des_perf(cpu_no)
+    plot_per_cpu_load(cpu_no)
+
+plot_all_cpu_des_perf()
+plot_all_cpu_frequency()
+plot_all_cpu_load()
+
+for root, dirs, files in os.walk('.'):
+    for f in files:
+        ipt.fix_ownership(f)
+
+os.chdir('../../')
diff --git a/tools/power/x86/intel-speed-select/Build b/tools/power/x86/intel-speed-select/Build
index b61456d75190..81e36bd578b1 100644
--- a/tools/power/x86/intel-speed-select/Build
+++ b/tools/power/x86/intel-speed-select/Build
@@ -1 +1 @@
-intel-speed-select-y +=  isst-config.o isst-core.o isst-display.o
+intel-speed-select-y +=  isst-config.o isst-core.o isst-display.o isst-daemon.o hfi-events.o
diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile
index 12c6939dca2a..d2fba1297d96 100644
--- a/tools/power/x86/intel-speed-select/Makefile
+++ b/tools/power/x86/intel-speed-select/Makefile
@@ -13,8 +13,8 @@ endif
 # Do not use make's built-in rules
 # (this improves performance and avoids hard-to-debug behaviour);
 MAKEFLAGS += -r
-
-override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I/usr/include/libnl3
+override LDFLAGS += -lnl-genl-3 -lnl-3
 
 ALL_TARGETS := intel-speed-select
 ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
@@ -31,7 +31,11 @@ $(OUTPUT)include/linux/isst_if.h: ../../../../include/uapi/linux/isst_if.h
 	mkdir -p $(OUTPUT)include/linux 2>&1 || true
 	ln -sf $(CURDIR)/../../../../include/uapi/linux/isst_if.h $@
 
-prepare: $(OUTPUT)include/linux/isst_if.h
+$(OUTPUT)include/linux/thermal.h: ../../../../include/uapi/linux/thermal.h
+	mkdir -p $(OUTPUT)include/linux 2>&1 || true
+	ln -sf $(CURDIR)/../../../../include/uapi/linux/thermal.h $@
+
+prepare: $(OUTPUT)include/linux/isst_if.h $(OUTPUT)include/linux/thermal.h
 
 ISST_IN := $(OUTPUT)intel-speed-select-in.o
 
diff --git a/tools/power/x86/intel-speed-select/hfi-events.c b/tools/power/x86/intel-speed-select/hfi-events.c
new file mode 100644
index 000000000000..e85676711372
--- /dev/null
+++ b/tools/power/x86/intel-speed-select/hfi-events.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Speed Select -- Read HFI events for OOB
+ * Copyright (c) 2022 Intel Corporation.
+ */
+
+/*
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+
+ * WPA Supplicant - driver interaction with Linux nl80211/cfg80211
+ * Copyright (c) 2003-2008, Jouni Malinen <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Alternatively, this software may be distributed under the terms of
+ * BSD license.
+ *
+ * Requires
+ * libnl-genl-3-dev
+ *
+ * For Fedora/CenOS
+ * dnf install libnl3-devel
+ * For Ubuntu
+ * apt install libnl-3-dev libnl-genl-3-dev
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <netlink/genl/genl.h>
+#include <netlink/genl/family.h>
+#include <netlink/genl/ctrl.h>
+
+#include <linux/thermal.h>
+#include "isst.h"
+
+struct hfi_event_data {
+	struct nl_sock *nl_handle;
+	struct nl_cb *nl_cb;
+};
+
+struct hfi_event_data drv;
+
+static int ack_handler(struct nl_msg *msg, void *arg)
+{
+	int *err = arg;
+	*err = 0;
+	return NL_STOP;
+}
+
+static int finish_handler(struct nl_msg *msg, void *arg)
+{
+	int *ret = arg;
+	*ret = 0;
+	return NL_SKIP;
+}
+
+static int error_handler(struct sockaddr_nl *nla, struct nlmsgerr *err,
+			 void *arg)
+{
+	int *ret = arg;
+	*ret = err->error;
+	return NL_SKIP;
+}
+
+static int seq_check_handler(struct nl_msg *msg, void *arg)
+{
+	return NL_OK;
+}
+
+static int send_and_recv_msgs(struct hfi_event_data *drv,
+			      struct nl_msg *msg,
+			      int (*valid_handler)(struct nl_msg *, void *),
+			      void *valid_data)
+{
+	struct nl_cb *cb;
+	int err = -ENOMEM;
+
+	cb = nl_cb_clone(drv->nl_cb);
+	if (!cb)
+		goto out;
+
+	err = nl_send_auto_complete(drv->nl_handle, msg);
+	if (err < 0)
+		goto out;
+
+	err = 1;
+
+	nl_cb_err(cb, NL_CB_CUSTOM, error_handler, &err);
+	nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, finish_handler, &err);
+	nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, ack_handler, &err);
+
+	if (valid_handler)
+		nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM,
+			  valid_handler, valid_data);
+
+	while (err > 0)
+		nl_recvmsgs(drv->nl_handle, cb);
+ out:
+	nl_cb_put(cb);
+	nlmsg_free(msg);
+	return err;
+}
+
+struct family_data {
+	const char *group;
+	int id;
+};
+
+static int family_handler(struct nl_msg *msg, void *arg)
+{
+	struct family_data *res = arg;
+	struct nlattr *tb[CTRL_ATTR_MAX + 1];
+	struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+	struct nlattr *mcgrp;
+	int i;
+
+	nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
+		  genlmsg_attrlen(gnlh, 0), NULL);
+	if (!tb[CTRL_ATTR_MCAST_GROUPS])
+		return NL_SKIP;
+
+	nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], i) {
+		struct nlattr *tb2[CTRL_ATTR_MCAST_GRP_MAX + 1];
+		nla_parse(tb2, CTRL_ATTR_MCAST_GRP_MAX, nla_data(mcgrp),
+			  nla_len(mcgrp), NULL);
+		if (!tb2[CTRL_ATTR_MCAST_GRP_NAME] ||
+		    !tb2[CTRL_ATTR_MCAST_GRP_ID] ||
+		    strncmp(nla_data(tb2[CTRL_ATTR_MCAST_GRP_NAME]),
+				res->group,
+				nla_len(tb2[CTRL_ATTR_MCAST_GRP_NAME])) != 0)
+			continue;
+		res->id = nla_get_u32(tb2[CTRL_ATTR_MCAST_GRP_ID]);
+		break;
+	};
+
+	return 0;
+}
+
+static int nl_get_multicast_id(struct hfi_event_data *drv,
+			       const char *family, const char *group)
+{
+	struct nl_msg *msg;
+	int ret = -1;
+	struct family_data res = { group, -ENOENT };
+
+	msg = nlmsg_alloc();
+	if (!msg)
+		return -ENOMEM;
+	genlmsg_put(msg, 0, 0, genl_ctrl_resolve(drv->nl_handle, "nlctrl"),
+		    0, 0, CTRL_CMD_GETFAMILY, 0);
+	NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family);
+
+	ret = send_and_recv_msgs(drv, msg, family_handler, &res);
+	msg = NULL;
+	if (ret == 0)
+		ret = res.id;
+
+nla_put_failure:
+	nlmsg_free(msg);
+	return ret;
+}
+
+struct perf_cap {
+	int cpu;
+	int perf;
+	int eff;
+};
+
+static void process_hfi_event(struct perf_cap *perf_cap)
+{
+	process_level_change(perf_cap->cpu);
+}
+
+static int handle_event(struct nl_msg *n, void *arg)
+{
+	struct nlmsghdr *nlh = nlmsg_hdr(n);
+	struct genlmsghdr *genlhdr = genlmsg_hdr(nlh);
+	struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1];
+	int ret;
+	struct perf_cap perf_cap;
+
+	ret = genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL);
+
+	debug_printf("Received event %d parse_rer:%d\n", genlhdr->cmd, ret);
+	if (genlhdr->cmd == THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE) {
+		struct nlattr *cap;
+		int j, index = 0;
+
+		debug_printf("THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE\n");
+		nla_for_each_nested(cap, attrs[THERMAL_GENL_ATTR_CPU_CAPABILITY], j) {
+			switch (index) {
+			case 0:
+				perf_cap.cpu = nla_get_u32(cap);
+				break;
+			case 1:
+				perf_cap.perf = nla_get_u32(cap);
+				break;
+			case 2:
+				perf_cap.eff = nla_get_u32(cap);
+				break;
+			default:
+				break;
+			}
+			++index;
+			if (index == 3) {
+				index = 0;
+				process_hfi_event(&perf_cap);
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int _hfi_exit;
+
+static int check_hf_suport(void)
+{
+	unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
+
+	__cpuid(6, eax, ebx, ecx, edx);
+	if (eax & BIT(19))
+		return 1;
+
+	return 0;
+}
+
+int hfi_main(void)
+{
+	struct nl_sock *sock;
+	struct nl_cb *cb;
+	int err = 0;
+	int mcast_id;
+	int no_block = 0;
+
+	if (!check_hf_suport()) {
+		fprintf(stderr, "CPU Doesn't support HFI\n");
+		return -1;
+	}
+
+	sock = nl_socket_alloc();
+	if (!sock) {
+		fprintf(stderr, "nl_socket_alloc failed\n");
+		return -1;
+	}
+
+	if (genl_connect(sock)) {
+		fprintf(stderr, "genl_connect(sk_event) failed\n");
+		goto free_sock;
+	}
+
+	drv.nl_handle = sock;
+	drv.nl_cb = cb = nl_cb_alloc(NL_CB_DEFAULT);
+	if (drv.nl_cb == NULL) {
+		printf("Failed to allocate netlink callbacks");
+		goto free_sock;
+	}
+
+	mcast_id = nl_get_multicast_id(&drv, THERMAL_GENL_FAMILY_NAME,
+				   THERMAL_GENL_EVENT_GROUP_NAME);
+	if (mcast_id < 0) {
+		fprintf(stderr, "nl_get_multicast_id failed\n");
+		goto free_sock;
+	}
+
+	if (nl_socket_add_membership(sock, mcast_id)) {
+		fprintf(stderr, "nl_socket_add_membership failed");
+		goto free_sock;
+	}
+
+	nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, seq_check_handler, 0);
+	nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, handle_event, NULL);
+
+	if (no_block)
+		nl_socket_set_nonblocking(sock);
+
+	debug_printf("hfi is initialized\n");
+
+	while (!_hfi_exit && !err) {
+		err = nl_recvmsgs(sock, cb);
+		debug_printf("nl_recv_message err:%d\n", err);
+	}
+
+	return 0;
+
+	/* Netlink library doesn't have calls to dealloc cb or disconnect */
+free_sock:
+	nl_socket_free(sock);
+
+	return -1;
+}
+
+void hfi_exit(void)
+{
+	_hfi_exit = 1;
+}
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index efe72fa48224..060390e88e37 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,8 @@ struct process_cmd_struct {
 	int arg;
 };
 
-static const char *version_str = "v1.11";
+static const char *version_str = "v1.12";
+
 static const int supported_api_ver = 1;
 static struct isst_if_platform_info isst_platform_info;
 static char *progname;
@@ -368,7 +369,7 @@ int get_topo_max_cpus(void)
 	return topo_max_cpus;
 }
 
-static void set_cpu_online_offline(int cpu, int state)
+void set_cpu_online_offline(int cpu, int state)
 {
 	char buffer[128];
 	int fd, ret;
@@ -409,12 +410,10 @@ static void force_all_cpus_online(void)
 	unlink("/var/run/isst_cpu_topology.dat");
 }
 
-#define MAX_PACKAGE_COUNT 8
-#define MAX_DIE_PER_PACKAGE 2
-static void for_each_online_package_in_set(void (*callback)(int, void *, void *,
-							    void *, void *),
-					   void *arg1, void *arg2, void *arg3,
-					   void *arg4)
+void for_each_online_package_in_set(void (*callback)(int, void *, void *,
+						     void *, void *),
+				    void *arg1, void *arg2, void *arg3,
+				    void *arg4)
 {
 	int max_packages[MAX_PACKAGE_COUNT * MAX_PACKAGE_COUNT];
 	int pkg_index = 0, i;
@@ -2803,7 +2802,9 @@ static void usage(void)
 	printf("\t[-p|--pause] : Delay between two mail box commands in milliseconds\n");
 	printf("\t[-r|--retry] : Retry count for mail box commands on failure, default 3\n");
 	printf("\t[-v|--version] : Print version\n");
-
+	printf("\t[-b|--oob : Start a daemon to process HFI events for perf profile change from Out of Band agent.\n");
+	printf("\t[-n|--no-daemon : Don't run as daemon. By default --oob will turn on daemon mode\n");
+	printf("\t[-w|--delay : Delay for reading config level state change in OOB poll mode.\n");
 	printf("\nResult format\n");
 	printf("\tResult display uses a common format for each command:\n");
 	printf("\tResults are formatted in text/JSON with\n");
@@ -2837,6 +2838,9 @@ static void cmdline(int argc, char **argv)
 	int opt, force_cpus_online = 0;
 	int option_index = 0;
 	int ret;
+	int oob_mode = 0;
+	int poll_interval = -1;
+	int no_daemon = 0;
 
 	static struct option long_options[] = {
 		{ "all-cpus-online", no_argument, 0, 'a' },
@@ -2849,6 +2853,9 @@ static void cmdline(int argc, char **argv)
 		{ "out", required_argument, 0, 'o' },
 		{ "retry", required_argument, 0, 'r' },
 		{ "version", no_argument, 0, 'v' },
+		{ "oob", no_argument, 0, 'b' },
+		{ "no-daemon", no_argument, 0, 'n' },
+		{ "poll-interval", required_argument, 0, 'w' },
 		{ 0, 0, 0, 0 }
 	};
 
@@ -2875,7 +2882,7 @@ static void cmdline(int argc, char **argv)
 	}
 
 	progname = argv[0];
-	while ((opt = getopt_long_only(argc, argv, "+c:df:hio:va", long_options,
+	while ((opt = getopt_long_only(argc, argv, "+c:df:hio:vabw:n", long_options,
 				       &option_index)) != -1) {
 		switch (opt) {
 		case 'a':
@@ -2920,12 +2927,26 @@ static void cmdline(int argc, char **argv)
 		case 'v':
 			print_version();
 			break;
+		case 'b':
+			oob_mode = 1;
+			break;
+		case 'n':
+			no_daemon = 1;
+			break;
+		case 'w':
+			ret = strtol(optarg, &ptr, 10);
+			if (!ret) {
+				fprintf(stderr, "Invalid poll interval count\n");
+				exit(0);
+			}
+			poll_interval = ret;
+			break;
 		default:
 			usage();
 		}
 	}
 
-	if (optind > (argc - 2)) {
+	if (optind > (argc - 2) && !oob_mode) {
 		usage();
 		exit(0);
 	}
@@ -2936,6 +2957,17 @@ static void cmdline(int argc, char **argv)
 	set_cpu_present_cpu_mask();
 	set_cpu_target_cpu_mask();
 
+	if (oob_mode) {
+		create_cpu_map();
+		if (debug_flag)
+			fprintf(stderr, "OOB mode is enabled in debug mode\n");
+
+		ret = isst_daemon(debug_flag, poll_interval, no_daemon);
+		if (ret)
+			fprintf(stderr, "OOB mode enable failed\n");
+		goto out;
+	}
+
 	if (!is_clx_n_platform()) {
 		ret = isst_fill_platform_info();
 		if (ret)
diff --git a/tools/power/x86/intel-speed-select/isst-daemon.c b/tools/power/x86/intel-speed-select/isst-daemon.c
new file mode 100644
index 000000000000..dd372924bc82
--- /dev/null
+++ b/tools/power/x86/intel-speed-select/isst-daemon.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Speed Select -- Allow speed select to daemonize
+ * Copyright (c) 2022 Intel Corporation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <time.h>
+
+#include "isst.h"
+
+static int per_package_levels_info[MAX_PACKAGE_COUNT][MAX_DIE_PER_PACKAGE];
+static time_t per_package_levels_tm[MAX_PACKAGE_COUNT][MAX_DIE_PER_PACKAGE];
+
+static void init_levels(void)
+{
+	int i, j;
+
+	for (i = 0; i < MAX_PACKAGE_COUNT; ++i)
+		for (j = 0; j < MAX_DIE_PER_PACKAGE; ++j)
+			per_package_levels_info[i][j] = -1;
+}
+
+void process_level_change(int cpu)
+{
+	struct isst_pkg_ctdp_level_info ctdp_level;
+	int pkg_id = get_physical_package_id(cpu);
+	int die_id = get_physical_die_id(cpu);
+	struct isst_pkg_ctdp pkg_dev;
+	time_t tm;
+	int ret;
+
+	if (pkg_id >= MAX_PACKAGE_COUNT || die_id > MAX_DIE_PER_PACKAGE) {
+		debug_printf("Invalid package/die info for cpu:%d\n", cpu);
+		return;
+	}
+
+	tm = time(NULL);
+	if (tm - per_package_levels_tm[pkg_id][die_id] < 2 )
+		return;
+
+	per_package_levels_tm[pkg_id][die_id] = tm;
+
+	ret = isst_get_ctdp_levels(cpu, &pkg_dev);
+	if (ret) {
+		debug_printf("Can't get tdp levels for cpu:%d\n", cpu);
+		return;
+	}
+
+	debug_printf("Get Config level %d pkg:%d die:%d current_level:%d \n", cpu,
+		      pkg_id, die_id, pkg_dev.current_level);
+
+	if (pkg_dev.locked) {
+		debug_printf("config TDP s locked \n");
+		return;
+	}
+
+	if (per_package_levels_info[pkg_id][die_id] == pkg_dev.current_level)
+		return;
+
+	debug_printf("**Config level change for cpu:%d pkg:%d die:%d from %d to %d\n",
+		      cpu, pkg_id, die_id, per_package_levels_info[pkg_id][die_id],
+		      pkg_dev.current_level);
+
+	per_package_levels_info[pkg_id][die_id] = pkg_dev.current_level;
+
+	ctdp_level.core_cpumask_size =
+		alloc_cpu_set(&ctdp_level.core_cpumask);
+	ret = isst_get_coremask_info(cpu, pkg_dev.current_level, &ctdp_level);
+	if (ret) {
+		free_cpu_set(ctdp_level.core_cpumask);
+		debug_printf("Can't get core_mask:%d\n", cpu);
+		return;
+	}
+
+	if (ctdp_level.cpu_count) {
+		int i, max_cpus = get_topo_max_cpus();
+		for (i = 0; i < max_cpus; ++i) {
+			if (pkg_id != get_physical_package_id(i) || die_id != get_physical_die_id(i))
+				continue;
+			if (CPU_ISSET_S(i, ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) {
+				fprintf(stderr, "online cpu %d\n", i);
+				set_cpu_online_offline(i, 1);
+			} else {
+				fprintf(stderr, "offline cpu %d\n", i);
+				set_cpu_online_offline(i, 0);
+			}
+		}
+	}
+
+	free_cpu_set(ctdp_level.core_cpumask);
+}
+
+static void _poll_for_config_change(int cpu, void *arg1, void *arg2,
+				    void *arg3, void *arg4)
+{
+	process_level_change(cpu);
+}
+
+static void poll_for_config_change(void)
+{
+	for_each_online_package_in_set(_poll_for_config_change, NULL, NULL,
+				       NULL, NULL);
+}
+
+static int done = 0;
+static int pid_file_handle;
+
+static void signal_handler(int sig)
+{
+	switch (sig) {
+	case SIGINT:
+	case SIGTERM:
+		done = 1;
+		hfi_exit();
+		exit(0);
+		break;
+	default:
+		break;
+	}
+}
+
+static void daemonize(char *rundir, char *pidfile)
+{
+	int pid, sid, i;
+	char str[10];
+	struct sigaction sig_actions;
+	sigset_t sig_set;
+	int ret;
+
+	if (getppid() == 1)
+		return;
+
+	sigemptyset(&sig_set);
+	sigaddset(&sig_set, SIGCHLD);
+	sigaddset(&sig_set, SIGTSTP);
+	sigaddset(&sig_set, SIGTTOU);
+	sigaddset(&sig_set, SIGTTIN);
+	sigprocmask(SIG_BLOCK, &sig_set, NULL);
+
+	sig_actions.sa_handler = signal_handler;
+	sigemptyset(&sig_actions.sa_mask);
+	sig_actions.sa_flags = 0;
+
+	sigaction(SIGHUP, &sig_actions, NULL);
+	sigaction(SIGTERM, &sig_actions, NULL);
+	sigaction(SIGINT, &sig_actions, NULL);
+
+	pid = fork();
+	if (pid < 0) {
+		/* Could not fork */
+		exit(EXIT_FAILURE);
+	}
+	if (pid > 0)
+		exit(EXIT_SUCCESS);
+
+	umask(027);
+
+	sid = setsid();
+	if (sid < 0)
+		exit(EXIT_FAILURE);
+
+	/* close all descriptors */
+	for (i = getdtablesize(); i >= 0; --i)
+		close(i);
+
+	i = open("/dev/null", O_RDWR);
+	ret = dup(i);
+	if (ret == -1)
+		exit(EXIT_FAILURE);
+
+	ret = dup(i);
+	if (ret == -1)
+		exit(EXIT_FAILURE);
+
+	ret = chdir(rundir);
+	if (ret == -1)
+		exit(EXIT_FAILURE);
+
+	pid_file_handle = open(pidfile, O_RDWR | O_CREAT, 0600);
+	if (pid_file_handle == -1) {
+		/* Couldn't open lock file */
+		exit(1);
+	}
+	/* Try to lock file */
+#ifdef LOCKF_SUPPORT
+	if (lockf(pid_file_handle, F_TLOCK, 0) == -1) {
+#else
+	if (flock(pid_file_handle, LOCK_EX|LOCK_NB) < 0) {
+#endif
+		/* Couldn't get lock on lock file */
+		fprintf(stderr, "Couldn't get lock file %d\n", getpid());
+		exit(1);
+	}
+	snprintf(str, sizeof(str), "%d\n", getpid());
+	ret = write(pid_file_handle, str, strlen(str));
+	if (ret == -1)
+		exit(EXIT_FAILURE);
+
+	close(i);
+}
+
+int isst_daemon(int debug_mode, int poll_interval, int no_daemon)
+{
+	int ret;
+
+	if (!no_daemon && poll_interval < 0 && !debug_mode) {
+		fprintf(stderr, "OOB mode is enabled and will run as daemon\n");
+		daemonize((char *) "/tmp/",
+				(char *)"/tmp/hfi-events.pid");
+	} else {
+		signal(SIGINT, signal_handler);
+	}
+
+	init_levels();
+
+	if (poll_interval < 0) {
+		ret = hfi_main();
+		if (ret) {
+			fprintf(stderr, "HFI initialization failed\n");
+		}
+		fprintf(stderr, "Must specify poll-interval\n");
+		return ret;
+	}
+
+	debug_printf("Starting loop\n");
+	while (!done) {
+		sleep(poll_interval);
+		poll_for_config_change();
+	}
+
+	return 0;
+}
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 1aa15d5ea57c..0796d8c6a882 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -76,6 +76,9 @@
 
 #define DISP_FREQ_MULTIPLIER 100
 
+#define MAX_PACKAGE_COUNT 8
+#define MAX_DIE_PER_PACKAGE 2
+
 struct isst_clos_config {
 	int pkg_id;
 	int die_id;
@@ -260,4 +263,14 @@ extern int is_skx_based_platform(void);
 extern int is_spr_platform(void);
 extern int is_icx_platform(void);
 extern void isst_trl_display_information(int cpu, FILE *outf, unsigned long long trl);
+
+extern void set_cpu_online_offline(int cpu, int state);
+extern void for_each_online_package_in_set(void (*callback)(int, void *, void *,
+							    void *, void *),
+					   void *arg1, void *arg2, void *arg3,
+					   void *arg4);
+extern int isst_daemon(int debug_mode, int poll_interval, int no_daemon);
+extern void process_level_change(int cpu);
+extern int hfi_main(void);
+extern void hfi_exit(void);
 #endif
diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
index e15e20696d17..b46e9eb8f5aa 100755
--- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
@@ -63,7 +63,7 @@ C_USEC = 3
 C_SEC = 2
 C_CPU = 1
 
-global sample_num, last_sec_cpu, last_usec_cpu, start_time, testname
+global sample_num, last_sec_cpu, last_usec_cpu, start_time, testname, trace_file
 
 # 11 digits covers uptime to 115 days
 getcontext().prec = 11
@@ -72,17 +72,17 @@ sample_num =0
 last_sec_cpu = [0] * MAX_CPUS
 last_usec_cpu = [0] * MAX_CPUS
 
-def print_help():
-    print('intel_pstate_tracer.py:')
+def print_help(driver_name):
+    print('%s_tracer.py:'%driver_name)
     print('  Usage:')
     print('    If the trace file is available, then to simply parse and plot, use (sudo not required):')
-    print('      ./intel_pstate_tracer.py [-c cpus] -t <trace_file> -n <test_name>')
+    print('      ./%s_tracer.py [-c cpus] -t <trace_file> -n <test_name>'%driver_name)
     print('    Or')
-    print('      ./intel_pstate_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>')
+    print('      ./%s_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>'%driver_name)
     print('    To generate trace file, parse and plot, use (sudo required):')
-    print('      sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>')
+    print('      sudo ./%s_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>'%driver_name)
     print('    Or')
-    print('      sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>')
+    print('      sudo ./%s_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>'%driver_name)
     print('    Optional argument:')
     print('      cpus:   comma separated list of CPUs')
     print('      kbytes: Kilo bytes of memory per CPU to allocate to the trace buffer. Default: 10240')
@@ -323,7 +323,7 @@ def set_4_plot_linestyles(g_plot):
     g_plot('set style line 3 linetype 1 linecolor rgb "purple" pointtype -1')
     g_plot('set style line 4 linetype 1 linecolor rgb "blue" pointtype -1')
 
-def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz):
+def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz, cpu_mask):
     """ Store master csv file information """
 
     global graph_data_present
@@ -342,11 +342,9 @@ def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _t
 
     graph_data_present = True;
 
-def split_csv():
+def split_csv(current_max_cpu, cpu_mask):
     """ seperate the all csv file into per CPU csv files. """
 
-    global current_max_cpu
-
     if os.path.exists('cpu.csv'):
         for index in range(0, current_max_cpu + 1):
             if cpu_mask[int(index)] != 0:
@@ -381,27 +379,25 @@ def clear_trace_file():
         print('IO error clearing trace file ')
         sys.exit(2)
 
-def enable_trace():
+def enable_trace(trace_file):
     """ Enable trace """
 
     try:
-       open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable'
-                 , 'w').write("1")
+        open(trace_file,'w').write("1")
     except:
         print('IO error enabling trace ')
         sys.exit(2)
 
-def disable_trace():
+def disable_trace(trace_file):
     """ Disable trace """
 
     try:
-       open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable'
-                 , 'w').write("0")
+       open(trace_file, 'w').write("0")
     except:
         print('IO error disabling trace ')
         sys.exit(2)
 
-def set_trace_buffer_size():
+def set_trace_buffer_size(memory):
     """ Set trace buffer size """
 
     try:
@@ -421,7 +417,7 @@ def free_trace_buffer():
         print('IO error freeing trace buffer ')
         sys.exit(2)
 
-def read_trace_data(filename):
+def read_trace_data(filename, cpu_mask):
     """ Read and parse trace data """
 
     global current_max_cpu
@@ -481,135 +477,137 @@ def read_trace_data(filename):
                 tsc_ghz = Decimal(0)
                 if duration_ms != Decimal(0) :
                     tsc_ghz = Decimal(tsc)/duration_ms/Decimal(1000000)
-                store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz)
+                store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz, cpu_mask)
 
             if cpu_int > current_max_cpu:
                 current_max_cpu = cpu_int
 # End of for each trace line loop
 # Now seperate the main overall csv file into per CPU csv files.
-    split_csv()
+    split_csv(current_max_cpu, cpu_mask)
 
 def signal_handler(signal, frame):
     print(' SIGINT: Forcing cleanup before exit.')
     if interval:
-        disable_trace()
+        disable_trace(trace_file)
         clear_trace_file()
         # Free the memory
         free_trace_buffer()
         sys.exit(0)
 
-signal.signal(signal.SIGINT, signal_handler)
+if __name__ == "__main__":
+    trace_file = "/sys/kernel/debug/tracing/events/power/pstate_sample/enable"
+    signal.signal(signal.SIGINT, signal_handler)
 
-interval = ""
-filename = ""
-cpu_list = ""
-testname = ""
-memory = "10240"
-graph_data_present = False;
+    interval = ""
+    filename = ""
+    cpu_list = ""
+    testname = ""
+    memory = "10240"
+    graph_data_present = False;
 
-valid1 = False
-valid2 = False
+    valid1 = False
+    valid2 = False
 
-cpu_mask = zeros((MAX_CPUS,), dtype=int)
+    cpu_mask = zeros((MAX_CPUS,), dtype=int)
 
-try:
-    opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
-except getopt.GetoptError:
-    print_help()
-    sys.exit(2)
-for opt, arg in opts:
-    if opt == '-h':
-        print()
+    try:
+        opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
+    except getopt.GetoptError:
+        print_help('intel_pstate')
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print_help('intel_pstate')
+            sys.exit()
+        elif opt in ("-t", "--trace_file"):
+            valid1 = True
+            location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+            filename = os.path.join(location, arg)
+        elif opt in ("-i", "--interval"):
+            valid1 = True
+            interval = arg
+        elif opt in ("-c", "--cpu"):
+            cpu_list = arg
+        elif opt in ("-n", "--name"):
+            valid2 = True
+            testname = arg
+        elif opt in ("-m", "--memory"):
+            memory = arg
+
+    if not (valid1 and valid2):
+        print_help('intel_pstate')
         sys.exit()
-    elif opt in ("-t", "--trace_file"):
-        valid1 = True
-        location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
-        filename = os.path.join(location, arg)
-    elif opt in ("-i", "--interval"):
-        valid1 = True
-        interval = arg
-    elif opt in ("-c", "--cpu"):
-        cpu_list = arg
-    elif opt in ("-n", "--name"):
-        valid2 = True
-        testname = arg
-    elif opt in ("-m", "--memory"):
-        memory = arg
-
-if not (valid1 and valid2):
-    print_help()
-    sys.exit()
-
-if cpu_list:
-    for p in re.split("[,]", cpu_list):
-        if int(p) < MAX_CPUS :
-            cpu_mask[int(p)] = 1
-else:
-    for i in range (0, MAX_CPUS):
-        cpu_mask[i] = 1
-
-if not os.path.exists('results'):
-    os.mkdir('results')
+
+    if cpu_list:
+        for p in re.split("[,]", cpu_list):
+            if int(p) < MAX_CPUS :
+                cpu_mask[int(p)] = 1
+    else:
+        for i in range (0, MAX_CPUS):
+            cpu_mask[i] = 1
+
+    if not os.path.exists('results'):
+        os.mkdir('results')
+        # The regular user needs to own the directory, not root.
+        fix_ownership('results')
+
+    os.chdir('results')
+    if os.path.exists(testname):
+        print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
+        sys.exit()
+    os.mkdir(testname)
     # The regular user needs to own the directory, not root.
-    fix_ownership('results')
-
-os.chdir('results')
-if os.path.exists(testname):
-    print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
-    sys.exit()
-os.mkdir(testname)
-# The regular user needs to own the directory, not root.
-fix_ownership(testname)
-os.chdir(testname)
-
-# Temporary (or perhaps not)
-cur_version = sys.version_info
-print('python version (should be >= 2.7):')
-print(cur_version)
-
-# Left as "cleanup" for potential future re-run ability.
-cleanup_data_files()
-
-if interval:
-    filename = "/sys/kernel/debug/tracing/trace"
-    clear_trace_file()
-    set_trace_buffer_size()
-    enable_trace()
-    print('Sleeping for ', interval, 'seconds')
-    time.sleep(int(interval))
-    disable_trace()
-
-current_max_cpu = 0
-
-read_trace_data(filename)
-
-if interval:
-    clear_trace_file()
-    # Free the memory
-    free_trace_buffer()
-
-if graph_data_present == False:
-    print('No valid data to plot')
-    sys.exit(2)
-
-for cpu_no in range(0, current_max_cpu + 1):
-    plot_perf_busy_with_sample(cpu_no)
-    plot_perf_busy(cpu_no)
-    plot_durations(cpu_no)
-    plot_loads(cpu_no)
-
-plot_pstate_cpu_with_sample()
-plot_pstate_cpu()
-plot_load_cpu()
-plot_frequency_cpu()
-plot_duration_cpu()
-plot_scaled_cpu()
-plot_boost_cpu()
-plot_ghz_cpu()
-
-# It is preferrable, but not necessary, that the regular user owns the files, not root.
-for root, dirs, files in os.walk('.'):
-    for f in files:
-        fix_ownership(f)
-
-os.chdir('../../')
+    fix_ownership(testname)
+    os.chdir(testname)
+
+    # Temporary (or perhaps not)
+    cur_version = sys.version_info
+    print('python version (should be >= 2.7):')
+    print(cur_version)
+
+    # Left as "cleanup" for potential future re-run ability.
+    cleanup_data_files()
+
+    if interval:
+        filename = "/sys/kernel/debug/tracing/trace"
+        clear_trace_file()
+        set_trace_buffer_size(memory)
+        enable_trace(trace_file)
+        print('Sleeping for ', interval, 'seconds')
+        time.sleep(int(interval))
+        disable_trace(trace_file)
+
+    current_max_cpu = 0
+
+    read_trace_data(filename, cpu_mask)
+
+    if interval:
+        clear_trace_file()
+        # Free the memory
+        free_trace_buffer()
+
+    if graph_data_present == False:
+        print('No valid data to plot')
+        sys.exit(2)
+
+    for cpu_no in range(0, current_max_cpu + 1):
+        plot_perf_busy_with_sample(cpu_no)
+        plot_perf_busy(cpu_no)
+        plot_durations(cpu_no)
+        plot_loads(cpu_no)
+
+    plot_pstate_cpu_with_sample()
+    plot_pstate_cpu()
+    plot_load_cpu()
+    plot_frequency_cpu()
+    plot_duration_cpu()
+    plot_scaled_cpu()
+    plot_boost_cpu()
+    plot_ghz_cpu()
+
+    # It is preferrable, but not necessary, that the regular user owns the files, not root.
+    for root, dirs, files in os.walk('.'):
+        for f in files:
+            fix_ownership(f)
+
+    os.chdir('../../')
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 47d3ba895d6d..bc5ae0872fed 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2323,7 +2323,7 @@ int skx_pkg_cstate_limits[16] =
 };
 
 int icx_pkg_cstate_limits[16] =
-    { PCL__0, PCL__2, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+    { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
 	PCLRSV, PCLRSV
 };
 
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
index d8eeeafb50dc..1e13b7523918 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi.c
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -18,7 +18,6 @@
 
 #include "../../kselftest.h"
 
-#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
 #define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1)
 
 extern void do_syscall(int sve_vl);
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index a3c1e67441f9..4c418b2021e0 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -21,8 +21,6 @@
 
 #include "../../kselftest.h"
 
-#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
-
 /* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
 #ifndef NT_ARM_SVE
 #define NT_ARM_SVE 0x405
@@ -489,6 +487,8 @@ static int do_parent(pid_t child)
 	unsigned int vq, vl;
 	bool vl_supported;
 
+	ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
 	/* Attach to the child */
 	while (1) {
 		int sig;
diff --git a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
index a876db1f096a..325bca0de0f6 100644
--- a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
+++ b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
@@ -19,17 +19,6 @@
 #include "kselftest.h"
 #include "mte_common_util.h"
 
-#define PR_SET_TAGGED_ADDR_CTRL 55
-#define PR_GET_TAGGED_ADDR_CTRL 56
-# define PR_TAGGED_ADDR_ENABLE  (1UL << 0)
-# define PR_MTE_TCF_SHIFT	1
-# define PR_MTE_TCF_NONE	(0UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_SYNC	(1UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_ASYNC	(2UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_MASK	(3UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TAG_SHIFT	3
-# define PR_MTE_TAG_MASK	(0xffffUL << PR_MTE_TAG_SHIFT)
-
 #include "mte_def.h"
 
 #define NUM_ITERATIONS		1024
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 1de7a0abd0ae..f4ae5f87a3b7 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -3,6 +3,7 @@
 
 #define _GNU_SOURCE
 
+#include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <signal.h>
@@ -11,6 +12,7 @@
 #include <string.h>
 #include <ucontext.h>
 #include <unistd.h>
+#include <sys/uio.h>
 #include <sys/mman.h>
 
 #include "kselftest.h"
@@ -19,14 +21,28 @@
 
 static size_t page_sz;
 
-static int check_usermem_access_fault(int mem_type, int mode, int mapping)
+#define TEST_NAME_MAX 100
+
+enum test_type {
+	READ_TEST,
+	WRITE_TEST,
+	READV_TEST,
+	WRITEV_TEST,
+	LAST_TEST,
+};
+
+static int check_usermem_access_fault(int mem_type, int mode, int mapping,
+                                      int tag_offset, int tag_len,
+                                      enum test_type test_type)
 {
 	int fd, i, err;
 	char val = 'A';
-	size_t len, read_len;
+	ssize_t len, syscall_len;
 	void *ptr, *ptr_next;
+	int fileoff, ptroff, size;
+	int sizes[] = {1, 2, 3, 8, 16, 32, 4096, page_sz};
 
-	err = KSFT_FAIL;
+	err = KSFT_PASS;
 	len = 2 * page_sz;
 	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
 	fd = create_temp_file();
@@ -43,9 +59,9 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping)
 	}
 	mte_initialize_current_context(mode, (uintptr_t)ptr, len);
 	/* Copy from file into buffer with valid tag */
-	read_len = read(fd, ptr, len);
+	syscall_len = read(fd, ptr, len);
 	mte_wait_after_trig();
-	if (cur_mte_cxt.fault_valid || read_len < len)
+	if (cur_mte_cxt.fault_valid || syscall_len < len)
 		goto usermem_acc_err;
 	/* Verify same pattern is read */
 	for (i = 0; i < len; i++)
@@ -54,36 +70,136 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping)
 	if (i < len)
 		goto usermem_acc_err;
 
-	/* Tag the next half of memory with different value */
-	ptr_next = (void *)((unsigned long)ptr + page_sz);
+	if (!tag_len)
+		tag_len = len - tag_offset;
+	/* Tag a part of memory with different value */
+	ptr_next = (void *)((unsigned long)ptr + tag_offset);
 	ptr_next = mte_insert_new_tag(ptr_next);
-	mte_set_tag_address_range(ptr_next, page_sz);
+	mte_set_tag_address_range(ptr_next, tag_len);
 
-	lseek(fd, 0, 0);
-	/* Copy from file into buffer with invalid tag */
-	read_len = read(fd, ptr, len);
-	mte_wait_after_trig();
-	/*
-	 * Accessing user memory in kernel with invalid tag should fail in sync
-	 * mode without fault but may not fail in async mode as per the
-	 * implemented MTE userspace support in Arm64 kernel.
-	 */
-	if (mode == MTE_SYNC_ERR &&
-	    !cur_mte_cxt.fault_valid && read_len < len) {
-		err = KSFT_PASS;
-	} else if (mode == MTE_ASYNC_ERR &&
-		   !cur_mte_cxt.fault_valid && read_len == len) {
-		err = KSFT_PASS;
+	for (fileoff = 0; fileoff < 16; fileoff++) {
+		for (ptroff = 0; ptroff < 16; ptroff++) {
+			for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+				size = sizes[i];
+				lseek(fd, 0, 0);
+
+				/* perform file operation on buffer with invalid tag */
+				switch (test_type) {
+				case READ_TEST:
+					syscall_len = read(fd, ptr + ptroff, size);
+					break;
+				case WRITE_TEST:
+					syscall_len = write(fd, ptr + ptroff, size);
+					break;
+				case READV_TEST: {
+					struct iovec iov[1];
+					iov[0].iov_base = ptr + ptroff;
+					iov[0].iov_len = size;
+					syscall_len = readv(fd, iov, 1);
+					break;
+				}
+				case WRITEV_TEST: {
+					struct iovec iov[1];
+					iov[0].iov_base = ptr + ptroff;
+					iov[0].iov_len = size;
+					syscall_len = writev(fd, iov, 1);
+					break;
+				}
+				case LAST_TEST:
+					goto usermem_acc_err;
+				}
+
+				mte_wait_after_trig();
+				/*
+				 * Accessing user memory in kernel with invalid tag should fail in sync
+				 * mode without fault but may not fail in async mode as per the
+				 * implemented MTE userspace support in Arm64 kernel.
+				 */
+				if (cur_mte_cxt.fault_valid) {
+					goto usermem_acc_err;
+				}
+				if (mode == MTE_SYNC_ERR && syscall_len < len) {
+					/* test passed */
+				} else if (mode == MTE_ASYNC_ERR && syscall_len == size) {
+					/* test passed */
+				} else {
+					goto usermem_acc_err;
+				}
+			}
+		}
 	}
+
+	goto exit;
+
 usermem_acc_err:
+	err = KSFT_FAIL;
+exit:
 	mte_free_memory((void *)ptr, len, mem_type, true);
 	close(fd);
 	return err;
 }
 
+void format_test_name(char* name, int name_len, int type, int sync, int map, int len, int offset) {
+	const char* test_type;
+	const char* mte_type;
+	const char* map_type;
+
+	switch (type) {
+	case READ_TEST:
+		test_type = "read";
+		break;
+	case WRITE_TEST:
+		test_type = "write";
+		break;
+	case READV_TEST:
+		test_type = "readv";
+		break;
+	case WRITEV_TEST:
+		test_type = "writev";
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	switch (sync) {
+	case MTE_SYNC_ERR:
+		mte_type = "MTE_SYNC_ERR";
+		break;
+	case MTE_ASYNC_ERR:
+		mte_type = "MTE_ASYNC_ERR";
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	switch (map) {
+	case MAP_SHARED:
+		map_type = "MAP_SHARED";
+		break;
+	case MAP_PRIVATE:
+		map_type = "MAP_PRIVATE";
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	snprintf(name, name_len,
+	         "test type: %s, %s, %s, tag len: %d, tag offset: %d\n",
+	         test_type, mte_type, map_type, len, offset);
+}
+
 int main(int argc, char *argv[])
 {
 	int err;
+	int t, s, m, l, o;
+	int mte_sync[] = {MTE_SYNC_ERR, MTE_ASYNC_ERR};
+	int maps[] = {MAP_SHARED, MAP_PRIVATE};
+	int tag_lens[] = {0, MT_GRANULE_SIZE};
+	int tag_offsets[] = {page_sz, MT_GRANULE_SIZE};
+	char test_name[TEST_NAME_MAX];
 
 	page_sz = getpagesize();
 	if (!page_sz) {
@@ -98,17 +214,28 @@ int main(int argc, char *argv[])
 	mte_register_signal(SIGSEGV, mte_default_handler);
 
 	/* Set test plan */
-	ksft_set_plan(4);
+	ksft_set_plan(64);
 
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
-		"Check memory access from kernel in sync mode, private mapping and mmap memory\n");
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
-		"Check memory access from kernel in sync mode, shared mapping and mmap memory\n");
-
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
-		"Check memory access from kernel in async mode, private mapping and mmap memory\n");
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
-		"Check memory access from kernel in async mode, shared mapping and mmap memory\n");
+	for (t = 0; t < LAST_TEST; t++) {
+		for (s = 0; s < ARRAY_SIZE(mte_sync); s++) {
+			for (m = 0; m < ARRAY_SIZE(maps); m++) {
+				for (l = 0; l < ARRAY_SIZE(tag_lens); l++) {
+					for (o = 0; o < ARRAY_SIZE(tag_offsets); o++) {
+						int sync = mte_sync[s];
+						int map = maps[m];
+						int offset = tag_offsets[o];
+						int tag_len = tag_lens[l];
+						int res = check_usermem_access_fault(USE_MMAP, sync,
+						                                     map, offset,
+						                                     tag_len, t);
+						format_test_name(test_name, TEST_NAME_MAX,
+						                 t, sync, map, tag_len, offset);
+						evaluate_test(res, test_name);
+					}
+				}
+			}
+		}
+	}
 
 	mte_restore_setup();
 	ksft_print_cnts();
diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
index ebe8694dbef0..f909b70d9e98 100644
--- a/tools/testing/selftests/arm64/signal/test_signals.h
+++ b/tools/testing/selftests/arm64/signal/test_signals.h
@@ -53,6 +53,7 @@ struct tdescr {
 	char			*name;
 	char			*descr;
 	unsigned long		feats_required;
+	unsigned long		feats_incompatible;
 	/* bitmask of effectively supported feats: populated at run-time */
 	unsigned long		feats_supported;
 	bool			initialized;
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 2f8c23af3b5e..5743897984b0 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -36,6 +36,8 @@ static inline char *feats_to_string(unsigned long feats)
 {
 	size_t flen = MAX_FEATS_SZ - 1;
 
+	feats_string[0] = '\0';
+
 	for (int i = 0; i < FMAX_END; i++) {
 		if (feats & (1UL << i)) {
 			size_t tlen = strlen(feats_names[i]);
@@ -256,7 +258,7 @@ int test_init(struct tdescr *td)
 		td->minsigstksz = MINSIGSTKSZ;
 	fprintf(stderr, "Detected MINSTKSIGSZ:%d\n", td->minsigstksz);
 
-	if (td->feats_required) {
+	if (td->feats_required || td->feats_incompatible) {
 		td->feats_supported = 0;
 		/*
 		 * Checking for CPU required features using both the
@@ -267,15 +269,29 @@ int test_init(struct tdescr *td)
 		if (getauxval(AT_HWCAP) & HWCAP_SVE)
 			td->feats_supported |= FEAT_SVE;
 		if (feats_ok(td)) {
-			fprintf(stderr,
-				"Required Features: [%s] supported\n",
-				feats_to_string(td->feats_required &
-						td->feats_supported));
+			if (td->feats_required & td->feats_supported)
+				fprintf(stderr,
+					"Required Features: [%s] supported\n",
+					feats_to_string(td->feats_required &
+							td->feats_supported));
+			if (!(td->feats_incompatible & td->feats_supported))
+				fprintf(stderr,
+					"Incompatible Features: [%s] absent\n",
+					feats_to_string(td->feats_incompatible));
 		} else {
-			fprintf(stderr,
-				"Required Features: [%s] NOT supported\n",
-				feats_to_string(td->feats_required &
-						~td->feats_supported));
+			if ((td->feats_required & td->feats_supported) !=
+			    td->feats_supported)
+				fprintf(stderr,
+					"Required Features: [%s] NOT supported\n",
+					feats_to_string(td->feats_required &
+							~td->feats_supported));
+			if (td->feats_incompatible & td->feats_supported)
+				fprintf(stderr,
+					"Incompatible Features: [%s] supported\n",
+					feats_to_string(td->feats_incompatible &
+							~td->feats_supported));
+
+
 			td->result = KSFT_SKIP;
 			return 0;
 		}
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h
index 6772b5c8d274..f3aa99ba67bb 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.h
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h
@@ -18,6 +18,8 @@ void test_result(struct tdescr *td);
 
 static inline bool feats_ok(struct tdescr *td)
 {
+	if (td->feats_incompatible & td->feats_supported)
+		return false;
 	return (td->feats_required & td->feats_supported) == td->feats_required;
 }
 
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 543ad7513a8e..694732e4b344 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -374,6 +374,16 @@ run_cmd() {
 	return $rc
 }
 
+run_cmd_bg() {
+	cmd="$*"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "    COMMAND: %s &\n" "${cmd}"
+	fi
+
+	$cmd 2>&1 &
+}
+
 # Find the auto-generated name for this namespace
 nsname() {
 	eval echo \$NS_$1
@@ -670,10 +680,10 @@ setup_nettest_xfrm() {
 	[ ${1} -eq 6 ] && proto="-6" || proto=""
 	port=${2}
 
-	run_cmd ${ns_a} nettest ${proto} -q -D -s -x -p ${port} -t 5 &
+	run_cmd_bg "${ns_a}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
 	nettest_pids="${nettest_pids} $!"
 
-	run_cmd ${ns_b} nettest ${proto} -q -D -s -x -p ${port} -t 5 &
+	run_cmd_bg "${ns_b}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
 	nettest_pids="${nettest_pids} $!"
 }
 
@@ -865,7 +875,6 @@ setup_ovs_bridge() {
 setup() {
 	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return $ksft_skip
 
-	cleanup
 	for arg do
 		eval setup_${arg} || { echo "  ${arg} not supported"; return 1; }
 	done
@@ -876,7 +885,7 @@ trace() {
 
 	for arg do
 		[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
-		${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
+		${ns_cmd} tcpdump --immediate-mode -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
 		tcpdump_pids="${tcpdump_pids} $!"
 		ns_cmd=
 	done
@@ -1836,6 +1845,10 @@ run_test() {
 
 	unset IFS
 
+	# Since cleanup() relies on variables modified by this subshell, it
+	# has to run in this context.
+	trap cleanup EXIT
+
 	if [ "$VERBOSE" = "1" ]; then
 		printf "\n##########################################################################\n\n"
 	fi
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index 79fe627b9e81..eb8543b9a5c4 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -880,9 +880,8 @@ EOF
 		return $ksft_skip
 	fi
 
-	# test default behaviour. Packet from ns1 to ns0 is not redirected
-	# due to automatic port translation.
-	test_port_shadow "default" "ROUTER"
+	# test default behaviour. Packet from ns1 to ns0 is redirected to ns2.
+	test_port_shadow "default" "CLIENT"
 
 	# test packet filter based mitigation: prevent forwarding of
 	# packets claiming to come from the service port.
diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh
index e6a132df6172..69f8a5958cef 100755
--- a/tools/testing/selftests/rcutorture/bin/console-badness.sh
+++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh
@@ -10,7 +10,7 @@
 #
 # Authors: Paul E. McKenney <[email protected]>
 
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
+egrep 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
 grep -v 'ODEBUG: ' |
 grep -v 'This means that this is a DEBUG kernel and it is' |
 grep -v 'Warning: unable to open an initial console' |
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
index 5a0023d183da..0941f1ddab65 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -47,8 +47,8 @@ else
 	exit 1
 fi
 
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 
 dryrun=
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
index 370406bbfeed..f17000a2ccf1 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
@@ -49,8 +49,8 @@ fi
 mkdir $resdir/$ds
 echo Results directory: $resdir/$ds
 
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 echo Using all `identify_qemu_vcpus` CPUs.
 
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
index e4a00779b8c6..ee886b40a5d2 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
@@ -22,8 +22,8 @@ T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$
 trap 'rm -rf $T' 0
 mkdir $T
 
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 default_starttime="`get_starttime`"
 starttime="${2-default_starttime}"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index 2e9e9e2eedb6..5f682fc892dd 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -30,10 +30,16 @@ editor=${EDITOR-vi}
 files=
 for i in ${rundir}/*/Make.out
 do
+	scenariodir="`dirname $i`"
+	scenariobasedir="`echo ${scenariodir} | sed -e 's/\.[0-9]*$//'`"
 	if egrep -q "error:|warning:|^ld: .*undefined reference to" < $i
 	then
 		egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
 		files="$files $i.diags $i"
+	elif ! test -f ${scenariobasedir}/vmlinux
+	then
+		echo No ${scenariobasedir}/vmlinux file > $i.diags
+		files="$files $i.diags $i"
 	fi
 done
 if test -n "$files"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index 1c4c2c727dad..43e1387234d1 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -25,7 +25,7 @@ stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
 	    tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
 	    awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
 	    tr -d '\012\015'`"
-fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }'`"
+fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }' | tr -d '\015'`"
 if test -z "$ngps"
 then
 	echo "$configfile ------- " $stopstate
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index e09b1bc78708..8c4c1e4792d0 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -19,8 +19,8 @@ then
 	exit 1
 fi
 
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 
 starttime="`get_starttime`"
@@ -108,8 +108,8 @@ else
 		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
 		exit 2
 	fi
-	cp -a "$rundir" "$KVM/res/"
-	oldrun="$KVM/res/$ds"
+	cp -a "$rundir" "$RCUTORTURE/res/"
+	oldrun="$RCUTORTURE/res/$ds"
 fi
 echo | tee -a "$oldrun/remote-log"
 echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
@@ -155,18 +155,23 @@ do
 	echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
 	cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
 	ret=$?
-	if test "$ret" -ne 0
-	then
-		echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. | tee -a "$oldrun/remote-log"
+	tries=0
+	while test "$ret" -ne 0
+	do
+		echo Unable to download $T/binres.tgz to system $i, waiting and then retrying.  $tries prior retries. | tee -a "$oldrun/remote-log"
 		sleep 60
 		cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
 		ret=$?
 		if test "$ret" -ne 0
 		then
-			echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
-			exit 10
+			if test "$tries" > 5
+			then
+				echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
+				exit 10
+			fi
 		fi
-	fi
+		tries=$((tries+1))
+	done
 done
 
 # Function to check for presence of a file on the specified system.
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 6de0c183db5b..55b2c1533282 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -25,15 +25,15 @@ LANG=en_US.UTF-8; export LANG
 
 dur=$((30*60))
 dryrun=""
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 
 TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`"
 TORTURE_DEFCONFIG=defconfig
 TORTURE_BOOT_IMAGE=""
 TORTURE_BUILDONLY=
-TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
+TORTURE_INITRD="$RCUTORTURE/initrd"; export TORTURE_INITRD
 TORTURE_KCONFIG_ARG=""
 TORTURE_KCONFIG_GDB_ARG=""
 TORTURE_BOOT_GDB_ARG=""
@@ -262,7 +262,7 @@ else
 	exit 1
 fi
 
-CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG
+CONFIGFRAG=${RCUTORTURE}/configs/${TORTURE_SUITE}; export CONFIGFRAG
 
 defaultconfigs="`tr '\012' ' ' < $CONFIGFRAG/CFLIST`"
 if test -z "$configs"
@@ -272,7 +272,7 @@ fi
 
 if test -z "$resdir"
 then
-	resdir=$KVM/res
+	resdir=$RCUTORTURE/res
 fi
 
 # Create a file of test-name/#cpus pairs, sorted by decreasing #cpus.
@@ -280,7 +280,7 @@ configs_derep=
 for CF in $configs
 do
 	case $CF in
-	[0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**)
+	[0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**|[0-9][0-9][0-9][0-9]\**)
 		config_reps=`echo $CF | sed -e 's/\*.*$//'`
 		CF1=`echo $CF | sed -e 's/^[^*]*\*//'`
 		;;
@@ -386,7 +386,7 @@ END {
 # Generate a script to execute the tests in appropriate batches.
 cat << ___EOF___ > $T/script
 CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG
-KVM="$KVM"; export KVM
+RCUTORTURE="$RCUTORTURE"; export RCUTORTURE
 PATH="$PATH"; export PATH
 TORTURE_ALLOTED_CPUS="$TORTURE_ALLOTED_CPUS"; export TORTURE_ALLOTED_CPUS
 TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE
@@ -569,7 +569,7 @@ ___EOF___
 awk < $T/cfgcpu.pack \
 	-v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
 	-v CONFIGDIR="$CONFIGFRAG/" \
-	-v KVM="$KVM" \
+	-v RCUTORTURE="$RCUTORTURE" \
 	-v ncpus=$cpus \
 	-v jitter="$jitter" \
 	-v rd=$resdir/$ds/ \
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 9f624bd53c27..822eb037a057 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -138,6 +138,16 @@ then
 	then
 		summary="$summary  Bugs: $n_bugs"
 	fi
+	n_kcsan=`egrep -c 'BUG: KCSAN: ' $file`
+	if test "$n_kcsan" -ne 0
+	then
+		if test "$n_bugs" = "$n_kcsan"
+		then
+			summary="$summary (all bugs kcsan)"
+		else
+			summary="$summary  KCSAN: $n_kcsan"
+		fi
+	fi
 	n_calltrace=`grep -c 'Call Trace:' $file`
 	if test "$n_calltrace" -ne 0
 	then
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index eae88aacca2a..bfe09e2829c8 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -13,8 +13,8 @@
 scriptname=$0
 args="$*"
 
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 
 TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`"
@@ -37,7 +37,7 @@ configs_scftorture=
 kcsan_kmake_args=
 
 # Default compression, duration, and apportionment.
-compress_kasan_vmlinux="`identify_qemu_vcpus`"
+compress_concurrency="`identify_qemu_vcpus`"
 duration_base=10
 duration_rcutorture_frac=7
 duration_locktorture_frac=1
@@ -67,12 +67,12 @@ function doyesno () {
 
 usage () {
 	echo "Usage: $scriptname optional arguments:"
-	echo "       --compress-kasan-vmlinux concurrency"
+	echo "       --compress-concurrency concurrency"
 	echo "       --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\""
 	echo "       --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\""
 	echo "       --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
-	echo "       --doall"
-	echo "       --doallmodconfig / --do-no-allmodconfig"
+	echo "       --do-all"
+	echo "       --do-allmodconfig / --do-no-allmodconfig"
 	echo "       --do-clocksourcewd / --do-no-clocksourcewd"
 	echo "       --do-kasan / --do-no-kasan"
 	echo "       --do-kcsan / --do-no-kcsan"
@@ -91,9 +91,9 @@ usage () {
 while test $# -gt 0
 do
 	case "$1" in
-	--compress-kasan-vmlinux)
-		checkarg --compress-kasan-vmlinux "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error'
-		compress_kasan_vmlinux=$2
+	--compress-concurrency)
+		checkarg --compress-concurrency "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error'
+		compress_concurrency=$2
 		shift
 		;;
 	--config-rcutorture|--configs-rcutorture)
@@ -414,8 +414,14 @@ nfailures=0
 echo FAILURES: | tee -a $T/log
 if test -s "$T/failures"
 then
-	cat "$T/failures" | tee -a $T/log
+	awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/  /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum"
 	nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`"
+	grep "^  Summary: " "$T/failuresum" |
+		grep -v '^  Summary: Bugs: [0-9]* (all bugs kcsan)$' > "$T/nonkcsan"
+	if test -s "$T/nonkcsan"
+	then
+		nonkcsanbug="yes"
+	fi
 	ret=2
 fi
 if test "$do_kcsan" = "yes"
@@ -424,12 +430,16 @@ then
 fi
 echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log
 echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log
+if test -z "$nonkcsanbug" && test -s "$T/failuresum"
+then
+	echo "  All bugs were KCSAN failures."
+fi
 tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
-if test -n "$tdir" && test $compress_kasan_vmlinux -gt 0
+if test -n "$tdir" && test $compress_concurrency -gt 0
 then
 	# KASAN vmlinux files can approach 1GB in size, so compress them.
-	echo Looking for KASAN files to compress: `date` > "$tdir/log-xz" 2>&1
-	find "$tdir" -type d -name '*-kasan' -print > $T/xz-todo
+	echo Looking for K[AC]SAN files to compress: `date` > "$tdir/log-xz" 2>&1
+	find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo
 	ncompresses=0
 	batchno=1
 	if test -s $T/xz-todo
@@ -447,7 +457,7 @@ then
 			do
 				xz "$j" >> "$tdir/log-xz" 2>&1 &
 				ncompresses=$((ncompresses+1))
-				if test $ncompresses -ge $compress_kasan_vmlinux
+				if test $ncompresses -ge $compress_concurrency
 				then
 					echo Waiting for batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log
 					wait
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
index 3ca112444ce7..7093422050f6 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
@@ -1,5 +1,5 @@
 CONFIG_SMP=y
-CONFIG_NR_CPUS=4
+CONFIG_NR_CPUS=3
 CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=n
 CONFIG_PREEMPT_VOLUNTARY=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
index 238bfe3bd0cc..ce0694fd9b92 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
@@ -1 +1,2 @@
 rcutorture.torture_type=srcu
+rcutorture.fwd_progress=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
index ce48c7b82673..2db39f298d18 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -1,2 +1,4 @@
 rcutorture.torture_type=srcud
 rcupdate.rcu_self_test=1
+rcutorture.fwd_progress=3
+srcutree.big_cpu_lim=5
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
index 34c8ff5a12f2..e4d74e5fc1d0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -1,5 +1,5 @@
 CONFIG_SMP=y
-CONFIG_NR_CPUS=4
+CONFIG_NR_CPUS=5
 CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile
index 2956584e1e37..75af864e07b6 100644
--- a/tools/testing/selftests/sgx/Makefile
+++ b/tools/testing/selftests/sgx/Makefile
@@ -4,7 +4,7 @@ include ../lib.mk
 
 .PHONY: all clean
 
-CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \
+CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh "$(CC)" \
 			    ../x86/trivial_64bit_program.c)
 
 ifndef OBJCOPY
diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
index 9d4322c946e2..006b464c8fc9 100644
--- a/tools/testing/selftests/sgx/load.c
+++ b/tools/testing/selftests/sgx/load.c
@@ -21,7 +21,7 @@
 
 void encl_delete(struct encl *encl)
 {
-	struct encl_segment *heap_seg = &encl->segment_tbl[encl->nr_segments - 1];
+	struct encl_segment *heap_seg;
 
 	if (encl->encl_base)
 		munmap((void *)encl->encl_base, encl->encl_size);
@@ -32,10 +32,11 @@ void encl_delete(struct encl *encl)
 	if (encl->fd)
 		close(encl->fd);
 
-	munmap(heap_seg->src, heap_seg->size);
-
-	if (encl->segment_tbl)
+	if (encl->segment_tbl) {
+		heap_seg = &encl->segment_tbl[encl->nr_segments - 1];
+		munmap(heap_seg->src, heap_seg->size);
 		free(encl->segment_tbl);
+	}
 
 	memset(encl, 0, sizeof(*encl));
 }
diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
index 370c4995f7c4..dd74fa42302e 100644
--- a/tools/testing/selftests/sgx/main.c
+++ b/tools/testing/selftests/sgx/main.c
@@ -146,7 +146,8 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl,
 
 	if (!encl_load("test_encl.elf", encl, heap_size)) {
 		encl_delete(encl);
-		TH_LOG("Failed to load the test enclave.\n");
+		TH_LOG("Failed to load the test enclave.");
+		return false;
 	}
 
 	if (!encl_measure(encl))
@@ -185,8 +186,6 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl,
 	return true;
 
 err:
-	encl_delete(encl);
-
 	for (i = 0; i < encl->nr_segments; i++) {
 		seg = &encl->segment_tbl[i];
 
@@ -205,7 +204,9 @@ err:
 		fclose(maps_file);
 	}
 
-	TH_LOG("Failed to initialize the test enclave.\n");
+	TH_LOG("Failed to initialize the test enclave.");
+
+	encl_delete(encl);
 
 	return false;
 }
diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py
index f34486cd7342..057a4f49c79d 100644
--- a/tools/testing/selftests/tpm2/tpm2.py
+++ b/tools/testing/selftests/tpm2/tpm2.py
@@ -56,6 +56,7 @@ TSS2_RESMGR_TPM_RC_LAYER = (11 << TSS2_RC_LAYER_SHIFT)
 
 TPM2_CAP_HANDLES = 0x00000001
 TPM2_CAP_COMMANDS = 0x00000002
+TPM2_CAP_PCRS = 0x00000005
 TPM2_CAP_TPM_PROPERTIES = 0x00000006
 
 TPM2_PT_FIXED = 0x100
@@ -712,3 +713,33 @@ class Client:
             pt += 1
 
         return handles
+
+    def get_cap_pcrs(self):
+        pcr_banks = {}
+
+        fmt = '>HII III'
+
+        cmd = struct.pack(fmt,
+                          TPM2_ST_NO_SESSIONS,
+                          struct.calcsize(fmt),
+                          TPM2_CC_GET_CAPABILITY,
+                          TPM2_CAP_PCRS, 0, 1)
+        rsp = self.send_cmd(cmd)[10:]
+        _, _, cnt = struct.unpack('>BII', rsp[:9])
+        rsp = rsp[9:]
+
+        # items are TPMS_PCR_SELECTION's
+        for i in range(0, cnt):
+              hash, sizeOfSelect = struct.unpack('>HB', rsp[:3])
+              rsp = rsp[3:]
+
+              pcrSelect = 0
+              if sizeOfSelect > 0:
+                  pcrSelect, = struct.unpack('%ds' % sizeOfSelect,
+                                             rsp[:sizeOfSelect])
+                  rsp = rsp[sizeOfSelect:]
+                  pcrSelect = int.from_bytes(pcrSelect, byteorder='big')
+
+              pcr_banks[hash] = pcrSelect
+
+        return pcr_banks
diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py
index 9d764306887b..ffe98b5c8d22 100644
--- a/tools/testing/selftests/tpm2/tpm2_tests.py
+++ b/tools/testing/selftests/tpm2/tpm2_tests.py
@@ -27,7 +27,17 @@ class SmokeTest(unittest.TestCase):
         result = self.client.unseal(self.root_key, blob, auth, None)
         self.assertEqual(data, result)
 
+    def determine_bank_alg(self, mask):
+        pcr_banks = self.client.get_cap_pcrs()
+        for bank_alg, pcrSelection in pcr_banks.items():
+            if pcrSelection & mask == mask:
+                return bank_alg
+        return None
+
     def test_seal_with_policy(self):
+        bank_alg = self.determine_bank_alg(1 << 16)
+        self.assertIsNotNone(bank_alg)
+
         handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL)
 
         data = ('X' * 64).encode()
@@ -35,7 +45,7 @@ class SmokeTest(unittest.TestCase):
         pcrs = [16]
 
         try:
-            self.client.policy_pcr(handle, pcrs)
+            self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
             self.client.policy_password(handle)
 
             policy_dig = self.client.get_policy_digest(handle)
@@ -47,7 +57,7 @@ class SmokeTest(unittest.TestCase):
         handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
 
         try:
-            self.client.policy_pcr(handle, pcrs)
+            self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
             self.client.policy_password(handle)
 
             result = self.client.unseal(self.root_key, blob, auth, handle)
@@ -72,6 +82,9 @@ class SmokeTest(unittest.TestCase):
         self.assertEqual(rc, tpm2.TPM2_RC_AUTH_FAIL)
 
     def test_unseal_with_wrong_policy(self):
+        bank_alg = self.determine_bank_alg(1 << 16 | 1 << 1)
+        self.assertIsNotNone(bank_alg)
+
         handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL)
 
         data = ('X' * 64).encode()
@@ -79,7 +92,7 @@ class SmokeTest(unittest.TestCase):
         pcrs = [16]
 
         try:
-            self.client.policy_pcr(handle, pcrs)
+            self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
             self.client.policy_password(handle)
 
             policy_dig = self.client.get_policy_digest(handle)
@@ -91,13 +104,13 @@ class SmokeTest(unittest.TestCase):
         # Extend first a PCR that is not part of the policy and try to unseal.
         # This should succeed.
 
-        ds = tpm2.get_digest_size(tpm2.TPM2_ALG_SHA1)
-        self.client.extend_pcr(1, ('X' * ds).encode())
+        ds = tpm2.get_digest_size(bank_alg)
+        self.client.extend_pcr(1, ('X' * ds).encode(), bank_alg=bank_alg)
 
         handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
 
         try:
-            self.client.policy_pcr(handle, pcrs)
+            self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
             self.client.policy_password(handle)
 
             result = self.client.unseal(self.root_key, blob, auth, handle)
@@ -109,14 +122,14 @@ class SmokeTest(unittest.TestCase):
 
         # Then, extend a PCR that is part of the policy and try to unseal.
         # This should fail.
-        self.client.extend_pcr(16, ('X' * ds).encode())
+        self.client.extend_pcr(16, ('X' * ds).encode(), bank_alg=bank_alg)
 
         handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
 
         rc = 0
 
         try:
-            self.client.policy_pcr(handle, pcrs)
+            self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
             self.client.policy_password(handle)
 
             result = self.client.unseal(self.root_key, blob, auth, handle)
@@ -302,3 +315,19 @@ class AsyncTest(unittest.TestCase):
         log.debug("Calling get_cap in a NON_BLOCKING mode")
         async_client.get_cap(tpm2.TPM2_CAP_HANDLES, tpm2.HR_LOADED_SESSION)
         async_client.close()
+
+    def test_flush_invalid_context(self):
+        log = logging.getLogger(__name__)
+        log.debug(sys._getframe().f_code.co_name)
+
+        async_client = tpm2.Client(tpm2.Client.FLAG_SPACE | tpm2.Client.FLAG_NONBLOCK)
+        log.debug("Calling flush_context passing in an invalid handle ")
+        handle = 0x80123456
+        rc = 0
+        try:
+            async_client.flush_context(handle)
+        except OSError as e:
+            rc = e.errno
+
+        self.assertEqual(rc, 22)
+        async_client.close()
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 1607322a112c..a14b5b800897 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for vm selftests
 
+LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h
+
 include local_config.mk
 
 uname_M := $(shell uname -m 2>/dev/null || echo not)
@@ -140,10 +142,6 @@ endif
 
 $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
 
-$(OUTPUT)/gup_test: ../../../../mm/gup_test.h
-
-$(OUTPUT)/hmm-tests: local_config.h
-
 # HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
 $(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
 
diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh
index 3e2089c8cf54..8c669c0d662e 100755
--- a/tools/testing/selftests/x86/check_cc.sh
+++ b/tools/testing/selftests/x86/check_cc.sh
@@ -7,7 +7,7 @@ CC="$1"
 TESTPROG="$2"
 shift 2
 
-if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
+if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
     echo 1
 else
     echo 0
diff --git a/tools/virtio/linux/mm_types.h b/tools/virtio/linux/mm_types.h
new file mode 100644
index 000000000000..356ba4d496f6
--- /dev/null
+++ b/tools/virtio/linux/mm_types.h
@@ -0,0 +1,3 @@
+struct folio {
+       struct page page;
+};
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index cb3f29c09aff..23f142af544a 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -130,6 +130,7 @@ static void vdev_info_init(struct vdev_info* dev, unsigned long long features)
 	memset(dev, 0, sizeof *dev);
 	dev->vdev.features = features;
 	INIT_LIST_HEAD(&dev->vdev.vqs);
+	spin_lock_init(&dev->vdev.vqs_list_lock);
 	dev->buf_size = 1024;
 	dev->buf = malloc(dev->buf_size);
 	assert(dev->buf);