diff options
author | Michael Ellerman <mpe@ellerman.id.au> | 2022-07-25 13:49:22 +1000 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2022-07-25 13:49:22 +1000 |
commit | 3c69a99b62fde9de86a612ef1daaa07d95f0a773 (patch) | |
tree | 2a82c3ba8926092d35a9ef1ddc62a08cf19ac981 /arch | |
parent | be640317a1d0b9cf42fedb2debc2887a7cfa38de (diff) | |
parent | ff6992735ade75aae3e35d16b17da1008d753d28 (diff) |
Merge tag 'v5.19-rc7' into fixes
Merge v5.19-rc7 into fixes to bring in:
d11219ad53dc ("amdgpu: disable powerpc support for the newer display engine")
Diffstat (limited to 'arch')
228 files changed, 3190 insertions, 1505 deletions
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 184899808ee7..5112f493f494 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -1586,7 +1586,6 @@ dtb-$(CONFIG_ARCH_ASPEED) += \ aspeed-bmc-lenovo-hr630.dtb \ aspeed-bmc-lenovo-hr855xg2.dtb \ aspeed-bmc-microsoft-olympus.dtb \ - aspeed-bmc-nuvia-dc-scm.dtb \ aspeed-bmc-opp-lanyang.dtb \ aspeed-bmc-opp-mihawk.dtb \ aspeed-bmc-opp-mowgli.dtb \ @@ -1599,6 +1598,7 @@ dtb-$(CONFIG_ARCH_ASPEED) += \ aspeed-bmc-opp-witherspoon.dtb \ aspeed-bmc-opp-zaius.dtb \ aspeed-bmc-portwell-neptune.dtb \ + aspeed-bmc-qcom-dc-scm-v1.dtb \ aspeed-bmc-quanta-q71l.dtb \ aspeed-bmc-quanta-s6q.dtb \ aspeed-bmc-supermicro-x11spi.dtb \ diff --git a/arch/arm/boot/dts/aspeed-bmc-nuvia-dc-scm.dts b/arch/arm/boot/dts/aspeed-bmc-qcom-dc-scm-v1.dts index f4a97cfb0f23..259ef3f54c5c 100644 --- a/arch/arm/boot/dts/aspeed-bmc-nuvia-dc-scm.dts +++ b/arch/arm/boot/dts/aspeed-bmc-qcom-dc-scm-v1.dts @@ -6,8 +6,8 @@ #include "aspeed-g6.dtsi" / { - model = "Nuvia DC-SCM BMC"; - compatible = "nuvia,dc-scm-bmc", "aspeed,ast2600"; + model = "Qualcomm DC-SCM V1 BMC"; + compatible = "qcom,dc-scm-v1-bmc", "aspeed,ast2600"; aliases { serial4 = &uart5; diff --git a/arch/arm/boot/dts/at91-sam9x60ek.dts b/arch/arm/boot/dts/at91-sam9x60ek.dts index 7719ea3d4933..81ccb0636a00 100644 --- a/arch/arm/boot/dts/at91-sam9x60ek.dts +++ b/arch/arm/boot/dts/at91-sam9x60ek.dts @@ -233,10 +233,9 @@ status = "okay"; eeprom@53 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x53>; pagesize = <16>; - size = <128>; status = "okay"; }; }; diff --git a/arch/arm/boot/dts/at91-sama5d2_icp.dts b/arch/arm/boot/dts/at91-sama5d2_icp.dts index 806eb1d911d7..164201a8fbf2 100644 --- a/arch/arm/boot/dts/at91-sama5d2_icp.dts +++ b/arch/arm/boot/dts/at91-sama5d2_icp.dts @@ -329,21 +329,21 @@ status = "okay"; eeprom@50 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x50>; pagesize = <16>; status = "okay"; }; eeprom@52 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x52>; pagesize = <16>; status = "disabled"; }; eeprom@53 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x53>; pagesize = <16>; status = "disabled"; diff --git a/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts b/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts index 443e8b022897..14af1fd6d247 100644 --- a/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts +++ b/arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts @@ -120,26 +120,31 @@ port@0 { reg = <0>; label = "lan1"; + phy-mode = "internal"; }; port@1 { reg = <1>; label = "lan2"; + phy-mode = "internal"; }; port@2 { reg = <2>; label = "lan3"; + phy-mode = "internal"; }; port@3 { reg = <3>; label = "lan4"; + phy-mode = "internal"; }; port@4 { reg = <4>; label = "lan5"; + phy-mode = "internal"; }; port@5 { diff --git a/arch/arm/boot/dts/bcm2711-rpi-400.dts b/arch/arm/boot/dts/bcm2711-rpi-400.dts index f4d2fc20397c..c53d9eb0b802 100644 --- a/arch/arm/boot/dts/bcm2711-rpi-400.dts +++ b/arch/arm/boot/dts/bcm2711-rpi-400.dts @@ -28,12 +28,12 @@ &expgpio { gpio-line-names = "BT_ON", "WL_ON", - "", + "PWR_LED_OFF", "GLOBAL_RESET", "VDD_SD_IO_SEL", - "CAM_GPIO", + "GLOBAL_SHUTDOWN", "SD_PWR_ON", - "SD_OC_N"; + "SHUTDOWN_REQUEST"; }; &genet_mdio { diff --git a/arch/arm/boot/dts/imx6qdl-colibri.dtsi b/arch/arm/boot/dts/imx6qdl-colibri.dtsi index c383e0e4110c..7df270cea292 100644 --- a/arch/arm/boot/dts/imx6qdl-colibri.dtsi +++ b/arch/arm/boot/dts/imx6qdl-colibri.dtsi @@ -593,7 +593,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_atmel_conn>; reg = <0x4a>; - reset-gpios = <&gpio1 14 GPIO_ACTIVE_HIGH>; /* SODIMM 106 */ + reset-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>; /* SODIMM 106 */ status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi index fded07f370b3..d6ba4b2a60f6 100644 --- a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi +++ b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi @@ -226,7 +226,7 @@ reg = <0x28>; #gpio-cells = <2>; gpio-controller; - ngpio = <32>; + ngpios = <62>; }; sgtl5000: codec@a { diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index d27beb47f9a3..652feff33496 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -762,7 +762,7 @@ regulator-name = "vddpu"; regulator-min-microvolt = <725000>; regulator-max-microvolt = <1450000>; - regulator-enable-ramp-delay = <150>; + regulator-enable-ramp-delay = <380>; anatop-reg-offset = <0x140>; anatop-vol-bit-shift = <9>; anatop-vol-bit-width = <5>; diff --git a/arch/arm/boot/dts/imx6ull-colibri.dtsi b/arch/arm/boot/dts/imx6ull-colibri.dtsi index 15621e03fa4d..2c3ae715c683 100644 --- a/arch/arm/boot/dts/imx6ull-colibri.dtsi +++ b/arch/arm/boot/dts/imx6ull-colibri.dtsi @@ -166,7 +166,7 @@ atmel_mxt_ts: touchscreen@4a { compatible = "atmel,maxtouch"; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_atmel_conn>; + pinctrl-0 = <&pinctrl_atmel_conn &pinctrl_atmel_snvs_conn>; reg = <0x4a>; interrupt-parent = <&gpio5>; interrupts = <4 IRQ_TYPE_EDGE_FALLING>; /* SODIMM 107 / INT */ @@ -331,7 +331,6 @@ pinctrl_atmel_conn: atmelconngrp { fsl,pins = < MX6UL_PAD_JTAG_MOD__GPIO1_IO10 0xb0a0 /* SODIMM 106 */ - MX6ULL_PAD_SNVS_TAMPER4__GPIO5_IO04 0xb0a0 /* SODIMM 107 */ >; }; @@ -684,6 +683,12 @@ }; &iomuxc_snvs { + pinctrl_atmel_snvs_conn: atmelsnvsconngrp { + fsl,pins = < + MX6ULL_PAD_SNVS_TAMPER4__GPIO5_IO04 0xb0a0 /* SODIMM 107 */ + >; + }; + pinctrl_snvs_gpio1: snvsgpio1grp { fsl,pins = < MX6ULL_PAD_SNVS_TAMPER6__GPIO5_IO06 0x110a0 /* SODIMM 93 */ diff --git a/arch/arm/boot/dts/imx7d-smegw01.dts b/arch/arm/boot/dts/imx7d-smegw01.dts index c6b32064a009..21b509c43393 100644 --- a/arch/arm/boot/dts/imx7d-smegw01.dts +++ b/arch/arm/boot/dts/imx7d-smegw01.dts @@ -216,10 +216,8 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; + no-1-8-v; non-removable; - cap-sd-highspeed; - sd-uhs-ddr50; - mmc-ddr-1_8v; vmmc-supply = <®_wifi>; enable-sdio-wakeup; status = "okay"; diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 008e3da460f1..039eed79d2e7 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -120,6 +120,7 @@ compatible = "usb-nop-xceiv"; clocks = <&clks IMX7D_USB_HSIC_ROOT_CLK>; clock-names = "main_clk"; + power-domains = <&pgc_hsic_phy>; #phy-cells = <0>; }; @@ -1153,7 +1154,6 @@ compatible = "fsl,imx7d-usb", "fsl,imx27-usb"; reg = <0x30b30000 0x200>; interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>; - power-domains = <&pgc_hsic_phy>; clocks = <&clks IMX7D_USB_CTRL_CLK>; fsl,usbphy = <&usbphynop3>; fsl,usbmisc = <&usbmisc3 0>; diff --git a/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi b/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi index 4cab1b3b3b29..725dcf707b31 100644 --- a/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi +++ b/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi @@ -87,22 +87,22 @@ phy4: ethernet-phy@5 { reg = <5>; - coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>; + coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>; }; phy5: ethernet-phy@6 { reg = <6>; - coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>; + coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>; }; phy6: ethernet-phy@7 { reg = <7>; - coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>; + coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>; }; phy7: ethernet-phy@8 { reg = <8>; - coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>; + coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>; }; }; diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi index 814ad0b46232..c3b8a6d63027 100644 --- a/arch/arm/boot/dts/qcom-msm8974.dtsi +++ b/arch/arm/boot/dts/qcom-msm8974.dtsi @@ -506,6 +506,8 @@ interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP1_UART2_APPS_CLK>, <&gcc GCC_BLSP1_AHB_CLK>; clock-names = "core", "iface"; + pinctrl-names = "default"; + pinctrl-0 = <&blsp1_uart2_default>; status = "disabled"; }; @@ -581,6 +583,9 @@ interrupts = <GIC_SPI 113 IRQ_TYPE_NONE>; clocks = <&gcc GCC_BLSP2_UART1_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>; clock-names = "core", "iface"; + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&blsp2_uart1_default>; + pinctrl-1 = <&blsp2_uart1_sleep>; status = "disabled"; }; @@ -599,6 +604,8 @@ interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP2_UART4_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>; clock-names = "core", "iface"; + pinctrl-names = "default"; + pinctrl-0 = <&blsp2_uart4_default>; status = "disabled"; }; @@ -639,6 +646,9 @@ interrupts = <0 106 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP2_QUP6_I2C_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>; clock-names = "core", "iface"; + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&blsp2_i2c6_default>; + pinctrl-1 = <&blsp2_i2c6_sleep>; #address-cells = <1>; #size-cells = <0>; }; @@ -1256,7 +1266,7 @@ }; }; - blsp1_uart2_active: blsp1-uart2-active { + blsp1_uart2_default: blsp1-uart2-default { rx { pins = "gpio5"; function = "blsp_uart2"; @@ -1272,7 +1282,7 @@ }; }; - blsp2_uart1_active: blsp2-uart1-active { + blsp2_uart1_default: blsp2-uart1-default { tx-rts { pins = "gpio41", "gpio44"; function = "blsp_uart7"; @@ -1295,7 +1305,7 @@ bias-pull-down; }; - blsp2_uart4_active: blsp2-uart4-active { + blsp2_uart4_default: blsp2-uart4-default { tx-rts { pins = "gpio53", "gpio56"; function = "blsp_uart10"; @@ -1406,7 +1416,19 @@ bias-pull-up; }; - /* BLSP2_I2C6 info is missing - nobody uses it though? */ + blsp2_i2c6_default: blsp2-i2c6-default { + pins = "gpio87", "gpio88"; + function = "blsp_i2c12"; + drive-strength = <2>; + bias-disable; + }; + + blsp2_i2c6_sleep: blsp2-i2c6-sleep { + pins = "gpio87", "gpio88"; + function = "blsp_i2c12"; + drive-strength = <2>; + bias-pull-up; + }; spi8_default: spi8_default { mosi { diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 89c71d419f82..659a17fc755c 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -1124,7 +1124,7 @@ clocks = <&pmc PMC_TYPE_PERIPHERAL 55>, <&pmc PMC_TYPE_GCK 55>; clock-names = "pclk", "gclk"; assigned-clocks = <&pmc PMC_TYPE_CORE PMC_I2S1_MUX>; - assigned-parrents = <&pmc PMC_TYPE_GCK 55>; + assigned-clock-parents = <&pmc PMC_TYPE_GCK 55>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/stm32mp15-scmi.dtsi b/arch/arm/boot/dts/stm32mp15-scmi.dtsi new file mode 100644 index 000000000000..543f24c2f4f6 --- /dev/null +++ b/arch/arm/boot/dts/stm32mp15-scmi.dtsi @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * Copyright (C) STMicroelectronics 2022 - All Rights Reserved + * Author: Alexandre Torgue <alexandre.torgue@foss.st.com> for STMicroelectronics. + */ + +/ { + firmware { + optee: optee { + compatible = "linaro,optee-tz"; + method = "smc"; + }; + + scmi: scmi { + compatible = "linaro,scmi-optee"; + #address-cells = <1>; + #size-cells = <0>; + linaro,optee-channel-id = <0>; + shmem = <&scmi_shm>; + + scmi_clk: protocol@14 { + reg = <0x14>; + #clock-cells = <1>; + }; + + scmi_reset: protocol@16 { + reg = <0x16>; + #reset-cells = <1>; + }; + + scmi_voltd: protocol@17 { + reg = <0x17>; + + scmi_reguls: regulators { + #address-cells = <1>; + #size-cells = <0>; + + scmi_reg11: reg11@0 { + reg = <0>; + regulator-name = "reg11"; + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1100000>; + }; + + scmi_reg18: reg18@1 { + voltd-name = "reg18"; + reg = <1>; + regulator-name = "reg18"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + + scmi_usb33: usb33@2 { + reg = <2>; + regulator-name = "usb33"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + }; + }; + }; + }; + + soc { + scmi_sram: sram@2ffff000 { + compatible = "mmio-sram"; + reg = <0x2ffff000 0x1000>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0x2ffff000 0x1000>; + + scmi_shm: scmi-sram@0 { + compatible = "arm,scmi-shmem"; + reg = <0 0x80>; + }; + }; + }; +}; + +®11 { + status = "disabled"; +}; + +®18 { + status = "disabled"; +}; + +&usb33 { + status = "disabled"; +}; + +&usbotg_hs { + usb33d-supply = <&scmi_usb33>; +}; + +&usbphyc { + vdda1v1-supply = <&scmi_reg11>; + vdda1v8-supply = <&scmi_reg18>; +}; + +/delete-node/ &clk_hse; +/delete-node/ &clk_hsi; +/delete-node/ &clk_lse; +/delete-node/ &clk_lsi; +/delete-node/ &clk_csi; diff --git a/arch/arm/boot/dts/stm32mp151.dtsi b/arch/arm/boot/dts/stm32mp151.dtsi index 1b2fd3426a81..e04dda5ddd95 100644 --- a/arch/arm/boot/dts/stm32mp151.dtsi +++ b/arch/arm/boot/dts/stm32mp151.dtsi @@ -115,33 +115,6 @@ status = "disabled"; }; - firmware { - optee: optee { - compatible = "linaro,optee-tz"; - method = "smc"; - status = "disabled"; - }; - - scmi: scmi { - compatible = "linaro,scmi-optee"; - #address-cells = <1>; - #size-cells = <0>; - linaro,optee-channel-id = <0>; - shmem = <&scmi_shm>; - status = "disabled"; - - scmi_clk: protocol@14 { - reg = <0x14>; - #clock-cells = <1>; - }; - - scmi_reset: protocol@16 { - reg = <0x16>; - #reset-cells = <1>; - }; - }; - }; - soc { compatible = "simple-bus"; #address-cells = <1>; @@ -149,20 +122,6 @@ interrupt-parent = <&intc>; ranges; - scmi_sram: sram@2ffff000 { - compatible = "mmio-sram"; - reg = <0x2ffff000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0 0x2ffff000 0x1000>; - - scmi_shm: scmi-sram@0 { - compatible = "arm,scmi-shmem"; - reg = <0 0x80>; - status = "disabled"; - }; - }; - timers2: timer@40000000 { #address-cells = <1>; #size-cells = <0>; @@ -606,7 +565,7 @@ compatible = "st,stm32-cec"; reg = <0x40016000 0x400>; interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&rcc CEC_K>, <&clk_lse>; + clocks = <&rcc CEC_K>, <&rcc CEC>; clock-names = "cec", "hdmi-cec"; status = "disabled"; }; @@ -1515,7 +1474,7 @@ usbh_ohci: usb@5800c000 { compatible = "generic-ohci"; reg = <0x5800c000 0x1000>; - clocks = <&rcc USBH>, <&usbphyc>; + clocks = <&usbphyc>, <&rcc USBH>; resets = <&rcc USBH_R>; interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; @@ -1524,7 +1483,7 @@ usbh_ehci: usb@5800d000 { compatible = "generic-ehci"; reg = <0x5800d000 0x1000>; - clocks = <&rcc USBH>; + clocks = <&usbphyc>, <&rcc USBH>; resets = <&rcc USBH_R>; interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>; companion = <&usbh_ohci>; diff --git a/arch/arm/boot/dts/stm32mp157a-dk1-scmi.dts b/arch/arm/boot/dts/stm32mp157a-dk1-scmi.dts index e3d3f3f30c7d..e539cc80bef8 100644 --- a/arch/arm/boot/dts/stm32mp157a-dk1-scmi.dts +++ b/arch/arm/boot/dts/stm32mp157a-dk1-scmi.dts @@ -7,6 +7,7 @@ /dts-v1/; #include "stm32mp157a-dk1.dts" +#include "stm32mp15-scmi.dtsi" / { model = "STMicroelectronics STM32MP157A-DK1 SCMI Discovery Board"; @@ -28,6 +29,10 @@ clocks = <&scmi_clk CK_SCMI_MPU>; }; +&dsi { + clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>; +}; + &gpioz { clocks = <&scmi_clk CK_SCMI_GPIOZ>; }; @@ -54,10 +59,6 @@ resets = <&scmi_reset RST_SCMI_MCU>; }; -&optee { - status = "okay"; -}; - &rcc { compatible = "st,stm32mp1-rcc-secure", "syscon"; clock-names = "hse", "hsi", "csi", "lse", "lsi"; @@ -76,11 +77,3 @@ &rtc { clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; }; - -&scmi { - status = "okay"; -}; - -&scmi_shm { - status = "okay"; -}; diff --git a/arch/arm/boot/dts/stm32mp157c-dk2-scmi.dts b/arch/arm/boot/dts/stm32mp157c-dk2-scmi.dts index 45dcd299aa9e..97e4f94b0a24 100644 --- a/arch/arm/boot/dts/stm32mp157c-dk2-scmi.dts +++ b/arch/arm/boot/dts/stm32mp157c-dk2-scmi.dts @@ -7,6 +7,7 @@ /dts-v1/; #include "stm32mp157c-dk2.dts" +#include "stm32mp15-scmi.dtsi" / { model = "STMicroelectronics STM32MP157C-DK2 SCMI Discovery Board"; @@ -34,6 +35,7 @@ }; &dsi { + phy-dsi-supply = <&scmi_reg18>; clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>; }; @@ -63,10 +65,6 @@ resets = <&scmi_reset RST_SCMI_MCU>; }; -&optee { - status = "okay"; -}; - &rcc { compatible = "st,stm32mp1-rcc-secure", "syscon"; clock-names = "hse", "hsi", "csi", "lse", "lsi"; @@ -85,11 +83,3 @@ &rtc { clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; }; - -&scmi { - status = "okay"; -}; - -&scmi_shm { - status = "okay"; -}; diff --git a/arch/arm/boot/dts/stm32mp157c-ed1-scmi.dts b/arch/arm/boot/dts/stm32mp157c-ed1-scmi.dts index 458e0ca3cded..9cf0a44d2f47 100644 --- a/arch/arm/boot/dts/stm32mp157c-ed1-scmi.dts +++ b/arch/arm/boot/dts/stm32mp157c-ed1-scmi.dts @@ -7,6 +7,7 @@ /dts-v1/; #include "stm32mp157c-ed1.dts" +#include "stm32mp15-scmi.dtsi" / { model = "STMicroelectronics STM32MP157C-ED1 SCMI eval daughter"; @@ -33,6 +34,10 @@ resets = <&scmi_reset RST_SCMI_CRYP1>; }; +&dsi { + clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>; +}; + &gpioz { clocks = <&scmi_clk CK_SCMI_GPIOZ>; }; @@ -59,10 +64,6 @@ resets = <&scmi_reset RST_SCMI_MCU>; }; -&optee { - status = "okay"; -}; - &rcc { compatible = "st,stm32mp1-rcc-secure", "syscon"; clock-names = "hse", "hsi", "csi", "lse", "lsi"; @@ -81,11 +82,3 @@ &rtc { clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; }; - -&scmi { - status = "okay"; -}; - -&scmi_shm { - status = "okay"; -}; diff --git a/arch/arm/boot/dts/stm32mp157c-ev1-scmi.dts b/arch/arm/boot/dts/stm32mp157c-ev1-scmi.dts index df9c113edb4b..3b9dd6f4ccc9 100644 --- a/arch/arm/boot/dts/stm32mp157c-ev1-scmi.dts +++ b/arch/arm/boot/dts/stm32mp157c-ev1-scmi.dts @@ -7,6 +7,7 @@ /dts-v1/; #include "stm32mp157c-ev1.dts" +#include "stm32mp15-scmi.dtsi" / { model = "STMicroelectronics STM32MP157C-EV1 SCMI eval daughter on eval mother"; @@ -35,6 +36,7 @@ }; &dsi { + phy-dsi-supply = <&scmi_reg18>; clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>; }; @@ -68,10 +70,6 @@ resets = <&scmi_reset RST_SCMI_MCU>; }; -&optee { - status = "okay"; -}; - &rcc { compatible = "st,stm32mp1-rcc-secure", "syscon"; clock-names = "hse", "hsi", "csi", "lse", "lsi"; @@ -90,11 +88,3 @@ &rtc { clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; }; - -&scmi { - status = "okay"; -}; - -&scmi_shm { - status = "okay"; -}; diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts index f19ed981da9d..3706216ffb40 100644 --- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts +++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts @@ -169,7 +169,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "mxicy,mx25l1606e", "winbond,w25q128"; + compatible = "mxicy,mx25l1606e", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <40000000>; }; diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig index ca32446b187f..f53086ddc48b 100644 --- a/arch/arm/configs/mxs_defconfig +++ b/arch/arm/configs/mxs_defconfig @@ -93,6 +93,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_DRM=y CONFIG_DRM_PANEL_SEIKO_43WVF1G=y CONFIG_DRM_MXSFB=y +CONFIG_FB=y CONFIG_FB_MODE_HELPERS=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_BACKLIGHT_CLASS_DEVICE=y diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index f1d0a7807cd0..41536feb4392 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -112,19 +112,6 @@ static __always_inline void set_domain(unsigned int val) } #endif -#ifdef CONFIG_CPU_USE_DOMAINS -#define modify_domain(dom,type) \ - do { \ - unsigned int domain = get_domain(); \ - domain &= ~domain_mask(dom); \ - domain = domain | domain_val(dom, type); \ - set_domain(domain); \ - } while (0) - -#else -static inline void modify_domain(unsigned dom, unsigned type) { } -#endif - /* * Generate the T (user) versions of the LDR/STR and related * instructions (inline assembly) diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h index 92282558caf7..2b8970d8e5a2 100644 --- a/arch/arm/include/asm/mach/map.h +++ b/arch/arm/include/asm/mach/map.h @@ -27,6 +27,7 @@ enum { MT_HIGH_VECTORS, MT_MEMORY_RWX, MT_MEMORY_RW, + MT_MEMORY_RO, MT_ROM, MT_MEMORY_RWX_NONCACHED, MT_MEMORY_RW_DTCM, diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 93051e2f402c..1408a6a15d0e 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -163,5 +163,31 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) ((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1; \ }) + +/* + * Update ITSTATE after normal execution of an IT block instruction. + * + * The 8 IT state bits are split into two parts in CPSR: + * ITSTATE<1:0> are in CPSR<26:25> + * ITSTATE<7:2> are in CPSR<15:10> + */ +static inline unsigned long it_advance(unsigned long cpsr) +{ + if ((cpsr & 0x06000400) == 0) { + /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ + cpsr &= ~PSR_IT_MASK; + } else { + /* We need to shift left ITSTATE<4:0> */ + const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ + unsigned long it = cpsr & mask; + it <<= 1; + it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ + it &= mask; + cpsr &= ~mask; + cpsr |= it; + } + return cpsr; +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 7aa3ded4af92..6a447ac67d80 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -302,6 +302,7 @@ local_restart: b ret_fast_syscall #endif ENDPROC(vector_swi) + .ltorg /* * This is the really slow path. We're going to be doing diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index b1a43d7bc56c..df6d673e83d5 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -202,7 +202,7 @@ static const struct wakeup_source_info ws_info[] = { static const struct of_device_id sama5d2_ws_ids[] = { { .compatible = "atmel,sama5d2-gem", .data = &ws_info[0] }, - { .compatible = "atmel,at91rm9200-rtc", .data = &ws_info[1] }, + { .compatible = "atmel,sama5d2-rtc", .data = &ws_info[1] }, { .compatible = "atmel,sama5d3-udc", .data = &ws_info[2] }, { .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] }, { .compatible = "usb-ohci", .data = &ws_info[2] }, @@ -213,24 +213,24 @@ static const struct of_device_id sama5d2_ws_ids[] = { }; static const struct of_device_id sam9x60_ws_ids[] = { - { .compatible = "atmel,at91sam9x5-rtc", .data = &ws_info[1] }, + { .compatible = "microchip,sam9x60-rtc", .data = &ws_info[1] }, { .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] }, { .compatible = "usb-ohci", .data = &ws_info[2] }, { .compatible = "atmel,at91sam9g45-ehci", .data = &ws_info[2] }, { .compatible = "usb-ehci", .data = &ws_info[2] }, - { .compatible = "atmel,at91sam9260-rtt", .data = &ws_info[4] }, + { .compatible = "microchip,sam9x60-rtt", .data = &ws_info[4] }, { .compatible = "cdns,sam9x60-macb", .data = &ws_info[5] }, { /* sentinel */ } }; static const struct of_device_id sama7g5_ws_ids[] = { - { .compatible = "atmel,at91sam9x5-rtc", .data = &ws_info[1] }, + { .compatible = "microchip,sama7g5-rtc", .data = &ws_info[1] }, { .compatible = "microchip,sama7g5-ohci", .data = &ws_info[2] }, { .compatible = "usb-ohci", .data = &ws_info[2] }, { .compatible = "atmel,at91sam9g45-ehci", .data = &ws_info[2] }, { .compatible = "usb-ehci", .data = &ws_info[2] }, { .compatible = "microchip,sama7g5-sdhci", .data = &ws_info[3] }, - { .compatible = "atmel,at91sam9260-rtt", .data = &ws_info[4] }, + { .compatible = "microchip,sama7g5-rtt", .data = &ws_info[4] }, { /* sentinel */ } }; @@ -1079,7 +1079,7 @@ securam_fail: return ret; } -static void at91_pm_secure_init(void) +static void __init at91_pm_secure_init(void) { int suspend_mode; struct arm_smccc_res res; diff --git a/arch/arm/mach-axxia/platsmp.c b/arch/arm/mach-axxia/platsmp.c index 512943eae30a..2e203626eda5 100644 --- a/arch/arm/mach-axxia/platsmp.c +++ b/arch/arm/mach-axxia/platsmp.c @@ -39,6 +39,7 @@ static int axxia_boot_secondary(unsigned int cpu, struct task_struct *idle) return -ENOENT; syscon = of_iomap(syscon_np, 0); + of_node_put(syscon_np); if (!syscon) return -ENOMEM; diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c index e4f4b20b83a2..3fc4ec830e3a 100644 --- a/arch/arm/mach-cns3xxx/core.c +++ b/arch/arm/mach-cns3xxx/core.c @@ -372,6 +372,7 @@ static void __init cns3xxx_init(void) /* De-Asscer SATA Reset */ cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SATA)); } + of_node_put(dn); dn = of_find_compatible_node(NULL, NULL, "cavium,cns3420-sdhci"); if (of_device_is_available(dn)) { @@ -385,6 +386,7 @@ static void __init cns3xxx_init(void) cns3xxx_pwr_clk_en(CNS3XXX_PWR_CLK_EN(SDIO)); cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SDIO)); } + of_node_put(dn); pm_power_off = cns3xxx_power_off; diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index 8b48326be9fd..51a247ca4da8 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -149,6 +149,7 @@ static void exynos_map_pmu(void) np = of_find_matching_node(NULL, exynos_dt_pmu_match); if (np) pmu_base_addr = of_iomap(np, 0); + of_node_put(np); } static void __init exynos_init_irq(void) diff --git a/arch/arm/mach-meson/platsmp.c b/arch/arm/mach-meson/platsmp.c index 4b8ad728bb42..32ac60b89fdc 100644 --- a/arch/arm/mach-meson/platsmp.c +++ b/arch/arm/mach-meson/platsmp.c @@ -71,6 +71,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible, } sram_base = of_iomap(node, 0); + of_node_put(node); if (!sram_base) { pr_err("Couldn't map SRAM registers\n"); return; @@ -91,6 +92,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible, } scu_base = of_iomap(node, 0); + of_node_put(node); if (!scu_base) { pr_err("Couldn't map SCU registers\n"); return; diff --git a/arch/arm/mach-rockchip/pm.c b/arch/arm/mach-rockchip/pm.c index 87389d9456b9..30d781d80fe0 100644 --- a/arch/arm/mach-rockchip/pm.c +++ b/arch/arm/mach-rockchip/pm.c @@ -311,7 +311,7 @@ void __init rockchip_suspend_init(void) &match); if (!match) { pr_err("Failed to find PMU node\n"); - return; + goto out_put; } pm_data = (struct rockchip_pm_data *) match->data; @@ -320,9 +320,12 @@ void __init rockchip_suspend_init(void) if (ret) { pr_err("%s: matches init error %d\n", __func__, ret); - return; + goto out_put; } } suspend_set_ops(pm_data->ops); + +out_put: + of_node_put(np); } diff --git a/arch/arm/mach-spear/time.c b/arch/arm/mach-spear/time.c index d1fdb6066f7b..c7c17c0f936c 100644 --- a/arch/arm/mach-spear/time.c +++ b/arch/arm/mach-spear/time.c @@ -218,13 +218,13 @@ void __init spear_setup_of_timer(void) irq = irq_of_parse_and_map(np, 0); if (!irq) { pr_err("%s: No irq passed for timer via DT\n", __func__); - return; + goto err_put_np; } gpt_base = of_iomap(np, 0); if (!gpt_base) { pr_err("%s: of iomap failed\n", __func__); - return; + goto err_put_np; } gpt_clk = clk_get_sys("gpt0", NULL); @@ -239,6 +239,8 @@ void __init spear_setup_of_timer(void) goto err_prepare_enable_clk; } + of_node_put(np); + spear_clockevent_init(irq); spear_clocksource_init(); @@ -248,4 +250,6 @@ err_prepare_enable_clk: clk_put(gpt_clk); err_iomap: iounmap(gpt_base); +err_put_np: + of_node_put(np); } diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index a3a4589ec73b..fc439c2c16f8 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -631,7 +631,11 @@ config CPU_USE_DOMAINS bool help This option enables or disables the use of domain switching - via the set_fs() function. + using the DACR (domain access control register) to protect memory + domains from each other. In Linux we use three domains: kernel, user + and IO. The domains are used to protect userspace from kernelspace + and to handle IO-space as a special type of memory by assigning + manager or client roles to running code (such as a process). config CPU_V7M_NUM_IRQ int "Number of external interrupts connected to the NVIC" diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 6f499559d193..f8dd0b3cc8e0 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -935,6 +935,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (type == TYPE_LDST) do_alignment_finish_ldst(addr, instr, regs, offset); + if (thumb_mode(regs)) + regs->ARM_cpsr = it_advance(regs->ARM_cpsr); + return 0; bad_or_fault: diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 5e2be37a198e..cd17e324aa51 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -296,6 +296,13 @@ static struct mem_type mem_types[] __ro_after_init = { .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, + [MT_MEMORY_RO] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN | L_PTE_RDONLY, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT, + .domain = DOMAIN_KERNEL, + }, [MT_ROM] = { .prot_sect = PMD_TYPE_SECT, .domain = DOMAIN_KERNEL, @@ -489,6 +496,7 @@ static void __init build_mem_type_table(void) /* Also setup NX memory mapping */ mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN; } if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { /* @@ -568,6 +576,7 @@ static void __init build_mem_type_table(void) mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; #endif /* @@ -587,6 +596,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED; @@ -647,6 +658,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd; mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RO].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RO].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask; mem_types[MT_ROM].prot_sect |= cp->pmd; @@ -1360,7 +1373,7 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK); map.virtual = FDT_FIXED_BASE; map.length = FDT_FIXED_SIZE; - map.type = MT_ROM; + map.type = MT_MEMORY_RO; create_mapping(&map); } diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index fb9f3eb6bf48..8bc7a2d6d6c7 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -108,8 +108,7 @@ static unsigned int spectre_v2_install_workaround(unsigned int method) #else static unsigned int spectre_v2_install_workaround(unsigned int method) { - pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n", - smp_processor_id()); + pr_info_once("Spectre V2: workarounds disabled by configuration\n"); return SPECTRE_VULNERABLE; } @@ -209,10 +208,10 @@ static int spectre_bhb_install_workaround(int method) return SPECTRE_VULNERABLE; spectre_bhb_method = method; - } - pr_info("CPU%u: Spectre BHB: using %s workaround\n", - smp_processor_id(), spectre_bhb_method_name(method)); + pr_info("CPU%u: Spectre BHB: enabling %s workaround for all CPUs\n", + smp_processor_id(), spectre_bhb_method_name(method)); + } return SPECTRE_MITIGATED; } diff --git a/arch/arm/probes/decode.h b/arch/arm/probes/decode.h index 973173598992..facc889d05ee 100644 --- a/arch/arm/probes/decode.h +++ b/arch/arm/probes/decode.h @@ -14,6 +14,7 @@ #include <linux/types.h> #include <linux/stddef.h> #include <asm/probes.h> +#include <asm/ptrace.h> #include <asm/kprobes.h> void __init arm_probes_decode_init(void); @@ -35,31 +36,6 @@ void __init find_str_pc_offset(void); #endif -/* - * Update ITSTATE after normal execution of an IT block instruction. - * - * The 8 IT state bits are split into two parts in CPSR: - * ITSTATE<1:0> are in CPSR<26:25> - * ITSTATE<7:2> are in CPSR<15:10> - */ -static inline unsigned long it_advance(unsigned long cpsr) - { - if ((cpsr & 0x06000400) == 0) { - /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ - cpsr &= ~PSR_IT_MASK; - } else { - /* We need to shift left ITSTATE<4:0> */ - const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ - unsigned long it = cpsr & mask; - it <<= 1; - it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ - it &= mask; - cpsr &= ~mask; - cpsr |= it; - } - return cpsr; -} - static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs) { long cpsr = regs->ARM_cpsr; diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 84a1cea1f43b..309648c17f48 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -63,11 +63,12 @@ out: unsigned long __pfn_to_mfn(unsigned long pfn) { - struct rb_node *n = phys_to_mach.rb_node; + struct rb_node *n; struct xen_p2m_entry *entry; unsigned long irqflags; read_lock_irqsave(&p2m_lock, irqflags); + n = phys_to_mach.rb_node; while (n) { entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); if (entry->pfn <= pfn && @@ -152,10 +153,11 @@ bool __set_phys_to_machine_multi(unsigned long pfn, int rc; unsigned long irqflags; struct xen_p2m_entry *p2m_entry; - struct rb_node *n = phys_to_mach.rb_node; + struct rb_node *n; if (mfn == INVALID_P2M_ENTRY) { write_lock_irqsave(&p2m_lock, irqflags); + n = phys_to_mach.rb_node; while (n) { p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); if (p2m_entry->pfn <= pfn && diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi index 66023d553524..d084c33d5ca8 100644 --- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi @@ -9,6 +9,14 @@ /delete-node/ cpu@3; }; + timer { + compatible = "arm,armv8-timer"; + interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>; + }; + pmu { compatible = "arm,cortex-a53-pmu"; interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi index a4be040a00c0..967d2cd3c3ce 100644 --- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi @@ -29,6 +29,8 @@ device_type = "cpu"; compatible = "brcm,brahma-b53"; reg = <0x0>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0xfff8>; next-level-cache = <&l2>; }; diff --git a/arch/arm64/boot/dts/exynos/exynos7885.dtsi b/arch/arm64/boot/dts/exynos/exynos7885.dtsi index 3170661f5b67..9c233c56558c 100644 --- a/arch/arm64/boot/dts/exynos/exynos7885.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos7885.dtsi @@ -280,8 +280,8 @@ interrupts = <GIC_SPI 246 IRQ_TYPE_LEVEL_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&uart0_bus>; - clocks = <&cmu_peri CLK_GOUT_UART0_EXT_UCLK>, - <&cmu_peri CLK_GOUT_UART0_PCLK>; + clocks = <&cmu_peri CLK_GOUT_UART0_PCLK>, + <&cmu_peri CLK_GOUT_UART0_EXT_UCLK>; clock-names = "uart", "clk_uart_baud0"; samsung,uart-fifosize = <64>; status = "disabled"; @@ -293,8 +293,8 @@ interrupts = <GIC_SPI 247 IRQ_TYPE_LEVEL_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&uart1_bus>; - clocks = <&cmu_peri CLK_GOUT_UART1_EXT_UCLK>, - <&cmu_peri CLK_GOUT_UART1_PCLK>; + clocks = <&cmu_peri CLK_GOUT_UART1_PCLK>, + <&cmu_peri CLK_GOUT_UART1_EXT_UCLK>; clock-names = "uart", "clk_uart_baud0"; samsung,uart-fifosize = <256>; status = "disabled"; @@ -306,8 +306,8 @@ interrupts = <GIC_SPI 279 IRQ_TYPE_LEVEL_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&uart2_bus>; - clocks = <&cmu_peri CLK_GOUT_UART2_EXT_UCLK>, - <&cmu_peri CLK_GOUT_UART2_PCLK>; + clocks = <&cmu_peri CLK_GOUT_UART2_PCLK>, + <&cmu_peri CLK_GOUT_UART2_EXT_UCLK>; clock-names = "uart", "clk_uart_baud0"; samsung,uart-fifosize = <256>; status = "disabled"; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 92465f777603..d5cdd77e5a95 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -224,9 +224,12 @@ little-endian; }; - efuse@1e80000 { + sfp: efuse@1e80000 { compatible = "fsl,ls1028a-sfp"; reg = <0x0 0x1e80000 0x0 0x10000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(4)>; + clock-names = "sfp"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index 4c3ac4214a2c..9a4de739e6a2 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -395,41 +395,41 @@ &iomuxc { pinctrl_eqos: eqosgrp { fsl,pins = < - MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3 - MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3 - MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91 - MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91 - MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91 - MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91 - MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91 - MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91 - MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f - MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f - MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f - MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f - MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f - MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f - MX8MP_IOMUXC_SAI2_RXC__GPIO4_IO22 0x19 + MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2 + MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2 + MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90 + MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90 + MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90 + MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90 + MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90 + MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90 + MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16 + MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16 + MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16 + MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16 + MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16 + MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16 + MX8MP_IOMUXC_SAI2_RXC__GPIO4_IO22 0x10 >; }; pinctrl_fec: fecgrp { fsl,pins = < - MX8MP_IOMUXC_SAI1_RXD2__ENET1_MDC 0x3 - MX8MP_IOMUXC_SAI1_RXD3__ENET1_MDIO 0x3 - MX8MP_IOMUXC_SAI1_RXD4__ENET1_RGMII_RD0 0x91 - MX8MP_IOMUXC_SAI1_RXD5__ENET1_RGMII_RD1 0x91 - MX8MP_IOMUXC_SAI1_RXD6__ENET1_RGMII_RD2 0x91 - MX8MP_IOMUXC_SAI1_RXD7__ENET1_RGMII_RD3 0x91 - MX8MP_IOMUXC_SAI1_TXC__ENET1_RGMII_RXC 0x91 - MX8MP_IOMUXC_SAI1_TXFS__ENET1_RGMII_RX_CTL 0x91 - MX8MP_IOMUXC_SAI1_TXD0__ENET1_RGMII_TD0 0x1f - MX8MP_IOMUXC_SAI1_TXD1__ENET1_RGMII_TD1 0x1f - MX8MP_IOMUXC_SAI1_TXD2__ENET1_RGMII_TD2 0x1f - MX8MP_IOMUXC_SAI1_TXD3__ENET1_RGMII_TD3 0x1f - MX8MP_IOMUXC_SAI1_TXD4__ENET1_RGMII_TX_CTL 0x1f - MX8MP_IOMUXC_SAI1_TXD5__ENET1_RGMII_TXC 0x1f - MX8MP_IOMUXC_SAI1_RXD0__GPIO4_IO02 0x19 + MX8MP_IOMUXC_SAI1_RXD2__ENET1_MDC 0x2 + MX8MP_IOMUXC_SAI1_RXD3__ENET1_MDIO 0x2 + MX8MP_IOMUXC_SAI1_RXD4__ENET1_RGMII_RD0 0x90 + MX8MP_IOMUXC_SAI1_RXD5__ENET1_RGMII_RD1 0x90 + MX8MP_IOMUXC_SAI1_RXD6__ENET1_RGMII_RD2 0x90 + MX8MP_IOMUXC_SAI1_RXD7__ENET1_RGMII_RD3 0x90 + MX8MP_IOMUXC_SAI1_TXC__ENET1_RGMII_RXC 0x90 + MX8MP_IOMUXC_SAI1_TXFS__ENET1_RGMII_RX_CTL 0x90 + MX8MP_IOMUXC_SAI1_TXD0__ENET1_RGMII_TD0 0x16 + MX8MP_IOMUXC_SAI1_TXD1__ENET1_RGMII_TD1 0x16 + MX8MP_IOMUXC_SAI1_TXD2__ENET1_RGMII_TD2 0x16 + MX8MP_IOMUXC_SAI1_TXD3__ENET1_RGMII_TD3 0x16 + MX8MP_IOMUXC_SAI1_TXD4__ENET1_RGMII_TX_CTL 0x16 + MX8MP_IOMUXC_SAI1_TXD5__ENET1_RGMII_TXC 0x16 + MX8MP_IOMUXC_SAI1_RXD0__GPIO4_IO02 0x10 >; }; @@ -461,28 +461,28 @@ pinctrl_gpio_led: gpioledgrp { fsl,pins = < - MX8MP_IOMUXC_NAND_READY_B__GPIO3_IO16 0x19 + MX8MP_IOMUXC_NAND_READY_B__GPIO3_IO16 0x140 >; }; pinctrl_i2c1: i2c1grp { fsl,pins = < - MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c3 - MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c3 + MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c2 + MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c2 >; }; pinctrl_i2c3: i2c3grp { fsl,pins = < - MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c3 - MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c3 + MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c2 + MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c2 >; }; pinctrl_i2c5: i2c5grp { fsl,pins = < - MX8MP_IOMUXC_SPDIF_RX__I2C5_SDA 0x400001c3 - MX8MP_IOMUXC_SPDIF_TX__I2C5_SCL 0x400001c3 + MX8MP_IOMUXC_SPDIF_RX__I2C5_SDA 0x400001c2 + MX8MP_IOMUXC_SPDIF_TX__I2C5_SCL 0x400001c2 >; }; @@ -500,20 +500,20 @@ pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp { fsl,pins = < - MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x41 + MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x40 >; }; pinctrl_uart2: uart2grp { fsl,pins = < - MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x49 - MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x49 + MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x140 + MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x140 >; }; pinctrl_usb1_vbus: usb1grp { fsl,pins = < - MX8MP_IOMUXC_GPIO1_IO14__USB2_OTG_PWR 0x19 + MX8MP_IOMUXC_GPIO1_IO14__USB2_OTG_PWR 0x10 >; }; @@ -525,7 +525,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d0 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d0 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d0 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -537,7 +537,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d4 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d4 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d4 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -549,7 +549,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d6 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d6 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d6 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-icore-mx8mp-edimm2.2.dts b/arch/arm64/boot/dts/freescale/imx8mp-icore-mx8mp-edimm2.2.dts index 70a701a624a6..dd703b6a5e17 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-icore-mx8mp-edimm2.2.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-icore-mx8mp-edimm2.2.dts @@ -110,28 +110,28 @@ &iomuxc { pinctrl_eqos: eqosgrp { fsl,pins = < - MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3 - MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3 - MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91 - MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91 - MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91 - MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91 - MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91 - MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91 - MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f - MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f - MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f - MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f - MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f - MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f - MX8MP_IOMUXC_NAND_DATA01__GPIO3_IO07 0x19 + MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2 + MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2 + MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90 + MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90 + MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90 + MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90 + MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90 + MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90 + MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16 + MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16 + MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16 + MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16 + MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16 + MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16 + MX8MP_IOMUXC_NAND_DATA01__GPIO3_IO07 0x10 >; }; pinctrl_uart2: uart2grp { fsl,pins = < - MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x49 - MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x49 + MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x40 + MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x40 >; }; @@ -151,7 +151,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d0 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d0 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d0 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -163,13 +163,13 @@ pinctrl_reg_usb1: regusb1grp { fsl,pins = < - MX8MP_IOMUXC_GPIO1_IO14__GPIO1_IO14 0x19 + MX8MP_IOMUXC_GPIO1_IO14__GPIO1_IO14 0x10 >; }; pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp { fsl,pins = < - MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x41 + MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x40 >; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts index 984a6b9ded8d..6aa720bafe28 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts @@ -116,48 +116,48 @@ &iomuxc { pinctrl_eqos: eqosgrp { fsl,pins = < - MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3 - MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3 - MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91 - MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91 - MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91 - MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91 - MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91 - MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91 - MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f - MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f - MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f - MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f - MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f - MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f + MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2 + MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2 + MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90 + MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90 + MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90 + MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90 + MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90 + MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90 + MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16 + MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16 + MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16 + MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16 + MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16 + MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16 MX8MP_IOMUXC_SAI1_MCLK__GPIO4_IO20 0x10 >; }; pinctrl_i2c2: i2c2grp { fsl,pins = < - MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c3 - MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c3 + MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c2 + MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c2 >; }; pinctrl_i2c2_gpio: i2c2gpiogrp { fsl,pins = < - MX8MP_IOMUXC_I2C2_SCL__GPIO5_IO16 0x1e3 - MX8MP_IOMUXC_I2C2_SDA__GPIO5_IO17 0x1e3 + MX8MP_IOMUXC_I2C2_SCL__GPIO5_IO16 0x1e2 + MX8MP_IOMUXC_I2C2_SDA__GPIO5_IO17 0x1e2 >; }; pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp { fsl,pins = < - MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x41 + MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x40 >; }; pinctrl_uart1: uart1grp { fsl,pins = < - MX8MP_IOMUXC_UART1_RXD__UART1_DCE_RX 0x49 - MX8MP_IOMUXC_UART1_TXD__UART1_DCE_TX 0x49 + MX8MP_IOMUXC_UART1_RXD__UART1_DCE_RX 0x40 + MX8MP_IOMUXC_UART1_TXD__UART1_DCE_TX 0x40 >; }; @@ -175,7 +175,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d0 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d0 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d0 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -187,7 +187,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d4 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d4 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d4 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -199,7 +199,7 @@ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d6 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d6 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d6 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts index 101d31147603..521215520a0f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts @@ -622,15 +622,15 @@ pinctrl_hog: hoggrp { fsl,pins = < - MX8MP_IOMUXC_GPIO1_IO09__GPIO1_IO09 0x40000041 /* DIO0 */ - MX8MP_IOMUXC_GPIO1_IO11__GPIO1_IO11 0x40000041 /* DIO1 */ - MX8MP_IOMUXC_NAND_DQS__GPIO3_IO14 0x40000041 /* M2SKT_OFF# */ - MX8MP_IOMUXC_SD2_DATA2__GPIO2_IO17 0x40000159 /* PCIE1_WDIS# */ - MX8MP_IOMUXC_SD2_DATA3__GPIO2_IO18 0x40000159 /* PCIE2_WDIS# */ - MX8MP_IOMUXC_SD2_CMD__GPIO2_IO14 0x40000159 /* PCIE3_WDIS# */ - MX8MP_IOMUXC_NAND_DATA00__GPIO3_IO06 0x40000041 /* M2SKT_RST# */ - MX8MP_IOMUXC_SAI1_TXD6__GPIO4_IO18 0x40000159 /* M2SKT_WDIS# */ - MX8MP_IOMUXC_NAND_ALE__GPIO3_IO00 0x40000159 /* M2SKT_GDIS# */ + MX8MP_IOMUXC_GPIO1_IO09__GPIO1_IO09 0x40000040 /* DIO0 */ + MX8MP_IOMUXC_GPIO1_IO11__GPIO1_IO11 0x40000040 /* DIO1 */ + MX8MP_IOMUXC_NAND_DQS__GPIO3_IO14 0x40000040 /* M2SKT_OFF# */ + MX8MP_IOMUXC_SD2_DATA2__GPIO2_IO17 0x40000150 /* PCIE1_WDIS# */ + MX8MP_IOMUXC_SD2_DATA3__GPIO2_IO18 0x40000150 /* PCIE2_WDIS# */ + MX8MP_IOMUXC_SD2_CMD__GPIO2_IO14 0x40000150 /* PCIE3_WDIS# */ + MX8MP_IOMUXC_NAND_DATA00__GPIO3_IO06 0x40000040 /* M2SKT_RST# */ + MX8MP_IOMUXC_SAI1_TXD6__GPIO4_IO18 0x40000150 /* M2SKT_WDIS# */ + MX8MP_IOMUXC_NAND_ALE__GPIO3_IO00 0x40000150 /* M2SKT_GDIS# */ MX8MP_IOMUXC_SAI3_TXD__GPIO5_IO01 0x40000104 /* UART_TERM */ MX8MP_IOMUXC_SAI3_TXFS__GPIO4_IO31 0x40000104 /* UART_RS485 */ MX8MP_IOMUXC_SAI3_TXC__GPIO5_IO00 0x40000104 /* UART_HALF */ @@ -639,47 +639,47 @@ pinctrl_accel: accelgrp { fsl,pins = < - MX8MP_IOMUXC_GPIO1_IO07__GPIO1_IO07 0x159 + MX8MP_IOMUXC_GPIO1_IO07__GPIO1_IO07 0x150 >; }; pinctrl_eqos: eqosgrp { fsl,pins = < - MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3 - MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3 - MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91 - MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91 - MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91 - MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91 - MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91 - MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91 - MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f - MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f - MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f - MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f - MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f - MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f - MX8MP_IOMUXC_SAI3_RXD__GPIO4_IO30 0x141 /* RST# */ - MX8MP_IOMUXC_SAI3_RXFS__GPIO4_IO28 0x159 /* IRQ# */ + MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2 + MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2 + MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90 + MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90 + MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90 + MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90 + MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90 + MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90 + MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16 + MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16 + MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16 + MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16 + MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16 + MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16 + MX8MP_IOMUXC_SAI3_RXD__GPIO4_IO30 0x140 /* RST# */ + MX8MP_IOMUXC_SAI3_RXFS__GPIO4_IO28 0x150 /* IRQ# */ >; }; pinctrl_fec: fecgrp { fsl,pins = < - MX8MP_IOMUXC_SAI1_RXD4__ENET1_RGMII_RD0 0x91 - MX8MP_IOMUXC_SAI1_RXD5__ENET1_RGMII_RD1 0x91 - MX8MP_IOMUXC_SAI1_RXD6__ENET1_RGMII_RD2 0x91 - MX8MP_IOMUXC_SAI1_RXD7__ENET1_RGMII_RD3 0x91 - MX8MP_IOMUXC_SAI1_TXC__ENET1_RGMII_RXC 0x91 - MX8MP_IOMUXC_SAI1_TXFS__ENET1_RGMII_RX_CTL 0x91 - MX8MP_IOMUXC_SAI1_TXD0__ENET1_RGMII_TD0 0x1f - MX8MP_IOMUXC_SAI1_TXD1__ENET1_RGMII_TD1 0x1f - MX8MP_IOMUXC_SAI1_TXD2__ENET1_RGMII_TD2 0x1f - MX8MP_IOMUXC_SAI1_TXD3__ENET1_RGMII_TD3 0x1f - MX8MP_IOMUXC_SAI1_TXD4__ENET1_RGMII_TX_CTL 0x1f - MX8MP_IOMUXC_SAI1_TXD5__ENET1_RGMII_TXC 0x1f - MX8MP_IOMUXC_SAI1_RXFS__ENET1_1588_EVENT0_IN 0x141 - MX8MP_IOMUXC_SAI1_RXC__ENET1_1588_EVENT0_OUT 0x141 + MX8MP_IOMUXC_SAI1_RXD4__ENET1_RGMII_RD0 0x90 + MX8MP_IOMUXC_SAI1_RXD5__ENET1_RGMII_RD1 0x90 + MX8MP_IOMUXC_SAI1_RXD6__ENET1_RGMII_RD2 0x90 + MX8MP_IOMUXC_SAI1_RXD7__ENET1_RGMII_RD3 0x90 + MX8MP_IOMUXC_SAI1_TXC__ENET1_RGMII_RXC 0x90 + MX8MP_IOMUXC_SAI1_TXFS__ENET1_RGMII_RX_CTL 0x90 + MX8MP_IOMUXC_SAI1_TXD0__ENET1_RGMII_TD0 0x16 + MX8MP_IOMUXC_SAI1_TXD1__ENET1_RGMII_TD1 0x16 + MX8MP_IOMUXC_SAI1_TXD2__ENET1_RGMII_TD2 0x16 + MX8MP_IOMUXC_SAI1_TXD3__ENET1_RGMII_TD3 0x16 + MX8MP_IOMUXC_SAI1_TXD4__ENET1_RGMII_TX_CTL 0x16 + MX8MP_IOMUXC_SAI1_TXD5__ENET1_RGMII_TXC 0x16 + MX8MP_IOMUXC_SAI1_RXFS__ENET1_1588_EVENT0_IN 0x140 + MX8MP_IOMUXC_SAI1_RXC__ENET1_1588_EVENT0_OUT 0x140 >; }; @@ -692,61 +692,61 @@ pinctrl_gsc: gscgrp { fsl,pins = < - MX8MP_IOMUXC_SAI1_MCLK__GPIO4_IO20 0x159 + MX8MP_IOMUXC_SAI1_MCLK__GPIO4_IO20 0x150 >; }; pinctrl_i2c1: i2c1grp { fsl,pins = < - MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c3 - MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c3 + MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c2 + MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c2 >; }; pinctrl_i2c2: i2c2grp { fsl,pins = < - MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c3 - MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c3 + MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c2 + MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c2 >; }; pinctrl_i2c3: i2c3grp { fsl,pins = < - MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c3 - MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c3 + MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c2 + MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c2 >; }; pinctrl_i2c4: i2c4grp { fsl,pins = < - MX8MP_IOMUXC_I2C4_SCL__I2C4_SCL 0x400001c3 - MX8MP_IOMUXC_I2C4_SDA__I2C4_SDA 0x400001c3 + MX8MP_IOMUXC_I2C4_SCL__I2C4_SCL 0x400001c2 + MX8MP_IOMUXC_I2C4_SDA__I2C4_SDA 0x400001c2 >; }; pinctrl_ksz: kszgrp { fsl,pins = < - MX8MP_IOMUXC_SAI3_RXC__GPIO4_IO29 0x159 /* IRQ# */ - MX8MP_IOMUXC_SAI3_MCLK__GPIO5_IO02 0x141 /* RST# */ + MX8MP_IOMUXC_SAI3_RXC__GPIO4_IO29 0x150 /* IRQ# */ + MX8MP_IOMUXC_SAI3_MCLK__GPIO5_IO02 0x140 /* RST# */ >; }; pinctrl_gpio_leds: ledgrp { fsl,pins = < - MX8MP_IOMUXC_SD2_DATA0__GPIO2_IO15 0x19 - MX8MP_IOMUXC_SD2_DATA1__GPIO2_IO16 0x19 + MX8MP_IOMUXC_SD2_DATA0__GPIO2_IO15 0x10 + MX8MP_IOMUXC_SD2_DATA1__GPIO2_IO16 0x10 >; }; pinctrl_pmic: pmicgrp { fsl,pins = < - MX8MP_IOMUXC_NAND_DATA01__GPIO3_IO07 0x141 + MX8MP_IOMUXC_NAND_DATA01__GPIO3_IO07 0x140 >; }; pinctrl_pps: ppsgrp { fsl,pins = < - MX8MP_IOMUXC_GPIO1_IO12__GPIO1_IO12 0x141 + MX8MP_IOMUXC_GPIO1_IO12__GPIO1_IO12 0x140 >; }; @@ -758,13 +758,13 @@ pinctrl_reg_usb2: regusb2grp { fsl,pins = < - MX8MP_IOMUXC_GPIO1_IO06__GPIO1_IO06 0x141 + MX8MP_IOMUXC_GPIO1_IO06__GPIO1_IO06 0x140 >; }; pinctrl_reg_wifi: regwifigrp { fsl,pins = < - MX8MP_IOMUXC_NAND_DATA03__GPIO3_IO09 0x119 + MX8MP_IOMUXC_NAND_DATA03__GPIO3_IO09 0x110 >; }; @@ -811,7 +811,7 @@ pinctrl_uart3_gpio: uart3gpiogrp { fsl,pins = < - MX8MP_IOMUXC_NAND_DATA02__GPIO3_IO08 0x119 + MX8MP_IOMUXC_NAND_DATA02__GPIO3_IO08 0x110 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index d9542dfff83f..410d0d5e6f1e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -595,7 +595,7 @@ pgc_ispdwp: power-domain@18 { #power-domain-cells = <0>; reg = <IMX8MP_POWER_DOMAIN_MEDIAMIX_ISPDWP>; - clocks = <&clk IMX8MP_CLK_MEDIA_ISP_DIV>; + clocks = <&clk IMX8MP_CLK_MEDIA_ISP_ROOT>; }; }; }; diff --git a/arch/arm64/boot/dts/freescale/s32g2.dtsi b/arch/arm64/boot/dts/freescale/s32g2.dtsi index 59ea8a25aa4c..824d401e7a2c 100644 --- a/arch/arm64/boot/dts/freescale/s32g2.dtsi +++ b/arch/arm64/boot/dts/freescale/s32g2.dtsi @@ -79,7 +79,7 @@ }; }; - soc { + soc@0 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi b/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi index 3b0cc85d6674..71e373b11de9 100644 --- a/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8992-lg-bullhead.dtsi @@ -74,7 +74,7 @@ vdd_l17_29-supply = <&vph_pwr>; vdd_l20_21-supply = <&vph_pwr>; vdd_l25-supply = <&pm8994_s5>; - vdd_lvs1_2 = <&pm8994_s4>; + vdd_lvs1_2-supply = <&pm8994_s4>; /* S1, S2, S6 and S12 are managed by RPMPD */ diff --git a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts index 7748b745a5df..afa91ca9a3dc 100644 --- a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts +++ b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts @@ -171,7 +171,7 @@ vdd_l17_29-supply = <&vph_pwr>; vdd_l20_21-supply = <&vph_pwr>; vdd_l25-supply = <&pm8994_s5>; - vdd_lvs1_2 = <&pm8994_s4>; + vdd_lvs1_2-supply = <&pm8994_s4>; /* S1, S2, S6 and S12 are managed by RPMPD */ diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index 0318d42c5736..1ac2913b182c 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -100,7 +100,7 @@ CPU6: cpu@102 { device_type = "cpu"; compatible = "arm,cortex-a57"; - reg = <0x0 0x101>; + reg = <0x0 0x102>; enable-method = "psci"; next-level-cache = <&L2_1>; }; @@ -108,7 +108,7 @@ CPU7: cpu@103 { device_type = "cpu"; compatible = "arm,cortex-a57"; - reg = <0x0 0x101>; + reg = <0x0 0x103>; enable-method = "psci"; next-level-cache = <&L2_1>; }; diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor-homestar.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor-homestar.dtsi index 9b3e3d13c165..d1e2df5164ea 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor-homestar.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor-homestar.dtsi @@ -5,7 +5,7 @@ * Copyright 2021 Google LLC. */ -#include "sc7180-trogdor.dtsi" +/* This file must be included after sc7180-trogdor.dtsi */ / { /* BOARD-SPECIFIC TOP LEVEL NODES */ diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor-lazor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor-lazor.dtsi index fe2369c29aad..88f6a7d4d020 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor-lazor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor-lazor.dtsi @@ -5,7 +5,7 @@ * Copyright 2020 Google LLC. */ -#include "sc7180-trogdor.dtsi" +/* This file must be included after sc7180-trogdor.dtsi */ &ap_sar_sensor { semtech,cs0-ground; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 0692ae0e60a4..038538c8c614 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -4244,7 +4244,7 @@ power-domains = <&dispcc MDSS_GDSC>; - clocks = <&gcc GCC_DISP_AHB_CLK>, + clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, <&dispcc DISP_CC_MDSS_MDP_CLK>; clock-names = "iface", "core"; diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index 7d08fad76371..b87756bf1ce4 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -2853,6 +2853,16 @@ reg = <0x0 0x17100000 0x0 0x10000>, /* GICD */ <0x0 0x17180000 0x0 0x200000>; /* GICR * 8 */ interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + gic_its: msi-controller@17140000 { + compatible = "arm,gic-v3-its"; + reg = <0x0 0x17140000 0x0 0x20000>; + msi-controller; + #msi-cells = <1>; + }; }; timer@17420000 { @@ -3037,8 +3047,8 @@ iommus = <&apps_smmu 0xe0 0x0>; - interconnects = <&aggre1_noc MASTER_UFS_MEM &mc_virt SLAVE_EBI1>, - <&gem_noc MASTER_APPSS_PROC &config_noc SLAVE_UFS_MEM_CFG>; + interconnects = <&aggre1_noc MASTER_UFS_MEM 0 &mc_virt SLAVE_EBI1 0>, + <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_UFS_MEM_CFG 0>; interconnect-names = "ufs-ddr", "cpu-ufs"; clock-names = "core_clk", diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index 913d845eb51a..1977103a5ef4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -376,7 +376,8 @@ camera: &i2c7 { <&cru ACLK_VIO>, <&cru ACLK_GIC_PRE>, <&cru PCLK_DDR>, - <&cru ACLK_HDCP>; + <&cru ACLK_HDCP>, + <&cru ACLK_VDU>; assigned-clock-rates = <600000000>, <1600000000>, <1000000000>, @@ -388,6 +389,7 @@ camera: &i2c7 { <400000000>, <200000000>, <200000000>, + <400000000>, <400000000>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index fbd0346624e6..9d5b0e8c9cca 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1462,7 +1462,8 @@ <&cru HCLK_PERILP1>, <&cru PCLK_PERILP1>, <&cru ACLK_VIO>, <&cru ACLK_HDCP>, <&cru ACLK_GIC_PRE>, - <&cru PCLK_DDR>; + <&cru PCLK_DDR>, + <&cru ACLK_VDU>; assigned-clock-rates = <594000000>, <800000000>, <1000000000>, @@ -1473,7 +1474,8 @@ <100000000>, <50000000>, <400000000>, <400000000>, <200000000>, - <200000000>; + <200000000>, + <400000000>; }; grf: syscon@ff770000 { diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts index 1534e11a9ad1..fa953b736642 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts @@ -687,6 +687,7 @@ }; &usb_host0_xhci { + dr_mode = "host"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts index 7bdcecc0dfe4..02d5f5a8ca03 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts @@ -133,7 +133,7 @@ assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>; assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>, <&gmac1_clkin>; clock_in_out = "input"; - phy-mode = "rgmii-id"; + phy-mode = "rgmii"; phy-supply = <&vcc_3v3>; pinctrl-names = "default"; pinctrl-0 = <&gmac1m1_miim diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi index f64b368c6c37..cdb530597c5e 100644 --- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi @@ -456,13 +456,11 @@ clock-names = "clk_ahb", "clk_xin"; mmc-ddr-1_8v; mmc-hs200-1_8v; - mmc-hs400-1_8v; ti,trm-icp = <0x2>; ti,otap-del-sel-legacy = <0x0>; ti,otap-del-sel-mmc-hs = <0x0>; ti,otap-del-sel-ddr52 = <0x6>; ti,otap-del-sel-hs200 = <0x7>; - ti,otap-del-sel-hs400 = <0x4>; }; sdhci1: mmc@fa00000 { diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi index be7f39299894..19966f72c5b3 100644 --- a/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi @@ -33,7 +33,7 @@ ranges; #interrupt-cells = <3>; interrupt-controller; - reg = <0x00 0x01800000 0x00 0x200000>, /* GICD */ + reg = <0x00 0x01800000 0x00 0x100000>, /* GICD */ <0x00 0x01900000 0x00 0x100000>, /* GICR */ <0x00 0x6f000000 0x00 0x2000>, /* GICC */ <0x00 0x6f010000 0x00 0x1000>, /* GICH */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 47a1e25e25bb..de32152cea04 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -363,11 +363,6 @@ struct kvm_vcpu_arch { struct kvm_pmu pmu; /* - * Anything that is not used directly from assembly code goes - * here. - */ - - /* * Guest registers we preserve during guest debugging. * * These shadow registers are updated by the kvm_handle_sys_reg diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 3c8af033a997..0e80db4327b6 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -113,6 +113,9 @@ static __always_inline bool has_vhe(void) /* * Code only run in VHE/NVHE hyp context can assume VHE is present or * absent. Otherwise fall back to caps. + * This allows the compiler to discard VHE-specific code from the + * nVHE object, reducing the number of external symbol references + * needed to link. */ if (is_vhe_hyp_code()) return true; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 42ea2bd856c6..8d88433de81d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1974,15 +1974,7 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) #ifdef CONFIG_KVM static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused) { - if (kvm_get_mode() != KVM_MODE_PROTECTED) - return false; - - if (is_kernel_in_hyp_mode()) { - pr_warn("Protected KVM not available with VHE\n"); - return false; - } - - return true; + return kvm_get_mode() == KVM_MODE_PROTECTED; } #endif /* CONFIG_KVM */ @@ -3109,7 +3101,6 @@ void cpu_set_feature(unsigned int num) WARN_ON(num >= MAX_CPU_FEATURES); elf_hwcap |= BIT(num); } -EXPORT_SYMBOL_GPL(cpu_set_feature); bool cpu_have_feature(unsigned int num) { diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index d42a205ef625..bd5df50e4643 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -102,7 +102,6 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) * x19-x29 per the AAPCS, and we created frame records upon entry, so we need * to restore x0-x8, x29, and x30. */ -ftrace_common_return: /* Restore function arguments */ ldp x0, x1, [sp] ldp x2, x3, [sp, #S_X2] diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index f447c4a36f69..ea5dc7c90f46 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -78,47 +78,76 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) } /* - * Turn on the call to ftrace_caller() in instrumented function + * Find the address the callsite must branch to in order to reach '*addr'. + * + * Due to the limited range of 'BL' instructions, modules may be placed too far + * away to branch directly and must use a PLT. + * + * Returns true when '*addr' contains a reachable target address, or has been + * modified to contain a PLT address. Returns false otherwise. */ -int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, + struct module *mod, + unsigned long *addr) { unsigned long pc = rec->ip; - u32 old, new; - long offset = (long)pc - (long)addr; + long offset = (long)*addr - (long)pc; + struct plt_entry *plt; - if (offset < -SZ_128M || offset >= SZ_128M) { - struct module *mod; - struct plt_entry *plt; + /* + * When the target is within range of the 'BL' instruction, use 'addr' + * as-is and branch to that directly. + */ + if (offset >= -SZ_128M && offset < SZ_128M) + return true; - if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) - return -EINVAL; + /* + * When the target is outside of the range of a 'BL' instruction, we + * must use a PLT to reach it. We can only place PLTs for modules, and + * only when module PLT support is built-in. + */ + if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + return false; - /* - * On kernels that support module PLTs, the offset between the - * branch instruction and its target may legally exceed the - * range of an ordinary relative 'bl' opcode. In this case, we - * need to branch via a trampoline in the module. - * - * NOTE: __module_text_address() must be called with preemption - * disabled, but we can rely on ftrace_lock to ensure that 'mod' - * retains its validity throughout the remainder of this code. - */ + /* + * 'mod' is only set at module load time, but if we end up + * dealing with an out-of-range condition, we can assume it + * is due to a module being loaded far away from the kernel. + * + * NOTE: __module_text_address() must be called with preemption + * disabled, but we can rely on ftrace_lock to ensure that 'mod' + * retains its validity throughout the remainder of this code. + */ + if (!mod) { preempt_disable(); mod = __module_text_address(pc); preempt_enable(); + } - if (WARN_ON(!mod)) - return -EINVAL; + if (WARN_ON(!mod)) + return false; - plt = get_ftrace_plt(mod, addr); - if (!plt) { - pr_err("ftrace: no module PLT for %ps\n", (void *)addr); - return -EINVAL; - } - - addr = (unsigned long)plt; + plt = get_ftrace_plt(mod, *addr); + if (!plt) { + pr_err("ftrace: no module PLT for %ps\n", (void *)*addr); + return false; } + *addr = (unsigned long)plt; + return true; +} + +/* + * Turn on the call to ftrace_caller() in instrumented function + */ +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + if (!ftrace_find_callable_addr(rec, NULL, &addr)) + return -EINVAL; + old = aarch64_insn_gen_nop(); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); @@ -132,6 +161,11 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long pc = rec->ip; u32 old, new; + if (!ftrace_find_callable_addr(rec, NULL, &old_addr)) + return -EINVAL; + if (!ftrace_find_callable_addr(rec, NULL, &addr)) + return -EINVAL; + old = aarch64_insn_gen_branch_imm(pc, old_addr, AARCH64_INSN_BRANCH_LINK); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); @@ -181,54 +215,15 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; - bool validate = true; u32 old = 0, new; - long offset = (long)pc - (long)addr; - if (offset < -SZ_128M || offset >= SZ_128M) { - u32 replaced; - - if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) - return -EINVAL; - - /* - * 'mod' is only set at module load time, but if we end up - * dealing with an out-of-range condition, we can assume it - * is due to a module being loaded far away from the kernel. - */ - if (!mod) { - preempt_disable(); - mod = __module_text_address(pc); - preempt_enable(); - - if (WARN_ON(!mod)) - return -EINVAL; - } - - /* - * The instruction we are about to patch may be a branch and - * link instruction that was redirected via a PLT entry. In - * this case, the normal validation will fail, but we can at - * least check that we are dealing with a branch and link - * instruction that points into the right module. - */ - if (aarch64_insn_read((void *)pc, &replaced)) - return -EFAULT; - - if (!aarch64_insn_is_bl(replaced) || - !within_module(pc + aarch64_get_branch_offset(replaced), - mod)) - return -EINVAL; - - validate = false; - } else { - old = aarch64_insn_gen_branch_imm(pc, addr, - AARCH64_INSN_BRANCH_LINK); - } + if (!ftrace_find_callable_addr(rec, mod, &addr)) + return -EINVAL; + old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); new = aarch64_insn_gen_nop(); - return ftrace_modify_code(pc, old, new, validate); + return ftrace_modify_code(pc, old, new, true); } void arch_ftrace_update_code(int command) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index cf3a759f10d4..fea3223704b6 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -303,14 +303,13 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) early_fixmap_init(); early_ioremap_init(); + setup_machine_fdt(__fdt_pointer); + /* * Initialise the static keys early as they may be enabled by the - * cpufeature code, early parameters, and DT setup. + * cpufeature code and early parameters. */ jump_label_init(); - - setup_machine_fdt(__fdt_pointer); - parse_early_param(); /* diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 4e39ace073af..3b8d062e30ea 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1230,6 +1230,9 @@ bool kvm_arch_timer_get_input_level(int vintid) struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); struct arch_timer_context *timer; + if (WARN(!vcpu, "No vcpu context!\n")) + return false; + if (vintid == vcpu_vtimer(vcpu)->irq.irq) timer = vcpu_vtimer(vcpu); else if (vintid == vcpu_ptimer(vcpu)->irq.irq) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 400bb0fe2745..83a7f61354d3 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -150,8 +150,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_free_stage2_pgd; - if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) + if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) { + ret = -ENOMEM; goto out_free_stage2_pgd; + } cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask); kvm_vgic_early_init(kvm); @@ -2110,11 +2112,11 @@ static int finalize_hyp_mode(void) return 0; /* - * Exclude HYP BSS from kmemleak so that it doesn't get peeked - * at, which would end badly once the section is inaccessible. - * None of other sections should ever be introspected. + * Exclude HYP sections from kmemleak so that they don't get peeked + * at, which would end badly once inaccessible. */ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); + kmemleak_free_part(__va(hyp_mem_base), hyp_mem_size); return pkvm_drop_host_privileges(); } @@ -2271,7 +2273,11 @@ static int __init early_kvm_mode_cfg(char *arg) return -EINVAL; if (strcmp(arg, "protected") == 0) { - kvm_mode = KVM_MODE_PROTECTED; + if (!is_kernel_in_hyp_mode()) + kvm_mode = KVM_MODE_PROTECTED; + else + pr_warn_once("Protected KVM not available with VHE\n"); + return 0; } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 3d251a4d2cf7..6012b08ecb14 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -80,6 +80,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED; vcpu->arch.flags |= KVM_ARM64_FP_HOST; + vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED; if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED; @@ -93,6 +94,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) * operations. Do this for ZA as well for now for simplicity. */ if (system_supports_sme()) { + vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED; if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 78edf077fa3b..1e78acf9662e 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -314,15 +314,11 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot) { - hyp_assert_lock_held(&host_kvm.lock); - return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot); } int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) { - hyp_assert_lock_held(&host_kvm.lock); - return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, addr, size, &host_s2_pool, owner_id); } diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index b6d86e423319..35a4331ba5f3 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -243,15 +243,9 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) case SYS_ID_AA64MMFR2_EL1: return get_pvm_id_aa64mmfr2(vcpu); default: - /* - * Should never happen because all cases are covered in - * pvm_sys_reg_descs[]. - */ - WARN_ON(1); - break; + /* Unhandled ID register, RAZ */ + return 0; } - - return 0; } static u64 read_id_reg(const struct kvm_vcpu *vcpu, @@ -332,6 +326,16 @@ static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu, /* Mark the specified system register as an AArch64 feature id register. */ #define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 } +/* + * sys_reg_desc initialiser for architecturally unallocated cpufeature ID + * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2 + * (1 <= crm < 8, 0 <= Op2 < 8). + */ +#define ID_UNALLOCATED(crm, op2) { \ + Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \ + .access = pvm_access_id_aarch64, \ +} + /* Mark the specified system register as Read-As-Zero/Write-Ignored */ #define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi } @@ -375,24 +379,46 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { AARCH32(SYS_MVFR0_EL1), AARCH32(SYS_MVFR1_EL1), AARCH32(SYS_MVFR2_EL1), + ID_UNALLOCATED(3,3), AARCH32(SYS_ID_PFR2_EL1), AARCH32(SYS_ID_DFR1_EL1), AARCH32(SYS_ID_MMFR5_EL1), + ID_UNALLOCATED(3,7), /* AArch64 ID registers */ /* CRm=4 */ AARCH64(SYS_ID_AA64PFR0_EL1), AARCH64(SYS_ID_AA64PFR1_EL1), + ID_UNALLOCATED(4,2), + ID_UNALLOCATED(4,3), AARCH64(SYS_ID_AA64ZFR0_EL1), + ID_UNALLOCATED(4,5), + ID_UNALLOCATED(4,6), + ID_UNALLOCATED(4,7), AARCH64(SYS_ID_AA64DFR0_EL1), AARCH64(SYS_ID_AA64DFR1_EL1), + ID_UNALLOCATED(5,2), + ID_UNALLOCATED(5,3), AARCH64(SYS_ID_AA64AFR0_EL1), AARCH64(SYS_ID_AA64AFR1_EL1), + ID_UNALLOCATED(5,6), + ID_UNALLOCATED(5,7), AARCH64(SYS_ID_AA64ISAR0_EL1), AARCH64(SYS_ID_AA64ISAR1_EL1), + AARCH64(SYS_ID_AA64ISAR2_EL1), + ID_UNALLOCATED(6,3), + ID_UNALLOCATED(6,4), + ID_UNALLOCATED(6,5), + ID_UNALLOCATED(6,6), + ID_UNALLOCATED(6,7), AARCH64(SYS_ID_AA64MMFR0_EL1), AARCH64(SYS_ID_AA64MMFR1_EL1), AARCH64(SYS_ID_AA64MMFR2_EL1), + ID_UNALLOCATED(7,3), + ID_UNALLOCATED(7,4), + ID_UNALLOCATED(7,5), + ID_UNALLOCATED(7,6), + ID_UNALLOCATED(7,7), /* Scalable Vector Registers are restricted. */ diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c index 77a67e9d3d14..e070cda86e12 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -429,11 +429,11 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = { VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, vgic_mmio_read_pending, vgic_mmio_write_spending, - NULL, vgic_uaccess_write_spending, 1, + vgic_uaccess_read_pending, vgic_uaccess_write_spending, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, vgic_mmio_read_pending, vgic_mmio_write_cpending, - NULL, vgic_uaccess_write_cpending, 1, + vgic_uaccess_read_pending, vgic_uaccess_write_cpending, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, vgic_mmio_read_active, vgic_mmio_write_sactive, diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index f7aa7bcd6fb8..f15e29cc63ce 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -353,42 +353,6 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu, return 0; } -static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) -{ - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - u32 value = 0; - int i; - - /* - * pending state of interrupt is latched in pending_latch variable. - * Userspace will save and restore pending state and line_level - * separately. - * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst - * for handling of ISPENDR and ICPENDR. - */ - for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - bool state = irq->pending_latch; - - if (irq->hw && vgic_irq_is_sgi(irq->intid)) { - int err; - - err = irq_get_irqchip_state(irq->host_irq, - IRQCHIP_STATE_PENDING, - &state); - WARN_ON(err); - } - - if (state) - value |= (1U << i); - - vgic_put_irq(vcpu->kvm, irq); - } - - return value; -} - static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) @@ -666,7 +630,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = { VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR, vgic_mmio_read_pending, vgic_mmio_write_spending, - vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1, + vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR, vgic_mmio_read_pending, vgic_mmio_write_cpending, @@ -750,7 +714,7 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = { VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0, vgic_mmio_read_pending, vgic_mmio_write_spending, - vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, + vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0, vgic_mmio_read_pending, vgic_mmio_write_cpending, diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index 49837d3a3ef5..997d0fce2088 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -226,8 +226,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, return 0; } -unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) +static unsigned long __read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + bool is_user) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); u32 value = 0; @@ -239,6 +240,15 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, unsigned long flags; bool val; + /* + * When used from userspace with a GICv3 model: + * + * Pending state of interrupt is latched in pending_latch + * variable. Userspace will save and restore pending state + * and line_level separately. + * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst + * for handling of ISPENDR and ICPENDR. + */ raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw && vgic_irq_is_sgi(irq->intid)) { int err; @@ -248,10 +258,20 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, IRQCHIP_STATE_PENDING, &val); WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); - } else if (vgic_irq_is_mapped_level(irq)) { + } else if (!is_user && vgic_irq_is_mapped_level(irq)) { val = vgic_get_phys_line_level(irq); } else { - val = irq_is_pending(irq); + switch (vcpu->kvm->arch.vgic.vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V3: + if (is_user) { + val = irq->pending_latch; + break; + } + fallthrough; + default: + val = irq_is_pending(irq); + break; + } } value |= ((u32)val << i); @@ -263,6 +283,18 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, return value; } +unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return __read_pending(vcpu, addr, len, false); +} + +unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return __read_pending(vcpu, addr, len, true); +} + static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq) { return (vgic_irq_is_sgi(irq->intid) && diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h index 3fa696f198a3..6082d4b66d39 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.h +++ b/arch/arm64/kvm/vgic/vgic-mmio.h @@ -149,6 +149,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); +unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val); diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c index 8d5f0506fd87..d78ae63d7c15 100644 --- a/arch/arm64/kvm/vmid.c +++ b/arch/arm64/kvm/vmid.c @@ -66,7 +66,7 @@ static void flush_context(void) * the next context-switch, we broadcast TLB flush + I-cache * invalidation over the inner shareable domain on rollover. */ - kvm_call_hyp(__kvm_flush_vm_context); + kvm_call_hyp(__kvm_flush_vm_context); } static bool check_update_reserved_vmid(u64 vmid, u64 newvmid) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 0ea6cc25dc66..21c907987080 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -218,8 +218,6 @@ SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area) */ SYM_FUNC_START(__pi___dma_map_area) add x1, x0, x1 - cmp w2, #DMA_FROM_DEVICE - b.eq __pi_dcache_inval_poc b __pi_dcache_clean_poc SYM_FUNC_END(__pi___dma_map_area) SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index e2a5ec9fdc0d..3618ef3f6d81 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -214,6 +214,19 @@ static pte_t get_clear_contig(struct mm_struct *mm, return orig_pte; } +static pte_t get_clear_contig_flush(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long pgsize, + unsigned long ncontig) +{ + pte_t orig_pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig); + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); + + flush_tlb_range(&vma, addr, addr + (pgsize * ncontig)); + return orig_pte; +} + /* * Changing some bits of contiguous entries requires us to follow a * Break-Before-Make approach, breaking the whole contiguous set @@ -447,19 +460,20 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, int ncontig, i; size_t pgsize = 0; unsigned long pfn = pte_pfn(pte), dpfn; + struct mm_struct *mm = vma->vm_mm; pgprot_t hugeprot; pte_t orig_pte; if (!pte_cont(pte)) return ptep_set_access_flags(vma, addr, ptep, pte, dirty); - ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); + ncontig = find_num_contig(mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; if (!__cont_access_flags_changed(ptep, pte, ncontig)) return 0; - orig_pte = get_clear_contig(vma->vm_mm, addr, ptep, pgsize, ncontig); + orig_pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig); /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) @@ -470,7 +484,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, hugeprot = pte_pgprot(pte); for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) - set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); + set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); return 1; } @@ -492,7 +506,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, ncontig = find_num_contig(mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; - pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig); + pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig); pte = pte_wrprotect(pte); hugeprot = pte_pgprot(pte); @@ -505,17 +519,15 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { + struct mm_struct *mm = vma->vm_mm; size_t pgsize; int ncontig; - pte_t orig_pte; if (!pte_cont(READ_ONCE(*ptep))) return ptep_clear_flush(vma, addr, ptep); - ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); - orig_pte = get_clear_contig(vma->vm_mm, addr, ptep, pgsize, ncontig); - flush_tlb_range(vma, addr, addr + pgsize * ncontig); - return orig_pte; + ncontig = find_num_contig(mm, addr, ptep, &pgsize); + return get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig); } static int __init hugetlbpage_init(void) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 1920d52653b4..53a912befb62 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -54,7 +54,6 @@ config LOONGARCH select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE select GENERIC_ENTRY - select GENERIC_FIND_FIRST_BIT select GENERIC_GETTIMEOFDAY select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_PROBE @@ -77,7 +76,6 @@ config LOONGARCH select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING - select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_EXIT_THREAD @@ -86,8 +84,6 @@ config LOONGARCH select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_IRQ_TIME_ACCOUNTING - select HAVE_MEMBLOCK - select HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_PERF_EVENTS diff --git a/arch/loongarch/include/asm/branch.h b/arch/loongarch/include/asm/branch.h index 3f33c89f35b4..9a133e4c068e 100644 --- a/arch/loongarch/include/asm/branch.h +++ b/arch/loongarch/include/asm/branch.h @@ -12,10 +12,9 @@ static inline unsigned long exception_era(struct pt_regs *regs) return regs->csr_era; } -static inline int compute_return_era(struct pt_regs *regs) +static inline void compute_return_era(struct pt_regs *regs) { regs->csr_era += 4; - return 0; } #endif /* _ASM_BRANCH_H */ diff --git a/arch/loongarch/include/asm/fpregdef.h b/arch/loongarch/include/asm/fpregdef.h index adb16e4b43b0..b6be527831dd 100644 --- a/arch/loongarch/include/asm/fpregdef.h +++ b/arch/loongarch/include/asm/fpregdef.h @@ -48,6 +48,5 @@ #define fcsr1 $r1 #define fcsr2 $r2 #define fcsr3 $r3 -#define vcsr16 $r16 #endif /* _ASM_FPREGDEF_H */ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index 3dba4986f6c9..dc47fc724fa1 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -6,6 +6,7 @@ #define _ASM_PAGE_H #include <linux/const.h> +#include <asm/addrspace.h> /* * PAGE_SHIFT determines the page size diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 5dc84d8f18d6..d9e86cfa53e2 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -426,6 +426,11 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, #define kern_addr_valid(addr) (1) +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT; +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* We don't have hardware dirty/accessed bits, generic_pmdp_establish is fine.*/ @@ -497,11 +502,6 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd) return pmd; } -static inline unsigned long pmd_pfn(pmd_t pmd) -{ - return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT; -} - static inline struct page *pmd_page(pmd_t pmd) { if (pmd_trans_huge(pmd)) diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h index 1d63c934b289..57ec45aa078e 100644 --- a/arch/loongarch/include/asm/processor.h +++ b/arch/loongarch/include/asm/processor.h @@ -80,7 +80,6 @@ BUILD_FPR_ACCESS(64) struct loongarch_fpu { unsigned int fcsr; - unsigned int vcsr; uint64_t fcc; /* 8x8 */ union fpureg fpr[NUM_FPU_REGS]; }; @@ -161,7 +160,6 @@ struct thread_struct { */ \ .fpu = { \ .fcsr = 0, \ - .vcsr = 0, \ .fcc = 0, \ .fpr = {{{0,},},}, \ }, \ diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index bfb65eb2844f..20cd9e16a95a 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -166,7 +166,6 @@ void output_thread_fpu_defines(void) OFFSET(THREAD_FCSR, loongarch_fpu, fcsr); OFFSET(THREAD_FCC, loongarch_fpu, fcc); - OFFSET(THREAD_VCSR, loongarch_fpu, vcsr); BLANK(); } diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 6c87ea36b257..529ab8f44ec6 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -263,7 +263,7 @@ void cpu_probe(void) c->cputype = CPU_UNKNOWN; c->processor_id = read_cpucfg(LOONGARCH_CPUCFG0); - c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) >> 3) & 0x3; + c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) & CPUCFG2_FPVERS) >> 3; c->fpu_csr0 = FPU_CSR_RN; c->fpu_mask = FPU_CSR_RSVD; diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 75c6ce0682a2..a631a7137667 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -146,16 +146,6 @@ movgr2fcsr fcsr0, \tmp0 .endm - .macro sc_save_vcsr base, tmp0 - movfcsr2gr \tmp0, vcsr16 - EX st.w \tmp0, \base, 0 - .endm - - .macro sc_restore_vcsr base, tmp0 - EX ld.w \tmp0, \base, 0 - movgr2fcsr vcsr16, \tmp0 - .endm - /* * Save a thread's fp context. */ diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index e596dfcd924b..d01e62dd414f 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -14,8 +14,6 @@ __REF -SYM_ENTRY(_stext, SYM_L_GLOBAL, SYM_A_NONE) - SYM_CODE_START(kernel_entry) # kernel entry point /* Config direct window and set PG */ diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index a76f547a5aa3..a13f92593cfd 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -429,7 +429,6 @@ int __init init_numa_memory(void) return 0; } -EXPORT_SYMBOL(init_numa_memory); #endif void __init paging_init(void) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index e4060f84a221..1bf58c65e2bf 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -475,8 +475,7 @@ asmlinkage void noinstr do_ri(struct pt_regs *regs) die_if_kernel("Reserved instruction in kernel code", regs); - if (unlikely(compute_return_era(regs) < 0)) - goto out; + compute_return_era(regs); if (unlikely(get_user(opcode, era) < 0)) { status = SIGSEGV; diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 9d508158fe1a..69c76f26c1c5 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -37,6 +37,7 @@ SECTIONS HEAD_TEXT_SECTION . = ALIGN(PECOFF_SEGMENT_ALIGN); + _stext = .; .text : { TEXT_TEXT SCHED_TEXT @@ -101,6 +102,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + ELF_DETAILS .gptab.sdata : { *(.gptab.data) diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index e272f8ac57d1..9818ce11546b 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -281,15 +281,16 @@ void setup_tlb_handler(int cpu) if (pcpu_handlers[cpu]) return; - page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, get_order(vec_sz)); + page = alloc_pages_node(cpu_to_node(cpu), GFP_ATOMIC, get_order(vec_sz)); if (!page) return; addr = page_address(page); - pcpu_handlers[cpu] = virt_to_phys(addr); + pcpu_handlers[cpu] = (unsigned long)addr; memcpy((void *)addr, (void *)eentry, vec_sz); local_flush_icache_range((unsigned long)addr, (unsigned long)addr + vec_sz); - csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_TLBRENTRY); + csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY); + csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY); csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY); } #endif diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 6b6e16732c60..92e404032257 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -21,6 +21,7 @@ ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS)) endif cflags-vdso := $(ccflags-vdso) \ + -isystem $(shell $(CC) -print-file-name=include) \ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ -O2 -g -fno-strict-aliasing -fno-common -fno-builtin -G0 \ -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi index b0a034b468bb..42e69664efd9 100644 --- a/arch/mips/boot/dts/ingenic/x1000.dtsi +++ b/arch/mips/boot/dts/ingenic/x1000.dtsi @@ -111,8 +111,9 @@ clocks = <&cgu X1000_CLK_RTCLK>, <&cgu X1000_CLK_EXCLK>, - <&cgu X1000_CLK_PCLK>; - clock-names = "rtc", "ext", "pclk"; + <&cgu X1000_CLK_PCLK>, + <&cgu X1000_CLK_TCU>; + clock-names = "rtc", "ext", "pclk", "tcu"; interrupt-controller; #interrupt-cells = <1>; diff --git a/arch/mips/boot/dts/ingenic/x1830.dtsi b/arch/mips/boot/dts/ingenic/x1830.dtsi index dbf21afaccb1..65a5da71c199 100644 --- a/arch/mips/boot/dts/ingenic/x1830.dtsi +++ b/arch/mips/boot/dts/ingenic/x1830.dtsi @@ -104,8 +104,9 @@ clocks = <&cgu X1830_CLK_RTCLK>, <&cgu X1830_CLK_EXCLK>, - <&cgu X1830_CLK_PCLK>; - clock-names = "rtc", "ext", "pclk"; + <&cgu X1830_CLK_PCLK>, + <&cgu X1830_CLK_TCU>; + clock-names = "rtc", "ext", "pclk", "tcu"; interrupt-controller; #interrupt-cells = <1>; diff --git a/arch/mips/generic/board-ranchu.c b/arch/mips/generic/board-ranchu.c index a89aaad59cb1..930c45041882 100644 --- a/arch/mips/generic/board-ranchu.c +++ b/arch/mips/generic/board-ranchu.c @@ -44,6 +44,7 @@ static __init unsigned int ranchu_measure_hpt_freq(void) __func__); rtc_base = of_iomap(np, 0); + of_node_put(np); if (!rtc_base) panic("%s(): Failed to ioremap Goldfish RTC base!", __func__); diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c index 5204fc6d6d50..1187729d8cbb 100644 --- a/arch/mips/lantiq/falcon/sysctrl.c +++ b/arch/mips/lantiq/falcon/sysctrl.c @@ -208,6 +208,12 @@ void __init ltq_soc_init(void) of_address_to_resource(np_sysgpe, 0, &res_sys[2])) panic("Failed to get core resources"); + of_node_put(np_status); + of_node_put(np_ebu); + of_node_put(np_sys1); + of_node_put(np_syseth); + of_node_put(np_sysgpe); + if ((request_mem_region(res_status.start, resource_size(&res_status), res_status.name) < 0) || (request_mem_region(res_ebu.start, resource_size(&res_ebu), diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index b732495f138a..20622bf0a9b3 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -408,6 +408,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent) if (!ltq_eiu_membase) panic("Failed to remap eiu memory"); } + of_node_put(eiu_node); return 0; } diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 084f6caba5f2..d444a1b98a72 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -441,6 +441,10 @@ void __init ltq_soc_init(void) of_address_to_resource(np_ebu, 0, &res_ebu)) panic("Failed to get core resources"); + of_node_put(np_pmu); + of_node_put(np_cgu); + of_node_put(np_ebu); + if (!request_mem_region(res_pmu.start, resource_size(&res_pmu), res_pmu.name) || !request_mem_region(res_cgu.start, resource_size(&res_cgu), diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index bbf1e38e1431..2cb708cdf01a 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -214,6 +214,8 @@ static void update_gic_frequency_dt(void) if (of_update_property(node, &gic_frequency_prop) < 0) pr_err("error updating gic frequency property\n"); + + of_node_put(node); } #endif diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c index 129915616763..d9c8c4e46aff 100644 --- a/arch/mips/pic32/pic32mzda/init.c +++ b/arch/mips/pic32/pic32mzda/init.c @@ -98,13 +98,18 @@ static int __init pic32_of_prepare_platform_data(struct of_dev_auxdata *lookup) np = of_find_compatible_node(NULL, NULL, lookup->compatible); if (np) { lookup->name = (char *)np->name; - if (lookup->phys_addr) + if (lookup->phys_addr) { + of_node_put(np); continue; + } if (!of_address_to_resource(np, 0, &res)) lookup->phys_addr = res.start; + of_node_put(np); } } + of_node_put(root); + return 0; } diff --git a/arch/mips/pic32/pic32mzda/time.c b/arch/mips/pic32/pic32mzda/time.c index 7174e9abbb1b..777b515c52c8 100644 --- a/arch/mips/pic32/pic32mzda/time.c +++ b/arch/mips/pic32/pic32mzda/time.c @@ -32,6 +32,9 @@ static unsigned int pic32_xlate_core_timer_irq(void) goto default_map; irq = irq_of_parse_and_map(node, 0); + + of_node_put(node); + if (!irq) goto default_map; diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 587c7b998769..ea8072acf8d9 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -40,6 +40,8 @@ __iomem void *plat_of_remap_node(const char *node) if (of_address_to_resource(np, 0, &res)) panic("Failed to get resource for %s", node); + of_node_put(np); + if (!request_mem_region(res.start, resource_size(&res), res.name)) diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c index 7b7f25b4b057..9240bcdbe74e 100644 --- a/arch/mips/vr41xx/common/icu.c +++ b/arch/mips/vr41xx/common/icu.c @@ -640,8 +640,6 @@ static int icu_get_irq(unsigned int irq) printk(KERN_ERR "spurious ICU interrupt: %04x,%04x\n", pend1, pend2); - atomic_inc(&irq_err_count); - return -1; } diff --git a/arch/openrisc/kernel/unwinder.c b/arch/openrisc/kernel/unwinder.c index 8ae15c2c1845..c6ad6f867a6a 100644 --- a/arch/openrisc/kernel/unwinder.c +++ b/arch/openrisc/kernel/unwinder.c @@ -25,7 +25,7 @@ struct or1k_frameinfo { /* * Verify a frameinfo structure. The return address should be a valid text * address. The frame pointer may be null if its the last frame, otherwise - * the frame pointer should point to a location in the stack after the the + * the frame pointer should point to a location in the stack after the * top of the next frame up. */ static inline int or1k_frameinfo_valid(struct or1k_frameinfo *frameinfo) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 5f2448dc5a2b..fa400055b2d5 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -10,6 +10,7 @@ config PARISC select ARCH_WANT_FRAME_POINTERS select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAS_PTE_SPECIAL select ARCH_NO_SG_CHAIN diff --git a/arch/parisc/include/asm/fb.h b/arch/parisc/include/asm/fb.h index d63a2acb91f2..55d29c4f716e 100644 --- a/arch/parisc/include/asm/fb.h +++ b/arch/parisc/include/asm/fb.h @@ -12,7 +12,7 @@ static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; } -#if defined(CONFIG_STI_CONSOLE) || defined(CONFIG_FB_STI) +#if defined(CONFIG_FB_STI) int fb_is_primary_device(struct fb_info *info); #else static inline int fb_is_primary_device(struct fb_info *info) diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 2673d57eeb00..94652e13c260 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -224,8 +224,13 @@ int main(void) BLANK(); DEFINE(ASM_SIGFRAME_SIZE, PARISC_RT_SIGFRAME_SIZE); DEFINE(SIGFRAME_CONTEXT_REGS, offsetof(struct rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE); +#ifdef CONFIG_64BIT DEFINE(ASM_SIGFRAME_SIZE32, PARISC_RT_SIGFRAME_SIZE32); DEFINE(SIGFRAME_CONTEXT_REGS32, offsetof(struct compat_rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE32); +#else + DEFINE(ASM_SIGFRAME_SIZE32, PARISC_RT_SIGFRAME_SIZE); + DEFINE(SIGFRAME_CONTEXT_REGS32, offsetof(struct rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE); +#endif BLANK(); DEFINE(ICACHE_BASE, offsetof(struct pdc_cache_info, ic_base)); DEFINE(ICACHE_STRIDE, offsetof(struct pdc_cache_info, ic_stride)); diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index c8a11fcecf4c..a9bc578e4c52 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -722,7 +722,10 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned lon return; if (parisc_requires_coherency()) { - flush_user_cache_page(vma, vmaddr); + if (vma->vm_flags & VM_SHARED) + flush_data_cache(); + else + flush_user_cache_page(vma, vmaddr); return; } diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index ed1e88a74dc4..bac581b5ecfc 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -146,7 +146,7 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) " depw %%r0,31,2,%4\n" "1: ldw 0(%%sr1,%4),%0\n" "2: ldw 4(%%sr1,%4),%3\n" -" subi 32,%4,%2\n" +" subi 32,%2,%2\n" " mtctl %2,11\n" " vshd %0,%3,%0\n" "3: \n" diff --git a/arch/parisc/math-emu/decode_exc.c b/arch/parisc/math-emu/decode_exc.c index 494ca41df05d..d41ddb3430b5 100644 --- a/arch/parisc/math-emu/decode_exc.c +++ b/arch/parisc/math-emu/decode_exc.c @@ -102,7 +102,7 @@ decode_fpu(unsigned int Fpu_register[], unsigned int trap_counts[]) * that happen. Want to keep this overhead low, but still provide * some information to the customer. All exits from this routine * need to restore Fpu_register[0] - */ + */ bflags=(Fpu_register[0] & 0xf8000000); Fpu_register[0] &= 0x07ffffff; diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c22f58155948..fcbb81feb7ad 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -38,7 +38,7 @@ config RISCV select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU select ARCH_SUPPORTS_HUGETLBFS if MMU - select ARCH_SUPPORTS_PAGE_TABLE_CHECK + select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU @@ -364,8 +364,13 @@ config RISCV_ISA_SVPBMT select RISCV_ALTERNATIVE default y help - Adds support to dynamically detect the presence of the SVPBMT extension - (Supervisor-mode: page-based memory types) and enable its usage. + Adds support to dynamically detect the presence of the SVPBMT + ISA-extension (Supervisor-mode: page-based memory types) and + enable its usage. + + The memory type for a page contains a combination of attributes + that indicate the cacheability, idempotency, and ordering + properties for access to that page. The SVPBMT extension is only available on 64Bit cpus. diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas index ebfcd5cc6eaf..457ac72c9b36 100644 --- a/arch/riscv/Kconfig.erratas +++ b/arch/riscv/Kconfig.erratas @@ -35,6 +35,7 @@ config ERRATA_SIFIVE_CIP_1200 config ERRATA_THEAD bool "T-HEAD errata" + depends on !XIP_KERNEL select RISCV_ALTERNATIVE help All T-HEAD errata Kconfig depend on this Kconfig. Disabling diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index 8c3259134194..496d3b7642bd 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -50,6 +50,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu1_intc: interrupt-controller { @@ -77,6 +78,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu2_intc: interrupt-controller { @@ -104,6 +106,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu3_intc: interrupt-controller { @@ -131,6 +134,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu4_intc: interrupt-controller { #interrupt-cells = <1>; @@ -192,6 +196,15 @@ riscv,ndev = <186>; }; + pdma: dma-controller@3000000 { + compatible = "sifive,fu540-c000-pdma", "sifive,pdma0"; + reg = <0x0 0x3000000 0x0 0x8000>; + interrupt-parent = <&plic>; + interrupts = <5 6>, <7 8>, <9 10>, <11 12>; + dma-channels = <4>; + #dma-cells = <1>; + }; + clkcfg: clkcfg@20002000 { compatible = "microchip,mpfs-clkcfg"; reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>; diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index 672f02b21ce0..1031038423e7 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -111,6 +111,7 @@ void __init_or_module sifive_errata_patch_func(struct alt_entry *begin, cpu_apply_errata |= tmp; } } - if (cpu_apply_errata != cpu_req_errata) + if (stage != RISCV_ALTERNATIVES_MODULE && + cpu_apply_errata != cpu_req_errata) warn_miss_errata(cpu_req_errata - cpu_apply_errata); } diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 9e2888dbb5b1..416ead0f9a65 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -75,20 +75,20 @@ asm volatile(ALTERNATIVE( \ "nop\n\t" \ "nop\n\t" \ "nop", \ - "li t3, %2\n\t" \ - "slli t3, t3, %4\n\t" \ + "li t3, %1\n\t" \ + "slli t3, t3, %3\n\t" \ "and t3, %0, t3\n\t" \ "bne t3, zero, 2f\n\t" \ - "li t3, %3\n\t" \ - "slli t3, t3, %4\n\t" \ + "li t3, %2\n\t" \ + "slli t3, t3, %3\n\t" \ "or %0, %0, t3\n\t" \ "2:", THEAD_VENDOR_ID, \ ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \ : "+r"(_val) \ - : "0"(_val), \ - "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \ + : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \ "I"(_PAGE_PMA_THEAD >> ALT_THEAD_PBMT_SHIFT), \ - "I"(ALT_THEAD_PBMT_SHIFT)) + "I"(ALT_THEAD_PBMT_SHIFT) \ + : "t3") #else #define ALT_THEAD_PMA(_val) #endif diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 5c2aba5efbd0..dc42375c2357 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -175,7 +175,7 @@ static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot) static inline unsigned long _pud_pfn(pud_t pud) { - return pud_val(pud) >> _PAGE_PFN_SHIFT; + return __page_val_to_pfn(pud_val(pud)); } static inline pmd_t *pud_pgtable(pud_t pud) @@ -278,13 +278,13 @@ static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot) static inline unsigned long _p4d_pfn(p4d_t p4d) { - return p4d_val(p4d) >> _PAGE_PFN_SHIFT; + return __page_val_to_pfn(p4d_val(p4d)); } static inline pud_t *p4d_pgtable(p4d_t p4d) { if (pgtable_l4_enabled) - return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT); + return (pud_t *)pfn_to_virt(__page_val_to_pfn(p4d_val(p4d))); return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) }); } @@ -292,7 +292,7 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) static inline struct page *p4d_page(p4d_t p4d) { - return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT); + return pfn_to_page(__page_val_to_pfn(p4d_val(p4d))); } #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) @@ -347,7 +347,7 @@ static inline void pgd_clear(pgd_t *pgd) static inline p4d_t *pgd_pgtable(pgd_t pgd) { if (pgtable_l5_enabled) - return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT); + return (p4d_t *)pfn_to_virt(__page_val_to_pfn(pgd_val(pgd))); return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) }); } @@ -355,7 +355,7 @@ static inline p4d_t *pgd_pgtable(pgd_t pgd) static inline struct page *pgd_page(pgd_t pgd) { - return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT); + return pfn_to_page(__page_val_to_pfn(pgd_val(pgd))); } #define pgd_page(pgd) pgd_page(pgd) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 1d1be9d9419c..5dbd6610729b 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -261,7 +261,7 @@ static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot) static inline unsigned long _pgd_pfn(pgd_t pgd) { - return pgd_val(pgd) >> _PAGE_PFN_SHIFT; + return __page_val_to_pfn(pgd_val(pgd)); } static inline struct page *pmd_page(pmd_t pmd) @@ -590,14 +590,14 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd) return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE)); } -#define __pmd_to_phys(pmd) (pmd_val(pmd) >> _PAGE_PFN_SHIFT << PAGE_SHIFT) +#define __pmd_to_phys(pmd) (__page_val_to_pfn(pmd_val(pmd)) << PAGE_SHIFT) static inline unsigned long pmd_pfn(pmd_t pmd) { return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT); } -#define __pud_to_phys(pud) (pud_val(pud) >> _PAGE_PFN_SHIFT << PAGE_SHIFT) +#define __pud_to_phys(pud) (__page_val_to_pfn(pud_val(pud)) << PAGE_SHIFT) static inline unsigned long pud_pfn(pud_t pud) { diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index a6f62a6d1edd..12b05ce164bb 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -293,7 +293,6 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, unsigned int stage) { u32 cpu_req_feature = cpufeature_probe(stage); - u32 cpu_apply_feature = 0; struct alt_entry *alt; u32 tmp; @@ -307,10 +306,8 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, } tmp = (1U << alt->errata_id); - if (cpu_req_feature & tmp) { + if (cpu_req_feature & tmp) patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); - cpu_apply_feature |= tmp; - } } } #endif diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 1c00695ebee7..9826073fbc67 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -54,7 +54,7 @@ static inline unsigned long gstage_pte_index(gpa_t addr, u32 level) static inline unsigned long gstage_pte_page_vaddr(pte_t pte) { - return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT); + return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte))); } static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 7f4ad5e4373a..f3455dc013fa 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -781,9 +781,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { + kvm_vcpu_srcu_read_unlock(vcpu); rcuwait_wait_event(wait, (!vcpu->arch.power_off) && (!vcpu->arch.pause), TASK_INTERRUPTIBLE); + kvm_vcpu_srcu_read_lock(vcpu); if (vcpu->arch.power_off || vcpu->arch.pause) { /* diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c index 9f764df125db..6cd93995fb65 100644 --- a/arch/riscv/kvm/vmid.c +++ b/arch/riscv/kvm/vmid.c @@ -97,7 +97,7 @@ void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu) * We ran out of VMIDs so we increment vmid_version and * start assigning VMIDs from 1. * - * This also means existing VMIDs assignement to all Guest + * This also means existing VMIDs assignment to all Guest * instances is invalid and we have force VMID re-assignement * for all Guest instances. The Guest instances that were not * running will automatically pick-up new VMIDs because will diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 91c0b80a8bf0..8cd9e56c629b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -484,7 +484,6 @@ config KEXEC config KEXEC_FILE bool "kexec file based system call" select KEXEC_CORE - select BUILD_BIN2C depends on CRYPTO depends on CRYPTO_SHA256 depends on CRYPTO_SHA256_S390 diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 495c68a4df1e..4cb5d17e7ead 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -82,7 +82,7 @@ endif ifdef CONFIG_EXPOLINE ifdef CONFIG_EXPOLINE_EXTERN - KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline.o + KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline/expoline.o CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern CC_FLAGS_EXPOLINE += -mfunction-return=thunk-extern else @@ -163,6 +163,12 @@ vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/s390/kernel/vdso64 include/generated/vdso64-offsets.h $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \ $(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h) + +ifdef CONFIG_EXPOLINE_EXTERN +modules_prepare: expoline_prepare +expoline_prepare: + $(Q)$(MAKE) $(build)=arch/s390/lib/expoline arch/s390/lib/expoline/expoline.o +endif endif # Don't use tabs in echo arguments diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 56007c763902..1f2d40993c4d 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -4,232 +4,15 @@ * * Copyright IBM Corp. 2017, 2020 * Author(s): Harald Freudenberger - * - * The s390_arch_random_generate() function may be called from random.c - * in interrupt context. So this implementation does the best to be very - * fast. There is a buffer of random data which is asynchronously checked - * and filled by a workqueue thread. - * If there are enough bytes in the buffer the s390_arch_random_generate() - * just delivers these bytes. Otherwise false is returned until the - * worker thread refills the buffer. - * The worker fills the rng buffer by pulling fresh entropy from the - * high quality (but slow) true hardware random generator. This entropy - * is then spread over the buffer with an pseudo random generator PRNG. - * As the arch_get_random_seed_long() fetches 8 bytes and the calling - * function add_interrupt_randomness() counts this as 1 bit entropy the - * distribution needs to make sure there is in fact 1 bit entropy contained - * in 8 bytes of the buffer. The current values pull 32 byte entropy - * and scatter this into a 2048 byte buffer. So 8 byte in the buffer - * will contain 1 bit of entropy. - * The worker thread is rescheduled based on the charge level of the - * buffer but at least with 500 ms delay to avoid too much CPU consumption. - * So the max. amount of rng data delivered via arch_get_random_seed is - * limited to 4k bytes per second. */ #include <linux/kernel.h> #include <linux/atomic.h> #include <linux/random.h> -#include <linux/slab.h> #include <linux/static_key.h> -#include <linux/workqueue.h> -#include <linux/moduleparam.h> #include <asm/cpacf.h> DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0); EXPORT_SYMBOL(s390_arch_random_counter); - -#define ARCH_REFILL_TICKS (HZ/2) -#define ARCH_PRNG_SEED_SIZE 32 -#define ARCH_RNG_BUF_SIZE 2048 - -static DEFINE_SPINLOCK(arch_rng_lock); -static u8 *arch_rng_buf; -static unsigned int arch_rng_buf_idx; - -static void arch_rng_refill_buffer(struct work_struct *); -static DECLARE_DELAYED_WORK(arch_rng_work, arch_rng_refill_buffer); - -bool s390_arch_random_generate(u8 *buf, unsigned int nbytes) -{ - /* max hunk is ARCH_RNG_BUF_SIZE */ - if (nbytes > ARCH_RNG_BUF_SIZE) - return false; - - /* lock rng buffer */ - if (!spin_trylock(&arch_rng_lock)) - return false; - - /* try to resolve the requested amount of bytes from the buffer */ - arch_rng_buf_idx -= nbytes; - if (arch_rng_buf_idx < ARCH_RNG_BUF_SIZE) { - memcpy(buf, arch_rng_buf + arch_rng_buf_idx, nbytes); - atomic64_add(nbytes, &s390_arch_random_counter); - spin_unlock(&arch_rng_lock); - return true; - } - - /* not enough bytes in rng buffer, refill is done asynchronously */ - spin_unlock(&arch_rng_lock); - - return false; -} -EXPORT_SYMBOL(s390_arch_random_generate); - -static void arch_rng_refill_buffer(struct work_struct *unused) -{ - unsigned int delay = ARCH_REFILL_TICKS; - - spin_lock(&arch_rng_lock); - if (arch_rng_buf_idx > ARCH_RNG_BUF_SIZE) { - /* buffer is exhausted and needs refill */ - u8 seed[ARCH_PRNG_SEED_SIZE]; - u8 prng_wa[240]; - /* fetch ARCH_PRNG_SEED_SIZE bytes of entropy */ - cpacf_trng(NULL, 0, seed, sizeof(seed)); - /* blow this entropy up to ARCH_RNG_BUF_SIZE with PRNG */ - memset(prng_wa, 0, sizeof(prng_wa)); - cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, - &prng_wa, NULL, 0, seed, sizeof(seed)); - cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, - &prng_wa, arch_rng_buf, ARCH_RNG_BUF_SIZE, NULL, 0); - arch_rng_buf_idx = ARCH_RNG_BUF_SIZE; - } - delay += (ARCH_REFILL_TICKS * arch_rng_buf_idx) / ARCH_RNG_BUF_SIZE; - spin_unlock(&arch_rng_lock); - - /* kick next check */ - queue_delayed_work(system_long_wq, &arch_rng_work, delay); -} - -/* - * Here follows the implementation of s390_arch_get_random_long(). - * - * The random longs to be pulled by arch_get_random_long() are - * prepared in an 4K buffer which is filled from the NIST 800-90 - * compliant s390 drbg. By default the random long buffer is refilled - * 256 times before the drbg itself needs a reseed. The reseed of the - * drbg is done with 32 bytes fetched from the high quality (but slow) - * trng which is assumed to deliver 100% entropy. So the 32 * 8 = 256 - * bits of entropy are spread over 256 * 4KB = 1MB serving 131072 - * arch_get_random_long() invocations before reseeded. - * - * How often the 4K random long buffer is refilled with the drbg - * before the drbg is reseeded can be adjusted. There is a module - * parameter 's390_arch_rnd_long_drbg_reseed' accessible via - * /sys/module/arch_random/parameters/rndlong_drbg_reseed - * or as kernel command line parameter - * arch_random.rndlong_drbg_reseed=<value> - * This parameter tells how often the drbg fills the 4K buffer before - * it is re-seeded by fresh entropy from the trng. - * A value of 16 results in reseeding the drbg at every 16 * 4 KB = 64 - * KB with 32 bytes of fresh entropy pulled from the trng. So a value - * of 16 would result in 256 bits entropy per 64 KB. - * A value of 256 results in 1MB of drbg output before a reseed of the - * drbg is done. So this would spread the 256 bits of entropy among 1MB. - * Setting this parameter to 0 forces the reseed to take place every - * time the 4K buffer is depleted, so the entropy rises to 256 bits - * entropy per 4K or 0.5 bit entropy per arch_get_random_long(). With - * setting this parameter to negative values all this effort is - * disabled, arch_get_random long() returns false and thus indicating - * that the arch_get_random_long() feature is disabled at all. - */ - -static unsigned long rndlong_buf[512]; -static DEFINE_SPINLOCK(rndlong_lock); -static int rndlong_buf_index; - -static int rndlong_drbg_reseed = 256; -module_param_named(rndlong_drbg_reseed, rndlong_drbg_reseed, int, 0600); -MODULE_PARM_DESC(rndlong_drbg_reseed, "s390 arch_get_random_long() drbg reseed"); - -static inline void refill_rndlong_buf(void) -{ - static u8 prng_ws[240]; - static int drbg_counter; - - if (--drbg_counter < 0) { - /* need to re-seed the drbg */ - u8 seed[32]; - - /* fetch seed from trng */ - cpacf_trng(NULL, 0, seed, sizeof(seed)); - /* seed drbg */ - memset(prng_ws, 0, sizeof(prng_ws)); - cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, - &prng_ws, NULL, 0, seed, sizeof(seed)); - /* re-init counter for drbg */ - drbg_counter = rndlong_drbg_reseed; - } - - /* fill the arch_get_random_long buffer from drbg */ - cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &prng_ws, - (u8 *) rndlong_buf, sizeof(rndlong_buf), - NULL, 0); -} - -bool s390_arch_get_random_long(unsigned long *v) -{ - bool rc = false; - unsigned long flags; - - /* arch_get_random_long() disabled ? */ - if (rndlong_drbg_reseed < 0) - return false; - - /* try to lock the random long lock */ - if (!spin_trylock_irqsave(&rndlong_lock, flags)) - return false; - - if (--rndlong_buf_index >= 0) { - /* deliver next long value from the buffer */ - *v = rndlong_buf[rndlong_buf_index]; - rc = true; - goto out; - } - - /* buffer is depleted and needs refill */ - if (in_interrupt()) { - /* delay refill in interrupt context to next caller */ - rndlong_buf_index = 0; - goto out; - } - - /* refill random long buffer */ - refill_rndlong_buf(); - rndlong_buf_index = ARRAY_SIZE(rndlong_buf); - - /* and provide one random long */ - *v = rndlong_buf[--rndlong_buf_index]; - rc = true; - -out: - spin_unlock_irqrestore(&rndlong_lock, flags); - return rc; -} -EXPORT_SYMBOL(s390_arch_get_random_long); - -static int __init s390_arch_random_init(void) -{ - /* all the needed PRNO subfunctions available ? */ - if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG) && - cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) { - - /* alloc arch random working buffer */ - arch_rng_buf = kmalloc(ARCH_RNG_BUF_SIZE, GFP_KERNEL); - if (!arch_rng_buf) - return -ENOMEM; - - /* kick worker queue job to fill the random buffer */ - queue_delayed_work(system_long_wq, - &arch_rng_work, ARCH_REFILL_TICKS); - - /* enable arch random to the outside world */ - static_branch_enable(&s390_arch_random_available); - } - - return 0; -} -arch_initcall(s390_arch_random_init); diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index 5dc712fde3c7..2c6e1c6ecbe7 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -15,17 +15,13 @@ #include <linux/static_key.h> #include <linux/atomic.h> +#include <asm/cpacf.h> DECLARE_STATIC_KEY_FALSE(s390_arch_random_available); extern atomic64_t s390_arch_random_counter; -bool s390_arch_get_random_long(unsigned long *v); -bool s390_arch_random_generate(u8 *buf, unsigned int nbytes); - static inline bool __must_check arch_get_random_long(unsigned long *v) { - if (static_branch_likely(&s390_arch_random_available)) - return s390_arch_get_random_long(v); return false; } @@ -37,7 +33,9 @@ static inline bool __must_check arch_get_random_int(unsigned int *v) static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { if (static_branch_likely(&s390_arch_random_available)) { - return s390_arch_random_generate((u8 *)v, sizeof(*v)); + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; } return false; } @@ -45,7 +43,9 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v) static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { if (static_branch_likely(&s390_arch_random_available)) { - return s390_arch_random_generate((u8 *)v, sizeof(*v)); + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; } return false; } diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index d910d71b5bb5..7e9e99523e95 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -2,8 +2,6 @@ #ifndef _ASM_S390_NOSPEC_ASM_H #define _ASM_S390_NOSPEC_ASM_H -#include <asm/alternative-asm.h> -#include <asm/asm-offsets.h> #include <asm/dwarf.h> #ifdef __ASSEMBLY__ diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 54ae2dc65e3b..2f983e0b95e0 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -133,9 +133,9 @@ struct slibe { * @sb_count: number of storage blocks * @sba: storage block element addresses * @dcount: size of storage block elements - * @user0: user defineable value - * @res4: reserved paramater - * @user1: user defineable value + * @user0: user definable value + * @res4: reserved parameter + * @user1: user definable value */ struct qaob { u64 res0[6]; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index a2c1c55daec0..28124d0fa1d5 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -219,6 +219,11 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize, unsigned long src; int rc; + if (!(iter_is_iovec(iter) || iov_iter_is_kvec(iter))) + return -EINVAL; + /* Multi-segment iterators are not supported */ + if (iter->nr_segs > 1) + return -EINVAL; if (!csize) return 0; src = pfn_to_phys(pfn) + offset; @@ -228,7 +233,10 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize, rc = copy_oldmem_user(iter->iov->iov_base, src, csize); else rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize); - return rc; + if (rc < 0) + return rc; + iov_iter_advance(iter, csize); + return csize; } /* diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 483ab5e10164..f7dd3c849e68 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -516,6 +516,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) return err; } +/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different + * attribute::type values: + * - PERF_TYPE_HARDWARE: + * - pmu->type: + * Handle both type of invocations identical. They address the same hardware. + * The result is different when event modifiers exclude_kernel and/or + * exclude_user are also set. + */ +static int cpumf_pmu_event_type(struct perf_event *event) +{ + u64 ev = event->attr.config; + + if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev) + return PERF_TYPE_HARDWARE; + return PERF_TYPE_RAW; +} + static int cpumf_pmu_event_init(struct perf_event *event) { unsigned int type = event->attr.type; @@ -525,7 +545,7 @@ static int cpumf_pmu_event_init(struct perf_event *event) err = __hw_perf_event_init(event, type); else if (event->pmu->type == type) /* Registered as unknown PMU */ - err = __hw_perf_event_init(event, PERF_TYPE_RAW); + err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); else return -ENOENT; diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 8c1545946d85..b38b4ae01589 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -193,8 +193,9 @@ static int paicrypt_event_init(struct perf_event *event) /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */ if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) return -ENOENT; - /* PAI crypto event must be valid */ - if (a->config > PAI_CRYPTO_BASE + paicrypt_cnt) + /* PAI crypto event must be in valid range */ + if (a->config < PAI_CRYPTO_BASE || + a->config > PAI_CRYPTO_BASE + paicrypt_cnt) return -EINVAL; /* Allow only CPU wide operation, no process context for now. */ if (event->hw.target || event->cpu == -1) @@ -208,6 +209,12 @@ static int paicrypt_event_init(struct perf_event *event) if (rc) return rc; + /* Event initialization sets last_tag to 0. When later on the events + * are deleted and re-added, do not reset the event count value to zero. + * Events are added, deleted and re-added when 2 or more events + * are active at the same time. + */ + event->hw.last_tag = 0; cpump->event = event; event->destroy = paicrypt_event_destroy; @@ -242,9 +249,12 @@ static void paicrypt_start(struct perf_event *event, int flags) { u64 sum; - sum = paicrypt_getall(event); /* Get current value */ - local64_set(&event->hw.prev_count, sum); - local64_set(&event->count, 0); + if (!event->hw.last_tag) { + event->hw.last_tag = 1; + sum = paicrypt_getall(event); /* Get current value */ + local64_set(&event->count, 0); + local64_set(&event->hw.prev_count, sum); + } } static int paicrypt_add(struct perf_event *event, int flags) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 8d91eccc0963..0a37f5de2863 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -875,6 +875,11 @@ static void __init setup_randomness(void) if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); memblock_free(vmms, PAGE_SIZE); + +#ifdef CONFIG_ARCH_RANDOM + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + static_branch_enable(&s390_arch_random_available); +#endif } /* diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 5d415b3db6d1..580d2e3265cb 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -7,7 +7,6 @@ lib-y += delay.o string.o uaccess.o find.o spinlock.o obj-y += mem.o xor.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o -obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o obj-$(CONFIG_S390_KPROBES_SANITY_TEST) += test_kprobes_s390.o test_kprobes_s390-objs += test_kprobes_asm.o test_kprobes.o @@ -22,3 +21,5 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o + +obj-$(CONFIG_EXPOLINE_EXTERN) += expoline/ diff --git a/arch/s390/lib/expoline/Makefile b/arch/s390/lib/expoline/Makefile new file mode 100644 index 000000000000..854631d9cb03 --- /dev/null +++ b/arch/s390/lib/expoline/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y += expoline.o diff --git a/arch/s390/lib/expoline.S b/arch/s390/lib/expoline/expoline.S index 92ed8409a7a4..92ed8409a7a4 100644 --- a/arch/s390/lib/expoline.S +++ b/arch/s390/lib/expoline/expoline.S diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 360ada80d20c..d237bc6841cb 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -48,7 +48,6 @@ OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*' $(obj)/purgatory.ro: $(obj)/purgatory $(obj)/purgatory.chk FORCE $(call if_changed,objcopy) -$(obj)/kexec-purgatory.o: $(obj)/kexec-purgatory.S $(obj)/purgatory.ro FORCE - $(call if_changed_rule,as_o_S) +$(obj)/kexec-purgatory.o: $(obj)/purgatory.ro -obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += kexec-purgatory.o +obj-y += kexec-purgatory.o diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index cf9a3ec32406..fba90e670ed4 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -271,8 +271,12 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, #endif /* CONFIG_HAVE_IOREMAP_PROT */ #else /* CONFIG_MMU */ -#define iounmap(addr) do { } while (0) -#define ioremap(offset, size) ((void __iomem *)(unsigned long)(offset)) +static inline void __iomem *ioremap(phys_addr_t offset, size_t size) +{ + return (void __iomem *)(unsigned long)offset; +} + +static inline void iounmap(volatile void __iomem *addr) { } #endif /* CONFIG_MMU */ #define ioremap_uc ioremap diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index 95af12e82a32..cdbd9653aa14 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -102,8 +102,8 @@ extern unsigned long uml_physmem; * casting is the right thing, but 32-bit UML can't have 64-bit virtual * addresses */ -#define __pa(virt) to_phys((void *) (unsigned long) (virt)) -#define __va(phys) to_virt((unsigned long) (phys)) +#define __pa(virt) uml_to_phys((void *) (unsigned long) (virt)) +#define __va(phys) uml_to_virt((unsigned long) (phys)) #define phys_to_pfn(p) ((p) >> PAGE_SHIFT) #define pfn_to_phys(pfn) PFN_PHYS(pfn) diff --git a/arch/um/include/shared/mem.h b/arch/um/include/shared/mem.h index 4862c91d4213..98aacd544108 100644 --- a/arch/um/include/shared/mem.h +++ b/arch/um/include/shared/mem.h @@ -9,12 +9,12 @@ extern int phys_mapping(unsigned long phys, unsigned long long *offset_out); extern unsigned long uml_physmem; -static inline unsigned long to_phys(void *virt) +static inline unsigned long uml_to_phys(void *virt) { return(((unsigned long) virt) - uml_physmem); } -static inline void *to_virt(unsigned long phys) +static inline void *uml_to_virt(unsigned long phys) { return((void *) uml_physmem + phys); } diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 0760e24f2eba..9838967d0b2f 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -432,6 +432,10 @@ void apply_retpolines(s32 *start, s32 *end) { } +void apply_returns(s32 *start, s32 *end) +{ +} + void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 87d3129e7362..c316c993a949 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -251,7 +251,7 @@ static int userspace_tramp(void *stack) signal(SIGTERM, SIG_DFL); signal(SIGWINCH, SIG_IGN); - fd = phys_mapping(to_phys(__syscall_stub_start), &offset); + fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset); addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); if (addr == MAP_FAILED) { @@ -261,7 +261,7 @@ static int userspace_tramp(void *stack) } if (stack != NULL) { - fd = phys_mapping(to_phys(stack), &offset); + fd = phys_mapping(uml_to_phys(stack), &offset); addr = mmap((void *) STUB_DATA, UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, offset); @@ -534,7 +534,7 @@ int copy_context_skas0(unsigned long new_stack, int pid) struct stub_data *data = (struct stub_data *) current_stack; struct stub_data *child_data = (struct stub_data *) new_stack; unsigned long long new_offset; - int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset); + int new_fd = phys_mapping(uml_to_phys((void *)new_stack), &new_offset); /* * prepare offset and fd of child's stack as argument for parent's diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index be0b95e51df6..e58798f636d4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -462,29 +462,6 @@ config GOLDFISH def_bool y depends on X86_GOLDFISH -config RETPOLINE - bool "Avoid speculative indirect branches in kernel" - select OBJTOOL if HAVE_OBJTOOL - default y - help - Compile kernel with the retpoline compiler options to guard against - kernel-to-user data leaks by avoiding speculative indirect - branches. Requires a compiler with -mindirect-branch=thunk-extern - support for full protection. The kernel may run slower. - -config CC_HAS_SLS - def_bool $(cc-option,-mharden-sls=all) - -config SLS - bool "Mitigate Straight-Line-Speculation" - depends on CC_HAS_SLS && X86_64 - select OBJTOOL if HAVE_OBJTOOL - default n - help - Compile the kernel with straight-line-speculation options to guard - against straight line speculation. The kernel image might be slightly - larger. - config X86_CPU_RESCTRL bool "x86 CPU resource control support" depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) @@ -2453,6 +2430,91 @@ source "kernel/livepatch/Kconfig" endmenu +config CC_HAS_SLS + def_bool $(cc-option,-mharden-sls=all) + +config CC_HAS_RETURN_THUNK + def_bool $(cc-option,-mfunction-return=thunk-extern) + +menuconfig SPECULATION_MITIGATIONS + bool "Mitigations for speculative execution vulnerabilities" + default y + help + Say Y here to enable options which enable mitigations for + speculative execution hardware vulnerabilities. + + If you say N, all mitigations will be disabled. You really + should know what you are doing to say so. + +if SPECULATION_MITIGATIONS + +config PAGE_TABLE_ISOLATION + bool "Remove the kernel mapping in user mode" + default y + depends on (X86_64 || X86_PAE) + help + This feature reduces the number of hardware side channels by + ensuring that the majority of kernel addresses are not mapped + into userspace. + + See Documentation/x86/pti.rst for more details. + +config RETPOLINE + bool "Avoid speculative indirect branches in kernel" + select OBJTOOL if HAVE_OBJTOOL + default y + help + Compile kernel with the retpoline compiler options to guard against + kernel-to-user data leaks by avoiding speculative indirect + branches. Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + +config RETHUNK + bool "Enable return-thunks" + depends on RETPOLINE && CC_HAS_RETURN_THUNK + select OBJTOOL if HAVE_OBJTOOL + default y + help + Compile the kernel with the return-thunks compiler option to guard + against kernel-to-user data leaks by avoiding return speculation. + Requires a compiler with -mfunction-return=thunk-extern + support for full protection. The kernel may run slower. + +config CPU_UNRET_ENTRY + bool "Enable UNRET on kernel entry" + depends on CPU_SUP_AMD && RETHUNK + default y + help + Compile the kernel with support for the retbleed=unret mitigation. + +config CPU_IBPB_ENTRY + bool "Enable IBPB on kernel entry" + depends on CPU_SUP_AMD + default y + help + Compile the kernel with support for the retbleed=ibpb mitigation. + +config CPU_IBRS_ENTRY + bool "Enable IBRS on kernel entry" + depends on CPU_SUP_INTEL + default y + help + Compile the kernel with support for the spectre_v2=ibrs mitigation. + This mitigates both spectre_v2 and retbleed at great cost to + performance. + +config SLS + bool "Mitigate Straight-Line-Speculation" + depends on CC_HAS_SLS && X86_64 + select OBJTOOL if HAVE_OBJTOOL + default n + help + Compile the kernel with straight-line-speculation options to guard + against straight line speculation. The kernel image might be slightly + larger. + +endif + config ARCH_HAS_ADD_PAGES def_bool y depends on ARCH_ENABLE_MEMORY_HOTPLUG diff --git a/arch/x86/Makefile b/arch/x86/Makefile index a74886aed349..1f40dad30d50 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -21,6 +21,12 @@ ifdef CONFIG_CC_IS_CLANG RETPOLINE_CFLAGS := -mretpoline-external-thunk RETPOLINE_VDSO_CFLAGS := -mretpoline endif + +ifdef CONFIG_RETHUNK +RETHUNK_CFLAGS := -mfunction-return=thunk-extern +RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS) +endif + export RETPOLINE_CFLAGS export RETPOLINE_VDSO_CFLAGS diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index 44c350d627c7..d4a314cc50d6 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -110,6 +110,7 @@ void kernel_add_identity_map(unsigned long start, unsigned long end) void initialize_identity_maps(void *rmode) { unsigned long cmdline; + struct setup_data *sd; /* Exclude the encryption mask from __PHYSICAL_MASK */ physical_mask &= ~sme_me_mask; @@ -163,6 +164,18 @@ void initialize_identity_maps(void *rmode) cmdline = get_cmd_line_ptr(); kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); + /* + * Also map the setup_data entries passed via boot_params in case they + * need to be accessed by uncompressed kernel via the identity mapping. + */ + sd = (struct setup_data *)boot_params->hdr.setup_data; + while (sd) { + unsigned long sd_addr = (unsigned long)sd; + + kernel_add_identity_map(sd_addr, sd_addr + sizeof(*sd) + sd->len); + sd = (struct setup_data *)sd->next; + } + sev_prep_identity_maps(top_level_pgt); /* Load the new page-table. */ diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 03deb4d6920d..928dcf7a20d9 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -124,6 +124,51 @@ static u64 get_cc_mask(void) return BIT_ULL(gpa_width - 1); } +/* + * The TDX module spec states that #VE may be injected for a limited set of + * reasons: + * + * - Emulation of the architectural #VE injection on EPT violation; + * + * - As a result of guest TD execution of a disallowed instruction, + * a disallowed MSR access, or CPUID virtualization; + * + * - A notification to the guest TD about anomalous behavior; + * + * The last one is opt-in and is not used by the kernel. + * + * The Intel Software Developer's Manual describes cases when instruction + * length field can be used in section "Information for VM Exits Due to + * Instruction Execution". + * + * For TDX, it ultimately means GET_VEINFO provides reliable instruction length + * information if #VE occurred due to instruction execution, but not for EPT + * violations. + */ +static int ve_instr_len(struct ve_info *ve) +{ + switch (ve->exit_reason) { + case EXIT_REASON_HLT: + case EXIT_REASON_MSR_READ: + case EXIT_REASON_MSR_WRITE: + case EXIT_REASON_CPUID: + case EXIT_REASON_IO_INSTRUCTION: + /* It is safe to use ve->instr_len for #VE due instructions */ + return ve->instr_len; + case EXIT_REASON_EPT_VIOLATION: + /* + * For EPT violations, ve->insn_len is not defined. For those, + * the kernel must decode instructions manually and should not + * be using this function. + */ + WARN_ONCE(1, "ve->instr_len is not defined for EPT violations"); + return 0; + default: + WARN_ONCE(1, "Unexpected #VE-type: %lld\n", ve->exit_reason); + return ve->instr_len; + } +} + static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti) { struct tdx_hypercall_args args = { @@ -147,7 +192,7 @@ static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti) return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0); } -static bool handle_halt(void) +static int handle_halt(struct ve_info *ve) { /* * Since non safe halt is mainly used in CPU offlining @@ -158,9 +203,9 @@ static bool handle_halt(void) const bool do_sti = false; if (__halt(irq_disabled, do_sti)) - return false; + return -EIO; - return true; + return ve_instr_len(ve); } void __cpuidle tdx_safe_halt(void) @@ -180,7 +225,7 @@ void __cpuidle tdx_safe_halt(void) WARN_ONCE(1, "HLT instruction emulation failed\n"); } -static bool read_msr(struct pt_regs *regs) +static int read_msr(struct pt_regs *regs, struct ve_info *ve) { struct tdx_hypercall_args args = { .r10 = TDX_HYPERCALL_STANDARD, @@ -194,14 +239,14 @@ static bool read_msr(struct pt_regs *regs) * (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>". */ if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT)) - return false; + return -EIO; regs->ax = lower_32_bits(args.r11); regs->dx = upper_32_bits(args.r11); - return true; + return ve_instr_len(ve); } -static bool write_msr(struct pt_regs *regs) +static int write_msr(struct pt_regs *regs, struct ve_info *ve) { struct tdx_hypercall_args args = { .r10 = TDX_HYPERCALL_STANDARD, @@ -215,10 +260,13 @@ static bool write_msr(struct pt_regs *regs) * can be found in TDX Guest-Host-Communication Interface * (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>". */ - return !__tdx_hypercall(&args, 0); + if (__tdx_hypercall(&args, 0)) + return -EIO; + + return ve_instr_len(ve); } -static bool handle_cpuid(struct pt_regs *regs) +static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve) { struct tdx_hypercall_args args = { .r10 = TDX_HYPERCALL_STANDARD, @@ -236,7 +284,7 @@ static bool handle_cpuid(struct pt_regs *regs) */ if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) { regs->ax = regs->bx = regs->cx = regs->dx = 0; - return true; + return ve_instr_len(ve); } /* @@ -245,7 +293,7 @@ static bool handle_cpuid(struct pt_regs *regs) * (GHCI), section titled "VP.VMCALL<Instruction.CPUID>". */ if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT)) - return false; + return -EIO; /* * As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of @@ -257,7 +305,7 @@ static bool handle_cpuid(struct pt_regs *regs) regs->cx = args.r14; regs->dx = args.r15; - return true; + return ve_instr_len(ve); } static bool mmio_read(int size, unsigned long addr, unsigned long *val) @@ -283,10 +331,10 @@ static bool mmio_write(int size, unsigned long addr, unsigned long val) EPT_WRITE, addr, val); } -static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve) +static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) { + unsigned long *reg, val, vaddr; char buffer[MAX_INSN_SIZE]; - unsigned long *reg, val; struct insn insn = {}; enum mmio_type mmio; int size, extend_size; @@ -294,34 +342,49 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve) /* Only in-kernel MMIO is supported */ if (WARN_ON_ONCE(user_mode(regs))) - return false; + return -EFAULT; if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE)) - return false; + return -EFAULT; if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64)) - return false; + return -EINVAL; mmio = insn_decode_mmio(&insn, &size); if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED)) - return false; + return -EINVAL; if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) { reg = insn_get_modrm_reg_ptr(&insn, regs); if (!reg) - return false; + return -EINVAL; } - ve->instr_len = insn.length; + /* + * Reject EPT violation #VEs that split pages. + * + * MMIO accesses are supposed to be naturally aligned and therefore + * never cross page boundaries. Seeing split page accesses indicates + * a bug or a load_unaligned_zeropad() that stepped into an MMIO page. + * + * load_unaligned_zeropad() will recover using exception fixups. + */ + vaddr = (unsigned long)insn_get_addr_ref(&insn, regs); + if (vaddr / PAGE_SIZE != (vaddr + size - 1) / PAGE_SIZE) + return -EFAULT; /* Handle writes first */ switch (mmio) { case MMIO_WRITE: memcpy(&val, reg, size); - return mmio_write(size, ve->gpa, val); + if (!mmio_write(size, ve->gpa, val)) + return -EIO; + return insn.length; case MMIO_WRITE_IMM: val = insn.immediate.value; - return mmio_write(size, ve->gpa, val); + if (!mmio_write(size, ve->gpa, val)) + return -EIO; + return insn.length; case MMIO_READ: case MMIO_READ_ZERO_EXTEND: case MMIO_READ_SIGN_EXTEND: @@ -334,15 +397,15 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve) * decoded or handled properly. It was likely not using io.h * helpers or accessed MMIO accidentally. */ - return false; + return -EINVAL; default: WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?"); - return false; + return -EINVAL; } /* Handle reads */ if (!mmio_read(size, ve->gpa, &val)) - return false; + return -EIO; switch (mmio) { case MMIO_READ: @@ -364,13 +427,13 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve) default: /* All other cases has to be covered with the first switch() */ WARN_ON_ONCE(1); - return false; + return -EINVAL; } if (extend_size) memset(reg, extend_val, extend_size); memcpy(reg, &val, size); - return true; + return insn.length; } static bool handle_in(struct pt_regs *regs, int size, int port) @@ -421,13 +484,14 @@ static bool handle_out(struct pt_regs *regs, int size, int port) * * Return True on success or False on failure. */ -static bool handle_io(struct pt_regs *regs, u32 exit_qual) +static int handle_io(struct pt_regs *regs, struct ve_info *ve) { + u32 exit_qual = ve->exit_qual; int size, port; - bool in; + bool in, ret; if (VE_IS_IO_STRING(exit_qual)) - return false; + return -EIO; in = VE_IS_IO_IN(exit_qual); size = VE_GET_IO_SIZE(exit_qual); @@ -435,9 +499,13 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual) if (in) - return handle_in(regs, size, port); + ret = handle_in(regs, size, port); else - return handle_out(regs, size, port); + ret = handle_out(regs, size, port); + if (!ret) + return -EIO; + + return ve_instr_len(ve); } /* @@ -447,13 +515,19 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual) __init bool tdx_early_handle_ve(struct pt_regs *regs) { struct ve_info ve; + int insn_len; tdx_get_ve_info(&ve); if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION) return false; - return handle_io(regs, ve.exit_qual); + insn_len = handle_io(regs, &ve); + if (insn_len < 0) + return false; + + regs->ip += insn_len; + return true; } void tdx_get_ve_info(struct ve_info *ve) @@ -486,54 +560,65 @@ void tdx_get_ve_info(struct ve_info *ve) ve->instr_info = upper_32_bits(out.r10); } -/* Handle the user initiated #VE */ -static bool virt_exception_user(struct pt_regs *regs, struct ve_info *ve) +/* + * Handle the user initiated #VE. + * + * On success, returns the number of bytes RIP should be incremented (>=0) + * or -errno on error. + */ +static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve) { switch (ve->exit_reason) { case EXIT_REASON_CPUID: - return handle_cpuid(regs); + return handle_cpuid(regs, ve); default: pr_warn("Unexpected #VE: %lld\n", ve->exit_reason); - return false; + return -EIO; } } -/* Handle the kernel #VE */ -static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve) +/* + * Handle the kernel #VE. + * + * On success, returns the number of bytes RIP should be incremented (>=0) + * or -errno on error. + */ +static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve) { switch (ve->exit_reason) { case EXIT_REASON_HLT: - return handle_halt(); + return handle_halt(ve); case EXIT_REASON_MSR_READ: - return read_msr(regs); + return read_msr(regs, ve); case EXIT_REASON_MSR_WRITE: - return write_msr(regs); + return write_msr(regs, ve); case EXIT_REASON_CPUID: - return handle_cpuid(regs); + return handle_cpuid(regs, ve); case EXIT_REASON_EPT_VIOLATION: return handle_mmio(regs, ve); case EXIT_REASON_IO_INSTRUCTION: - return handle_io(regs, ve->exit_qual); + return handle_io(regs, ve); default: pr_warn("Unexpected #VE: %lld\n", ve->exit_reason); - return false; + return -EIO; } } bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve) { - bool ret; + int insn_len; if (user_mode(regs)) - ret = virt_exception_user(regs, ve); + insn_len = virt_exception_user(regs, ve); else - ret = virt_exception_kernel(regs, ve); + insn_len = virt_exception_kernel(regs, ve); + if (insn_len < 0) + return false; /* After successful #VE handling, move the IP */ - if (ret) - regs->ip += ve->instr_len; + regs->ip += insn_len; - return ret; + return true; } static bool tdx_tlb_flush_required(bool private) diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 7fec5dcf6438..eeadbd7d92cc 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -11,7 +11,7 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) CFLAGS_common.o += -fno-stack-protector -obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o +obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o obj-y += common.o obj-y += vdso/ diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 29b36e9e4e74..f6907627172b 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -7,6 +7,8 @@ #include <asm/asm-offsets.h> #include <asm/processor-flags.h> #include <asm/ptrace-abi.h> +#include <asm/msr.h> +#include <asm/nospec-branch.h> /* @@ -283,6 +285,66 @@ For 32-bit we have the following conventions - kernel is built with #endif /* + * IBRS kernel mitigation for Spectre_v2. + * + * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers + * the regs it uses (AX, CX, DX). Must be called before the first RET + * instruction (NOTE! UNTRAIN_RET includes a RET instruction) + * + * The optional argument is used to save/restore the current value, + * which is used on the paranoid paths. + * + * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. + */ +.macro IBRS_ENTER save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + rdmsr + shl $32, %rdx + or %rdx, %rax + mov %rax, \save_reg + test $SPEC_CTRL_IBRS, %eax + jz .Ldo_wrmsr_\@ + lfence + jmp .Lend_\@ +.Ldo_wrmsr_\@: +.endif + + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +#endif +.endm + +/* + * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) + * regs. Must be called after the last RET. + */ +.macro IBRS_EXIT save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + mov \save_reg, %rdx +.else + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + andl $(~SPEC_CTRL_IBRS), %edx +.endif + + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +#endif +.endm + +/* * Mitigate Spectre v1 for conditional swapgs code paths. * * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S new file mode 100644 index 000000000000..bfb7bcb362bc --- /dev/null +++ b/arch/x86/entry/entry.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common place for both 32- and 64-bit entry routines. + */ + +#include <linux/linkage.h> +#include <asm/export.h> +#include <asm/msr-index.h> + +.pushsection .noinstr.text, "ax" + +SYM_FUNC_START(entry_ibpb) + movl $MSR_IA32_PRED_CMD, %ecx + movl $PRED_CMD_IBPB, %eax + xorl %edx, %edx + wrmsr + RET +SYM_FUNC_END(entry_ibpb) +/* For KVM */ +EXPORT_SYMBOL_GPL(entry_ibpb); + +.popsection diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 887420844066..e309e7156038 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -698,7 +698,6 @@ SYM_CODE_START(__switch_to_asm) movl %ebx, PER_CPU_VAR(__stack_chk_guard) #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -707,7 +706,6 @@ SYM_CODE_START(__switch_to_asm) * speculative execution to prevent attack. */ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* Restore flags or the incoming task to restore AC state. */ popfl diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4300ba49b5ee..9953d966d124 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -85,7 +85,7 @@ */ SYM_CODE_START(entry_SYSCALL_64) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR swapgs @@ -112,6 +112,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) movq %rsp, %rdi /* Sign extend the lower 32bit as syscall numbers are treated as int */ movslq %eax, %rsi + + /* clobbers %rax, make sure it is after saving the syscall nr */ + IBRS_ENTER + UNTRAIN_RET + call do_syscall_64 /* returns with IRQs disabled */ /* @@ -191,6 +196,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) * perf profiles. Nothing jumps here. */ syscall_return_via_sysret: + IBRS_EXIT POP_REGS pop_rdi=0 /* @@ -249,7 +255,6 @@ SYM_FUNC_START(__switch_to_asm) movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -258,7 +263,6 @@ SYM_FUNC_START(__switch_to_asm) * speculative execution to prevent attack. */ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* restore callee-saved registers */ popq %r15 @@ -322,13 +326,13 @@ SYM_CODE_END(ret_from_fork) #endif .endm -/* Save all registers in pt_regs */ -SYM_CODE_START_LOCAL(push_and_clear_regs) +SYM_CODE_START_LOCAL(xen_error_entry) UNWIND_HINT_FUNC PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 + UNTRAIN_RET RET -SYM_CODE_END(push_and_clear_regs) +SYM_CODE_END(xen_error_entry) /** * idtentry_body - Macro to emit code calling the C function @@ -337,9 +341,6 @@ SYM_CODE_END(push_and_clear_regs) */ .macro idtentry_body cfunc has_error_code:req - call push_and_clear_regs - UNWIND_HINT_REGS - /* * Call error_entry() and switch to the task stack if from userspace. * @@ -349,7 +350,7 @@ SYM_CODE_END(push_and_clear_regs) * switch the CR3. So it can skip invoking error_entry(). */ ALTERNATIVE "call error_entry; movq %rax, %rsp", \ - "", X86_FEATURE_XENPV + "call xen_error_entry", X86_FEATURE_XENPV ENCODE_FRAME_POINTER UNWIND_HINT_REGS @@ -612,6 +613,7 @@ __irqentry_text_end: SYM_CODE_START_LOCAL(common_interrupt_return) SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) + IBRS_EXIT #ifdef CONFIG_DEBUG_ENTRY /* Assert that pt_regs indicates user mode. */ testb $3, CS(%rsp) @@ -897,6 +899,9 @@ SYM_CODE_END(xen_failsafe_callback) * 1 -> no SWAPGS on exit * * Y GSBASE value at entry, must be restored in paranoid_exit + * + * R14 - old CR3 + * R15 - old SPEC_CTRL */ SYM_CODE_START_LOCAL(paranoid_entry) UNWIND_HINT_FUNC @@ -940,7 +945,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) * is needed here. */ SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx - RET + jmp .Lparanoid_gsbase_done .Lparanoid_entry_checkgs: /* EBX = 1 -> kernel GSBASE active, no restore required */ @@ -959,8 +964,16 @@ SYM_CODE_START_LOCAL(paranoid_entry) xorl %ebx, %ebx swapgs .Lparanoid_kernel_gsbase: - FENCE_SWAPGS_KERNEL_ENTRY +.Lparanoid_gsbase_done: + + /* + * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like + * CR3 above, keep the old value in a callee saved register. + */ + IBRS_ENTER save_reg=%r15 + UNTRAIN_RET + RET SYM_CODE_END(paranoid_entry) @@ -982,9 +995,19 @@ SYM_CODE_END(paranoid_entry) * 1 -> no SWAPGS on exit * * Y User space GSBASE, must be restored unconditionally + * + * R14 - old CR3 + * R15 - old SPEC_CTRL */ SYM_CODE_START_LOCAL(paranoid_exit) UNWIND_HINT_REGS + + /* + * Must restore IBRS state before both CR3 and %GS since we need access + * to the per-CPU x86_spec_ctrl_shadow variable. + */ + IBRS_EXIT save_reg=%r15 + /* * The order of operations is important. RESTORE_CR3 requires * kernel GSBASE. @@ -1017,6 +1040,10 @@ SYM_CODE_END(paranoid_exit) */ SYM_CODE_START_LOCAL(error_entry) UNWIND_HINT_FUNC + + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 + testb $3, CS+8(%rsp) jz .Lerror_kernelspace @@ -1028,9 +1055,12 @@ SYM_CODE_START_LOCAL(error_entry) FENCE_SWAPGS_USER_ENTRY /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER + UNTRAIN_RET leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ .Lerror_entry_from_usermode_after_swapgs: + /* Put us onto the real thread stack. */ call sync_regs RET @@ -1065,6 +1095,7 @@ SYM_CODE_START_LOCAL(error_entry) .Lerror_entry_done_lfence: FENCE_SWAPGS_KERNEL_ENTRY leaq 8(%rsp), %rax /* return pt_regs pointer */ + ANNOTATE_UNRET_END RET .Lbstep_iret: @@ -1080,6 +1111,8 @@ SYM_CODE_START_LOCAL(error_entry) swapgs FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER + UNTRAIN_RET /* * Pretend that the exception came from user mode: set up pt_regs @@ -1185,6 +1218,9 @@ SYM_CODE_START(asm_exc_nmi) PUSH_AND_CLEAR_REGS rdx=(%rdx) ENCODE_FRAME_POINTER + IBRS_ENTER + UNTRAIN_RET + /* * At this point we no longer need to worry about stack damage * due to nesting -- we're on the normal thread stack and we're @@ -1409,6 +1445,9 @@ end_repeat_nmi: movq $-1, %rsi call exc_nmi + /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ + IBRS_EXIT save_reg=%r15 + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index d1052742ad0c..682338e7e2a3 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -4,7 +4,6 @@ * * Copyright 2000-2002 Andi Kleen, SuSE Labs. */ -#include "calling.h" #include <asm/asm-offsets.h> #include <asm/current.h> #include <asm/errno.h> @@ -14,9 +13,12 @@ #include <asm/irqflags.h> #include <asm/asm.h> #include <asm/smap.h> +#include <asm/nospec-branch.h> #include <linux/linkage.h> #include <linux/err.h> +#include "calling.h" + .section .entry.text, "ax" /* @@ -47,7 +49,7 @@ * 0(%ebp) arg6 */ SYM_CODE_START(entry_SYSENTER_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR /* Interrupts are off on entry. */ swapgs @@ -88,6 +90,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) cld + IBRS_ENTER + UNTRAIN_RET + /* * SYSENTER doesn't filter flags, so we need to clear NT and AC * ourselves. To save a few cycles, we can check whether @@ -174,7 +179,7 @@ SYM_CODE_END(entry_SYSENTER_compat) * 0(%esp) arg6 */ SYM_CODE_START(entry_SYSCALL_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR /* Interrupts are off on entry. */ swapgs @@ -203,6 +208,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS UNWIND_HINT_REGS + IBRS_ENTER + UNTRAIN_RET + movq %rsp, %rdi call do_fast_syscall_32 /* XEN PV guests always use IRET path */ @@ -217,6 +225,8 @@ sysret32_from_system_call: */ STACKLEAK_ERASE + IBRS_EXIT + movq RBX(%rsp), %rbx /* pt_regs->rbx */ movq RBP(%rsp), %rbp /* pt_regs->rbp */ movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */ @@ -295,7 +305,7 @@ SYM_CODE_END(entry_SYSCALL_compat) * ebp arg6 */ SYM_CODE_START(entry_INT80_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR /* * Interrupts are off on entry. @@ -337,6 +347,9 @@ SYM_CODE_START(entry_INT80_compat) cld + IBRS_ENTER + UNTRAIN_RET + movq %rsp, %rdi call do_int80_syscall_32 jmp swapgs_restore_regs_and_return_to_usermode diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index c2a8b76ae0bc..76cd790ed0bd 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -92,6 +92,7 @@ endif endif $(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) +$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S index 15e35159ebb6..ef2dd1827243 100644 --- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S +++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S @@ -19,17 +19,20 @@ __vsyscall_page: mov $__NR_gettimeofday, %rax syscall - RET + ret + int3 .balign 1024, 0xcc mov $__NR_time, %rax syscall - RET + ret + int3 .balign 1024, 0xcc mov $__NR_getcpu, %rax syscall - RET + ret + int3 .balign 4096, 0xcc diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 8b392b6b7b93..3de6d8b53367 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -13,6 +13,7 @@ #include <linux/io.h> #include <asm/apic.h> #include <asm/desc.h> +#include <asm/sev.h> #include <asm/hypervisor.h> #include <asm/hyperv-tlfs.h> #include <asm/mshyperv.h> @@ -405,6 +406,11 @@ void __init hyperv_init(void) } if (hv_isolation_type_snp()) { + /* Negotiate GHCB Version. */ + if (!hv_ghcb_negotiate_protocol()) + hv_ghcb_terminate(SEV_TERM_SET_GEN, + GHCB_SEV_ES_PROT_UNSUPPORTED); + hv_ghcb_pg = alloc_percpu(union hv_ghcb *); if (!hv_ghcb_pg) goto free_vp_assist_page; diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 2b994117581e..1dbcbd9da74d 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -53,6 +53,8 @@ union hv_ghcb { } hypercall; } __packed __aligned(HV_HYP_PAGE_SIZE); +static u16 hv_ghcb_version __ro_after_init; + u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size) { union hv_ghcb *hv_ghcb; @@ -96,12 +98,85 @@ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size) return status; } +static inline u64 rd_ghcb_msr(void) +{ + return __rdmsr(MSR_AMD64_SEV_ES_GHCB); +} + +static inline void wr_ghcb_msr(u64 val) +{ + native_wrmsrl(MSR_AMD64_SEV_ES_GHCB, val); +} + +static enum es_result hv_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code, + u64 exit_info_1, u64 exit_info_2) +{ + /* Fill in protocol and format specifiers */ + ghcb->protocol_version = hv_ghcb_version; + ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; + + ghcb_set_sw_exit_code(ghcb, exit_code); + ghcb_set_sw_exit_info_1(ghcb, exit_info_1); + ghcb_set_sw_exit_info_2(ghcb, exit_info_2); + + VMGEXIT(); + + if (ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0)) + return ES_VMM_ERROR; + else + return ES_OK; +} + +void hv_ghcb_terminate(unsigned int set, unsigned int reason) +{ + u64 val = GHCB_MSR_TERM_REQ; + + /* Tell the hypervisor what went wrong. */ + val |= GHCB_SEV_TERM_REASON(set, reason); + + /* Request Guest Termination from Hypvervisor */ + wr_ghcb_msr(val); + VMGEXIT(); + + while (true) + asm volatile("hlt\n" : : : "memory"); +} + +bool hv_ghcb_negotiate_protocol(void) +{ + u64 ghcb_gpa; + u64 val; + + /* Save ghcb page gpa. */ + ghcb_gpa = rd_ghcb_msr(); + + /* Do the GHCB protocol version negotiation */ + wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ); + VMGEXIT(); + val = rd_ghcb_msr(); + + if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP) + return false; + + if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN || + GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX) + return false; + + hv_ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), + GHCB_PROTOCOL_MAX); + + /* Write ghcb page back after negotiating protocol. */ + wr_ghcb_msr(ghcb_gpa); + VMGEXIT(); + + return true; +} + void hv_ghcb_msr_write(u64 msr, u64 value) { union hv_ghcb *hv_ghcb; void **ghcb_base; unsigned long flags; - struct es_em_ctxt ctxt; if (!hv_ghcb_pg) return; @@ -120,8 +195,7 @@ void hv_ghcb_msr_write(u64 msr, u64 value) ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value)); ghcb_set_rdx(&hv_ghcb->ghcb, upper_32_bits(value)); - if (sev_es_ghcb_hv_call(&hv_ghcb->ghcb, false, &ctxt, - SVM_EXIT_MSR, 1, 0)) + if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 1, 0)) pr_warn("Fail to write msr via ghcb %llx.\n", msr); local_irq_restore(flags); @@ -133,7 +207,6 @@ void hv_ghcb_msr_read(u64 msr, u64 *value) union hv_ghcb *hv_ghcb; void **ghcb_base; unsigned long flags; - struct es_em_ctxt ctxt; /* Check size of union hv_ghcb here. */ BUILD_BUG_ON(sizeof(union hv_ghcb) != HV_HYP_PAGE_SIZE); @@ -152,8 +225,7 @@ void hv_ghcb_msr_read(u64 msr, u64 *value) } ghcb_set_rcx(&hv_ghcb->ghcb, msr); - if (sev_es_ghcb_hv_call(&hv_ghcb->ghcb, false, &ctxt, - SVM_EXIT_MSR, 0, 0)) + if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 0, 0)) pr_warn("Fail to read msr via ghcb %llx.\n", msr); else *value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 9b10c8c76087..9542c582d546 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -76,6 +76,7 @@ extern int alternatives_patched; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_returns(s32 *start, s32 *end); extern void apply_ibt_endbr(s32 *start, s32 *end); struct module; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 393f2bbb5e3a..00f5227c8459 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -203,8 +203,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ @@ -296,6 +296,12 @@ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -316,6 +322,7 @@ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ #define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ +#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ #define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ @@ -446,5 +453,7 @@ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 36369e76cc63..33d2cd04d254 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -50,6 +50,25 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif +#ifdef CONFIG_RETPOLINE +# define DISABLE_RETPOLINE 0 +#else +# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) +#endif + +#ifdef CONFIG_RETHUNK +# define DISABLE_RETHUNK 0 +#else +# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) +#endif + +#ifdef CONFIG_CPU_UNRET_ENTRY +# define DISABLE_UNRET 0 +#else +# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) +#endif + #ifdef CONFIG_INTEL_IOMMU_SVM # define DISABLE_ENQCMD 0 #else @@ -82,7 +101,7 @@ #define DISABLED_MASK8 (DISABLE_TDX_GUEST) #define DISABLED_MASK9 (DISABLE_SGX) #define DISABLED_MASK10 0 -#define DISABLED_MASK11 0 +#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) #define DISABLED_MASK12 0 #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h index 5a39ed59b6db..e8f58ddd06d9 100644 --- a/arch/x86/include/asm/e820/api.h +++ b/arch/x86/include/asm/e820/api.h @@ -4,9 +4,6 @@ #include <asm/e820/types.h> -struct device; -struct resource; - extern struct e820_table *e820_table; extern struct e820_table *e820_table_kexec; extern struct e820_table *e820_table_firmware; @@ -46,8 +43,6 @@ extern void e820__register_nosave_regions(unsigned long limit_pfn); extern int e820__get_entry_type(u64 start, u64 end); -extern void remove_e820_regions(struct device *dev, struct resource *avail); - /* * Returns true iff the specified range [start,end) is completely contained inside * the ISA region. diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 71943dce691e..9636742a80f2 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -323,7 +323,7 @@ static inline u32 efi64_convert_status(efi_status_t status) #define __efi64_argmap_get_memory_space_descriptor(phys, desc) \ (__efi64_split(phys), (desc)) -#define __efi64_argmap_set_memory_space_descriptor(phys, size, flags) \ +#define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \ (__efi64_split(phys), __efi64_split(size), __efi64_split(flags)) /* diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3a240a64ac68..9217bd6cf0d1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1047,14 +1047,77 @@ struct kvm_x86_msr_filter { }; enum kvm_apicv_inhibit { + + /********************************************************************/ + /* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */ + /********************************************************************/ + + /* + * APIC acceleration is disabled by a module parameter + * and/or not supported in hardware. + */ APICV_INHIBIT_REASON_DISABLE, + + /* + * APIC acceleration is inhibited because AutoEOI feature is + * being used by a HyperV guest. + */ APICV_INHIBIT_REASON_HYPERV, + + /* + * APIC acceleration is inhibited because the userspace didn't yet + * enable the kernel/split irqchip. + */ + APICV_INHIBIT_REASON_ABSENT, + + /* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ + * (out of band, debug measure of blocking all interrupts on this vCPU) + * was enabled, to avoid AVIC/APICv bypassing it. + */ + APICV_INHIBIT_REASON_BLOCKIRQ, + + /* + * For simplicity, the APIC acceleration is inhibited + * first time either APIC ID or APIC base are changed by the guest + * from their reset values. + */ + APICV_INHIBIT_REASON_APIC_ID_MODIFIED, + APICV_INHIBIT_REASON_APIC_BASE_MODIFIED, + + /******************************************************/ + /* INHIBITs that are relevant only to the AMD's AVIC. */ + /******************************************************/ + + /* + * AVIC is inhibited on a vCPU because it runs a nested guest. + * + * This is needed because unlike APICv, the peers of this vCPU + * cannot use the doorbell mechanism to signal interrupts via AVIC when + * a vCPU runs nested. + */ APICV_INHIBIT_REASON_NESTED, + + /* + * On SVM, the wait for the IRQ window is implemented with pending vIRQ, + * which cannot be injected when the AVIC is enabled, thus AVIC + * is inhibited while KVM waits for IRQ window. + */ APICV_INHIBIT_REASON_IRQWIN, + + /* + * PIT (i8254) 're-inject' mode, relies on EOI intercept, + * which AVIC doesn't support for edge triggered interrupts. + */ APICV_INHIBIT_REASON_PIT_REINJ, + + /* + * AVIC is inhibited because the guest has x2apic in its CPUID. + */ APICV_INHIBIT_REASON_X2APIC, - APICV_INHIBIT_REASON_BLOCKIRQ, - APICV_INHIBIT_REASON_ABSENT, + + /* + * AVIC is disabled because SEV doesn't support it. + */ APICV_INHIBIT_REASON_SEV, }; diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 85865f1645bd..73ca20049835 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -19,19 +19,27 @@ #define __ALIGN_STR __stringify(__ALIGN) #endif +#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define RET jmp __x86_return_thunk +#else /* CONFIG_RETPOLINE */ #ifdef CONFIG_SLS #define RET ret; int3 #else #define RET ret #endif +#endif /* CONFIG_RETPOLINE */ #else /* __ASSEMBLY__ */ +#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define ASM_RET "jmp __x86_return_thunk\n\t" +#else /* CONFIG_RETPOLINE */ #ifdef CONFIG_SLS #define ASM_RET "ret; int3\n\t" #else #define ASM_RET "ret\n\t" #endif +#endif /* CONFIG_RETPOLINE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index a82f603d4312..61f0c206bff0 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -179,9 +179,13 @@ int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible); #ifdef CONFIG_AMD_MEM_ENCRYPT void hv_ghcb_msr_write(u64 msr, u64 value); void hv_ghcb_msr_read(u64 msr, u64 *value); +bool hv_ghcb_negotiate_protocol(void); +void hv_ghcb_terminate(unsigned int set, unsigned int reason); #else static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} +static inline bool hv_ghcb_negotiate_protocol(void) { return false; } +static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} #endif extern bool hv_isolation_type_snp(void); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 403e83b4adc8..cc615be27a54 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -51,6 +51,8 @@ #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ +#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -93,6 +95,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ #define ARCH_CAP_SSB_NO BIT(4) /* * Not susceptible to Speculative Store Bypass @@ -116,6 +119,37 @@ * Not susceptible to * TSX Async Abort (TAA) vulnerabilities. */ +#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /* + * Not susceptible to SBDR and SSDP + * variants of Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FBSDP_NO BIT(14) /* + * Not susceptible to FBSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_PSDP_NO BIT(15) /* + * Not susceptible to PSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FB_CLEAR BIT(17) /* + * VERW clears CPU fill buffer + * even on MDS_NO CPUs. + */ +#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /* + * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] + * bit available to control VERW + * behavior. + */ +#define ARCH_CAP_RRSBA BIT(19) /* + * Indicates RET may use predictors + * other than the RSB. With eIBRS + * enabled predictions in kernel mode + * are restricted to targets in + * kernel. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -133,6 +167,7 @@ #define MSR_IA32_MCU_OPT_CTRL 0x00000123 #define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ #define RTM_ALLOW BIT(1) /* TSX development mode */ +#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 @@ -542,6 +577,9 @@ /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 +#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 +#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index acbaeaf83b61..10a3bfc1eb23 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -11,6 +11,7 @@ #include <asm/cpufeatures.h> #include <asm/msr-index.h> #include <asm/unwind_hints.h> +#include <asm/percpu.h> #define RETPOLINE_THUNK_SIZE 32 @@ -76,6 +77,23 @@ .endm /* + * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions + * vs RETBleed validation. + */ +#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE + +/* + * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should + * eventually turn into it's own annotation. + */ +.macro ANNOTATE_UNRET_END +#ifdef CONFIG_DEBUG_ENTRY + ANNOTATE_RETPOLINE_SAFE + nop +#endif +.endm + +/* * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 * attack. @@ -105,10 +123,34 @@ * monstrosity above, manually. */ .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req -#ifdef CONFIG_RETPOLINE ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) .Lskip_rsb_\@: +.endm + +#ifdef CONFIG_CPU_UNRET_ENTRY +#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret" +#else +#define CALL_ZEN_UNTRAIN_RET "" +#endif + +/* + * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the + * return thunk isn't mapped into the userspace tables (then again, AMD + * typically has NO_MELTDOWN). + * + * While zen_untrain_ret() doesn't clobber anything but requires stack, + * entry_ibpb() will clobber AX, CX, DX. + * + * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point + * where we have a stack but before any RET instruction. + */ +.macro UNTRAIN_RET +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) + ANNOTATE_UNRET_END + ALTERNATIVE_2 "", \ + CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif .endm @@ -120,17 +162,20 @@ _ASM_PTR " 999b\n\t" \ ".popsection\n\t" -#ifdef CONFIG_RETPOLINE - typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; +extern retpoline_thunk_t __x86_indirect_thunk_array[]; + +extern void __x86_return_thunk(void); +extern void zen_untrain_ret(void); +extern void entry_ibpb(void); + +#ifdef CONFIG_RETPOLINE #define GEN(reg) \ extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; #include <asm/GEN-for-each-reg.h> #undef GEN -extern retpoline_thunk_t __x86_indirect_thunk_array[]; - #ifdef CONFIG_X86_64 /* @@ -193,6 +238,7 @@ enum spectre_v2_mitigation { SPECTRE_V2_EIBRS, SPECTRE_V2_EIBRS_RETPOLINE, SPECTRE_V2_EIBRS_LFENCE, + SPECTRE_V2_IBRS, }; /* The indirect branch speculation control variants */ @@ -235,6 +281,9 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; +DECLARE_PER_CPU(u64, x86_spec_ctrl_current); +extern void write_spec_ctrl_current(u64 val, bool force); +extern u64 spec_ctrl_current(void); /* * With retpoline, we must use IBRS to restrict branch prediction @@ -244,18 +293,16 @@ extern u64 x86_spec_ctrl_base; */ #define firmware_restrict_branch_speculation_start() \ do { \ - u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ - \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current() | SPEC_CTRL_IBRS, \ X86_FEATURE_USE_IBRS_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - u64 val = x86_spec_ctrl_base; \ - \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current(), \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) @@ -269,6 +316,8 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); +DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); + #include <asm/segment.h> /** diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index f52a886d35cf..70533fdcbf02 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -69,6 +69,8 @@ void pcibios_scan_specific_bus(int busn); /* pci-irq.c */ +struct pci_dev; + struct irq_info { u8 bus, devfn; /* Bus, device and function */ struct { @@ -246,3 +248,9 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val) # define x86_default_pci_init_irq NULL # define x86_default_pci_fixup_irqs NULL #endif + +#if defined(CONFIG_PCI) && defined(CONFIG_ACPI) +extern bool pci_use_e820; +#else +#define pci_use_e820 false +#endif diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 7590ac2570b9..f37cbff7354c 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -108,21 +108,21 @@ extern unsigned long _brk_end; void *extend_brk(size_t size, size_t align); /* - * Reserve space in the brk section. The name must be unique within the file, - * and somewhat descriptive. The size is in bytes. + * Reserve space in the .brk section, which is a block of memory from which the + * caller is allowed to allocate very early (before even memblock is available) + * by calling extend_brk(). All allocated memory will be eventually converted + * to memblock. Any leftover unallocated memory will be freed. * - * The allocation is done using inline asm (rather than using a section - * attribute on a normal variable) in order to allow the use of @nobits, so - * that it doesn't take up any space in the vmlinux file. + * The size is in bytes. */ -#define RESERVE_BRK(name, size) \ - asm(".pushsection .brk_reservation,\"aw\",@nobits\n\t" \ - ".brk." #name ":\n\t" \ - ".skip " __stringify(size) "\n\t" \ - ".size .brk." #name ", " __stringify(size) "\n\t" \ - ".popsection\n\t") +#define RESERVE_BRK(name, size) \ + __section(".bss..brk") __aligned(1) __used \ + static char __brk_##name[size] extern void probe_roms(void); + +void clear_bss(void); + #ifdef __i386__ asmlinkage void __init i386_start_kernel(void); @@ -133,12 +133,19 @@ asmlinkage void __init x86_64_start_reservations(char *real_mode_data); #endif /* __i386__ */ #endif /* _SETUP */ -#else -#define RESERVE_BRK(name,sz) \ - .pushsection .brk_reservation,"aw",@nobits; \ -.brk.name: \ -1: .skip sz; \ - .size .brk.name,.-1b; \ + +#else /* __ASSEMBLY */ + +.macro __RESERVE_BRK name, size + .pushsection .bss..brk, "aw" +SYM_DATA_START(__brk_\name) + .skip \size +SYM_DATA_END(__brk_\name) .popsection +.endm + +#define RESERVE_BRK(name, size) __RESERVE_BRK name, size + #endif /* __ASSEMBLY__ */ + #endif /* _ASM_X86_SETUP_H */ diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 2d8dacd02643..343b722ccaf2 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -21,6 +21,16 @@ * relative displacement across sections. */ +/* + * The trampoline is 8 bytes and of the general form: + * + * jmp.d32 \func + * ud1 %esp, %ecx + * + * That trailing #UD provides both a speculation stop and serves as a unique + * 3 byte signature identifying static call trampolines. Also see tramp_ud[] + * and __static_call_fixup(). + */ #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \ asm(".pushsection .static_call.text, \"ax\" \n" \ ".align 4 \n" \ @@ -28,7 +38,7 @@ STATIC_CALL_TRAMP_STR(name) ": \n" \ ANNOTATE_NOENDBR \ insns " \n" \ - ".byte 0x53, 0x43, 0x54 \n" \ + ".byte 0x0f, 0xb9, 0xcc \n" \ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \ ".popsection \n") @@ -36,8 +46,13 @@ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") +#ifdef CONFIG_RETHUNK +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") +#else #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") +#endif #define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \ ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0) @@ -48,4 +63,6 @@ ".long " STATIC_CALL_KEY_STR(name) " - . \n" \ ".popsection \n") +extern bool __static_call_fixup(void *tramp, u8 op, void *dest); + #endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 8b33674288ea..f66fbe6537dd 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -8,7 +8,11 @@ #ifdef __ASSEMBLY__ .macro UNWIND_HINT_EMPTY - UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 + UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1 +.endm + +.macro UNWIND_HINT_ENTRY + UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1 .endm .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 @@ -52,6 +56,14 @@ UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC .endm +.macro UNWIND_HINT_SAVE + UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE +.endm + +.macro UNWIND_HINT_RESTORE + UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE +.endm + #else #define UNWIND_HINT_FUNC \ diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index bea5cdcdf532..e02a8a8ef23c 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -15,7 +15,7 @@ #define SETUP_INDIRECT (1<<31) /* SETUP_INDIRECT | max(SETUP_*) */ -#define SETUP_TYPE_MAX (SETUP_INDIRECT | SETUP_JAILHOUSE) +#define SETUP_TYPE_MAX (SETUP_INDIRECT | SETUP_CC_BLOB) /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 03364dc40d8d..4c8b6ae802ac 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -36,10 +36,6 @@ KCSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD_test_nx.o := y -ifdef CONFIG_FRAME_POINTER -OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y -endif - # If instrumentation of this dir is enabled, boot hangs during first second. # Probably could be more selective here, but note that files related to irqs, # boot, dumpstack/stacktrace, etc are either non-interesting or can lead to diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 8b8cbf22461a..8d8752b44f11 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -11,6 +11,22 @@ /* Refer to drivers/acpi/cppc_acpi.c for the description of functions */ +bool cpc_supported_by_cpu(void) +{ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: + if (boot_cpu_data.x86 == 0x19 && ((boot_cpu_data.x86_model <= 0x0f) || + (boot_cpu_data.x86_model >= 0x20 && boot_cpu_data.x86_model <= 0x2f))) + return true; + else if (boot_cpu_data.x86 == 0x17 && + boot_cpu_data.x86_model >= 0x70 && boot_cpu_data.x86_model <= 0x7f) + return true; + return boot_cpu_has(X86_FEATURE_CPPC); + } + return false; +} + bool cpc_ffh_supported(void) { return true; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index e257f6c80372..d6858533e6e5 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -115,6 +115,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) } extern s32 __retpoline_sites[], __retpoline_sites_end[]; +extern s32 __return_sites[], __return_sites_end[]; extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; @@ -507,9 +508,76 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) } } +#ifdef CONFIG_RETHUNK +/* + * Rewrite the compiler generated return thunk tail-calls. + * + * For example, convert: + * + * JMP __x86_return_thunk + * + * into: + * + * RET + */ +static int patch_return(void *addr, struct insn *insn, u8 *bytes) +{ + int i = 0; + + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + return -1; + + bytes[i++] = RET_INSN_OPCODE; + + for (; i < insn->length;) + bytes[i++] = INT3_INSN_OPCODE; + + return i; +} + +void __init_or_module noinline apply_returns(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *dest = NULL, *addr = (void *)s + *s; + struct insn insn; + int len, ret; + u8 bytes[16]; + u8 op; + + ret = insn_decode_kernel(&insn, addr); + if (WARN_ON_ONCE(ret < 0)) + continue; + + op = insn.opcode.bytes[0]; + if (op == JMP32_INSN_OPCODE) + dest = addr + insn.length + insn.immediate.value; + + if (__static_call_fixup(addr, op, dest) || + WARN_ON_ONCE(dest != &__x86_return_thunk)) + continue; + + DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", + addr, addr, insn.length, + addr + insn.length + insn.immediate.value); + + len = patch_return(addr, &insn, bytes); + if (len == insn.length) { + DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); + DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); + text_poke_early(addr, bytes, len); + } + } +} +#else +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } +#endif /* CONFIG_RETHUNK */ + #else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #endif /* CONFIG_RETPOLINE && CONFIG_OBJTOOL */ @@ -860,6 +928,7 @@ void __init alternative_instructions(void) * those can rewrite the retpoline thunks. */ apply_retpolines(__retpoline_sites, __retpoline_sites_end); + apply_returns(__return_sites, __return_sites_end); /* * Then patch alternatives, such that those paravirt calls that are in diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 437308004ef2..cb50589a7102 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -19,6 +19,7 @@ #include <asm/suspend.h> #include <asm/tlbflush.h> #include <asm/tdx.h> +#include "../kvm/vmx/vmx.h" #ifdef CONFIG_XEN #include <xen/interface/xen.h> @@ -107,4 +108,9 @@ static void __used common(void) OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); + + if (IS_ENABLED(CONFIG_KVM_INTEL)) { + BLANK(); + OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl); + } } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 0c0b09796ced..35d5288394cb 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -862,6 +862,28 @@ static void init_amd_bd(struct cpuinfo_x86 *c) clear_rdrand_cpuid_bit(c); } +void init_spectral_chicken(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_CPU_UNRET_ENTRY + u64 value; + + /* + * On Zen2 we offer this chicken (bit) on the altar of Speculation. + * + * This suppresses speculation from the middle of a basic block, i.e. it + * suppresses non-branch predictions. + * + * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) { + if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) { + value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT; + wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); + } + } +#endif +} + static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); @@ -870,12 +892,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c) node_reclaim_distance = 32; #endif - /* - * Fix erratum 1076: CPB feature bit not being set in CPUID. - * Always set it, except when running under a hypervisor. - */ - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB)) - set_cpu_cap(c, X86_FEATURE_CPB); + /* Fix up CPUID bits, but only if not virtualised. */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { + + /* Erratum 1076: CPB feature bit not being set in CPUID. */ + if (!cpu_has(c, X86_FEATURE_CPB)) + set_cpu_cap(c, X86_FEATURE_CPB); + + /* + * Zen3 (Fam19 model < 0x10) parts are not susceptible to + * Branch Type Confusion, but predate the allocation of the + * BTC_NO bit. + */ + if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) + set_cpu_cap(c, X86_FEATURE_BTC_NO); + } } static void init_amd(struct cpuinfo_x86 *c) @@ -907,7 +938,8 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; case 0x16: init_amd_jg(c); break; - case 0x17: fallthrough; + case 0x17: init_spectral_chicken(c); + fallthrough; case 0x19: init_amd_zn(c); break; } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d879a6c93609..aa34f908c39f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -38,24 +38,52 @@ static void __init spectre_v1_select_mitigation(void); static void __init spectre_v2_select_mitigation(void); +static void __init retbleed_select_mitigation(void); +static void __init spectre_v2_user_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); -static void __init mds_print_mitigation(void); +static void __init md_clear_update_mitigation(void); +static void __init md_clear_select_mitigation(void); static void __init taa_select_mitigation(void); +static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); static void __init l1d_flush_select_mitigation(void); -/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ +/* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); + +/* The current value of the SPEC_CTRL MSR with task-specific bits set */ +DEFINE_PER_CPU(u64, x86_spec_ctrl_current); +EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); + static DEFINE_MUTEX(spec_ctrl_mutex); /* - * The vendor and possibly platform specific bits which can be modified in - * x86_spec_ctrl_base. + * Keep track of the SPEC_CTRL MSR value for the current task, which may differ + * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). */ -static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; +void write_spec_ctrl_current(u64 val, bool force) +{ + if (this_cpu_read(x86_spec_ctrl_current) == val) + return; + + this_cpu_write(x86_spec_ctrl_current, val); + + /* + * When KERNEL_IBRS this MSR is written on return-to-user, unless + * forced the update can be delayed until that time. + */ + if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + +u64 spec_ctrl_current(void) +{ + return this_cpu_read(x86_spec_ctrl_current); +} +EXPORT_SYMBOL_GPL(spec_ctrl_current); /* * AMD specific MSR info for Speculative Store Bypass control. @@ -85,6 +113,10 @@ EXPORT_SYMBOL_GPL(mds_idle_clear); */ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); +/* Controls CPU Fill buffer clear before KVM guest MMIO accesses */ +DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear); +EXPORT_SYMBOL_GPL(mmio_stale_data_clear); + void __init check_bugs(void) { identify_boot_cpu(); @@ -108,26 +140,27 @@ void __init check_bugs(void) if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - /* Allow STIBP in MSR_SPEC_CTRL if supported */ - if (boot_cpu_has(X86_FEATURE_STIBP)) - x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; - /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); + /* + * retbleed_select_mitigation() relies on the state set by + * spectre_v2_select_mitigation(); specifically it wants to know about + * spectre_v2=ibrs. + */ + retbleed_select_mitigation(); + /* + * spectre_v2_user_select_mitigation() relies on the state set by + * retbleed_select_mitigation(); specifically the STIBP selection is + * forced for UNRET. + */ + spectre_v2_user_select_mitigation(); ssb_select_mitigation(); l1tf_select_mitigation(); - mds_select_mitigation(); - taa_select_mitigation(); + md_clear_select_mitigation(); srbds_select_mitigation(); l1d_flush_select_mitigation(); - /* - * As MDS and TAA mitigations are inter-related, print MDS - * mitigation until after TAA mitigation selection is done. - */ - mds_print_mitigation(); - arch_smt_update(); #ifdef CONFIG_X86_32 @@ -162,31 +195,17 @@ void __init check_bugs(void) #endif } +/* + * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is + * done in vmenter.S. + */ void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { - u64 msrval, guestval, hostval = x86_spec_ctrl_base; + u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); struct thread_info *ti = current_thread_info(); - /* Is MSR_SPEC_CTRL implemented ? */ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { - /* - * Restrict guest_spec_ctrl to supported values. Clear the - * modifiable bits in the host base value and or the - * modifiable bits from the guest value. - */ - guestval = hostval & ~x86_spec_ctrl_mask; - guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; - - /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) - hostval |= ssbd_tif_to_spec_ctrl(ti->flags); - - /* Conditional STIBP enabled? */ - if (static_branch_unlikely(&switch_to_cond_stibp)) - hostval |= stibp_tif_to_spec_ctrl(ti->flags); - if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); @@ -267,14 +286,6 @@ static void __init mds_select_mitigation(void) } } -static void __init mds_print_mitigation(void) -{ - if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) - return; - - pr_info("%s\n", mds_strings[mds_mitigation]); -} - static int __init mds_cmdline(char *str) { if (!boot_cpu_has_bug(X86_BUG_MDS)) @@ -329,7 +340,7 @@ static void __init taa_select_mitigation(void) /* TSX previously disabled by tsx=off */ if (!boot_cpu_has(X86_FEATURE_RTM)) { taa_mitigation = TAA_MITIGATION_TSX_DISABLED; - goto out; + return; } if (cpu_mitigations_off()) { @@ -343,7 +354,7 @@ static void __init taa_select_mitigation(void) */ if (taa_mitigation == TAA_MITIGATION_OFF && mds_mitigation == MDS_MITIGATION_OFF) - goto out; + return; if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) taa_mitigation = TAA_MITIGATION_VERW; @@ -375,18 +386,6 @@ static void __init taa_select_mitigation(void) if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); - - /* - * Update MDS mitigation, if necessary, as the mds_user_clear is - * now enabled for TAA mitigation. - */ - if (mds_mitigation == MDS_MITIGATION_OFF && - boot_cpu_has_bug(X86_BUG_MDS)) { - mds_mitigation = MDS_MITIGATION_FULL; - mds_select_mitigation(); - } -out: - pr_info("%s\n", taa_strings[taa_mitigation]); } static int __init tsx_async_abort_parse_cmdline(char *str) @@ -411,6 +410,151 @@ static int __init tsx_async_abort_parse_cmdline(char *str) early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); #undef pr_fmt +#define pr_fmt(fmt) "MMIO Stale Data: " fmt + +enum mmio_mitigations { + MMIO_MITIGATION_OFF, + MMIO_MITIGATION_UCODE_NEEDED, + MMIO_MITIGATION_VERW, +}; + +/* Default mitigation for Processor MMIO Stale Data vulnerabilities */ +static enum mmio_mitigations mmio_mitigation __ro_after_init = MMIO_MITIGATION_VERW; +static bool mmio_nosmt __ro_after_init = false; + +static const char * const mmio_strings[] = { + [MMIO_MITIGATION_OFF] = "Vulnerable", + [MMIO_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", + [MMIO_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", +}; + +static void __init mmio_select_mitigation(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || + cpu_mitigations_off()) { + mmio_mitigation = MMIO_MITIGATION_OFF; + return; + } + + if (mmio_mitigation == MMIO_MITIGATION_OFF) + return; + + ia32_cap = x86_read_arch_cap_msr(); + + /* + * Enable CPU buffer clear mitigation for host and VMM, if also affected + * by MDS or TAA. Otherwise, enable mitigation for VMM only. + */ + if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && + boot_cpu_has(X86_FEATURE_RTM))) + static_branch_enable(&mds_user_clear); + else + static_branch_enable(&mmio_stale_data_clear); + + /* + * If Processor-MMIO-Stale-Data bug is present and Fill Buffer data can + * be propagated to uncore buffers, clearing the Fill buffers on idle + * is required irrespective of SMT state. + */ + if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + static_branch_enable(&mds_idle_clear); + + /* + * Check if the system has the right microcode. + * + * CPU Fill buffer clear mitigation is enumerated by either an explicit + * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS + * affected systems. + */ + if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + (boot_cpu_has(X86_FEATURE_MD_CLEAR) && + boot_cpu_has(X86_FEATURE_FLUSH_L1D) && + !(ia32_cap & ARCH_CAP_MDS_NO))) + mmio_mitigation = MMIO_MITIGATION_VERW; + else + mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; + + if (mmio_nosmt || cpu_mitigations_auto_nosmt()) + cpu_smt_disable(false); +} + +static int __init mmio_stale_data_parse_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) { + mmio_mitigation = MMIO_MITIGATION_OFF; + } else if (!strcmp(str, "full")) { + mmio_mitigation = MMIO_MITIGATION_VERW; + } else if (!strcmp(str, "full,nosmt")) { + mmio_mitigation = MMIO_MITIGATION_VERW; + mmio_nosmt = true; + } + + return 0; +} +early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); + +#undef pr_fmt +#define pr_fmt(fmt) "" fmt + +static void __init md_clear_update_mitigation(void) +{ + if (cpu_mitigations_off()) + return; + + if (!static_key_enabled(&mds_user_clear)) + goto out; + + /* + * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data + * mitigation, if necessary. + */ + if (mds_mitigation == MDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MDS)) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_select_mitigation(); + } + if (taa_mitigation == TAA_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_TAA)) { + taa_mitigation = TAA_MITIGATION_VERW; + taa_select_mitigation(); + } + if (mmio_mitigation == MMIO_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { + mmio_mitigation = MMIO_MITIGATION_VERW; + mmio_select_mitigation(); + } +out: + if (boot_cpu_has_bug(X86_BUG_MDS)) + pr_info("MDS: %s\n", mds_strings[mds_mitigation]); + if (boot_cpu_has_bug(X86_BUG_TAA)) + pr_info("TAA: %s\n", taa_strings[taa_mitigation]); + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); +} + +static void __init md_clear_select_mitigation(void) +{ + mds_select_mitigation(); + taa_select_mitigation(); + mmio_select_mitigation(); + + /* + * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update + * and print their mitigation after MDS, TAA and MMIO Stale Data + * mitigation selection is done. + */ + md_clear_update_mitigation(); +} + +#undef pr_fmt #define pr_fmt(fmt) "SRBDS: " fmt enum srbds_mitigations { @@ -478,11 +622,13 @@ static void __init srbds_select_mitigation(void) return; /* - * Check to see if this is one of the MDS_NO systems supporting - * TSX that are only exposed to SRBDS when TSX is enabled. + * Check to see if this is one of the MDS_NO systems supporting TSX that + * are only exposed to SRBDS when TSX is enabled or when CPU is affected + * by Processor MMIO Stale Data vulnerability. */ ia32_cap = x86_read_arch_cap_msr(); - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM)) + if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR; @@ -626,12 +772,180 @@ static int __init nospectre_v1_cmdline(char *str) } early_param("nospectre_v1", nospectre_v1_cmdline); -#undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 : " fmt - static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; +#undef pr_fmt +#define pr_fmt(fmt) "RETBleed: " fmt + +enum retbleed_mitigation { + RETBLEED_MITIGATION_NONE, + RETBLEED_MITIGATION_UNRET, + RETBLEED_MITIGATION_IBPB, + RETBLEED_MITIGATION_IBRS, + RETBLEED_MITIGATION_EIBRS, +}; + +enum retbleed_mitigation_cmd { + RETBLEED_CMD_OFF, + RETBLEED_CMD_AUTO, + RETBLEED_CMD_UNRET, + RETBLEED_CMD_IBPB, +}; + +static const char * const retbleed_strings[] = { + [RETBLEED_MITIGATION_NONE] = "Vulnerable", + [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", + [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB", + [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", + [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", +}; + +static enum retbleed_mitigation retbleed_mitigation __ro_after_init = + RETBLEED_MITIGATION_NONE; +static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = + RETBLEED_CMD_AUTO; + +static int __ro_after_init retbleed_nosmt = false; + +static int __init retbleed_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + while (str) { + char *next = strchr(str, ','); + if (next) { + *next = 0; + next++; + } + + if (!strcmp(str, "off")) { + retbleed_cmd = RETBLEED_CMD_OFF; + } else if (!strcmp(str, "auto")) { + retbleed_cmd = RETBLEED_CMD_AUTO; + } else if (!strcmp(str, "unret")) { + retbleed_cmd = RETBLEED_CMD_UNRET; + } else if (!strcmp(str, "ibpb")) { + retbleed_cmd = RETBLEED_CMD_IBPB; + } else if (!strcmp(str, "nosmt")) { + retbleed_nosmt = true; + } else { + pr_err("Ignoring unknown retbleed option (%s).", str); + } + + str = next; + } + + return 0; +} +early_param("retbleed", retbleed_parse_cmdline); + +#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" +#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" + +static void __init retbleed_select_mitigation(void) +{ + bool mitigate_smt = false; + + if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) + return; + + switch (retbleed_cmd) { + case RETBLEED_CMD_OFF: + return; + + case RETBLEED_CMD_UNRET: + if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) { + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + } else { + pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n"); + goto do_cmd_auto; + } + break; + + case RETBLEED_CMD_IBPB: + if (!boot_cpu_has(X86_FEATURE_IBPB)) { + pr_err("WARNING: CPU does not support IBPB.\n"); + goto do_cmd_auto; + } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } else { + pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); + goto do_cmd_auto; + } + break; + +do_cmd_auto: + case RETBLEED_CMD_AUTO: + default: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { + if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB)) + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } + + /* + * The Intel mitigation (IBRS or eIBRS) was already selected in + * spectre_v2_select_mitigation(). 'retbleed_mitigation' will + * be set accordingly below. + */ + + break; + } + + switch (retbleed_mitigation) { + case RETBLEED_MITIGATION_UNRET: + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + pr_err(RETBLEED_UNTRAIN_MSG); + + mitigate_smt = true; + break; + + case RETBLEED_MITIGATION_IBPB: + setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + mitigate_smt = true; + break; + + default: + break; + } + + if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) && + (retbleed_nosmt || cpu_mitigations_auto_nosmt())) + cpu_smt_disable(false); + + /* + * Let IBRS trump all on Intel without affecting the effects of the + * retbleed= cmdline option. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + switch (spectre_v2_enabled) { + case SPECTRE_V2_IBRS: + retbleed_mitigation = RETBLEED_MITIGATION_IBRS; + break; + case SPECTRE_V2_EIBRS: + case SPECTRE_V2_EIBRS_RETPOLINE: + case SPECTRE_V2_EIBRS_LFENCE: + retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; + break; + default: + pr_err(RETBLEED_INTEL_MSG); + } + } + + pr_info("%s\n", retbleed_strings[retbleed_mitigation]); +} + +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt + static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = SPECTRE_V2_USER_NONE; static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = @@ -702,6 +1016,7 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_EIBRS, SPECTRE_V2_CMD_EIBRS_RETPOLINE, SPECTRE_V2_CMD_EIBRS_LFENCE, + SPECTRE_V2_CMD_IBRS, }; enum spectre_v2_user_cmd { @@ -742,13 +1057,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure) pr_info("spectre_v2_user=%s forced on command line.\n", reason); } +static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd; + static enum spectre_v2_user_cmd __init -spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) +spectre_v2_parse_user_cmdline(void) { char arg[20]; int ret, i; - switch (v2_cmd) { + switch (spectre_v2_cmd) { case SPECTRE_V2_CMD_NONE: return SPECTRE_V2_USER_CMD_NONE; case SPECTRE_V2_CMD_FORCE: @@ -774,15 +1091,16 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) return SPECTRE_V2_USER_CMD_AUTO; } -static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) +static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) { - return (mode == SPECTRE_V2_EIBRS || - mode == SPECTRE_V2_EIBRS_RETPOLINE || - mode == SPECTRE_V2_EIBRS_LFENCE); + return mode == SPECTRE_V2_IBRS || + mode == SPECTRE_V2_EIBRS || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_EIBRS_LFENCE; } static void __init -spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) +spectre_v2_user_select_mitigation(void) { enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; bool smt_possible = IS_ENABLED(CONFIG_SMP); @@ -795,7 +1113,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) cpu_smt_control == CPU_SMT_NOT_SUPPORTED) smt_possible = false; - cmd = spectre_v2_parse_user_cmdline(v2_cmd); + cmd = spectre_v2_parse_user_cmdline(); switch (cmd) { case SPECTRE_V2_USER_CMD_NONE: goto set_mode; @@ -843,12 +1161,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) } /* - * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not - * required. + * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible, + * STIBP is not required. */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + spectre_v2_in_ibrs_mode(spectre_v2_enabled)) return; /* @@ -860,6 +1178,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) mode = SPECTRE_V2_USER_STRICT_PREFERRED; + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { + if (mode != SPECTRE_V2_USER_STRICT && + mode != SPECTRE_V2_USER_STRICT_PREFERRED) + pr_info("Selecting STIBP always-on mode to complement retbleed mitigation\n"); + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + } + spectre_v2_user_stibp = mode; set_mode: @@ -873,6 +1198,7 @@ static const char * const spectre_v2_strings[] = { [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", + [SPECTRE_V2_IBRS] = "Mitigation: IBRS", }; static const struct { @@ -890,6 +1216,7 @@ static const struct { { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, { "auto", SPECTRE_V2_CMD_AUTO, false }, + { "ibrs", SPECTRE_V2_CMD_IBRS, false }, }; static void __init spec_v2_print_cond(const char *reason, bool secure) @@ -952,6 +1279,30 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } + if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) { + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { + pr_err("%s selected but not Intel CPU. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) { + pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) { + pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + spec_v2_print_cond(mitigation_options[i].option, mitigation_options[i].secure); return cmd; @@ -967,6 +1318,22 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +/* Disable in-kernel use of non-RSB RET predictors */ +static void __init spec_ctrl_disable_kernel_rrsba(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + return; + + ia32_cap = x86_read_arch_cap_msr(); + + if (ia32_cap & ARCH_CAP_RRSBA) { + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + write_spec_ctrl_current(x86_spec_ctrl_base, true); + } +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -991,6 +1358,15 @@ static void __init spectre_v2_select_mitigation(void) break; } + if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && + boot_cpu_has_bug(X86_BUG_RETBLEED) && + retbleed_cmd != RETBLEED_CMD_OFF && + boot_cpu_has(X86_FEATURE_IBRS) && + boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + mode = SPECTRE_V2_IBRS; + break; + } + mode = spectre_v2_select_retpoline(); break; @@ -1007,6 +1383,10 @@ static void __init spectre_v2_select_mitigation(void) mode = spectre_v2_select_retpoline(); break; + case SPECTRE_V2_CMD_IBRS: + mode = SPECTRE_V2_IBRS; + break; + case SPECTRE_V2_CMD_EIBRS: mode = SPECTRE_V2_EIBRS; break; @@ -1023,10 +1403,9 @@ static void __init spectre_v2_select_mitigation(void) if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); - if (spectre_v2_in_eibrs_mode(mode)) { - /* Force it so VMEXIT will restore correctly */ + if (spectre_v2_in_ibrs_mode(mode)) { x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); } switch (mode) { @@ -1034,6 +1413,10 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_EIBRS: break; + case SPECTRE_V2_IBRS: + setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS); + break; + case SPECTRE_V2_LFENCE: case SPECTRE_V2_EIBRS_LFENCE: setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); @@ -1045,43 +1428,107 @@ static void __init spectre_v2_select_mitigation(void) break; } + /* + * Disable alternate RSB predictions in kernel when indirect CALLs and + * JMPs gets protection against BHI and Intramode-BTI, but RET + * prediction from a non-RSB predictor is still a risk. + */ + if (mode == SPECTRE_V2_EIBRS_LFENCE || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_RETPOLINE) + spec_ctrl_disable_kernel_rrsba(); + spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); /* - * If spectre v2 protection has been enabled, unconditionally fill - * RSB during a context switch; this protects against two independent - * issues: + * If Spectre v2 protection has been enabled, fill the RSB during a + * context switch. In general there are two types of RSB attacks + * across context switches, for which the CALLs/RETs may be unbalanced. + * + * 1) RSB underflow + * + * Some Intel parts have "bottomless RSB". When the RSB is empty, + * speculated return targets may come from the branch predictor, + * which could have a user-poisoned BTB or BHB entry. + * + * AMD has it even worse: *all* returns are speculated from the BTB, + * regardless of the state of the RSB. + * + * When IBRS or eIBRS is enabled, the "user -> kernel" attack + * scenario is mitigated by the IBRS branch prediction isolation + * properties, so the RSB buffer filling wouldn't be necessary to + * protect against this type of attack. + * + * The "user -> user" attack scenario is mitigated by RSB filling. + * + * 2) Poisoned RSB entry + * + * If the 'next' in-kernel return stack is shorter than 'prev', + * 'next' could be tricked into speculating with a user-poisoned RSB + * entry. * - * - RSB underflow (and switch to BTB) on Skylake+ - * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs + * The "user -> kernel" attack scenario is mitigated by SMEP and + * eIBRS. + * + * The "user -> user" scenario, also known as SpectreBHB, requires + * RSB clearing. + * + * So to mitigate all cases, unconditionally fill RSB on context + * switches. + * + * FIXME: Is this pointless for retbleed-affected AMD? */ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); /* - * Retpoline means the kernel is safe because it has no indirect - * branches. Enhanced IBRS protects firmware too, so, enable restricted - * speculation around firmware calls only when Enhanced IBRS isn't - * supported. + * Similar to context switches, there are two types of RSB attacks + * after vmexit: + * + * 1) RSB underflow + * + * 2) Poisoned RSB entry + * + * When retpoline is enabled, both are mitigated by filling/clearing + * the RSB. + * + * When IBRS is enabled, while #1 would be mitigated by the IBRS branch + * prediction isolation protections, RSB still needs to be cleared + * because of #2. Note that SMEP provides no protection here, unlike + * user-space-poisoned RSB entries. + * + * eIBRS, on the other hand, has RSB-poisoning protections, so it + * doesn't need RSB clearing after vmexit. + */ + if (boot_cpu_has(X86_FEATURE_RETPOLINE) || + boot_cpu_has(X86_FEATURE_KERNEL_IBRS)) + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); + + /* + * Retpoline protects the kernel, but doesn't protect firmware. IBRS + * and Enhanced IBRS protect firmware too, so enable IBRS around + * firmware calls only when IBRS / Enhanced IBRS aren't otherwise + * enabled. * * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because * the user might select retpoline on the kernel command line and if * the CPU supports Enhanced IBRS, kernel might un-intentionally not * enable IBRS around firmware calls. */ - if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { + if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } /* Set up IBPB and STIBP depending on the general spectre V2 command */ - spectre_v2_user_select_mitigation(cmd); + spectre_v2_cmd = cmd; } static void update_stibp_msr(void * __unused) { - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); + write_spec_ctrl_current(val, true); } /* Update x86_spec_ctrl_base in case SMT state changed. */ @@ -1116,6 +1563,8 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { + u64 ia32_cap = x86_read_arch_cap_msr(); + /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. @@ -1127,14 +1576,17 @@ static void update_mds_branch_idle(void) if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) return; - if (sched_smt_active()) + if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); - else + } else if (mmio_mitigation == MMIO_MITIGATION_OFF || + (ia32_cap & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); + } } #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" #define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" +#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" void cpu_bugs_smt_update(void) { @@ -1179,6 +1631,16 @@ void cpu_bugs_smt_update(void) break; } + switch (mmio_mitigation) { + case MMIO_MITIGATION_VERW: + case MMIO_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(MMIO_MSG_SMT); + break; + case MMIO_MITIGATION_OFF: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -1283,16 +1745,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) } /* - * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper - * bit in the mask to allow guests to use the mitigation even in the - * case where the host does not enable it. - */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) { - x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; - } - - /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass @@ -1309,7 +1761,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); } } @@ -1560,7 +2012,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); @@ -1781,9 +2233,23 @@ static ssize_t tsx_async_abort_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t mmio_stale_data_show_state(char *buf) +{ + if (mmio_mitigation == MMIO_MITIGATION_OFF) + return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + return sysfs_emit(buf, "%s; SMT Host state unknown\n", + mmio_strings[mmio_mitigation]); + } + + return sysfs_emit(buf, "%s; SMT %s\n", mmio_strings[mmio_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); +} + static char *stibp_state(void) { - if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + if (spectre_v2_in_ibrs_mode(spectre_v2_enabled)) return ""; switch (spectre_v2_user_stibp) { @@ -1839,6 +2305,24 @@ static ssize_t srbds_show_state(char *buf) return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); } +static ssize_t retbleed_show_state(char *buf) +{ + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); + + return sprintf(buf, "%s; SMT %s\n", + retbleed_strings[retbleed_mitigation], + !sched_smt_active() ? "disabled" : + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? + "enabled with STIBP protection" : "vulnerable"); + } + + return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -1881,6 +2365,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SRBDS: return srbds_show_state(buf); + case X86_BUG_MMIO_STALE_DATA: + return mmio_stale_data_show_state(buf); + + case X86_BUG_RETBLEED: + return retbleed_show_state(buf); + default: break; } @@ -1932,4 +2422,14 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char * { return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS); } + +ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); +} + +ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c296cb1c0113..736262a76a12 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1205,24 +1205,60 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { {} }; +#define VULNBL(vendor, family, model, blacklist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) + #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ INTEL_FAM6_##model, steppings, \ X86_FEATURE_ANY, issues) +#define VULNBL_AMD(family, blacklist) \ + VULNBL(AMD, family, X86_MODEL_ANY, blacklist) + +#define VULNBL_HYGON(family, blacklist) \ + VULNBL(HYGON, family, X86_MODEL_ANY, blacklist) + #define SRBDS BIT(0) +/* CPU is affected by X86_BUG_MMIO_STALE_DATA */ +#define MMIO BIT(1) +/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ +#define MMIO_SBDS BIT(2) +/* CPU is affected by RETbleed, speculating where you would not expect it */ +#define RETBLEED BIT(3) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + + VULNBL_AMD(0x15, RETBLEED), + VULNBL_AMD(0x16, RETBLEED), + VULNBL_AMD(0x17, RETBLEED), + VULNBL_HYGON(0x18, RETBLEED), {} }; @@ -1243,6 +1279,13 @@ u64 x86_read_arch_cap_msr(void) return ia32_cap; } +static bool arch_cap_mmio_immune(u64 ia32_cap) +{ + return (ia32_cap & ARCH_CAP_FBSDP_NO && + ia32_cap & ARCH_CAP_PSDP_NO && + ia32_cap & ARCH_CAP_SBDR_SSDP_NO); +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = x86_read_arch_cap_msr(); @@ -1296,12 +1339,32 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) /* * SRBDS affects CPUs which support RDRAND or RDSEED and are listed * in the vulnerability blacklist. + * + * Some of the implications and mitigation of Shared Buffers Data + * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as + * SRBDS. */ if ((cpu_has(c, X86_FEATURE_RDRAND) || cpu_has(c, X86_FEATURE_RDSEED)) && - cpu_matches(cpu_vuln_blacklist, SRBDS)) + cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS)) setup_force_cpu_bug(X86_BUG_SRBDS); + /* + * Processor MMIO Stale Data bug enumeration + * + * Affected CPU list is generally enough to enumerate the vulnerability, + * but for virtualization case check for ARCH_CAP MSR bits also, VMM may + * not want the guest to enumerate the bug. + */ + if (cpu_matches(cpu_vuln_blacklist, MMIO) && + !arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + + if (!cpu_has(c, X86_FEATURE_BTC_NO)) { + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + setup_force_cpu_bug(X86_BUG_RETBLEED); + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 2a8e584fc991..7c9b5893c30a 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -61,6 +61,8 @@ static inline void tsx_init(void) { } static inline void tsx_ap_init(void) { } #endif /* CONFIG_CPU_SUP_INTEL */ +extern void init_spectral_chicken(struct cpuinfo_x86 *c); + extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 3fcdda4c1e11..21fd425088fe 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -302,6 +302,12 @@ static void init_hygon(struct cpuinfo_x86 *c) /* get apicid instead of initial apic id from cpuid */ c->apicid = hard_smp_processor_id(); + /* + * XXX someone from Hygon needs to confirm this DTRT + * + init_spectral_chicken(c); + */ + set_cpu_cap(c, X86_FEATURE_ZEN); set_cpu_cap(c, X86_FEATURE_CPB); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index dbaa8326d6f2..fd44b54c90d5 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, + { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 5b4efc927d80..24b9fa89aa27 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -301,7 +301,7 @@ union ftrace_op_code_union { } __attribute__((packed)); }; -#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS) +#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS)) static unsigned long create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) @@ -357,7 +357,10 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) goto fail; ip = trampoline + size; - memcpy(ip, retq, RET_SIZE); + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE); + else + memcpy(ip, retq, sizeof(retq)); /* No need to test direct calls on created trampolines */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 4ec13608d3c6..dfeb227de561 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -175,6 +175,7 @@ SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL) jmp ftrace_epilogue SYM_FUNC_END(ftrace_caller); +STACK_FRAME_NON_STANDARD_FP(ftrace_caller) SYM_FUNC_START(ftrace_epilogue) /* @@ -282,6 +283,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) jmp ftrace_epilogue SYM_FUNC_END(ftrace_regs_caller) +STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ @@ -311,10 +313,14 @@ trace: jmp ftrace_stub SYM_FUNC_END(__fentry__) EXPORT_SYMBOL(__fentry__) +STACK_FRAME_NON_STANDARD_FP(__fentry__) + #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER -SYM_FUNC_START(return_to_handler) +SYM_CODE_START(return_to_handler) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR subq $16, %rsp /* Save the return values */ @@ -339,7 +345,6 @@ SYM_FUNC_START(return_to_handler) int3 .Ldo_rop: mov %rdi, (%rsp) - UNWIND_HINT_FUNC RET -SYM_FUNC_END(return_to_handler) +SYM_CODE_END(return_to_handler) #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index bd4a34100ed0..6a3cfaf6b72a 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -426,10 +426,12 @@ void __init do_early_exception(struct pt_regs *regs, int trapnr) /* Don't add a printk in there. printk relies on the PDA which is not initialized yet. */ -static void __init clear_bss(void) +void __init clear_bss(void) { memset(__bss_start, 0, (unsigned long) __bss_stop - (unsigned long) __bss_start); + memset(__brk_base, 0, + (unsigned long) __brk_limit - (unsigned long) __brk_base); } static unsigned long get_cmd_line_ptr(void) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index eb8656bac99b..9b7acc9c7874 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -23,6 +23,7 @@ #include <asm/cpufeatures.h> #include <asm/percpu.h> #include <asm/nops.h> +#include <asm/nospec-branch.h> #include <asm/bootparam.h> #include <asm/export.h> #include <asm/pgtable_32.h> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 92c4afa2b729..d860d437631b 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -389,6 +389,8 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb) UNWIND_HINT_IRET_REGS offset=8 ENDBR + ANNOTATE_UNRET_END + /* Build pt_regs */ PUSH_AND_CLEAR_REGS @@ -448,6 +450,7 @@ SYM_CODE_END(early_idt_handler_array) SYM_CODE_START_LOCAL(early_idt_handler_common) UNWIND_HINT_IRET_REGS offset=16 + ANNOTATE_UNRET_END /* * The stack is the hardware frame, an error code or zero, and the * vector number. @@ -497,6 +500,8 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb) UNWIND_HINT_IRET_REGS offset=8 ENDBR + ANNOTATE_UNRET_END + /* Build pt_regs */ PUSH_AND_CLEAR_REGS diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index b98ffcf4d250..67828d973389 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr, { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL, - *retpolines = NULL, *ibt_endbr = NULL; + *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr, orc_ip = s; if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) retpolines = s; + if (!strcmp(".return_sites", secstrings + s->sh_name)) + returns = s; if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) ibt_endbr = s; } @@ -287,6 +289,10 @@ int module_finalize(const Elf_Ehdr *hdr, void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); } + if (returns) { + void *rseg = (void *)returns->sh_addr; + apply_returns(rseg, rseg + returns->sh_size); + } if (alt) { /* patch .altinstructions */ void *aseg = (void *)alt->sh_addr; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9b2772b7e1f3..d456ce21c255 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - wrmsrl(MSR_IA32_SPEC_CTRL, msr); + write_spec_ctrl_current(msr, false); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index fcc8a7699103..c7c4b1917336 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -7,10 +7,12 @@ #include <linux/linkage.h> #include <asm/page_types.h> #include <asm/kexec.h> +#include <asm/nospec-branch.h> #include <asm/processor-flags.h> /* - * Must be relocatable PIC code callable as a C function + * Must be relocatable PIC code callable as a C function, in particular + * there must be a plain RET and not jump to return thunk. */ #define PTR(x) (x << 2) @@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) movl %edi, %eax addl $(identity_mapped - relocate_kernel), %eax pushl %eax - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) @@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) xorl %edx, %edx xorl %esi, %esi xorl %ebp, %ebp - RET + ANNOTATE_UNRET_SAFE + ret + int3 1: popl %edx movl CP_PA_SWAP_PAGE(%edi), %esp addl $PAGE_SIZE, %esp 2: + ANNOTATE_RETPOLINE_SAFE call *%edx /* get the re-entry point of the peer system */ @@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) movl %edi, %eax addl $(virtual_mapped - relocate_kernel), %eax pushl %eax - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) @@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) popl %edi popl %esi popl %ebx - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(virtual_mapped) /* Do the copies */ @@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) popl %edi popl %ebx popl %ebp - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(swap_pages) .globl kexec_control_code_size diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index c1d8626c53b6..4809c0dc4eb0 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -13,7 +13,8 @@ #include <asm/unwind_hints.h> /* - * Must be relocatable PIC code callable as a C function + * Must be relocatable PIC code callable as a C function, in particular + * there must be a plain RET and not jump to return thunk. */ #define PTR(x) (x << 3) @@ -105,7 +106,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) /* jump to identity mapped page */ addq $(identity_mapped - relocate_kernel), %r8 pushq %r8 - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) @@ -200,7 +203,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) xorl %r14d, %r14d xorl %r15d, %r15d - RET + ANNOTATE_UNRET_SAFE + ret + int3 1: popq %rdx @@ -219,7 +224,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) call swap_pages movq $virtual_mapped, %rax pushq %rax - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) @@ -241,7 +248,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) popq %r12 popq %rbp popq %rbx - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(virtual_mapped) /* Do the copies */ @@ -298,7 +307,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) lea PAGE_SIZE(%rax), %rsi jmp 0b 3: - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(swap_pages) .globl kexec_control_code_size diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index db2b350a37b7..bba1abd05bfe 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/dev_printk.h> #include <linux/ioport.h> +#include <linux/printk.h> #include <asm/e820/api.h> +#include <asm/pci_x86.h> static void resource_clip(struct resource *res, resource_size_t start, resource_size_t end) @@ -24,14 +25,14 @@ static void resource_clip(struct resource *res, resource_size_t start, res->start = end + 1; } -void remove_e820_regions(struct device *dev, struct resource *avail) +static void remove_e820_regions(struct resource *avail) { int i; struct e820_entry *entry; u64 e820_start, e820_end; struct resource orig = *avail; - if (!(avail->flags & IORESOURCE_MEM)) + if (!pci_use_e820) return; for (i = 0; i < e820_table->nr_entries; i++) { @@ -41,7 +42,7 @@ void remove_e820_regions(struct device *dev, struct resource *avail) resource_clip(avail, e820_start, e820_end); if (orig.start != avail->start || orig.end != avail->end) { - dev_info(dev, "clipped %pR to %pR for e820 entry [mem %#010Lx-%#010Lx]\n", + pr_info("clipped %pR to %pR for e820 entry [mem %#010Lx-%#010Lx]\n", &orig, avail, e820_start, e820_end); orig = *avail; } @@ -55,6 +56,9 @@ void arch_remove_reservations(struct resource *avail) * the low 1MB unconditionally, as this area is needed for some ISA * cards requiring a memory range, e.g. the i82365 PCMCIA controller. */ - if (avail->flags & IORESOURCE_MEM) + if (avail->flags & IORESOURCE_MEM) { resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END); + + remove_e820_regions(avail); + } } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3ebb85327edb..bd6c6fd373ae 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -67,11 +67,6 @@ RESERVE_BRK(dmi_alloc, 65536); #endif -/* - * Range of the BSS area. The size of the BSS area is determined - * at link time, with RESERVE_BRK() facility reserving additional - * chunks. - */ unsigned long _brk_start = (unsigned long)__brk_base; unsigned long _brk_end = (unsigned long)__brk_base; diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index aa72cefdd5be..aaaba85d6d7f 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -12,13 +12,21 @@ enum insn_type { }; /* + * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such + * that there is no false-positive trampoline identification while also being a + * speculation stop. + */ +static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc }; + +/* * cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax */ static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; -static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) +static void __ref __static_call_transform(void *insn, enum insn_type type, + void *func, bool modinit) { const void *emulate = NULL; int size = CALL_INSN_SIZE; @@ -43,14 +51,17 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void break; case RET: - code = &retinsn; + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); + else + code = &retinsn; break; } if (memcmp(insn, code, size) == 0) return; - if (unlikely(system_state == SYSTEM_BOOTING)) + if (system_state == SYSTEM_BOOTING || modinit) return text_poke_early(insn, code, size); text_poke_bp(insn, code, size, emulate); @@ -60,7 +71,7 @@ static void __static_call_validate(void *insn, bool tail, bool tramp) { u8 opcode = *(u8 *)insn; - if (tramp && memcmp(insn+5, "SCT", 3)) { + if (tramp && memcmp(insn+5, tramp_ud, 3)) { pr_err("trampoline signature fail"); BUG(); } @@ -104,14 +115,42 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) if (tramp) { __static_call_validate(tramp, true, true); - __static_call_transform(tramp, __sc_insn(!func, true), func); + __static_call_transform(tramp, __sc_insn(!func, true), func, false); } if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) { __static_call_validate(site, tail, false); - __static_call_transform(site, __sc_insn(!func, tail), func); + __static_call_transform(site, __sc_insn(!func, tail), func, false); } mutex_unlock(&text_mutex); } EXPORT_SYMBOL_GPL(arch_static_call_transform); + +#ifdef CONFIG_RETHUNK +/* + * This is called by apply_returns() to fix up static call trampolines, + * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as + * having a return trampoline. + * + * The problem is that static_call() is available before determining + * X86_FEATURE_RETHUNK and, by implication, running alternatives. + * + * This means that __static_call_transform() above can have overwritten the + * return trampoline and we now need to fix things up to be consistent. + */ +bool __static_call_fixup(void *tramp, u8 op, void *dest) +{ + if (memcmp(tramp+5, tramp_ud, 3)) { + /* Not a trampoline site, not our problem. */ + return false; + } + + mutex_lock(&text_mutex); + if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) + __static_call_transform(tramp, RET, NULL, true); + mutex_unlock(&text_mutex); + + return true; +} +#endif diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index f5f6dc2e8007..15f29053cec4 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -141,7 +141,7 @@ SECTIONS #ifdef CONFIG_RETPOLINE __indirect_thunk_start = .; - *(.text.__x86.indirect_thunk) + *(.text.__x86.*) __indirect_thunk_end = .; #endif } :text =0xcccc @@ -283,6 +283,13 @@ SECTIONS *(.retpoline_sites) __retpoline_sites_end = .; } + + . = ALIGN(8); + .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) { + __return_sites = .; + *(.return_sites) + __return_sites_end = .; + } #endif #ifdef CONFIG_X86_KERNEL_IBT @@ -388,7 +395,7 @@ SECTIONS .brk : AT(ADDR(.brk) - LOAD_OFFSET) { __brk_base = .; . += 64 * 1024; /* 64k alignment slop space */ - *(.brk_reservation) /* areas brk users have reserved */ + *(.bss..brk) /* areas brk users have reserved */ __brk_limit = .; } diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 89b11e7dca8a..f8382abe22ff 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -189,9 +189,6 @@ #define X8(x...) X4(x), X4(x) #define X16(x...) X8(x), X8(x) -#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) -#define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT)) - struct opcode { u64 flags; u8 intercept; @@ -306,9 +303,15 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for * different operand sizes can be reached by calculation, rather than a jump * table (which would be bigger than the code). + * + * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR + * and 1 for the straight line speculation INT3, leaves 7 bytes for the + * body of the function. Currently none is larger than 4. */ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); +#define FASTOP_SIZE 16 + #define __FOP_FUNC(name) \ ".align " __stringify(FASTOP_SIZE) " \n\t" \ ".type " name ", @function \n\t" \ @@ -325,13 +328,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); #define FOP_RET(name) \ __FOP_RET(#name) -#define FOP_START(op) \ +#define __FOP_START(op, align) \ extern void em_##op(struct fastop *fake); \ asm(".pushsection .text, \"ax\" \n\t" \ ".global em_" #op " \n\t" \ - ".align " __stringify(FASTOP_SIZE) " \n\t" \ + ".align " __stringify(align) " \n\t" \ "em_" #op ":\n\t" +#define FOP_START(op) __FOP_START(op, FASTOP_SIZE) + #define FOP_END \ ".popsection") @@ -435,17 +440,12 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); /* * Depending on .config the SETcc functions look like: * - * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] - * SETcc %al [3 bytes] - * RET [1 byte] - * INT3 [1 byte; CONFIG_SLS] - * - * Which gives possible sizes 4, 5, 8 or 9. When rounded up to the - * next power-of-two alignment they become 4, 8 or 16 resp. + * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] + * SETcc %al [3 bytes] + * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] + * INT3 [1 byte; CONFIG_SLS] */ -#define SETCC_LENGTH (ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS)) -#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT) -static_assert(SETCC_LENGTH <= SETCC_ALIGN); +#define SETCC_ALIGN 16 #define FOP_SETCC(op) \ ".align " __stringify(SETCC_ALIGN) " \n\t" \ @@ -453,9 +453,10 @@ static_assert(SETCC_LENGTH <= SETCC_ALIGN); #op ": \n\t" \ ASM_ENDBR \ #op " %al \n\t" \ - __FOP_RET(#op) + __FOP_RET(#op) \ + ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t" -FOP_START(setcc) +__FOP_START(setcc, SETCC_ALIGN) FOP_SETCC(seto) FOP_SETCC(setno) FOP_SETCC(setc) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f1bdac3f5aa8..0e68b4c937fc 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2039,6 +2039,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) } } +static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic) +{ + struct kvm *kvm = apic->vcpu->kvm; + + if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm)) + return; + + if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id) + return; + + kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED); +} + static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) { int ret = 0; @@ -2047,10 +2060,12 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) switch (reg) { case APIC_ID: /* Local APIC ID */ - if (!apic_x2apic_mode(apic)) + if (!apic_x2apic_mode(apic)) { kvm_apic_set_xapic_id(apic, val >> 24); - else + kvm_lapic_xapic_id_updated(apic); + } else { ret = 1; + } break; case APIC_TASKPRI: @@ -2336,8 +2351,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) MSR_IA32_APICBASE_BASE; if ((value & MSR_IA32_APICBASE_ENABLE) && - apic->base_address != APIC_DEFAULT_PHYS_BASE) - pr_warn_once("APIC base relocation is unsupported by KVM"); + apic->base_address != APIC_DEFAULT_PHYS_BASE) { + kvm_set_apicv_inhibit(apic->vcpu->kvm, + APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); + } } void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) @@ -2648,6 +2665,8 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); } + } else { + kvm_lapic_xapic_id_updated(vcpu->arch.apic); } return 0; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e826ee9138fa..17252f39bd7c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3411,7 +3411,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT), i << 30, PT32_ROOT_LEVEL, true); mmu->pae_root[i] = root | PT_PRESENT_MASK | - shadow_me_mask; + shadow_me_value; } mmu->root.hpa = __pa(mmu->pae_root); } else { diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 54fe03714f8a..d1bc5820ea46 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -291,58 +291,91 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu) static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source, u32 icrl, u32 icrh, u32 index) { - u32 dest, apic_id; - struct kvm_vcpu *vcpu; + u32 l1_physical_id, dest; + struct kvm_vcpu *target_vcpu; int dest_mode = icrl & APIC_DEST_MASK; int shorthand = icrl & APIC_SHORT_MASK; struct kvm_svm *kvm_svm = to_kvm_svm(kvm); - u32 *avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page); if (shorthand != APIC_DEST_NOSHORT) return -EINVAL; - /* - * The AVIC incomplete IPI #vmexit info provides index into - * the physical APIC ID table, which can be used to derive - * guest physical APIC ID. - */ + if (apic_x2apic_mode(source)) + dest = icrh; + else + dest = GET_APIC_DEST_FIELD(icrh); + if (dest_mode == APIC_DEST_PHYSICAL) { - apic_id = index; + /* broadcast destination, use slow path */ + if (apic_x2apic_mode(source) && dest == X2APIC_BROADCAST) + return -EINVAL; + if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST) + return -EINVAL; + + l1_physical_id = dest; + + if (WARN_ON_ONCE(l1_physical_id != index)) + return -EINVAL; + } else { - if (!apic_x2apic_mode(source)) { - /* For xAPIC logical mode, the index is for logical APIC table. */ - apic_id = avic_logical_id_table[index] & 0x1ff; + u32 bitmap, cluster; + int logid_index; + + if (apic_x2apic_mode(source)) { + /* 16 bit dest mask, 16 bit cluster id */ + bitmap = dest & 0xFFFF0000; + cluster = (dest >> 16) << 4; + } else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) { + /* 8 bit dest mask*/ + bitmap = dest; + cluster = 0; } else { - return -EINVAL; + /* 4 bit desk mask, 4 bit cluster id */ + bitmap = dest & 0xF; + cluster = (dest >> 4) << 2; } - } - /* - * Assuming vcpu ID is the same as physical apic ID, - * and use it to retrieve the target vCPU. - */ - vcpu = kvm_get_vcpu_by_id(kvm, apic_id); - if (!vcpu) - return -EINVAL; + if (unlikely(!bitmap)) + /* guest bug: nobody to send the logical interrupt to */ + return 0; - if (apic_x2apic_mode(vcpu->arch.apic)) - dest = icrh; - else - dest = GET_APIC_DEST_FIELD(icrh); + if (!is_power_of_2(bitmap)) + /* multiple logical destinations, use slow path */ + return -EINVAL; - /* - * Try matching the destination APIC ID with the vCPU. - */ - if (kvm_apic_match_dest(vcpu, source, shorthand, dest, dest_mode)) { - vcpu->arch.apic->irr_pending = true; - svm_complete_interrupt_delivery(vcpu, - icrl & APIC_MODE_MASK, - icrl & APIC_INT_LEVELTRIG, - icrl & APIC_VECTOR_MASK); - return 0; + logid_index = cluster + __ffs(bitmap); + + if (apic_x2apic_mode(source)) { + l1_physical_id = logid_index; + } else { + u32 *avic_logical_id_table = + page_address(kvm_svm->avic_logical_id_table_page); + + u32 logid_entry = avic_logical_id_table[logid_index]; + + if (WARN_ON_ONCE(index != logid_index)) + return -EINVAL; + + /* guest bug: non existing/reserved logical destination */ + if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK))) + return 0; + + l1_physical_id = logid_entry & + AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; + } } - return -EINVAL; + target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id); + if (unlikely(!target_vcpu)) + /* guest bug: non existing vCPU is a target of this IPI*/ + return 0; + + target_vcpu->arch.apic->irr_pending = true; + svm_complete_interrupt_delivery(target_vcpu, + icrl & APIC_MODE_MASK, + icrl & APIC_INT_LEVELTRIG, + icrl & APIC_VECTOR_MASK); + return 0; } static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source, @@ -508,35 +541,6 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) return ret; } -static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) -{ - u64 *old, *new; - struct vcpu_svm *svm = to_svm(vcpu); - u32 id = kvm_xapic_id(vcpu->arch.apic); - - if (vcpu->vcpu_id == id) - return 0; - - old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id); - new = avic_get_physical_id_entry(vcpu, id); - if (!new || !old) - return 1; - - /* We need to move physical_id_entry to new offset */ - *new = *old; - *old = 0ULL; - to_svm(vcpu)->avic_physical_id_cache = new; - - /* - * Also update the guest physical APIC ID in the logical - * APIC ID table entry if already setup the LDR. - */ - if (svm->ldr_reg) - avic_handle_ldr_update(vcpu); - - return 0; -} - static void avic_handle_dfr_update(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -555,10 +559,6 @@ static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu) AVIC_UNACCEL_ACCESS_OFFSET_MASK; switch (offset) { - case APIC_ID: - if (avic_handle_apic_id_update(vcpu)) - return 0; - break; case APIC_LDR: if (avic_handle_ldr_update(vcpu)) return 0; @@ -650,8 +650,6 @@ int avic_init_vcpu(struct vcpu_svm *svm) void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu) { - if (avic_handle_apic_id_update(vcpu) != 0) - return; avic_handle_dfr_update(vcpu); avic_handle_ldr_update(vcpu); } @@ -910,7 +908,9 @@ bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason) BIT(APICV_INHIBIT_REASON_PIT_REINJ) | BIT(APICV_INHIBIT_REASON_X2APIC) | BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | - BIT(APICV_INHIBIT_REASON_SEV); + BIT(APICV_INHIBIT_REASON_SEV) | + BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | + BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); return supported & BIT(reason); } @@ -946,7 +946,7 @@ out: return ret; } -void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { u64 entry; int h_physical_id = kvm_cpu_get_apicid(cpu); @@ -978,7 +978,7 @@ void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true); } -void __avic_vcpu_put(struct kvm_vcpu *vcpu) +void avic_vcpu_put(struct kvm_vcpu *vcpu) { u64 entry; struct vcpu_svm *svm = to_svm(vcpu); @@ -997,25 +997,6 @@ void __avic_vcpu_put(struct kvm_vcpu *vcpu) WRITE_ONCE(*(svm->avic_physical_id_cache), entry); } -static void avic_vcpu_load(struct kvm_vcpu *vcpu) -{ - int cpu = get_cpu(); - - WARN_ON(cpu != vcpu->cpu); - - __avic_vcpu_load(vcpu, cpu); - - put_cpu(); -} - -static void avic_vcpu_put(struct kvm_vcpu *vcpu) -{ - preempt_disable(); - - __avic_vcpu_put(vcpu); - - preempt_enable(); -} void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { @@ -1042,7 +1023,7 @@ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) vmcb_mark_dirty(vmcb, VMCB_AVIC); if (activated) - avic_vcpu_load(vcpu); + avic_vcpu_load(vcpu, vcpu->cpu); else avic_vcpu_put(vcpu); @@ -1075,5 +1056,5 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu) if (!kvm_vcpu_apicv_active(vcpu)) return; - avic_vcpu_load(vcpu); + avic_vcpu_load(vcpu, vcpu->cpu); } diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 3361258640a2..ba7cd26f438f 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -616,6 +616,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; struct vmcb *vmcb01 = svm->vmcb01.ptr; struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; + u32 pause_count12; + u32 pause_thresh12; /* * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2, @@ -671,27 +673,25 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) if (!nested_vmcb_needs_vls_intercept(svm)) vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; + pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0; + pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0; if (kvm_pause_in_guest(svm->vcpu.kvm)) { - /* use guest values since host doesn't use them */ - vmcb02->control.pause_filter_count = - svm->pause_filter_enabled ? - svm->nested.ctl.pause_filter_count : 0; + /* use guest values since host doesn't intercept PAUSE */ + vmcb02->control.pause_filter_count = pause_count12; + vmcb02->control.pause_filter_thresh = pause_thresh12; - vmcb02->control.pause_filter_thresh = - svm->pause_threshold_enabled ? - svm->nested.ctl.pause_filter_thresh : 0; - - } else if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) { - /* use host values when guest doesn't use them */ + } else { + /* start from host values otherwise */ vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count; vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh; - } else { - /* - * Intercept every PAUSE otherwise and - * ignore both host and guest values - */ - vmcb02->control.pause_filter_count = 0; - vmcb02->control.pause_filter_thresh = 0; + + /* ... but ensure filtering is disabled if so requested. */ + if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) { + if (!pause_count12) + vmcb02->control.pause_filter_count = 0; + if (!pause_thresh12) + vmcb02->control.pause_filter_thresh = 0; + } } nested_svm_transition_tlb_flush(vcpu); @@ -951,8 +951,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm) vmcb12->control.event_inj = svm->nested.ctl.event_inj; vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err; - if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02->control.pause_filter_count) + if (!kvm_pause_in_guest(vcpu->kvm)) { vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count; + vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS); + + } nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 51fd985cf21d..0c240ed04f96 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -844,7 +844,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, /* If source buffer is not aligned then use an intermediate buffer */ if (!IS_ALIGNED((unsigned long)vaddr, 16)) { - src_tpage = alloc_page(GFP_KERNEL); + src_tpage = alloc_page(GFP_KERNEL_ACCOUNT); if (!src_tpage) return -ENOMEM; @@ -865,7 +865,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { int dst_offset; - dst_tpage = alloc_page(GFP_KERNEL); + dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT); if (!dst_tpage) { ret = -ENOMEM; goto e_free; @@ -1665,19 +1665,24 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) { struct kvm_sev_info *dst = &to_kvm_svm(dst_kvm)->sev_info; struct kvm_sev_info *src = &to_kvm_svm(src_kvm)->sev_info; + struct kvm_vcpu *dst_vcpu, *src_vcpu; + struct vcpu_svm *dst_svm, *src_svm; struct kvm_sev_info *mirror; + unsigned long i; dst->active = true; dst->asid = src->asid; dst->handle = src->handle; dst->pages_locked = src->pages_locked; dst->enc_context_owner = src->enc_context_owner; + dst->es_active = src->es_active; src->asid = 0; src->active = false; src->handle = 0; src->pages_locked = 0; src->enc_context_owner = NULL; + src->es_active = false; list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list); @@ -1704,26 +1709,21 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) list_del(&src->mirror_entry); list_add_tail(&dst->mirror_entry, &owner_sev_info->mirror_vms); } -} -static int sev_es_migrate_from(struct kvm *dst, struct kvm *src) -{ - unsigned long i; - struct kvm_vcpu *dst_vcpu, *src_vcpu; - struct vcpu_svm *dst_svm, *src_svm; + kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) { + dst_svm = to_svm(dst_vcpu); - if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus)) - return -EINVAL; + sev_init_vmcb(dst_svm); - kvm_for_each_vcpu(i, src_vcpu, src) { - if (!src_vcpu->arch.guest_state_protected) - return -EINVAL; - } + if (!dst->es_active) + continue; - kvm_for_each_vcpu(i, src_vcpu, src) { + /* + * Note, the source is not required to have the same number of + * vCPUs as the destination when migrating a vanilla SEV VM. + */ + src_vcpu = kvm_get_vcpu(dst_kvm, i); src_svm = to_svm(src_vcpu); - dst_vcpu = kvm_get_vcpu(dst, i); - dst_svm = to_svm(dst_vcpu); /* * Transfer VMSA and GHCB state to the destination. Nullify and @@ -1740,8 +1740,23 @@ static int sev_es_migrate_from(struct kvm *dst, struct kvm *src) src_svm->vmcb->control.vmsa_pa = INVALID_PAGE; src_vcpu->arch.guest_state_protected = false; } - to_kvm_svm(src)->sev_info.es_active = false; - to_kvm_svm(dst)->sev_info.es_active = true; +} + +static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) +{ + struct kvm_vcpu *src_vcpu; + unsigned long i; + + if (!sev_es_guest(src)) + return 0; + + if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus)) + return -EINVAL; + + kvm_for_each_vcpu(i, src_vcpu, src) { + if (!src_vcpu->arch.guest_state_protected) + return -EINVAL; + } return 0; } @@ -1789,11 +1804,9 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) if (ret) goto out_dst_vcpu; - if (sev_es_guest(source_kvm)) { - ret = sev_es_migrate_from(kvm, source_kvm); - if (ret) - goto out_source_vcpu; - } + ret = sev_check_source_vcpus(kvm, source_kvm); + if (ret) + goto out_source_vcpu; sev_migrate_from(kvm, source_kvm); kvm_vm_dead(source_kvm); @@ -2914,7 +2927,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) count, in); } -void sev_es_init_vmcb(struct vcpu_svm *svm) +static void sev_es_init_vmcb(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; @@ -2967,6 +2980,15 @@ void sev_es_init_vmcb(struct vcpu_svm *svm) } } +void sev_init_vmcb(struct vcpu_svm *svm) +{ + svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; + clr_exception_intercept(svm, UD_VECTOR); + + if (sev_es_guest(svm->vcpu.kvm)) + sev_es_init_vmcb(svm); +} + void sev_es_vcpu_reset(struct vcpu_svm *svm) { /* diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1dc02cdf6960..44bbf25dfeb9 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -921,7 +921,7 @@ static void grow_ple_window(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; int old = control->pause_filter_count; - if (kvm_pause_in_guest(vcpu->kvm) || !old) + if (kvm_pause_in_guest(vcpu->kvm)) return; control->pause_filter_count = __grow_ple_window(old, @@ -942,7 +942,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; int old = control->pause_filter_count; - if (kvm_pause_in_guest(vcpu->kvm) || !old) + if (kvm_pause_in_guest(vcpu->kvm)) return; control->pause_filter_count = @@ -1212,15 +1212,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK; } - if (sev_guest(vcpu->kvm)) { - svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; - clr_exception_intercept(svm, UD_VECTOR); - - if (sev_es_guest(vcpu->kvm)) { - /* Perform SEV-ES specific VMCB updates */ - sev_es_init_vmcb(svm); - } - } + if (sev_guest(vcpu->kvm)) + sev_init_vmcb(svm); svm_hv_init_vmcb(vmcb); init_vmcb_after_set_cpuid(vcpu); @@ -1400,13 +1393,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) indirect_branch_prediction_barrier(); } if (kvm_vcpu_apicv_active(vcpu)) - __avic_vcpu_load(vcpu, cpu); + avic_vcpu_load(vcpu, cpu); } static void svm_vcpu_put(struct kvm_vcpu *vcpu) { if (kvm_vcpu_apicv_active(vcpu)) - __avic_vcpu_put(vcpu); + avic_vcpu_put(vcpu); svm_prepare_host_switch(vcpu); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 500348c1cb35..9223ac100ef5 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -610,8 +610,8 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb); int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu); int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu); int avic_init_vcpu(struct vcpu_svm *svm); -void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu); -void __avic_vcpu_put(struct kvm_vcpu *vcpu); +void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +void avic_vcpu_put(struct kvm_vcpu *vcpu); void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu); void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu); void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu); @@ -649,10 +649,10 @@ void __init sev_set_cpu_caps(void); void __init sev_hardware_setup(void); void sev_hardware_unsetup(void); int sev_cpu_init(struct svm_cpu_data *sd); +void sev_init_vmcb(struct vcpu_svm *svm); void sev_free_vcpu(struct kvm_vcpu *vcpu); int sev_handle_vmgexit(struct kvm_vcpu *vcpu); int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); -void sev_es_init_vmcb(struct vcpu_svm *svm); void sev_es_vcpu_reset(struct vcpu_svm *svm); void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa); diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index dfaeb47fcf2a..723f8534986c 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -111,6 +111,15 @@ SYM_FUNC_START(__svm_vcpu_run) #endif /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be + * untrained as soon as we exit the VM and are back to the + * kernel. This should be done before re-enabling interrupts + * because interrupt handlers won't sanitize 'ret' if the return is + * from the kernel. + */ + UNTRAIN_RET + + /* * Clear all general purpose registers except RSP and RAX to prevent * speculative use of the guest's values, even those that are reloaded * via the stack. In theory, an L1 cache miss when restoring registers @@ -190,6 +199,15 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be + * untrained as soon as we exit the VM and are back to the + * kernel. This should be done before re-enabling interrupts + * because interrupt handlers won't sanitize RET if the return is + * from the kernel. + */ + UNTRAIN_RET + pop %_ASM_BX #ifdef CONFIG_X86_64 diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 3f430e218375..c0e24826a86f 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -4,8 +4,8 @@ #include <asm/vmx.h> -#include "lapic.h" -#include "x86.h" +#include "../lapic.h" +#include "../x86.h" extern bool __read_mostly enable_vpid; extern bool __read_mostly flexpriority_enabled; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f5cb18e00e78..ab135f9ef52f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2278,7 +2278,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_ENABLE_VMFUNC | - SECONDARY_EXEC_TSC_SCALING | SECONDARY_EXEC_DESC); if (nested_cpu_has(vmcs12, @@ -3087,7 +3086,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) } vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, - vmx->loaded_vmcs->launched); + __vmx_vcpu_run_flags(vmx)); if (vmx->msr_autoload.host.nr) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h new file mode 100644 index 000000000000..edc3f16cc189 --- /dev/null +++ b/arch/x86/kvm/vmx/run_flags.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __KVM_X86_VMX_RUN_FLAGS_H +#define __KVM_X86_VMX_RUN_FLAGS_H + +#define VMX_RUN_VMRESUME (1 << 0) +#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) + +#endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 435c187927c4..4182c7ffc909 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -1,10 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/linkage.h> #include <asm/asm.h> +#include <asm/asm-offsets.h> #include <asm/bitsperlong.h> #include <asm/kvm_vcpu_regs.h> #include <asm/nospec-branch.h> +#include <asm/percpu.h> #include <asm/segment.h> +#include "run_flags.h" #define WORD_SIZE (BITS_PER_LONG / 8) @@ -31,72 +34,11 @@ .section .noinstr.text, "ax" /** - * vmx_vmenter - VM-Enter the current loaded VMCS - * - * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME - * - * Returns: - * %RFLAGS.CF is set on VM-Fail Invalid - * %RFLAGS.ZF is set on VM-Fail Valid - * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit - * - * Note that VMRESUME/VMLAUNCH fall-through and return directly if - * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump - * to vmx_vmexit. - */ -SYM_FUNC_START_LOCAL(vmx_vmenter) - /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */ - je 2f - -1: vmresume - RET - -2: vmlaunch - RET - -3: cmpb $0, kvm_rebooting - je 4f - RET -4: ud2 - - _ASM_EXTABLE(1b, 3b) - _ASM_EXTABLE(2b, 3b) - -SYM_FUNC_END(vmx_vmenter) - -/** - * vmx_vmexit - Handle a VMX VM-Exit - * - * Returns: - * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit - * - * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump - * here after hardware loads the host's state, i.e. this is the destination - * referred to by VMCS.HOST_RIP. - */ -SYM_FUNC_START(vmx_vmexit) -#ifdef CONFIG_RETPOLINE - ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE - /* Preserve guest's RAX, it's used to stuff the RSB. */ - push %_ASM_AX - - /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE - - /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */ - or $1, %_ASM_AX - - pop %_ASM_AX -.Lvmexit_skip_rsb: -#endif - RET -SYM_FUNC_END(vmx_vmexit) - -/** * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode - * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) + * @vmx: struct vcpu_vmx * * @regs: unsigned long * (to guest registers) - * @launched: %true if the VMCS has been launched + * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH + * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl * * Returns: * 0 on VM-Exit, 1 on VM-Fail @@ -115,24 +57,56 @@ SYM_FUNC_START(__vmx_vcpu_run) #endif push %_ASM_BX + /* Save @vmx for SPEC_CTRL handling */ + push %_ASM_ARG1 + + /* Save @flags for SPEC_CTRL handling */ + push %_ASM_ARG3 + /* * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and * @regs is needed after VM-Exit to save the guest's register values. */ push %_ASM_ARG2 - /* Copy @launched to BL, _ASM_ARG3 is volatile. */ + /* Copy @flags to BL, _ASM_ARG3 is volatile. */ mov %_ASM_ARG3B, %bl - /* Adjust RSP to account for the CALL to vmx_vmenter(). */ - lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2 + lea (%_ASM_SP), %_ASM_ARG2 call vmx_update_host_rsp + ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL + + /* + * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the + * host's, write the MSR. + * + * IMPORTANT: To avoid RSB underflow attacks and any other nastiness, + * there must not be any returns or indirect branches between this code + * and vmentry. + */ + mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI + movl VMX_spec_ctrl(%_ASM_DI), %edi + movl PER_CPU_VAR(x86_spec_ctrl_current), %esi + cmp %edi, %esi + je .Lspec_ctrl_done + mov $MSR_IA32_SPEC_CTRL, %ecx + xor %edx, %edx + mov %edi, %eax + wrmsr + +.Lspec_ctrl_done: + + /* + * Since vmentry is serializing on affected CPUs, there's no need for + * an LFENCE to stop speculation from skipping the wrmsr. + */ + /* Load @regs to RAX. */ mov (%_ASM_SP), %_ASM_AX /* Check if vmlaunch or vmresume is needed */ - testb %bl, %bl + testb $VMX_RUN_VMRESUME, %bl /* Load guest registers. Don't clobber flags. */ mov VCPU_RCX(%_ASM_AX), %_ASM_CX @@ -154,11 +128,37 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX - /* Enter guest mode */ - call vmx_vmenter + /* Check EFLAGS.ZF from 'testb' above */ + jz .Lvmlaunch + + /* + * After a successful VMRESUME/VMLAUNCH, control flow "magically" + * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting. + * So this isn't a typical function and objtool needs to be told to + * save the unwind state here and restore it below. + */ + UNWIND_HINT_SAVE + +/* + * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at + * the 'vmx_vmexit' label below. + */ +.Lvmresume: + vmresume + jmp .Lvmfail + +.Lvmlaunch: + vmlaunch + jmp .Lvmfail - /* Jump on VM-Fail. */ - jbe 2f + _ASM_EXTABLE(.Lvmresume, .Lfixup) + _ASM_EXTABLE(.Lvmlaunch, .Lfixup) + +SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) + + /* Restore unwind state from before the VMRESUME/VMLAUNCH. */ + UNWIND_HINT_RESTORE + ENDBR /* Temporarily save guest's RAX. */ push %_ASM_AX @@ -185,21 +185,23 @@ SYM_FUNC_START(__vmx_vcpu_run) mov %r15, VCPU_R15(%_ASM_AX) #endif - /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ - xor %eax, %eax + /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */ + xor %ebx, %ebx +.Lclear_regs: /* - * Clear all general purpose registers except RSP and RAX to prevent + * Clear all general purpose registers except RSP and RBX to prevent * speculative use of the guest's values, even those that are reloaded * via the stack. In theory, an L1 cache miss when restoring registers * could lead to speculative execution with the guest's values. * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially * free. RSP and RAX are exempt as RSP is restored by hardware during - * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. + * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return + * value. */ -1: xor %ecx, %ecx + xor %eax, %eax + xor %ecx, %ecx xor %edx, %edx - xor %ebx, %ebx xor %ebp, %ebp xor %esi, %esi xor %edi, %edi @@ -216,8 +218,30 @@ SYM_FUNC_START(__vmx_vcpu_run) /* "POP" @regs. */ add $WORD_SIZE, %_ASM_SP - pop %_ASM_BX + /* + * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before + * the first unbalanced RET after vmexit! + * + * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB + * entries and (in some cases) RSB underflow. + * + * eIBRS has its own protection against poisoned RSB, so it doesn't + * need the RSB filling sequence. But it does need to be enabled + * before the first unbalanced RET. + */ + + FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT + + pop %_ASM_ARG2 /* @flags */ + pop %_ASM_ARG1 /* @vmx */ + + call vmx_spec_ctrl_restore_host + + /* Put return value in AX */ + mov %_ASM_BX, %_ASM_AX + + pop %_ASM_BX #ifdef CONFIG_X86_64 pop %r12 pop %r13 @@ -230,9 +254,15 @@ SYM_FUNC_START(__vmx_vcpu_run) pop %_ASM_BP RET - /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */ -2: mov $1, %eax - jmp 1b +.Lfixup: + cmpb $0, kvm_rebooting + jne .Lvmfail + ud2 +.Lvmfail: + /* VM-Fail: set return value to 1 */ + mov $1, %_ASM_BX + jmp .Lclear_regs + SYM_FUNC_END(__vmx_vcpu_run) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9bd86ecccdab..be7c19374fdd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -229,6 +229,9 @@ static const struct { #define L1D_CACHE_ORDER 4 static void *vmx_l1d_flush_pages; +/* Control for disabling CPU Fill buffer clear */ +static bool __read_mostly vmx_fb_clear_ctrl_available; + static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) { struct page *page; @@ -360,6 +363,60 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); } +static void vmx_setup_fb_clear_ctrl(void) +{ + u64 msr; + + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) && + !boot_cpu_has_bug(X86_BUG_MDS) && + !boot_cpu_has_bug(X86_BUG_TAA)) { + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); + if (msr & ARCH_CAP_FB_CLEAR_CTRL) + vmx_fb_clear_ctrl_available = true; + } +} + +static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) +{ + u64 msr; + + if (!vmx->disable_fb_clear) + return; + + msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL); + msr |= FB_CLEAR_DIS; + native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + /* Cache the MSR value to avoid reading it later */ + vmx->msr_ia32_mcu_opt_ctrl = msr; +} + +static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) +{ + if (!vmx->disable_fb_clear) + return; + + vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; + native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); +} + +static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) +{ + vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; + + /* + * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS + * at VMEntry. Skip the MSR read/write when a guest has no use case to + * execute VERW. + */ + if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) || + ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO))) + vmx->disable_fb_clear = false; +} + static const struct kernel_param_ops vmentry_l1d_flush_ops = { .set = vmentry_l1d_flush_set, .get = vmentry_l1d_flush_get, @@ -782,6 +839,24 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) MSR_IA32_SPEC_CTRL); } +unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) +{ + unsigned int flags = 0; + + if (vmx->loaded_vmcs->launched) + flags |= VMX_RUN_VMRESUME; + + /* + * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free + * to change it directly without causing a vmexit. In that case read + * it after vmexit and store it in vmx->spec_ctrl. + */ + if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) + flags |= VMX_RUN_SAVE_SPEC_CTRL; + + return flags; +} + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, unsigned long entry, unsigned long exit) { @@ -2252,6 +2327,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ret = kvm_set_msr_common(vcpu, msr_info); } + /* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */ + if (msr_index == MSR_IA32_ARCH_CAPABILITIES) + vmx_update_fb_clear_dis(vcpu, vmx); + return ret; } @@ -4553,6 +4632,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); vpid_sync_context(vmx->vpid); + + vmx_update_fb_clear_dis(vcpu, vmx); } static void vmx_enable_irq_window(struct kvm_vcpu *vcpu) @@ -6750,6 +6831,31 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) } } +void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, + unsigned int flags) +{ + u64 hostval = this_cpu_read(x86_spec_ctrl_current); + + if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) + return; + + if (flags & VMX_RUN_SAVE_SPEC_CTRL) + vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL); + + /* + * If the guest/host SPEC_CTRL values differ, restore the host value. + * + * For legacy IBRS, the IBRS bit always needs to be written after + * transitioning from a less privileged predictor mode, regardless of + * whether the guest/host values differ. + */ + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) || + vmx->spec_ctrl != hostval) + native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); + + barrier_nospec(); +} + static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) { switch (to_vmx(vcpu)->exit_reason.basic) { @@ -6763,7 +6869,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) } static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, - struct vcpu_vmx *vmx) + struct vcpu_vmx *vmx, + unsigned long flags) { guest_state_enter_irqoff(); @@ -6772,15 +6879,22 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vmx_l1d_flush(vcpu); else if (static_branch_unlikely(&mds_user_clear)) mds_clear_cpu_buffers(); + else if (static_branch_unlikely(&mmio_stale_data_clear) && + kvm_arch_has_assigned_device(vcpu->kvm)) + mds_clear_cpu_buffers(); + + vmx_disable_fb_clear(vmx); if (vcpu->arch.cr2 != native_read_cr2()) native_write_cr2(vcpu->arch.cr2); vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, - vmx->loaded_vmcs->launched); + flags); vcpu->arch.cr2 = native_read_cr2(); + vmx_enable_fb_clear(vmx); + guest_state_exit_irqoff(); } @@ -6874,36 +6988,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) kvm_wait_lapic_expire(vcpu); - /* - * If this vCPU has touched SPEC_CTRL, restore the guest's value if - * it's non-zero. Since vmentry is serialising on affected CPUs, there - * is no need to worry about the conditional branch over the wrmsr - * being speculatively taken. - */ - x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); - /* The actual VMENTER/EXIT is in the .noinstr.text section. */ - vmx_vcpu_enter_exit(vcpu, vmx); - - /* - * We do not use IBRS in the kernel. If this vCPU has used the - * SPEC_CTRL MSR it may have left it on; save the value and - * turn it off. This is much more efficient than blindly adding - * it to the atomic save/restore list. Especially as the former - * (Saving guest MSRs on vmexit) doesn't even exist in KVM. - * - * For non-nested case: - * If the L01 MSR bitmap does not intercept the MSR, then we need to - * save it. - * - * For nested case: - * If the L02 MSR bitmap does not intercept the MSR, then we need to - * save it. - */ - if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) - vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - - x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); + vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); /* All fields are clean at this point */ if (static_branch_unlikely(&enable_evmcs)) { @@ -7709,7 +7795,9 @@ static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason) ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | BIT(APICV_INHIBIT_REASON_ABSENT) | BIT(APICV_INHIBIT_REASON_HYPERV) | - BIT(APICV_INHIBIT_REASON_BLOCKIRQ); + BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | + BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | + BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); return supported & BIT(reason); } @@ -8212,6 +8300,8 @@ static int __init vmx_init(void) return r; } + vmx_setup_fb_clear_ctrl(); + for_each_possible_cpu(cpu) { INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index b98c7e96697a..1e7f9453894b 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -8,11 +8,12 @@ #include <asm/intel_pt.h> #include "capabilities.h" -#include "kvm_cache_regs.h" +#include "../kvm_cache_regs.h" #include "posted_intr.h" #include "vmcs.h" #include "vmx_ops.h" -#include "cpuid.h" +#include "../cpuid.h" +#include "run_flags.h" #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 @@ -348,6 +349,8 @@ struct vcpu_vmx { u64 msr_ia32_feature_control_valid_bits; /* SGX Launch Control public key hash */ u64 msr_ia32_sgxlepubkeyhash[4]; + u64 msr_ia32_mcu_opt_ctrl; + bool disable_fb_clear; struct pt_desc pt_desc; struct lbr_desc lbr_desc; @@ -402,7 +405,10 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); +void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags); +unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, + unsigned int flags); int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index 5e7f41225780..5cfc49ddb1b4 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -8,7 +8,7 @@ #include "evmcs.h" #include "vmcs.h" -#include "x86.h" +#include "../x86.h" asmlinkage void vmread_error(unsigned long field, bool fault); __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 03fbfbbec460..143e37298d8a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -298,7 +298,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, directed_yield_successful), STATS_DESC_COUNTER(VCPU, preemption_reported), STATS_DESC_COUNTER(VCPU, preemption_other), - STATS_DESC_ICOUNTER(VCPU, guest_mode) + STATS_DESC_IBOOLEAN(VCPU, guest_mode) }; const struct kvm_stats_header kvm_vcpu_stats_header = { @@ -1617,6 +1617,9 @@ static u64 kvm_get_arch_capabilities(void) */ } + /* Guests don't need to know "Fill buffer clear control" exists */ + data &= ~ARCH_CAP_FB_CLEAR_CTRL; + return data; } @@ -9140,15 +9143,17 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, */ static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid) { - struct kvm_lapic_irq lapic_irq; - - lapic_irq.shorthand = APIC_DEST_NOSHORT; - lapic_irq.dest_mode = APIC_DEST_PHYSICAL; - lapic_irq.level = 0; - lapic_irq.dest_id = apicid; - lapic_irq.msi_redir_hint = false; + /* + * All other fields are unused for APIC_DM_REMRD, but may be consumed by + * common code, e.g. for tracing. Defer initialization to the compiler. + */ + struct kvm_lapic_irq lapic_irq = { + .delivery_mode = APIC_DM_REMRD, + .dest_mode = APIC_DEST_PHYSICAL, + .shorthand = APIC_DEST_NOSHORT, + .dest_id = apicid, + }; - lapic_irq.delivery_mode = APIC_DM_REMRD; kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } @@ -9850,6 +9855,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) return; down_read(&vcpu->kvm->arch.apicv_update_lock); + preempt_disable(); activate = kvm_vcpu_apicv_activated(vcpu); @@ -9870,6 +9876,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) kvm_make_request(KVM_REQ_EVENT, vcpu); out: + preempt_enable(); up_read(&vcpu->kvm->arch.apicv_update_lock); } EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); @@ -12626,9 +12633,9 @@ void kvm_arch_end_assignment(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); -bool kvm_arch_has_assigned_device(struct kvm *kvm) +bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm) { - return atomic_read(&kvm->arch.assigned_device_count); + return arch_atomic_read(&kvm->arch.assigned_device_count); } EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index d83cba364e31..724bbf83eb5b 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -39,7 +39,7 @@ SYM_FUNC_START(__memmove) /* FSRM implies ERMS => no length checks, do the copy directly */ .Lmemmove_begin_forward: ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM - ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS + ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS /* * movsq instruction have many startup latency @@ -205,6 +205,11 @@ SYM_FUNC_START(__memmove) movb %r11b, (%rdi) 13: RET + +.Lmemmove_erms: + movq %rdx, %rcx + rep movsb + RET SYM_FUNC_END(__memmove) EXPORT_SYMBOL(__memmove) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index b2b2366885a2..073289a55f84 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -33,9 +33,9 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) UNWIND_HINT_EMPTY ANNOTATE_NOENDBR - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ - __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE + ALTERNATIVE_2 __stringify(RETPOLINE \reg), \ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \ + __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE) .endm @@ -67,3 +67,76 @@ SYM_CODE_END(__x86_indirect_thunk_array) #define GEN(reg) EXPORT_THUNK(reg) #include <asm/GEN-for-each-reg.h> #undef GEN + +/* + * This function name is magical and is used by -mfunction-return=thunk-extern + * for the compiler to generate JMPs to it. + */ +#ifdef CONFIG_RETHUNK + + .section .text.__x86.return_thunk + +/* + * Safety details here pertain to the AMD Zen{1,2} microarchitecture: + * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for + * alignment within the BTB. + * 2) The instruction at zen_untrain_ret must contain, and not + * end with, the 0xc3 byte of the RET. + * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread + * from re-poisioning the BTB prediction. + */ + .align 64 + .skip 63, 0xcc +SYM_FUNC_START_NOALIGN(zen_untrain_ret); + + /* + * As executed from zen_untrain_ret, this is: + * + * TEST $0xcc, %bl + * LFENCE + * JMP __x86_return_thunk + * + * Executing the TEST instruction has a side effect of evicting any BTB + * prediction (potentially attacker controlled) attached to the RET, as + * __x86_return_thunk + 1 isn't an instruction boundary at the moment. + */ + .byte 0xf6 + + /* + * As executed from __x86_return_thunk, this is a plain RET. + * + * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. + * + * We subsequently jump backwards and architecturally execute the RET. + * This creates a correct BTB prediction (type=ret), but in the + * meantime we suffer Straight Line Speculation (because the type was + * no branch) which is halted by the INT3. + * + * With SMT enabled and STIBP active, a sibling thread cannot poison + * RET's prediction to a type of its choice, but can evict the + * prediction due to competitive sharing. If the prediction is + * evicted, __x86_return_thunk will suffer Straight Line Speculation + * which will be contained safely by the INT3. + */ +SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL) + ret + int3 +SYM_CODE_END(__x86_return_thunk) + + /* + * Ensure the TEST decoding / BTB invalidation is complete. + */ + lfence + + /* + * Jump back and execute the RET in the middle of the TEST instruction. + * INT3 is for SLS protection. + */ + jmp __x86_return_thunk + int3 +SYM_FUNC_END(zen_untrain_ret) +__EXPORT_THUNK(zen_untrain_ret) + +EXPORT_SYMBOL(__x86_return_thunk) + +#endif /* CONFIG_RETHUNK */ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d8cfce221275..57ba5502aecf 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -77,10 +77,20 @@ static uint8_t __pte2cachemode_tbl[8] = { [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, }; -/* Check that the write-protect PAT entry is set for write-protect */ +/* + * Check that the write-protect PAT entry is set for write-protect. + * To do this without making assumptions how PAT has been set up (Xen has + * another layout than the kernel), translate the _PAGE_CACHE_MODE_WP cache + * mode via the __cachemode2pte_tbl[] into protection bits (those protection + * bits will select a cache mode of WP or better), and then translate the + * protection bits back into the cache mode using __pte2cm_idx() and the + * __pte2cachemode_tbl[] array. This will return the really used cache mode. + */ bool x86_has_pat_wp(void) { - return __pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] == _PAGE_CACHE_MODE_WP; + uint16_t prot = __cachemode2pte_tbl[_PAGE_CACHE_MODE_WP]; + + return __pte2cachemode_tbl[__pte2cm_idx(prot)] == _PAGE_CACHE_MODE_WP; } enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 3d1dba05fce4..9de3d900bc92 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -65,7 +65,10 @@ SYM_FUNC_START(sme_encrypt_execute) movq %rbp, %rsp /* Restore original stack pointer */ pop %rbp - RET + /* Offset to __x86_return_thunk would be wrong here */ + ANNOTATE_UNRET_SAFE + ret + int3 SYM_FUNC_END(sme_encrypt_execute) SYM_FUNC_START(__enc_copy) @@ -151,6 +154,9 @@ SYM_FUNC_START(__enc_copy) pop %r12 pop %r15 - RET + /* Offset to __x86_return_thunk would be wrong here */ + ANNOTATE_UNRET_SAFE + ret + int3 .L__enc_copy_end: SYM_FUNC_END(__enc_copy) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index f298b18a9a3d..b808c9a80d1b 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -412,16 +412,30 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) { u8 *prog = *pprog; -#ifdef CONFIG_RETPOLINE if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { OPTIMIZER_HIDE_VAR(reg); emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); - } else -#endif - EMIT2(0xFF, 0xE0 + reg); + } else { + EMIT2(0xFF, 0xE0 + reg); + } + + *pprog = prog; +} + +static void emit_return(u8 **pprog, u8 *ip) +{ + u8 *prog = *pprog; + + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + emit_jump(&prog, &__x86_return_thunk, ip); + } else { + EMIT1(0xC3); /* ret */ + if (IS_ENABLED(CONFIG_SLS)) + EMIT1(0xCC); /* int3 */ + } *pprog = prog; } @@ -1420,8 +1434,9 @@ st: if (is_imm8(insn->off)) case BPF_JMP | BPF_CALL: func = (u8 *) __bpf_call_base + imm32; if (tail_call_reachable) { + /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */ EMIT3_off32(0x48, 0x8B, 0x85, - -(bpf_prog->aux->stack_depth + 8)); + -round_up(bpf_prog->aux->stack_depth, 8) - 8); if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7)) return -EINVAL; } else { @@ -1685,7 +1700,7 @@ emit_jmp: ctx->cleanup_addr = proglen; pop_callee_regs(&prog, callee_regs_used); EMIT1(0xC9); /* leave */ - EMIT1(0xC3); /* ret */ + emit_return(&prog, image + addrs[i - 1] + (prog - temp)); break; default: @@ -2188,7 +2203,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_SKIP_FRAME) /* skip our return address and return to parent */ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ - EMIT1(0xC3); /* ret */ + emit_return(&prog, prog); /* Make sure the trampoline generation logic doesn't overflow */ if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { ret = -EFAULT; diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index a4f43054bc79..2f82480fd430 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -8,7 +8,6 @@ #include <linux/pci-acpi.h> #include <asm/numa.h> #include <asm/pci_x86.h> -#include <asm/e820/api.h> struct pci_root_info { struct acpi_pci_root_info common; @@ -20,7 +19,7 @@ struct pci_root_info { #endif }; -static bool pci_use_e820 = true; +bool pci_use_e820 = true; static bool pci_use_crs = true; static bool pci_ignore_seg; @@ -387,11 +386,6 @@ static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci) status = acpi_pci_probe_root_resources(ci); - if (pci_use_e820) { - resource_list_for_each_entry(entry, &ci->resources) - remove_e820_regions(&device->dev, entry->res); - } - if (pci_use_crs) { resource_list_for_each_entry_safe(entry, tmp, &ci->resources) if (resource_is_pcicfg_ioport(entry->res)) diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 9ffe2bad27d5..4e5257a4811b 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -23,6 +23,7 @@ #include <linux/objtool.h> #include <asm/page_types.h> #include <asm/segment.h> +#include <asm/nospec-branch.h> .text .code64 @@ -75,7 +76,9 @@ STACK_FRAME_NON_STANDARD __efi64_thunk 1: movq 0x20(%rsp), %rsp pop %rbx pop %rbp - RET + ANNOTATE_UNRET_SAFE + ret + int3 .code32 2: pushl $__KERNEL_CS diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index e3297b15701c..70fb2ea85e90 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1183,15 +1183,19 @@ static void __init xen_domu_set_legacy_features(void) extern void early_xen_iret_patch(void); /* First C function to be called on Xen boot */ -asmlinkage __visible void __init xen_start_kernel(void) +asmlinkage __visible void __init xen_start_kernel(struct start_info *si) { struct physdev_set_iopl set_iopl; unsigned long initrd_start = 0; int rc; - if (!xen_start_info) + if (!si) return; + clear_bss(); + + xen_start_info = si; + __text_gen_insn(&early_xen_iret_patch, JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret, JMP32_INSN_SIZE); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 81aa46f770c5..cfa99e8f054b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -918,7 +918,7 @@ void xen_enable_sysenter(void) if (!boot_cpu_has(sysenter_feature)) return; - ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); + ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat); if(ret != 0) setup_clear_cpu_cap(sysenter_feature); } @@ -927,7 +927,7 @@ void xen_enable_syscall(void) { int ret; - ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); + ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64); if (ret != 0) { printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); /* Pretty fatal; 64-bit userspace has no other @@ -936,7 +936,7 @@ void xen_enable_syscall(void) if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { ret = register_callback(CALLBACKTYPE_syscall32, - xen_syscall32_target); + xen_entry_SYSCALL_compat); if (ret != 0) setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index caa9bc2fa100..6b4fdf6b9542 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -121,7 +121,7 @@ SYM_FUNC_END(xen_read_cr2_direct); .macro xen_pv_trap name SYM_CODE_START(xen_\name) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR pop %rcx pop %r11 @@ -234,8 +234,8 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) */ /* Normal 64-bit system call target */ -SYM_CODE_START(xen_syscall_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSCALL_64) + UNWIND_HINT_ENTRY ENDBR popq %rcx popq %r11 @@ -249,13 +249,13 @@ SYM_CODE_START(xen_syscall_target) movq $__USER_CS, 1*8(%rsp) jmp entry_SYSCALL_64_after_hwframe -SYM_CODE_END(xen_syscall_target) +SYM_CODE_END(xen_entry_SYSCALL_64) #ifdef CONFIG_IA32_EMULATION /* 32-bit compat syscall target */ -SYM_CODE_START(xen_syscall32_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSCALL_compat) + UNWIND_HINT_ENTRY ENDBR popq %rcx popq %r11 @@ -269,11 +269,11 @@ SYM_CODE_START(xen_syscall32_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSCALL_compat_after_hwframe -SYM_CODE_END(xen_syscall32_target) +SYM_CODE_END(xen_entry_SYSCALL_compat) /* 32-bit compat sysenter target */ -SYM_CODE_START(xen_sysenter_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_ENTRY ENDBR /* * NB: Xen is polite and clears TF from EFLAGS for us. This means @@ -291,19 +291,19 @@ SYM_CODE_START(xen_sysenter_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSENTER_compat_after_hwframe -SYM_CODE_END(xen_sysenter_target) +SYM_CODE_END(xen_entry_SYSENTER_compat) #else /* !CONFIG_IA32_EMULATION */ -SYM_CODE_START(xen_syscall32_target) -SYM_CODE_START(xen_sysenter_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSCALL_compat) +SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_ENTRY ENDBR lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $0 jmp hypercall_iret -SYM_CODE_END(xen_sysenter_target) -SYM_CODE_END(xen_syscall32_target) +SYM_CODE_END(xen_entry_SYSENTER_compat) +SYM_CODE_END(xen_entry_SYSCALL_compat) #endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 3a2cd93bf059..ffaa62167f6e 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -26,6 +26,7 @@ SYM_CODE_START(hypercall_page) .rept (PAGE_SIZE / 32) UNWIND_HINT_FUNC ANNOTATE_NOENDBR + ANNOTATE_UNRET_SAFE ret /* * Xen will write the hypercall page, and sort out ENDBR. @@ -48,15 +49,6 @@ SYM_CODE_START(startup_xen) ANNOTATE_NOENDBR cld - /* Clear .bss */ - xor %eax,%eax - mov $__bss_start, %rdi - mov $__bss_stop, %rcx - sub %rdi, %rcx - shr $3, %rcx - rep stosq - - mov %rsi, xen_start_info mov initial_stack(%rip), %rsp /* Set up %gs. @@ -71,6 +63,7 @@ SYM_CODE_START(startup_xen) cdq wrmsr + mov %rsi, %rdi call xen_start_kernel SYM_CODE_END(startup_xen) __FINIT diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index fd0fec6e92f4..9a8bb972193d 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -10,10 +10,10 @@ /* These are code, but not functions. Defined in entry.S */ extern const char xen_failsafe_callback[]; -void xen_sysenter_target(void); +void xen_entry_SYSENTER_compat(void); #ifdef CONFIG_X86_64 -void xen_syscall_target(void); -void xen_syscall32_target(void); +void xen_entry_SYSCALL_64(void); +void xen_entry_SYSCALL_compat(void); #endif extern void *xen_initial_gdt; diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index e3eae648ba2e..ab30bcb46290 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -2173,7 +2173,7 @@ ENDPROC(ret_from_kernel_thread) #ifdef CONFIG_HIBERNATION - .bss + .section .bss, "aw" .align 4 .Lsaved_regs: #if defined(__XTENSA_WINDOWED_ABI__) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index e8ceb1528608..16b8a6273772 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -154,6 +154,7 @@ static void __init calibrate_ccount(void) cpu = of_find_compatible_node(NULL, NULL, "cdns,xtensa-cpu"); if (cpu) { clk = of_clk_get(cpu, 0); + of_node_put(cpu); if (!IS_ERR(clk)) { ccount_freq = clk_get_rate(clk); return; diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index 538e6748e85a..c79c1d09ea86 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -133,6 +133,7 @@ static int __init machine_setup(void) if ((eth = of_find_compatible_node(eth, NULL, "opencores,ethoc"))) update_local_mac(eth); + of_node_put(eth); return 0; } arch_initcall(machine_setup); |