From ccbfea78adf75d3d9e87aa739dab83254f5333fa Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 8 Jul 2024 23:18:57 +0200 Subject: Input: ads7846 - ratelimit the spi_sync error message In case the touch controller is not connected, this message keeps scrolling on the console indefinitelly. Ratelimit it to avoid filling kernel logs. " ads7846 spi2.1: spi_sync --> -22 " Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20240708211913.171243-1-marex@denx.de Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ads7846.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index 4247283c7271..f89c0dd15d8b 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -824,7 +824,7 @@ static void ads7846_read_state(struct ads7846 *ts) m = &ts->msg[msg_idx]; error = spi_sync(ts->spi, m); if (error) { - dev_err(&ts->spi->dev, "spi_sync --> %d\n", error); + dev_err_ratelimited(&ts->spi->dev, "spi_sync --> %d\n", error); packet->ignore = true; return; } -- cgit From da897484557b34a54fabb81f6c223c19a69e546d Mon Sep 17 00:00:00 2001 From: Jonathan Denose Date: Tue, 23 Jul 2024 21:33:30 -0700 Subject: Input: synaptics - enable SMBus for HP Elitebook 840 G2 The kernel reports that the touchpad for this device can support a different bus. With SMBus enabled the touchpad movement is smoother and three-finger gestures are recognized. Signed-off-by: Jonathan Denose Link: https://lore.kernel.org/r/20240719180612.1.Ib652dd808c274076f32cd7fc6c1160d2cf71753b@changeid Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 38191c3b31bf..380aa1614442 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -189,6 +189,7 @@ static const char * const smbus_pnp_ids[] = { "LEN2054", /* E480 */ "LEN2055", /* E580 */ "LEN2068", /* T14 Gen 1 */ + "SYN3015", /* HP EliteBook 840 G2 */ "SYN3052", /* HP EliteBook 840 G4 */ "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ -- cgit From e4ab5d7cb5f19858305395e034f214c92afc3cf5 Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Thu, 25 Jul 2024 11:23:33 -0500 Subject: soc: qcom: pd-mapper: Depend on ARCH_QCOM || COMPILE_TEST The pd-mapper driver doesn't make sense on non Qualcomm systems. Let's follow suit with the rest of the Qualcomm SoC Kconfigs and depend on ARCH_QCOM || COMPILE_TEST to avoid asking users about a config they will not use. Fixes: 1ebcde047c54 ("soc: qcom: add pd-mapper implementation") Signed-off-by: Andrew Halaney Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240725-pd-mapper-config-v1-1-f26e513608c6@redhat.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig index 7f02f0525933..74b9121240f8 100644 --- a/drivers/soc/qcom/Kconfig +++ b/drivers/soc/qcom/Kconfig @@ -77,7 +77,7 @@ config QCOM_PD_MAPPER select QCOM_QMI_HELPERS select QCOM_PDR_MSG select AUXILIARY_BUS - depends on NET && QRTR + depends on NET && QRTR && (ARCH_QCOM || COMPILE_TEST) default QCOM_RPROC_COMMON help The Protection Domain Mapper maps registered services to the domains -- cgit From 10f98bb9d98137b544b00abb4f9df45e9be7878d Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 15 Jul 2024 14:15:40 +0200 Subject: arm64: defconfig: Add CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 This is needed for the display panel to work on the Qualcomm sc7180-trogdor-homestar and x1e80100-crd. Signed-off-by: Stephan Gerhold Reviewed-by: Douglas Anderson Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20240715-x1e80100-crd-backlight-v2-4-31b7f2f658a3@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 7d32fca64996..362df9390263 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -887,6 +887,7 @@ CONFIG_DRM_PANEL_KHADAS_TS050=m CONFIG_DRM_PANEL_MANTIX_MLAF057WE51=m CONFIG_DRM_PANEL_NOVATEK_NT36672E=m CONFIG_DRM_PANEL_RAYDIUM_RM67191=m +CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m CONFIG_DRM_PANEL_SITRONIX_ST7703=m CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA=m CONFIG_DRM_PANEL_VISIONOX_VTDR6130=m -- cgit From f9bb896eab221618927ae6a2f1d566567999839d Mon Sep 17 00:00:00 2001 From: Volodymyr Babchuk Date: Thu, 18 Jul 2024 11:33:23 +0530 Subject: soc: qcom: cmd-db: Map shared memory as WC, not WB Linux does not write into cmd-db region. This region of memory is write protected by XPU. XPU may sometime falsely detect clean cache eviction as "write" into the write protected region leading to secure interrupt which causes an endless loop somewhere in Trust Zone. The only reason it is working right now is because Qualcomm Hypervisor maps the same region as Non-Cacheable memory in Stage 2 translation tables. The issue manifests if we want to use another hypervisor (like Xen or KVM), which does not know anything about those specific mappings. Changing the mapping of cmd-db memory from MEMREMAP_WB to MEMREMAP_WT/WC removes dependency on correct mappings in Stage 2 tables. This patch fixes the issue by updating the mapping to MEMREMAP_WC. I tested this on SA8155P with Xen. Fixes: 312416d9171a ("drivers: qcom: add command DB driver") Cc: stable@vger.kernel.org # 5.4+ Signed-off-by: Volodymyr Babchuk Tested-by: Nikita Travkin # sc7180 WoA in EL2 Signed-off-by: Maulik Shah Tested-by: Pavankumar Kondeti Reviewed-by: Caleb Connolly Link: https://lore.kernel.org/r/20240718-cmd_db_uncached-v2-1-f6cf53164c90@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/cmd-db.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/qcom/cmd-db.c b/drivers/soc/qcom/cmd-db.c index d84572662017..ae66c2623d25 100644 --- a/drivers/soc/qcom/cmd-db.c +++ b/drivers/soc/qcom/cmd-db.c @@ -349,7 +349,7 @@ static int cmd_db_dev_probe(struct platform_device *pdev) return -EINVAL; } - cmd_db_header = memremap(rmem->base, rmem->size, MEMREMAP_WB); + cmd_db_header = memremap(rmem->base, rmem->size, MEMREMAP_WC); if (!cmd_db_header) { ret = -ENOMEM; cmd_db_header = NULL; -- cgit From dbd6bd124e34f9f859271ed9ae2afc39f36c7e8c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 19 Jul 2024 12:12:31 +0200 Subject: soc: qcom: pd-mapper: mark qcom_pdm_domains as __maybe_unused The qcom_pdm_domains[] array is used only when passing it into of_match_node() but is not also referenced by MODULE_DEVICE_TABLE() or the platform driver as a table. When CONFIG_OF is disabled, this causes a harmless build warning: drivers/soc/qcom/qcom_pd_mapper.c:520:34: error: 'qcom_pdm_domains' defined but not used [-Werror=unused-const-variable=] Avoid this by marking the variable as __maybe_unused. This also makes it clear that anything referenced by it will be dropped by the compiler when it is unused. Fixes: 1ebcde047c54 ("soc: qcom: add pd-mapper implementation") Signed-off-by: Arnd Bergmann Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240719101238.199850-1-arnd@kernel.org Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/qcom_pd_mapper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/qcom/qcom_pd_mapper.c b/drivers/soc/qcom/qcom_pd_mapper.c index a4c007080665..9afa09c3920e 100644 --- a/drivers/soc/qcom/qcom_pd_mapper.c +++ b/drivers/soc/qcom/qcom_pd_mapper.c @@ -517,7 +517,7 @@ static const struct qcom_pdm_domain_data *sm8550_domains[] = { NULL, }; -static const struct of_device_id qcom_pdm_domains[] = { +static const struct of_device_id qcom_pdm_domains[] __maybe_unused = { { .compatible = "qcom,apq8064", .data = NULL, }, { .compatible = "qcom,apq8074", .data = NULL, }, { .compatible = "qcom,apq8084", .data = NULL, }, -- cgit From 8bc7cb73df8644423758c79d4504d501c8ef3854 Mon Sep 17 00:00:00 2001 From: Patrick Wildt Date: Mon, 15 Jul 2024 21:40:41 +0200 Subject: arm64: dts: qcom: x1e80100-yoga: add wifi calibration variant Describe the bus topology for PCIe domain 4 and add the ath12k calibration variant so that the board file (calibration data) can be loaded. Signed-off-by: Patrick Wildt Reviewed-by: Konrad Dybcio Reviewed-by: Manivannan Sadhasivam Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/ZpV7OeGNIGGpqNC0@windev.fritz.box Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts | 9 +++++++++ arch/arm64/boot/dts/qcom/x1e80100.dtsi | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts index fbff558f5b07..f569f0fbd1fc 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts @@ -635,6 +635,15 @@ status = "okay"; }; +&pcie4_port0 { + wifi@0 { + compatible = "pci17cb,1107"; + reg = <0x10000 0x0 0x0 0x0 0x0>; + + qcom,ath12k-calibration-variant = "LES790"; + }; +}; + &pcie6a { perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 7bca5fcd7d52..70eeacd4f9ad 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -3085,6 +3085,16 @@ phy-names = "pciephy"; status = "disabled"; + + pcie4_port0: pcie@0 { + device_type = "pci"; + reg = <0x0 0x0 0x0 0x0 0x0>; + bus-range = <0x01 0xff>; + + #address-cells = <3>; + #size-cells = <2>; + ranges; + }; }; pcie4_phy: phy@1c0e000 { -- cgit From 60a76f7826b88ebf7697a56fdcd9596b23c2b616 Mon Sep 17 00:00:00 2001 From: Varadarajan Narayanan Date: Tue, 23 Jul 2024 15:31:51 +0530 Subject: arm64: dts: qcom: ipq5332: Fix interrupt trigger type for usb Trigger type is incorrectly specified as IRQ_TYPE_EDGE_BOTH instead of IRQ_TYPE_LEVEL_HIGH. This trigger type is not supported for SPIs and results in probe failure with -EINVAL. Reviewed-by: Konrad Dybcio Fixes: 927173bf8a0e ("arm64: dts: qcom: Add missing interrupts for qcs404/ipq5332") Signed-off-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20240723100151.402300-3-quic_varada@quicinc.com Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/ipq5332.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/ipq5332.dtsi b/arch/arm64/boot/dts/qcom/ipq5332.dtsi index 573656587c0d..0a74ed4f72cc 100644 --- a/arch/arm64/boot/dts/qcom/ipq5332.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq5332.dtsi @@ -320,8 +320,8 @@ reg = <0x08af8800 0x400>; interrupts = , - , - ; + , + ; interrupt-names = "pwr_event", "dp_hs_phy_irq", "dm_hs_phy_irq"; -- cgit From 950aeefb34923fe3c28ade35fe05f24e2c5b1d55 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 17 Jul 2024 22:01:30 -0700 Subject: iommufd/device: Fix hwpt at err_unresv in iommufd_device_do_replace() The rewind routine should remove the reserved iovas added to the new hwpt. Fixes: 89db31635c87 ("iommufd: Derive iommufd_hwpt_paging from iommufd_hw_pagetable") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/r/20240718050130.1956804-1-nicolinc@nvidia.com Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 9a7ec5997c61..3214a4c17c6b 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -526,7 +526,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, err_unresv: if (hwpt_is_paging(hwpt)) iommufd_group_remove_reserved_iova(igroup, - to_hwpt_paging(old_hwpt)); + to_hwpt_paging(hwpt)); err_unlock: mutex_unlock(&idev->igroup->lock); return ERR_PTR(rc); -- cgit From 30f593fa0088b89f479f7358640687b3cbca93d4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:42:42 +0200 Subject: arm64: dts: qcom: x1e80100-crd: fix PCIe4 PHY supply The PCIe4 PHY is powered by vreg_l3i (not vreg_l3j). Fixes: d7e03cce0400 ("arm64: dts: qcom: x1e80100-crd: Enable more support") Cc: stable@vger.kernel.org # 6.9 Signed-off-by: Johan Hovold Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20240722094249.26471-2-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-crd.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts index 6152bcd0bc1f..dabc9362c72c 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts @@ -760,7 +760,7 @@ }; &pcie4_phy { - vdda-phy-supply = <&vreg_l3j_0p8>; + vdda-phy-supply = <&vreg_l3i_0p8>; vdda-pll-supply = <&vreg_l3e_1p2>; status = "okay"; -- cgit From f8fa1f2f6412bffa71972f9506b72992d0e6e485 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:42:43 +0200 Subject: arm64: dts: qcom: x1e80100: fix PCIe domain numbers The current PCIe domain numbers are off by one and do not match the numbers that the UEFI firmware (and Windows) uses. Fixes: 5eb83fc10289 ("arm64: dts: qcom: x1e80100: Add PCIe nodes") Cc: stable@vger.kernel.org # 6.9 Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20240722094249.26471-3-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 70eeacd4f9ad..626fb2565cf4 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -2901,7 +2901,7 @@ dma-coherent; - linux,pci-domain = <7>; + linux,pci-domain = <6>; num-lanes = <2>; interrupts = , @@ -3022,7 +3022,7 @@ dma-coherent; - linux,pci-domain = <5>; + linux,pci-domain = <4>; num-lanes = <2>; interrupts = , -- cgit From 98abf2fbd179017833c38edc9f3b587c69d07e2a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:42:44 +0200 Subject: arm64: dts: qcom: x1e80100: add missing PCIe minimum OPP Add the missing PCIe CX performance level votes to avoid relying on other drivers (e.g. USB) to maintain the nominal performance level required for Gen3 speeds. Fixes: 5eb83fc10289 ("arm64: dts: qcom: x1e80100: Add PCIe nodes") Cc: stable@vger.kernel.org # 6.9 Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240722094249.26471-4-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 626fb2565cf4..c13811a4ef90 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -2959,6 +2959,7 @@ "link_down"; power-domains = <&gcc GCC_PCIE_6A_GDSC>; + required-opps = <&rpmhpd_opp_nom>; phys = <&pcie6a_phy>; phy-names = "pciephy"; @@ -3080,6 +3081,7 @@ "link_down"; power-domains = <&gcc GCC_PCIE_4_GDSC>; + required-opps = <&rpmhpd_opp_nom>; phys = <&pcie4_phy>; phy-names = "pciephy"; -- cgit From 6e3902c499544291ac4fd1a1bb69f2e9037a0e86 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:42:45 +0200 Subject: arm64: dts: qcom: x1e80100-crd: fix up PCIe6a pinctrl node The PCIe6a pinctrl node appears to have been copied from the sc8280xp CRD dts, which has the NVMe on pcie2a and uses some funny indentation. Fix up the node name to match the x1e80100 use and label and use only tabs for indentation. Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20240722094249.26471-5-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-crd.dts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts index dabc9362c72c..85e32101a471 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts @@ -931,7 +931,7 @@ bias-disable; }; - pcie6a_default: pcie2a-default-state { + pcie6a_default: pcie6a-default-state { clkreq-n-pins { pins = "gpio153"; function = "pcie6a_clk"; @@ -947,11 +947,11 @@ }; wake-n-pins { - pins = "gpio154"; - function = "gpio"; - drive-strength = <2>; - bias-pull-up; - }; + pins = "gpio154"; + function = "gpio"; + drive-strength = <2>; + bias-pull-up; + }; }; tpad_default: tpad-default-state { -- cgit From 8a6e1dbf1362e78081e71b2690750e9556136f26 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:42:46 +0200 Subject: arm64: dts: qcom: x1e80100-crd: disable PCIe6a perst pull down Disable the PCIe6a perst pull-down resistor to save some power. Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20240722094249.26471-6-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-crd.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts index 85e32101a471..aeb279b1a0cc 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts @@ -943,7 +943,7 @@ pins = "gpio152"; function = "gpio"; drive-strength = <2>; - bias-pull-down; + bias-disable; }; wake-n-pins { -- cgit From 42b33ad188466292eaac9825544b8be8deddb3cb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:42:47 +0200 Subject: arm64: dts: qcom: x1e80100-crd: fix missing PCIe4 gpios Add the missing PCIe4 perst, wake and clkreq GPIOs and pin config. Fixes: d7e03cce0400 ("arm64: dts: qcom: x1e80100-crd: Enable more support") Cc: stable@vger.kernel.org # 6.9 Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20240722094249.26471-7-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-crd.dts | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts index aeb279b1a0cc..d65a22172006 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts @@ -756,6 +756,12 @@ }; &pcie4 { + perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; + + pinctrl-0 = <&pcie4_default>; + pinctrl-names = "default"; + status = "okay"; }; @@ -931,6 +937,29 @@ bias-disable; }; + pcie4_default: pcie4-default-state { + clkreq-n-pins { + pins = "gpio147"; + function = "pcie4_clk"; + drive-strength = <2>; + bias-pull-up; + }; + + perst-n-pins { + pins = "gpio146"; + function = "gpio"; + drive-strength = <2>; + bias-disable; + }; + + wake-n-pins { + pins = "gpio148"; + function = "gpio"; + drive-strength = <2>; + bias-pull-up; + }; + }; + pcie6a_default: pcie6a-default-state { clkreq-n-pins { pins = "gpio153"; -- cgit From f03dd49f884f428ba71efe23383ff842f4f15e0e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:54:48 +0200 Subject: arm64: dts: qcom: x1e80100-qcp: fix PCIe4 PHY supply The PCIe4 PHY is powered by vreg_l3i (not vreg_l3j) on the CRD so assume the same applies to the QCP. Fixes: f9a9c11471da ("arm64: dts: qcom: x1e80100-qcp: Enable more support") Cc: stable@vger.kernel.org # 6.9 Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20240722095459.27437-2-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-qcp.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts index 72a4f4138616..ebfcccbb55e8 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts @@ -664,7 +664,7 @@ }; &pcie4_phy { - vdda-phy-supply = <&vreg_l3j_0p8>; + vdda-phy-supply = <&vreg_l3i_0p8>; vdda-pll-supply = <&vreg_l3e_1p2>; status = "okay"; -- cgit From 0aab6eaac72ac140dfc5e0a38bf3178497762e43 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:54:49 +0200 Subject: arm64: dts: qcom: x1e80100-qcp: fix up PCIe6a pinctrl node The PCIe6a pinctrl node appears to have been copied from the sc8280xp CRD dts, which has the NVMe on pcie2a and uses some funny indentation. Fix up the node name to match the x1e80100 use and label and use only tabs for indentation. Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20240722095459.27437-3-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-qcp.dts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts index ebfcccbb55e8..b067d7841d72 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts @@ -804,7 +804,7 @@ bias-disable; }; - pcie6a_default: pcie2a-default-state { + pcie6a_default: pcie6a-default-state { clkreq-n-pins { pins = "gpio153"; function = "pcie6a_clk"; @@ -820,11 +820,11 @@ }; wake-n-pins { - pins = "gpio154"; - function = "gpio"; - drive-strength = <2>; - bias-pull-up; - }; + pins = "gpio154"; + function = "gpio"; + drive-strength = <2>; + bias-pull-up; + }; }; wcd_default: wcd-reset-n-active-state { -- cgit From 12661b333374c892f9053261b4bceb346a709ea4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:54:50 +0200 Subject: arm64: dts: qcom: x1e80100-qcp: disable PCIe6a perst pull down Disable the PCIe6a perst pull-down resistor to save some power. Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20240722095459.27437-4-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-qcp.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts index b067d7841d72..653673e423bf 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts @@ -816,7 +816,7 @@ pins = "gpio152"; function = "gpio"; drive-strength = <2>; - bias-pull-down; + bias-disable; }; wake-n-pins { -- cgit From 2ac90e4d2b6d6823ca10642ef39595ff1181c3fa Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:54:51 +0200 Subject: arm64: dts: qcom: x1e80100-qcp: fix missing PCIe4 gpios Add the missing PCIe4 perst, wake and clkreq GPIOs and pin config. Fixes: f9a9c11471da ("arm64: dts: qcom: x1e80100-qcp: Enable more support") Cc: stable@vger.kernel.org # 6.9 Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240722095459.27437-5-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-qcp.dts | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts index 653673e423bf..2dcf2a17511d 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts @@ -660,6 +660,12 @@ }; &pcie4 { + perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; + + pinctrl-0 = <&pcie4_default>; + pinctrl-names = "default"; + status = "okay"; }; @@ -804,6 +810,29 @@ bias-disable; }; + pcie4_default: pcie4-default-state { + clkreq-n-pins { + pins = "gpio147"; + function = "pcie4_clk"; + drive-strength = <2>; + bias-pull-up; + }; + + perst-n-pins { + pins = "gpio146"; + function = "gpio"; + drive-strength = <2>; + bias-disable; + }; + + wake-n-pins { + pins = "gpio148"; + function = "gpio"; + drive-strength = <2>; + bias-pull-up; + }; + }; + pcie6a_default: pcie6a-default-state { clkreq-n-pins { pins = "gpio153"; -- cgit From e89fe0596c62363082cabbaa5ccb38989e714e68 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:54:52 +0200 Subject: arm64: dts: qcom: x1e80100-vivobook-s15: fix PCIe4 PHY supply The PCIe4 PHY is powered by vreg_l3i (not vreg_l3j) on the CRD reference design so assume the same applies to the Asus Vivobook S15. Fixes: d0e2f8f62dff ("arm64: dts: qcom: Add device tree for ASUS Vivobook S 15") Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240722095459.27437-6-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts index 7fb980fcb307..f7337251349b 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts @@ -278,6 +278,13 @@ vdd-l3-supply = <&vreg_s1f_0p7>; vdd-s1-supply = <&vph_pwr>; vdd-s2-supply = <&vph_pwr>; + + vreg_l3i_0p8: ldo3 { + regulator-name = "vreg_l3i_0p8"; + regulator-min-microvolt = <880000>; + regulator-max-microvolt = <920000>; + regulator-initial-mode = ; + }; }; regulators-7 { @@ -427,7 +434,7 @@ }; &pcie4_phy { - vdda-phy-supply = <&vreg_l3j_0p8>; + vdda-phy-supply = <&vreg_l3i_0p8>; vdda-pll-supply = <&vreg_l3e_1p2>; status = "okay"; -- cgit From c67b3dfd8d69164f70ab3aaff889fca1e536c909 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:54:53 +0200 Subject: arm64: dts: qcom: x1e80100-vivobook-s15: fix up PCIe6a pinctrl node The PCIe6a pinctrl node appears to have been copied from the sc8280xp CRD dts (via the x1e80100 CRD dts), which has the NVMe on pcie2a. Fix up the node name to match the x1e80100 use and label. Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240722095459.27437-7-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts index f7337251349b..ff51dd98351c 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts @@ -524,7 +524,7 @@ bias-disable; }; - pcie6a_default: pcie2a-default-state { + pcie6a_default: pcie6a-default-state { clkreq-n-pins { pins = "gpio153"; function = "pcie6a_clk"; -- cgit From d7ff5d1868d1cfd1c06a601a7cfa2dbb6dba4be9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:54:54 +0200 Subject: arm64: dts: qcom: x1e80100-vivobook-s15: disable PCIe6a perst pull down Disable the PCIe6a perst pull-down resistor to save some power. Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240722095459.27437-8-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts index ff51dd98351c..1eb0abcbf650 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts @@ -536,7 +536,7 @@ pins = "gpio152"; function = "gpio"; drive-strength = <2>; - bias-pull-down; + bias-disable; }; wake-n-pins { -- cgit From e7f3f3cbbfef84729ad6c10eb589957e7b28b95a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:54:55 +0200 Subject: arm64: dts: qcom: x1e80100-vivobook-s15: fix missing PCIe4 gpios Add the missing PCIe4 perst, wake and clkreq GPIOs and pin config. Fixes: d0e2f8f62dff ("arm64: dts: qcom: Add device tree for ASUS Vivobook S 15") Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240722095459.27437-9-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- .../boot/dts/qcom/x1e80100-asus-vivobook-s15.dts | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts index 1eb0abcbf650..9caa14dda585 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts @@ -430,6 +430,12 @@ }; &pcie4 { + perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; + + pinctrl-0 = <&pcie4_default>; + pinctrl-names = "default"; + status = "okay"; }; @@ -524,6 +530,29 @@ bias-disable; }; + pcie4_default: pcie4-default-state { + clkreq-n-pins { + pins = "gpio147"; + function = "pcie4_clk"; + drive-strength = <2>; + bias-pull-up; + }; + + perst-n-pins { + pins = "gpio146"; + function = "gpio"; + drive-strength = <2>; + bias-disable; + }; + + wake-n-pins { + pins = "gpio148"; + function = "gpio"; + drive-strength = <2>; + bias-pull-up; + }; + }; + pcie6a_default: pcie6a-default-state { clkreq-n-pins { pins = "gpio153"; -- cgit From b90567c262fc3a3e703f3091499dec799a6147ab Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:54:56 +0200 Subject: arm64: dts: qcom: x1e80100-yoga-slim7x: fix PCIe4 PHY supply The PCIe4 PHY is powered by vreg_l3i (not vreg_l3j) on the CRD reference design so assume the same applies to the Lenovo Yoga Slim 7x. Fixes: 45247fe17db2 ("arm64: dts: qcom: x1e80100: add Lenovo Thinkpad Yoga slim 7x devicetree") Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240722095459.27437-10-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts index f569f0fbd1fc..6902548974d0 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts @@ -629,7 +629,7 @@ }; &pcie4_phy { - vdda-phy-supply = <&vreg_l3j_0p8>; + vdda-phy-supply = <&vreg_l3i_0p8>; vdda-pll-supply = <&vreg_l3e_1p2>; status = "okay"; -- cgit From a655dacf2a35a35eadd95f0ba8fe9cf70359eeb9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:54:57 +0200 Subject: arm64: dts: qcom: x1e80100-yoga-slim7x: fix up PCIe6a pinctrl node The PCIe6a pinctrl node appears to have been copied from the sc8280xp CRD dts (via the x1e80100 CRD dts), which has the NVMe on pcie2a and uses some funny indentation. Fix up the node name to match the x1e80100 use and label and use only tabs for indentation. Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240722095459.27437-11-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts index 6902548974d0..ad96bb8d5400 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts @@ -791,7 +791,7 @@ bias-disable; }; - pcie6a_default: pcie2a-default-state { + pcie6a_default: pcie6a-default-state { clkreq-n-pins { pins = "gpio153"; function = "pcie6a_clk"; @@ -807,11 +807,11 @@ }; wake-n-pins { - pins = "gpio154"; - function = "gpio"; - drive-strength = <2>; - bias-pull-up; - }; + pins = "gpio154"; + function = "gpio"; + drive-strength = <2>; + bias-pull-up; + }; }; tpad_default: tpad-default-state { -- cgit From 750b8a3b5a4476cf000f3db1fe46293c97fcd979 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:54:58 +0200 Subject: arm64: dts: qcom: x1e80100-yoga-slim7x: disable PCIe6a perst pull down Disable the PCIe6a perst pull-down resistor to save some power. Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240722095459.27437-12-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts index ad96bb8d5400..48a7b8eb9829 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts @@ -803,7 +803,7 @@ pins = "gpio152"; function = "gpio"; drive-strength = <2>; - bias-pull-down; + bias-disable; }; wake-n-pins { -- cgit From 86c71c0e893d58447e4a9e5c0d1c2c0f89c1b9e1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jul 2024 11:54:59 +0200 Subject: arm64: dts: qcom: x1e80100-yoga-slim7x: fix missing PCIe4 gpios Add the missing PCIe4 perst, wake and clkreq GPIOs and pin config. Fixes: 45247fe17db2 ("arm64: dts: qcom: x1e80100: add Lenovo Thinkpad Yoga slim 7x devicetree") Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240722095459.27437-13-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- .../boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts index 48a7b8eb9829..1943bdbfb8c0 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts @@ -625,6 +625,12 @@ }; &pcie4 { + perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; + + pinctrl-0 = <&pcie4_default>; + pinctrl-names = "default"; + status = "okay"; }; @@ -791,6 +797,29 @@ bias-disable; }; + pcie4_default: pcie4-default-state { + clkreq-n-pins { + pins = "gpio147"; + function = "pcie4_clk"; + drive-strength = <2>; + bias-pull-up; + }; + + perst-n-pins { + pins = "gpio146"; + function = "gpio"; + drive-strength = <2>; + bias-disable; + }; + + wake-n-pins { + pins = "gpio148"; + function = "gpio"; + drive-strength = <2>; + bias-pull-up; + }; + }; + pcie6a_default: pcie6a-default-state { clkreq-n-pins { pins = "gpio153"; -- cgit From a0e6fbf22439f796b51ea583a68eb763b0a99393 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 15 Jul 2024 14:15:39 +0200 Subject: arm64: dts: qcom: x1e80100-crd: Fix backlight The backlight does not work correctly with the current display panel configuration: It works after boot, but once the display gets disabled it is not possible to get it back on. It turns out that the ATNA45AF01 panel needs exactly the same non-standard power sequence as implemented by the panel-samsung-atna33xc20 driver for sc7180-trogdor-homestar. Switch the panel in the DT to the new compatible and make two more changes to make it work correctly: 1. Add the missing GPIO for the panel EL_ON3 line (EDP_BL_EN on CRD and enable-gpios in the DT). 2. Drop the regulator-always-on for the panel regulator. The panel does not seem to power off properly if the regulator stays on. Fixes: d7e03cce0400 ("arm64: dts: qcom: x1e80100-crd: Enable more support") Reviewed-by: Konrad Dybcio Signed-off-by: Stephan Gerhold Reviewed-by: Johan Hovold Tested-by: Johan Hovold Link: https://lore.kernel.org/r/20240715-x1e80100-crd-backlight-v2-3-31b7f2f658a3@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-crd.dts | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts index d65a22172006..82f34dfe4090 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts @@ -268,7 +268,6 @@ pinctrl-0 = <&edp_reg_en>; pinctrl-names = "default"; - regulator-always-on; regulator-boot-on; }; @@ -724,9 +723,13 @@ aux-bus { panel { - compatible = "edp-panel"; + compatible = "samsung,atna45af01", "samsung,atna33xc20"; + enable-gpios = <&pmc8380_3_gpios 4 GPIO_ACTIVE_HIGH>; power-supply = <&vreg_edp_3p3>; + pinctrl-0 = <&edp_bl_en>; + pinctrl-names = "default"; + port { edp_panel_in: endpoint { remote-endpoint = <&mdss_dp3_out>; @@ -791,6 +794,16 @@ status = "okay"; }; +&pmc8380_3_gpios { + edp_bl_en: edp-bl-en-state { + pins = "gpio4"; + function = "normal"; + power-source = <1>; /* 1.8V */ + input-disable; + output-enable; + }; +}; + &qupv3_0 { status = "okay"; }; -- cgit From 72c93f3e0dcdc05fceafcb32e79352a45716d181 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Fri, 26 Jul 2024 13:18:23 +0200 Subject: mailmap: Add an entry for Konrad Dybcio Map my old addresses. Signed-off-by: Konrad Dybcio Signed-off-by: Konrad Dybcio Acked-by: Neil Armstrong Link: https://lore.kernel.org/r/20240726-topic-konrad_email-v1-1-f94665da2919@kernel.org Signed-off-by: Bjorn Andersson --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index e51d76df75c2..d189c6424697 100644 --- a/.mailmap +++ b/.mailmap @@ -353,6 +353,8 @@ Kenneth Westfield Kiran Gunda Kirill Tkhai Kishon Vijay Abraham I +Konrad Dybcio +Konrad Dybcio Konstantin Khlebnikov Konstantin Khlebnikov Koushik -- cgit From fce6a1eefb2a1db706fa17ca21e3e7107811d2e8 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Fri, 26 Jul 2024 13:18:24 +0200 Subject: MAINTAINERS: Update Konrad Dybcio's email address Use my @kernel.org address everywhere. Signed-off-by: Konrad Dybcio Signed-off-by: Konrad Dybcio Acked-by: Neil Armstrong Link: https://lore.kernel.org/r/20240726-topic-konrad_email-v1-2-f94665da2919@kernel.org Signed-off-by: Bjorn Andersson --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 42decde38320..7b599269a821 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2745,7 +2745,7 @@ F: include/linux/soc/qcom/ ARM/QUALCOMM SUPPORT M: Bjorn Andersson -M: Konrad Dybcio +M: Konrad Dybcio L: linux-arm-msm@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git @@ -7106,7 +7106,7 @@ F: drivers/gpu/drm/tiny/panel-mipi-dbi.c DRM DRIVER for Qualcomm Adreno GPUs M: Rob Clark R: Sean Paul -R: Konrad Dybcio +R: Konrad Dybcio L: linux-arm-msm@vger.kernel.org L: dri-devel@lists.freedesktop.org L: freedreno@lists.freedesktop.org @@ -18771,7 +18771,7 @@ F: include/uapi/drm/qaic_accel.h QUALCOMM CORE POWER REDUCTION (CPR) AVS DRIVER M: Bjorn Andersson -M: Konrad Dybcio +M: Konrad Dybcio L: linux-pm@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Maintained -- cgit From 684890a0185dabf5920c43b639133adc4c2632cf Mon Sep 17 00:00:00 2001 From: John Keeping Date: Wed, 31 Jul 2024 10:33:09 +0100 Subject: Input: adc-joystick - fix optional value handling The abs-fuzz and abs-flat properties are documented as optional. When these are absent, fwnode_property_read_u32() will leave the input unchanged, meaning that an axis either picks up the value for the previous axis or an uninitialized value. Explicitly set these values to zero when they are unspecified to match the documented behaviour in the device tree bindings. Signed-off-by: John Keeping Link: https://lore.kernel.org/r/20240731093310.3696919-1-jkeeping@inmusicbrands.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/adc-joystick.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/input/joystick/adc-joystick.c b/drivers/input/joystick/adc-joystick.c index 5f46a7104b52..de1fa4cf291b 100644 --- a/drivers/input/joystick/adc-joystick.c +++ b/drivers/input/joystick/adc-joystick.c @@ -182,8 +182,11 @@ static int adc_joystick_set_axes(struct device *dev, struct adc_joystick *joy) swap(range[0], range[1]); } - fwnode_property_read_u32(child, "abs-fuzz", &fuzz); - fwnode_property_read_u32(child, "abs-flat", &flat); + if (fwnode_property_read_u32(child, "abs-fuzz", &fuzz)) + fuzz = 0; + + if (fwnode_property_read_u32(child, "abs-flat", &flat)) + flat = 0; input_set_abs_params(joy->input, axes[i].code, range[0], range[1], fuzz, flat); -- cgit From 39a3396558fb97e6e7d4c1eb04c2166da31904a9 Mon Sep 17 00:00:00 2001 From: Drew Fustini Date: Tue, 30 Jul 2024 23:14:40 -0700 Subject: clk: thead: fix dependency on clk_ignore_unused Add the CLK_IGNORE_UNUSED flag to the vp-axi clock (CLK_VP_AXI) to avoid depending on clk_ignore_unused in the cmdline. Without this fix, the emmc-sdio clock (CLK_EMMC_SDIO) fails to work after vp-axi is disabled. Signed-off-by: Drew Fustini Link: https://lore.kernel.org/r/20240731061439.3807172-1-drew@pdp7.com Fixes: ae81b69fd2b1 ("clk: thead: Add support for T-Head TH1520 AP_SUBSYS clocks") Signed-off-by: Stephen Boyd --- drivers/clk/thead/clk-th1520-ap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/thead/clk-th1520-ap.c b/drivers/clk/thead/clk-th1520-ap.c index cbc176b27c09..17e32ae08720 100644 --- a/drivers/clk/thead/clk-th1520-ap.c +++ b/drivers/clk/thead/clk-th1520-ap.c @@ -738,7 +738,7 @@ static struct ccu_div vp_axi_clk = { .hw.init = CLK_HW_INIT_PARENTS_HW("vp-axi", video_pll_clk_parent, &ccu_div_ops, - 0), + CLK_IGNORE_UNUSED), }, }; -- cgit From 9374ae912dbb1eed8139ed75fd2c0f1b30ca454d Mon Sep 17 00:00:00 2001 From: Mengqi Zhang Date: Tue, 16 Jul 2024 09:37:04 +0800 Subject: mmc: mtk-sd: receive cmd8 data when hs400 tuning fail When we use cmd8 as the tuning command in hs400 mode, the command response sent back by some eMMC devices cannot be correctly sampled by MTK eMMC controller at some weak sample timing. In this case, command timeout error may occur. So we must receive the following data to make sure the next cmd8 send correctly. Signed-off-by: Mengqi Zhang Fixes: c4ac38c6539b ("mmc: mtk-sd: Add HS400 online tuning support") Cc: stable@vger.stable.com Link: https://lore.kernel.org/r/20240716013704.10578-1-mengqi.zhang@mediatek.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index a94835b8ab93..e386f78e3267 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -1230,7 +1230,7 @@ static bool msdc_cmd_done(struct msdc_host *host, int events, } if (!sbc_error && !(events & MSDC_INT_CMDRDY)) { - if (events & MSDC_INT_CMDTMO || + if ((events & MSDC_INT_CMDTMO && !host->hs400_tuning) || (!mmc_op_tuning(cmd->opcode) && !host->hs400_tuning)) /* * should not clear fifo/interrupt as the tune data @@ -1323,9 +1323,9 @@ static void msdc_start_command(struct msdc_host *host, static void msdc_cmd_next(struct msdc_host *host, struct mmc_request *mrq, struct mmc_command *cmd) { - if ((cmd->error && - !(cmd->error == -EILSEQ && - (mmc_op_tuning(cmd->opcode) || host->hs400_tuning))) || + if ((cmd->error && !host->hs400_tuning && + !(cmd->error == -EILSEQ && + mmc_op_tuning(cmd->opcode))) || (mrq->sbc && mrq->sbc->error)) msdc_request_done(host, mrq); else if (cmd == mrq->sbc) -- cgit From a6e9c391d45b5865b61e569146304cff72821a5d Mon Sep 17 00:00:00 2001 From: Camila Alvarez Date: Tue, 30 Jul 2024 19:42:43 -0400 Subject: HID: cougar: fix slab-out-of-bounds Read in cougar_report_fixup report_fixup for the Cougar 500k Gaming Keyboard was not verifying that the report descriptor size was correct before accessing it Reported-by: syzbot+24c0361074799d02c452@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=24c0361074799d02c452 Signed-off-by: Camila Alvarez Reviewed-by: Silvan Jegen Signed-off-by: Jiri Kosina --- drivers/hid/hid-cougar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-cougar.c b/drivers/hid/hid-cougar.c index cb8bd8aae15b..0fa785f52707 100644 --- a/drivers/hid/hid-cougar.c +++ b/drivers/hid/hid-cougar.c @@ -106,7 +106,7 @@ static void cougar_fix_g6_mapping(void) static __u8 *cougar_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - if (rdesc[2] == 0x09 && rdesc[3] == 0x02 && + if (*rsize >= 117 && rdesc[2] == 0x09 && rdesc[3] == 0x02 && (rdesc[115] | rdesc[116] << 8) >= HID_MAX_USAGES) { hid_info(hdev, "usage count exceeds max: fixing up report descriptor\n"); -- cgit From ab3de2c7ec91db6a3cf5fc07765852c81ca7d6ef Mon Sep 17 00:00:00 2001 From: Aapo Vienamo Date: Thu, 20 Jun 2024 13:43:03 +0300 Subject: thunderbolt: Fix memory leaks in {port|retimer}_sb_regs_write() Add missing free_page() call for the memory allocated by validate_and_copy_from_user(). Fixes: 6d241fa00159 ("thunderbolt: Add sideband register access to debugfs") Signed-off-by: Aapo Vienamo Reviewed-by: Przemek Kitszel Signed-off-by: Mika Westerberg --- drivers/thunderbolt/debugfs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/thunderbolt/debugfs.c b/drivers/thunderbolt/debugfs.c index 11185cc1db92..9ed4bb2e8d05 100644 --- a/drivers/thunderbolt/debugfs.c +++ b/drivers/thunderbolt/debugfs.c @@ -323,16 +323,17 @@ static ssize_t port_sb_regs_write(struct file *file, const char __user *user_buf if (mutex_lock_interruptible(&tb->lock)) { ret = -ERESTARTSYS; - goto out_rpm_put; + goto out; } ret = sb_regs_write(port, port_sb_regs, ARRAY_SIZE(port_sb_regs), USB4_SB_TARGET_ROUTER, 0, buf, count, ppos); mutex_unlock(&tb->lock); -out_rpm_put: +out: pm_runtime_mark_last_busy(&sw->dev); pm_runtime_put_autosuspend(&sw->dev); + free_page((unsigned long)buf); return ret < 0 ? ret : count; } @@ -355,16 +356,17 @@ static ssize_t retimer_sb_regs_write(struct file *file, if (mutex_lock_interruptible(&tb->lock)) { ret = -ERESTARTSYS; - goto out_rpm_put; + goto out; } ret = sb_regs_write(rt->port, retimer_sb_regs, ARRAY_SIZE(retimer_sb_regs), USB4_SB_TARGET_RETIMER, rt->index, buf, count, ppos); mutex_unlock(&tb->lock); -out_rpm_put: +out: pm_runtime_mark_last_busy(&rt->dev); pm_runtime_put_autosuspend(&rt->dev); + free_page((unsigned long)buf); return ret < 0 ? ret : count; } -- cgit From d1aa95e86f178dc597e80228cd9bd81fc3510f34 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Thu, 25 Jul 2024 10:31:25 +1200 Subject: hid-asus: add ROG Ally X prod ID to quirk list The new ASUS ROG Ally X functions almost exactly the same as the previous model, so we can use the same quirks. Signed-off-by: Luke D. Jones Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 3 +++ drivers/hid/hid-ids.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 37e6d25593c2..a282388b7aa5 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1248,6 +1248,9 @@ static const struct hid_device_id asus_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY), QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X), + QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD), QUIRK_ROG_CLAYMORE_II_KEYBOARD }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 72d56ee7ce1b..6e3223389080 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -210,6 +210,7 @@ #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD3 0x1a30 #define USB_DEVICE_ID_ASUSTEK_ROG_Z13_LIGHTBAR 0x18c6 #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY 0x1abe +#define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X 0x1b4c #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 0x196b #define USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD 0x1869 -- cgit From 97155021ae17b86985121b33cf8098bcde00d497 Mon Sep 17 00:00:00 2001 From: Olivier Sobrie Date: Tue, 23 Jul 2024 10:44:35 +0200 Subject: HID: amd_sfh: free driver_data after destroying hid device HID driver callbacks aren't called anymore once hid_destroy_device() has been called. Hence, hid driver_data should be freed only after the hid_destroy_device() function returned as driver_data is used in several callbacks. I observed a crash with kernel 6.10.0 on my T14s Gen 3, after enabling KASAN to debug memory allocation, I got this output: [ 13.050438] ================================================================== [ 13.054060] BUG: KASAN: slab-use-after-free in amd_sfh_get_report+0x3ec/0x530 [amd_sfh] [ 13.054809] psmouse serio1: trackpoint: Synaptics TrackPoint firmware: 0x02, buttons: 3/3 [ 13.056432] Read of size 8 at addr ffff88813152f408 by task (udev-worker)/479 [ 13.060970] CPU: 5 PID: 479 Comm: (udev-worker) Not tainted 6.10.0-arch1-2 #1 893bb55d7f0073f25c46adbb49eb3785fefd74b0 [ 13.063978] Hardware name: LENOVO 21CQCTO1WW/21CQCTO1WW, BIOS R22ET70W (1.40 ) 03/21/2024 [ 13.067860] Call Trace: [ 13.069383] input: TPPS/2 Synaptics TrackPoint as /devices/platform/i8042/serio1/input/input8 [ 13.071486] [ 13.071492] dump_stack_lvl+0x5d/0x80 [ 13.074870] snd_hda_intel 0000:33:00.6: enabling device (0000 -> 0002) [ 13.078296] ? amd_sfh_get_report+0x3ec/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.082199] print_report+0x174/0x505 [ 13.085776] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 13.089367] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.093255] ? amd_sfh_get_report+0x3ec/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.097464] kasan_report+0xc8/0x150 [ 13.101461] ? amd_sfh_get_report+0x3ec/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.105802] amd_sfh_get_report+0x3ec/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.110303] amdtp_hid_request+0xb8/0x110 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.114879] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.119450] sensor_hub_get_feature+0x1d3/0x540 [hid_sensor_hub 3f13be3016ff415bea03008d45d99da837ee3082] [ 13.124097] hid_sensor_parse_common_attributes+0x4d0/0xad0 [hid_sensor_iio_common c3a5cbe93969c28b122609768bbe23efe52eb8f5] [ 13.127404] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.131925] ? __pfx_hid_sensor_parse_common_attributes+0x10/0x10 [hid_sensor_iio_common c3a5cbe93969c28b122609768bbe23efe52eb8f5] [ 13.136455] ? _raw_spin_lock_irqsave+0x96/0xf0 [ 13.140197] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 13.143602] ? devm_iio_device_alloc+0x34/0x50 [industrialio 3d261d5e5765625d2b052be40e526d62b1d2123b] [ 13.147234] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.150446] ? __devm_add_action+0x167/0x1d0 [ 13.155061] hid_gyro_3d_probe+0x120/0x7f0 [hid_sensor_gyro_3d 63da36a143b775846ab2dbb86c343b401b5e3172] [ 13.158581] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.161814] platform_probe+0xa2/0x150 [ 13.165029] really_probe+0x1e3/0x8a0 [ 13.168243] __driver_probe_device+0x18c/0x370 [ 13.171500] driver_probe_device+0x4a/0x120 [ 13.175000] __driver_attach+0x190/0x4a0 [ 13.178521] ? __pfx___driver_attach+0x10/0x10 [ 13.181771] bus_for_each_dev+0x106/0x180 [ 13.185033] ? __pfx__raw_spin_lock+0x10/0x10 [ 13.188229] ? __pfx_bus_for_each_dev+0x10/0x10 [ 13.191446] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.194382] bus_add_driver+0x29e/0x4d0 [ 13.197328] driver_register+0x1a5/0x360 [ 13.200283] ? __pfx_hid_gyro_3d_platform_driver_init+0x10/0x10 [hid_sensor_gyro_3d 63da36a143b775846ab2dbb86c343b401b5e3172] [ 13.203362] do_one_initcall+0xa7/0x380 [ 13.206432] ? __pfx_do_one_initcall+0x10/0x10 [ 13.210175] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.213211] ? kasan_unpoison+0x44/0x70 [ 13.216688] do_init_module+0x238/0x750 [ 13.219696] load_module+0x5011/0x6af0 [ 13.223096] ? kasan_save_stack+0x30/0x50 [ 13.226743] ? kasan_save_track+0x14/0x30 [ 13.230080] ? kasan_save_free_info+0x3b/0x60 [ 13.233323] ? poison_slab_object+0x109/0x180 [ 13.236778] ? __pfx_load_module+0x10/0x10 [ 13.239703] ? poison_slab_object+0x109/0x180 [ 13.243070] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.245924] ? init_module_from_file+0x13d/0x150 [ 13.248745] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.251503] ? init_module_from_file+0xdf/0x150 [ 13.254198] init_module_from_file+0xdf/0x150 [ 13.256826] ? __pfx_init_module_from_file+0x10/0x10 [ 13.259428] ? kasan_save_track+0x14/0x30 [ 13.261959] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.264471] ? kasan_save_free_info+0x3b/0x60 [ 13.267026] ? poison_slab_object+0x109/0x180 [ 13.269494] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.271949] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.274324] ? _raw_spin_lock+0x85/0xe0 [ 13.276671] ? __pfx__raw_spin_lock+0x10/0x10 [ 13.278963] ? __rseq_handle_notify_resume+0x1a6/0xad0 [ 13.281193] idempotent_init_module+0x23b/0x650 [ 13.283420] ? __pfx_idempotent_init_module+0x10/0x10 [ 13.285619] ? __pfx___seccomp_filter+0x10/0x10 [ 13.287714] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.289828] ? __fget_light+0x57/0x420 [ 13.291870] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.293880] ? security_capable+0x74/0xb0 [ 13.295820] __x64_sys_finit_module+0xbe/0x130 [ 13.297874] do_syscall_64+0x82/0x190 [ 13.299898] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.301905] ? irqtime_account_irq+0x3d/0x1f0 [ 13.303877] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.305753] ? __irq_exit_rcu+0x4e/0x130 [ 13.307577] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.309489] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 13.311371] RIP: 0033:0x7a21f96ade9d [ 13.313234] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 63 de 0c 00 f7 d8 64 89 01 48 [ 13.317051] RSP: 002b:00007ffeae934e78 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 13.319024] RAX: ffffffffffffffda RBX: 00005987276bfcf0 RCX: 00007a21f96ade9d [ 13.321100] RDX: 0000000000000004 RSI: 00007a21f8eda376 RDI: 000000000000001c [ 13.323314] RBP: 00007a21f8eda376 R08: 0000000000000001 R09: 00007ffeae934ec0 [ 13.325505] R10: 0000000000000050 R11: 0000000000000246 R12: 0000000000020000 [ 13.327637] R13: 00005987276c1250 R14: 0000000000000000 R15: 00005987276c4530 [ 13.329737] [ 13.333945] Allocated by task 139: [ 13.336111] kasan_save_stack+0x30/0x50 [ 13.336121] kasan_save_track+0x14/0x30 [ 13.336125] __kasan_kmalloc+0xaa/0xb0 [ 13.336129] amdtp_hid_probe+0xb1/0x440 [amd_sfh] [ 13.336138] amd_sfh_hid_client_init+0xb8a/0x10f0 [amd_sfh] [ 13.336144] sfh_init_work+0x47/0x120 [amd_sfh] [ 13.336150] process_one_work+0x673/0xeb0 [ 13.336155] worker_thread+0x795/0x1250 [ 13.336160] kthread+0x290/0x350 [ 13.336164] ret_from_fork+0x34/0x70 [ 13.336169] ret_from_fork_asm+0x1a/0x30 [ 13.338175] Freed by task 139: [ 13.340064] kasan_save_stack+0x30/0x50 [ 13.340072] kasan_save_track+0x14/0x30 [ 13.340076] kasan_save_free_info+0x3b/0x60 [ 13.340081] poison_slab_object+0x109/0x180 [ 13.340085] __kasan_slab_free+0x32/0x50 [ 13.340089] kfree+0xe5/0x310 [ 13.340094] amdtp_hid_remove+0xb2/0x160 [amd_sfh] [ 13.340102] amd_sfh_hid_client_deinit+0x324/0x640 [amd_sfh] [ 13.340107] amd_sfh_hid_client_init+0x94a/0x10f0 [amd_sfh] [ 13.340113] sfh_init_work+0x47/0x120 [amd_sfh] [ 13.340118] process_one_work+0x673/0xeb0 [ 13.340123] worker_thread+0x795/0x1250 [ 13.340127] kthread+0x290/0x350 [ 13.340132] ret_from_fork+0x34/0x70 [ 13.340136] ret_from_fork_asm+0x1a/0x30 [ 13.342482] The buggy address belongs to the object at ffff88813152f400 which belongs to the cache kmalloc-64 of size 64 [ 13.347357] The buggy address is located 8 bytes inside of freed 64-byte region [ffff88813152f400, ffff88813152f440) [ 13.347367] The buggy address belongs to the physical page: [ 13.355409] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x13152f [ 13.355416] anon flags: 0x2ffff8000000000(node=0|zone=2|lastcpupid=0x1ffff) [ 13.355423] page_type: 0xffffefff(slab) [ 13.355429] raw: 02ffff8000000000 ffff8881000428c0 ffffea0004c43a00 0000000000000005 [ 13.355435] raw: 0000000000000000 0000000000200020 00000001ffffefff 0000000000000000 [ 13.355439] page dumped because: kasan: bad access detected [ 13.357295] Memory state around the buggy address: [ 13.357299] ffff88813152f300: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 13.357303] ffff88813152f380: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 13.357306] >ffff88813152f400: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 13.357309] ^ [ 13.357311] ffff88813152f480: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc [ 13.357315] ffff88813152f500: 00 00 00 00 00 00 00 06 fc fc fc fc fc fc fc fc [ 13.357318] ================================================================== [ 13.357405] Disabling lock debugging due to kernel taint [ 13.383534] Oops: general protection fault, probably for non-canonical address 0xe0a1bc4140000013: 0000 [#1] PREEMPT SMP KASAN NOPTI [ 13.383544] KASAN: maybe wild-memory-access in range [0x050e020a00000098-0x050e020a0000009f] [ 13.383551] CPU: 3 PID: 479 Comm: (udev-worker) Tainted: G B 6.10.0-arch1-2 #1 893bb55d7f0073f25c46adbb49eb3785fefd74b0 [ 13.383561] Hardware name: LENOVO 21CQCTO1WW/21CQCTO1WW, BIOS R22ET70W (1.40 ) 03/21/2024 [ 13.383565] RIP: 0010:amd_sfh_get_report+0x81/0x530 [amd_sfh] [ 13.383580] Code: 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 78 03 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8b 63 08 49 8d 7c 24 10 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e 1a 03 00 00 45 8b 74 24 10 45 [ 13.383585] RSP: 0018:ffff8881261f7388 EFLAGS: 00010212 [ 13.383592] RAX: dffffc0000000000 RBX: ffff88813152f400 RCX: 0000000000000002 [ 13.383597] RDX: 00a1c04140000013 RSI: 0000000000000008 RDI: 050e020a0000009b [ 13.383600] RBP: ffff88814d010000 R08: 0000000000000002 R09: fffffbfff3ddb8c0 [ 13.383604] R10: ffffffff9eedc607 R11: ffff88810ce98000 R12: 050e020a0000008b [ 13.383607] R13: ffff88814d010000 R14: dffffc0000000000 R15: 0000000000000004 [ 13.383611] FS: 00007a21f94d0880(0000) GS:ffff8887e7d80000(0000) knlGS:0000000000000000 [ 13.383615] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 13.383618] CR2: 00007e0014c438f0 CR3: 000000012614c000 CR4: 0000000000f50ef0 [ 13.383622] PKRU: 55555554 [ 13.383625] Call Trace: [ 13.383629] [ 13.383632] ? __die_body.cold+0x19/0x27 [ 13.383644] ? die_addr+0x46/0x70 [ 13.383652] ? exc_general_protection+0x150/0x240 [ 13.383664] ? asm_exc_general_protection+0x26/0x30 [ 13.383674] ? amd_sfh_get_report+0x81/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.383686] ? amd_sfh_get_report+0x3ec/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.383697] amdtp_hid_request+0xb8/0x110 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.383706] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383713] sensor_hub_get_feature+0x1d3/0x540 [hid_sensor_hub 3f13be3016ff415bea03008d45d99da837ee3082] [ 13.383727] hid_sensor_parse_common_attributes+0x4d0/0xad0 [hid_sensor_iio_common c3a5cbe93969c28b122609768bbe23efe52eb8f5] [ 13.383739] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383745] ? __pfx_hid_sensor_parse_common_attributes+0x10/0x10 [hid_sensor_iio_common c3a5cbe93969c28b122609768bbe23efe52eb8f5] [ 13.383753] ? _raw_spin_lock_irqsave+0x96/0xf0 [ 13.383762] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 13.383768] ? devm_iio_device_alloc+0x34/0x50 [industrialio 3d261d5e5765625d2b052be40e526d62b1d2123b] [ 13.383790] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383795] ? __devm_add_action+0x167/0x1d0 [ 13.383806] hid_gyro_3d_probe+0x120/0x7f0 [hid_sensor_gyro_3d 63da36a143b775846ab2dbb86c343b401b5e3172] [ 13.383818] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383826] platform_probe+0xa2/0x150 [ 13.383832] really_probe+0x1e3/0x8a0 [ 13.383838] __driver_probe_device+0x18c/0x370 [ 13.383844] driver_probe_device+0x4a/0x120 [ 13.383851] __driver_attach+0x190/0x4a0 [ 13.383857] ? __pfx___driver_attach+0x10/0x10 [ 13.383863] bus_for_each_dev+0x106/0x180 [ 13.383868] ? __pfx__raw_spin_lock+0x10/0x10 [ 13.383874] ? __pfx_bus_for_each_dev+0x10/0x10 [ 13.383880] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383887] bus_add_driver+0x29e/0x4d0 [ 13.383895] driver_register+0x1a5/0x360 [ 13.383902] ? __pfx_hid_gyro_3d_platform_driver_init+0x10/0x10 [hid_sensor_gyro_3d 63da36a143b775846ab2dbb86c343b401b5e3172] [ 13.383910] do_one_initcall+0xa7/0x380 [ 13.383919] ? __pfx_do_one_initcall+0x10/0x10 [ 13.383927] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383933] ? kasan_unpoison+0x44/0x70 [ 13.383943] do_init_module+0x238/0x750 [ 13.383955] load_module+0x5011/0x6af0 [ 13.383962] ? kasan_save_stack+0x30/0x50 [ 13.383968] ? kasan_save_track+0x14/0x30 [ 13.383973] ? kasan_save_free_info+0x3b/0x60 [ 13.383980] ? poison_slab_object+0x109/0x180 [ 13.383993] ? __pfx_load_module+0x10/0x10 [ 13.384007] ? poison_slab_object+0x109/0x180 [ 13.384012] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384018] ? init_module_from_file+0x13d/0x150 [ 13.384025] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384032] ? init_module_from_file+0xdf/0x150 [ 13.384037] init_module_from_file+0xdf/0x150 [ 13.384044] ? __pfx_init_module_from_file+0x10/0x10 [ 13.384050] ? kasan_save_track+0x14/0x30 [ 13.384055] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384060] ? kasan_save_free_info+0x3b/0x60 [ 13.384066] ? poison_slab_object+0x109/0x180 [ 13.384071] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384080] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384085] ? _raw_spin_lock+0x85/0xe0 [ 13.384091] ? __pfx__raw_spin_lock+0x10/0x10 [ 13.384096] ? __rseq_handle_notify_resume+0x1a6/0xad0 [ 13.384106] idempotent_init_module+0x23b/0x650 [ 13.384114] ? __pfx_idempotent_init_module+0x10/0x10 [ 13.384120] ? __pfx___seccomp_filter+0x10/0x10 [ 13.384129] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384135] ? __fget_light+0x57/0x420 [ 13.384142] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384147] ? security_capable+0x74/0xb0 [ 13.384157] __x64_sys_finit_module+0xbe/0x130 [ 13.384164] do_syscall_64+0x82/0x190 [ 13.384174] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384179] ? irqtime_account_irq+0x3d/0x1f0 [ 13.384188] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384193] ? __irq_exit_rcu+0x4e/0x130 [ 13.384201] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384206] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 13.384212] RIP: 0033:0x7a21f96ade9d [ 13.384263] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 63 de 0c 00 f7 d8 64 89 01 48 [ 13.384267] RSP: 002b:00007ffeae934e78 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 13.384273] RAX: ffffffffffffffda RBX: 00005987276bfcf0 RCX: 00007a21f96ade9d [ 13.384277] RDX: 0000000000000004 RSI: 00007a21f8eda376 RDI: 000000000000001c [ 13.384280] RBP: 00007a21f8eda376 R08: 0000000000000001 R09: 00007ffeae934ec0 [ 13.384284] R10: 0000000000000050 R11: 0000000000000246 R12: 0000000000020000 [ 13.384288] R13: 00005987276c1250 R14: 0000000000000000 R15: 00005987276c4530 [ 13.384297] [ 13.384299] Modules linked in: soundwire_amd(+) hid_sensor_gyro_3d(+) hid_sensor_magn_3d hid_sensor_accel_3d soundwire_generic_allocation amdxcp hid_sensor_trigger drm_exec industrialio_triggered_buffer soundwire_bus gpu_sched kvm_amd kfifo_buf qmi_helpers joydev drm_buddy hid_sensor_iio_common mousedev snd_soc_core industrialio i2c_algo_bit mac80211 snd_compress drm_suballoc_helper kvm snd_hda_intel drm_ttm_helper ac97_bus snd_pcm_dmaengine snd_intel_dspcfg ttm thinkpad_acpi(+) snd_intel_sdw_acpi hid_sensor_hub snd_rpl_pci_acp6x drm_display_helper snd_hda_codec hid_multitouch libarc4 snd_acp_pci platform_profile think_lmi(+) hid_generic firmware_attributes_class wmi_bmof cec snd_acp_legacy_common sparse_keymap rapl snd_hda_core psmouse cfg80211 pcspkr snd_pci_acp6x snd_hwdep video snd_pcm snd_pci_acp5x snd_timer snd_rn_pci_acp3x ucsi_acpi snd_acp_config snd sp5100_tco rfkill snd_soc_acpi typec_ucsi thunderbolt amd_sfh k10temp mhi soundcore i2c_piix4 snd_pci_acp3x typec i2c_hid_acpi roles i2c_hid wmi acpi_tad amd_pmc [ 13.384454] mac_hid i2c_dev crypto_user loop nfnetlink zram ip_tables x_tables dm_crypt cbc encrypted_keys trusted asn1_encoder tee dm_mod crct10dif_pclmul crc32_pclmul polyval_clmulni polyval_generic gf128mul ghash_clmulni_intel serio_raw sha512_ssse3 atkbd sha256_ssse3 libps2 sha1_ssse3 vivaldi_fmap nvme aesni_intel crypto_simd nvme_core cryptd ccp xhci_pci i8042 nvme_auth xhci_pci_renesas serio vfat fat btrfs blake2b_generic libcrc32c crc32c_generic crc32c_intel xor raid6_pq [ 13.384552] ---[ end trace 0000000000000000 ]--- KASAN reports a use-after-free of hid->driver_data in function amd_sfh_get_report(). The backtrace indicates that the function is called by amdtp_hid_request() which is one of the callbacks of hid device. The current make sure that driver_data is freed only once hid_destroy_device() returned. Note that I observed the crash both on v6.9.9 and v6.10.0. The code seems to be as it was from the early days of the driver. Signed-off-by: Olivier Sobrie Acked-by: Basavaraj Natikar Signed-off-by: Jiri Kosina --- drivers/hid/amd-sfh-hid/amd_sfh_hid.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c index 705b52337068..81f3024b7b1b 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c @@ -171,11 +171,13 @@ err_hid_data: void amdtp_hid_remove(struct amdtp_cl_data *cli_data) { int i; + struct amdtp_hid_data *hid_data; for (i = 0; i < cli_data->num_hid_devices; ++i) { if (cli_data->hid_sensor_hubs[i]) { - kfree(cli_data->hid_sensor_hubs[i]->driver_data); + hid_data = cli_data->hid_sensor_hubs[i]->driver_data; hid_destroy_device(cli_data->hid_sensor_hubs[i]); + kfree(hid_data); cli_data->hid_sensor_hubs[i] = NULL; } } -- cgit From c8000deb68365b461b324d68c7ea89d730f0bb85 Mon Sep 17 00:00:00 2001 From: Dmitry Savin Date: Tue, 16 Jul 2024 23:27:57 +0100 Subject: HID: multitouch: Add support for GT7868Q GT7868Q has incorrect data in the report and needs a fixup. The change enables haptic touchpad on Lenovo ThinkBook 13x Gen 4 and has been tested on the device. Signed-off-by: Dmitry Savin Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 2 ++ drivers/hid/hid-multitouch.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 6e3223389080..781c5aa29859 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -521,6 +521,8 @@ #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_E100 0xe100 #define I2C_VENDOR_ID_GOODIX 0x27c6 +#define I2C_DEVICE_ID_GOODIX_01E8 0x01e8 +#define I2C_DEVICE_ID_GOODIX_01E9 0x01e9 #define I2C_DEVICE_ID_GOODIX_01F0 0x01f0 #define USB_VENDOR_ID_GOODTOUCH 0x1aad diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 56fc78841f24..99812c0f830b 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1441,6 +1441,30 @@ static int mt_event(struct hid_device *hid, struct hid_field *field, return 0; } +static __u8 *mt_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *size) +{ + if (hdev->vendor == I2C_VENDOR_ID_GOODIX && + (hdev->product == I2C_DEVICE_ID_GOODIX_01E8 || + hdev->product == I2C_DEVICE_ID_GOODIX_01E9)) { + if (rdesc[607] == 0x15) { + rdesc[607] = 0x25; + dev_info( + &hdev->dev, + "GT7868Q report descriptor fixup is applied.\n"); + } else { + dev_info( + &hdev->dev, + "The byte is not expected for fixing the report descriptor. \ +It's possible that the touchpad firmware is not suitable for applying the fix. \ +got: %x\n", + rdesc[607]); + } + } + + return rdesc; +} + static void mt_report(struct hid_device *hid, struct hid_report *report) { struct mt_device *td = hid_get_drvdata(hid); @@ -2035,6 +2059,14 @@ static const struct hid_device_id mt_devices[] = { MT_BT_DEVICE(USB_VENDOR_ID_FRUCTEL, USB_DEVICE_ID_GAMETEL_MT_MODE) }, + /* Goodix GT7868Q devices */ + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, + I2C_DEVICE_ID_GOODIX_01E8) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, + I2C_DEVICE_ID_GOODIX_01E8) }, + /* GoodTouch panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_GOODTOUCH, @@ -2270,6 +2302,7 @@ static struct hid_driver mt_driver = { .feature_mapping = mt_feature_mapping, .usage_table = mt_grabbed_usages, .event = mt_event, + .report_fixup = mt_report_fixup, .report = mt_report, .suspend = pm_ptr(mt_suspend), .reset_resume = pm_ptr(mt_reset_resume), -- cgit From 1b8f9c1fb464968a5b18d3acc1da8c00bad24fad Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Tue, 30 Jul 2024 08:51:55 -0700 Subject: HID: wacom: Defer calculation of resolution until resolution_code is known The Wacom driver maps the HID_DG_TWIST usage to ABS_Z (rather than ABS_RZ) for historic reasons. When the code to support twist was introduced in commit 50066a042da5 ("HID: wacom: generic: Add support for height, tilt, and twist usages"), we were careful to write it in such a way that it had HID calculate the resolution of the twist axis assuming ABS_RZ instead (so that we would get correct angular behavior). This was broken with the introduction of commit 08a46b4190d3 ("HID: wacom: Set a default resolution for older tablets"), which moved the resolution calculation to occur *before* the adjustment from ABS_Z to ABS_RZ occurred. This commit moves the calculation of resolution after the point that we are finished setting things up for its proper use. Signed-off-by: Jason Gerecke Fixes: 08a46b4190d3 ("HID: wacom: Set a default resolution for older tablets") Cc: stable@vger.kernel.org Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 1f4564982b95..2541fa2e0fa3 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1878,12 +1878,14 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage, int fmax = field->logical_maximum; unsigned int equivalent_usage = wacom_equivalent_usage(usage->hid); int resolution_code = code; - int resolution = hidinput_calc_abs_res(field, resolution_code); + int resolution; if (equivalent_usage == HID_DG_TWIST) { resolution_code = ABS_RZ; } + resolution = hidinput_calc_abs_res(field, resolution_code); + if (equivalent_usage == HID_GD_X) { fmin += features->offset_left; fmax -= features->offset_right; -- cgit From 0e8a0504da59041e775a95db3ebc1a6211423593 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Thu, 1 Aug 2024 13:40:24 +0300 Subject: phy: qcom: qmp-pcie: Fix X1E80100 PCIe Gen4 PHY initialisation Update the PCIe Gen4 PHY init sequence with the latest based on internal Qualcomm documentation. Fixes: 606060ce8fd0 ("phy: qcom-qmp-pcie: Add support for X1E80100 g3x2 and g4x2 PCIE") Signed-off-by: Abel Vesa Link: https://lore.kernel.org/r/20240801-x1e80100-phy-qmp-pcie-fix-config-v2-1-cdc0f22b4169@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 5b36cc7ac78b..06cd9787e700 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -1245,8 +1245,8 @@ static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_serdes_tbl[] = { static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_ln_shrd_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RXCLK_DIV2_CTRL, 0x01), QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_DFE_DAC_ENABLE1, 0x88), - QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_TX_ADAPT_POST_THRESH1, 0x00), - QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_TX_ADAPT_POST_THRESH2, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_TX_ADAPT_POST_THRESH1, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_TX_ADAPT_POST_THRESH2, 0x0d), QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MODE_RATE_0_1_B0, 0xd4), QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MODE_RATE_0_1_B1, 0x12), QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MODE_RATE_0_1_B2, 0xdb), @@ -1263,6 +1263,7 @@ static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_ln_shrd_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MARG_COARSE_THRESH4_RATE3, 0x1f), QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MARG_COARSE_THRESH5_RATE3, 0x1f), QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MARG_COARSE_THRESH6_RATE3, 0x1f), + QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_SUMMER_CAL_SPD_MODE, 0x5b), }; static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_tx_tbl[] = { @@ -1286,12 +1287,15 @@ static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_rx_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_DFE_1, 0x01), QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_DFE_2, 0x01), QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_DFE_3, 0x45), - QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_VGA_CAL_MAN_VAL, 0x0b), + QMP_PHY_INIT_CFG_LANE(QSERDES_V6_20_RX_VGA_CAL_MAN_VAL, 0x0a, 1), + QMP_PHY_INIT_CFG_LANE(QSERDES_V6_20_RX_VGA_CAL_MAN_VAL, 0x0b, 2), + QMP_PHY_INIT_CFG(QSERDES_V6_20_VGA_CAL_CNTRL1, 0x00), QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_GM_CAL, 0x0d), QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_EQU_ADAPTOR_CNTRL4, 0x0b), QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_SIGDET_ENABLES, 0x1c), QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_PHPRE_CTRL, 0x20), - QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38), + QMP_PHY_INIT_CFG_LANE(QSERDES_V6_20_RX_DFE_CTLE_POST_CAL_OFFSET, 0x3a, 1), + QMP_PHY_INIT_CFG_LANE(QSERDES_V6_20_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38, 2), QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_Q_PI_INTRINSIC_BIAS_RATE32, 0x39), QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B0, 0x14), QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B1, 0xb3), @@ -1307,6 +1311,7 @@ static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_rx_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B4, 0x4b), QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B5, 0x76), QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B6, 0xff), + QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_TX_ADPT_CTRL, 0x10), }; static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_pcs_tbl[] = { @@ -1314,6 +1319,8 @@ static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_RX_SIGDET_LVL, 0xcc), QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_EQ_CONFIG4, 0x00), QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_EQ_CONFIG5, 0x22), + QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_TX_RX_CONFIG1, 0x04), + QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_TX_RX_CONFIG2, 0x02), }; static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_pcs_misc_tbl[] = { @@ -1324,11 +1331,13 @@ static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_pcs_misc_tbl[] = { QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_G4_PRE_GAIN, 0x2e), QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_RX_MARGINING_CONFIG1, 0x03), QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_RX_MARGINING_CONFIG3, 0x28), + QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_G3_RXEQEVAL_TIME, 0x27), + QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_G4_RXEQEVAL_TIME, 0x27), QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_TX_RX_CONFIG, 0xc0), QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_POWER_STATE_CONFIG2, 0x1d), - QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_RX_MARGINING_CONFIG5, 0x0f), - QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_G3_FOM_EQ_CONFIG5, 0xf2), - QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_G4_FOM_EQ_CONFIG5, 0xf2), + QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_RX_MARGINING_CONFIG5, 0x18), + QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_G3_FOM_EQ_CONFIG5, 0x7a), + QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_G4_FOM_EQ_CONFIG5, 0x8a), }; static const struct qmp_phy_init_tbl sm8250_qmp_pcie_serdes_tbl[] = { -- cgit From ce52c2532299c7ccfd34a52db8d071e890a78c59 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 1 Aug 2024 20:46:42 +0800 Subject: phy: fsl-imx8mq-usb: fix tuning parameter name According to fsl,imx8mq-usb-phy.yaml, this tuning parameter should be fsl,phy-pcs-tx-deemph-3p5db-attenuation-db. Fixes: 63c85ad0cd81 ("phy: fsl-imx8mp-usb: add support for phy tuning") Cc: stable@vger.kernel.org Signed-off-by: Xu Yang Reviewed-by: Alexander Stein Link: https://lore.kernel.org/r/20240801124642.1152838-1-xu.yang_2@nxp.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-imx8mq-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8mq-usb.c b/drivers/phy/freescale/phy-fsl-imx8mq-usb.c index 0b9a59d5b8f0..adc6394626ce 100644 --- a/drivers/phy/freescale/phy-fsl-imx8mq-usb.c +++ b/drivers/phy/freescale/phy-fsl-imx8mq-usb.c @@ -176,7 +176,7 @@ static void imx8m_get_phy_tuning_data(struct imx8mq_usb_phy *imx_phy) imx_phy->comp_dis_tune = phy_comp_dis_tune_from_property(imx_phy->comp_dis_tune); - if (device_property_read_u32(dev, "fsl,pcs-tx-deemph-3p5db-attenuation-db", + if (device_property_read_u32(dev, "fsl,phy-pcs-tx-deemph-3p5db-attenuation-db", &imx_phy->pcs_tx_deemph_3p5db)) imx_phy->pcs_tx_deemph_3p5db = PHY_TUNE_DEFAULT; else -- cgit From 3a07703a523045cbdb0a5fa5e0902a9145ee43e9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 8 Jul 2024 12:04:33 -0500 Subject: phy: exynos5-usbdrd: fix error code in probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return negative -ENOMEM instead of positive ENOMEM. Fixes: 497ddafe915e ("phy: exynos5-usbdrd: convert Vbus supplies to regulator_bulk") Signed-off-by: Dan Carpenter Reviewed-by: Peter Griffin Reviewed-by: André Draszik Reviewed-by: Sam Protsenko Link: https://lore.kernel.org/r/a956a3e2-c6ce-4f07-ad80-ec8a96e00d16@stanley.mountain Signed-off-by: Vinod Koul --- drivers/phy/samsung/phy-exynos5-usbdrd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/samsung/phy-exynos5-usbdrd.c b/drivers/phy/samsung/phy-exynos5-usbdrd.c index df52b78a120b..9cbf90142950 100644 --- a/drivers/phy/samsung/phy-exynos5-usbdrd.c +++ b/drivers/phy/samsung/phy-exynos5-usbdrd.c @@ -1745,7 +1745,7 @@ static int exynos5_usbdrd_phy_probe(struct platform_device *pdev) sizeof(*phy_drd->regulators), GFP_KERNEL); if (!phy_drd->regulators) - return ENOMEM; + return -ENOMEM; regulator_bulk_set_supply_names(phy_drd->regulators, drv_data->regulator_names, drv_data->n_regulators); -- cgit From 70d76b0e85ad126358baec1b44f797e61e3ebecc Mon Sep 17 00:00:00 2001 From: Felix Kaechele Date: Sat, 3 Aug 2024 23:13:09 -0400 Subject: dt-bindings: input: touchscreen: edt-ft5x06: Document FT8201 support Document FocalTech FT8201 support by adding the compatible. Signed-off-by: Felix Kaechele Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240804031310.331871-2-felix@kaechele.ca Signed-off-by: Dmitry Torokhov --- Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml index 379721027bf8..51d48d4130d3 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml @@ -42,6 +42,7 @@ properties: - focaltech,ft5426 - focaltech,ft5452 - focaltech,ft6236 + - focaltech,ft8201 - focaltech,ft8719 reg: -- cgit From fc289d3e8698f9b11edad6d73f371ebf35944c57 Mon Sep 17 00:00:00 2001 From: Felix Kaechele Date: Sat, 3 Aug 2024 23:13:10 -0400 Subject: Input: edt-ft5x06 - add support for FocalTech FT8201 The driver supports the FT8201 chip as well. It registers up to 10 touch points. Tested on: Lenovo ThinkSmart View (CD-18781Y), LCM: BOE TV080WXM-LL4 Signed-off-by: Felix Kaechele Link: https://lore.kernel.org/r/20240804031310.331871-3-felix@kaechele.ca Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/edt-ft5x06.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index 42f99e57fbb7..e70415f189a5 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -1474,6 +1474,10 @@ static const struct edt_i2c_chip_data edt_ft6236_data = { .max_support_points = 2, }; +static const struct edt_i2c_chip_data edt_ft8201_data = { + .max_support_points = 10, +}; + static const struct edt_i2c_chip_data edt_ft8719_data = { .max_support_points = 10, }; @@ -1485,6 +1489,7 @@ static const struct i2c_device_id edt_ft5x06_ts_id[] = { { .name = "ft5452", .driver_data = (long)&edt_ft5452_data }, /* Note no edt- prefix for compatibility with the ft6236.c driver */ { .name = "ft6236", .driver_data = (long)&edt_ft6236_data }, + { .name = "ft8201", .driver_data = (long)&edt_ft8201_data }, { .name = "ft8719", .driver_data = (long)&edt_ft8719_data }, { /* sentinel */ } }; @@ -1500,6 +1505,7 @@ static const struct of_device_id edt_ft5x06_of_match[] = { { .compatible = "focaltech,ft5452", .data = &edt_ft5452_data }, /* Note focaltech vendor prefix for compatibility with ft6236.c */ { .compatible = "focaltech,ft6236", .data = &edt_ft6236_data }, + { .compatible = "focaltech,ft8201", .data = &edt_ft8201_data }, { .compatible = "focaltech,ft8719", .data = &edt_ft8719_data }, { /* sentinel */ } }; -- cgit From 206f533a0a7c683982af473079c4111f4a0f9f5e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 4 Aug 2024 17:50:25 -0700 Subject: Input: uinput - reject requests with unreasonable number of slots From: Dmitry Torokhov When exercising uinput interface syzkaller may try setting up device with a really large number of slots, which causes memory allocation failure in input_mt_init_slots(). While this allocation failure is handled properly and request is rejected, it results in syzkaller reports. Additionally, such request may put undue burden on the system which will try to free a lot of memory for a bogus request. Fix it by limiting allowed number of slots to 100. This can easily be extended if we see devices that can track more than 100 contacts. Reported-by: Tetsuo Handa Reported-by: syzbot Closes: https://syzkaller.appspot.com/bug?extid=0122fa359a69694395d5 Link: https://lore.kernel.org/r/Zqgi7NYEbpRsJfa2@google.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/uinput.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index d23f3225b00f..445856c9127a 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -417,6 +417,20 @@ static int uinput_validate_absinfo(struct input_dev *dev, unsigned int code, return -EINVAL; } + /* + * Limit number of contacts to a reasonable value (100). This + * ensures that we need less than 2 pages for struct input_mt + * (we are not using in-kernel slot assignment so not going to + * allocate memory for the "red" table), and we should have no + * trouble getting this much memory. + */ + if (code == ABS_MT_SLOT && max > 99) { + printk(KERN_DEBUG + "%s: unreasonably large number of slots requested: %d\n", + UINPUT_NAME, max); + return -EINVAL; + } + return 0; } -- cgit From b7fd10333713e9984cc9b9c04f3681f80efdc809 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 11 Jul 2024 11:37:57 +0200 Subject: pinctrl: qcom: x1e80100: Update PDC hwirq map The current map seems to be out of sync (and includes a duplicate entry for GPIO193..). Replace it with the map present in shipping devices' ACPI tables. This new one seems more complete, as it e.g. contains GPIO145 (PCIE6a WAKE#) Fixes: 05e4941d97ef ("pinctrl: qcom: Add X1E80100 pinctrl driver") Signed-off-by: Konrad Dybcio Reviewed-by: Abel Vesa Reviewed-by: Rajendra Nayak Link: https://lore.kernel.org/20240711-topic-x1e_pdc_tlmm-v1-1-e278b249d793@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-x1e80100.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-x1e80100.c b/drivers/pinctrl/qcom/pinctrl-x1e80100.c index e30e93840357..6cd4d10e6fd6 100644 --- a/drivers/pinctrl/qcom/pinctrl-x1e80100.c +++ b/drivers/pinctrl/qcom/pinctrl-x1e80100.c @@ -1813,18 +1813,21 @@ static const struct msm_pingroup x1e80100_groups[] = { static const struct msm_gpio_wakeirq_map x1e80100_pdc_map[] = { { 0, 72 }, { 2, 70 }, { 3, 71 }, { 6, 123 }, { 7, 67 }, { 11, 85 }, - { 15, 68 }, { 18, 122 }, { 19, 69 }, { 21, 158 }, { 23, 143 }, { 26, 129 }, - { 27, 144 }, { 28, 77 }, { 29, 78 }, { 30, 92 }, { 32, 145 }, { 33, 115 }, - { 34, 130 }, { 35, 146 }, { 36, 147 }, { 39, 80 }, { 43, 148 }, { 47, 149 }, - { 51, 79 }, { 53, 89 }, { 59, 87 }, { 64, 90 }, { 65, 106 }, { 66, 142 }, - { 67, 88 }, { 71, 91 }, { 75, 152 }, { 79, 153 }, { 80, 125 }, { 81, 128 }, - { 84, 137 }, { 85, 155 }, { 87, 156 }, { 91, 157 }, { 92, 138 }, { 94, 140 }, - { 95, 141 }, { 113, 84 }, { 121, 73 }, { 123, 74 }, { 129, 76 }, { 131, 82 }, - { 134, 83 }, { 141, 93 }, { 144, 94 }, { 147, 96 }, { 148, 97 }, { 150, 102 }, - { 151, 103 }, { 153, 104 }, { 156, 105 }, { 157, 107 }, { 163, 98 }, { 166, 112 }, - { 172, 99 }, { 181, 101 }, { 184, 116 }, { 193, 40 }, { 193, 117 }, { 196, 108 }, - { 203, 133 }, { 212, 120 }, { 213, 150 }, { 214, 121 }, { 215, 118 }, { 217, 109 }, - { 220, 110 }, { 221, 111 }, { 222, 124 }, { 224, 131 }, { 225, 132 }, + { 13, 86 }, { 15, 68 }, { 18, 122 }, { 19, 69 }, { 21, 158 }, { 23, 143 }, + { 24, 126 }, { 26, 129 }, { 27, 144 }, { 28, 77 }, { 29, 78 }, { 30, 92 }, + { 31, 159 }, { 32, 145 }, { 33, 115 }, { 34, 130 }, { 35, 146 }, { 36, 147 }, + { 38, 113 }, { 39, 80 }, { 43, 148 }, { 47, 149 }, { 51, 79 }, { 53, 89 }, + { 55, 81 }, { 59, 87 }, { 64, 90 }, { 65, 106 }, { 66, 142 }, { 67, 88 }, + { 68, 151 }, { 71, 91 }, { 75, 152 }, { 79, 153 }, { 80, 125 }, { 81, 128 }, + { 83, 154 }, { 84, 137 }, { 85, 155 }, { 87, 156 }, { 91, 157 }, { 92, 138 }, + { 93, 139 }, { 94, 140 }, { 95, 141 }, { 113, 84 }, { 121, 73 }, { 123, 74 }, + { 125, 75 }, { 129, 76 }, { 131, 82 }, { 134, 83 }, { 141, 93 }, { 144, 94 }, + { 145, 95 }, { 147, 96 }, { 148, 97 }, { 150, 102 }, { 151, 103 }, { 153, 104 }, + { 154, 100 }, { 156, 105 }, { 157, 107 }, { 163, 98 }, { 166, 112 }, { 172, 99 }, + { 175, 114 }, { 181, 101 }, { 184, 116 }, { 193, 117 }, { 196, 108 }, { 203, 133 }, + { 208, 134 }, { 212, 120 }, { 213, 150 }, { 214, 121 }, { 215, 118 }, { 217, 109 }, + { 219, 119 }, { 220, 110 }, { 221, 111 }, { 222, 124 }, { 224, 131 }, { 225, 132 }, + { 228, 135 }, { 230, 136 }, { 232, 162 }, }; static const struct msm_pinctrl_soc_data x1e80100_pinctrl = { -- cgit From 203ed203fcc223d80737a7799f8244646363b739 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 2 Jul 2024 16:54:17 +0200 Subject: arm64: dts: layerscape: fix thermal node names length Linux kernel expects thermal zone node names to be maximum of 19 characters (see THERMAL_NAME_LENGTH, including terminating NUL byte) and bindings/dtbs_check points that: fsl-ls2088a-rdb.dtb: thermal-zones: 'core-cluster1-thermal', 'core-cluster2-thermal', 'core-cluster3-thermal', 'core-cluster4-thermal' do not match any of the regexes: '^[a-zA-Z][a-zA-Z0-9\\-]{1,10}-thermal$', 'pinctrl-[0-9]+' Name longer than 19 characters leads to driver probe errors when registering such thermal zone. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 2 +- arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi | 2 +- arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi | 2 +- arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi | 2 +- arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi | 8 ++++---- arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 6b6e3ee950e5..acf293310f7a 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -175,7 +175,7 @@ }; }; - core-cluster-thermal { + cluster-thermal { polling-delay-passive = <1000>; polling-delay = <5000>; thermal-sensors = <&tmu 1>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index 17f4e3171120..ab4c919e3e16 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -214,7 +214,7 @@ }; }; - core-cluster-thermal { + cluster-thermal { polling-delay-passive = <1000>; polling-delay = <5000>; thermal-sensors = <&tmu 3>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi index 200e52622f99..55019866d6a2 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi @@ -182,7 +182,7 @@ }; }; - core-cluster-thermal { + cluster-thermal { polling-delay-passive = <1000>; polling-delay = <5000>; thermal-sensors = <&tmu 3>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi index 8ce4b6aae79d..e3a7db21fe29 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi @@ -131,7 +131,7 @@ }; thermal-zones { - core-cluster-thermal { + cluster-thermal { polling-delay-passive = <1000>; polling-delay = <5000>; thermal-sensors = <&tmu 0>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index bde89de2576e..1b306d6802ce 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -122,7 +122,7 @@ }; }; - core-cluster1-thermal { + cluster1-thermal { polling-delay-passive = <1000>; polling-delay = <5000>; thermal-sensors = <&tmu 4>; @@ -151,7 +151,7 @@ }; }; - core-cluster2-thermal { + cluster2-thermal { polling-delay-passive = <1000>; polling-delay = <5000>; thermal-sensors = <&tmu 5>; @@ -180,7 +180,7 @@ }; }; - core-cluster3-thermal { + cluster3-thermal { polling-delay-passive = <1000>; polling-delay = <5000>; thermal-sensors = <&tmu 6>; @@ -209,7 +209,7 @@ }; }; - core-cluster4-thermal { + cluster4-thermal { polling-delay-passive = <1000>; polling-delay = <5000>; thermal-sensors = <&tmu 7>; diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi index 26c7ca31e22e..bd75a658767d 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi @@ -492,7 +492,7 @@ }; }; - ddr-cluster5-thermal { + ddr-ctrl5-thermal { polling-delay-passive = <1000>; polling-delay = <5000>; thermal-sensors = <&tmu 1>; -- cgit From 50359c9c3cb3e55e840e3485f5ee37da5b2b16b6 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 17 Jul 2024 10:03:33 +0200 Subject: pmdomain: imx: scu-pd: Remove duplicated clocks These clocks are already added to the list. Remove the duplicates ones. Fixes: a67d780720ff ("genpd: imx: scu-pd: add more PDs") Signed-off-by: Alexander Stein Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240717080334.2210988-1-alexander.stein@ew.tq-group.com Signed-off-by: Ulf Hansson --- drivers/pmdomain/imx/scu-pd.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/pmdomain/imx/scu-pd.c b/drivers/pmdomain/imx/scu-pd.c index 05841b0bf7f3..01d465d88f60 100644 --- a/drivers/pmdomain/imx/scu-pd.c +++ b/drivers/pmdomain/imx/scu-pd.c @@ -223,11 +223,6 @@ static const struct imx_sc_pd_range imx8qxp_scu_pd_ranges[] = { { "lvds1-pwm", IMX_SC_R_LVDS_1_PWM_0, 1, false, 0 }, { "lvds1-lpi2c", IMX_SC_R_LVDS_1_I2C_0, 2, true, 0 }, - { "mipi1", IMX_SC_R_MIPI_1, 1, 0 }, - { "mipi1-pwm0", IMX_SC_R_MIPI_1_PWM_0, 1, 0 }, - { "mipi1-i2c", IMX_SC_R_MIPI_1_I2C_0, 2, 1 }, - { "lvds1", IMX_SC_R_LVDS_1, 1, 0 }, - /* DC SS */ { "dc0", IMX_SC_R_DC_0, 1, false, 0 }, { "dc0-pll", IMX_SC_R_DC_0_PLL_0, 2, true, 0 }, -- cgit From 5af9b304bc6010723c02f74de0bfd24ff19b1a10 Mon Sep 17 00:00:00 2001 From: Piyush Mehta Date: Mon, 5 Aug 2024 11:29:07 +0530 Subject: phy: xilinx: phy-zynqmp: Fix SGMII linkup failure on resume On a few Kria KR260 Robotics Starter Kit the PS-GEM SGMII linkup is not happening after the resume. This is because serdes registers are reset when FPD is off (in suspend state) and needs to be reprogrammed in the resume path with the same default initialization as done in the first stage bootloader psu_init routine. To address the failure introduce a set of serdes registers to be saved in the suspend path and then restore it on resume. Fixes: 4a33bea00314 ("phy: zynqmp: Add PHY driver for the Xilinx ZynqMP Gigabit Transceiver") Signed-off-by: Piyush Mehta Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1722837547-2578381-1-git-send-email-radhey.shyam.pandey@amd.com Signed-off-by: Vinod Koul --- drivers/phy/xilinx/phy-zynqmp.c | 56 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/drivers/phy/xilinx/phy-zynqmp.c b/drivers/phy/xilinx/phy-zynqmp.c index cb15041371c9..e6579002f114 100644 --- a/drivers/phy/xilinx/phy-zynqmp.c +++ b/drivers/phy/xilinx/phy-zynqmp.c @@ -160,6 +160,24 @@ static const char *const xpsgtr_icm_str[] = { /* Timeout values */ #define TIMEOUT_US 1000 +/* Lane 0/1/2/3 offset */ +#define DIG_8(n) ((0x4000 * (n)) + 0x1074) +#define ILL13(n) ((0x4000 * (n)) + 0x1994) +#define DIG_10(n) ((0x4000 * (n)) + 0x107c) +#define RST_DLY(n) ((0x4000 * (n)) + 0x19a4) +#define BYP_15(n) ((0x4000 * (n)) + 0x1038) +#define BYP_12(n) ((0x4000 * (n)) + 0x102c) +#define MISC3(n) ((0x4000 * (n)) + 0x19ac) +#define EQ11(n) ((0x4000 * (n)) + 0x1978) + +static u32 save_reg_address[] = { + /* Lane 0/1/2/3 Register */ + DIG_8(0), ILL13(0), DIG_10(0), RST_DLY(0), BYP_15(0), BYP_12(0), MISC3(0), EQ11(0), + DIG_8(1), ILL13(1), DIG_10(1), RST_DLY(1), BYP_15(1), BYP_12(1), MISC3(1), EQ11(1), + DIG_8(2), ILL13(2), DIG_10(2), RST_DLY(2), BYP_15(2), BYP_12(2), MISC3(2), EQ11(2), + DIG_8(3), ILL13(3), DIG_10(3), RST_DLY(3), BYP_15(3), BYP_12(3), MISC3(3), EQ11(3), +}; + struct xpsgtr_dev; /** @@ -209,6 +227,7 @@ struct xpsgtr_phy { * @tx_term_fix: fix for GT issue * @saved_icm_cfg0: stored value of ICM CFG0 register * @saved_icm_cfg1: stored value of ICM CFG1 register + * @saved_regs: registers to be saved/restored during suspend/resume */ struct xpsgtr_dev { struct device *dev; @@ -221,6 +240,7 @@ struct xpsgtr_dev { bool tx_term_fix; unsigned int saved_icm_cfg0; unsigned int saved_icm_cfg1; + u32 *saved_regs; }; /* @@ -294,6 +314,32 @@ static inline void xpsgtr_clr_set_phy(struct xpsgtr_phy *gtr_phy, writel((readl(addr) & ~clr) | set, addr); } +/** + * xpsgtr_save_lane_regs - Saves registers on suspend + * @gtr_dev: pointer to phy controller context structure + */ +static void xpsgtr_save_lane_regs(struct xpsgtr_dev *gtr_dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(save_reg_address); i++) + gtr_dev->saved_regs[i] = xpsgtr_read(gtr_dev, + save_reg_address[i]); +} + +/** + * xpsgtr_restore_lane_regs - Restores registers on resume + * @gtr_dev: pointer to phy controller context structure + */ +static void xpsgtr_restore_lane_regs(struct xpsgtr_dev *gtr_dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(save_reg_address); i++) + xpsgtr_write(gtr_dev, save_reg_address[i], + gtr_dev->saved_regs[i]); +} + /* * Hardware Configuration */ @@ -837,6 +883,8 @@ static int xpsgtr_runtime_suspend(struct device *dev) gtr_dev->saved_icm_cfg0 = xpsgtr_read(gtr_dev, ICM_CFG0); gtr_dev->saved_icm_cfg1 = xpsgtr_read(gtr_dev, ICM_CFG1); + xpsgtr_save_lane_regs(gtr_dev); + return 0; } @@ -847,6 +895,8 @@ static int xpsgtr_runtime_resume(struct device *dev) unsigned int i; bool skip_phy_init; + xpsgtr_restore_lane_regs(gtr_dev); + icm_cfg0 = xpsgtr_read(gtr_dev, ICM_CFG0); icm_cfg1 = xpsgtr_read(gtr_dev, ICM_CFG1); @@ -994,6 +1044,12 @@ static int xpsgtr_probe(struct platform_device *pdev) return ret; } + gtr_dev->saved_regs = devm_kmalloc(gtr_dev->dev, + sizeof(save_reg_address), + GFP_KERNEL); + if (!gtr_dev->saved_regs) + return -ENOMEM; + return 0; } -- cgit From b336268dde75cb09bd795cb24893d52152a9191f Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 2 Aug 2024 10:50:46 +0300 Subject: dmaengine: dw: Add peripheral bus width verification Currently the src_addr_width and dst_addr_width fields of the dma_slave_config structure are mapped to the CTLx.SRC_TR_WIDTH and CTLx.DST_TR_WIDTH fields of the peripheral bus side in order to have the properly aligned data passed to the target device. It's done just by converting the passed peripheral bus width to the encoded value using the __ffs() function. This implementation has several problematic sides: 1. __ffs() is undefined if no bit exist in the passed value. Thus if the specified addr-width is DMA_SLAVE_BUSWIDTH_UNDEFINED, __ffs() may return unexpected value depending on the platform-specific implementation. 2. DW AHB DMA-engine permits having the power-of-2 transfer width limited by the DMAH_Mk_HDATA_WIDTH IP-core synthesize parameter. Specifying bus-width out of that constraints scope will definitely cause unexpected result since the destination reg will be only partly touched than the client driver implied. Let's fix all of that by adding the peripheral bus width verification method and calling it in dwc_config() which is supposed to be executed before preparing any transfer. The new method will make sure that the passed source or destination address width is valid and if undefined then the driver will just fallback to the 1-byte width transfer. Fixes: 029a40e97d0d ("dmaengine: dw: provide DMA capabilities") Signed-off-by: Serge Semin Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240802075100.6475-2-fancer.lancer@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/dw/core.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 5f7d690e3dba..11e269a31a09 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -780,10 +781,43 @@ bool dw_dma_filter(struct dma_chan *chan, void *param) } EXPORT_SYMBOL_GPL(dw_dma_filter); +static int dwc_verify_p_buswidth(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + u32 reg_width, max_width; + + if (dwc->dma_sconfig.direction == DMA_MEM_TO_DEV) + reg_width = dwc->dma_sconfig.dst_addr_width; + else if (dwc->dma_sconfig.direction == DMA_DEV_TO_MEM) + reg_width = dwc->dma_sconfig.src_addr_width; + else /* DMA_MEM_TO_MEM */ + return 0; + + max_width = dw->pdata->data_width[dwc->dws.p_master]; + + /* Fall-back to 1-byte transfer width if undefined */ + if (reg_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + reg_width = DMA_SLAVE_BUSWIDTH_1_BYTE; + else if (!is_power_of_2(reg_width) || reg_width > max_width) + return -EINVAL; + else /* bus width is valid */ + return 0; + + /* Update undefined addr width value */ + if (dwc->dma_sconfig.direction == DMA_MEM_TO_DEV) + dwc->dma_sconfig.dst_addr_width = reg_width; + else /* DMA_DEV_TO_MEM */ + dwc->dma_sconfig.src_addr_width = reg_width; + + return 0; +} + static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(chan->device); + int ret; memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig)); @@ -792,6 +826,10 @@ static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig) dwc->dma_sconfig.dst_maxburst = clamp(dwc->dma_sconfig.dst_maxburst, 0U, dwc->max_burst); + ret = dwc_verify_p_buswidth(chan); + if (ret) + return ret; + dw->encode_maxburst(dwc, &dwc->dma_sconfig.src_maxburst); dw->encode_maxburst(dwc, &dwc->dma_sconfig.dst_maxburst); -- cgit From d04b21bfa1c50a2ade4816cab6fdc91827b346b1 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 2 Aug 2024 10:50:47 +0300 Subject: dmaengine: dw: Add memory bus width verification Currently in case of the DEV_TO_MEM or MEM_TO_DEV DMA transfers the memory data width (single transfer width) is determined based on the buffer length, buffer base address or DMA master-channel max address width capability. It isn't enough in case of the channel disabling prior the block transfer is finished. Here is what DW AHB DMA IP-core databook says regarding the port suspension (DMA-transfer pause) implementation in the controller: "When CTLx.SRC_TR_WIDTH < CTLx.DST_TR_WIDTH and the CFGx.CH_SUSP bit is high, the CFGx.FIFO_EMPTY is asserted once the contents of the FIFO do not permit a single word of CTLx.DST_TR_WIDTH to be formed. However, there may still be data in the channel FIFO, but not enough to form a single transfer of CTLx.DST_TR_WIDTH. In this scenario, once the channel is disabled, the remaining data in the channel FIFO is not transferred to the destination peripheral." So in case if the port gets to be suspended and then disabled it's possible to have the data silently discarded even though the controller reported that FIFO is empty and the CTLx.BLOCK_TS indicated the dropped data already received from the source device. This looks as if the data somehow got lost on a way from the peripheral device to memory and causes problems for instance in the DW APB UART driver, which pauses and disables the DMA-transfer as soon as the recv data timeout happens. Here is the way it looks: Memory <------- DMA FIFO <------ UART FIFO <---------------- UART DST_TR_WIDTH -+--------| | | | | | | No more data Current lvl -+--------| |---------+- DMA-burst lvl | | |---------+- Leftover data | | |---------+- SRC_TR_WIDTH -+--------+-------+---------+ In the example above: no more data is getting received over the UART port and BLOCK_TS is not even close to be fully received; some data is left in the UART FIFO, but not enough to perform a bursted DMA-xfer to the DMA FIFO; some data is left in the DMA FIFO, but not enough to be passed further to the system memory in a single transfer. In this situation the 8250 UART driver catches the recv timeout interrupt, pauses the DMA-transfer and terminates it completely, after which the IRQ handler manually fetches the leftover data from the UART FIFO into the recv-buffer. But since the DMA-channel has been disabled with the data left in the DMA FIFO, that data will be just discarded and the recv-buffer will have a gap of the "current lvl" size in the recv-buffer at the tail of the lately received data portion. So the data will be lost just due to the misconfigured DMA transfer. Note this is only relevant for the case of the transfer suspension and _disabling_. No problem will happen if the transfer will be re-enabled afterwards or the block transfer is fully completed. In the later case the "FIFO flush mode" will be executed at the transfer final stage in order to push out the data left in the DMA FIFO. In order to fix the denoted problem the DW AHB DMA-engine driver needs to make sure that the _bursted_ source transfer width is greater or equal to the single destination transfer (note the HW databook describes more strict constraint than actually required). Since the peripheral-device side is prescribed by the client driver logic, the memory-side can be only used for that. The solution can be easily implemented for the DEV_TO_MEM transfers just by adjusting the memory-channel address width. Sadly it's not that easy for the MEM_TO_DEV transfers since the mem-to-dma burst size is normally dynamically determined by the controller. So the only thing that can be done is to make sure that memory-side address width is greater than the peripheral device address width. Fixes: a09820043c9e ("dw_dmac: autoconfigure data_width or get it via platform data") Signed-off-by: Serge Semin Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240802075100.6475-3-fancer.lancer@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/dw/core.c | 51 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 7 deletions(-) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 11e269a31a09..b341a6f1b043 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -622,12 +622,10 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, struct dw_desc *prev; struct dw_desc *first; u32 ctllo, ctlhi; - u8 m_master = dwc->dws.m_master; - u8 lms = DWC_LLP_LMS(m_master); + u8 lms = DWC_LLP_LMS(dwc->dws.m_master); dma_addr_t reg; unsigned int reg_width; unsigned int mem_width; - unsigned int data_width = dw->pdata->data_width[m_master]; unsigned int i; struct scatterlist *sg; size_t total_len = 0; @@ -661,7 +659,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, mem = sg_dma_address(sg); len = sg_dma_len(sg); - mem_width = __ffs(data_width | mem | len); + mem_width = __ffs(sconfig->src_addr_width | mem | len); slave_sg_todev_fill_desc: desc = dwc_desc_get(dwc); @@ -721,7 +719,7 @@ slave_sg_fromdev_fill_desc: lli_write(desc, sar, reg); lli_write(desc, dar, mem); lli_write(desc, ctlhi, ctlhi); - mem_width = __ffs(data_width | mem); + mem_width = __ffs(sconfig->dst_addr_width | mem); lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width)); desc->len = dlen; @@ -813,6 +811,41 @@ static int dwc_verify_p_buswidth(struct dma_chan *chan) return 0; } +static int dwc_verify_m_buswidth(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + u32 reg_width, reg_burst, mem_width; + + mem_width = dw->pdata->data_width[dwc->dws.m_master]; + + /* + * It's possible to have a data portion locked in the DMA FIFO in case + * of the channel suspension. Subsequent channel disabling will cause + * that data silent loss. In order to prevent that maintain the src and + * dst transfer widths coherency by means of the relation: + * (CTLx.SRC_TR_WIDTH * CTLx.SRC_MSIZE >= CTLx.DST_TR_WIDTH) + * Look for the details in the commit message that brings this change. + * + * Note the DMA configs utilized in the calculations below must have + * been verified to have correct values by this method call. + */ + if (dwc->dma_sconfig.direction == DMA_MEM_TO_DEV) { + reg_width = dwc->dma_sconfig.dst_addr_width; + if (mem_width < reg_width) + return -EINVAL; + + dwc->dma_sconfig.src_addr_width = mem_width; + } else if (dwc->dma_sconfig.direction == DMA_DEV_TO_MEM) { + reg_width = dwc->dma_sconfig.src_addr_width; + reg_burst = rounddown_pow_of_two(dwc->dma_sconfig.src_maxburst); + + dwc->dma_sconfig.dst_addr_width = min(mem_width, reg_width * reg_burst); + } + + return 0; +} + static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); @@ -822,14 +855,18 @@ static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig) memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig)); dwc->dma_sconfig.src_maxburst = - clamp(dwc->dma_sconfig.src_maxburst, 0U, dwc->max_burst); + clamp(dwc->dma_sconfig.src_maxburst, 1U, dwc->max_burst); dwc->dma_sconfig.dst_maxburst = - clamp(dwc->dma_sconfig.dst_maxburst, 0U, dwc->max_burst); + clamp(dwc->dma_sconfig.dst_maxburst, 1U, dwc->max_burst); ret = dwc_verify_p_buswidth(chan); if (ret) return ret; + ret = dwc_verify_m_buswidth(chan); + if (ret) + return ret; + dw->encode_maxburst(dwc, &dwc->dma_sconfig.src_maxburst); dw->encode_maxburst(dwc, &dwc->dma_sconfig.dst_maxburst); -- cgit From 1fd6fe89055e6dbb4be8f16b8dcab8602e3603d6 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 2 Aug 2024 10:50:48 +0300 Subject: dmaengine: dw: Simplify prepare CTL_LO methods Currently the CTL LO fields are calculated on the platform-specific basis. It's implemented by means of the prepare_ctllo() callbacks using the ternary operator within the local variables init block at the beginning of the block scope. The functions code currently is relatively hard to comprehend and isn't that optimal since implies four conditional statements executed and two additional local variables defined. Let's simplify the DW AHB DMA prepare_ctllo() method by unrolling the ternary operators into the normal if-else statement, dropping redundant master-interface ID variables and initializing the local variables based on the singly evaluated DMA-transfer direction check. Thus the method will look much more readable since now the fields content can be easily inferred right from the if-else branch. Provide the same update in the Intel DMA32 core driver for the sake of the driver code unification. Note besides of the effects described above this update is basically a preparation before dropping the max burst encoding callback. The dropping will require to call the burst fields calculation methods right in the prepare_ctllo() callbacks. It would have made the later functions code even more complex should they were left in the original state. Signed-off-by: Serge Semin Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240802075100.6475-4-fancer.lancer@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/dw/dw.c | 21 +++++++++++++++------ drivers/dma/dw/idma32.c | 8 ++++++-- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/dma/dw/dw.c b/drivers/dma/dw/dw.c index a4862263ff14..e3d2cc3ea68c 100644 --- a/drivers/dma/dw/dw.c +++ b/drivers/dma/dw/dw.c @@ -67,12 +67,21 @@ static size_t dw_dma_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width) static u32 dw_dma_prepare_ctllo(struct dw_dma_chan *dwc) { struct dma_slave_config *sconfig = &dwc->dma_sconfig; - u8 smsize = (dwc->direction == DMA_DEV_TO_MEM) ? sconfig->src_maxburst : 0; - u8 dmsize = (dwc->direction == DMA_MEM_TO_DEV) ? sconfig->dst_maxburst : 0; - u8 p_master = dwc->dws.p_master; - u8 m_master = dwc->dws.m_master; - u8 dms = (dwc->direction == DMA_MEM_TO_DEV) ? p_master : m_master; - u8 sms = (dwc->direction == DMA_DEV_TO_MEM) ? p_master : m_master; + u8 smsize = 0, dmsize = 0; + u8 sms, dms; + + if (dwc->direction == DMA_MEM_TO_DEV) { + sms = dwc->dws.m_master; + dms = dwc->dws.p_master; + dmsize = sconfig->dst_maxburst; + } else if (dwc->direction == DMA_DEV_TO_MEM) { + sms = dwc->dws.p_master; + dms = dwc->dws.m_master; + smsize = sconfig->src_maxburst; + } else /* DMA_MEM_TO_MEM */ { + sms = dwc->dws.m_master; + dms = dwc->dws.m_master; + } return DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN | DWC_CTLL_DST_MSIZE(dmsize) | DWC_CTLL_SRC_MSIZE(smsize) | diff --git a/drivers/dma/dw/idma32.c b/drivers/dma/dw/idma32.c index 58f4078d83fe..e0c31f77cd0f 100644 --- a/drivers/dma/dw/idma32.c +++ b/drivers/dma/dw/idma32.c @@ -202,8 +202,12 @@ static size_t idma32_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width) static u32 idma32_prepare_ctllo(struct dw_dma_chan *dwc) { struct dma_slave_config *sconfig = &dwc->dma_sconfig; - u8 smsize = (dwc->direction == DMA_DEV_TO_MEM) ? sconfig->src_maxburst : 0; - u8 dmsize = (dwc->direction == DMA_MEM_TO_DEV) ? sconfig->dst_maxburst : 0; + u8 smsize = 0, dmsize = 0; + + if (dwc->direction == DMA_MEM_TO_DEV) + dmsize = sconfig->dst_maxburst; + else if (dwc->direction == DMA_DEV_TO_MEM) + smsize = sconfig->src_maxburst; return DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN | DWC_CTLL_DST_MSIZE(dmsize) | DWC_CTLL_SRC_MSIZE(smsize); -- cgit From 3acb301d33749a8974e61ecda16a5f5441fc9628 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 2 Aug 2024 10:50:49 +0300 Subject: dmaengine: dw: Define encode_maxburst() above prepare_ctllo() callbacks As a preparatory change before dropping the encode_maxburst() callbacks let's move dw_dma_encode_maxburst() and idma32_encode_maxburst() to being defined above the dw_dma_prepare_ctllo() and idma32_prepare_ctllo() methods respectively. That's required since the former methods will be called from the later ones directly. Signed-off-by: Serge Semin Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240802075100.6475-5-fancer.lancer@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/dw/dw.c | 18 +++++++++--------- drivers/dma/dw/idma32.c | 10 +++++----- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/dma/dw/dw.c b/drivers/dma/dw/dw.c index e3d2cc3ea68c..628ee1e77505 100644 --- a/drivers/dma/dw/dw.c +++ b/drivers/dma/dw/dw.c @@ -64,6 +64,15 @@ static size_t dw_dma_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width) return DWC_CTLH_BLOCK_TS(block) << width; } +static void dw_dma_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst) +{ + /* + * Fix burst size according to dw_dmac. We need to convert them as: + * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3. + */ + *maxburst = *maxburst > 1 ? fls(*maxburst) - 2 : 0; +} + static u32 dw_dma_prepare_ctllo(struct dw_dma_chan *dwc) { struct dma_slave_config *sconfig = &dwc->dma_sconfig; @@ -88,15 +97,6 @@ static u32 dw_dma_prepare_ctllo(struct dw_dma_chan *dwc) DWC_CTLL_DMS(dms) | DWC_CTLL_SMS(sms); } -static void dw_dma_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst) -{ - /* - * Fix burst size according to dw_dmac. We need to convert them as: - * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3. - */ - *maxburst = *maxburst > 1 ? fls(*maxburst) - 2 : 0; -} - static void dw_dma_set_device_name(struct dw_dma *dw, int id) { snprintf(dw->name, sizeof(dw->name), "dw:dmac%d", id); diff --git a/drivers/dma/dw/idma32.c b/drivers/dma/dw/idma32.c index e0c31f77cd0f..493fcbafa2b8 100644 --- a/drivers/dma/dw/idma32.c +++ b/drivers/dma/dw/idma32.c @@ -199,6 +199,11 @@ static size_t idma32_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width) return IDMA32C_CTLH_BLOCK_TS(block); } +static void idma32_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst) +{ + *maxburst = *maxburst > 1 ? fls(*maxburst) - 1 : 0; +} + static u32 idma32_prepare_ctllo(struct dw_dma_chan *dwc) { struct dma_slave_config *sconfig = &dwc->dma_sconfig; @@ -213,11 +218,6 @@ static u32 idma32_prepare_ctllo(struct dw_dma_chan *dwc) DWC_CTLL_DST_MSIZE(dmsize) | DWC_CTLL_SRC_MSIZE(smsize); } -static void idma32_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst) -{ - *maxburst = *maxburst > 1 ? fls(*maxburst) - 1 : 0; -} - static void idma32_set_device_name(struct dw_dma *dw, int id) { snprintf(dw->name, sizeof(dw->name), "idma32:dmac%d", id); -- cgit From d8fa0802f63502c0409d02c6b701d51841a6f1bd Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 2 Aug 2024 10:50:50 +0300 Subject: dmaengine: dw: Simplify max-burst calculation procedure In order to have a more coherent DW AHB DMA slave configuration method - dwc_config() - let's simplify the source and destination channel max-burst calculation procedure: 1. Create the max-burst verification method as it has been just done for the memory and peripheral address widths. Thus the dwc_config() method will turn to a set of the verification methods execution. 2. Since both the generic DW AHB DMA and Intel iDMA 32-bit engines support the power-of-2 bursts only, then the specified by the client driver max-burst values can be converted to being power-of-2 right in the max-burst verification method. 3. Since max-burst encoded value is required on the CTL_LO fields calculation stage, the encode_maxburst() callback can be easily dropped from the dw_dma structure meanwhile the encoding procedure will be executed right in the CTL_LO register value calculation. Thus the update will provide the next positive effects: the internal DMA-slave config structure will contain only the real DMA-transfer config values, which will be encoded to the DMA-controller register fields only when it's required on the buffer mapping; the redundant encode_maxburst() callback will be dropped simplifying the internal HW-abstraction API; dwc_config() will look more readable executing the verification functions one-by-one. Signed-off-by: Serge Semin Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240802075100.6475-6-fancer.lancer@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/dw/core.c | 30 +++++++++++++++++++++--------- drivers/dma/dw/dw.c | 9 ++++----- drivers/dma/dw/idma32.c | 9 ++++----- drivers/dma/dw/regs.h | 1 - 4 files changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index b341a6f1b043..32a66f9effd9 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -779,6 +779,23 @@ bool dw_dma_filter(struct dma_chan *chan, void *param) } EXPORT_SYMBOL_GPL(dw_dma_filter); +static int dwc_verify_maxburst(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + + dwc->dma_sconfig.src_maxburst = + clamp(dwc->dma_sconfig.src_maxburst, 1U, dwc->max_burst); + dwc->dma_sconfig.dst_maxburst = + clamp(dwc->dma_sconfig.dst_maxburst, 1U, dwc->max_burst); + + dwc->dma_sconfig.src_maxburst = + rounddown_pow_of_two(dwc->dma_sconfig.src_maxburst); + dwc->dma_sconfig.dst_maxburst = + rounddown_pow_of_two(dwc->dma_sconfig.dst_maxburst); + + return 0; +} + static int dwc_verify_p_buswidth(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); @@ -838,7 +855,7 @@ static int dwc_verify_m_buswidth(struct dma_chan *chan) dwc->dma_sconfig.src_addr_width = mem_width; } else if (dwc->dma_sconfig.direction == DMA_DEV_TO_MEM) { reg_width = dwc->dma_sconfig.src_addr_width; - reg_burst = rounddown_pow_of_two(dwc->dma_sconfig.src_maxburst); + reg_burst = dwc->dma_sconfig.src_maxburst; dwc->dma_sconfig.dst_addr_width = min(mem_width, reg_width * reg_burst); } @@ -849,15 +866,13 @@ static int dwc_verify_m_buswidth(struct dma_chan *chan) static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct dw_dma *dw = to_dw_dma(chan->device); int ret; memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig)); - dwc->dma_sconfig.src_maxburst = - clamp(dwc->dma_sconfig.src_maxburst, 1U, dwc->max_burst); - dwc->dma_sconfig.dst_maxburst = - clamp(dwc->dma_sconfig.dst_maxburst, 1U, dwc->max_burst); + ret = dwc_verify_maxburst(chan); + if (ret) + return ret; ret = dwc_verify_p_buswidth(chan); if (ret) @@ -867,9 +882,6 @@ static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig) if (ret) return ret; - dw->encode_maxburst(dwc, &dwc->dma_sconfig.src_maxburst); - dw->encode_maxburst(dwc, &dwc->dma_sconfig.dst_maxburst); - return 0; } diff --git a/drivers/dma/dw/dw.c b/drivers/dma/dw/dw.c index 628ee1e77505..6766142884b6 100644 --- a/drivers/dma/dw/dw.c +++ b/drivers/dma/dw/dw.c @@ -64,13 +64,13 @@ static size_t dw_dma_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width) return DWC_CTLH_BLOCK_TS(block) << width; } -static void dw_dma_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst) +static inline u8 dw_dma_encode_maxburst(u32 maxburst) { /* * Fix burst size according to dw_dmac. We need to convert them as: * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3. */ - *maxburst = *maxburst > 1 ? fls(*maxburst) - 2 : 0; + return maxburst > 1 ? fls(maxburst) - 2 : 0; } static u32 dw_dma_prepare_ctllo(struct dw_dma_chan *dwc) @@ -82,11 +82,11 @@ static u32 dw_dma_prepare_ctllo(struct dw_dma_chan *dwc) if (dwc->direction == DMA_MEM_TO_DEV) { sms = dwc->dws.m_master; dms = dwc->dws.p_master; - dmsize = sconfig->dst_maxburst; + dmsize = dw_dma_encode_maxburst(sconfig->dst_maxburst); } else if (dwc->direction == DMA_DEV_TO_MEM) { sms = dwc->dws.p_master; dms = dwc->dws.m_master; - smsize = sconfig->src_maxburst; + smsize = dw_dma_encode_maxburst(sconfig->src_maxburst); } else /* DMA_MEM_TO_MEM */ { sms = dwc->dws.m_master; dms = dwc->dws.m_master; @@ -125,7 +125,6 @@ int dw_dma_probe(struct dw_dma_chip *chip) dw->suspend_chan = dw_dma_suspend_chan; dw->resume_chan = dw_dma_resume_chan; dw->prepare_ctllo = dw_dma_prepare_ctllo; - dw->encode_maxburst = dw_dma_encode_maxburst; dw->bytes2block = dw_dma_bytes2block; dw->block2bytes = dw_dma_block2bytes; diff --git a/drivers/dma/dw/idma32.c b/drivers/dma/dw/idma32.c index 493fcbafa2b8..dac617c183e6 100644 --- a/drivers/dma/dw/idma32.c +++ b/drivers/dma/dw/idma32.c @@ -199,9 +199,9 @@ static size_t idma32_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width) return IDMA32C_CTLH_BLOCK_TS(block); } -static void idma32_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst) +static inline u8 idma32_encode_maxburst(u32 maxburst) { - *maxburst = *maxburst > 1 ? fls(*maxburst) - 1 : 0; + return maxburst > 1 ? fls(maxburst) - 1 : 0; } static u32 idma32_prepare_ctllo(struct dw_dma_chan *dwc) @@ -210,9 +210,9 @@ static u32 idma32_prepare_ctllo(struct dw_dma_chan *dwc) u8 smsize = 0, dmsize = 0; if (dwc->direction == DMA_MEM_TO_DEV) - dmsize = sconfig->dst_maxburst; + dmsize = idma32_encode_maxburst(sconfig->dst_maxburst); else if (dwc->direction == DMA_DEV_TO_MEM) - smsize = sconfig->src_maxburst; + smsize = idma32_encode_maxburst(sconfig->src_maxburst); return DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN | DWC_CTLL_DST_MSIZE(dmsize) | DWC_CTLL_SRC_MSIZE(smsize); @@ -274,7 +274,6 @@ int idma32_dma_probe(struct dw_dma_chip *chip) dw->suspend_chan = idma32_suspend_chan; dw->resume_chan = idma32_resume_chan; dw->prepare_ctllo = idma32_prepare_ctllo; - dw->encode_maxburst = idma32_encode_maxburst; dw->bytes2block = idma32_bytes2block; dw->block2bytes = idma32_block2bytes; diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index 76654bd13c1a..5969d9cc8d7a 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -327,7 +327,6 @@ struct dw_dma { void (*suspend_chan)(struct dw_dma_chan *dwc, bool drain); void (*resume_chan)(struct dw_dma_chan *dwc, bool drain); u32 (*prepare_ctllo)(struct dw_dma_chan *dwc); - void (*encode_maxburst)(struct dw_dma_chan *dwc, u32 *maxburst); u32 (*bytes2block)(struct dw_dma_chan *dwc, size_t bytes, unsigned int width, size_t *len); size_t (*block2bytes)(struct dw_dma_chan *dwc, u32 block, u32 width); -- cgit From 2ebc36b9581df31eed271e5de61fc8a8b66dbc56 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 2 Aug 2024 10:50:51 +0300 Subject: dmaengine: dw: Unify ret-val local variables naming Currently there are two names utilized in the driver to keep the functions call status: ret and err. For the sake of unification convert to using the first version only. Signed-off-by: Serge Semin Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240802075100.6475-7-fancer.lancer@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/dw/core.c | 20 ++++++++++---------- drivers/dma/dw/platform.c | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 32a66f9effd9..dd75f97a33b3 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -1155,7 +1155,7 @@ int do_dma_probe(struct dw_dma_chip *chip) bool autocfg = false; unsigned int dw_params; unsigned int i; - int err; + int ret; dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL); if (!dw->pdata) @@ -1171,7 +1171,7 @@ int do_dma_probe(struct dw_dma_chip *chip) autocfg = dw_params >> DW_PARAMS_EN & 1; if (!autocfg) { - err = -EINVAL; + ret = -EINVAL; goto err_pdata; } @@ -1191,7 +1191,7 @@ int do_dma_probe(struct dw_dma_chip *chip) pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING; pdata->chan_priority = CHAN_PRIORITY_ASCENDING; } else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { - err = -EINVAL; + ret = -EINVAL; goto err_pdata; } else { memcpy(dw->pdata, chip->pdata, sizeof(*dw->pdata)); @@ -1203,7 +1203,7 @@ int do_dma_probe(struct dw_dma_chip *chip) dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan), GFP_KERNEL); if (!dw->chan) { - err = -ENOMEM; + ret = -ENOMEM; goto err_pdata; } @@ -1221,15 +1221,15 @@ int do_dma_probe(struct dw_dma_chip *chip) sizeof(struct dw_desc), 4, 0); if (!dw->desc_pool) { dev_err(chip->dev, "No memory for descriptors dma pool\n"); - err = -ENOMEM; + ret = -ENOMEM; goto err_pdata; } tasklet_setup(&dw->tasklet, dw_dma_tasklet); - err = request_irq(chip->irq, dw_dma_interrupt, IRQF_SHARED, + ret = request_irq(chip->irq, dw_dma_interrupt, IRQF_SHARED, dw->name, dw); - if (err) + if (ret) goto err_pdata; INIT_LIST_HEAD(&dw->dma.channels); @@ -1341,8 +1341,8 @@ int do_dma_probe(struct dw_dma_chip *chip) */ dma_set_max_seg_size(dw->dma.dev, dw->chan[0].block_size); - err = dma_async_device_register(&dw->dma); - if (err) + ret = dma_async_device_register(&dw->dma); + if (ret) goto err_dma_register; dev_info(chip->dev, "DesignWare DMA Controller, %d channels\n", @@ -1356,7 +1356,7 @@ err_dma_register: free_irq(chip->irq, dw); err_pdata: pm_runtime_put_sync_suspend(chip->dev); - return err; + return ret; } int do_dma_remove(struct dw_dma_chip *chip) diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index 7d9d4c951724..47c58ad468cb 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -29,7 +29,7 @@ static int dw_probe(struct platform_device *pdev) struct dw_dma_chip_pdata *data; struct dw_dma_chip *chip; struct device *dev = &pdev->dev; - int err; + int ret; match = device_get_match_data(dev); if (!match) @@ -51,9 +51,9 @@ static int dw_probe(struct platform_device *pdev) if (IS_ERR(chip->regs)) return PTR_ERR(chip->regs); - err = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) - return err; + ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) + return ret; if (!data->pdata) data->pdata = dev_get_platdata(dev); @@ -69,14 +69,14 @@ static int dw_probe(struct platform_device *pdev) chip->clk = devm_clk_get_optional(chip->dev, "hclk"); if (IS_ERR(chip->clk)) return PTR_ERR(chip->clk); - err = clk_prepare_enable(chip->clk); - if (err) - return err; + ret = clk_prepare_enable(chip->clk); + if (ret) + return ret; pm_runtime_enable(&pdev->dev); - err = data->probe(chip); - if (err) + ret = data->probe(chip); + if (ret) goto err_dw_dma_probe; platform_set_drvdata(pdev, data); @@ -90,7 +90,7 @@ static int dw_probe(struct platform_device *pdev) err_dw_dma_probe: pm_runtime_disable(&pdev->dev); clk_disable_unprepare(chip->clk); - return err; + return ret; } static void dw_remove(struct platform_device *pdev) -- cgit From 5e5c793c7fc47219998465361d94510fdf55d83f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 16 Jul 2024 14:57:06 -0700 Subject: dmaengine: ti: omap-dma: Initialize sglen after allocation With the new __counted_by annocation, the "sglen" struct member must be set before accessing the "sg" array. This initialization was done in other places where a new struct omap_desc is allocated, but these cases were missed. Set "sglen" after allocation. Fixes: b85178611c11 ("dmaengine: ti: omap-dma: Annotate struct omap_desc with __counted_by") Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20240716215702.work.802-kees@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/ti/omap-dma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/ti/omap-dma.c b/drivers/dma/ti/omap-dma.c index 7e6c04afbe89..6ab9bfbdc480 100644 --- a/drivers/dma/ti/omap-dma.c +++ b/drivers/dma/ti/omap-dma.c @@ -1186,10 +1186,10 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic( d->dev_addr = dev_addr; d->fi = burst; d->es = es; + d->sglen = 1; d->sg[0].addr = buf_addr; d->sg[0].en = period_len / es_bytes[es]; d->sg[0].fn = buf_len / period_len; - d->sglen = 1; d->ccr = c->ccr; if (dir == DMA_DEV_TO_MEM) @@ -1258,10 +1258,10 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_memcpy( d->dev_addr = src; d->fi = 0; d->es = data_type; + d->sglen = 1; d->sg[0].en = len / BIT(data_type); d->sg[0].fn = 1; d->sg[0].addr = dest; - d->sglen = 1; d->ccr = c->ccr; d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_POSTINC; @@ -1309,6 +1309,7 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_interleaved( if (data_type > CSDP_DATA_TYPE_32) data_type = CSDP_DATA_TYPE_32; + d->sglen = 1; sg = &d->sg[0]; d->dir = DMA_MEM_TO_MEM; d->dev_addr = xt->src_start; @@ -1316,7 +1317,6 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_interleaved( sg->en = xt->sgl[0].size / BIT(data_type); sg->fn = xt->numf; sg->addr = xt->dst_start; - d->sglen = 1; d->ccr = c->ccr; src_icg = dmaengine_get_src_icg(xt, &xt->sgl[0]); -- cgit From b53b831919a0dc4e6631ebe0497ab2a4d8bef014 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 16 Jul 2024 14:38:33 -0700 Subject: dmaengine: stm32-dma3: Set lli_size after allocation With the new __counted_by annotation, the "lli_size" variable needs to valid for accesses to the "lli" array. This requirement is not met in stm32_dma3_chan_desc_alloc(), since "lli_size" starts at "0", so "lli" index "0" will not be considered valid during the initialization for loop. Fix this by setting lli_size immediately after allocation (similar to how this is handled in stm32_mdma_alloc_desc() for the node/count relationship). Fixes: f561ec8b2b33 ("dmaengine: Add STM32 DMA3 support") Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20240716213830.work.951-kees@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/stm32/stm32-dma3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/stm32/stm32-dma3.c b/drivers/dma/stm32/stm32-dma3.c index 4087e0263a48..0be6e944df6f 100644 --- a/drivers/dma/stm32/stm32-dma3.c +++ b/drivers/dma/stm32/stm32-dma3.c @@ -403,6 +403,7 @@ static struct stm32_dma3_swdesc *stm32_dma3_chan_desc_alloc(struct stm32_dma3_ch swdesc = kzalloc(struct_size(swdesc, lli, count), GFP_NOWAIT); if (!swdesc) return NULL; + swdesc->lli_size = count; for (i = 0; i < count; i++) { swdesc->lli[i].hwdesc = dma_pool_zalloc(chan->lli_pool, GFP_NOWAIT, @@ -410,7 +411,6 @@ static struct stm32_dma3_swdesc *stm32_dma3_chan_desc_alloc(struct stm32_dma3_ch if (!swdesc->lli[i].hwdesc) goto err_pool_free; } - swdesc->lli_size = count; swdesc->ccr = 0; /* Set LL base address */ -- cgit From 5062d9c0cbbc202e495e9b20f147f64ef5cc2897 Mon Sep 17 00:00:00 2001 From: "Sicelo A. Mhlongo" Date: Mon, 22 Jul 2024 13:31:11 +0200 Subject: ARM: dts: omap3-n900: correct the accelerometer orientation Negate the values reported for the accelerometer z-axis in order to match Documentation/devicetree/bindings/iio/mount-matrix.txt. Fixes: 14a213dcb004 ("ARM: dts: n900: use iio driver for accelerometer") Signed-off-by: Sicelo A. Mhlongo Reviewed-By: Andreas Kemnade Link: https://lore.kernel.org/r/20240722113137.3240847-1-absicsz@gmail.com Signed-off-by: Kevin Hilman --- arch/arm/boot/dts/ti/omap/omap3-n900.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/ti/omap/omap3-n900.dts b/arch/arm/boot/dts/ti/omap/omap3-n900.dts index 07c5b963af78..4bde3342bb95 100644 --- a/arch/arm/boot/dts/ti/omap/omap3-n900.dts +++ b/arch/arm/boot/dts/ti/omap/omap3-n900.dts @@ -781,7 +781,7 @@ mount-matrix = "-1", "0", "0", "0", "1", "0", - "0", "0", "1"; + "0", "0", "-1"; }; cam1: camera@3e { -- cgit From df24373435f5899a2a98b7d377479c8d4376613b Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 2 Aug 2024 22:47:34 +0300 Subject: drm/msm/dpu: don't play tricks with debug macros DPU debugging macros need to be converted to a proper drm_debug_* macros, however this is a going an intrusive patch, not suitable for a fix. Wire DPU_DEBUG and DPU_DEBUG_DRIVER to always use DRM_DEBUG_DRIVER to make sure that DPU debugging messages always end up in the drm debug messages and are controlled via the usual drm.debug mask. I don't think that it is a good idea for a generic DPU_DEBUG macro to be tied to DRM_UT_KMS. It is used to report a debug message from driver, so by default it should go to the DRM_UT_DRIVER channel. While refactoring debug macros later on we might end up with particular messages going to ATOMIC or KMS, but DRIVER should be the default. Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/606932/ Link: https://lore.kernel.org/r/20240802-dpu-fix-wb-v2-2-7eac9eb8e895@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index e2adc937ea63..935ff6fd172c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -31,24 +31,14 @@ * @fmt: Pointer to format string */ #define DPU_DEBUG(fmt, ...) \ - do { \ - if (drm_debug_enabled(DRM_UT_KMS)) \ - DRM_DEBUG(fmt, ##__VA_ARGS__); \ - else \ - pr_debug(fmt, ##__VA_ARGS__); \ - } while (0) + DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) /** * DPU_DEBUG_DRIVER - macro for hardware driver logging * @fmt: Pointer to format string */ #define DPU_DEBUG_DRIVER(fmt, ...) \ - do { \ - if (drm_debug_enabled(DRM_UT_DRIVER)) \ - DRM_ERROR(fmt, ##__VA_ARGS__); \ - else \ - pr_debug(fmt, ##__VA_ARGS__); \ - } while (0) + DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) #define DPU_ERROR(fmt, ...) pr_err("[dpu error]" fmt, ##__VA_ARGS__) #define DPU_ERROR_RATELIMITED(fmt, ...) pr_err_ratelimited("[dpu error]" fmt, ##__VA_ARGS__) -- cgit From d19d5b8d8f6dab942ce5ddbcf34bf7275e778250 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Mon, 5 Aug 2024 13:20:08 -0700 Subject: drm/msm/dp: fix the max supported bpp logic Fix the dp_panel_get_supported_bpp() API to return the minimum supported bpp correctly for relevant cases and use this API to correct the behavior of DP driver which hard-codes the max supported bpp to 30. This is incorrect because the number of lanes and max data rate supported by the lanes need to be taken into account. Replace the hardcoded limit with the appropriate math which accounts for the accurate number of lanes and max data rate. changes in v2: - Fix the dp_panel_get_supported_bpp() and use it - Drop the max_t usage as dp_panel_get_supported_bpp() already returns the min_bpp correctly now changes in v3: - replace min_t with just min as all params are u32 Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Reported-by: Dmitry Baryshkov Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/43 Tested-by: Dmitry Baryshkov # SM8350-HDK Reviewed-by: Stephen Boyd Patchwork: https://patchwork.freedesktop.org/patch/607073/ Link: https://lore.kernel.org/r/20240805202009.1120981-1-quic_abhinavk@quicinc.com Signed-off-by: Stephen Boyd Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_panel.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index a916b5f3b317..6ff6c9ef351f 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -90,22 +90,22 @@ static int dp_panel_read_dpcd(struct dp_panel *dp_panel) static u32 dp_panel_get_supported_bpp(struct dp_panel *dp_panel, u32 mode_edid_bpp, u32 mode_pclk_khz) { - struct dp_link_info *link_info; + const struct dp_link_info *link_info; const u32 max_supported_bpp = 30, min_supported_bpp = 18; - u32 bpp = 0, data_rate_khz = 0; + u32 bpp, data_rate_khz; - bpp = min_t(u32, mode_edid_bpp, max_supported_bpp); + bpp = min(mode_edid_bpp, max_supported_bpp); link_info = &dp_panel->link_info; data_rate_khz = link_info->num_lanes * link_info->rate * 8; - while (bpp > min_supported_bpp) { + do { if (mode_pclk_khz * bpp <= data_rate_khz) - break; + return bpp; bpp -= 6; - } + } while (bpp > min_supported_bpp); - return bpp; + return min_supported_bpp; } int dp_panel_read_sink_caps(struct dp_panel *dp_panel, @@ -423,8 +423,9 @@ int dp_panel_init_panel_info(struct dp_panel *dp_panel) drm_mode->clock); drm_dbg_dp(panel->drm_dev, "bpp = %d\n", dp_panel->dp_mode.bpp); - dp_panel->dp_mode.bpp = max_t(u32, 18, - min_t(u32, dp_panel->dp_mode.bpp, 30)); + dp_panel->dp_mode.bpp = dp_panel_get_mode_bpp(dp_panel, dp_panel->dp_mode.bpp, + dp_panel->dp_mode.drm_mode.clock); + drm_dbg_dp(panel->drm_dev, "updated bpp = %d\n", dp_panel->dp_mode.bpp); -- cgit From 959ab6350add903e352890af53e86663739fcb9a Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Sun, 4 Aug 2024 21:30:15 -0400 Subject: cgroup/cpuset: fix panic caused by partcmd_update We find a bug as below: BUG: unable to handle page fault for address: 00000003 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 3 PID: 358 Comm: bash Tainted: G W I 6.6.0-10893-g60d6 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/4 RIP: 0010:partition_sched_domains_locked+0x483/0x600 Code: 01 48 85 d2 74 0d 48 83 05 29 3f f8 03 01 f3 48 0f bc c2 89 c0 48 9 RSP: 0018:ffffc90000fdbc58 EFLAGS: 00000202 RAX: 0000000100000003 RBX: ffff888100b3dfa0 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000002fe80 RBP: ffff888100b3dfb0 R08: 0000000000000001 R09: 0000000000000000 R10: ffffc90000fdbcb0 R11: 0000000000000004 R12: 0000000000000002 R13: ffff888100a92b48 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f44a5425740(0000) GS:ffff888237d80000(0000) knlGS:0000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000100030973 CR3: 000000010722c000 CR4: 00000000000006e0 Call Trace: ? show_regs+0x8c/0xa0 ? __die_body+0x23/0xa0 ? __die+0x3a/0x50 ? page_fault_oops+0x1d2/0x5c0 ? partition_sched_domains_locked+0x483/0x600 ? search_module_extables+0x2a/0xb0 ? search_exception_tables+0x67/0x90 ? kernelmode_fixup_or_oops+0x144/0x1b0 ? __bad_area_nosemaphore+0x211/0x360 ? up_read+0x3b/0x50 ? bad_area_nosemaphore+0x1a/0x30 ? exc_page_fault+0x890/0xd90 ? __lock_acquire.constprop.0+0x24f/0x8d0 ? __lock_acquire.constprop.0+0x24f/0x8d0 ? asm_exc_page_fault+0x26/0x30 ? partition_sched_domains_locked+0x483/0x600 ? partition_sched_domains_locked+0xf0/0x600 rebuild_sched_domains_locked+0x806/0xdc0 update_partition_sd_lb+0x118/0x130 cpuset_write_resmask+0xffc/0x1420 cgroup_file_write+0xb2/0x290 kernfs_fop_write_iter+0x194/0x290 new_sync_write+0xeb/0x160 vfs_write+0x16f/0x1d0 ksys_write+0x81/0x180 __x64_sys_write+0x21/0x30 x64_sys_call+0x2f25/0x4630 do_syscall_64+0x44/0xb0 entry_SYSCALL_64_after_hwframe+0x78/0xe2 RIP: 0033:0x7f44a553c887 It can be reproduced with cammands: cd /sys/fs/cgroup/ mkdir test cd test/ echo +cpuset > ../cgroup.subtree_control echo root > cpuset.cpus.partition cat /sys/fs/cgroup/cpuset.cpus.effective 0-3 echo 0-3 > cpuset.cpus // taking away all cpus from root This issue is caused by the incorrect rebuilding of scheduling domains. In this scenario, test/cpuset.cpus.partition should be an invalid root and should not trigger the rebuilding of scheduling domains. When calling update_parent_effective_cpumask with partcmd_update, if newmask is not null, it should recheck newmask whether there are cpus is available for parect/cs that has tasks. Fixes: 0c7f293efc87 ("cgroup/cpuset: Add cpuset.cpus.exclusive.effective for v2") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Chen Ridong Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 40ec4abaf440..a9b6d56eeffa 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1991,6 +1991,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, part_error = PERR_CPUSEMPTY; goto write_error; } + /* Check newmask again, whether cpus are available for parent/cs */ + nocpu |= tasks_nocpu_error(parent, cs, newmask); /* * partcmd_update with newmask: -- cgit From 311a1bdc44a8e06024df4fd3392be0dfc8298655 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 4 Aug 2024 21:30:16 -0400 Subject: cgroup/cpuset: Clear effective_xcpus on cpus_allowed clearing only if cpus.exclusive not set Commit e2ffe502ba45 ("cgroup/cpuset: Add cpuset.cpus.exclusive for v2") adds a user writable cpuset.cpus.exclusive file for setting exclusive CPUs to be used for the creation of partitions. Since then effective_xcpus depends on both the cpuset.cpus and cpuset.cpus.exclusive setting. If cpuset.cpus.exclusive is set, effective_xcpus will depend only on cpuset.cpus.exclusive. When it is not set, effective_xcpus will be set according to the cpuset.cpus value when the cpuset becomes a valid partition root. When cpuset.cpus is being cleared by the user, effective_xcpus should only be cleared when cpuset.cpus.exclusive is not set. However, that is not currently the case. # cd /sys/fs/cgroup/ # mkdir test # echo +cpuset > cgroup.subtree_control # cd test # echo 3 > cpuset.cpus.exclusive # cat cpuset.cpus.exclusive.effective 3 # echo > cpuset.cpus # cat cpuset.cpus.exclusive.effective // was cleared Fix it by clearing effective_xcpus only if cpuset.cpus.exclusive is not set. Fixes: e2ffe502ba45 ("cgroup/cpuset: Add cpuset.cpus.exclusive for v2") Cc: stable@vger.kernel.org # v6.7+ Reported-by: Chen Ridong Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index a9b6d56eeffa..97d02612b3a6 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2525,7 +2525,8 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, */ if (!*buf) { cpumask_clear(trialcs->cpus_allowed); - cpumask_clear(trialcs->effective_xcpus); + if (cpumask_empty(trialcs->exclusive_cpus)) + cpumask_clear(trialcs->effective_xcpus); } else { retval = cpulist_parse(buf, trialcs->cpus_allowed); if (retval < 0) -- cgit From ff0ce721ec213499ec5a532041fb3a1db2dc5ecb Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 4 Aug 2024 21:30:17 -0400 Subject: cgroup/cpuset: Eliminate unncessary sched domains rebuilds in hotplug It was found that some hotplug operations may cause multiple rebuild_sched_domains_locked() calls. Some of those intermediate calls may use cpuset states not in the final correct form leading to incorrect sched domain setting. Fix this problem by using the existing force_rebuild flag to inhibit immediate rebuild_sched_domains_locked() calls if set and only doing one final call at the end. Also renaming the force_rebuild flag to force_sd_rebuild to make its meaning for clear. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 97d02612b3a6..4bd9e50bcc8e 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -232,6 +232,13 @@ static cpumask_var_t isolated_cpus; /* List of remote partition root children */ static struct list_head remote_children; +/* + * A flag to force sched domain rebuild at the end of an operation while + * inhibiting it in the intermediate stages when set. Currently it is only + * set in hotplug code. + */ +static bool force_sd_rebuild; + /* * Partition root states: * @@ -1475,7 +1482,7 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs) clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); } - if (rebuild_domains) + if (rebuild_domains && !force_sd_rebuild) rebuild_sched_domains_locked(); } @@ -1833,7 +1840,7 @@ static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask, remote_partition_disable(child, tmp); disable_cnt++; } - if (disable_cnt) + if (disable_cnt && !force_sd_rebuild) rebuild_sched_domains_locked(); } @@ -2442,7 +2449,8 @@ get_css: } rcu_read_unlock(); - if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD)) + if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD) && + !force_sd_rebuild) rebuild_sched_domains_locked(); } @@ -3104,7 +3112,8 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, cs->flags = trialcs->flags; spin_unlock_irq(&callback_lock); - if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) + if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed && + !force_sd_rebuild) rebuild_sched_domains_locked(); if (spread_flag_changed) @@ -4501,11 +4510,9 @@ hotplug_update_tasks(struct cpuset *cs, update_tasks_nodemask(cs); } -static bool force_rebuild; - void cpuset_force_rebuild(void) { - force_rebuild = true; + force_sd_rebuild = true; } /** @@ -4653,15 +4660,9 @@ static void cpuset_handle_hotplug(void) !cpumask_empty(subpartitions_cpus); mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems); - /* - * In the rare case that hotplug removes all the cpus in - * subpartitions_cpus, we assumed that cpus are updated. - */ - if (!cpus_updated && !cpumask_empty(subpartitions_cpus)) - cpus_updated = true; - /* For v1, synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { + cpuset_force_rebuild(); spin_lock_irq(&callback_lock); if (!on_dfl) cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); @@ -4717,8 +4718,8 @@ static void cpuset_handle_hotplug(void) } /* rebuild sched domains if cpus_allowed has changed */ - if (cpus_updated || force_rebuild) { - force_rebuild = false; + if (force_sd_rebuild) { + force_sd_rebuild = false; rebuild_sched_domains_cpuslocked(); } -- cgit From aedf02e46eb549dac8db4821a6b9f0c6bf6e3990 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Wed, 31 Jul 2024 12:17:22 -0700 Subject: drm/msm/dpu: move dpu_encoder's connector assignment to atomic_enable() For cases where the crtc's connectors_changed was set without enable/active getting toggled , there is an atomic_enable() call followed by an atomic_disable() but without an atomic_mode_set(). This results in a NULL ptr access for the dpu_encoder_get_drm_fmt() call in the atomic_enable() as the dpu_encoder's connector was cleared in the atomic_disable() but not re-assigned as there was no atomic_mode_set() call. Fix the NULL ptr access by moving the assignment for atomic_enable() and also use drm_atomic_get_new_connector_for_encoder() to get the connector from the atomic_state. Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Reported-by: Dmitry Baryshkov Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/59 Suggested-by: Dmitry Baryshkov Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov # SM8350-HDK Patchwork: https://patchwork.freedesktop.org/patch/606729/ Link: https://lore.kernel.org/r/20240731191723.3050932-1-quic_abhinavk@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 34c56e855af7..3b171bf227d1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1171,8 +1171,6 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc, cstate->num_mixers = num_lm; - dpu_enc->connector = conn_state->connector; - for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; @@ -1270,6 +1268,8 @@ static void dpu_encoder_virt_atomic_enable(struct drm_encoder *drm_enc, dpu_enc->commit_done_timedout = false; + dpu_enc->connector = drm_atomic_get_new_connector_for_encoder(state, drm_enc); + cur_mode = &dpu_enc->base.crtc->state->adjusted_mode; dpu_enc->wide_bus_en = dpu_encoder_is_widebus_enabled(drm_enc); -- cgit From 319aca883bfa1b85ee08411541b51b9a934ac858 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Thu, 25 Jul 2024 15:04:50 -0700 Subject: drm/msm/dp: reset the link phy params before link training Before re-starting link training reset the link phy params namely the pre-emphasis and voltage swing levels otherwise the next link training begins at the previously cached levels which can result in link training failures. Fixes: 8ede2ecc3e5e ("drm/msm/dp: Add DP compliance tests on Snapdragon Chipsets") Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov # SM8350-HDK Reviewed-by: Stephen Boyd Patchwork: https://patchwork.freedesktop.org/patch/605946/ Link: https://lore.kernel.org/r/20240725220450.131245-1-quic_abhinavk@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_ctrl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 7bc8a9f0657a..f342fc5ae41e 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1286,6 +1286,8 @@ static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl, link_info.rate = ctrl->link->link_params.rate; link_info.capabilities = DP_LINK_CAP_ENHANCED_FRAMING; + dp_link_reset_phy_params_vx_px(ctrl->link); + dp_aux_link_configure(ctrl->aux, &link_info); if (drm_dp_max_downspread(dpcd)) -- cgit From bfa1a6283be390947d3649c482e5167186a37016 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 25 Jun 2024 00:13:41 +0300 Subject: drm/msm/dpu: cleanup FB if dpu_format_populate_layout fails If the dpu_format_populate_layout() fails, then FB is prepared, but not cleaned up. This ends up leaking the pin_count on the GEM object and causes a splat during DRM file closure: msm_obj->pin_count WARNING: CPU: 2 PID: 569 at drivers/gpu/drm/msm/msm_gem.c:121 update_lru_locked+0xc4/0xcc [...] Call trace: update_lru_locked+0xc4/0xcc put_pages+0xac/0x100 msm_gem_free_object+0x138/0x180 drm_gem_object_free+0x1c/0x30 drm_gem_object_handle_put_unlocked+0x108/0x10c drm_gem_object_release_handle+0x58/0x70 idr_for_each+0x68/0xec drm_gem_release+0x28/0x40 drm_file_free+0x174/0x234 drm_release+0xb0/0x160 __fput+0xc0/0x2c8 __fput_sync+0x50/0x5c __arm64_sys_close+0x38/0x7c invoke_syscall+0x48/0x118 el0_svc_common.constprop.0+0x40/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x4c/0x120 el0t_64_sync_handler+0x100/0x12c el0t_64_sync+0x190/0x194 irq event stamp: 129818 hardirqs last enabled at (129817): [] console_unlock+0x118/0x124 hardirqs last disabled at (129818): [] el1_dbg+0x24/0x8c softirqs last enabled at (129808): [] handle_softirqs+0x4c8/0x4e8 softirqs last disabled at (129785): [] __do_softirq+0x14/0x20 Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/600714/ Link: https://lore.kernel.org/r/20240625-dpu-mode-config-width-v5-1-501d984d634f@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 40c4dd2c3139..a62ac0c0c06a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -681,6 +681,9 @@ static int dpu_plane_prepare_fb(struct drm_plane *plane, new_state->fb, &layout); if (ret) { DPU_ERROR_PLANE(pdpu, "failed to get format layout, %d\n", ret); + if (pstate->aspace) + msm_framebuffer_cleanup(new_state->fb, pstate->aspace, + pstate->needs_dirtyfb); return ret; } -- cgit From 2db13c4a631505029ada9404e09a2b06a268c1c4 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 27 Jun 2024 00:45:55 +0300 Subject: drm/msm/dpu: limit QCM2290 to RGB formats only The QCM2290 doesn't have CSC blocks, so it can not support YUV formats even on ViG blocks. Fix the formats declared by _VIG_SBLK_NOSCALE(). Fixes: 5334087ee743 ("drm/msm: add support for QCM2290 MDSS") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/601048/ Link: https://lore.kernel.org/r/20240627-dpu-virtual-wide-v5-1-5efb90cbb8be@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index fc178ec73907..648c8d0a4c36 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -308,8 +308,8 @@ static const u32 wb2_formats_rgb_yuv[] = { { \ .maxdwnscale = SSPP_UNITY_SCALE, \ .maxupscale = SSPP_UNITY_SCALE, \ - .format_list = plane_formats_yuv, \ - .num_formats = ARRAY_SIZE(plane_formats_yuv), \ + .format_list = plane_formats, \ + .num_formats = ARRAY_SIZE(plane_formats), \ .virt_format_list = plane_formats, \ .virt_num_formats = ARRAY_SIZE(plane_formats), \ } -- cgit From cb18195914e353ece0e789e365a5a16872169805 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 27 Jun 2024 00:45:56 +0300 Subject: drm/msm/dpu: relax YUV requirements YUV formats require only CSC to be enabled. Even decimated formats should not require scaler. Relax the requirement and don't check for the scaler block while checking if YUV format can be enabled. Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/601049/ Link: https://lore.kernel.org/r/20240627-dpu-virtual-wide-v5-2-5efb90cbb8be@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index a62ac0c0c06a..dc1fd95e767b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -747,10 +747,9 @@ static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, min_src_size = MSM_FORMAT_IS_YUV(fmt) ? 2 : 1; if (MSM_FORMAT_IS_YUV(fmt) && - (!pipe->sspp->cap->sblk->scaler_blk.len || - !pipe->sspp->cap->sblk->csc_blk.len)) { + !pipe->sspp->cap->sblk->csc_blk.len) { DPU_DEBUG_PLANE(pdpu, - "plane doesn't have scaler/csc for yuv\n"); + "plane doesn't have csc for yuv\n"); return -EINVAL; } -- cgit From d3a785e4f983f523380e023d8a05fb6d04402957 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 27 Jun 2024 00:45:57 +0300 Subject: drm/msm/dpu: take plane rotation into account for wide planes Take into account the plane rotation and flipping when calculating src positions for the wide plane parts. This is not an issue yet, because rotation is only supported for the UBWC planes and wide UBWC planes are rejected anyway because in parallel multirect case only the half of the usual width is supported for tiled formats. However it's better to fix this now rather than stumbling upon it later. Fixes: 80e8ae3b38ab ("drm/msm/dpu: add support for wide planes") Reviewed-by: Abhinav Kumar Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/601059/ Link: https://lore.kernel.org/r/20240627-dpu-virtual-wide-v5-3-5efb90cbb8be@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index dc1fd95e767b..29298e066163 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -866,6 +866,10 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, max_linewidth = pdpu->catalog->caps->max_linewidth; + drm_rect_rotate(&pipe_cfg->src_rect, + new_plane_state->fb->width, new_plane_state->fb->height, + new_plane_state->rotation); + if ((drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) || _dpu_plane_calc_clk(&crtc_state->adjusted_mode, pipe_cfg) > max_mdp_clk_rate) { /* @@ -915,6 +919,14 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, r_pipe_cfg->dst_rect.x1 = pipe_cfg->dst_rect.x2; } + drm_rect_rotate_inv(&pipe_cfg->src_rect, + new_plane_state->fb->width, new_plane_state->fb->height, + new_plane_state->rotation); + if (r_pipe->sspp) + drm_rect_rotate_inv(&r_pipe_cfg->src_rect, + new_plane_state->fb->width, new_plane_state->fb->height, + new_plane_state->rotation); + ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt, &crtc_state->adjusted_mode); if (ret) return ret; -- cgit From 44732f1dad20457d64c525549cd63dcef2563c23 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Fri, 19 Jul 2024 17:30:16 +0300 Subject: workqueue: doc: Fix function name, remove markers - s/alloc_ordered_queue()/alloc_ordered_workqueue()/ - remove markers to convert it into a link. Signed-off-by: Nikita Shubin Signed-off-by: Tejun Heo --- Documentation/core-api/workqueue.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst index bcc370c876be..16f861c9791e 100644 --- a/Documentation/core-api/workqueue.rst +++ b/Documentation/core-api/workqueue.rst @@ -260,7 +260,7 @@ Some users depend on strict execution ordering where only one work item is in flight at any given time and the work items are processed in queueing order. While the combination of ``@max_active`` of 1 and ``WQ_UNBOUND`` used to achieve this behavior, this is no longer the -case. Use ``alloc_ordered_queue()`` instead. +case. Use alloc_ordered_workqueue() instead. Example Execution Scenarios -- cgit From 38f7e14519d39cf524ddc02d4caee9b337dad703 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 30 Jul 2024 12:44:31 +0100 Subject: workqueue: Fix UBSAN 'subtraction overflow' error in shift_and_mask() UBSAN reports the following 'subtraction overflow' error when booting in a virtual machine on Android: | Internal error: UBSAN: integer subtraction overflow: 00000000f2005515 [#1] PREEMPT SMP | Modules linked in: | CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.10.0-00006-g3cbe9e5abd46-dirty #4 | Hardware name: linux,dummy-virt (DT) | pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : cancel_delayed_work+0x34/0x44 | lr : cancel_delayed_work+0x2c/0x44 | sp : ffff80008002ba60 | x29: ffff80008002ba60 x28: 0000000000000000 x27: 0000000000000000 | x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 | x23: 0000000000000000 x22: 0000000000000000 x21: ffff1f65014cd3c0 | x20: ffffc0e84c9d0da0 x19: ffffc0e84cab3558 x18: ffff800080009058 | x17: 00000000247ee1f8 x16: 00000000247ee1f8 x15: 00000000bdcb279d | x14: 0000000000000001 x13: 0000000000000075 x12: 00000a0000000000 | x11: ffff1f6501499018 x10: 00984901651fffff x9 : ffff5e7cc35af000 | x8 : 0000000000000001 x7 : 3d4d455453595342 x6 : 000000004e514553 | x5 : ffff1f6501499265 x4 : ffff1f650ff60b10 x3 : 0000000000000620 | x2 : ffff80008002ba78 x1 : 0000000000000000 x0 : 0000000000000000 | Call trace: | cancel_delayed_work+0x34/0x44 | deferred_probe_extend_timeout+0x20/0x70 | driver_register+0xa8/0x110 | __platform_driver_register+0x28/0x3c | syscon_init+0x24/0x38 | do_one_initcall+0xe4/0x338 | do_initcall_level+0xac/0x178 | do_initcalls+0x5c/0xa0 | do_basic_setup+0x20/0x30 | kernel_init_freeable+0x8c/0xf8 | kernel_init+0x28/0x1b4 | ret_from_fork+0x10/0x20 | Code: f9000fbf 97fffa2f 39400268 37100048 (d42aa2a0) | ---[ end trace 0000000000000000 ]--- | Kernel panic - not syncing: UBSAN: integer subtraction overflow: Fatal exception This is due to shift_and_mask() using a signed immediate to construct the mask and being called with a shift of 31 (WORK_OFFQ_POOL_SHIFT) so that it ends up decrementing from INT_MIN. Use an unsigned constant '1U' to generate the mask in shift_and_mask(). Cc: Tejun Heo Cc: Lai Jiangshan Fixes: 1211f3b21c2a ("workqueue: Preserve OFFQ bits in cancel[_sync] paths") Signed-off-by: Will Deacon Signed-off-by: Tejun Heo --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1745ca788ede..b35f8ce80bc7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -897,7 +897,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work) static unsigned long shift_and_mask(unsigned long v, u32 shift, u32 bits) { - return (v >> shift) & ((1 << bits) - 1); + return (v >> shift) & ((1U << bits) - 1); } static void work_offqd_unpack(struct work_offq_data *offqd, unsigned long data) -- cgit From 98cc1730c89467fc26e2dc2ceb2a014f332daa97 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 25 Jul 2024 09:04:37 +0800 Subject: workqueue: Remove incorrect "WARN_ON_ONCE(!list_empty(&worker->entry));" from dying worker The commit 68f83057b913 ("workqueue: Reap workers via kthread_stop() and remove detach_completion") changes the procedure of destroying workers; the dying workers are kept in the cull_list in wake_dying_workers() with the pool lock held and removed from the cull_list by the newly added reap_dying_workers() without the pool lock. This can cause a warning if the dying worker is wokenup earlier than reaped as reported by Marc: 2024/07/23 18:01:21 [M83LP63]: [ 157.267727] ------------[ cut here ]------------ 2024/07/23 18:01:21 [M83LP63]: [ 157.267735] WARNING: CPU: 21 PID: 725 at kernel/workqueue.c:3340 worker_thread+0x54e/0x558 2024/07/23 18:01:21 [M83LP63]: [ 157.267746] Modules linked in: binfmt_misc nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables sunrpc dm_service_time s390_trng vfio_ccw mdev vfio_iommu_type1 vfio sch_fq_codel 2024/07/23 18:01:21 [M83LP63]: loop dm_multipath configfs nfnetlink lcs ctcm fsm zfcp scsi_transport_fc ghash_s390 prng chacha_s390 libchacha aes_s390 des_s390 libdes sha3_512_s390 sha3_256_s390 sha512_s390 sha256_s390 sha1_s390 sha_common scm_block eadm_sch scsi_dh_rdac scsi_dh_emc scsi_dh_alua pkey zcrypt rng_core autofs4 2024/07/23 18:01:21 [M83LP63]: [ 157.267792] CPU: 21 PID: 725 Comm: kworker/dying Not tainted 6.10.0-rc2-00239-g68f83057b913 #95 2024/07/23 18:01:21 [M83LP63]: [ 157.267796] Hardware name: IBM 3906 M04 704 (LPAR) 2024/07/23 18:01:21 [M83LP63]: [ 157.267802] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 2024/07/23 18:01:21 [M83LP63]: [ 157.267797] Krnl PSW : 0704d00180000000 000003d600fcd9fa (worker_thread+0x552/0x558) 2024/07/23 18:01:21 [M83LP63]: [ 157.267806] Krnl GPRS: 6479696e6700776f 000002c901b62780 000003d602493ec8 000002c914954600 2024/07/23 18:01:21 [M83LP63]: [ 157.267809] 0000000000000000 0000000000000008 000002c901a85400 000002c90719e840 2024/07/23 18:01:21 [M83LP63]: [ 157.267811] 000002c90719e880 000002c901a85420 000002c91127adf0 000002c901a85400 2024/07/23 18:01:21 [M83LP63]: [ 157.267813] 000002c914954600 0000000000000000 000003d600fcd772 000003560452bd98 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] Krnl Code: 000003d600fcd9ec: c0e500674262 brasl %r14,000003d601cb5eb0 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] 000003d600fcd9f2: a7f4ffc8 brc 15,000003d600fcd982 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] #000003d600fcd9f6: af000000 mc 0,0 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] >000003d600fcd9fa: a7f4fec2 brc 15,000003d600fcd77e 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] 000003d600fcd9fe: 0707 bcr 0,%r7 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] 000003d600fcda00: c00400682e10 brcl 0,000003d601cd3620 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] 000003d600fcda06: eb7ff0500024 stmg %r7,%r15,80(%r15) 2024/07/23 18:01:21 [M83LP63]: [ 157.267822] 000003d600fcda0c: b90400ef lgr %r14,%r15 2024/07/23 18:01:21 [M83LP63]: [ 157.267853] Call Trace: 2024/07/23 18:01:21 [M83LP63]: [ 157.267855] [<000003d600fcd9fa>] worker_thread+0x552/0x558 2024/07/23 18:01:21 [M83LP63]: [ 157.267859] ([<000003d600fcd772>] worker_thread+0x2ca/0x558) 2024/07/23 18:01:21 [M83LP63]: [ 157.267862] [<000003d600fd6c80>] kthread+0x120/0x128 2024/07/23 18:01:21 [M83LP63]: [ 157.267865] [<000003d600f5305c>] __ret_from_fork+0x3c/0x58 2024/07/23 18:01:21 [M83LP63]: [ 157.267868] [<000003d601cc746a>] ret_from_fork+0xa/0x30 2024/07/23 18:01:21 [M83LP63]: [ 157.267873] Last Breaking-Event-Address: 2024/07/23 18:01:21 [M83LP63]: [ 157.267874] [<000003d600fcd778>] worker_thread+0x2d0/0x558 Since the procedure of destroying workers is changed, the WARN_ON_ONCE() becomes incorrect and should be removed. Cc: Marc Hartmayer Link: https://lore.kernel.org/lkml/87le1sjd2e.fsf@linux.ibm.com/ Reported-by: Marc Hartmayer Fixes: 68f83057b913 ("workqueue: Reap workers via kthread_stop() and remove detach_completion") Cc: stable@vger.kernel.org # v6.11+ Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b35f8ce80bc7..d56bd2277e58 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3351,7 +3351,6 @@ woke_up: set_pf_worker(false); ida_free(&pool->worker_ida, worker->id); - WARN_ON_ONCE(!list_empty(&worker->entry)); return 0; } -- cgit From 8bc35475ef1a23b0e224f3242eb11c76cab0ea88 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Aug 2024 09:37:25 -1000 Subject: workqueue: Fix spruious data race in __flush_work() When flushing a work item for cancellation, __flush_work() knows that it exclusively owns the work item through its PENDING bit. 134874e2eee9 ("workqueue: Allow cancel_work_sync() and disable_work() from atomic contexts on BH work items") added a read of @work->data to determine whether to use busy wait for BH work items that are being canceled. While the read is safe when @from_cancel, @work->data was read before testing @from_cancel to simplify code structure: data = *work_data_bits(work); if (from_cancel && !WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_BH)) { While the read data was never used if !@from_cancel, this could trigger KCSAN data race detection spuriously: ================================================================== BUG: KCSAN: data-race in __flush_work / __flush_work write to 0xffff8881223aa3e8 of 8 bytes by task 3998 on cpu 0: instrument_write include/linux/instrumented.h:41 [inline] ___set_bit include/asm-generic/bitops/instrumented-non-atomic.h:28 [inline] insert_wq_barrier kernel/workqueue.c:3790 [inline] start_flush_work kernel/workqueue.c:4142 [inline] __flush_work+0x30b/0x570 kernel/workqueue.c:4178 flush_work kernel/workqueue.c:4229 [inline] ... read to 0xffff8881223aa3e8 of 8 bytes by task 50 on cpu 1: __flush_work+0x42a/0x570 kernel/workqueue.c:4188 flush_work kernel/workqueue.c:4229 [inline] flush_delayed_work+0x66/0x70 kernel/workqueue.c:4251 ... value changed: 0x0000000000400000 -> 0xffff88810006c00d Reorganize the code so that @from_cancel is tested before @work->data is accessed. The only problem is triggering KCSAN detection spuriously. This shouldn't need READ_ONCE() or other access qualifiers. No functional changes. Signed-off-by: Tejun Heo Reported-by: syzbot+b3e4f2f51ed645fd5df2@syzkaller.appspotmail.com Fixes: 134874e2eee9 ("workqueue: Allow cancel_work_sync() and disable_work() from atomic contexts on BH work items") Link: http://lkml.kernel.org/r/000000000000ae429e061eea2157@google.com Cc: Jens Axboe --- kernel/workqueue.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d56bd2277e58..ef174d8c1f63 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4166,7 +4166,6 @@ already_gone: static bool __flush_work(struct work_struct *work, bool from_cancel) { struct wq_barrier barr; - unsigned long data; if (WARN_ON(!wq_online)) return false; @@ -4184,29 +4183,35 @@ static bool __flush_work(struct work_struct *work, bool from_cancel) * was queued on a BH workqueue, we also know that it was running in the * BH context and thus can be busy-waited. */ - data = *work_data_bits(work); - if (from_cancel && - !WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_BH)) { - /* - * On RT, prevent a live lock when %current preempted soft - * interrupt processing or prevents ksoftirqd from running by - * keeping flipping BH. If the BH work item runs on a different - * CPU then this has no effect other than doing the BH - * disable/enable dance for nothing. This is copied from - * kernel/softirq.c::tasklet_unlock_spin_wait(). - */ - while (!try_wait_for_completion(&barr.done)) { - if (IS_ENABLED(CONFIG_PREEMPT_RT)) { - local_bh_disable(); - local_bh_enable(); - } else { - cpu_relax(); + if (from_cancel) { + unsigned long data = *work_data_bits(work); + + if (!WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && + (data & WORK_OFFQ_BH)) { + /* + * On RT, prevent a live lock when %current preempted + * soft interrupt processing or prevents ksoftirqd from + * running by keeping flipping BH. If the BH work item + * runs on a different CPU then this has no effect other + * than doing the BH disable/enable dance for nothing. + * This is copied from + * kernel/softirq.c::tasklet_unlock_spin_wait(). + */ + while (!try_wait_for_completion(&barr.done)) { + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + local_bh_disable(); + local_bh_enable(); + } else { + cpu_relax(); + } } + goto out_destroy; } - } else { - wait_for_completion(&barr.done); } + wait_for_completion(&barr.done); + +out_destroy: destroy_work_on_stack(&barr.work); return true; } -- cgit From c4c8f369b6a6d21ce27286de1501137771e01dc3 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 5 Aug 2024 09:30:29 +0200 Subject: workqueue: Correct declaration of cpu_pwq in struct workqueue_struct cpu_pwq is used in various percpu functions that expect variable in __percpu address space. Correct the declaration of cpu_pwq to struct pool_workqueue __rcu * __percpu *cpu_pwq to declare the variable as __percpu pointer. The patch also fixes following sparse errors: workqueue.c:380:37: warning: duplicate [noderef] workqueue.c:380:37: error: multiple address spaces given: __rcu & __percpu workqueue.c:2271:15: error: incompatible types in comparison expression (different address spaces): workqueue.c:2271:15: struct pool_workqueue [noderef] __rcu * workqueue.c:2271:15: struct pool_workqueue [noderef] __percpu * and uncovers a couple of exisiting "incorrect type in assignment" warnings (from __rcu address space), which this patch does not address. Found by GCC's named address space checks. There were no changes in the resulting object files. Signed-off-by: Uros Bizjak Cc: Tejun Heo Cc: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ef174d8c1f63..e7b005ff3750 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -377,7 +377,7 @@ struct workqueue_struct { /* hot fields used during command issue, aligned to cacheline */ unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */ - struct pool_workqueue __percpu __rcu **cpu_pwq; /* I: per-cpu pwqs */ + struct pool_workqueue __rcu * __percpu *cpu_pwq; /* I: per-cpu pwqs */ struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */ }; -- cgit From 8fcd8d1e63c05c48b3ac16d0c3e2cd6a7a5c8ec4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 26 Jul 2024 04:23:14 +0900 Subject: kbuild: clean up code duplication in cmd_fdtoverlay When resolving a merge conflict, Linus noticed the fdtoverlay command duplication introduced by commit 49636c5680b9 ("kbuild: verify dtoverlay files against schema"). He suggested a clean-up. I eliminated the duplication and refactored the code a little further. No functional changes are intended, except for the short logs. The log will look as follows: $ make ARCH=arm64 defconfig dtbs_check [ snip ] DTC [C] arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxca.dtb DTC [C] arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dtb DTC [C] arch/arm64/boot/dts/freescale/imx93-var-som-symphony.dtb DTC [C] arch/arm64/boot/dts/freescale/imx95-19x19-evk.dtb DTC arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx-0x-imx219.dtbo OVL [C] arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx-0x-imx219.dtb The tag [C] indicates that the schema check is executed. Link: https://lore.kernel.org/lkml/CAHk-=wiF3yeWehcvqY-4X7WNb8n4yw_5t0H1CpEpKi7JMjaMfw@mail.gmail.com/#t Requested-by: Linus Torvalds Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- scripts/Makefile.lib | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index fe3668dc4954..207325eaf1d1 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -400,26 +400,23 @@ $(obj)/%.dtb.S: $(obj)/%.dtb FORCE $(obj)/%.dtbo.S: $(obj)/%.dtbo FORCE $(call if_changed,wrap_S_dtb) -quiet_cmd_dtc = DTC $@ +quiet_dtb_check_tag = $(if $(dtb-check-enabled),[C], ) +cmd_dtb_check = $(if $(dtb-check-enabled),; $(DT_CHECKER) $(DT_CHECKER_FLAGS) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) $@ || true) + +quiet_cmd_dtc = DTC $(quiet_dtb_check_tag) $@ cmd_dtc = $(HOSTCC) -E $(dtc_cpp_flags) -x assembler-with-cpp -o $(dtc-tmp) $< ; \ $(DTC) -o $@ -b 0 \ $(addprefix -i,$(dir $<) $(DTC_INCLUDE)) $(DTC_FLAGS) \ -d $(depfile).dtc.tmp $(dtc-tmp) ; \ - cat $(depfile).pre.tmp $(depfile).dtc.tmp > $(depfile) - -DT_CHECK_CMD = $(DT_CHECKER) $(DT_CHECKER_FLAGS) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) + cat $(depfile).pre.tmp $(depfile).dtc.tmp > $(depfile) \ + $(cmd_dtb_check) # NOTE: # Do not replace $(filter %.dtb %.dtbo, $^) with $(real-prereqs). When a single # DTB is turned into a multi-blob DTB, $^ will contain header file dependencies # recorded in the .*.cmd file. -ifneq ($(CHECK_DTBS),) -quiet_cmd_fdtoverlay = DTOVLCH $@ - cmd_fdtoverlay = $(objtree)/scripts/dtc/fdtoverlay -o $@ -i $(filter %.dtb %.dtbo, $^) ; $(DT_CHECK_CMD) $@ || true -else -quiet_cmd_fdtoverlay = DTOVL $@ - cmd_fdtoverlay = $(objtree)/scripts/dtc/fdtoverlay -o $@ -i $(filter %.dtb %.dtbo, $^) -endif +quiet_cmd_fdtoverlay = OVL $(quiet_dtb_check_tag) $@ + cmd_fdtoverlay = $(objtree)/scripts/dtc/fdtoverlay -o $@ -i $(filter %.dtb %.dtbo, $^) $(cmd_dtb_check) $(multi-dtb-y): FORCE $(call if_changed,fdtoverlay) @@ -430,16 +427,11 @@ DT_CHECKER ?= dt-validate DT_CHECKER_FLAGS ?= $(if $(DT_SCHEMA_FILES),-l $(DT_SCHEMA_FILES),-m) DT_BINDING_DIR := Documentation/devicetree/bindings DT_TMP_SCHEMA := $(objtree)/$(DT_BINDING_DIR)/processed-schema.json - -quiet_cmd_dtb = DTC_CHK $@ - cmd_dtb = $(cmd_dtc) ; $(DT_CHECK_CMD) $@ || true -else -quiet_cmd_dtb = $(quiet_cmd_dtc) - cmd_dtb = $(cmd_dtc) +dtb-check-enabled = $(if $(filter %.dtb, $@),y) endif $(obj)/%.dtb: $(obj)/%.dts $(DTC) $(DT_TMP_SCHEMA) FORCE - $(call if_changed_dep,dtb) + $(call if_changed_dep,dtc) $(obj)/%.dtbo: $(src)/%.dtso $(DTC) FORCE $(call if_changed_dep,dtc) -- cgit From 6fc9aacad49e3fbecd270c266850d50c453d52ef Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sun, 4 Aug 2024 14:50:57 +0900 Subject: Makefile: add $(srctree) to dependency of compile_commands.json target When trying to build compile_commands.json for an external module against the kernel built in a separate output directory, the following error is displayed: make[1]: *** No rule to make target 'scripts/clang-tools/gen_compile_commands.py', needed by 'compile_commands.json'. Stop. This is because gen_compile_commands.py was previously looked up using a relative path to $(srctree), but commit b1992c3772e6 ("kbuild: use $(src) instead of $(srctree)/$(src) for source directory") stopped defining VPATH for external module builds. Prefixing gen_compile_commands.py with $(srctree) fixes the problem. Fixes: b1992c3772e6 ("kbuild: use $(src) instead of $(srctree)/$(src) for source directory") Signed-off-by: Alexandre Courbot Reviewed-by: Nicolas Schier Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 44c02a6f60a1..44c33de28e93 100644 --- a/Makefile +++ b/Makefile @@ -1980,7 +1980,7 @@ nsdeps: modules quiet_cmd_gen_compile_commands = GEN $@ cmd_gen_compile_commands = $(PYTHON3) $< -a $(AR) -o $@ $(filter-out $<, $(real-prereqs)) -$(extmod_prefix)compile_commands.json: scripts/clang-tools/gen_compile_commands.py \ +$(extmod_prefix)compile_commands.json: $(srctree)/scripts/clang-tools/gen_compile_commands.py \ $(if $(KBUILD_EXTMOD),, vmlinux.a $(KBUILD_VMLINUX_LIBS)) \ $(if $(CONFIG_MODULES), $(MODORDER)) FORCE $(call if_changed,gen_compile_commands) -- cgit From e2006140ad2e01a02ed0aff49cc2ae3ceeb11f8d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 13 Jun 2024 15:05:03 +0300 Subject: thunderbolt: Mark XDomain as unplugged when router is removed I noticed that when we do discrete host router NVM upgrade and it gets hot-removed from the PCIe side as a result of NVM firmware authentication, if there is another host connected with enabled paths we hang in tearing them down. This is due to fact that the Thunderbolt networking driver also tries to cleanup the paths and ends up blocking in tb_disconnect_xdomain_paths() waiting for the domain lock. However, at this point we already cleaned the paths in tb_stop() so there is really no need for tb_disconnect_xdomain_paths() to do that anymore. Furthermore it already checks if the XDomain is unplugged and bails out early so take advantage of that and mark the XDomain as unplugged when we remove the parent router. Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/switch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 326433df5880..6a2116cbb06f 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -3392,6 +3392,7 @@ void tb_switch_remove(struct tb_switch *sw) tb_switch_remove(port->remote->sw); port->remote = NULL; } else if (port->xdomain) { + port->xdomain->is_unplugged = true; tb_xdomain_remove(port->xdomain); port->xdomain = NULL; } -- cgit From 33330bcf031818e60a816db0cfd3add9eecc3b28 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Mon, 5 Aug 2024 11:22:34 +0200 Subject: scripts: kconfig: merge_config: config files: add a trailing newline When merging files without trailing newlines at the end of the file, two config fragments end up at the same row if file1.config doens't have a trailing newline at the end of the file. file1.config "CONFIG_1=y" file2.config "CONFIG_2=y" ./scripts/kconfig/merge_config.sh -m .config file1.config file2.config This will generate a .config looking like this. cat .config ... CONFIG_1=yCONFIG_2=y" Making sure so we add a newline at the end of every config file that is passed into the script. Signed-off-by: Anders Roxell Signed-off-by: Masahiro Yamada --- scripts/kconfig/merge_config.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh index 902eb429b9db..0b7952471c18 100755 --- a/scripts/kconfig/merge_config.sh +++ b/scripts/kconfig/merge_config.sh @@ -167,6 +167,8 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do sed -i "/$CFG[ =]/d" $MERGE_FILE fi done + # In case the previous file lacks a new line at the end + echo >> $TMP_FILE cat $MERGE_FILE >> $TMP_FILE done -- cgit From 76fe372ccb81b0c89b6cd2fec26e2f38c958be85 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 22 Jul 2024 12:28:42 -0700 Subject: can: bcm: Remove proc entry when dev is unregistered. syzkaller reported a warning in bcm_connect() below. [0] The repro calls connect() to vxcan1, removes vxcan1, and calls connect() with ifindex == 0. Calling connect() for a BCM socket allocates a proc entry. Then, bcm_sk(sk)->bound is set to 1 to prevent further connect(). However, removing the bound device resets bcm_sk(sk)->bound to 0 in bcm_notify(). The 2nd connect() tries to allocate a proc entry with the same name and sets NULL to bcm_sk(sk)->bcm_proc_read, leaking the original proc entry. Since the proc entry is available only for connect()ed sockets, let's clean up the entry when the bound netdev is unregistered. [0]: proc_dir_entry 'can-bcm/2456' already registered WARNING: CPU: 1 PID: 394 at fs/proc/generic.c:376 proc_register+0x645/0x8f0 fs/proc/generic.c:375 Modules linked in: CPU: 1 PID: 394 Comm: syz-executor403 Not tainted 6.10.0-rc7-g852e42cc2dd4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 RIP: 0010:proc_register+0x645/0x8f0 fs/proc/generic.c:375 Code: 00 00 00 00 00 48 85 ed 0f 85 97 02 00 00 4d 85 f6 0f 85 9f 02 00 00 48 c7 c7 9b cb cf 87 48 89 de 4c 89 fa e8 1c 6f eb fe 90 <0f> 0b 90 90 48 c7 c7 98 37 99 89 e8 cb 7e 22 05 bb 00 00 00 10 48 RSP: 0018:ffa0000000cd7c30 EFLAGS: 00010246 RAX: 9e129be1950f0200 RBX: ff1100011b51582c RCX: ff1100011857cd80 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000002 RBP: 0000000000000000 R08: ffd400000000000f R09: ff1100013e78cac0 R10: ffac800000cd7980 R11: ff1100013e12b1f0 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ff1100011a99a2ec FS: 00007fbd7086f740(0000) GS:ff1100013fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200071c0 CR3: 0000000118556004 CR4: 0000000000771ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: proc_create_net_single+0x144/0x210 fs/proc/proc_net.c:220 bcm_connect+0x472/0x840 net/can/bcm.c:1673 __sys_connect_file net/socket.c:2049 [inline] __sys_connect+0x5d2/0x690 net/socket.c:2066 __do_sys_connect net/socket.c:2076 [inline] __se_sys_connect net/socket.c:2073 [inline] __x64_sys_connect+0x8f/0x100 net/socket.c:2073 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd9/0x1c0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x7fbd708b0e5d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 9f 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007fff8cd33f08 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007fbd708b0e5d RDX: 0000000000000010 RSI: 0000000020000040 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000040 R09: 0000000000000040 R10: 0000000000000040 R11: 0000000000000246 R12: 00007fff8cd34098 R13: 0000000000401280 R14: 0000000000406de8 R15: 00007fbd70ab9000 remove_proc_entry: removing non-empty directory 'net/can-bcm', leaking at least '2456' Fixes: ffd980f976e7 ("[CAN]: Add broadcast manager (bcm) protocol") Reported-by: syzkaller Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://lore.kernel.org/all/20240722192842.37421-1-kuniyu@amazon.com Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/can/bcm.c b/net/can/bcm.c index 27d5fcf0eac9..46d3ec3aa44b 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1470,6 +1470,10 @@ static void bcm_notify(struct bcm_sock *bo, unsigned long msg, /* remove device reference, if this is our bound device */ if (bo->bound && bo->ifindex == dev->ifindex) { +#if IS_ENABLED(CONFIG_PROC_FS) + if (sock_net(sk)->can.bcmproc_dir && bo->bcm_proc_read) + remove_proc_entry(bo->procname, sock_net(sk)->can.bcmproc_dir); +#endif bo->bound = 0; bo->ifindex = 0; notify_enodev = 1; -- cgit From 06d4ef3056a7ac31be331281bb7a6302ef5a7f8a Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 5 Aug 2024 15:01:58 +0100 Subject: can: m_can: Release irq on error in m_can_open It appears that the irq requested in m_can_open() may be leaked if an error subsequently occurs: if m_can_start() fails. Address this by calling free_irq in the unwind path for such cases. Flagged by Smatch. Compile tested only. Fixes: eaacfeaca7ad ("can: m_can: Call the RAM init directly from m_can_chip_config") Acked-by: Marc Kleine-Budde Signed-off-by: Simon Horman Link: https://lore.kernel.org/all/20240805-mcan-irq-v2-1-7154c0484819@kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 7f63f866083e..cd83c8b5d4b1 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2052,7 +2052,7 @@ static int m_can_open(struct net_device *dev) /* start the m_can controller */ err = m_can_start(dev); if (err) - goto exit_irq_fail; + goto exit_start_fail; if (!cdev->is_peripheral) napi_enable(&cdev->napi); @@ -2061,6 +2061,9 @@ static int m_can_open(struct net_device *dev) return 0; +exit_start_fail: + if (cdev->is_peripheral || dev->irq) + free_irq(dev->irq, dev); exit_irq_fail: if (cdev->is_peripheral) destroy_workqueue(cdev->tx_wq); -- cgit From a651261ac74298535f6d6316ebe27beceb6b17b1 Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Mon, 5 Aug 2024 20:30:41 +0200 Subject: can: m_can: Reset coalescing during suspend/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During resume the interrupts are limited to IR_RF0N and the chip keeps running. In this case if coalescing is enabled and active we may miss waterlevel interrupts during suspend. It is safer to reset the coalescing by stopping the timer and adding IR_RF0N | IR_TEFN to the interrupts. This is a theoratical issue and probably extremely rare. Cc: Martin Hundebøll Fixes: 4a94d7e31cf5 ("can: m_can: allow keeping the transceiver running in suspend") Signed-off-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/all/20240805183047.305630-2-msp@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index cd83c8b5d4b1..31991e2f343e 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2430,12 +2430,15 @@ int m_can_class_suspend(struct device *dev) netif_device_detach(ndev); /* leave the chip running with rx interrupt enabled if it is - * used as a wake-up source. + * used as a wake-up source. Coalescing needs to be reset then, + * the timer is cancelled here, interrupts are done in resume. */ - if (cdev->pm_wake_source) + if (cdev->pm_wake_source) { + hrtimer_cancel(&cdev->hrtimer); m_can_write(cdev, M_CAN_IE, IR_RF0N); - else + } else { m_can_stop(ndev); + } m_can_clk_stop(cdev); } @@ -2465,6 +2468,13 @@ int m_can_class_resume(struct device *dev) return ret; if (cdev->pm_wake_source) { + /* Restore active interrupts but disable coalescing as + * we may have missed important waterlevel interrupts + * between suspend and resume. Timers are already + * stopped in suspend. Here we enable all interrupts + * again. + */ + cdev->active_interrupts |= IR_RF0N | IR_TEFN; m_can_write(cdev, M_CAN_IE, cdev->active_interrupts); } else { ret = m_can_start(ndev); -- cgit From 6eff1cead75ff330bb33264424c1da6cc7179ab8 Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Mon, 5 Aug 2024 20:30:42 +0200 Subject: can: m_can: Remove coalesing disable in isr during suspend We don't need to disable coalescing when the interrupt handler executes while the chip is suspended. The coalescing is already reset during suspend. Fixes: 07f25091ca02 ("can: m_can: Implement receive coalescing") Signed-off-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/all/20240805183047.305630-3-msp@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 31991e2f343e..ba416c973e8d 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1223,10 +1223,8 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) struct m_can_classdev *cdev = netdev_priv(dev); u32 ir; - if (pm_runtime_suspended(cdev->dev)) { - m_can_coalescing_disable(cdev); + if (pm_runtime_suspended(cdev->dev)) return IRQ_NONE; - } ir = m_can_read(cdev, M_CAN_IR); m_can_coalescing_update(cdev, ir); -- cgit From 40e4552eeef0e3090a5988de15889795936fd38f Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Mon, 5 Aug 2024 20:30:43 +0200 Subject: can: m_can: Remove m_can_rx_peripheral indirection m_can_rx_peripheral() is a wrapper around m_can_rx_handler() that calls m_can_disable_all_interrupts() on error. The same handling for the same error path is done in m_can_isr() as well. So remove m_can_rx_peripheral() and do the call from m_can_isr() directly. Signed-off-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/all/20240805183047.305630-4-msp@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index ba416c973e8d..a37ed376de9b 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1037,22 +1037,6 @@ end: return work_done; } -static int m_can_rx_peripheral(struct net_device *dev, u32 irqstatus) -{ - struct m_can_classdev *cdev = netdev_priv(dev); - int work_done; - - work_done = m_can_rx_handler(dev, NAPI_POLL_WEIGHT, irqstatus); - - /* Don't re-enable interrupts if the driver had a fatal error - * (e.g., FIFO read failure). - */ - if (work_done < 0) - m_can_disable_all_interrupts(cdev); - - return work_done; -} - static int m_can_poll(struct napi_struct *napi, int quota) { struct net_device *dev = napi->dev; @@ -1250,7 +1234,7 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) } else { int pkts; - pkts = m_can_rx_peripheral(dev, ir); + pkts = m_can_rx_handler(dev, NAPI_POLL_WEIGHT, ir); if (pkts < 0) goto out_fail; } -- cgit From 4d5159bfafa8d1a205d8213b7434e0402588b9ed Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Mon, 5 Aug 2024 20:30:44 +0200 Subject: can: m_can: Do not cancel timer from within timer On setups without interrupts, the interrupt handler is called from a timer callback. For non-peripheral receives napi is scheduled, interrupts are disabled and the timer is canceled with a blocking call. In case of an error this can happen as well. Check if napi is scheduled in the timer callback after the interrupt handler executed. If napi is scheduled, the timer is disabled. It will be reenabled by m_can_poll(). Return error values from the interrupt handler so that interrupt threads and timer callback can deal differently with it. In case of the timer we only disable the timer. The rest will be done when stopping the interface. Fixes: b382380c0d2d ("can: m_can: Add hrtimer to generate software interrupt") Fixes: a163c5761019 ("can: m_can: Start/Cancel polling timer together with interrupts") Signed-off-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/all/20240805183047.305630-5-msp@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 57 +++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index a37ed376de9b..5228304779f1 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -487,7 +487,7 @@ static inline void m_can_disable_all_interrupts(struct m_can_classdev *cdev) if (!cdev->net->irq) { dev_dbg(cdev->dev, "Stop hrtimer\n"); - hrtimer_cancel(&cdev->hrtimer); + hrtimer_try_to_cancel(&cdev->hrtimer); } } @@ -1201,11 +1201,15 @@ static void m_can_coalescing_update(struct m_can_classdev *cdev, u32 ir) HRTIMER_MODE_REL); } -static irqreturn_t m_can_isr(int irq, void *dev_id) +/* This interrupt handler is called either from the interrupt thread or a + * hrtimer. This has implications like cancelling a timer won't be possible + * blocking. + */ +static int m_can_interrupt_handler(struct m_can_classdev *cdev) { - struct net_device *dev = (struct net_device *)dev_id; - struct m_can_classdev *cdev = netdev_priv(dev); + struct net_device *dev = cdev->net; u32 ir; + int ret; if (pm_runtime_suspended(cdev->dev)) return IRQ_NONE; @@ -1232,11 +1236,9 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) m_can_disable_all_interrupts(cdev); napi_schedule(&cdev->napi); } else { - int pkts; - - pkts = m_can_rx_handler(dev, NAPI_POLL_WEIGHT, ir); - if (pkts < 0) - goto out_fail; + ret = m_can_rx_handler(dev, NAPI_POLL_WEIGHT, ir); + if (ret < 0) + return ret; } } @@ -1254,8 +1256,9 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) } else { if (ir & (IR_TEFN | IR_TEFW)) { /* New TX FIFO Element arrived */ - if (m_can_echo_tx_event(dev) != 0) - goto out_fail; + ret = m_can_echo_tx_event(dev); + if (ret != 0) + return ret; } } @@ -1263,16 +1266,31 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) can_rx_offload_threaded_irq_finish(&cdev->offload); return IRQ_HANDLED; +} -out_fail: - m_can_disable_all_interrupts(cdev); - return IRQ_HANDLED; +static irqreturn_t m_can_isr(int irq, void *dev_id) +{ + struct net_device *dev = (struct net_device *)dev_id; + struct m_can_classdev *cdev = netdev_priv(dev); + int ret; + + ret = m_can_interrupt_handler(cdev); + if (ret < 0) { + m_can_disable_all_interrupts(cdev); + return IRQ_HANDLED; + } + + return ret; } static enum hrtimer_restart m_can_coalescing_timer(struct hrtimer *timer) { struct m_can_classdev *cdev = container_of(timer, struct m_can_classdev, hrtimer); + if (cdev->can.state == CAN_STATE_BUS_OFF || + cdev->can.state == CAN_STATE_STOPPED) + return HRTIMER_NORESTART; + irq_wake_thread(cdev->net->irq, cdev->net); return HRTIMER_NORESTART; @@ -1973,8 +1991,17 @@ static enum hrtimer_restart hrtimer_callback(struct hrtimer *timer) { struct m_can_classdev *cdev = container_of(timer, struct m_can_classdev, hrtimer); + int ret; + + if (cdev->can.state == CAN_STATE_BUS_OFF || + cdev->can.state == CAN_STATE_STOPPED) + return HRTIMER_NORESTART; + + ret = m_can_interrupt_handler(cdev); - m_can_isr(0, cdev->net); + /* On error or if napi is scheduled to read, stop the timer */ + if (ret < 0 || napi_is_scheduled(&cdev->napi)) + return HRTIMER_NORESTART; hrtimer_forward_now(timer, ms_to_ktime(HRTIMER_POLL_INTERVAL_MS)); -- cgit From a572fea86c9b06cd3e6e89d79d565b52cb7e7cff Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Mon, 5 Aug 2024 20:30:45 +0200 Subject: can: m_can: disable_all_interrupts, not clear active_interrupts active_interrupts is a cache for the enabled interrupts and not the global masking of interrupts. Do not clear this variable otherwise we may loose the state of the interrupts. Fixes: 07f25091ca02 ("can: m_can: Implement receive coalescing") Signed-off-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/all/20240805183047.305630-6-msp@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 5228304779f1..68bd4a00ecca 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -483,7 +483,6 @@ static inline void m_can_disable_all_interrupts(struct m_can_classdev *cdev) { m_can_coalescing_disable(cdev); m_can_write(cdev, M_CAN_ILE, 0x0); - cdev->active_interrupts = 0x0; if (!cdev->net->irq) { dev_dbg(cdev->dev, "Stop hrtimer\n"); -- cgit From 733dbf556cd5b71d5e6f6aa7a93f117b438ab785 Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Mon, 5 Aug 2024 20:30:46 +0200 Subject: can: m_can: Reset cached active_interrupts on start To force writing the enabled interrupts, reset the active_interrupts cache. Fixes: 07f25091ca02 ("can: m_can: Implement receive coalescing") Signed-off-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/all/20240805183047.305630-7-msp@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 68bd4a00ecca..67c4c740c416 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1541,6 +1541,7 @@ static int m_can_chip_config(struct net_device *dev) else interrupts &= ~(IR_ERR_LEC_31X); } + cdev->active_interrupts = 0; m_can_interrupt_enable(cdev, interrupts); /* route all interrupts to INT0 */ -- cgit From e443d15b949952ee039b731d5c35bcbafa300024 Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Mon, 5 Aug 2024 20:30:47 +0200 Subject: can: m_can: Limit coalescing to peripheral instances The use of coalescing for non-peripheral chips in the current implementation is limited to non-existing. Disable the possibility to set coalescing through ethtool. Signed-off-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/all/20240805183047.305630-8-msp@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 67c4c740c416..012c3d22b01d 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2184,7 +2184,7 @@ static int m_can_set_coalesce(struct net_device *dev, return 0; } -static const struct ethtool_ops m_can_ethtool_ops = { +static const struct ethtool_ops m_can_ethtool_ops_coalescing = { .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS_IRQ | ETHTOOL_COALESCE_RX_MAX_FRAMES_IRQ | ETHTOOL_COALESCE_TX_USECS_IRQ | @@ -2195,18 +2195,20 @@ static const struct ethtool_ops m_can_ethtool_ops = { .set_coalesce = m_can_set_coalesce, }; -static const struct ethtool_ops m_can_ethtool_ops_polling = { +static const struct ethtool_ops m_can_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, }; -static int register_m_can_dev(struct net_device *dev) +static int register_m_can_dev(struct m_can_classdev *cdev) { + struct net_device *dev = cdev->net; + dev->flags |= IFF_ECHO; /* we support local echo */ dev->netdev_ops = &m_can_netdev_ops; - if (dev->irq) - dev->ethtool_ops = &m_can_ethtool_ops; + if (dev->irq && cdev->is_peripheral) + dev->ethtool_ops = &m_can_ethtool_ops_coalescing; else - dev->ethtool_ops = &m_can_ethtool_ops_polling; + dev->ethtool_ops = &m_can_ethtool_ops; return register_candev(dev); } @@ -2392,7 +2394,7 @@ int m_can_class_register(struct m_can_classdev *cdev) if (ret) goto rx_offload_del; - ret = register_m_can_dev(cdev->net); + ret = register_m_can_dev(cdev); if (ret) { dev_err(cdev->dev, "registering %s failed (err=%d)\n", cdev->net->name, ret); -- cgit From 50ea5449c56310d2d31c28ba91a59232116d3c1e Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 5 Jul 2024 17:28:27 +0200 Subject: can: mcp251xfd: fix ring configuration when switching from CAN-CC to CAN-FD mode If the ring (rx, tx) and/or coalescing parameters (rx-frames-irq, tx-frames-irq) have been configured while the interface was in CAN-CC mode, but the interface is brought up in CAN-FD mode, the ring parameters might be too big. Use the default CAN-FD values in this case. Fixes: 9263c2e92be9 ("can: mcp251xfd: ring: add support for runtime configurable RX/TX ring parameters") Link: https://lore.kernel.org/all/20240805-mcp251xfd-fix-ringconfig-v1-1-72086f0ca5ee@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-ram.c | 11 ++++++++++- drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c | 20 +++++++++++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ram.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ram.c index 9e8e82cdba46..61b0d6fa52dd 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ram.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ram.c @@ -97,7 +97,16 @@ void can_ram_get_layout(struct can_ram_layout *layout, if (ring) { u8 num_rx_coalesce = 0, num_tx_coalesce = 0; - num_rx = can_ram_rounddown_pow_of_two(config, &config->rx, 0, ring->rx_pending); + /* If the ring parameters have been configured in + * CAN-CC mode, but and we are in CAN-FD mode now, + * they might be to big. Use the default CAN-FD values + * in this case. + */ + num_rx = ring->rx_pending; + if (num_rx > layout->max_rx) + num_rx = layout->default_rx; + + num_rx = can_ram_rounddown_pow_of_two(config, &config->rx, 0, num_rx); /* The ethtool doc says: * To disable coalescing, set usecs = 0 and max_frames = 1. diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c index 7bd2bcb5cf87..f72582d4d3e8 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c @@ -469,11 +469,25 @@ int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv) /* switching from CAN-2.0 to CAN-FD mode or vice versa */ if (fd_mode != test_bit(MCP251XFD_FLAGS_FD_MODE, priv->flags)) { + const struct ethtool_ringparam ring = { + .rx_pending = priv->rx_obj_num, + .tx_pending = priv->tx->obj_num, + }; + const struct ethtool_coalesce ec = { + .rx_coalesce_usecs_irq = priv->rx_coalesce_usecs_irq, + .rx_max_coalesced_frames_irq = priv->rx_obj_num_coalesce_irq, + .tx_coalesce_usecs_irq = priv->tx_coalesce_usecs_irq, + .tx_max_coalesced_frames_irq = priv->tx_obj_num_coalesce_irq, + }; struct can_ram_layout layout; - can_ram_get_layout(&layout, &mcp251xfd_ram_config, NULL, NULL, fd_mode); - priv->rx_obj_num = layout.default_rx; - tx_ring->obj_num = layout.default_tx; + can_ram_get_layout(&layout, &mcp251xfd_ram_config, &ring, &ec, fd_mode); + + priv->rx_obj_num = layout.cur_rx; + priv->rx_obj_num_coalesce_irq = layout.rx_coalesce; + + tx_ring->obj_num = layout.cur_tx; + priv->tx_obj_num_coalesce_irq = layout.tx_coalesce; } if (fd_mode) { -- cgit From ac2b81eb8b2d104033560daea886ee84531e3d0a Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 5 Jul 2024 17:24:42 +0200 Subject: can: mcp251xfd: mcp251xfd_ring_init(): check TX-coalescing configuration When changing the interface from CAN-CC to CAN-FD mode the old coalescing parameters are re-used. This might cause problem, as the configured parameters are too big for CAN-FD mode. During testing an invalid TX coalescing configuration has been seen. The problem should be been fixed in the previous patch, but add a safeguard here to ensure that the number of TEF coalescing buffers (if configured) is exactly the half of all TEF buffers. Link: https://lore.kernel.org/all/20240805-mcp251xfd-fix-ringconfig-v1-2-72086f0ca5ee@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c index f72582d4d3e8..83c18035b2a2 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c @@ -290,7 +290,7 @@ int mcp251xfd_ring_init(struct mcp251xfd_priv *priv) const struct mcp251xfd_rx_ring *rx_ring; u16 base = 0, ram_used; u8 fifo_nr = 1; - int i; + int err = 0, i; netdev_reset_queue(priv->ndev); @@ -386,10 +386,18 @@ int mcp251xfd_ring_init(struct mcp251xfd_priv *priv) netdev_err(priv->ndev, "Error during ring configuration, using more RAM (%u bytes) than available (%u bytes).\n", ram_used, MCP251XFD_RAM_SIZE); - return -ENOMEM; + err = -ENOMEM; } - return 0; + if (priv->tx_obj_num_coalesce_irq && + priv->tx_obj_num_coalesce_irq * 2 != priv->tx->obj_num) { + netdev_err(priv->ndev, + "Error during ring configuration, number of TEF coalescing buffers (%u) must be half of TEF buffers (%u).\n", + priv->tx_obj_num_coalesce_irq, priv->tx->obj_num); + err = -EINVAL; + } + + return err; } void mcp251xfd_ring_free(struct mcp251xfd_priv *priv) -- cgit From e9408fa234fb2c0f087d718c7172212bb0dd7e6f Mon Sep 17 00:00:00 2001 From: Andrei Simion Date: Wed, 31 Jul 2024 17:41:00 +0300 Subject: MAINTAINERS: Update DTS path for ARM/Microchip (AT91) SoC Update the path to the supported DTS files for ARM/Microchip (AT91) SoC to ensure that the output of the get_maintainer.pl script includes the email addresses of the maintainers for all files located in arch/arm/boot/dts/microchip. Suggested-by: Conor Dooley Signed-off-by: Andrei Simion Reviewed-by: Cristian Birsan Link: https://lore.kernel.org/r/20240731144100.182221-1-andrei.simion@microchip.com Signed-off-by: Claudiu Beznea --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 42decde38320..515323593f99 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2535,8 +2535,7 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported W: http://www.linux4sam.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/at91/linux.git -F: arch/arm/boot/dts/microchip/at91* -F: arch/arm/boot/dts/microchip/sama* +F: arch/arm/boot/dts/microchip/ F: arch/arm/include/debug/at91.S F: arch/arm/mach-at91/ F: drivers/memory/atmel* -- cgit From 49f6202ce991742f451fc724f03d0c17460d06cd Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 3 Aug 2024 13:41:40 +0300 Subject: ASoC: codecs: lpass-macro: fix version strings returned for 1.x codecs Add missing cases to lpass_macro_get_codec_version_string() to let it print the correct codec version for 1.x codec platforms. Fixes: 378918d59181 ("ASoC: codecs: lpass-macro: add helpers to get codec version") Signed-off-by: Dmitry Baryshkov Link: https://patch.msgid.link/20240803-codec-version-v1-1-bc29baa5e417@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-macro-common.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/codecs/lpass-macro-common.h b/sound/soc/codecs/lpass-macro-common.h index 21cb30ab706d..fb4b96cb2b23 100644 --- a/sound/soc/codecs/lpass-macro-common.h +++ b/sound/soc/codecs/lpass-macro-common.h @@ -49,6 +49,12 @@ static inline void lpass_macro_pds_exit_action(void *pds) static inline const char *lpass_macro_get_codec_version_string(int version) { switch (version) { + case LPASS_CODEC_VERSION_1_0: + return "v1.0"; + case LPASS_CODEC_VERSION_1_1: + return "v1.1"; + case LPASS_CODEC_VERSION_1_2: + return "v1.2"; case LPASS_CODEC_VERSION_2_0: return "v2.0"; case LPASS_CODEC_VERSION_2_1: -- cgit From a9a7a2d80790d06cd32c535e2e7b10f72ce592e7 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 3 Aug 2024 13:41:41 +0300 Subject: ASoC: codecs: lpass-va-macro: warn on unknown version Warn the users if the driver doesn't know the codec version. This helps in debugging the issues with other codec not detecting the correct version. va_macro 3370000.codec: Unknown VA Codec version, ID: 00 / 0f / 00 Signed-off-by: Dmitry Baryshkov Link: https://patch.msgid.link/20240803-codec-version-v1-2-bc29baa5e417@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-va-macro.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/lpass-va-macro.c b/sound/soc/codecs/lpass-va-macro.c index a62ccd09bacd..8454193ed22a 100644 --- a/sound/soc/codecs/lpass-va-macro.c +++ b/sound/soc/codecs/lpass-va-macro.c @@ -1485,6 +1485,10 @@ static void va_macro_set_lpass_codec_version(struct va_macro *va) if ((core_id_0 == 0x02) && (core_id_1 == 0x0F) && (core_id_2 == 0x80 || core_id_2 == 0x81)) version = LPASS_CODEC_VERSION_2_8; + if (version == LPASS_CODEC_VERSION_UNKNOWN) + dev_warn(va->dev, "Unknown Codec version, ID: %02x / %02x / %02x\n", + core_id_0, core_id_1, core_id_2); + lpass_macro_set_codec_version(version); dev_dbg(va->dev, "LPASS Codec Version %s\n", lpass_macro_get_codec_version_string(version)); -- cgit From 06ce0af34177a110d6a5cf71f924965b9b230691 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 29 May 2024 00:11:23 +0100 Subject: soc: fsl: qbman: remove unused struct 'cgr_comp' 'cgr_comp' has been unused since commit 96f413f47677 ("soc/fsl/qbman: fix issue in qman_delete_cgr_safe()"). Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Sean Anderson Signed-off-by: Michael Ellerman Link: https://msgid.link/20240528231123.136664-1-linux@treblig.org --- drivers/soc/fsl/qbman/qman.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 7e9074519ad2..4dc8aba33d9b 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -2546,11 +2546,6 @@ release_lock: } EXPORT_SYMBOL(qman_delete_cgr); -struct cgr_comp { - struct qman_cgr *cgr; - struct completion completion; -}; - static void qman_delete_cgr_smp_call(void *p) { qman_delete_cgr((struct qman_cgr *)p); -- cgit From 1cb6ab446424649f03c82334634360c2e3043684 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 23 Jul 2024 17:15:44 +0800 Subject: MIPS: Loongson64: Set timer mode in cpu-probe Loongson64 C and G processors have EXTIMER feature which is conflicting with CP0 counter. Although the processor resets in EXTIMER disabled & INTIMER enabled mode, which is compatible with MIPS CP0 compare, firmware may attempt to enable EXTIMER and interfere CP0 compare. Set timer mode back to MIPS compatible mode to fix booting on systems with such firmware before we have an actual driver for EXTIMER. Cc: stable@vger.kernel.org Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/cpu-probe.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index bda7f193baab..af7412549e6e 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1724,12 +1724,16 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM | MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2); c->ases &= ~MIPS_ASE_VZ; /* VZ of Loongson-3A2000/3000 is incomplete */ + change_c0_config6(LOONGSON_CONF6_EXTIMER | LOONGSON_CONF6_INTIMER, + LOONGSON_CONF6_INTIMER); break; case PRID_IMP_LOONGSON_64G: __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); decode_cpucfg(c); + change_c0_config6(LOONGSON_CONF6_EXTIMER | LOONGSON_CONF6_INTIMER, + LOONGSON_CONF6_INTIMER); break; default: panic("Unknown Loongson Processor ID!"); -- cgit From 8c251c5ab1b7cd204231e4ee936bfe078a33f234 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 9 Aug 2024 08:27:49 +0000 Subject: cxl/pci: Get AER capability address from RCRB only for RCH dport cxl_setup_parent_dport() needs to get RCH dport AER capability address from RCRB to disable AER interrupt. The function does not check if dport is RCH dport, it will get a wrong pci_host_bridge structure by dport_dev in VH case because dport_dev points to a pci device(RP or switch DSP) rather than a pci host bridge device. Fixes: f05fd10d138d ("cxl/pci: Add RCH downstream port AER register discovery") Signed-off-by: Li Ming Reviewed-by: Dan Williams Reviewed-by: Ira Weiny Tested-by: Ira Weiny Tested-by: Alison Schofield Link: https://patch.msgid.link/20240809082750.3015641-2-ming4.li@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index a663e7566c48..51132a575b27 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -834,11 +834,13 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport) void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) { struct device *dport_dev = dport->dport_dev; - struct pci_host_bridge *host_bridge; - host_bridge = to_pci_host_bridge(dport_dev); - if (host_bridge->native_aer) - dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); + if (dport->rch) { + struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport_dev); + + if (host_bridge->native_aer) + dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); + } dport->reg_map.host = host; cxl_dport_map_regs(dport); -- cgit From 2c402bd2e85b44dc00ef85b5c0e217de684b5372 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Fri, 9 Aug 2024 08:27:50 +0000 Subject: cxl/test: Skip cxl_setup_parent_dport() for emulated dports The cxl_test unit test environment on qemu always hits below call trace with KASAN enabled: BUG: KASAN: slab-out-of-bounds in cxl_setup_parent_dport+0x480/0x530 [cxl_core] Read of size 1 at addr ff110000676014f8 by task (udev-worker)/676[ 24.424403] CPU: 2 PID: 676 Comm: (udev-worker) Tainted: G O N 6.10.0-qemucxl #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20240214-2.el9 02/14/2024 Call Trace: dump_stack_lvl+0xea/0x150 print_report+0xce/0x610 ? kasan_complete_mode_report_info+0x40/0x200 kasan_report+0xcc/0x110 __asan_report_load1_noabort+0x18/0x20 cxl_setup_parent_dport+0x480/0x530 [cxl_core] cxl_mem_probe+0x49b/0xaa0 [cxl_mem] cxl_test module models a CXL topology for testing, it creates some emulated dports with platform devices in the CXL topology, so the dport_dev of an emulated dport points to a platform device rather than a pci device or a pci host bridge in the case. Currently, cxl_setup_parent_dport() is used to set up RAS and AER capability on the dport connected to the CXL memory device, but cxl_test does not support RAS or AER functionality yet, so the fix is implementing a __wrap_cxl_setup_parent_dport() to filter out all emulated dports, guarantees only real dports can be handled by cxl_setup_parent_dport(). Fixes: f05fd10d138d ("cxl/pci: Add RCH downstream port AER register discovery") Reported-by: Pengfei Xu Closes: https://lore.kernel.org/linux-cxl/ZrHTBp2O+HtUe6kt@xpf.sh.intel.com/T/#t Signed-off-by: Li Ming Reviewed-by: Dan Williams Reviewed-by: Ira Weiny Reviewed-by: Alison Schofield Tested-by: Ira Weiny Tested-by: Alison Schofield Link: https://patch.msgid.link/20240809082750.3015641-3-ming4.li@intel.com Signed-off-by: Dave Jiang --- tools/testing/cxl/Kbuild | 1 + tools/testing/cxl/test/mock.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 030b388800f0..3d1ca9e38b1f 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -14,6 +14,7 @@ ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat +ldflags-y += --wrap=cxl_setup_parent_dport DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 6f737941dc0e..d619672faa49 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -299,6 +299,18 @@ void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL); +void __wrap_cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (!ops || !ops->is_mock_port(dport->dport_dev)) + cxl_setup_parent_dport(host, dport); + + put_cxl_mock_ops(index); +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_setup_parent_dport, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); -- cgit From 4e69cd835a2d5c3915838491f59a68ee697a87d0 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 14 Jul 2024 12:20:17 -0500 Subject: arm64: dts: imx8mp-beacon-kit: Fix Stereo Audio on WM8962 The L/R clock needs to be controlled by the SAI3 instead of the CODEC to properly achieve stereo sound. Doing this allows removes the need for unnecessary clock manipulation to try to get the CODEC's clock in sync with the SAI3 clock, since the CODEC can cope with a wide variety of clock inputs. Fixes: 161af16c18f3 ("arm64: dts: imx8mp-beacon-kit: Fix audio_pll2 clock") Fixes: 69e2f37a6ddc ("arm64: dts: imx8mp-beacon-kit: Enable WM8962 Audio CODEC") Signed-off-by: Adam Ford Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts b/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts index 17e2c19d8455..cc9b81d46188 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts @@ -211,13 +211,12 @@ simple-audio-card,cpu { sound-dai = <&sai3>; + frame-master; + bitclock-master; }; simple-audio-card,codec { sound-dai = <&wm8962>; - clocks = <&clk IMX8MP_CLK_IPP_DO_CLKO1>; - frame-master; - bitclock-master; }; }; }; @@ -507,10 +506,9 @@ &sai3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sai3>; - assigned-clocks = <&clk IMX8MP_CLK_SAI3>, - <&clk IMX8MP_AUDIO_PLL2> ; - assigned-clock-parents = <&clk IMX8MP_AUDIO_PLL2_OUT>; - assigned-clock-rates = <12288000>, <361267200>; + assigned-clocks = <&clk IMX8MP_CLK_SAI3>; + assigned-clock-parents = <&clk IMX8MP_AUDIO_PLL1_OUT>; + assigned-clock-rates = <12288000>; fsl,sai-mclk-direction-output; status = "okay"; }; -- cgit From 4736ad9422cb86f15464d2bd579c1f5d7786bb61 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 15 Jul 2024 11:32:31 +0200 Subject: arm64: dts: freescale: tqma9352: Fix watchdog reset On the tqma9352 the board is reset through an external PMIC, so set the fsl,ext-reset-output property to enable triggering the output pin on a watchdog trigger. Signed-off-by: Sascha Hauer Reviewed-by: Fabio Estevam Reviewed-by: Alexander Stein Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi b/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi index edbd8cad35bc..d3a0e1244aae 100644 --- a/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi @@ -156,6 +156,7 @@ &wdog3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_wdog>; + fsl,ext-reset-output; status = "okay"; }; -- cgit From 109f256285dd6a5f8c3bd0d80d39b2ccd4fe314e Mon Sep 17 00:00:00 2001 From: Shenwei Wang Date: Mon, 15 Jul 2024 08:17:22 -0500 Subject: arm64: dts: imx93: update default value for snps,clk-csr For the i.MX93 SoC, the default clock rate for the IP of STMMAC EQOS is 312.5 MHz. According to the following mapping table from the i.MX93 reference manual, this clock rate corresponds to a CSR value of 6. 0000: CSR clock = 60-100 MHz; MDC clock = CSR clock/42 0001: CSR clock = 100-150 MHz; MDC clock = CSR clock/62 0010: CSR clock = 20-35 MHz; MDC clock = CSR clock/16 0011: CSR clock = 35-60 MHz; MDC clock = CSR clock/26 0100: CSR clock = 150-250 MHz; MDC clock = CSR clock/102 0101: CSR clock = 250-300 MHz; MDC clock = CSR clock/124 0110: CSR clock = 300-500 MHz; MDC clock = CSR clock/204 0111: CSR clock = 500-800 MHz; MDC clock = CSR clock/324 Fixes: f2d03ba997cb ("arm64: dts: imx93: reorder device nodes") Signed-off-by: Shenwei Wang Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 4a3f42355cb8..a0993022c102 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -1105,7 +1105,7 @@ <&clk IMX93_CLK_SYS_PLL_PFD0_DIV2>; assigned-clock-rates = <100000000>, <250000000>; intf_mode = <&wakeupmix_gpr 0x28>; - snps,clk-csr = <0>; + snps,clk-csr = <6>; nvmem-cells = <ð_mac2>; nvmem-cell-names = "mac-address"; status = "disabled"; -- cgit From e332a5aba83500e8d422c90d2a84d8a5f888673e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 8 Aug 2024 02:47:28 +0900 Subject: treewide: remove unnecessary inclusion These files do not use any macros defined in . Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- drivers/accessibility/speakup/genmap.c | 1 - drivers/accessibility/speakup/makemapdata.c | 1 - drivers/staging/media/atomisp/include/linux/atomisp.h | 1 - samples/trace_events/trace_custom_sched.c | 1 - sound/soc/codecs/cs42l42.c | 1 - 5 files changed, 5 deletions(-) diff --git a/drivers/accessibility/speakup/genmap.c b/drivers/accessibility/speakup/genmap.c index 0125000e00d9..0882bab10fb8 100644 --- a/drivers/accessibility/speakup/genmap.c +++ b/drivers/accessibility/speakup/genmap.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include "utils.h" diff --git a/drivers/accessibility/speakup/makemapdata.c b/drivers/accessibility/speakup/makemapdata.c index d7d41bb9b05f..55e4ef8a93dc 100644 --- a/drivers/accessibility/speakup/makemapdata.c +++ b/drivers/accessibility/speakup/makemapdata.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include "utils.h" diff --git a/drivers/staging/media/atomisp/include/linux/atomisp.h b/drivers/staging/media/atomisp/include/linux/atomisp.h index 16c9da172c03..fefbe3cd08f3 100644 --- a/drivers/staging/media/atomisp/include/linux/atomisp.h +++ b/drivers/staging/media/atomisp/include/linux/atomisp.h @@ -20,7 +20,6 @@ #define _ATOM_ISP_H #include -#include /* struct media_device_info.hw_revision */ #define ATOMISP_HW_REVISION_MASK 0x0000ff00 diff --git a/samples/trace_events/trace_custom_sched.c b/samples/trace_events/trace_custom_sched.c index b99d9ab7db85..dd409b704b35 100644 --- a/samples/trace_events/trace_custom_sched.c +++ b/samples/trace_events/trace_custom_sched.c @@ -8,7 +8,6 @@ #define pr_fmt(fmt) fmt #include -#include #include #include diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 60d366e53526..6400ac875e6f 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -11,7 +11,6 @@ #include #include -#include #include #include #include -- cgit From 1472464c6248575bf2d01c7f076b94704bb32c95 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 8 Aug 2024 03:03:00 +0900 Subject: kbuild: avoid scripts/kallsyms parsing /dev/null On macOS, as reported by Daniel Gomez, getline() sets ENOTTY to errno if it is requested to read from /dev/null. If this is worth fixing, I would rather pass an empty file to scripts/kallsyms instead of adding the ugly #ifdef __APPLE__. Fixes: c442db3f49f2 ("kbuild: remove PROVIDE() for kallsyms symbols") Reported-by: Daniel Gomez Closes: https://lore.kernel.org/all/20240807-macos-build-support-v1-12-4cd1ded85694@samsung.com/ Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier Reviewed-by: Daniel Gomez --- scripts/link-vmlinux.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index f7b2503cdba9..41c68ae3415d 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -219,7 +219,8 @@ kallsymso= strip_debug= if is_enabled CONFIG_KALLSYMS; then - kallsyms /dev/null .tmp_vmlinux0.kallsyms + truncate -s0 .tmp_vmlinux.kallsyms0.syms + kallsyms .tmp_vmlinux.kallsyms0.syms .tmp_vmlinux0.kallsyms fi if is_enabled CONFIG_KALLSYMS || is_enabled CONFIG_DEBUG_INFO_BTF; then -- cgit From 8512fbb64b0e599412da661412d10d4ba1cb003c Mon Sep 17 00:00:00 2001 From: Michal Vokáč Date: Tue, 23 Jul 2024 16:25:19 +0200 Subject: ARM: dts: imx6dl-yapp43: Increase LED current to match the yapp4 HW design MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the imx6dl-yapp4 revision based boards, the RGB LED is not driven directly by the LP5562 driver but through FET transistors. Hence the LED current is not determined by the driver but by the LED series resistors. On the imx6dl-yapp43 revision based boards, we removed the FET transistors to drive the LED directly from the LP5562 but forgot to tune the output current to match the previous HW design. Set the LED current on imx6dl-yapp43 based boards to the same values measured on the imx6dl-yapp4 boards and limit the maximum current to 20mA. Fixes: 7da4734751e0 ("ARM: dts: imx6dl-yapp43: Add support for new HW revision of the IOTA board") Cc: Signed-off-by: Michal Vokáč Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/imx/imx6dl-yapp43-common.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/nxp/imx/imx6dl-yapp43-common.dtsi b/arch/arm/boot/dts/nxp/imx/imx6dl-yapp43-common.dtsi index 52a0f6ee426f..bcf4d9c870ec 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6dl-yapp43-common.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6dl-yapp43-common.dtsi @@ -274,24 +274,24 @@ led@0 { chan-name = "R"; - led-cur = /bits/ 8 <0x20>; - max-cur = /bits/ 8 <0x60>; + led-cur = /bits/ 8 <0x6e>; + max-cur = /bits/ 8 <0xc8>; reg = <0>; color = ; }; led@1 { chan-name = "G"; - led-cur = /bits/ 8 <0x20>; - max-cur = /bits/ 8 <0x60>; + led-cur = /bits/ 8 <0xbe>; + max-cur = /bits/ 8 <0xc8>; reg = <1>; color = ; }; led@2 { chan-name = "B"; - led-cur = /bits/ 8 <0x20>; - max-cur = /bits/ 8 <0x60>; + led-cur = /bits/ 8 <0xbe>; + max-cur = /bits/ 8 <0xc8>; reg = <2>; color = ; }; -- cgit From e7a9af8c93aa9f408f9972809b642faeec5287e1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Aug 2024 11:32:47 +0200 Subject: powerpc/mm: Fix size of allocated PGDIR Commit 6b0e82791bd0 ("powerpc/e500: switch to 64 bits PGD on 85xx (32 bits)") increased the size of PGD entries but failed to increase the PGD directory. Use the size of pgd_t instead of the size of pointers to calculate the allocated size. Reported-by: Guenter Roeck Fixes: 6b0e82791bd0 ("powerpc/e500: switch to 64 bits PGD on 85xx (32 bits)") Signed-off-by: Christophe Leroy Tested-by: Guenter Roeck Signed-off-by: Michael Ellerman Link: https://msgid.link/1cdaacb391cbd3e0240f0e0faf691202874e9422.1723109462.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/init-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 9b4a675eb8f8..2978fcbe307e 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -73,7 +73,7 @@ void setup_kup(void) #define CTOR(shift) static void ctor_##shift(void *addr) \ { \ - memset(addr, 0, sizeof(void *) << (shift)); \ + memset(addr, 0, sizeof(pgd_t) << (shift)); \ } CTOR(0); CTOR(1); CTOR(2); CTOR(3); CTOR(4); CTOR(5); CTOR(6); CTOR(7); @@ -117,7 +117,7 @@ EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */ void pgtable_cache_add(unsigned int shift) { char *name; - unsigned long table_size = sizeof(void *) << shift; + unsigned long table_size = sizeof(pgd_t) << shift; unsigned long align = table_size; /* When batching pgtable pointers for RCU freeing, we store -- cgit From e7e846dc6c73fbc94ae8b4ec20d05627646416f2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Aug 2024 09:05:08 +0200 Subject: powerpc/mm: Fix boot warning with hugepages and CONFIG_DEBUG_VIRTUAL Booting with CONFIG_DEBUG_VIRTUAL leads to following warning when passing hugepage reservation on command line: Kernel command line: hugepagesz=1g hugepages=1 hugepagesz=64m hugepages=1 hugepagesz=256m hugepages=1 noreboot HugeTLB: allocating 1 of page size 1.00 GiB failed. Only allocated 0 hugepages. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at arch/powerpc/include/asm/io.h:948 __alloc_bootmem_huge_page+0xd4/0x284 Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 6.10.0-rc6-00396-g6b0e82791bd0-dirty #936 Hardware name: MPC8544DS e500v2 0x80210030 MPC8544 DS NIP: c1020240 LR: c10201d0 CTR: 00000000 REGS: c13fdd30 TRAP: 0700 Not tainted (6.10.0-rc6-00396-g6b0e82791bd0-dirty) MSR: 00021000 CR: 44084288 XER: 20000000 GPR00: c10201d0 c13fde20 c130b560 e8000000 e8001000 00000000 00000000 c1420000 GPR08: 00000000 00028001 00000000 00000004 44084282 01066ac0 c0eb7c9c efffe149 GPR16: c0fc4228 0000005f ffffffff c0eb7d0c c0eb7cc0 c0eb7ce0 ffffffff 00000000 GPR24: c1441cec efffe153 e8001000 c14240c0 00000000 c1441d64 00000000 e8000000 NIP [c1020240] __alloc_bootmem_huge_page+0xd4/0x284 LR [c10201d0] __alloc_bootmem_huge_page+0x64/0x284 Call Trace: [c13fde20] [c10201d0] __alloc_bootmem_huge_page+0x64/0x284 (unreliable) [c13fde50] [c10207b8] hugetlb_hstate_alloc_pages+0x8c/0x3e8 [c13fdeb0] [c1021384] hugepages_setup+0x240/0x2cc [c13fdef0] [c1000574] unknown_bootoption+0xfc/0x280 [c13fdf30] [c0078904] parse_args+0x200/0x4c4 [c13fdfa0] [c1000d9c] start_kernel+0x238/0x7d0 [c13fdff0] [c0000434] set_ivor+0x12c/0x168 Code: 554aa33e 7c042840 3ce0c142 80a7427c 5109a016 50caa016 7c9a2378 7fdcf378 4180000c 7c052040 41810160 7c095040 <0fe00000> 38c00000 40800108 3c60c0eb ---[ end trace 0000000000000000 ]--- This is due to virt_addr_valid() using high_memory before it is set. high_memory is set in mem_init() using max_low_pfn, but max_low_pfn is available long before, it is set in mem_topology_setup(). So just like commit daa9ada2093e ("powerpc/mm: Fix boot crash with FLATMEM") moved the setting of max_mapnr immediately after the call to mem_topology_setup(), the same can be done for high_memory. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/62b69c4baad067093f39e7e60df0fe27a86b8d2a.1723100702.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/setup-common.c | 1 + arch/powerpc/mm/mem.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4bd2f87616ba..943430077375 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -959,6 +959,7 @@ void __init setup_arch(char **cmdline_p) mem_topology_setup(); /* Set max_mapnr before paging_init() */ set_max_mapnr(max_pfn); + high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* * Release secondary cpus out of their spinloops at 0x60 now that diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d325217ab201..da21cb018984 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -290,8 +290,6 @@ void __init mem_init(void) swiotlb_init(ppc_swiotlb_enable, ppc_swiotlb_flags); #endif - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - kasan_late_init(); memblock_free_all(); -- cgit From 054308ad90ae43ba2d4b9c83c6582e8fe94f6fed Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 12 Aug 2024 11:27:07 +0530 Subject: MAINTAINERS: Add Manivannan Sadhasivam as Reviewer for PCI native host bridge and endpoint drivers I've been reviewing the native host bridge drivers for some time and would like to be listed as a Reviewer formally. Link: https://lore.kernel.org/r/20240812055707.6778-1-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 42decde38320..3fb27f41515d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17616,6 +17616,7 @@ F: drivers/pci/controller/pci-xgene-msi.c PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS M: Lorenzo Pieralisi M: Krzysztof Wilczyński +R: Manivannan Sadhasivam R: Rob Herring L: linux-pci@vger.kernel.org S: Supported -- cgit From 6b9935da2a6b2a72774c15c844ae201a3fc362ac Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Sat, 10 Aug 2024 13:27:00 +0900 Subject: scsi: mpi3mr: Add missing spin_lock_init() for mrioc->trigger_lock Commit fc4444941140 ("scsi: mpi3mr: HDB allocation and posting for hardware and firmware buffers") added the spinlock trigger_lock to the struct mpi3mr_ioc. However, spin_lock_init() call was not added for it, then the lock does not work as expected. Also, the kernel reports the message below when lockdep is enabled. INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? To fix the issue and to avoid the INFO message, add the missing spin_lock_init() call. Fixes: fc4444941140 ("scsi: mpi3mr: HDB allocation and posting for hardware and firmware buffers") Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20240810042701.661841-2-shinichiro.kawasaki@wdc.com Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_os.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index ca8f132e03ae..616894571c6a 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -5234,6 +5234,7 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id) spin_lock_init(&mrioc->watchdog_lock); spin_lock_init(&mrioc->chain_buf_lock); spin_lock_init(&mrioc->sas_node_lock); + spin_lock_init(&mrioc->trigger_lock); INIT_LIST_HEAD(&mrioc->fwevt_list); INIT_LIST_HEAD(&mrioc->tgtdev_list); -- cgit From 6c17ea1f3eaa330d445ac14a9428402ce4e3055e Mon Sep 17 00:00:00 2001 From: "Nysal Jan K.A" Date: Wed, 31 Jul 2024 08:31:12 +0530 Subject: cpu/SMT: Enable SMT only if a core is online If a core is offline then enabling SMT should not online CPUs of this core. By enabling SMT, what is intended is either changing the SMT value from "off" to "on" or setting the SMT level (threads per core) from a lower to higher value. On PowerPC the ppc64_cpu utility can be used, among other things, to perform the following functions: ppc64_cpu --cores-on # Get the number of online cores ppc64_cpu --cores-on=X # Put exactly X cores online ppc64_cpu --offline-cores=X[,Y,...] # Put specified cores offline ppc64_cpu --smt={on|off|value} # Enable, disable or change SMT level If the user has decided to offline certain cores, enabling SMT should not online CPUs in those cores. This patch fixes the issue and changes the behaviour as described, by introducing an arch specific function topology_is_core_online(). It is currently implemented only for PowerPC. Fixes: 73c58e7e1412 ("powerpc: Add HOTPLUG_SMT support") Reported-by: Tyrel Datwyler Closes: https://groups.google.com/g/powerpc-utils-devel/c/wrwVzAAnRlI/m/5KJSoqP4BAAJ Signed-off-by: Nysal Jan K.A Reviewed-by: Shrikanth Hegde Reviewed-by: Thomas Gleixner Signed-off-by: Michael Ellerman Link: https://msgid.link/20240731030126.956210-2-nysal@linux.ibm.com --- Documentation/ABI/testing/sysfs-devices-system-cpu | 3 ++- kernel/cpu.c | 12 +++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 325873385b71..de725ca3be82 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -562,7 +562,8 @@ Description: Control Symmetric Multi Threading (SMT) ================ ========================================= If control status is "forceoff" or "notsupported" writes - are rejected. + are rejected. Note that enabling SMT on PowerPC skips + offline cores. What: /sys/devices/system/cpu/cpuX/power/energy_perf_bias Date: March 2019 diff --git a/kernel/cpu.c b/kernel/cpu.c index 1209ddaec026..b1fd2a3db91a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2689,6 +2689,16 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) return ret; } +/** + * Check if the core a CPU belongs to is online + */ +#if !defined(topology_is_core_online) +static inline bool topology_is_core_online(unsigned int cpu) +{ + return true; +} +#endif + int cpuhp_smt_enable(void) { int cpu, ret = 0; @@ -2699,7 +2709,7 @@ int cpuhp_smt_enable(void) /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) continue; - if (!cpu_smt_thread_allowed(cpu)) + if (!cpu_smt_thread_allowed(cpu) || !topology_is_core_online(cpu)) continue; ret = _cpu_up(cpu, 0, CPUHP_ONLINE); if (ret) -- cgit From 227bbaabe64b6f9cd98aa051454c1d4a194a8c6a Mon Sep 17 00:00:00 2001 From: "Nysal Jan K.A" Date: Wed, 31 Jul 2024 08:31:13 +0530 Subject: powerpc/topology: Check if a core is online topology_is_core_online() checks if the core a CPU belongs to is online. The core is online if at least one of the sibling CPUs is online. The first CPU of an online core is also online in the common case, so this should be fairly quick. Fixes: 73c58e7e1412 ("powerpc: Add HOTPLUG_SMT support") Signed-off-by: Nysal Jan K.A Reviewed-by: Shrikanth Hegde Signed-off-by: Michael Ellerman Link: https://msgid.link/20240731030126.956210-3-nysal@linux.ibm.com --- arch/powerpc/include/asm/topology.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index f4e6f2dd04b7..16bacfe8c7a2 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -145,6 +145,7 @@ static inline int cpu_to_coregroup_id(int cpu) #ifdef CONFIG_HOTPLUG_SMT #include +#include #include static inline bool topology_is_primary_thread(unsigned int cpu) @@ -156,6 +157,18 @@ static inline bool topology_smt_thread_allowed(unsigned int cpu) { return cpu_thread_in_core(cpu) < cpu_smt_num_threads; } + +#define topology_is_core_online topology_is_core_online +static inline bool topology_is_core_online(unsigned int cpu) +{ + int i, first_cpu = cpu_first_thread_sibling(cpu); + + for (i = first_cpu; i < first_cpu + threads_per_core; ++i) { + if (cpu_online(i)) + return true; + } + return false; +} #endif #endif /* __KERNEL__ */ -- cgit From 8c6b808c8c2a9de21503944bd6308979410fd812 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Sat, 10 Aug 2024 13:27:01 +0900 Subject: scsi: mpi3mr: Avoid MAX_PAGE_ORDER WARNING for buffer allocations Commit fc4444941140 ("scsi: mpi3mr: HDB allocation and posting for hardware and firmware buffers") added mpi3mr_alloc_diag_bufs() which calls dma_alloc_coherent() to allocate the trace buffer and the firmware buffer. mpi3mr_alloc_diag_bufs() decides the buffer sizes from the driver configuration. In my environment, the sizes are 8MB. With the sizes, dma_alloc_coherent() fails and report this WARNING: WARNING: CPU: 4 PID: 438 at mm/page_alloc.c:4676 __alloc_pages_noprof+0x52f/0x640 The WARNING indicates that the order of the allocation size is larger than MAX_PAGE_ORDER. After this failure, mpi3mr_alloc_diag_bufs() reduces the buffer sizes and retries dma_alloc_coherent(). In the end, the buffer allocations succeed with 4MB size in my environment, which corresponds to MAX_PAGE_ORDER=10. Though the allocations succeed, the WARNING message is misleading and should be avoided. To avoid the WARNING, check the orders of the buffer allocation sizes before calling dma_alloc_coherent(). If the orders are larger than MAX_PAGE_ORDER, fall back to the retry path. Fixes: fc4444941140 ("scsi: mpi3mr: HDB allocation and posting for hardware and firmware buffers") Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20240810042701.661841-3-shinichiro.kawasaki@wdc.com Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_app.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index 8b0eded6ef36..01f035f9330e 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -100,7 +100,8 @@ retry_trace: dprint_init(mrioc, "trying to allocate trace diag buffer of size = %dKB\n", trace_size / 1024); - if (mpi3mr_alloc_trace_buffer(mrioc, trace_size)) { + if (get_order(trace_size) > MAX_PAGE_ORDER || + mpi3mr_alloc_trace_buffer(mrioc, trace_size)) { retry = true; trace_size -= trace_dec_size; dprint_init(mrioc, "trace diag buffer allocation failed\n" @@ -118,8 +119,12 @@ retry_fw: diag_buffer->type = MPI3_DIAG_BUFFER_TYPE_FW; diag_buffer->status = MPI3MR_HDB_BUFSTATUS_NOT_ALLOCATED; if ((mrioc->facts.diag_fw_sz < fw_size) && (fw_size >= fw_min_size)) { - diag_buffer->addr = dma_alloc_coherent(&mrioc->pdev->dev, - fw_size, &diag_buffer->dma_addr, GFP_KERNEL); + if (get_order(fw_size) <= MAX_PAGE_ORDER) { + diag_buffer->addr + = dma_alloc_coherent(&mrioc->pdev->dev, fw_size, + &diag_buffer->dma_addr, + GFP_KERNEL); + } if (!retry) dprint_init(mrioc, "%s:trying to allocate firmware diag buffer of size = %dKB\n", -- cgit From cd0c6872aab4d2c556a5e953e6926a1b4485e543 Mon Sep 17 00:00:00 2001 From: Markus Niebel Date: Wed, 24 Jul 2024 14:58:48 +0200 Subject: arm64: dts: freescale: imx93-tqma9352: fix CMA alloc-ranges DRAM starts at 0x80000000. Fixes: c982ecfa7992 ("arm64: dts: freescale: add initial device tree for MBa93xxLA SBC board") Signed-off-by: Markus Niebel Signed-off-by: Alexander Stein Reviewed-by: Peng Fan Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi b/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi index d3a0e1244aae..72a9a5d4e27a 100644 --- a/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi @@ -19,7 +19,7 @@ linux,cma { compatible = "shared-dma-pool"; reusable; - alloc-ranges = <0 0x60000000 0 0x40000000>; + alloc-ranges = <0 0x80000000 0 0x40000000>; size = <0 0x10000000>; linux,cma-default; }; -- cgit From 5f0a894bfa3c26ce61deda4c52b12e8ec84d876a Mon Sep 17 00:00:00 2001 From: Markus Niebel Date: Wed, 24 Jul 2024 14:58:52 +0200 Subject: arm64: dts: freescale: imx93-tqma9352-mba93xxla: fix typo Fix typo in assignment of SD-Card cd-gpios. Fixes: c982ecfa7992 ("arm64: dts: freescale: add initial device tree for MBa93xxLA SBC board") Signed-off-by: Markus Niebel Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts index da8f19a646a9..e2ee9f5a042c 100644 --- a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts +++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts @@ -499,7 +499,7 @@ pinctrl-0 = <&pinctrl_usdhc2_hs>, <&pinctrl_usdhc2_gpio>; pinctrl-1 = <&pinctrl_usdhc2_uhs>, <&pinctrl_usdhc2_gpio>; pinctrl-2 = <&pinctrl_usdhc2_uhs>, <&pinctrl_usdhc2_gpio>; - cd-gpios = <&gpio3 00 GPIO_ACTIVE_LOW>; + cd-gpios = <&gpio3 0 GPIO_ACTIVE_LOW>; vmmc-supply = <®_usdhc2_vmmc>; bus-width = <4>; no-sdio; -- cgit From 046667c4d3196938e992fba0dfcde570aa85cd0e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Jul 2024 14:45:08 -0400 Subject: memcg_write_event_control(): fix a user-triggerable oops we are *not* guaranteed that anything past the terminating NUL is mapped (let alone initialized with anything sane). Fixes: 0dea116876ee ("cgroup: implement eventfd-based generic API for notifications") Cc: stable@vger.kernel.org Cc: Andrew Morton Acked-by: Michal Hocko Signed-off-by: Al Viro --- mm/memcontrol-v1.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index 2aeea4d8bf8e..417c96f2da28 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -1842,9 +1842,12 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, buf = endp + 1; cfd = simple_strtoul(buf, &endp, 10); - if ((*endp != ' ') && (*endp != '\0')) + if (*endp == '\0') + buf = endp; + else if (*endp == ' ') + buf = endp + 1; + else return -EINVAL; - buf = endp + 1; event = kzalloc(sizeof(*event), GFP_KERNEL); if (!event) -- cgit From 915d9d914a25575055804cb8bfd13490111282ec Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 29 Jul 2024 14:41:11 +0800 Subject: arm64: dts: imx95: correct a55 power-domains The A55 power domains is for SCMI performance usage, so for device power on/off. Correct the power-domains entry to use scmi_perf not scmi_devpd. Fixes: 5e3cbb8e4256 ("arm64: dts: freescale: add i.MX95 basic dtsi") Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx95.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi index 1bbf9a0468f6..3499d4eb2496 100644 --- a/arch/arm64/boot/dts/freescale/imx95.dtsi +++ b/arch/arm64/boot/dts/freescale/imx95.dtsi @@ -27,7 +27,7 @@ reg = <0x0>; enable-method = "psci"; #cooling-cells = <2>; - power-domains = <&scmi_devpd IMX95_PERF_A55>; + power-domains = <&scmi_perf IMX95_PERF_A55>; power-domain-names = "perf"; i-cache-size = <32768>; i-cache-line-size = <64>; @@ -44,7 +44,7 @@ reg = <0x100>; enable-method = "psci"; #cooling-cells = <2>; - power-domains = <&scmi_devpd IMX95_PERF_A55>; + power-domains = <&scmi_perf IMX95_PERF_A55>; power-domain-names = "perf"; i-cache-size = <32768>; i-cache-line-size = <64>; @@ -61,7 +61,7 @@ reg = <0x200>; enable-method = "psci"; #cooling-cells = <2>; - power-domains = <&scmi_devpd IMX95_PERF_A55>; + power-domains = <&scmi_perf IMX95_PERF_A55>; power-domain-names = "perf"; i-cache-size = <32768>; i-cache-line-size = <64>; @@ -78,7 +78,7 @@ reg = <0x300>; enable-method = "psci"; #cooling-cells = <2>; - power-domains = <&scmi_devpd IMX95_PERF_A55>; + power-domains = <&scmi_perf IMX95_PERF_A55>; power-domain-names = "perf"; i-cache-size = <32768>; i-cache-line-size = <64>; @@ -93,7 +93,7 @@ device_type = "cpu"; compatible = "arm,cortex-a55"; reg = <0x400>; - power-domains = <&scmi_devpd IMX95_PERF_A55>; + power-domains = <&scmi_perf IMX95_PERF_A55>; power-domain-names = "perf"; enable-method = "psci"; #cooling-cells = <2>; @@ -110,7 +110,7 @@ device_type = "cpu"; compatible = "arm,cortex-a55"; reg = <0x500>; - power-domains = <&scmi_devpd IMX95_PERF_A55>; + power-domains = <&scmi_perf IMX95_PERF_A55>; power-domain-names = "perf"; enable-method = "psci"; #cooling-cells = <2>; -- cgit From d3c2b2a8923abc087c2e585f5828fb7fae8fedfe Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 5 Aug 2024 11:05:35 +0800 Subject: arm64: dts: imx95: correct L3Cache cache-sets The L3Cache size is 512KB. Size = Cache Line Size(64) * num sets(512) * Assoc(0x10). Correct the number of Cache sets. Fixes: 5e3cbb8e4256 ("arm64: dts: freescale: add i.MX95 basic dtsi") Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx95.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi index 3499d4eb2496..425272aa5a81 100644 --- a/arch/arm64/boot/dts/freescale/imx95.dtsi +++ b/arch/arm64/boot/dts/freescale/imx95.dtsi @@ -187,7 +187,7 @@ compatible = "cache"; cache-size = <524288>; cache-line-size = <64>; - cache-sets = <1024>; + cache-sets = <512>; cache-level = <3>; cache-unified; }; -- cgit From d2dfed310aae0739dc87b68c660357e6a4f29819 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Tue, 6 Aug 2024 11:46:03 +1200 Subject: platform/x86: asus-wmi: Add quirk for ROG Ally X MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new ROG Ally X functions the same as the previus model so we can use the same method to ensure the MCU USB devices wake and reconnect correctly. Given that two devices marks the start of a trend, this patch also adds a quirk table to make future additions easier if the MCU is the same. Signed-off-by: Luke D. Jones Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240805234603.38736-1-luke@ljones.dev Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-wmi.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index cc735931f97b..37636e5a38e3 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -146,6 +146,20 @@ static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; static int throttle_thermal_policy_write(struct asus_wmi *); +static const struct dmi_system_id asus_ally_mcu_quirk[] = { + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "RC71L"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "RC72L"), + }, + }, + { }, +}; + static bool ashs_present(void) { int i = 0; @@ -4685,7 +4699,7 @@ static int asus_wmi_add(struct platform_device *pdev) asus->dgpu_disable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_DGPU); asus->kbd_rgb_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_STATE); asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) - && dmi_match(DMI_BOARD_NAME, "RC71L"); + && dmi_check_system(asus_ally_mcu_quirk); if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE)) asus->mini_led_dev_id = ASUS_WMI_DEVID_MINI_LED_MODE; -- cgit From 9c8e022567bbec53bee8ae75c44b3d6cd2080d42 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 11 Aug 2024 15:19:44 +0200 Subject: platform/surface: aggregator_registry: Add Support for Surface Pro 10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SAM client device nodes for the Surface Pro 10. It seems to use the same SAM client devices as the Surface Pro 9, so re-use its node group. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20240811131948.261806-2-luzmaximilian@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/surface/surface_aggregator_registry.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 1c4d74db08c9..fa5b896e5f4e 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -324,7 +324,7 @@ static const struct software_node *ssam_node_group_sp8[] = { NULL, }; -/* Devices for Surface Pro 9 */ +/* Devices for Surface Pro 9 and 10 */ static const struct software_node *ssam_node_group_sp9[] = { &ssam_node_root, &ssam_node_hub_kip, @@ -365,6 +365,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Pro 9 */ { "MSHW0343", (unsigned long)ssam_node_group_sp9 }, + /* Surface Pro 10 */ + { "MSHW0510", (unsigned long)ssam_node_group_sp9 }, + /* Surface Book 2 */ { "MSHW0107", (unsigned long)ssam_node_group_gen5 }, -- cgit From ed235163c3f02329d5e37ed4485bbc39ed2568d4 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 11 Aug 2024 15:19:45 +0200 Subject: platform/surface: aggregator_registry: Add support for Surface Laptop Go 3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SAM client device nodes for the Surface Laptop Go 3. It seems to use the same SAM client devices as the Surface Laptop Go 1 and 2, so re-use their node group. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20240811131948.261806-3-luzmaximilian@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/surface/surface_aggregator_registry.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index fa5b896e5f4e..4d36810c2308 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -398,6 +398,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Laptop Go 2 */ { "MSHW0290", (unsigned long)ssam_node_group_slg1 }, + /* Surface Laptop Go 3 */ + { "MSHW0440", (unsigned long)ssam_node_group_slg1 }, + /* Surface Laptop Studio */ { "MSHW0123", (unsigned long)ssam_node_group_sls }, -- cgit From 28d04b4a2cc20981c95787f9c449e6fc51d904f9 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 11 Aug 2024 15:19:46 +0200 Subject: platform/surface: aggregator_registry: Add support for Surface Laptop Studio 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SAM client device nodes for the Surface Laptop Studio 2 (SLS2). The SLS2 is quite similar to the SLS1, but it does not provide the touchpad as a SAM-HID device. Therefore, add a new node group for the SLS2 and update the comments accordingly. In addition, it uses the new fan control interface. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20240811131948.261806-4-luzmaximilian@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../platform/surface/surface_aggregator_registry.c | 27 ++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 4d36810c2308..892ba9549f6a 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -273,8 +273,8 @@ static const struct software_node *ssam_node_group_sl5[] = { NULL, }; -/* Devices for Surface Laptop Studio. */ -static const struct software_node *ssam_node_group_sls[] = { +/* Devices for Surface Laptop Studio 1. */ +static const struct software_node *ssam_node_group_sls1[] = { &ssam_node_root, &ssam_node_bat_ac, &ssam_node_bat_main, @@ -289,6 +289,22 @@ static const struct software_node *ssam_node_group_sls[] = { NULL, }; +/* Devices for Surface Laptop Studio 2. */ +static const struct software_node *ssam_node_group_sls2[] = { + &ssam_node_root, + &ssam_node_bat_ac, + &ssam_node_bat_main, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, + &ssam_node_pos_tablet_switch, + &ssam_node_hid_sam_keyboard, + &ssam_node_hid_sam_penstash, + &ssam_node_hid_sam_sensors, + &ssam_node_hid_sam_ucm_ucsi, + NULL, +}; + /* Devices for Surface Laptop Go. */ static const struct software_node *ssam_node_group_slg1[] = { &ssam_node_root, @@ -401,8 +417,11 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Laptop Go 3 */ { "MSHW0440", (unsigned long)ssam_node_group_slg1 }, - /* Surface Laptop Studio */ - { "MSHW0123", (unsigned long)ssam_node_group_sls }, + /* Surface Laptop Studio 1 */ + { "MSHW0123", (unsigned long)ssam_node_group_sls1 }, + + /* Surface Laptop Studio 2 */ + { "MSHW0360", (unsigned long)ssam_node_group_sls2 }, { }, }; -- cgit From 002adda09bc1c983c75c82a7e12285c7423aec31 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 11 Aug 2024 15:19:47 +0200 Subject: platform/surface: aggregator_registry: Add fan and thermal sensor support for Surface Laptop 5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EC on the Surface Laptop 5 exposes the fan interface. With the recently introduced driver for it, we can now also enable it here. In addition, also enable the thermal sensor interface. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20240811131948.261806-5-luzmaximilian@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/surface/surface_aggregator_registry.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 892ba9549f6a..4d3f5b3111ba 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -265,7 +265,9 @@ static const struct software_node *ssam_node_group_sl5[] = { &ssam_node_root, &ssam_node_bat_ac, &ssam_node_bat_main, - &ssam_node_tmp_perf_profile, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, &ssam_node_hid_main_keyboard, &ssam_node_hid_main_touchpad, &ssam_node_hid_main_iid5, -- cgit From 99ae7b9ba047ad029a0a23b2bd51608ce79c8e97 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 11 Aug 2024 15:19:48 +0200 Subject: platform/surface: aggregator_registry: Add support for Surface Laptop 6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SAM client device nodes for the Surface Laptop Studio 6 (SL6). The SL6 is similar to the SL5, with the typical battery/AC, platform profile, and HID nodes. It also has support for the newly supported fan interface. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20240811131948.261806-6-luzmaximilian@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../platform/surface/surface_aggregator_registry.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 4d3f5b3111ba..a23dff35f8ca 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -275,6 +275,22 @@ static const struct software_node *ssam_node_group_sl5[] = { NULL, }; +/* Devices for Surface Laptop 6. */ +static const struct software_node *ssam_node_group_sl6[] = { + &ssam_node_root, + &ssam_node_bat_ac, + &ssam_node_bat_main, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, + &ssam_node_hid_main_keyboard, + &ssam_node_hid_main_touchpad, + &ssam_node_hid_main_iid5, + &ssam_node_hid_sam_sensors, + &ssam_node_hid_sam_ucm_ucsi, + NULL, +}; + /* Devices for Surface Laptop Studio 1. */ static const struct software_node *ssam_node_group_sls1[] = { &ssam_node_root, @@ -410,6 +426,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Laptop 5 */ { "MSHW0350", (unsigned long)ssam_node_group_sl5 }, + /* Surface Laptop 6 */ + { "MSHW0530", (unsigned long)ssam_node_group_sl6 }, + /* Surface Laptop Go 1 */ { "MSHW0118", (unsigned long)ssam_node_group_slg1 }, -- cgit From ccbde4b128ef9c73d14d0d7817d68ef795f6d131 Mon Sep 17 00:00:00 2001 From: Eli Billauer Date: Thu, 1 Aug 2024 15:11:26 +0300 Subject: char: xillybus: Don't destroy workqueue from work item running on it Triggered by a kref decrement, destroy_workqueue() may be called from within a work item for destroying its own workqueue. This illegal situation is averted by adding a module-global workqueue for exclusive use of the offending work item. Other work items continue to be queued on per-device workqueues to ensure performance. Reported-by: syzbot+91dbdfecdd3287734d8e@syzkaller.appspotmail.com Cc: stable Closes: https://lore.kernel.org/lkml/0000000000000ab25a061e1dfe9f@google.com/ Signed-off-by: Eli Billauer Link: https://lore.kernel.org/r/20240801121126.60183-1-eli.billauer@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/xillybus/xillyusb.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/char/xillybus/xillyusb.c b/drivers/char/xillybus/xillyusb.c index 5a5afa14ca8c..33ca0f4af390 100644 --- a/drivers/char/xillybus/xillyusb.c +++ b/drivers/char/xillybus/xillyusb.c @@ -50,6 +50,7 @@ MODULE_LICENSE("GPL v2"); static const char xillyname[] = "xillyusb"; static unsigned int fifo_buf_order; +static struct workqueue_struct *wakeup_wq; #define USB_VENDOR_ID_XILINX 0x03fd #define USB_VENDOR_ID_ALTERA 0x09fb @@ -569,10 +570,6 @@ static void cleanup_dev(struct kref *kref) * errors if executed. The mechanism relies on that xdev->error is assigned * a non-zero value by report_io_error() prior to queueing wakeup_all(), * which prevents bulk_in_work() from calling process_bulk_in(). - * - * The fact that wakeup_all() and bulk_in_work() are queued on the same - * workqueue makes their concurrent execution very unlikely, however the - * kernel's API doesn't seem to ensure this strictly. */ static void wakeup_all(struct work_struct *work) @@ -627,7 +624,7 @@ static void report_io_error(struct xillyusb_dev *xdev, if (do_once) { kref_get(&xdev->kref); /* xdev is used by work item */ - queue_work(xdev->workq, &xdev->wakeup_workitem); + queue_work(wakeup_wq, &xdev->wakeup_workitem); } } @@ -2258,6 +2255,10 @@ static int __init xillyusb_init(void) { int rc = 0; + wakeup_wq = alloc_workqueue(xillyname, 0, 0); + if (!wakeup_wq) + return -ENOMEM; + if (LOG2_INITIAL_FIFO_BUF_SIZE > PAGE_SHIFT) fifo_buf_order = LOG2_INITIAL_FIFO_BUF_SIZE - PAGE_SHIFT; else @@ -2265,11 +2266,16 @@ static int __init xillyusb_init(void) rc = usb_register(&xillyusb_driver); + if (rc) + destroy_workqueue(wakeup_wq); + return rc; } static void __exit xillyusb_exit(void) { + destroy_workqueue(wakeup_wq); + usb_deregister(&xillyusb_driver); } -- cgit From bc923d594db21bee0ead128eb4bb78f7e77467a4 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 11 Aug 2024 14:46:44 +0200 Subject: platform/surface: aggregator: Fix warning when controller is destroyed in probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a small window in ssam_serial_hub_probe() where the controller is initialized but has not been started yet. Specifically, between ssam_controller_init() and ssam_controller_start(). Any failure in this window, for example caused by a failure of serdev_device_open(), currently results in an incorrect warning being emitted. In particular, any failure in this window results in the controller being destroyed via ssam_controller_destroy(). This function checks the state of the controller and, in an attempt to validate that the controller has been cleanly shut down before we try and deallocate any resources, emits a warning if that state is not SSAM_CONTROLLER_STOPPED. However, since we have only just initialized the controller and have not yet started it, its state is SSAM_CONTROLLER_INITIALIZED. Note that this is the only point at which the controller has this state, as it will change after we start the controller with ssam_controller_start() and never revert back. Further, at this point no communication has taken place and the sender and receiver threads have not been started yet (and we may not even have an open serdev device either). Therefore, it is perfectly safe to call ssam_controller_destroy() with a state of SSAM_CONTROLLER_INITIALIZED. This, however, means that the warning currently being emitted is incorrect. Fix it by extending the check. Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20240811124645.246016-1-luzmaximilian@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/surface/aggregator/controller.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c index 7fc602e01487..7e89f547999b 100644 --- a/drivers/platform/surface/aggregator/controller.c +++ b/drivers/platform/surface/aggregator/controller.c @@ -1354,7 +1354,8 @@ void ssam_controller_destroy(struct ssam_controller *ctrl) if (ctrl->state == SSAM_CONTROLLER_UNINITIALIZED) return; - WARN_ON(ctrl->state != SSAM_CONTROLLER_STOPPED); + WARN_ON(ctrl->state != SSAM_CONTROLLER_STOPPED && + ctrl->state != SSAM_CONTROLLER_INITIALIZED); /* * Note: New events could still have been received after the previous -- cgit From dcdb52d948f3a17ccd3fce757d9bd981d7c32039 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Aug 2024 15:44:07 +0300 Subject: usb: xhci: Check for xhci->interrupters being allocated in xhci_mem_clearup() If xhci_mem_init() fails, it calls into xhci_mem_cleanup() to mop up the damage. If it fails early enough, before xhci->interrupters is allocated but after xhci->max_interrupters has been set, which happens in most (all?) cases, things get uglier, as xhci_mem_cleanup() unconditionally derefences xhci->interrupters. With prejudice. Gate the interrupt freeing loop with a check on xhci->interrupters being non-NULL. Found while debugging a DMA allocation issue that led the XHCI driver on this exact path. Fixes: c99b38c41234 ("xhci: add support to allocate several interrupters") Cc: Mathias Nyman Cc: Wesley Cheng Cc: Greg Kroah-Hartman Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org # 6.8+ Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240809124408.505786-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index d7654f475daf..937ce5fd5809 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1872,7 +1872,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) cancel_delayed_work_sync(&xhci->cmd_timer); - for (i = 0; i < xhci->max_interrupters; i++) { + for (i = 0; xhci->interrupters && i < xhci->max_interrupters; i++) { if (xhci->interrupters[i]) { xhci_remove_interrupter(xhci, xhci->interrupters[i]); xhci_free_interrupter(xhci, xhci->interrupters[i]); -- cgit From 741b41b48faf41c0bcf3c26fbb3448b0fda4fc5d Mon Sep 17 00:00:00 2001 From: Niklas Neronin Date: Fri, 9 Aug 2024 15:44:08 +0300 Subject: usb: xhci: fix duplicate stall handling in handle_tx_event() Stall handling is managed in the 'process_*' functions, which are called right before the 'goto' stall handling code snippet. Thus, there should be a return after the 'process_*' functions. Otherwise, the stall code may run twice. Fixes: 1b349f214ac7 ("usb: xhci: add 'goto' for halted endpoint check in handle_tx_event()") Reported-by: Michal Pecio Signed-off-by: Niklas Neronin Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240809124408.505786-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b7517c3c8059..4ea2c3e072a9 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2910,6 +2910,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, process_isoc_td(xhci, ep, ep_ring, td, ep_trb, event); else process_bulk_intr_td(xhci, ep, ep_ring, td, ep_trb, event); + return 0; check_endpoint_halted: if (xhci_halted_host_endpoint(ep_ctx, trb_comp_code)) -- cgit From d209d1634e6562eafc369b28f8a1f67a2e9e5222 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 9 Aug 2024 18:03:43 +0300 Subject: usb: typec: ucsi: Fix the return value of ucsi_run_command() The command execution routines need to return the amount of data that was transferred when succesful. This fixes an issue where the alternate modes and the power delivery capabilities are not getting registered. Fixes: 5e9c1662a89b ("usb: typec: ucsi: rework command execution functions") Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240809150343.286942-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 432a2d6266d7..4039851551c1 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -137,7 +137,7 @@ static int ucsi_run_command(struct ucsi *ucsi, u64 command, u32 *cci, if (ret) return ret; - return err; + return err ?: UCSI_CCI_LENGTH(*cci); } static int ucsi_read_error(struct ucsi *ucsi, u8 connector_num) -- cgit From 21ea1ce37fc267dc45fe27517bbde926211683df Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 9 Aug 2024 19:29:01 +0800 Subject: Revert "usb: typec: tcpm: clear pd_event queue in PORT_RESET" This reverts commit bf20c69cf3cf9c6445c4925dd9a8a6ca1b78bfdf. During tcpm_init() stage, if the VBUS is still present after tcpm_reset_port(), then we assume that VBUS will off and goto safe0v after a specific discharge time. Following a TCPM_VBUS_EVENT event if VBUS reach to off state. TCPM_VBUS_EVENT event may be set during PORT_RESET handling stage. If pd_events reset to 0 after TCPM_VBUS_EVENT set, we will lost this VBUS event. Then the port state machine may stuck at one state. Before: [ 2.570172] pending state change PORT_RESET -> PORT_RESET_WAIT_OFF @ 100 ms [rev1 NONE_AMS] [ 2.570179] state change PORT_RESET -> PORT_RESET_WAIT_OFF [delayed 100 ms] [ 2.570182] pending state change PORT_RESET_WAIT_OFF -> SNK_UNATTACHED @ 920 ms [rev1 NONE_AMS] [ 3.490213] state change PORT_RESET_WAIT_OFF -> SNK_UNATTACHED [delayed 920 ms] [ 3.490220] Start toggling [ 3.546050] CC1: 0 -> 0, CC2: 0 -> 2 [state TOGGLING, polarity 0, connected] [ 3.546057] state change TOGGLING -> SRC_ATTACH_WAIT [rev1 NONE_AMS] After revert this patch, we can see VBUS off event and the port will goto expected state. [ 2.441992] pending state change PORT_RESET -> PORT_RESET_WAIT_OFF @ 100 ms [rev1 NONE_AMS] [ 2.441999] state change PORT_RESET -> PORT_RESET_WAIT_OFF [delayed 100 ms] [ 2.442002] pending state change PORT_RESET_WAIT_OFF -> SNK_UNATTACHED @ 920 ms [rev1 NONE_AMS] [ 2.442122] VBUS off [ 2.442125] state change PORT_RESET_WAIT_OFF -> SNK_UNATTACHED [rev1 NONE_AMS] [ 2.442127] VBUS VSAFE0V [ 2.442351] CC1: 0 -> 0, CC2: 0 -> 0 [state SNK_UNATTACHED, polarity 0, disconnected] [ 2.442357] Start toggling [ 2.491850] CC1: 0 -> 0, CC2: 0 -> 2 [state TOGGLING, polarity 0, connected] [ 2.491858] state change TOGGLING -> SRC_ATTACH_WAIT [rev1 NONE_AMS] [ 2.491863] pending state change SRC_ATTACH_WAIT -> SNK_TRY @ 200 ms [rev1 NONE_AMS] [ 2.691905] state change SRC_ATTACH_WAIT -> SNK_TRY [delayed 200 ms] Fixes: bf20c69cf3cf ("usb: typec: tcpm: clear pd_event queue in PORT_RESET") Cc: stable@vger.kernel.org Signed-off-by: Xu Yang Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240809112901.535072-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index cce39818e99a..4b02d6474259 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5655,7 +5655,6 @@ static void run_state_machine(struct tcpm_port *port) break; case PORT_RESET: tcpm_reset_port(port); - port->pd_events = 0; if (port->self_powered) tcpm_set_cc(port, TYPEC_CC_OPEN); else -- cgit From 3ed486e383ccee9b0c8d727608f12a937c6603ca Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 12 Aug 2024 11:50:38 +0200 Subject: usb: misc: ljca: Add Lunar Lake ljca GPIO HID to ljca_gpio_hids[] Add LJCA GPIO support for the Lunar Lake platform. New HID taken from out of tree ivsc-driver git repo. Link: https://github.com/intel/ivsc-driver/commit/47e7c4a446c8ea8c741ff5a32fa7b19f9e6fd47e Cc: stable Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240812095038.555837-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/usb-ljca.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/usb-ljca.c b/drivers/usb/misc/usb-ljca.c index 2d30fc1be306..1a8d5e80b9ae 100644 --- a/drivers/usb/misc/usb-ljca.c +++ b/drivers/usb/misc/usb-ljca.c @@ -169,6 +169,7 @@ static const struct acpi_device_id ljca_gpio_hids[] = { { "INTC1096" }, { "INTC100B" }, { "INTC10D1" }, + { "INTC10B5" }, {}, }; -- cgit From f149be46e4c13e277e013c0fff13cb2aa7a4399c Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 7 Aug 2024 10:52:09 -0400 Subject: arm64: dts: imx8mm-phygate: fix typo pinctrcl-0 Fix typo pinctrcl-0 with pinctrl-0. Fix below warning: arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs485.dtb: gpio@30220000: 'pinctrl-0' is a dependency of 'pinctrl-names' from schema $id: http://devicetree.org/schemas/pinctrl/pinctrl-consumer.yaml# arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs485.dtb: uart4_rs485_en: $nodename:0: 'uart4_rs485_en' does not match '^(hog-[0-9]+|.+-hog(-[0-9]+)?)$ Fixes: 8d97083c0b5d ("arm64: dts: phygate-tauri-l: add overlays for RS232 and RS485") Reviewed-by: Teresa Remmet Signed-off-by: Frank Li Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs232.dtso | 2 +- arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs485.dtso | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs232.dtso b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs232.dtso index bf3e04651ba0..353ace3601dc 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs232.dtso +++ b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs232.dtso @@ -21,7 +21,7 @@ &gpio3 { pinctrl-names = "default"; - pinctrcl-0 = <&pinctrl_gpio3_hog>; + pinctrl-0 = <&pinctrl_gpio3_hog>; uart4_rs485_en { gpio-hog; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs485.dtso b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs485.dtso index f4448cde0407..8a75d6783ad2 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs485.dtso +++ b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l-rs232-rs485.dtso @@ -22,7 +22,7 @@ &gpio3 { pinctrl-names = "default"; - pinctrcl-0 = <&pinctrl_gpio3_hog>; + pinctrl-0 = <&pinctrl_gpio3_hog>; uart4_rs485_en { gpio-hog; -- cgit From dc98d76a15bc29a9a4e76f2f65f39f3e590fb15c Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Thu, 8 Aug 2024 22:03:25 +0800 Subject: tty: serial: fsl_lpuart: mark last busy before uart_add_one_port With "earlycon initcall_debug=1 loglevel=8" in bootargs, kernel sometimes boot hang. It is because normal console still is not ready, but runtime suspend is called, so early console putchar will hang in waiting TRDE set in UARTSTAT. The lpuart driver has auto suspend delay set to 3000ms, but during uart_add_one_port, a child device serial ctrl will added and probed with its pm runtime enabled(see serial_ctrl.c). The runtime suspend call path is: device_add |-> bus_probe_device |->device_initial_probe |->__device_attach |-> pm_runtime_get_sync(dev->parent); |-> pm_request_idle(dev); |-> pm_runtime_put(dev->parent); So in the end, before normal console ready, the lpuart get runtime suspended. And earlycon putchar will hang. To address the issue, mark last busy just after pm_runtime_enable, three seconds is long enough to switch from bootconsole to normal console. Fixes: 43543e6f539b ("tty: serial: fsl_lpuart: Add runtime pm support") Cc: stable Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20240808140325.580105-1-peng.fan@oss.nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 615291ea9b5e..77efa7ee6eda 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2923,6 +2923,7 @@ static int lpuart_probe(struct platform_device *pdev) pm_runtime_set_autosuspend_delay(&pdev->dev, UART_AUTOSUSPEND_TIMEOUT); pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); + pm_runtime_mark_last_busy(&pdev->dev); ret = lpuart_global_reset(sport); if (ret) -- cgit From 7258fdd7d7459616b3fe1a603e33900584b10c13 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 10 Aug 2024 01:07:20 +0900 Subject: tty: vt: conmakehash: remove non-portable code printing comment header Commit 6e20753da6bc ("tty: vt: conmakehash: cope with abs_srctree no longer in env") included , which invoked another (wrong) patch that tried to address a build error on macOS. According to the specification [1], the correct header to use PATH_MAX is . The minimal fix would be to replace with . However, the following commits seem questionable to me: - 3bd85c6c97b2 ("tty: vt: conmakehash: Don't mention the full path of the input in output") - 6e20753da6bc ("tty: vt: conmakehash: cope with abs_srctree no longer in env") These commits made too many efforts to cope with a comment header in drivers/tty/vt/consolemap_deftbl.c: /* * Do not edit this file; it was automatically generated by * * conmakehash drivers/tty/vt/cp437.uni > [this file] * */ With this commit, the header part of the generate C file will be simplified as follows: /* * Automatically generated file; Do not edit. */ BTW, another series of excessive efforts for a comment header can be seen in the following: - 5ef6dc08cfde ("lib/build_OID_registry: don't mention the full path of the script in output") - 2fe29fe94563 ("lib/build_OID_registry: avoid non-destructive substitution for Perl < 5.13.2 compat") [1]: https://pubs.opengroup.org/onlinepubs/009695399/basedefs/limits.h.html Fixes: 6e20753da6bc ("tty: vt: conmakehash: cope with abs_srctree no longer in env") Cc: stable Reported-by: Daniel Gomez Closes: https://lore.kernel.org/all/20240807-macos-build-support-v1-11-4cd1ded85694@samsung.com/ Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20240809160853.1269466-1-masahiroy@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/conmakehash.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/tty/vt/conmakehash.c b/drivers/tty/vt/conmakehash.c index 82d9db68b2ce..a931fcde7ad9 100644 --- a/drivers/tty/vt/conmakehash.c +++ b/drivers/tty/vt/conmakehash.c @@ -11,8 +11,6 @@ * Copyright (C) 1995-1997 H. Peter Anvin */ -#include -#include #include #include #include @@ -79,7 +77,6 @@ int main(int argc, char *argv[]) { FILE *ctbl; const char *tblname; - char base_tblname[PATH_MAX]; char buffer[65536]; int fontlen; int i, nuni, nent; @@ -245,20 +242,15 @@ int main(int argc, char *argv[]) for ( i = 0 ; i < fontlen ; i++ ) nuni += unicount[i]; - strncpy(base_tblname, tblname, PATH_MAX); - base_tblname[PATH_MAX - 1] = 0; printf("\ /*\n\ - * Do not edit this file; it was automatically generated by\n\ - *\n\ - * conmakehash %s > [this file]\n\ - *\n\ + * Automatically generated file; Do not edit.\n\ */\n\ \n\ #include \n\ \n\ u8 dfont_unicount[%d] = \n\ -{\n\t", basename(base_tblname), fontlen); +{\n\t", fontlen); for ( i = 0 ; i < fontlen ; i++ ) { -- cgit From c9f6613b16123989f2c3bd04b1d9b2365d6914e7 Mon Sep 17 00:00:00 2001 From: Mathieu Othacehe Date: Thu, 8 Aug 2024 08:06:37 +0200 Subject: tty: atmel_serial: use the correct RTS flag. In RS485 mode, the RTS pin is driven high by hardware when the transmitter is operating. This behaviour cannot be changed. This means that the driver should claim that it supports SER_RS485_RTS_ON_SEND and not SER_RS485_RTS_AFTER_SEND. Otherwise, when configuring the port with the SER_RS485_RTS_ON_SEND, one get the following warning: kern.warning kernel: atmel_usart_serial atmel_usart_serial.2.auto: ttyS1 (1): invalid RTS setting, using RTS_AFTER_SEND instead which is contradictory with what's really happening. Signed-off-by: Mathieu Othacehe Cc: stable Tested-by: Alexander Dahl Fixes: af47c491e3c7 ("serial: atmel: Fill in rs485_supported") Link: https://lore.kernel.org/r/20240808060637.19886-1-othacehe@gnu.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 0a90964d6d10..09b246c9e389 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -2514,7 +2514,7 @@ static const struct uart_ops atmel_pops = { }; static const struct serial_rs485 atmel_rs485_supported = { - .flags = SER_RS485_ENABLED | SER_RS485_RTS_AFTER_SEND | SER_RS485_RX_DURING_TX, + .flags = SER_RS485_ENABLED | SER_RS485_RTS_ON_SEND | SER_RS485_RX_DURING_TX, .delay_rts_before_send = 1, .delay_rts_after_send = 1, }; -- cgit From abfceba0a7a246ac082bf569807738ff7416f59f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 5 Aug 2024 16:20:20 -0700 Subject: ARM: riscpc: ecard: Fix the build Fix a recently introduced build failure. Cc: Russell King Fixes: d69d80484598 ("driver core: have match() callback in struct bus_type take a const *") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240805232026.65087-2-bvanassche@acm.org Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-rpc/ecard.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index c30df1097c52..9f7454b8efa7 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -1109,7 +1109,7 @@ void ecard_remove_driver(struct ecard_driver *drv) driver_unregister(&drv->drv); } -static int ecard_match(struct device *_dev, struct device_driver *_drv) +static int ecard_match(struct device *_dev, const struct device_driver *_drv) { struct expansion_card *ec = ECARD_DEV(_dev); struct ecard_driver *drv = ECARD_DRV(_drv); -- cgit From cdd1fa91a6b8c7cd93b3abf9f3ef05b8ce741b61 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 5 Aug 2024 16:20:21 -0700 Subject: mips: sgi-ip22: Fix the build Fix a recently introduced build failure. Fixes: d69d80484598 ("driver core: have match() callback in struct bus_type take a const *") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240805232026.65087-3-bvanassche@acm.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/sgi-ip22/ip22-gio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c index 2738325e98dd..d20eec742bfa 100644 --- a/arch/mips/sgi-ip22/ip22-gio.c +++ b/arch/mips/sgi-ip22/ip22-gio.c @@ -111,7 +111,7 @@ void gio_device_unregister(struct gio_device *giodev) } EXPORT_SYMBOL_GPL(gio_device_unregister); -static int gio_bus_match(struct device *dev, struct device_driver *drv) +static int gio_bus_match(struct device *dev, const struct device_driver *drv) { struct gio_device *gio_dev = to_gio_device(dev); struct gio_driver *gio_drv = to_gio_driver(drv); -- cgit From c56ba3e44784527fd6efe5eb7a4fa6c9f6969a58 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Tue, 13 Aug 2024 16:29:43 +0530 Subject: ASoC: SOF: amd: move iram-dram fence register programming sequence The existing code modifies IRAM and DRAM size after sha dma start for vangogh platform. The problem with this sequence is that it might cause sha dma failure when firmware code binary size is greater than the default IRAM size. To fix this issue, Move the iram-dram fence register sequence prior to sha dma start. Fixes: 094d11768f74 ("ASoC: SOF: amd: Skip IRAM/DRAM size modification for Steam Deck OLED") Signed-off-by: Vijendar Mukunda Link: https://patch.msgid.link/20240813105944.3126903-1-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/acp.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c index 74fd5f2b148b..9123427fab4e 100644 --- a/sound/soc/sof/amd/acp.c +++ b/sound/soc/sof/amd/acp.c @@ -263,6 +263,17 @@ int configure_and_run_sha_dma(struct acp_dev_data *adata, void *image_addr, snd_sof_dsp_write(sdev, ACP_DSP_BAR, ACP_SHA_DMA_STRT_ADDR, start_addr); snd_sof_dsp_write(sdev, ACP_DSP_BAR, ACP_SHA_DMA_DESTINATION_ADDR, dest_addr); snd_sof_dsp_write(sdev, ACP_DSP_BAR, ACP_SHA_MSG_LENGTH, image_length); + + /* psp_send_cmd only required for vangogh platform (rev - 5) */ + if (desc->rev == 5 && !(adata->quirks && adata->quirks->skip_iram_dram_size_mod)) { + /* Modify IRAM and DRAM size */ + ret = psp_send_cmd(adata, MBOX_ACP_IRAM_DRAM_FENCE_COMMAND | IRAM_DRAM_FENCE_2); + if (ret) + return ret; + ret = psp_send_cmd(adata, MBOX_ACP_IRAM_DRAM_FENCE_COMMAND | MBOX_ISREADY_FLAG); + if (ret) + return ret; + } snd_sof_dsp_write(sdev, ACP_DSP_BAR, ACP_SHA_DMA_CMD, ACP_SHA_RUN); ret = snd_sof_dsp_read_poll_timeout(sdev, ACP_DSP_BAR, ACP_SHA_TRANSFER_BYTE_CNT, @@ -280,17 +291,6 @@ int configure_and_run_sha_dma(struct acp_dev_data *adata, void *image_addr, return ret; } - /* psp_send_cmd only required for vangogh platform (rev - 5) */ - if (desc->rev == 5 && !(adata->quirks && adata->quirks->skip_iram_dram_size_mod)) { - /* Modify IRAM and DRAM size */ - ret = psp_send_cmd(adata, MBOX_ACP_IRAM_DRAM_FENCE_COMMAND | IRAM_DRAM_FENCE_2); - if (ret) - return ret; - ret = psp_send_cmd(adata, MBOX_ACP_IRAM_DRAM_FENCE_COMMAND | MBOX_ISREADY_FLAG); - if (ret) - return ret; - } - ret = snd_sof_dsp_read_poll_timeout(sdev, ACP_DSP_BAR, ACP_SHA_DSP_FW_QUALIFIER, fw_qualifier, fw_qualifier & DSP_FW_RUN_ENABLE, ACP_REG_POLL_INTERVAL, ACP_DMA_COMPLETE_TIMEOUT_US); -- cgit From 897e91e995b338002b00454fd0018af26a098148 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Tue, 13 Aug 2024 16:29:44 +0530 Subject: ASoC: SOF: amd: Fix for incorrect acp error register offsets Addition of 'dsp_intr_base' to ACP error register offsets points to wrong register offsets in irq handler. Correct the acp error register offsets. ACP error status register offset and acp error reason register offset got changed from ACP6.0 onwards. Add 'acp_error_stat' and 'acp_sw0_i2s_err_reason' as descriptor fields in sof_amd_acp_desc structure and update the values based on the ACP variant. >From Rembrandt platform onwards, errors related to SW1 Soundwire manager instance/I2S controller connected on P1 power tile is reported with ACP_SW1_I2S_ERROR_REASON register. Add conditional check for the same. Fixes: 96eb81851012 ("ASoC: SOF: amd: add interrupt handling for SoundWire manager devices") Signed-off-by: Vijendar Mukunda Link: https://patch.msgid.link/20240813105944.3126903-2-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/acp-dsp-offset.h | 6 ++++-- sound/soc/sof/amd/acp.c | 11 +++++++---- sound/soc/sof/amd/acp.h | 2 ++ sound/soc/sof/amd/pci-acp63.c | 2 ++ sound/soc/sof/amd/pci-rmb.c | 2 ++ sound/soc/sof/amd/pci-rn.c | 2 ++ 6 files changed, 19 insertions(+), 6 deletions(-) diff --git a/sound/soc/sof/amd/acp-dsp-offset.h b/sound/soc/sof/amd/acp-dsp-offset.h index 59afbe2e0f42..072b703f9b3f 100644 --- a/sound/soc/sof/amd/acp-dsp-offset.h +++ b/sound/soc/sof/amd/acp-dsp-offset.h @@ -76,13 +76,15 @@ #define DSP_SW_INTR_CNTL_OFFSET 0x0 #define DSP_SW_INTR_STAT_OFFSET 0x4 #define DSP_SW_INTR_TRIG_OFFSET 0x8 -#define ACP_ERROR_STATUS 0x18C4 +#define ACP3X_ERROR_STATUS 0x18C4 +#define ACP6X_ERROR_STATUS 0x1A4C #define ACP3X_AXI2DAGB_SEM_0 0x1880 #define ACP5X_AXI2DAGB_SEM_0 0x1884 #define ACP6X_AXI2DAGB_SEM_0 0x1874 /* ACP common registers to report errors related to I2S & SoundWire interfaces */ -#define ACP_SW0_I2S_ERROR_REASON 0x18B4 +#define ACP3X_SW_I2S_ERROR_REASON 0x18C8 +#define ACP6X_SW0_I2S_ERROR_REASON 0x18B4 #define ACP_SW1_I2S_ERROR_REASON 0x1A50 /* Registers from ACP_SHA block */ diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c index 9123427fab4e..d95f865669a6 100644 --- a/sound/soc/sof/amd/acp.c +++ b/sound/soc/sof/amd/acp.c @@ -92,6 +92,7 @@ static int config_dma_channel(struct acp_dev_data *adata, unsigned int ch, unsigned int idx, unsigned int dscr_count) { struct snd_sof_dev *sdev = adata->dev; + const struct sof_amd_acp_desc *desc = get_chip_info(sdev->pdata); unsigned int val, status; int ret; @@ -102,7 +103,7 @@ static int config_dma_channel(struct acp_dev_data *adata, unsigned int ch, val & (1 << ch), ACP_REG_POLL_INTERVAL, ACP_REG_POLL_TIMEOUT_US); if (ret < 0) { - status = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP_ERROR_STATUS); + status = snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->acp_error_stat); val = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP_DMA_ERR_STS_0 + ch * sizeof(u32)); dev_err(sdev->dev, "ACP_DMA_ERR_STS :0x%x ACP_ERROR_STATUS :0x%x\n", val, status); @@ -402,9 +403,11 @@ static irqreturn_t acp_irq_handler(int irq, void *dev_id) if (val & ACP_ERROR_IRQ_MASK) { snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->ext_intr_stat, ACP_ERROR_IRQ_MASK); - snd_sof_dsp_write(sdev, ACP_DSP_BAR, base + ACP_SW0_I2S_ERROR_REASON, 0); - snd_sof_dsp_write(sdev, ACP_DSP_BAR, base + ACP_SW1_I2S_ERROR_REASON, 0); - snd_sof_dsp_write(sdev, ACP_DSP_BAR, base + ACP_ERROR_STATUS, 0); + snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->acp_sw0_i2s_err_reason, 0); + /* ACP_SW1_I2S_ERROR_REASON is newly added register from rmb platform onwards */ + if (desc->rev >= 6) + snd_sof_dsp_write(sdev, ACP_DSP_BAR, ACP_SW1_I2S_ERROR_REASON, 0); + snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->acp_error_stat, 0); irq_flag = 1; } diff --git a/sound/soc/sof/amd/acp.h b/sound/soc/sof/amd/acp.h index 87e79d500865..1af86b5b28db 100644 --- a/sound/soc/sof/amd/acp.h +++ b/sound/soc/sof/amd/acp.h @@ -203,6 +203,8 @@ struct sof_amd_acp_desc { u32 probe_reg_offset; u32 reg_start_addr; u32 reg_end_addr; + u32 acp_error_stat; + u32 acp_sw0_i2s_err_reason; u32 sdw_max_link_count; u64 sdw_acpi_dev_addr; }; diff --git a/sound/soc/sof/amd/pci-acp63.c b/sound/soc/sof/amd/pci-acp63.c index fc8984447365..986f5928caed 100644 --- a/sound/soc/sof/amd/pci-acp63.c +++ b/sound/soc/sof/amd/pci-acp63.c @@ -35,6 +35,8 @@ static const struct sof_amd_acp_desc acp63_chip_info = { .ext_intr_cntl = ACP6X_EXTERNAL_INTR_CNTL, .ext_intr_stat = ACP6X_EXT_INTR_STAT, .ext_intr_stat1 = ACP6X_EXT_INTR_STAT1, + .acp_error_stat = ACP6X_ERROR_STATUS, + .acp_sw0_i2s_err_reason = ACP6X_SW0_I2S_ERROR_REASON, .dsp_intr_base = ACP6X_DSP_SW_INTR_BASE, .sram_pte_offset = ACP6X_SRAM_PTE_OFFSET, .hw_semaphore_offset = ACP6X_AXI2DAGB_SEM_0, diff --git a/sound/soc/sof/amd/pci-rmb.c b/sound/soc/sof/amd/pci-rmb.c index 4bc30951f8b0..a366f904e6f3 100644 --- a/sound/soc/sof/amd/pci-rmb.c +++ b/sound/soc/sof/amd/pci-rmb.c @@ -33,6 +33,8 @@ static const struct sof_amd_acp_desc rembrandt_chip_info = { .pgfsm_base = ACP6X_PGFSM_BASE, .ext_intr_stat = ACP6X_EXT_INTR_STAT, .dsp_intr_base = ACP6X_DSP_SW_INTR_BASE, + .acp_error_stat = ACP6X_ERROR_STATUS, + .acp_sw0_i2s_err_reason = ACP6X_SW0_I2S_ERROR_REASON, .sram_pte_offset = ACP6X_SRAM_PTE_OFFSET, .hw_semaphore_offset = ACP6X_AXI2DAGB_SEM_0, .fusion_dsp_offset = ACP6X_DSP_FUSION_RUNSTALL, diff --git a/sound/soc/sof/amd/pci-rn.c b/sound/soc/sof/amd/pci-rn.c index e08875bdfa8b..2b7c53470ce8 100644 --- a/sound/soc/sof/amd/pci-rn.c +++ b/sound/soc/sof/amd/pci-rn.c @@ -33,6 +33,8 @@ static const struct sof_amd_acp_desc renoir_chip_info = { .pgfsm_base = ACP3X_PGFSM_BASE, .ext_intr_stat = ACP3X_EXT_INTR_STAT, .dsp_intr_base = ACP3X_DSP_SW_INTR_BASE, + .acp_error_stat = ACP3X_ERROR_STATUS, + .acp_sw0_i2s_err_reason = ACP3X_SW_I2S_ERROR_REASON, .sram_pte_offset = ACP3X_SRAM_PTE_OFFSET, .hw_semaphore_offset = ACP3X_AXI2DAGB_SEM_0, .acp_clkmux_sel = ACP3X_CLKMUX_SEL, -- cgit From b919a27fab37e108164d657ac6e77bf870bf95e6 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 30 Jul 2024 12:35:11 +0200 Subject: ASoC: MAINTAINERS: Drop Banajit Goswami from Qualcomm sound drivers There was no active maintenance from Banajit Goswami - last email is from 2019 - so make obvious that Qualcomm sound drivers are maintained by only one person. Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240730103511.21728-1-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d304054d661e..61a21efc357b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18522,7 +18522,6 @@ F: drivers/crypto/intel/qat/ QCOM AUDIO (ASoC) DRIVERS M: Srinivas Kandagatla -M: Banajit Goswami L: alsa-devel@alsa-project.org (moderated for non-subscribers) L: linux-arm-msm@vger.kernel.org S: Supported -- cgit From 5d61841c74db8b5bbbf9403f1bd4879f614617d2 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 9 Aug 2024 16:15:39 -0400 Subject: spi: zynqmp-gqspi: Scale timeout by data size Large blocks of data time out when reading because we don't wait long enough for the transfer to complete. Scale our timeouts based on the amount of data we are tranferring, with a healthy dose of pessimism. Signed-off-by: Sean Anderson Link: https://patch.msgid.link/20240809201540.3363243-1-sean.anderson@linux.dev Signed-off-by: Mark Brown --- drivers/spi/spi-zynqmp-gqspi.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index 99524a3c9f38..558c466135a5 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -1033,6 +1033,18 @@ static int __maybe_unused zynqmp_runtime_resume(struct device *dev) return 0; } +static unsigned long zynqmp_qspi_timeout(struct zynqmp_qspi *xqspi, u8 bits, + unsigned long bytes) +{ + unsigned long timeout; + + /* Assume we are at most 2x slower than the nominal bus speed */ + timeout = mult_frac(bytes, 2 * 8 * MSEC_PER_SEC, + bits * xqspi->speed_hz); + /* And add 100 ms for scheduling delays */ + return msecs_to_jiffies(timeout + 100); +} + /** * zynqmp_qspi_exec_op() - Initiates the QSPI transfer * @mem: The SPI memory @@ -1049,6 +1061,7 @@ static int zynqmp_qspi_exec_op(struct spi_mem *mem, { struct zynqmp_qspi *xqspi = spi_controller_get_devdata (mem->spi->controller); + unsigned long timeout; int err = 0, i; u32 genfifoentry = 0; u16 opcode = op->cmd.opcode; @@ -1077,8 +1090,10 @@ static int zynqmp_qspi_exec_op(struct spi_mem *mem, zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST, GQSPI_IER_GENFIFOEMPTY_MASK | GQSPI_IER_TXNOT_FULL_MASK); - if (!wait_for_completion_timeout - (&xqspi->data_completion, msecs_to_jiffies(1000))) { + timeout = zynqmp_qspi_timeout(xqspi, op->cmd.buswidth, + op->cmd.nbytes); + if (!wait_for_completion_timeout(&xqspi->data_completion, + timeout)) { err = -ETIMEDOUT; goto return_err; } @@ -1104,8 +1119,10 @@ static int zynqmp_qspi_exec_op(struct spi_mem *mem, GQSPI_IER_TXEMPTY_MASK | GQSPI_IER_GENFIFOEMPTY_MASK | GQSPI_IER_TXNOT_FULL_MASK); - if (!wait_for_completion_timeout - (&xqspi->data_completion, msecs_to_jiffies(1000))) { + timeout = zynqmp_qspi_timeout(xqspi, op->addr.buswidth, + op->addr.nbytes); + if (!wait_for_completion_timeout(&xqspi->data_completion, + timeout)) { err = -ETIMEDOUT; goto return_err; } @@ -1173,8 +1190,9 @@ static int zynqmp_qspi_exec_op(struct spi_mem *mem, GQSPI_IER_RXEMPTY_MASK); } } - if (!wait_for_completion_timeout - (&xqspi->data_completion, msecs_to_jiffies(1000))) + timeout = zynqmp_qspi_timeout(xqspi, op->data.buswidth, + op->data.nbytes); + if (!wait_for_completion_timeout(&xqspi->data_completion, timeout)) err = -ETIMEDOUT; } -- cgit From bcc954c6caba01fca143162d5fbb90e46aa1ad80 Mon Sep 17 00:00:00 2001 From: Ryo Takakura Date: Mon, 12 Aug 2024 16:27:03 +0900 Subject: printk/panic: Allow cpu backtraces to be written into ringbuffer during panic commit 779dbc2e78d7 ("printk: Avoid non-panic CPUs writing to ringbuffer") disabled non-panic CPUs to further write messages to ringbuffer after panicked. Since the commit, non-panicked CPU's are not allowed to write to ring buffer after panicked and CPU backtrace which is triggered after panicked to sample non-panicked CPUs' backtrace no longer serves its function as it has nothing to print. Fix the issue by allowing non-panicked CPUs to write into ringbuffer while CPU backtrace is in flight. Fixes: 779dbc2e78d7 ("printk: Avoid non-panic CPUs writing to ringbuffer") Signed-off-by: Ryo Takakura Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240812072703.339690-1-takakura@valinux.co.jp Signed-off-by: Petr Mladek --- include/linux/panic.h | 1 + kernel/panic.c | 8 +++++++- kernel/printk/printk.c | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/linux/panic.h b/include/linux/panic.h index 3130e0b5116b..54d90b6c5f47 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -16,6 +16,7 @@ extern void oops_enter(void); extern void oops_exit(void); extern bool oops_may_print(void); +extern bool panic_triggering_all_cpu_backtrace; extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; diff --git a/kernel/panic.c b/kernel/panic.c index f861bedc1925..2a0449144f82 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -64,6 +64,8 @@ unsigned long panic_on_taint; bool panic_on_taint_nousertaint = false; static unsigned int warn_limit __read_mostly; +bool panic_triggering_all_cpu_backtrace; + int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); @@ -253,8 +255,12 @@ void check_panic_on_warn(const char *origin) */ static void panic_other_cpus_shutdown(bool crash_kexec) { - if (panic_print & PANIC_PRINT_ALL_CPU_BT) + if (panic_print & PANIC_PRINT_ALL_CPU_BT) { + /* Temporary allow non-panic CPUs to write their backtraces. */ + panic_triggering_all_cpu_backtrace = true; trigger_all_cpu_backtrace(); + panic_triggering_all_cpu_backtrace = false; + } /* * Note that smp_send_stop() is the usual SMP shutdown function, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 054c0e7784fd..c22b07049c38 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2316,7 +2316,7 @@ asmlinkage int vprintk_emit(int facility, int level, * non-panic CPUs are generating any messages, they will be * silently dropped. */ - if (other_cpu_in_panic()) + if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace) return 0; if (level == LOGLEVEL_SCHED) { -- cgit From 0ecc5be200c84e67114f3640064ba2bae3ba2f5a Mon Sep 17 00:00:00 2001 From: Yuntao Wang Date: Tue, 13 Aug 2024 09:48:27 +0800 Subject: x86/apic: Make x2apic_disable() work correctly x2apic_disable() clears x2apic_state and x2apic_mode unconditionally, even when the state is X2APIC_ON_LOCKED, which prevents the kernel to disable it thereby creating inconsistent state. Due to the early state check for X2APIC_ON, the code path which warns about a locked X2APIC cannot be reached. Test for state < X2APIC_ON instead and move the clearing of the state and mode variables to the place which actually disables X2APIC. [ tglx: Massaged change log. Added Fixes tag. Moved clearing so it's at the right place for back ports ] Fixes: a57e456a7b28 ("x86/apic: Fix fallout from x2apic cleanup") Signed-off-by: Yuntao Wang Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240813014827.895381-1-yuntao.wang@linux.dev --- arch/x86/kernel/apic/apic.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 66fd4b2a37a3..373638691cd4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1775,12 +1775,9 @@ static __init void apic_set_fixmap(bool read_apic); static __init void x2apic_disable(void) { - u32 x2apic_id, state = x2apic_state; + u32 x2apic_id; - x2apic_mode = 0; - x2apic_state = X2APIC_DISABLED; - - if (state != X2APIC_ON) + if (x2apic_state < X2APIC_ON) return; x2apic_id = read_apic_id(); @@ -1793,6 +1790,10 @@ static __init void x2apic_disable(void) } __x2apic_disable(); + + x2apic_mode = 0; + x2apic_state = X2APIC_DISABLED; + /* * Don't reread the APIC ID as it was already done from * check_x2apic() and the APIC driver still is a x2APIC variant, -- cgit From c0196faaa927321a63e680427e075734ee656e42 Mon Sep 17 00:00:00 2001 From: Albert Jakieła Date: Fri, 9 Aug 2024 13:56:27 +0000 Subject: ASoC: SOF: mediatek: Add missing board compatible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Google Dojo compatible. Signed-off-by: Albert Jakieła Reviewed-by: Chen-Yu Tsai Link: https://patch.msgid.link/20240809135627.544429-1-jakiela@google.com Signed-off-by: Mark Brown --- sound/soc/sof/mediatek/mt8195/mt8195.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/sof/mediatek/mt8195/mt8195.c b/sound/soc/sof/mediatek/mt8195/mt8195.c index 1c6e035fd313..82d221f53a46 100644 --- a/sound/soc/sof/mediatek/mt8195/mt8195.c +++ b/sound/soc/sof/mediatek/mt8195/mt8195.c @@ -574,6 +574,9 @@ static struct snd_sof_of_mach sof_mt8195_machs[] = { { .compatible = "google,tomato", .sof_tplg_filename = "sof-mt8195-mt6359-rt1019-rt5682.tplg" + }, { + .compatible = "google,dojo", + .sof_tplg_filename = "sof-mt8195-mt6359-max98390-rt5682.tplg" }, { .compatible = "mediatek,mt8195", .sof_tplg_filename = "sof-mt8195.tplg" -- cgit From 3e30296b374af33cb4c12ff93df0b1e5b2d0f80b Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Thu, 8 Aug 2024 16:52:27 -0700 Subject: drm/msm: fix the highest_bank_bit for sc7180 sc7180 programs the ubwc settings as 0x1e as that would mean a highest bank bit of 14 which matches what the GPU sets as well. However, the highest_bank_bit field of the msm_mdss_data which is being used to program the SSPP's fetch configuration is programmed to a highest bank bit of 16 as 0x3 translates to 16 and not 14. Fix the highest bank bit field used for the SSPP to match the mdss and gpu settings. Fixes: 6f410b246209 ("drm/msm/mdss: populate missing data") Reviewed-by: Rob Clark Tested-by: Stephen Boyd # Trogdor.Lazor Patchwork: https://patchwork.freedesktop.org/patch/607625/ Link: https://lore.kernel.org/r/20240808235227.2701479-1-quic_abhinavk@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/msm_mdss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index d90b9471ba6f..faa88fd6eb4d 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -577,7 +577,7 @@ static const struct msm_mdss_data sc7180_data = { .ubwc_enc_version = UBWC_2_0, .ubwc_dec_version = UBWC_2_0, .ubwc_static = 0x1e, - .highest_bank_bit = 0x3, + .highest_bank_bit = 0x1, .reg_bus_bw = 76800, }; -- cgit From 4e91fa1ef3ce6290b4c598e54b5eb6cf134fbec8 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Mon, 12 Aug 2024 21:40:28 +0200 Subject: i2c: qcom-geni: Add missing geni_icc_disable in geni_i2c_runtime_resume Add the missing geni_icc_disable() call before returning in the geni_i2c_runtime_resume() function. Commit 9ba48db9f77c ("i2c: qcom-geni: Add missing geni_icc_disable in geni_i2c_runtime_resume") by Gaosheng missed disabling the interconnect in one case. Fixes: bf225ed357c6 ("i2c: i2c-qcom-geni: Add interconnect support") Cc: Gaosheng Cui Cc: stable@vger.kernel.org # v5.9+ Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-qcom-geni.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 365e37bba0f3..06e836e3e877 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -986,8 +986,10 @@ static int __maybe_unused geni_i2c_runtime_resume(struct device *dev) return ret; ret = clk_prepare_enable(gi2c->core_clk); - if (ret) + if (ret) { + geni_icc_disable(&gi2c->se); return ret; + } ret = geni_se_resources_on(&gi2c->se); if (ret) { -- cgit From a24e6e7146e361aa0855cf8ee3b2e80b8eb692e3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 Aug 2024 22:40:39 -0400 Subject: bcachefs: delete faulty fastpath in bch2_btree_path_traverse_cached() bch2_btree_path_traverse_cached() was previously checking if it could just relock the path, which is a common idiom in path traversal. However, it was using btree_node_relock(), not btree_path_relock(); btree_path_relock() only succeeds if the path was in state BTREE_ITER_NEED_RELOCK. If the path was in state BTREE_ITER_NEED_TRAVERSE a full traversal is needed; this led to a null ptr deref in bch2_btree_path_traverse_cached(). And the short circuit check here isn't needed, since it was already done in the main bch2_btree_path_traverse_one(). Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index f2f2e525460b..79954490627c 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -497,11 +497,6 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path path->l[1].b = NULL; - if (bch2_btree_node_relock_notrace(trans, path, 0)) { - path->uptodate = BTREE_ITER_UPTODATE; - return 0; - } - int ret; do { ret = btree_path_traverse_cached_fast(trans, path); -- cgit From bd864bc2d90790e00b02b17c75fb951cb4b0bb8b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Aug 2024 23:24:03 -0400 Subject: bcachefs: Fix bch2_trigger_alloc when upgrading from old versions bch2_trigger_alloc was assuming that the new key would always be newly created and thus always an alloc_v4 key, but - not when called from btree_gc. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index d9c5a92fa708..0a8a1bc9a4ac 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -829,7 +829,19 @@ int bch2_trigger_alloc(struct btree_trans *trans, struct bch_alloc_v4 old_a_convert; const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert); - struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v; + + struct bch_alloc_v4 *new_a; + if (likely(new.k->type == KEY_TYPE_alloc_v4)) { + new_a = bkey_s_to_alloc_v4(new).v; + } else { + BUG_ON(!(flags & BTREE_TRIGGER_gc)); + + struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c); + ret = PTR_ERR_OR_ZERO(new_ka); + if (unlikely(ret)) + goto err; + new_a = &new_ka->v; + } if (flags & BTREE_TRIGGER_transactional) { alloc_data_type_set(new_a, new_a->data_type); -- cgit From d9e615762bf2eb7459fb0f270525f8b186bce6b7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 Aug 2024 04:53:12 -0400 Subject: bcachefs: bch2_accounting_invalid() fixup Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 046ac92b6639..212f53927111 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -154,7 +154,7 @@ int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, "accounting key replicas entry with bad nr_required"); for (unsigned i = 0; i + 1 < acc_k.replicas.nr_devs; i++) - bkey_fsck_err_on(acc_k.replicas.devs[i] > acc_k.replicas.devs[i + 1], + bkey_fsck_err_on(acc_k.replicas.devs[i] >= acc_k.replicas.devs[i + 1], c, err, accounting_key_replicas_devs_unsorted, "accounting key replicas entry with unsorted devs"); -- cgit From 486d920735325e507d965c2639ba2775b81fd329 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 Aug 2024 22:47:55 -0400 Subject: bcachefs: disk accounting: ignore unknown types forward compat fix Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 212f53927111..03a9de6c2e0a 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -528,6 +528,9 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, e->pos); + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + continue; + u64 src_v[BCH_ACCOUNTING_MAX_COUNTERS]; u64 dst_v[BCH_ACCOUNTING_MAX_COUNTERS]; @@ -760,6 +763,12 @@ void bch2_verify_accounting_clean(struct bch_fs *c) struct bkey_s_c_accounting a = bkey_s_c_to_accounting(k); unsigned nr = bch2_accounting_counters(k.k); + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); + + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + continue; + bch2_accounting_mem_read(c, k.k->p, v, nr); if (memcmp(a.v->d, v, nr * sizeof(u64))) { @@ -775,9 +784,6 @@ void bch2_verify_accounting_clean(struct bch_fs *c) mismatch = true; } - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, a.k->p); - switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: base.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; -- cgit From 48d6cc1b4895ada0781da11a0a483332a236ec14 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 Aug 2024 01:01:35 -0400 Subject: bcachefs: Add missing downgrade table entry Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-downgrade.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 6c4469f53313..9f82d497d9e0 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -104,6 +104,7 @@ BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ BCH_FSCK_ERR_fs_usage_replicas_wrong, \ + BCH_FSCK_ERR_accounting_replicas_not_marked, \ BCH_FSCK_ERR_bkey_version_in_future) struct upgrade_downgrade_entry { -- cgit From 968feb854a86b59cc4bc72af3105989706ca2c7d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2024 16:34:28 -0400 Subject: bcachefs: Convert for_each_btree_node() to lockrestart_do() for_each_btree_node() now works similarly to for_each_btree_key(), where the loop body is passed as an argument to be passed to lockrestart_do(). This now calls trans_begin() on every loop iteration - which fixes an SRCU warning in backpointers fsck. Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 15 +++++---------- fs/bcachefs/btree_iter.c | 1 + fs/bcachefs/btree_iter.h | 42 +++++++++++++++++++++++++++--------------- fs/bcachefs/debug.c | 38 +++++++++----------------------------- 4 files changed, 42 insertions(+), 54 deletions(-) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 3cc02479a982..9edc4c5f735c 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -763,27 +763,22 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, btree < BTREE_ID_NR && !ret; btree++) { unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1; - struct btree_iter iter; - struct btree *b; if (!(BIT_ULL(btree) & btree_leaf_mask) && !(BIT_ULL(btree) & btree_interior_mask)) continue; - bch2_trans_begin(trans); - - __for_each_btree_node(trans, iter, btree, + ret = __for_each_btree_node(trans, iter, btree, btree == start.btree ? start.pos : POS_MIN, - 0, depth, BTREE_ITER_prefetch, b, ret) { + 0, depth, BTREE_ITER_prefetch, b, ({ mem_may_pin -= btree_buf_bytes(b); if (mem_may_pin <= 0) { c->btree_cache.pinned_nodes_end = *end = BBPOS(btree, b->key.k.p); - bch2_trans_iter_exit(trans, &iter); - return 0; + break; } - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); } return ret; diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index aa8a049071f4..2e84d22e17bd 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1900,6 +1900,7 @@ err: goto out; } +/* Only kept for -tools */ struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter) { struct btree *b; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index c7725865309c..dca62375d7d3 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -600,23 +600,35 @@ void bch2_trans_srcu_unlock(struct btree_trans *); u32 bch2_trans_begin(struct btree_trans *); -/* - * XXX - * this does not handle transaction restarts from bch2_btree_iter_next_node() - * correctly - */ -#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ - _locks_want, _depth, _flags, _b, _ret) \ - for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \ - _start, _locks_want, _depth, _flags); \ - (_b) = bch2_btree_iter_peek_node_and_restart(&(_iter)), \ - !((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b); \ - (_b) = bch2_btree_iter_next_node(&(_iter))) +#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + _locks_want, _depth, _flags, _b, _do) \ +({ \ + bch2_trans_begin((_trans)); \ + \ + struct btree_iter _iter; \ + bch2_trans_node_iter_init((_trans), &_iter, (_btree_id), \ + _start, _locks_want, _depth, _flags); \ + int _ret3 = 0; \ + do { \ + _ret3 = lockrestart_do((_trans), ({ \ + struct btree *_b = bch2_btree_iter_peek_node(&_iter); \ + if (!_b) \ + break; \ + \ + PTR_ERR_OR_ZERO(_b) ?: (_do); \ + })) ?: \ + lockrestart_do((_trans), \ + PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(&_iter))); \ + } while (!_ret3); \ + \ + bch2_trans_iter_exit((_trans), &(_iter)); \ + _ret3; \ +}) #define for_each_btree_node(_trans, _iter, _btree_id, _start, \ - _flags, _b, _ret) \ - __for_each_btree_node(_trans, _iter, _btree_id, _start, \ - 0, 0, _flags, _b, _ret) + _flags, _b, _do) \ + __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + 0, 0, _flags, _b, _do) static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter, unsigned flags) diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index ebabab171fe5..45aec1afdb0e 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -397,47 +397,27 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans *trans; - struct btree_iter iter; - struct btree *b; - ssize_t ret; i->ubuf = buf; i->size = size; i->ret = 0; - ret = flush_buf(i); + ssize_t ret = flush_buf(i); if (ret) return ret; if (bpos_eq(SPOS_MAX, i->from)) return i->ret; - trans = bch2_trans_get(i->c); -retry: - bch2_trans_begin(trans); - - for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) { - bch2_btree_node_to_text(&i->buf, i->c, b); - i->from = !bpos_eq(SPOS_MAX, b->key.k.p) - ? bpos_successor(b->key.k.p) - : b->key.k.p; - - ret = drop_locks_do(trans, flush_buf(i)); - if (ret) - break; - } - bch2_trans_iter_exit(trans, &iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - - bch2_trans_put(trans); - - if (!ret) - ret = flush_buf(i); + return bch2_trans_run(i->c, + for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({ + bch2_btree_node_to_text(&i->buf, i->c, b); + i->from = !bpos_eq(SPOS_MAX, b->key.k.p) + ? bpos_successor(b->key.k.p) + : b->key.k.p; - return ret ?: i->ret; + drop_locks_do(trans, flush_buf(i)); + }))) ?: i->ret; } static const struct file_operations btree_format_debug_ops = { -- cgit From b2f11c6f3e1fc60742673b8675c95b78447f3dae Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Aug 2024 21:04:35 -0400 Subject: lib/generic-radix-tree.c: Fix rare race in __genradix_ptr_alloc() If we need to increase the tree depth, allocate a new node, and then race with another thread that increased the tree depth before us, we'll still have a preallocated node that might be used later. If we then use that node for a new non-root node, it'll still have a pointer to the old root instead of being zeroed - fix this by zeroing it in the cmpxchg failure path. Signed-off-by: Kent Overstreet --- lib/generic-radix-tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c index aaefb9b678c8..fa692c86f069 100644 --- a/lib/generic-radix-tree.c +++ b/lib/generic-radix-tree.c @@ -121,6 +121,8 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) { v = new_root; new_node = NULL; + } else { + new_node->children[0] = NULL; } } -- cgit From 7254555c440ff6b136aa97fb3c33fd5e0bb4fb9f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Aug 2024 14:40:09 -0400 Subject: bcachefs: Add hysteresis to waiting on btree key cache flush This helps ensure key cache reclaim isn't contending with threads waiting for the key cache to be helped, and fixes a severe performance bug. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.h | 9 +++++++++ fs/bcachefs/btree_trans_commit.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h index e6b2cd0dd2c1..113309f62918 100644 --- a/fs/bcachefs/btree_key_cache.h +++ b/fs/bcachefs/btree_key_cache.h @@ -20,6 +20,15 @@ static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) return nr_dirty > max_dirty; } +static inline bool bch2_btree_key_cache_wait_done(struct bch_fs *c) +{ + size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); + size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); + size_t max_dirty = 2048 + (nr_keys * 5) / 8; + + return nr_dirty <= max_dirty; +} + int bch2_btree_key_cache_journal_flush(struct journal *, struct journal_entry_pin *, u64); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index cca336fe46e9..f567bfb82850 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -927,7 +927,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags static int journal_reclaim_wait_done(struct bch_fs *c) { int ret = bch2_journal_error(&c->journal) ?: - !bch2_btree_key_cache_must_wait(c); + bch2_btree_key_cache_wait_done(c); if (!ret) journal_reclaim_kick(&c->journal); -- cgit From 790666c8ac6427ddaa00f502dc44073c1e039355 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Aug 2024 14:31:17 -0400 Subject: bcachefs: Improve trans_blocked_journal_reclaim tracepoint include information about the state of the btree key cache Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.h | 9 +++++++-- fs/bcachefs/trace.c | 1 + fs/bcachefs/trace.h | 27 +++++++++++++++++++++++++-- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h index 113309f62918..51d6289b8dee 100644 --- a/fs/bcachefs/btree_key_cache.h +++ b/fs/bcachefs/btree_key_cache.h @@ -11,13 +11,18 @@ static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c) return max_t(ssize_t, 0, nr_dirty - max_dirty); } -static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) +static inline ssize_t __bch2_btree_key_cache_must_wait(struct bch_fs *c) { size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); size_t max_dirty = 4096 + (nr_keys * 3) / 4; - return nr_dirty > max_dirty; + return nr_dirty - max_dirty; +} + +static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) +{ + return __bch2_btree_key_cache_must_wait(c) > 0; } static inline bool bch2_btree_key_cache_wait_done(struct bch_fs *c) diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c index dc48b52b01b4..dfad1d06633d 100644 --- a/fs/bcachefs/trace.c +++ b/fs/bcachefs/trace.c @@ -4,6 +4,7 @@ #include "buckets.h" #include "btree_cache.h" #include "btree_iter.h" +#include "btree_key_cache.h" #include "btree_locking.h" #include "btree_update_interior.h" #include "keylist.h" diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index d0e6b9deb6cb..c62f00322d1e 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -988,10 +988,33 @@ TRACE_EVENT(trans_restart_split_race, __entry->u64s_remaining) ); -DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim, +TRACE_EVENT(trans_blocked_journal_reclaim, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans, caller_ip) + TP_ARGS(trans, caller_ip), + + TP_STRUCT__entry( + __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + + __field(unsigned long, key_cache_nr_keys ) + __field(unsigned long, key_cache_nr_dirty ) + __field(long, must_wait ) + ), + + TP_fast_assign( + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + __entry->key_cache_nr_keys = atomic_long_read(&trans->c->btree_key_cache.nr_keys); + __entry->key_cache_nr_dirty = atomic_long_read(&trans->c->btree_key_cache.nr_dirty); + __entry->must_wait = __bch2_btree_key_cache_must_wait(trans->c); + ), + + TP_printk("%s %pS key cache keys %lu dirty %lu must_wait %li", + __entry->trans_fn, (void *) __entry->caller_ip, + __entry->key_cache_nr_keys, + __entry->key_cache_nr_dirty, + __entry->must_wait) ); TRACE_EVENT(trans_restart_journal_preres_get, -- cgit From 06a8693b890c0cf7d94bf7c6f0e2adf3a3aaa346 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Aug 2024 15:48:18 -0400 Subject: bcachefs: Add a time_stat for blocked on key cache flush Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/btree_trans_commit.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index eedf2d6045e7..0c7086e00d18 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -447,6 +447,7 @@ BCH_DEBUG_PARAMS_DEBUG() x(blocked_journal_low_on_space) \ x(blocked_journal_low_on_pin) \ x(blocked_journal_max_in_flight) \ + x(blocked_key_cache_flush) \ x(blocked_allocate) \ x(blocked_allocate_open_bucket) \ x(blocked_write_buffer_full) \ diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index f567bfb82850..ac0c92683aad 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -973,9 +973,13 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, bch2_trans_unlock(trans); trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip); + track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], true); wait_event_freezable(c->journal.reclaim_wait, (ret = journal_reclaim_wait_done(c))); + + track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], false); + if (ret < 0) break; -- cgit From c99471024f24b3cbafc02bf5b112ecf34b0dbd40 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Aug 2024 23:29:46 -0400 Subject: bcachefs: Fix warning in __bch2_fsck_err() for trans not passed in Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 2c424435ca4a..70ebcca08ba2 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1767,6 +1767,8 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, set_btree_node_read_in_flight(b); + /* we can't pass the trans to read_done() for fsck errors, so it must be unlocked */ + bch2_trans_unlock(trans); bch2_btree_node_read(trans, b, true); if (btree_node_read_error(b)) { -- cgit From d97de0d017cde0d442c3d144b4f969f43064cc0f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Aug 2024 21:31:25 -0400 Subject: bcachefs: Make bkey_fsck_err() a wrapper around fsck_err() bkey_fsck_err() was added as an interface that looks like fsck_err(), but previously all it did was ensure that the appropriate error counter was incremented in the superblock. This is a cleanup and bugfix patch that converts it to a wrapper around fsck_err(). This is needed to fix an issue with the upgrade path to disk_accounting_v3, where the "silent fix" error list now includes bkey_fsck errors; fsck_err() handles this in a unified way, and since we need to change printing of bkey fsck errors from the caller to the inner bkey_fsck_err() calls, this ends up being a pretty big change. Als,, rename .invalid() methods to .validate(), for clarity, while we're changing the function signature anyways (to drop the printbuf argument). Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 63 ++++++++-------- fs/bcachefs/alloc_background.h | 26 +++---- fs/bcachefs/backpointers.c | 8 +- fs/bcachefs/backpointers.h | 5 +- fs/bcachefs/bkey.h | 7 +- fs/bcachefs/bkey_methods.c | 109 +++++++++++++-------------- fs/bcachefs/bkey_methods.h | 21 +++--- fs/bcachefs/btree_io.c | 67 +++++------------ fs/bcachefs/btree_node_scan.c | 2 +- fs/bcachefs/btree_trans_commit.c | 72 ++++-------------- fs/bcachefs/btree_update_interior.c | 16 +--- fs/bcachefs/data_update.c | 6 +- fs/bcachefs/dirent.c | 33 ++++----- fs/bcachefs/dirent.h | 5 +- fs/bcachefs/disk_accounting.c | 13 ++-- fs/bcachefs/disk_accounting.h | 5 +- fs/bcachefs/ec.c | 15 ++-- fs/bcachefs/ec.h | 5 +- fs/bcachefs/errcode.h | 1 + fs/bcachefs/error.c | 22 ++++++ fs/bcachefs/error.h | 39 ++++++---- fs/bcachefs/extents.c | 144 ++++++++++++++++++------------------ fs/bcachefs/extents.h | 24 +++--- fs/bcachefs/inode.c | 77 +++++++++---------- fs/bcachefs/inode.h | 24 +++--- fs/bcachefs/journal_io.c | 24 ++---- fs/bcachefs/lru.c | 9 +-- fs/bcachefs/lru.h | 5 +- fs/bcachefs/quota.c | 8 +- fs/bcachefs/quota.h | 5 +- fs/bcachefs/reflink.c | 19 ++--- fs/bcachefs/reflink.h | 22 +++--- fs/bcachefs/snapshot.c | 42 +++++------ fs/bcachefs/snapshot.h | 11 ++- fs/bcachefs/subvolume.c | 16 ++-- fs/bcachefs/subvolume.h | 5 +- fs/bcachefs/xattr.c | 21 +++--- fs/bcachefs/xattr.h | 5 +- 38 files changed, 448 insertions(+), 553 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 0a8a1bc9a4ac..fd3a2522bc3e 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -196,75 +196,71 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) return DIV_ROUND_UP(bytes, sizeof(u64)); } -int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); int ret = 0; /* allow for unknown fields */ - bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err, - alloc_v1_val_size_bad, + bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), + c, alloc_v1_val_size_bad, "incorrect value size (%zu < %u)", bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); fsck_err: return ret; } -int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_alloc_unpacked u; int ret = 0; - bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err, - alloc_v2_unpack_error, + bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), + c, alloc_v2_unpack_error, "unpack error"); fsck_err: return ret; } -int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_alloc_unpacked u; int ret = 0; - bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err, - alloc_v2_unpack_error, + bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), + c, alloc_v2_unpack_error, "unpack error"); fsck_err: return ret; } -int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); int ret = 0; - bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), c, err, - alloc_v4_val_size_bad, + bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), + c, alloc_v4_val_size_bad, "bad val size (%u > %zu)", alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k)); bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && - BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err, - alloc_v4_backpointers_start_bad, + BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), + c, alloc_v4_backpointers_start_bad, "invalid backpointers_start"); - bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err, - alloc_key_data_type_bad, + bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, + c, alloc_key_data_type_bad, "invalid data type (got %u should be %u)", a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); for (unsigned i = 0; i < 2; i++) bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX, - c, err, - alloc_key_io_time_bad, + c, alloc_key_io_time_bad, "invalid io_time[%s]: %llu, max %llu", i == READ ? "read" : "write", a.v->io_time[i], LRU_TIME_MAX); @@ -282,7 +278,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, a.v->dirty_sectors || a.v->cached_sectors || a.v->stripe, - c, err, alloc_key_empty_but_have_data, + c, alloc_key_empty_but_have_data, "empty data type free but have data %u.%u.%u %u", stripe_sectors, a.v->dirty_sectors, @@ -296,7 +292,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, case BCH_DATA_parity: bkey_fsck_err_on(!a.v->dirty_sectors && !stripe_sectors, - c, err, alloc_key_dirty_sectors_0, + c, alloc_key_dirty_sectors_0, "data_type %s but dirty_sectors==0", bch2_data_type_str(a.v->data_type)); break; @@ -305,12 +301,12 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, a.v->dirty_sectors || stripe_sectors || a.v->stripe, - c, err, alloc_key_cached_inconsistency, + c, alloc_key_cached_inconsistency, "data type inconsistency"); bkey_fsck_err_on(!a.v->io_time[READ] && c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, - c, err, alloc_key_cached_but_read_time_zero, + c, alloc_key_cached_but_read_time_zero, "cached bucket with read_time == 0"); break; case BCH_DATA_stripe: @@ -513,14 +509,13 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) : 0; } -int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err, - bucket_gens_val_size_bad, + bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), + c, bucket_gens_val_size_bad, "bad val size (%zu != %zu)", bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens)); fsck_err: diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 96a0444ea78f..260e7fa83d05 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -240,52 +240,48 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); -int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v4_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_alloc_v4_swab(struct bkey_s); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_alloc ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v1_invalid, \ + .key_validate = bch2_alloc_v1_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v2_invalid, \ + .key_validate = bch2_alloc_v2_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v3_invalid, \ + .key_validate = bch2_alloc_v3_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_alloc_v4 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v4_invalid, \ + .key_validate = bch2_alloc_v4_validate, \ .val_to_text = bch2_alloc_to_text, \ .swab = bch2_alloc_v4_swab, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 48, \ }) -int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_bucket_gens_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \ - .key_invalid = bch2_bucket_gens_invalid, \ + .key_validate = bch2_bucket_gens_validate, \ .val_to_text = bch2_bucket_gens_to_text, \ }) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 9edc4c5f735c..d4da6343efa9 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -47,9 +47,8 @@ static bool extent_matches_bp(struct bch_fs *c, return false; } -int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); @@ -68,8 +67,7 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || !bpos_eq(bp.k->p, bp_pos), - c, err, - backpointer_bucket_offset_wrong, + c, backpointer_bucket_offset_wrong, "backpointer bucket_offset wrong"); fsck_err: return ret; diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 6021de1c5e98..7daecadb764e 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -18,14 +18,13 @@ static inline u64 swab40(u64 x) ((x & 0xff00000000ULL) >> 32)); } -int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k, - enum bch_validate_flags, struct printbuf *); +int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, enum bch_validate_flags); void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_backpointer_swab(struct bkey_s); #define bch2_bkey_ops_backpointer ((struct bkey_ops) { \ - .key_invalid = bch2_backpointer_invalid, \ + .key_validate = bch2_backpointer_validate, \ .val_to_text = bch2_backpointer_k_to_text, \ .swab = bch2_backpointer_swab, \ .min_val_size = 32, \ diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index 936357149cf0..e34cb2bf329c 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -10,9 +10,10 @@ #include "vstructs.h" enum bch_validate_flags { - BCH_VALIDATE_write = (1U << 0), - BCH_VALIDATE_commit = (1U << 1), - BCH_VALIDATE_journal = (1U << 2), + BCH_VALIDATE_write = BIT(0), + BCH_VALIDATE_commit = BIT(1), + BCH_VALIDATE_journal = BIT(2), + BCH_VALIDATE_silent = BIT(3), }; #if 0 diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 5f07cf853d0c..88d8958281e8 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -27,27 +27,27 @@ const char * const bch2_bkey_types[] = { NULL }; -static int deleted_key_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } #define bch2_bkey_ops_deleted ((struct bkey_ops) { \ - .key_invalid = deleted_key_invalid, \ + .key_validate = deleted_key_validate, \ }) #define bch2_bkey_ops_whiteout ((struct bkey_ops) { \ - .key_invalid = deleted_key_invalid, \ + .key_validate = deleted_key_validate, \ }) -static int empty_val_key_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k), c, err, - bkey_val_size_nonzero, + bkey_fsck_err_on(bkey_val_bytes(k.k), + c, bkey_val_size_nonzero, "incorrect value size (%zu != 0)", bkey_val_bytes(k.k)); fsck_err: @@ -55,11 +55,11 @@ fsck_err: } #define bch2_bkey_ops_error ((struct bkey_ops) { \ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ }) -static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int key_type_cookie_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } @@ -73,17 +73,17 @@ static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c, } #define bch2_bkey_ops_cookie ((struct bkey_ops) { \ - .key_invalid = key_type_cookie_invalid, \ + .key_validate = key_type_cookie_validate, \ .val_to_text = key_type_cookie_to_text, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_hash_whiteout ((struct bkey_ops) {\ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ }) -static int key_type_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int key_type_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } @@ -98,9 +98,9 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c, datalen, min(datalen, 32U), d.v->data); } -#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \ - .key_invalid = key_type_inline_data_invalid, \ - .val_to_text = key_type_inline_data_to_text, \ +#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \ + .key_validate = key_type_inline_data_validate, \ + .val_to_text = key_type_inline_data_to_text, \ }) static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) @@ -110,7 +110,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_ } #define bch2_bkey_ops_set ((struct bkey_ops) { \ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ .key_merge = key_type_set_merge, \ }) @@ -123,9 +123,8 @@ const struct bkey_ops bch2_bkey_ops[] = { const struct bkey_ops bch2_bkey_null_ops = { }; -int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) return 0; @@ -133,15 +132,15 @@ int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type); int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, c, err, - bkey_val_size_too_small, + bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, + c, bkey_val_size_too_small, "bad val size (%zu < %u)", bkey_val_bytes(k.k), ops->min_val_size); - if (!ops->key_invalid) + if (!ops->key_validate) return 0; - ret = ops->key_invalid(c, k, flags, err); + ret = ops->key_validate(c, k, flags); fsck_err: return ret; } @@ -161,18 +160,17 @@ const char *bch2_btree_node_type_str(enum btree_node_type type) return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1); } -int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, - enum btree_node_type type, - enum bch_validate_flags flags, - struct printbuf *err) +int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, + enum btree_node_type type, + enum bch_validate_flags flags) { if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) return 0; int ret = 0; - bkey_fsck_err_on(k.k->u64s < BKEY_U64s, c, err, - bkey_u64s_too_small, + bkey_fsck_err_on(k.k->u64s < BKEY_U64s, + c, bkey_u64s_too_small, "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s); if (type >= BKEY_TYPE_NR) @@ -180,8 +178,8 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX && (type == BKEY_TYPE_btree || (flags & BCH_VALIDATE_commit)) && - !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err, - bkey_invalid_type_for_btree, + !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), + c, bkey_invalid_type_for_btree, "invalid key type for btree %s (%s)", bch2_btree_node_type_str(type), k.k->type < KEY_TYPE_MAX @@ -189,17 +187,17 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, : "(unknown)"); if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { - bkey_fsck_err_on(k.k->size == 0, c, err, - bkey_extent_size_zero, + bkey_fsck_err_on(k.k->size == 0, + c, bkey_extent_size_zero, "size == 0"); - bkey_fsck_err_on(k.k->size > k.k->p.offset, c, err, - bkey_extent_size_greater_than_offset, + bkey_fsck_err_on(k.k->size > k.k->p.offset, + c, bkey_extent_size_greater_than_offset, "size greater than offset (%u > %llu)", k.k->size, k.k->p.offset); } else { - bkey_fsck_err_on(k.k->size, c, err, - bkey_size_nonzero, + bkey_fsck_err_on(k.k->size, + c, bkey_size_nonzero, "size != 0"); } @@ -207,12 +205,12 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, enum btree_id btree = type - 1; if (btree_type_has_snapshots(btree)) { - bkey_fsck_err_on(!k.k->p.snapshot, c, err, - bkey_snapshot_zero, + bkey_fsck_err_on(!k.k->p.snapshot, + c, bkey_snapshot_zero, "snapshot == 0"); } else if (!btree_type_has_snapshot_field(btree)) { - bkey_fsck_err_on(k.k->p.snapshot, c, err, - bkey_snapshot_nonzero, + bkey_fsck_err_on(k.k->p.snapshot, + c, bkey_snapshot_nonzero, "nonzero snapshot"); } else { /* @@ -221,34 +219,33 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, */ } - bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err, - bkey_at_pos_max, + bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), + c, bkey_at_pos_max, "key at POS_MAX"); } fsck_err: return ret; } -int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, +int bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, enum btree_node_type type, - enum bch_validate_flags flags, - struct printbuf *err) + enum bch_validate_flags flags) { - return __bch2_bkey_invalid(c, k, type, flags, err) ?: - bch2_bkey_val_invalid(c, k, flags, err); + return __bch2_bkey_validate(c, k, type, flags) ?: + bch2_bkey_val_validate(c, k, flags); } int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, struct printbuf *err) + struct bkey_s_c k, enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), c, err, - bkey_before_start_of_btree_node, + bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), + c, bkey_before_start_of_btree_node, "key before start of btree node"); - bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), c, err, - bkey_after_end_of_btree_node, + bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), + c, bkey_after_end_of_btree_node, "key past end of btree node"); fsck_err: return ret; diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index baef0722f5fb..3df3dd2723a1 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -14,15 +14,15 @@ extern const char * const bch2_bkey_types[]; extern const struct bkey_ops bch2_bkey_null_ops; /* - * key_invalid: checks validity of @k, returns 0 if good or -EINVAL if bad. If + * key_validate: checks validity of @k, returns 0 if good or -EINVAL if bad. If * invalid, entire key will be deleted. * * When invalid, error string is returned via @err. @rw indicates whether key is * being read or written; more aggressive checks can be enabled when rw == WRITE. */ struct bkey_ops { - int (*key_invalid)(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err); + int (*key_validate)(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags); void (*val_to_text)(struct printbuf *, struct bch_fs *, struct bkey_s_c); void (*swab)(struct bkey_s); @@ -48,14 +48,13 @@ static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type) : &bch2_bkey_null_ops; } -int bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int __bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, - enum bch_validate_flags, struct printbuf *); -int bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, - enum bch_validate_flags, struct printbuf *); -int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, - struct bkey_s_c, struct printbuf *); +int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, + enum bch_validate_flags); +int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, + enum bch_validate_flags); +int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, struct bkey_s_c, + enum bch_validate_flags); void bch2_bpos_to_text(struct printbuf *, struct bpos); void bch2_bkey_to_text(struct printbuf *, const struct bkey *); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 70ebcca08ba2..56ea9a77cd4a 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -836,14 +836,13 @@ fsck_err: return ret; } -static int bset_key_invalid(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, - bool updated_range, int rw, - struct printbuf *err) +static int bset_key_validate(struct bch_fs *c, struct btree *b, + struct bkey_s_c k, + bool updated_range, int rw) { - return __bch2_bkey_invalid(c, k, btree_node_type(b), READ, err) ?: - (!updated_range ? bch2_bkey_in_btree_node(c, b, k, err) : 0) ?: - (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0); + return __bch2_bkey_validate(c, k, btree_node_type(b), 0) ?: + (!updated_range ? bch2_bkey_in_btree_node(c, b, k, 0) : 0) ?: + (rw == WRITE ? bch2_bkey_val_validate(c, k, 0) : 0); } static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, @@ -858,12 +857,9 @@ static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, if (!bkeyp_u64s_valid(&b->format, k)) return false; - struct printbuf buf = PRINTBUF; struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); - bool ret = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b), READ, &buf); - printbuf_exit(&buf); - return ret; + return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent); } static int validate_bset_keys(struct bch_fs *c, struct btree *b, @@ -915,19 +911,11 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, u = __bkey_disassemble(b, k, &tmp); - printbuf_reset(&buf); - if (bset_key_invalid(c, b, u.s_c, updated_range, write, &buf)) { - printbuf_reset(&buf); - bset_key_invalid(c, b, u.s_c, updated_range, write, &buf); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, u.s_c); - - btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, k, - btree_node_bad_bkey, - "invalid bkey: %s", buf.buf); + ret = bset_key_validate(c, b, u.s_c, updated_range, write); + if (ret == -BCH_ERR_fsck_delete_bkey) goto drop_this_key; - } + if (ret) + goto fsck_err; if (write) bch2_bkey_compat(b->c.level, b->c.btree_id, version, @@ -1228,23 +1216,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); - printbuf_reset(&buf); - - if (bch2_bkey_val_invalid(c, u.s_c, READ, &buf) || + ret = bch2_bkey_val_validate(c, u.s_c, READ); + if (ret == -BCH_ERR_fsck_delete_bkey || (bch2_inject_invalid_keys && !bversion_cmp(u.k->version, MAX_VERSION))) { - printbuf_reset(&buf); - - prt_printf(&buf, "invalid bkey: "); - bch2_bkey_val_invalid(c, u.s_c, READ, &buf); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, u.s_c); - - btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, k, - btree_node_bad_bkey, - "%s", buf.buf); - btree_keys_account_key_drop(&b->nr, 0, k); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); @@ -1253,6 +1228,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, set_btree_bset_end(b, b->set); continue; } + if (ret) + goto fsck_err; if (u.k->type == KEY_TYPE_btree_ptr_v2) { struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u); @@ -1954,18 +1931,14 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - struct printbuf buf = PRINTBUF; bool saw_error; - int ret; - - ret = bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), - BKEY_TYPE_btree, WRITE, &buf); - if (ret) - bch2_fs_inconsistent(c, "invalid btree node key before write: %s", buf.buf); - printbuf_exit(&buf); - if (ret) + int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), + BKEY_TYPE_btree, WRITE); + if (ret) { + bch2_fs_inconsistent(c, "invalid btree node key before write"); return ret; + } ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?: validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error); diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 001107226377..b28c649c6838 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -530,7 +530,7 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); printbuf_exit(&buf); - BUG_ON(bch2_bkey_invalid(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0, NULL)); + BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0)); ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k); if (ret) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index ac0c92683aad..1a1e9d2036da 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -818,50 +818,6 @@ static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p); } -static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, - enum bch_validate_flags flags, - struct btree_insert_entry *i, - struct printbuf *err) -{ - struct bch_fs *c = trans->c; - - printbuf_reset(err); - prt_printf(err, "invalid bkey on insert from %s -> %ps\n", - trans->fn, (void *) i->ip_allocated); - printbuf_indent_add(err, 2); - - bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k)); - prt_newline(err); - - bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, flags, err); - bch2_print_string_as_lines(KERN_ERR, err->buf); - - bch2_inconsistent_error(c); - bch2_dump_trans_updates(trans); - - return -EINVAL; -} - -static noinline int bch2_trans_commit_journal_entry_invalid(struct btree_trans *trans, - struct jset_entry *i) -{ - struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; - - prt_printf(&buf, "invalid bkey on insert from %s\n", trans->fn); - printbuf_indent_add(&buf, 2); - - bch2_journal_entry_to_text(&buf, c, i); - prt_newline(&buf); - - bch2_print_string_as_lines(KERN_ERR, buf.buf); - - bch2_inconsistent_error(c); - bch2_dump_trans_updates(trans); - - return -EINVAL; -} - static int bch2_trans_commit_journal_pin_flush(struct journal *j, struct journal_entry_pin *_pin, u64 seq) { @@ -1064,20 +1020,19 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) goto out_reset; trans_for_each_update(trans, i) { - struct printbuf buf = PRINTBUF; enum bch_validate_flags invalid_flags = 0; if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), - i->bkey_type, invalid_flags, &buf))) - ret = bch2_trans_commit_bkey_invalid(trans, invalid_flags, i, &buf); - btree_insert_entry_checks(trans, i); - printbuf_exit(&buf); - - if (ret) + ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), + i->bkey_type, invalid_flags); + if (unlikely(ret)){ + bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", + trans->fn, (void *) i->ip_allocated); return ret; + } + btree_insert_entry_checks(trans, i); } for (struct jset_entry *i = trans->journal_entries; @@ -1088,13 +1043,14 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - if (unlikely(bch2_journal_entry_validate(c, NULL, i, - bcachefs_metadata_version_current, - CPU_BIG_ENDIAN, invalid_flags))) - ret = bch2_trans_commit_journal_entry_invalid(trans, i); - - if (ret) + ret = bch2_journal_entry_validate(c, NULL, i, + bcachefs_metadata_version_current, + CPU_BIG_ENDIAN, invalid_flags); + if (unlikely(ret)) { + bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n", + trans->fn); return ret; + } } if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index e61f9695771e..b3454d4619e8 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1364,18 +1364,10 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, if (unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags))) bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p); - if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert), - btree_node_type(b), WRITE, &buf) ?: - bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) { - printbuf_reset(&buf); - prt_printf(&buf, "inserting invalid bkey\n "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); - prt_printf(&buf, "\n "); - bch2_bkey_invalid(c, bkey_i_to_s_c(insert), - btree_node_type(b), WRITE, &buf); - bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf); - - bch2_fs_inconsistent(c, "%s", buf.buf); + if (bch2_bkey_validate(c, bkey_i_to_s_c(insert), + btree_node_type(b), BCH_VALIDATE_write) ?: + bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), BCH_VALIDATE_write)) { + bch2_fs_inconsistent(c, "%s: inserting invalid bkey", __func__); dump_stack(); } diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 0087b8555ead..6a854c918496 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -250,10 +250,8 @@ restart_drop_extra_replicas: * it's been hard to reproduce, so this should give us some more * information when it does occur: */ - struct printbuf err = PRINTBUF; - int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err); - printbuf_exit(&err); - + int invalid = bch2_bkey_validate(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), + BCH_VALIDATE_commit); if (invalid) { struct printbuf buf = PRINTBUF; diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index d743da89308e..32bfdf19289a 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -100,20 +100,19 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { .is_visible = dirent_is_visible, }; -int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); struct qstr d_name = bch2_dirent_get_name(d); int ret = 0; - bkey_fsck_err_on(!d_name.len, c, err, - dirent_empty_name, + bkey_fsck_err_on(!d_name.len, + c, dirent_empty_name, "empty name"); - bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err, - dirent_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), + c, dirent_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); @@ -121,27 +120,27 @@ int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, * Check new keys don't exceed the max length * (older keys may be larger.) */ - bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, c, err, - dirent_name_too_long, + bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, + c, dirent_name_too_long, "dirent name too big (%u > %u)", d_name.len, BCH_NAME_MAX); - bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err, - dirent_name_embedded_nul, + bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), + c, dirent_name_embedded_nul, "dirent has stray data after name's NUL"); bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) || - (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err, - dirent_name_dot_or_dotdot, + (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), + c, dirent_name_dot_or_dotdot, "invalid name"); - bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err, - dirent_name_has_slash, + bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), + c, dirent_name_has_slash, "name with /"); bkey_fsck_err_on(d.v->d_type != DT_SUBVOL && - le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err, - dirent_to_itself, + le64_to_cpu(d.v->d_inum) == d.k->p.inode, + c, dirent_to_itself, "dirent points to own directory"); fsck_err: return ret; diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 24037e6e0a09..8945145865c5 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -7,12 +7,11 @@ enum bch_validate_flags; extern const struct bch_hash_desc bch2_dirent_hash_desc; -int bch2_dirent_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_dirent_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_dirent ((struct bkey_ops) { \ - .key_invalid = bch2_dirent_invalid, \ + .key_validate = bch2_dirent_validate, \ .val_to_text = bch2_dirent_to_text, \ .min_val_size = 16, \ }) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 03a9de6c2e0a..f059cbffdf23 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -126,9 +126,8 @@ static inline bool is_zero(char *start, char *end) #define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member)) -int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, k.k->p); @@ -144,18 +143,18 @@ int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, break; case BCH_DISK_ACCOUNTING_replicas: bkey_fsck_err_on(!acc_k.replicas.nr_devs, - c, err, accounting_key_replicas_nr_devs_0, + c, accounting_key_replicas_nr_devs_0, "accounting key replicas entry with nr_devs=0"); bkey_fsck_err_on(acc_k.replicas.nr_required > acc_k.replicas.nr_devs || (acc_k.replicas.nr_required > 1 && acc_k.replicas.nr_required == acc_k.replicas.nr_devs), - c, err, accounting_key_replicas_nr_required_bad, + c, accounting_key_replicas_nr_required_bad, "accounting key replicas entry with bad nr_required"); for (unsigned i = 0; i + 1 < acc_k.replicas.nr_devs; i++) bkey_fsck_err_on(acc_k.replicas.devs[i] >= acc_k.replicas.devs[i + 1], - c, err, accounting_key_replicas_devs_unsorted, + c, accounting_key_replicas_devs_unsorted, "accounting key replicas entry with unsorted devs"); end = (void *) &acc_k.replicas + replicas_entry_bytes(&acc_k.replicas); @@ -178,7 +177,7 @@ int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, } bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)), - c, err, accounting_key_junk_at_end, + c, accounting_key_junk_at_end, "junk at end of accounting key"); fsck_err: return ret; diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 3d3f25e08b69..b92f8c2e3054 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -82,14 +82,13 @@ int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *, s64 *, unsigned, bool); int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool); -int bch2_accounting_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_accounting_key_to_text(struct printbuf *, struct disk_accounting_pos *); void bch2_accounting_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_accounting_swab(struct bkey_s); #define bch2_bkey_ops_accounting ((struct bkey_ops) { \ - .key_invalid = bch2_accounting_invalid, \ + .key_validate = bch2_accounting_validate, \ .val_to_text = bch2_accounting_to_text, \ .swab = bch2_accounting_swab, \ .min_val_size = 8, \ diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 84f1cbf6497f..141a4c63142f 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -107,24 +107,23 @@ struct ec_bio { /* Stripes btree keys: */ -int bch2_stripe_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; int ret = 0; bkey_fsck_err_on(bkey_eq(k.k->p, POS_MIN) || - bpos_gt(k.k->p, POS(0, U32_MAX)), c, err, - stripe_pos_bad, + bpos_gt(k.k->p, POS(0, U32_MAX)), + c, stripe_pos_bad, "stripe at bad pos"); - bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), c, err, - stripe_val_size_bad, + bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), + c, stripe_val_size_bad, "incorrect value size (%zu < %u)", bkey_val_u64s(k.k), stripe_val_u64s(s)); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 84a23eeb6249..90962b3c0130 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -8,8 +8,7 @@ enum bch_validate_flags; -int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, @@ -17,7 +16,7 @@ int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_stripe ((struct bkey_ops) { \ - .key_invalid = bch2_stripe_invalid, \ + .key_validate = bch2_stripe_validate, \ .val_to_text = bch2_stripe_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_stripe, \ diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index a268af3e52bf..ab5a7adece10 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -166,6 +166,7 @@ x(0, journal_reclaim_would_deadlock) \ x(EINVAL, fsck) \ x(BCH_ERR_fsck, fsck_fix) \ + x(BCH_ERR_fsck, fsck_delete_bkey) \ x(BCH_ERR_fsck, fsck_ignore) \ x(BCH_ERR_fsck, fsck_errors_not_fixed) \ x(BCH_ERR_fsck, fsck_repair_unimplemented) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index a62b63108820..95afa7bf2020 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -416,6 +416,28 @@ err: return ret; } +int __bch2_bkey_fsck_err(struct bch_fs *c, + struct bkey_s_c k, + enum bch_fsck_flags flags, + enum bch_sb_error_id err, + const char *fmt, ...) +{ + struct printbuf buf = PRINTBUF; + va_list args; + + prt_str(&buf, "invalid bkey "); + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, "\n "); + va_start(args, fmt); + prt_vprintf(&buf, fmt, args); + va_end(args); + prt_str(&buf, ": delete?"); + + int ret = __bch2_fsck_err(c, NULL, flags, err, "%s", buf.buf); + printbuf_exit(&buf); + return ret; +} + void bch2_flush_fsck_errs(struct bch_fs *c) { struct fsck_err_state *s, *n; diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 995e6bba9bad..2f1b86978f36 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -4,6 +4,7 @@ #include #include +#include "bkey_types.h" #include "sb-errors.h" struct bch_dev; @@ -166,24 +167,30 @@ void bch2_flush_fsck_errs(struct bch_fs *); #define fsck_err_on(cond, c, _err_type, ...) \ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -__printf(4, 0) -static inline void bch2_bkey_fsck_err(struct bch_fs *c, - struct printbuf *err_msg, - enum bch_sb_error_id err_type, - const char *fmt, ...) -{ - va_list args; +__printf(5, 6) +int __bch2_bkey_fsck_err(struct bch_fs *, + struct bkey_s_c, + enum bch_fsck_flags, + enum bch_sb_error_id, + const char *, ...); - va_start(args, fmt); - prt_vprintf(err_msg, fmt, args); - va_end(args); -} - -#define bkey_fsck_err(c, _err_msg, _err_type, ...) \ +/* + * for now, bkey fsck errors are always handled by deleting the entire key - + * this will change at some point + */ +#define bkey_fsck_err(c, _err_type, _err_msg, ...) \ do { \ - prt_printf(_err_msg, __VA_ARGS__); \ - bch2_sb_error_count(c, BCH_FSCK_ERR_##_err_type); \ - ret = -BCH_ERR_invalid_bkey; \ + if ((flags & BCH_VALIDATE_silent)) { \ + ret = -BCH_ERR_fsck_delete_bkey; \ + goto fsck_err; \ + } \ + int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX, \ + BCH_FSCK_ERR_##_err_type, \ + _err_msg, ##__VA_ARGS__); \ + if (_ret != -BCH_ERR_fsck_fix && \ + _ret != -BCH_ERR_fsck_ignore) \ + ret = _ret; \ + ret = -BCH_ERR_fsck_delete_bkey; \ goto fsck_err; \ } while (0) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 07973198e35f..4419ad3e454e 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -171,17 +171,16 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, c, err, - btree_ptr_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, + c, btree_ptr_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -192,28 +191,27 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, bch2_bkey_ptrs_to_text(out, c, k); } -int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); int ret = 0; bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, - c, err, btree_ptr_v2_val_too_big, + c, btree_ptr_v2_val_too_big, "value too big (%zu > %zu)", bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); bkey_fsck_err_on(bpos_ge(bp.v->min_key, bp.k->p), - c, err, btree_ptr_v2_min_key_bad, + c, btree_ptr_v2_min_key_bad, "min_key > key"); if (flags & BCH_VALIDATE_write) bkey_fsck_err_on(!bp.v->sectors_written, - c, err, btree_ptr_v2_written_0, + c, btree_ptr_v2_written_0, "sectors_written == 0"); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -399,15 +397,14 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reservation_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); int ret = 0; - bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, c, err, - reservation_key_nr_replicas_invalid, + bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, + c, reservation_key_nr_replicas_invalid, "invalid nr_replicas (%u)", r.v->nr_replicas); fsck_err: return ret; @@ -1102,14 +1099,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, } } - -static int extent_ptr_invalid(struct bch_fs *c, - struct bkey_s_c k, - enum bch_validate_flags flags, - const struct bch_extent_ptr *ptr, - unsigned size_ondisk, - bool metadata, - struct printbuf *err) +static int extent_ptr_validate(struct bch_fs *c, + struct bkey_s_c k, + enum bch_validate_flags flags, + const struct bch_extent_ptr *ptr, + unsigned size_ondisk, + bool metadata) { int ret = 0; @@ -1128,28 +1123,27 @@ static int extent_ptr_invalid(struct bch_fs *c, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); bkey_for_each_ptr(ptrs, ptr2) - bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, c, err, - ptr_to_duplicate_device, + bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, + c, ptr_to_duplicate_device, "multiple pointers to same device (%u)", ptr->dev); - bkey_fsck_err_on(bucket >= nbuckets, c, err, - ptr_after_last_bucket, + bkey_fsck_err_on(bucket >= nbuckets, + c, ptr_after_last_bucket, "pointer past last bucket (%llu > %llu)", bucket, nbuckets); - bkey_fsck_err_on(bucket < first_bucket, c, err, - ptr_before_first_bucket, + bkey_fsck_err_on(bucket < first_bucket, + c, ptr_before_first_bucket, "pointer before first bucket (%llu < %u)", bucket, first_bucket); - bkey_fsck_err_on(bucket_offset + size_ondisk > bucket_size, c, err, - ptr_spans_multiple_buckets, + bkey_fsck_err_on(bucket_offset + size_ondisk > bucket_size, + c, ptr_spans_multiple_buckets, "pointer spans multiple buckets (%u + %u > %u)", bucket_offset, size_ondisk, bucket_size); fsck_err: return ret; } -int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1164,25 +1158,24 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, size_ondisk = btree_sectors(c); bkey_extent_entry_for_each(ptrs, entry) { - bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err, - extent_ptrs_invalid_entry, - "invalid extent entry type (got %u, max %u)", - __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); + bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, + c, extent_ptrs_invalid_entry, + "invalid extent entry type (got %u, max %u)", + __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); bkey_fsck_err_on(bkey_is_btree_ptr(k.k) && - !extent_entry_is_ptr(entry), c, err, - btree_ptr_has_non_ptr, + !extent_entry_is_ptr(entry), + c, btree_ptr_has_non_ptr, "has non ptr field"); switch (extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: - ret = extent_ptr_invalid(c, k, flags, &entry->ptr, - size_ondisk, false, err); + ret = extent_ptr_validate(c, k, flags, &entry->ptr, size_ondisk, false); if (ret) return ret; - bkey_fsck_err_on(entry->ptr.cached && have_ec, c, err, - ptr_cached_and_erasure_coded, + bkey_fsck_err_on(entry->ptr.cached && have_ec, + c, ptr_cached_and_erasure_coded, "cached, erasure coded ptr"); if (!entry->ptr.unwritten) @@ -1199,44 +1192,50 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, case BCH_EXTENT_ENTRY_crc128: crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, c, err, - ptr_crc_uncompressed_size_too_small, + bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, + c, ptr_crc_uncompressed_size_too_small, "checksum offset + key size > uncompressed size"); - bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, err, - ptr_crc_csum_type_unknown, + bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), + c, ptr_crc_csum_type_unknown, "invalid checksum type"); - bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, c, err, - ptr_crc_compression_type_unknown, + bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, + c, ptr_crc_compression_type_unknown, "invalid compression type"); if (bch2_csum_type_is_encryption(crc.csum_type)) { if (nonce == UINT_MAX) nonce = crc.offset + crc.nonce; else if (nonce != crc.offset + crc.nonce) - bkey_fsck_err(c, err, ptr_crc_nonce_mismatch, + bkey_fsck_err(c, ptr_crc_nonce_mismatch, "incorrect nonce"); } - bkey_fsck_err_on(crc_since_last_ptr, c, err, - ptr_crc_redundant, + bkey_fsck_err_on(crc_since_last_ptr, + c, ptr_crc_redundant, "redundant crc entry"); crc_since_last_ptr = true; bkey_fsck_err_on(crc_is_encoded(crc) && (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && - (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), c, err, - ptr_crc_uncompressed_size_too_big, + (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), + c, ptr_crc_uncompressed_size_too_big, "too large encoded extent"); size_ondisk = crc.compressed_size; break; case BCH_EXTENT_ENTRY_stripe_ptr: - bkey_fsck_err_on(have_ec, c, err, - ptr_stripe_redundant, + bkey_fsck_err_on(have_ec, + c, ptr_stripe_redundant, "redundant stripe entry"); have_ec = true; break; case BCH_EXTENT_ENTRY_rebalance: { + /* + * this shouldn't be a fsck error, for forward + * compatibility; the rebalance code should just refetch + * the compression opt if it's unknown + */ +#if 0 const struct bch_extent_rebalance *r = &entry->rebalance; if (!bch2_compression_opt_valid(r->compression)) { @@ -1245,28 +1244,29 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, opt.type, opt.level); return -BCH_ERR_invalid_bkey; } +#endif break; } } } - bkey_fsck_err_on(!nr_ptrs, c, err, - extent_ptrs_no_ptrs, + bkey_fsck_err_on(!nr_ptrs, + c, extent_ptrs_no_ptrs, "no ptrs"); - bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err, - extent_ptrs_too_many_ptrs, + bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, + c, extent_ptrs_too_many_ptrs, "too many ptrs: %u > %u", nr_ptrs, BCH_BKEY_PTRS_MAX); - bkey_fsck_err_on(have_written && have_unwritten, c, err, - extent_ptrs_written_and_unwritten, + bkey_fsck_err_on(have_written && have_unwritten, + c, extent_ptrs_written_and_unwritten, "extent with unwritten and written ptrs"); - bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, c, err, - extent_ptrs_unwritten, + bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, + c, extent_ptrs_unwritten, "has unwritten ptrs"); - bkey_fsck_err_on(crc_since_last_ptr, c, err, - extent_ptrs_redundant_crc, + bkey_fsck_err_on(crc_since_last_ptr, + c, extent_ptrs_redundant_crc, "redundant crc entry"); - bkey_fsck_err_on(have_ec, c, err, - extent_ptrs_redundant_stripe, + bkey_fsck_err_on(have_ec, + c, extent_ptrs_redundant_stripe, "redundant stripe entry"); fsck_err: return ret; diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index facdb8a86eec..1a6ddee48041 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -409,26 +409,26 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_btree_ptr_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_btree_ptr_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_btree_ptr_v2_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, int, struct bkey_s); #define bch2_bkey_ops_btree_ptr ((struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_invalid, \ + .key_validate = bch2_btree_ptr_validate, \ .val_to_text = bch2_btree_ptr_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_extent, \ }) #define bch2_bkey_ops_btree_ptr_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_v2_invalid, \ + .key_validate = bch2_btree_ptr_v2_validate, \ .val_to_text = bch2_btree_ptr_v2_to_text, \ .swab = bch2_ptr_swab, \ .compat = bch2_btree_ptr_v2_compat, \ @@ -441,7 +441,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); #define bch2_bkey_ops_extent ((struct bkey_ops) { \ - .key_invalid = bch2_bkey_ptrs_invalid, \ + .key_validate = bch2_bkey_ptrs_validate, \ .val_to_text = bch2_bkey_ptrs_to_text, \ .swab = bch2_ptr_swab, \ .key_normalize = bch2_extent_normalize, \ @@ -451,13 +451,13 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_reservation_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); #define bch2_bkey_ops_reservation ((struct bkey_ops) { \ - .key_invalid = bch2_reservation_invalid, \ + .key_validate = bch2_reservation_validate, \ .val_to_text = bch2_reservation_to_text, \ .key_merge = bch2_reservation_merge, \ .trigger = bch2_trigger_reservation, \ @@ -683,8 +683,8 @@ bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *); void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_bkey_ptrs_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_ptr_swab(struct bkey_s); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 1e20020eadd1..2be6be33afa3 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -434,100 +434,98 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) return &inode_p->inode.k_i; } -static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) +static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bch_inode_unpacked unpacked; int ret = 0; - bkey_fsck_err_on(k.k->p.inode, c, err, - inode_pos_inode_nonzero, + bkey_fsck_err_on(k.k->p.inode, + c, inode_pos_inode_nonzero, "nonzero k.p.inode"); - bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, c, err, - inode_pos_blockdev_range, + bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, + c, inode_pos_blockdev_range, "fs inode in blockdev range"); - bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), c, err, - inode_unpack_error, + bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), + c, inode_unpack_error, "invalid variable length fields"); - bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, c, err, - inode_checksum_type_invalid, + bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, + c, inode_checksum_type_invalid, "invalid data checksum type (%u >= %u", unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); bkey_fsck_err_on(unpacked.bi_compression && - !bch2_compression_opt_valid(unpacked.bi_compression - 1), c, err, - inode_compression_type_invalid, + !bch2_compression_opt_valid(unpacked.bi_compression - 1), + c, inode_compression_type_invalid, "invalid compression opt %u", unpacked.bi_compression - 1); bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) && - unpacked.bi_nlink != 0, c, err, - inode_unlinked_but_nlink_nonzero, + unpacked.bi_nlink != 0, + c, inode_unlinked_but_nlink_nonzero, "flagged as unlinked but bi_nlink != 0"); - bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), c, err, - inode_subvol_root_but_not_dir, + bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), + c, inode_subvol_root_but_not_dir, "subvolume root but not a directory"); fsck_err: return ret; } -int bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); int ret = 0; - bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } -int bch2_inode_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); int ret = 0; - bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } -int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); int ret = 0; bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || - INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), c, err, - inode_v3_fields_start_bad, + INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), + c, inode_v3_fields_start_bad, "invalid fields_start (got %llu, min %u max %zu)", INODEv3_FIELDS_START(inode.v), INODEv3_FIELDS_START_INITIAL, bkey_val_u64s(inode.k)); - bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } @@ -625,14 +623,13 @@ int bch2_trigger_inode(struct btree_trans *trans, return 0; } -int bch2_inode_generation_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_generation_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(k.k->p.inode, c, err, - inode_pos_inode_nonzero, + bkey_fsck_err_on(k.k->p.inode, + c, inode_pos_inode_nonzero, "nonzero k.p.inode"); fsck_err: return ret; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index da0e4a745099..f1fcb4c58039 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -9,12 +9,12 @@ enum bch_validate_flags; extern const char * const bch2_inode_opts[]; -int bch2_inode_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_inode_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_inode_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); +int bch2_inode_v2_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); +int bch2_inode_v3_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, @@ -22,21 +22,21 @@ int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_inode ((struct bkey_ops) { \ - .key_invalid = bch2_inode_invalid, \ + .key_validate = bch2_inode_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_inode_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_inode_v2_invalid, \ + .key_validate = bch2_inode_v2_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 32, \ }) #define bch2_bkey_ops_inode_v3 ((struct bkey_ops) { \ - .key_invalid = bch2_inode_v3_invalid, \ + .key_validate = bch2_inode_v3_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 48, \ @@ -49,12 +49,12 @@ static inline bool bkey_is_inode(const struct bkey *k) k->type == KEY_TYPE_inode_v3; } -int bch2_inode_generation_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_inode_generation_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_inode_generation ((struct bkey_ops) { \ - .key_invalid = bch2_inode_generation_invalid, \ + .key_validate = bch2_inode_generation_validate, \ .val_to_text = bch2_inode_generation_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 7a833a3f1c63..7664b68e6a15 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -332,7 +332,6 @@ static int journal_validate_key(struct bch_fs *c, { int write = flags & BCH_VALIDATE_write; void *next = vstruct_next(entry); - struct printbuf buf = PRINTBUF; int ret = 0; if (journal_entry_err_on(!k->k.u64s, @@ -368,34 +367,21 @@ static int journal_validate_key(struct bch_fs *c, bch2_bkey_compat(level, btree_id, version, big_endian, write, NULL, bkey_to_packed(k)); - if (bch2_bkey_invalid(c, bkey_i_to_s_c(k), - __btree_node_type(level, btree_id), write, &buf)) { - printbuf_reset(&buf); - journal_entry_err_msg(&buf, version, jset, entry); - prt_newline(&buf); - printbuf_indent_add(&buf, 2); - - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); - prt_newline(&buf); - bch2_bkey_invalid(c, bkey_i_to_s_c(k), - __btree_node_type(level, btree_id), write, &buf); - - mustfix_fsck_err(c, journal_entry_bkey_invalid, - "%s", buf.buf); - + ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), + __btree_node_type(level, btree_id), write); + if (ret == -BCH_ERR_fsck_delete_bkey) { le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); journal_entry_null_range(vstruct_next(entry), next); - - printbuf_exit(&buf); return FSCK_DELETED_KEY; } + if (ret) + goto fsck_err; if (write) bch2_bkey_compat(level, btree_id, version, big_endian, write, NULL, bkey_to_packed(k)); fsck_err: - printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 83b1586cb371..96f2f4f8c397 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -10,14 +10,13 @@ #include "recovery.h" /* KEY_TYPE_lru is obsolete: */ -int bch2_lru_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_lru_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(!lru_pos_time(k.k->p), c, err, - lru_entry_at_time_0, + bkey_fsck_err_on(!lru_pos_time(k.k->p), + c, lru_entry_at_time_0, "lru entry at time=0"); fsck_err: return ret; diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index 5bd8974a7f11..e6a7d8241bb8 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -33,14 +33,13 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l) return BCH_LRU_read; } -int bch2_lru_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_lru_pos_to_text(struct printbuf *, struct bpos); #define bch2_bkey_ops_lru ((struct bkey_ops) { \ - .key_invalid = bch2_lru_invalid, \ + .key_validate = bch2_lru_validate, \ .val_to_text = bch2_lru_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index a0cca8b70e0a..c32a05e252e2 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -59,13 +59,13 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = { .to_text = bch2_sb_quota_to_text, }; -int bch2_quota_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_quota_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, c, err, - quota_type_invalid, + bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, + c, quota_type_invalid, "invalid quota type (%llu >= %u)", k.k->p.inode, QTYP_NR); fsck_err: diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h index 02d37a332218..a62abcc5332a 100644 --- a/fs/bcachefs/quota.h +++ b/fs/bcachefs/quota.h @@ -8,12 +8,11 @@ enum bch_validate_flags; extern const struct bch_sb_field_ops bch_sb_field_ops_quota; -int bch2_quota_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_quota_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_quota ((struct bkey_ops) { \ - .key_invalid = bch2_quota_invalid, \ + .key_validate = bch2_quota_validate, \ .val_to_text = bch2_quota_to_text, \ .min_val_size = 32, \ }) diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 5f92715e1525..e59c0abb4772 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -29,15 +29,14 @@ static inline unsigned bkey_type_to_indirect(const struct bkey *k) /* reflink pointers */ -int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reflink_p_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); int ret = 0; bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad), - c, err, reflink_p_front_pad_bad, + c, reflink_p_front_pad_bad, "idx < front_pad (%llu < %u)", le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); fsck_err: @@ -256,11 +255,10 @@ int bch2_trigger_reflink_p(struct btree_trans *trans, /* indirect extents */ -int bch2_reflink_v_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { - return bch2_bkey_ptrs_invalid(c, k, flags, err); + return bch2_bkey_ptrs_validate(c, k, flags); } void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, @@ -311,9 +309,8 @@ int bch2_trigger_reflink_v(struct btree_trans *trans, /* indirect inline data */ -int bch2_indirect_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_indirect_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index e894f3a2c67a..51afe11d8ed6 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -4,41 +4,37 @@ enum bch_validate_flags; -int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); +int bch2_reflink_p_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_reflink_p ((struct bkey_ops) { \ - .key_invalid = bch2_reflink_p_invalid, \ + .key_validate = bch2_reflink_p_validate, \ .val_to_text = bch2_reflink_p_to_text, \ .key_merge = bch2_reflink_p_merge, \ .trigger = bch2_trigger_reflink_p, \ .min_val_size = 16, \ }) -int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); +int bch2_reflink_v_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \ - .key_invalid = bch2_reflink_v_invalid, \ + .key_validate = bch2_reflink_v_validate, \ .val_to_text = bch2_reflink_v_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_reflink_v, \ .min_val_size = 8, \ }) -int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_indirect_inline_data_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_indirect_inline_data_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_indirect_inline_data(struct btree_trans *, @@ -47,7 +43,7 @@ int bch2_trigger_indirect_inline_data(struct btree_trans *, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \ - .key_invalid = bch2_indirect_inline_data_invalid, \ + .key_validate = bch2_indirect_inline_data_validate, \ .val_to_text = bch2_indirect_inline_data_to_text, \ .trigger = bch2_trigger_indirect_inline_data, \ .min_val_size = 8, \ diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 96744b1a76f5..8b18a9b483a4 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -31,15 +31,14 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(t.v->root_snapshot)); } -int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_snapshot_tree_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1)), c, err, - snapshot_tree_pos_bad, + bkey_lt(k.k->p, POS(0, 1)), + c, snapshot_tree_pos_bad, "bad pos"); fsck_err: return ret; @@ -225,55 +224,54 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(s.v->skip[2])); } -int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_snapshot s; u32 i, id; int ret = 0; bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1)), c, err, - snapshot_pos_bad, + bkey_lt(k.k->p, POS(0, 1)), + c, snapshot_pos_bad, "bad pos"); s = bkey_s_c_to_snapshot(k); id = le32_to_cpu(s.v->parent); - bkey_fsck_err_on(id && id <= k.k->p.offset, c, err, - snapshot_parent_bad, + bkey_fsck_err_on(id && id <= k.k->p.offset, + c, snapshot_parent_bad, "bad parent node (%u <= %llu)", id, k.k->p.offset); - bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), c, err, - snapshot_children_not_normalized, + bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), + c, snapshot_children_not_normalized, "children not normalized"); - bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err, - snapshot_child_duplicate, + bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], + c, snapshot_child_duplicate, "duplicate child nodes"); for (i = 0; i < 2; i++) { id = le32_to_cpu(s.v->children[i]); - bkey_fsck_err_on(id >= k.k->p.offset, c, err, - snapshot_child_bad, + bkey_fsck_err_on(id >= k.k->p.offset, + c, snapshot_child_bad, "bad child node (%u >= %llu)", id, k.k->p.offset); } if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || - le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err, - snapshot_skiplist_not_normalized, + le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), + c, snapshot_skiplist_not_normalized, "skiplist not normalized"); for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { id = le32_to_cpu(s.v->skip[i]); - bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err, - snapshot_skiplist_bad, + bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), + c, snapshot_skiplist_bad, "bad skiplist node %u", id); } } diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 31b0ee03e962..eb5ef64221d6 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -5,11 +5,11 @@ enum bch_validate_flags; void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_tree_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_snapshot_tree_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); #define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_tree_invalid, \ + .key_validate = bch2_snapshot_tree_validate, \ .val_to_text = bch2_snapshot_tree_to_text, \ .min_val_size = 8, \ }) @@ -19,14 +19,13 @@ struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *); int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_snapshot ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_invalid, \ + .key_validate = bch2_snapshot_validate, \ .val_to_text = bch2_snapshot_to_text, \ .trigger = bch2_mark_snapshot, \ .min_val_size = 24, \ diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index f56720b55862..dbe834cb349f 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -207,23 +207,23 @@ int bch2_check_subvol_children(struct bch_fs *c) /* Subvolumes: */ -int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_subvolume_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k); int ret = 0; bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || - bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, - subvol_pos_bad, + bkey_gt(k.k->p, SUBVOL_POS_MAX), + c, subvol_pos_bad, "invalid pos"); - bkey_fsck_err_on(!subvol.v->snapshot, c, err, - subvol_snapshot_bad, + bkey_fsck_err_on(!subvol.v->snapshot, + c, subvol_snapshot_bad, "invalid snapshot"); - bkey_fsck_err_on(!subvol.v->inode, c, err, - subvol_inode_bad, + bkey_fsck_err_on(!subvol.v->inode, + c, subvol_inode_bad, "invalid inode"); fsck_err: return ret; diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index afa5e871efb2..a8299ba2cab2 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -10,15 +10,14 @@ enum bch_validate_flags; int bch2_check_subvols(struct bch_fs *); int bch2_check_subvol_children(struct bch_fs *); -int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_subvolume_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_subvolume ((struct bkey_ops) { \ - .key_invalid = bch2_subvolume_invalid, \ + .key_validate = bch2_subvolume_validate, \ .val_to_text = bch2_subvolume_to_text, \ .trigger = bch2_subvolume_trigger, \ .min_val_size = 16, \ diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index c11bf6dacc2c..f2b4c17a0307 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -70,17 +70,16 @@ const struct bch_hash_desc bch2_xattr_hash_desc = { .cmp_bkey = xattr_cmp_bkey, }; -int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_xattr_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len)); int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, c, err, - xattr_val_size_too_small, + bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, + c, xattr_val_size_too_small, "value too small (%zu < %u)", bkey_val_u64s(k.k), val_u64s); @@ -88,17 +87,17 @@ int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, val_u64s = xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len) + 4); - bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, c, err, - xattr_val_size_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, + c, xattr_val_size_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), val_u64s); - bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), c, err, - xattr_invalid_type, + bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), + c, xattr_invalid_type, "invalid type (%u)", xattr.v->x_type); - bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), c, err, - xattr_name_invalid_chars, + bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), + c, xattr_name_invalid_chars, "xattr name has invalid characters"); fsck_err: return ret; diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h index 1574b9eb4c85..c188a5ad64ce 100644 --- a/fs/bcachefs/xattr.h +++ b/fs/bcachefs/xattr.h @@ -6,12 +6,11 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc; -int bch2_xattr_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_xattr_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_xattr ((struct bkey_ops) { \ - .key_invalid = bch2_xattr_invalid, \ + .key_validate = bch2_xattr_validate, \ .val_to_text = bch2_xattr_to_text, \ .min_val_size = 8, \ }) -- cgit From 5132b99bb62664c02bd6c0dd62ad3fedc75294d8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Aug 2024 02:35:10 -0400 Subject: bcachefs: Kill __bch2_accounting_mem_mod() The next patch will be adding a disk accounting counter type which is not kept in the in-memory eytzinger tree. As prep, fold __bch2_accounting_mem_mod() into bch2_accounting_mem_mod_locked() so that we can check for that counter type and bail out without calling bpos_to_disk_accounting_pos() twice. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 4 +-- fs/bcachefs/disk_accounting.c | 4 +-- fs/bcachefs/disk_accounting.h | 54 ++++++++++++++++++---------------------- 3 files changed, 28 insertions(+), 34 deletions(-) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 1a1e9d2036da..a0101d9c5d83 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -712,7 +712,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, a->k.version = journal_pos_to_bversion(&trans->journal_res, (u64 *) entry - (u64 *) trans->journal_entries); BUG_ON(bversion_zero(a->k.version)); - ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false); + ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false, false); if (ret) goto revert_fs_usage; } @@ -798,7 +798,7 @@ revert_fs_usage: struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); bch2_accounting_neg(a); - bch2_accounting_mem_mod_locked(trans, a.c, false); + bch2_accounting_mem_mod_locked(trans, a.c, false, false); bch2_accounting_neg(a); } percpu_up_read(&c->mark_lock); diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index f059cbffdf23..e57b40623cd9 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -566,7 +566,7 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i; accounting_key_init(&k_i.k, &acc_k, src_v, nr); - bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false); + bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false, false); preempt_disable(); struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); @@ -595,7 +595,7 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) return 0; percpu_down_read(&c->mark_lock); - int ret = __bch2_accounting_mem_mod(c, bkey_s_c_to_accounting(k), false); + int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), false, true); percpu_up_read(&c->mark_lock); if (bch2_accounting_key_is_zero(bkey_s_c_to_accounting(k)) && diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index b92f8c2e3054..653090667aaa 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -106,41 +106,17 @@ static inline int accounting_pos_cmp(const void *_l, const void *_r) int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, bool); void bch2_accounting_mem_gc(struct bch_fs *); -static inline int __bch2_accounting_mem_mod(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc) -{ - struct bch_accounting_mem *acc = &c->accounting; - unsigned idx; - - EBUG_ON(gc && !acc->gc_running); - - while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, &a.k->p)) >= acc->k.nr) { - int ret = bch2_accounting_mem_insert(c, a, gc); - if (ret) - return ret; - } - - struct accounting_mem_entry *e = &acc->k.data[idx]; - - EBUG_ON(bch2_accounting_counters(a.k) != e->nr_counters); - - for (unsigned i = 0; i < bch2_accounting_counters(a.k); i++) - this_cpu_add(e->v[gc][i], a.v->d[i]); - return 0; -} - /* * Update in memory counters so they match the btree update we're doing; called * from transaction commit path */ -static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) +static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc, bool read) { struct bch_fs *c = trans->c; + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, a.k->p); - if (!gc) { - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, a.k->p); - + if (!gc && !read) { switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: trans->fs_usage_delta.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; @@ -161,13 +137,31 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru } } - return __bch2_accounting_mem_mod(c, a, gc); + struct bch_accounting_mem *acc = &c->accounting; + unsigned idx; + + EBUG_ON(gc && !acc->gc_running); + + while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, &a.k->p)) >= acc->k.nr) { + int ret = bch2_accounting_mem_insert(c, a, gc); + if (ret) + return ret; + } + + struct accounting_mem_entry *e = &acc->k.data[idx]; + + EBUG_ON(bch2_accounting_counters(a.k) != e->nr_counters); + + for (unsigned i = 0; i < bch2_accounting_counters(a.k); i++) + this_cpu_add(e->v[gc][i], a.v->d[i]); + return 0; } static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) { percpu_down_read(&trans->c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, a, gc); + int ret = bch2_accounting_mem_mod_locked(trans, a, gc, false); percpu_up_read(&trans->c->mark_lock); return ret; } -- cgit From 58474f76a770bcc79d4b2d7232e4d6650e732b50 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Aug 2024 02:27:36 -0400 Subject: bcachefs: bcachefs_metadata_version_disk_accounting_inum This adds another disk accounting counter to track usage per inode number (any snapshot ID). This will be used for a couple things: - It'll give us a way to tell the user how much space a given file ista consuming in all snapshots; i.e. how much extra space it's consuming due to snapshot versioning. - It counts number of extents and total size of extents (both in btree keyspace sectors and actual disk usage), meaning it gives us average extent size: that is, it'll let us cheaply find fragmented files that should be defragmented. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 ++- fs/bcachefs/buckets.c | 14 ++++++++++++++ fs/bcachefs/disk_accounting.c | 3 +++ fs/bcachefs/disk_accounting.h | 3 +++ fs/bcachefs/disk_accounting_format.h | 8 +++++++- fs/bcachefs/sb-downgrade.c | 5 ++++- 6 files changed, 33 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index b25f86356728..c75f2e0f32bb 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -676,7 +676,8 @@ struct bch_sb_field_ext { x(mi_btree_bitmap, BCH_VERSION(1, 7)) \ x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \ x(disk_accounting_v2, BCH_VERSION(1, 9)) \ - x(disk_accounting_v3, BCH_VERSION(1, 10)) + x(disk_accounting_v3, BCH_VERSION(1, 10)) \ + x(disk_accounting_inum, BCH_VERSION(1, 11)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 9f7004e941ce..b69ef4b3de6e 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -810,6 +810,20 @@ static int __trigger_extent(struct btree_trans *trans, ret = bch2_disk_accounting_mod(trans, &acc_btree_key, &replicas_sectors, 1, gc); if (ret) return ret; + } else { + bool insert = !(flags & BTREE_TRIGGER_overwrite); + struct disk_accounting_pos acc_inum_key = { + .type = BCH_DISK_ACCOUNTING_inum, + .inum.inum = k.k->p.inode, + }; + s64 v[3] = { + insert ? 1 : -1, + insert ? k.k->size : -((s64) k.k->size), + replicas_sectors, + }; + ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc); + if (ret) + return ret; } if (bch2_bkey_rebalance_opts(k)) { diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index e57b40623cd9..e972e2bca546 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -768,6 +768,9 @@ void bch2_verify_accounting_clean(struct bch_fs *c) if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) continue; + if (acc_k.type == BCH_DISK_ACCOUNTING_inum) + continue; + bch2_accounting_mem_read(c, k.k->p, v, nr); if (memcmp(a.v->d, v, nr * sizeof(u64))) { diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 653090667aaa..f29fd0dd9581 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -116,6 +116,9 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, a.k->p); + if (acc_k.type == BCH_DISK_ACCOUNTING_inum) + return 0; + if (!gc && !read) { switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h index a93cf26ff4a9..7b6e6c97e6aa 100644 --- a/fs/bcachefs/disk_accounting_format.h +++ b/fs/bcachefs/disk_accounting_format.h @@ -103,7 +103,8 @@ static inline bool data_type_is_hidden(enum bch_data_type type) x(compression, 4) \ x(snapshot, 5) \ x(btree, 6) \ - x(rebalance_work, 7) + x(rebalance_work, 7) \ + x(inum, 8) enum disk_accounting_type { #define x(f, nr) BCH_DISK_ACCOUNTING_##f = nr, @@ -136,6 +137,10 @@ struct bch_acct_btree { __u32 id; } __packed; +struct bch_acct_inum { + __u64 inum; +} __packed; + struct bch_acct_rebalance_work { }; @@ -152,6 +157,7 @@ struct disk_accounting_pos { struct bch_acct_snapshot snapshot; struct bch_acct_btree btree; struct bch_acct_rebalance_work rebalance_work; + struct bch_acct_inum inum; } __packed; } __packed; struct bpos _pad; diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 9f82d497d9e0..650a1f77ca40 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -72,7 +72,10 @@ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad, \ BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \ - BCH_FSCK_ERR_accounting_key_junk_at_end) + BCH_FSCK_ERR_accounting_key_junk_at_end) \ + x(disk_accounting_inum, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_accounting_mismatch) #define DOWNGRADE_TABLE() \ x(bucket_stripe_sectors, \ -- cgit From 73c34b0b85d46bf9c2c0b367aeaffa1e2481b136 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 29 Jul 2024 13:44:33 -0700 Subject: xfs: attr forks require attr, not attr2 It turns out that I misunderstood the difference between the attr and attr2 feature bits. "attr" means that at some point an attr fork was created somewhere in the filesystem. "attr2" means that inodes have variable-sized forks, but says nothing about whether or not there actually /are/ attr forks in the system. If we have an attr fork, we only need to check that attr is set. Fixes: 99d9d8d05da26 ("xfs: scrub inode block mappings") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/scrub/bmap.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 24a15bf784f1..5ab2ac53c920 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -938,7 +938,13 @@ xchk_bmap( } break; case XFS_ATTR_FORK: - if (!xfs_has_attr(mp) && !xfs_has_attr2(mp)) + /* + * "attr" means that an attr fork was created at some point in + * the life of this filesystem. "attr2" means that inodes have + * variable-sized data/attr fork areas. Hence we only check + * attr here. + */ + if (!xfs_has_attr(mp)) xchk_ino_set_corrupt(sc, sc->ip->i_ino); break; default: -- cgit From 04d6dbb55301a29aeb9a197e6b0012cdc265f1e4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 4 Aug 2024 14:39:34 -0700 Subject: xfs: revert AIL TASK_KILLABLE threshold In commit 9adf40249e6c, we changed the behavior of the AIL thread to set its own task state to KILLABLE whenever the timeout value is nonzero. Unfortunately, this missed the fact that xfsaild_push will return 50ms (aka a longish sleep) when we reach the push target or the AIL becomes empty, so xfsaild goes to sleep for a long period of time in uninterruptible D state. This results in artificially high load averages because KILLABLE processes are UNINTERRUPTIBLE, which contributes to load average even though the AIL is asleep waiting for someone to interrupt it. It's not blocked on IOs or anything, but people scrap ps for processes that look like they're stuck in D state, so restore the previous threshold. Fixes: 9adf40249e6c ("xfs: AIL doesn't need manual pushing") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/xfs_trans_ail.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 0fafcc9f3dbe..8ede9d099d1f 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -644,7 +644,12 @@ xfsaild( set_freezable(); while (1) { - if (tout) + /* + * Long waits of 50ms or more occur when we've run out of items + * to push, so we only want uninterruptible state if we're + * actually blocked on something. + */ + if (tout && tout <= 20) set_current_state(TASK_KILLABLE|TASK_FREEZABLE); else set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); -- cgit From 8d16762047c627073955b7ed171a36addaf7b1ff Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 4 Aug 2024 14:39:57 -0700 Subject: xfs: conditionally allow FS_XFLAG_REALTIME changes if S_DAX is set If a file has the S_DAX flag (aka fsdax access mode) set, we cannot allow users to change the realtime flag unless the datadev and rtdev both support fsdax access modes. Even if there are no extents allocated to the file, the setattr thread could be racing with another thread that has already started down the write code paths. Fixes: ba23cba9b3bdc ("fs: allow per-device dax status checking for filesystems") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/xfs_ioctl.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 4e933db75b12..6b13666d4e96 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -483,6 +483,17 @@ xfs_ioctl_setattr_xflags( /* Can't change realtime flag if any extents are allocated. */ if (ip->i_df.if_nextents || ip->i_delayed_blks) return -EINVAL; + + /* + * If S_DAX is enabled on this file, we can only switch the + * device if both support fsdax. We can't update S_DAX because + * there might be other threads walking down the access paths. + */ + if (IS_DAX(VFS_I(ip)) && + (mp->m_ddev_targp->bt_daxdev == NULL || + (mp->m_rtdev_targp && + mp->m_rtdev_targp->bt_daxdev == NULL))) + return -EINVAL; } if (rtflag) { -- cgit From f94511df53bb792e505c98662971434c7995388a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 7 Aug 2024 11:37:31 +0100 Subject: arm64: uaccess: correct thinko in __get_mem_asm() In the CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y version of __get_mem_asm(), we incorrectly use _ASM_EXTABLE_##type##ACCESS_ERR() such that upon a fault the extable fixup handler writes -EFAULT into "%w0", which is the register containing 'x' (the result of the load). This was a thinko in commit: 86a6a68febfcf57b ("arm64: start using 'asm goto' for get_user() when available") Prior to that commit _ASM_EXTABLE_##type##ACCESS_ERR_ZERO() was used such that the extable fixup handler wrote -EFAULT into "%w0" (the register containing 'err'), and zero into "%w1" (the register containing 'x'). When the 'err' variable was removed, the extable entry was updated incorrectly. Writing -EFAULT to the value register is unnecessary but benign: * We never want -EFAULT in the value register, and previously this would have been zeroed in the extable fixup handler. * In __get_user_error() the value is overwritten with zero explicitly in the error path. * The asm goto outputs cannot be used when the goto label is taken, as older compilers (e.g. clang < 16.0.0) do not guarantee that asm goto outputs are usable in this path and may use a stale value rather than the value in an output register. Consequently, zeroing in the extable fixup handler is insufficient to ensure callers see zero in the error path. * The expected usage of unsafe_get_user() and get_kernel_nofault() requires that the value is not consumed in the error path. Some versions of GCC would mis-compile asm goto with outputs, and erroneously omit subsequent assignments, breaking the error path handling in __get_user_error(). This was discussed at: https://lore.kernel.org/lkml/ZpfxLrJAOF2YNqCk@J2N7QTR9R3.cambridge.arm.com/ ... and was fixed by removing support for asm goto with outputs on those broken compilers in commit: f2f6a8e887172503 ("init/Kconfig: remove CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND") With that out of the way, we can safely replace the usage of _ASM_EXTABLE_##type##ACCESS_ERR() with _ASM_EXTABLE_##type##ACCESS(), leaving the value register unchanged in the case a fault is taken, as was originally intended. This matches other architectures and matches our __put_mem_asm(). Signed-off-by: Mark Rutland Cc: Will Deacon Link: https://lore.kernel.org/r/20240807103731.2498893-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 28f665e0975a..1aa4ecb73429 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -188,7 +188,7 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) #define __get_mem_asm(load, reg, x, addr, label, type) \ asm_goto_output( \ "1: " load " " reg "0, [%1]\n" \ - _ASM_EXTABLE_##type##ACCESS_ERR(1b, %l2, %w0) \ + _ASM_EXTABLE_##type##ACCESS(1b, %l2) \ : "=r" (x) \ : "r" (addr) : : label) #else -- cgit From a21dcf0ea8566ebbe011c79d6ed08cdfea771de3 Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Mon, 5 Aug 2024 11:30:24 +0800 Subject: arm64: ACPI: NUMA: initialize all values of acpi_early_node_map to NUMA_NO_NODE Currently, only acpi_early_node_map[0] was initialized to NUMA_NO_NODE. To ensure all the values were properly initialized, switch to initialize all of them to NUMA_NO_NODE. Fixes: e18962491696 ("arm64: numa: rework ACPI NUMA initialization") Cc: # 4.19.x Reported-by: Andrew Jones Suggested-by: Andrew Jones Signed-off-by: Haibo Xu Reviewed-by: Anshuman Khandual Reviewed-by: Sunil V L Reviewed-by: Andrew Jones Acked-by: Catalin Marinas Acked-by: Lorenzo Pieralisi Reviewed-by: Hanjun Guo Link: https://lore.kernel.org/r/853d7f74aa243f6f5999e203246f0d1ae92d2b61.1722828421.git.haibo1.xu@intel.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/acpi_numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index 0c036a9a3c33..2465f291c7e1 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -27,7 +27,7 @@ #include -static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE }; +static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE }; int __init acpi_numa_get_nid(unsigned int cpu) { -- cgit From 2251db28edcc70b7ee8a8c6bcbaecf752b3ea5ec Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 6 Aug 2024 13:49:13 +0200 Subject: ASoC: codecs: wcd937x: Fix missing de-assert of reset GPIO The device never comes online from a reset/shutdown state, because the driver de-asserts reset GPIO when requesting it but then, at the end of probe() through wcd937x_reset(), leaves it asserted. Cc: stable@vger.kernel.org Fixes: 9be3ec196da4 ("ASoC: codecs: wcd937x: add wcd937x codec driver") Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240806114913.40022-1-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd937x.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wcd937x.c b/sound/soc/codecs/wcd937x.c index 13926f4b0d9f..af296b77a723 100644 --- a/sound/soc/codecs/wcd937x.c +++ b/sound/soc/codecs/wcd937x.c @@ -242,10 +242,9 @@ static const struct regmap_irq_chip wcd937x_regmap_irq_chip = { static void wcd937x_reset(struct wcd937x_priv *wcd937x) { - usleep_range(20, 30); - gpiod_set_value(wcd937x->reset_gpio, 1); - + usleep_range(20, 30); + gpiod_set_value(wcd937x->reset_gpio, 0); usleep_range(20, 30); } -- cgit From 57d5af2660e9443b081eeaf1c373b3ce48477828 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Wed, 14 Aug 2024 20:42:37 +0530 Subject: spi: spi-cadence-quadspi: Fix OSPI NOR failures during system resume Its necessary to call pm_runtime_force_*() hooks as part of system suspend/resume calls so that the runtime_pm hooks get called. This ensures latest state of the IP is cached and restored during system sleep. This is especially true if runtime autosuspend is enabled as runtime suspend hooks may not be called at all before system sleeps. Without this patch, OSPI NOR enumeration (READ_ID) fails during resume as context saved during suspend path is inconsistent. Fixes: 078d62de433b ("spi: cadence-qspi: add system-wide suspend and resume callbacks") Signed-off-by: Vignesh Raghavendra Link: https://patch.msgid.link/20240814151237.3856184-1-vigneshr@ti.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 05ebb03d319f..d4607cb89c48 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -2000,13 +2000,25 @@ static int cqspi_runtime_resume(struct device *dev) static int cqspi_suspend(struct device *dev) { struct cqspi_st *cqspi = dev_get_drvdata(dev); + int ret; - return spi_controller_suspend(cqspi->host); + ret = spi_controller_suspend(cqspi->host); + if (ret) + return ret; + + return pm_runtime_force_suspend(dev); } static int cqspi_resume(struct device *dev) { struct cqspi_st *cqspi = dev_get_drvdata(dev); + int ret; + + ret = pm_runtime_force_resume(dev); + if (ret) { + dev_err(dev, "pm_runtime_force_resume failed on resume\n"); + return ret; + } return spi_controller_resume(cqspi->host); } -- cgit From 71833e79a42178d8a50b5081c98c78ace9325628 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 14 Aug 2024 13:16:49 +0100 Subject: i2c: Use IS_REACHABLE() for substituting empty ACPI functions Replace IS_ENABLED() with IS_REACHABLE() to substitute empty stubs for: i2c_acpi_get_i2c_resource() i2c_acpi_client_count() i2c_acpi_find_bus_speed() i2c_acpi_new_device_by_fwnode() i2c_adapter *i2c_acpi_find_adapter_by_handle() i2c_acpi_waive_d0_probe() commit f17c06c6608a ("i2c: Fix conditional for substituting empty ACPI functions") partially fixed this conditional to depend on CONFIG_I2C, but used IS_ENABLED(), which is wrong since CONFIG_I2C is tristate. CONFIG_ACPI is boolean but let's also change it to use IS_REACHABLE() to future-proof it against becoming tristate. Somehow despite testing various combinations of CONFIG_I2C and CONFIG_ACPI we missed the combination CONFIG_I2C=m, CONFIG_ACPI=y. Signed-off-by: Richard Fitzgerald Fixes: f17c06c6608a ("i2c: Fix conditional for substituting empty ACPI functions") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408141333.gYnaitcV-lkp@intel.com/ Reviewed-by: Takashi Iwai Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 7eedd0c662da..377def497298 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -1066,7 +1066,7 @@ static inline int of_i2c_get_board_info(struct device *dev, struct acpi_resource; struct acpi_resource_i2c_serialbus; -#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_I2C) +#if IS_REACHABLE(CONFIG_ACPI) && IS_REACHABLE(CONFIG_I2C) bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c); int i2c_acpi_client_count(struct acpi_device *adev); -- cgit From 2848ff28d180bd63a95da8e5dcbcdd76c1beeb7b Mon Sep 17 00:00:00 2001 From: Mitchell Levy Date: Mon, 12 Aug 2024 13:44:12 -0700 Subject: x86/fpu: Avoid writing LBR bit to IA32_XSS unless supported There are two distinct CPU features related to the use of XSAVES and LBR: whether LBR is itself supported and whether XSAVES supports LBR. The LBR subsystem correctly checks both in intel_pmu_arch_lbr_init(), but the XSTATE subsystem does not. The LBR bit is only removed from xfeatures_mask_independent when LBR is not supported by the CPU, but there is no validation of XSTATE support. If XSAVES does not support LBR the write to IA32_XSS causes a #GP fault, leaving the state of IA32_XSS unchanged, i.e. zero. The fault is handled with a warning and the boot continues. Consequently the next XRSTORS which tries to restore supervisor state fails with #GP because the RFBM has zero for all supervisor features, which does not match the XCOMP_BV field. As XFEATURE_MASK_FPSTATE includes supervisor features setting up the FPU causes a #GP, which ends up in fpu_reset_from_exception_fixup(). That fails due to the same problem resulting in recursive #GPs until the kernel runs out of stack space and double faults. Prevent this by storing the supported independent features in fpu_kernel_cfg during XSTATE initialization and use that cached value for retrieving the independent feature bits to be written into IA32_XSS. [ tglx: Massaged change log ] Fixes: f0dccc9da4c0 ("x86/fpu/xstate: Support dynamic supervisor feature for LBR") Suggested-by: Thomas Gleixner Signed-off-by: Mitchell Levy Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240812-xsave-lbr-fix-v3-1-95bac1bf62f4@gmail.com --- arch/x86/include/asm/fpu/types.h | 7 +++++++ arch/x86/kernel/fpu/xstate.c | 3 +++ arch/x86/kernel/fpu/xstate.h | 4 ++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index eb17f31b06d2..de16862bf230 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -591,6 +591,13 @@ struct fpu_state_config { * even without XSAVE support, i.e. legacy features FP + SSE */ u64 legacy_features; + /* + * @independent_features: + * + * Features that are supported by XSAVES, but not managed as part of + * the FPU core, such as LBR + */ + u64 independent_features; }; /* FPU state configuration information */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index c5a026fee5e0..1339f8328db5 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -788,6 +788,9 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) goto out_disable; } + fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features & + XFEATURE_MASK_INDEPENDENT; + /* * Clear XSAVE features that are disabled in the normal CPUID. */ diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 2ee0b9c53dcc..afb404cd2059 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -62,9 +62,9 @@ static inline u64 xfeatures_mask_supervisor(void) static inline u64 xfeatures_mask_independent(void) { if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) - return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR; + return fpu_kernel_cfg.independent_features & ~XFEATURE_MASK_LBR; - return XFEATURE_MASK_INDEPENDENT; + return fpu_kernel_cfg.independent_features; } /* XSAVE/XRSTOR wrapper functions */ -- cgit From ff9f065318e17a1a97981d9e535fcfc6ce5d5614 Mon Sep 17 00:00:00 2001 From: YR Yang Date: Thu, 1 Aug 2024 16:43:26 +0800 Subject: ASoC: mediatek: mt8188: Mark AFE_DAC_CON0 register as volatile Add AFE Control Register 0 to the volatile_register. AFE_DAC_CON0 can be modified by both the SOF and ALSA drivers. If this register is read and written in cache mode, the cached value might not reflect the actual value when the register is modified by another driver. It can cause playback or capture failures. Therefore, it is necessary to add AFE_DAC_CON0 to the list of volatile registers. Signed-off-by: YR Yang Reviewed-by: Fei Shao Reviewed-by: Trevor Wu Link: https://patch.msgid.link/20240801084326.1472-1-yr.yang@mediatek.com Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8188/mt8188-afe-pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/mediatek/mt8188/mt8188-afe-pcm.c b/sound/soc/mediatek/mt8188/mt8188-afe-pcm.c index ccb6c1f3adc7..73e5c63aeec8 100644 --- a/sound/soc/mediatek/mt8188/mt8188-afe-pcm.c +++ b/sound/soc/mediatek/mt8188/mt8188-afe-pcm.c @@ -2748,6 +2748,7 @@ static bool mt8188_is_volatile_reg(struct device *dev, unsigned int reg) case AFE_ASRC12_NEW_CON9: case AFE_LRCK_CNT: case AFE_DAC_MON0: + case AFE_DAC_CON0: case AFE_DL2_CUR: case AFE_DL3_CUR: case AFE_DL6_CUR: -- cgit From 14d069d92951a3e150c0a81f2ca3b93e54da913b Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 13 Aug 2024 09:12:53 -0700 Subject: i2c: tegra: Do not mark ACPI devices as irq safe On ACPI machines, the tegra i2c module encounters an issue due to a mutex being called inside a spinlock. This leads to the following bug: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:585 ... Call trace: __might_sleep __mutex_lock_common mutex_lock_nested acpi_subsys_runtime_resume rpm_resume tegra_i2c_xfer The problem arises because during __pm_runtime_resume(), the spinlock &dev->power.lock is acquired before rpm_resume() is called. Later, rpm_resume() invokes acpi_subsys_runtime_resume(), which relies on mutexes, triggering the error. To address this issue, devices on ACPI are now marked as not IRQ-safe, considering the dependency of acpi_subsys_runtime_resume() on mutexes. Fixes: bd2fdedbf2ba ("i2c: tegra: Add the ACPI support") Cc: # v5.17+ Co-developed-by: Michael van der Westhuizen Signed-off-by: Michael van der Westhuizen Signed-off-by: Breno Leitao Reviewed-by: Dmitry Osipenko Reviewed-by: Andy Shevchenko Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-tegra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 85b31edc558d..1df5b4204142 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1802,9 +1802,9 @@ static int tegra_i2c_probe(struct platform_device *pdev) * domain. * * VI I2C device shouldn't be marked as IRQ-safe because VI I2C won't - * be used for atomic transfers. + * be used for atomic transfers. ACPI device is not IRQ safe also. */ - if (!IS_VI(i2c_dev)) + if (!IS_VI(i2c_dev) && !has_acpi_companion(i2c_dev->dev)) pm_runtime_irq_safe(i2c_dev->dev); pm_runtime_enable(i2c_dev->dev); -- cgit From 1f7574a1f9a892dd79e0dfc03f38573e9c399ec2 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 15 Jul 2024 22:17:44 +0300 Subject: arm64: dts: qcom: disable GPU on x1e80100 by default The GPU on X1E80100 requires ZAP 'shader' file to be useful. Since the file is signed by the OEM keys and might be not available by default, disable the GPU node and drop the firmware name from the x1e80100.dtsi file. Devices not being fused to use OEM keys can specify generic location at `qcom/x1e80100/gen70500_zap.mbn` while enabling the GPU. The CRD and QCP were lucky enough to work with the default settings, so reenable the GPU on those platforms and provide correct firmware-name (including the SoC subdir). Fixes: 721e38301b79 ("arm64: dts: qcom: x1e80100: Add gpu support") Cc: Akhil P Oommen Reviewed-by: Konrad Dybcio Signed-off-by: Dmitry Baryshkov Reviewed-by: Caleb Connolly Reviewed-by: Akhil P Oommen Link: https://lore.kernel.org/r/20240715-x1e8-zap-name-v3-1-e7a5258c3c2e@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100-crd.dts | 8 ++++++++ arch/arm64/boot/dts/qcom/x1e80100-qcp.dts | 8 ++++++++ arch/arm64/boot/dts/qcom/x1e80100.dtsi | 3 ++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts index 82f34dfe4090..e17ab8251e2a 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts @@ -636,6 +636,14 @@ }; }; +&gpu { + status = "okay"; + + zap-shader { + firmware-name = "qcom/x1e80100/gen70500_zap.mbn"; + }; +}; + &i2c0 { clock-frequency = <400000>; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts index 2dcf2a17511d..8098e6730ae5 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts @@ -606,6 +606,14 @@ }; }; +&gpu { + status = "okay"; + + zap-shader { + firmware-name = "qcom/x1e80100/gen70500_zap.mbn"; + }; +}; + &lpass_tlmm { spkr_01_sd_n_active: spkr-01-sd-n-active-state { pins = "gpio12"; diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index c13811a4ef90..29cb3dddbb6f 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -3167,9 +3167,10 @@ interconnects = <&gem_noc MASTER_GFX3D 0 &mc_virt SLAVE_EBI1 0>; interconnect-names = "gfx-mem"; + status = "disabled"; + zap-shader { memory-region = <&gpu_microcode_mem>; - firmware-name = "qcom/gen70500_zap.mbn"; }; gpu_opp_table: opp-table { -- cgit From dfbe93f32c12f5628bd83303e10ba63621c259ae Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 16 Jul 2024 12:35:03 +0200 Subject: arm64: dts: qcom: x1e80100: Fix Adreno SMMU global interrupt Fix the unfortunate off-by-one. Fixes: 721e38301b79 ("arm64: dts: qcom: x1e80100: Add gpu support") Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240716-topic-h_bits-v1-1-f6c5d3ff982c@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 29cb3dddbb6f..cd732ef88cd8 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -3301,7 +3301,7 @@ reg = <0x0 0x03da0000 0x0 0x40000>; #iommu-cells = <2>; #global-interrupts = <1>; - interrupts = , + interrupts = , , , , -- cgit From 9960085a3a82c58d3323c1c20b991db6045063b0 Mon Sep 17 00:00:00 2001 From: Murali Nalajala Date: Wed, 14 Aug 2024 15:32:44 -0700 Subject: firmware: qcom: scm: Mark get_wq_ctx() as atomic call Currently get_wq_ctx() is wrongly configured as a standard call. When two SMC calls are in sleep and one SMC wakes up, it calls get_wq_ctx() to resume the corresponding sleeping thread. But if get_wq_ctx() is interrupted, goes to sleep and another SMC call is waiting to be allocated a waitq context, it leads to a deadlock. To avoid this get_wq_ctx() must be an atomic call and can't be a standard SMC call. Hence mark get_wq_ctx() as a fast call. Fixes: 6bf325992236 ("firmware: qcom: scm: Add wait-queue handling logic") Cc: stable@vger.kernel.org Signed-off-by: Murali Nalajala Signed-off-by: Unnathi Chalicheemala Reviewed-by: Elliot Berman Link: https://lore.kernel.org/r/20240814223244.40081-1-quic_uchalich@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/firmware/qcom/qcom_scm-smc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/qcom/qcom_scm-smc.c b/drivers/firmware/qcom/qcom_scm-smc.c index dca5f3f1883b..2b4c2826f572 100644 --- a/drivers/firmware/qcom/qcom_scm-smc.c +++ b/drivers/firmware/qcom/qcom_scm-smc.c @@ -73,7 +73,7 @@ int scm_get_wq_ctx(u32 *wq_ctx, u32 *flags, u32 *more_pending) struct arm_smccc_res get_wq_res; struct arm_smccc_args get_wq_ctx = {0}; - get_wq_ctx.args[0] = ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL, + get_wq_ctx.args[0] = ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64, ARM_SMCCC_OWNER_SIP, SCM_SMC_FNID(QCOM_SCM_SVC_WAITQ, QCOM_SCM_WAITQ_GET_WQ_CTX)); -- cgit From 1c753d001a259d0278fe318a1ed3c8aa5f3ea09e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 31 Jul 2024 09:44:56 +0200 Subject: firmware: qcom: tzmem: fix virtual-to-physical address conversion We currently only correctly convert the virtual address passed by the caller to qcom_tzmem_to_phys() if it corresponds to the base address of the chunk. If the user wants to convert some pointer at an offset relative to that base address, we'll return 0. Let's change the implementation of qcom_tzmem_to_phys(): iterate over the chunks and try to call gen_pool_virt_to_phys() just-in-time instead of trying to call it only once when creating the chunk. Fixes: 84f5a7b67b61 ("firmware: qcom: add a dedicated TrustZone buffer allocator") Reported-by: Johan Hovold Closes: https://lore.kernel.org/lkml/20240729095542.21097-1-johan+linaro@kernel.org/ Acked-by: Andrew Halaney Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20240731-tzmem-efivars-fix-v2-1-f0e84071ec07@linaro.org Signed-off-by: Bjorn Andersson --- drivers/firmware/qcom/qcom_tzmem.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/firmware/qcom/qcom_tzmem.c b/drivers/firmware/qcom/qcom_tzmem.c index 17948cfc82e7..caedeef0059c 100644 --- a/drivers/firmware/qcom/qcom_tzmem.c +++ b/drivers/firmware/qcom/qcom_tzmem.c @@ -40,7 +40,6 @@ struct qcom_tzmem_pool { }; struct qcom_tzmem_chunk { - phys_addr_t paddr; size_t size; struct qcom_tzmem_pool *owner; }; @@ -385,7 +384,6 @@ again: return NULL; } - chunk->paddr = gen_pool_virt_to_phys(pool->genpool, vaddr); chunk->size = size; chunk->owner = pool; @@ -431,25 +429,37 @@ void qcom_tzmem_free(void *vaddr) EXPORT_SYMBOL_GPL(qcom_tzmem_free); /** - * qcom_tzmem_to_phys() - Map the virtual address of a TZ buffer to physical. - * @vaddr: Virtual address of the buffer allocated from a TZ memory pool. + * qcom_tzmem_to_phys() - Map the virtual address of TZ memory to physical. + * @vaddr: Virtual address of memory allocated from a TZ memory pool. * - * Can be used in any context. The address must have been returned by a call - * to qcom_tzmem_alloc(). + * Can be used in any context. The address must point to memory allocated + * using qcom_tzmem_alloc(). * - * Returns: Physical address of the buffer. + * Returns: + * Physical address mapped from the virtual or 0 if the mapping failed. */ phys_addr_t qcom_tzmem_to_phys(void *vaddr) { struct qcom_tzmem_chunk *chunk; + struct radix_tree_iter iter; + void __rcu **slot; + phys_addr_t ret; guard(spinlock_irqsave)(&qcom_tzmem_chunks_lock); - chunk = radix_tree_lookup(&qcom_tzmem_chunks, (unsigned long)vaddr); - if (!chunk) - return 0; + radix_tree_for_each_slot(slot, &qcom_tzmem_chunks, &iter, 0) { + chunk = radix_tree_deref_slot_protected(slot, + &qcom_tzmem_chunks_lock); - return chunk->paddr; + ret = gen_pool_virt_to_phys(chunk->owner->genpool, + (unsigned long)vaddr); + if (ret == -1) + continue; + + return ret; + } + + return 0; } EXPORT_SYMBOL_GPL(qcom_tzmem_to_phys); -- cgit From 924fc22c282edbf93869b150d9e1b47e0b10485e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 31 Jul 2024 09:44:57 +0200 Subject: firmware: qcom: qseecom: remove unused functions qseecom_scm_dev(), qseecom_dma_alloc() and qseecom_dma_free() are no longer used following the conversion to using tzmem. Remove them. Fixes: 6612103ec35a ("firmware: qcom: qseecom: convert to using the TZ allocator") Reviewed-by: Andrew Halaney Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20240731-tzmem-efivars-fix-v2-2-f0e84071ec07@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_qseecom.h | 45 ------------------------------ 1 file changed, 45 deletions(-) diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h index 1dc5b3b50aa9..3387897bf368 100644 --- a/include/linux/firmware/qcom/qcom_qseecom.h +++ b/include/linux/firmware/qcom/qcom_qseecom.h @@ -25,51 +25,6 @@ struct qseecom_client { u32 app_id; }; -/** - * qseecom_scm_dev() - Get the SCM device associated with the QSEECOM client. - * @client: The QSEECOM client device. - * - * Returns the SCM device under which the provided QSEECOM client device - * operates. This function is intended to be used for DMA allocations. - */ -static inline struct device *qseecom_scm_dev(struct qseecom_client *client) -{ - return client->aux_dev.dev.parent->parent; -} - -/** - * qseecom_dma_alloc() - Allocate DMA memory for a QSEECOM client. - * @client: The QSEECOM client to allocate the memory for. - * @size: The number of bytes to allocate. - * @dma_handle: Pointer to where the DMA address should be stored. - * @gfp: Allocation flags. - * - * Wrapper function for dma_alloc_coherent(), allocating DMA memory usable for - * TZ/QSEECOM communication. Refer to dma_alloc_coherent() for details. - */ -static inline void *qseecom_dma_alloc(struct qseecom_client *client, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - return dma_alloc_coherent(qseecom_scm_dev(client), size, dma_handle, gfp); -} - -/** - * dma_free_coherent() - Free QSEECOM DMA memory. - * @client: The QSEECOM client for which the memory has been allocated. - * @size: The number of bytes allocated. - * @cpu_addr: Virtual memory address to free. - * @dma_handle: DMA memory address to free. - * - * Wrapper function for dma_free_coherent(), freeing memory previously - * allocated with qseecom_dma_alloc(). Refer to dma_free_coherent() for - * details. - */ -static inline void qseecom_dma_free(struct qseecom_client *client, size_t size, - void *cpu_addr, dma_addr_t dma_handle) -{ - return dma_free_coherent(qseecom_scm_dev(client), size, cpu_addr, dma_handle); -} - /** * qcom_qseecom_app_send() - Send to and receive data from a given QSEE app. * @client: The QSEECOM client associated with the target app. -- cgit From 0863bffda1131fd2fa9c05b653ad9ee3d8db127e Mon Sep 17 00:00:00 2001 From: Griffin Kroah-Hartman Date: Wed, 14 Aug 2024 13:17:47 +0200 Subject: Revert "serial: 8250_omap: Set the console genpd always on if no console suspend" This reverts commit 68e6939ea9ec3d6579eadeab16060339cdeaf940. Kevin reported that this causes a crash during suspend on platforms that dont use PM domains. Link: https://lore.kernel.org/r/7ha5hgpchq.fsf@baylibre.com Cc: Thomas Richard Fixes: 68e6939ea9ec ("serial: 8250_omap: Set the console genpd always on if no console suspend") Cc: stable Reported-by: Kevin Hilman Signed-off-by: Griffin Kroah-Hartman Link: https://lore.kernel.org/r/20240814111747.82371-1-griffin@kroah.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 1af9aed99c65..afef1dd4ddf4 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -27,7 +27,6 @@ #include #include #include -#include #include "8250.h" @@ -119,12 +118,6 @@ #define UART_OMAP_TO_L 0x26 #define UART_OMAP_TO_H 0x27 -/* - * Copy of the genpd flags for the console. - * Only used if console suspend is disabled - */ -static unsigned int genpd_flags_console; - struct omap8250_priv { void __iomem *membase; int line; @@ -1655,7 +1648,6 @@ static int omap8250_suspend(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); - struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain); int err = 0; serial8250_suspend_port(priv->line); @@ -1666,19 +1658,8 @@ static int omap8250_suspend(struct device *dev) if (!device_may_wakeup(dev)) priv->wer = 0; serial_out(up, UART_OMAP_WER, priv->wer); - if (uart_console(&up->port)) { - if (console_suspend_enabled) - err = pm_runtime_force_suspend(dev); - else { - /* - * The pd shall not be powered-off (no console suspend). - * Make copy of genpd flags before to set it always on. - * The original value is restored during the resume. - */ - genpd_flags_console = genpd->flags; - genpd->flags |= GENPD_FLAG_ALWAYS_ON; - } - } + if (uart_console(&up->port) && console_suspend_enabled) + err = pm_runtime_force_suspend(dev); flush_work(&priv->qos_work); return err; @@ -1688,16 +1669,12 @@ static int omap8250_resume(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); - struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain); int err; if (uart_console(&up->port) && console_suspend_enabled) { - if (console_suspend_enabled) { - err = pm_runtime_force_resume(dev); - if (err) - return err; - } else - genpd->flags = genpd_flags_console; + err = pm_runtime_force_resume(dev); + if (err) + return err; } serial8250_resume_port(priv->line); -- cgit From f75c235565f90c4a17b125e47f1c68ef6b8c2bce Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 14 Aug 2024 02:09:53 -0700 Subject: arm64: Fix KASAN random tag seed initialization Currently, kasan_init_sw_tags() is called before setup_per_cpu_areas(), so per_cpu(prng_state, cpu) accesses the same address regardless of the value of "cpu", and the same seed value gets copied to the percpu area for every CPU. Fix this by moving the call to smp_prepare_boot_cpu(), which is the first architecture hook after setup_per_cpu_areas(). Fixes: 3c9e3aa11094 ("kasan: add tag related helper functions") Fixes: 3f41b6093823 ("kasan: fix random seed generation for tag-based mode") Signed-off-by: Samuel Holland Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20240814091005.969756-1-samuel.holland@sifive.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/setup.c | 3 --- arch/arm64/kernel/smp.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index a096e2451044..b22d28ec8028 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -355,9 +355,6 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) smp_init_cpus(); smp_build_mpidr_hash(); - /* Init percpu seeds for random tags after cpus are set up. */ - kasan_init_sw_tags(); - #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Make sure init_thread_info.ttbr0 always generates translation diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5e18fbcee9a2..f01f0fd7b7fe 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -467,6 +467,8 @@ void __init smp_prepare_boot_cpu(void) init_gic_priority_masking(); kasan_init_hw_tags(); + /* Init percpu seeds for random tags after cpus are set up. */ + kasan_init_sw_tags(); } /* -- cgit From 52dd070c62e4ae2b5e7411b920e3f7a64235ecfb Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 14 Aug 2024 20:47:40 +0800 Subject: pmdomain: imx: wait SSAR when i.MX93 power domain on With "quiet" set in bootargs, there is power domain failure: "imx93_power_domain 44462400.power-domain: pd_off timeout: name: 44462400.power-domain, stat: 4" The current power on opertation takes ISO state as power on finished flag, but it is wrong. Before powering on operation really finishes, powering off comes and powering off will never finish because the last powering on still not finishes, so the following powering off actually not trigger hardware state machine to run. SSAR is the last step when powering on a domain, so need to wait SSAR done when powering on. Since EdgeLock Enclave(ELE) handshake is involved in the flow, enlarge the waiting time to 10ms for both on and off to avoid timeout. Cc: stable@vger.kernel.org Fixes: 0a0f7cc25d4a ("soc: imx: add i.MX93 SRC power domain driver") Reviewed-by: Jacky Bai Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20240814124740.2778952-1-peng.fan@oss.nxp.com Signed-off-by: Ulf Hansson --- drivers/pmdomain/imx/imx93-pd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pmdomain/imx/imx93-pd.c b/drivers/pmdomain/imx/imx93-pd.c index 1e94b499c19b..d750a7dc58d2 100644 --- a/drivers/pmdomain/imx/imx93-pd.c +++ b/drivers/pmdomain/imx/imx93-pd.c @@ -20,6 +20,7 @@ #define FUNC_STAT_PSW_STAT_MASK BIT(0) #define FUNC_STAT_RST_STAT_MASK BIT(2) #define FUNC_STAT_ISO_STAT_MASK BIT(4) +#define FUNC_STAT_SSAR_STAT_MASK BIT(8) struct imx93_power_domain { struct generic_pm_domain genpd; @@ -50,7 +51,7 @@ static int imx93_pd_on(struct generic_pm_domain *genpd) writel(val, addr + MIX_SLICE_SW_CTRL_OFF); ret = readl_poll_timeout(addr + MIX_FUNC_STAT_OFF, val, - !(val & FUNC_STAT_ISO_STAT_MASK), 1, 10000); + !(val & FUNC_STAT_SSAR_STAT_MASK), 1, 10000); if (ret) { dev_err(domain->dev, "pd_on timeout: name: %s, stat: %x\n", genpd->name, val); return ret; @@ -72,7 +73,7 @@ static int imx93_pd_off(struct generic_pm_domain *genpd) writel(val, addr + MIX_SLICE_SW_CTRL_OFF); ret = readl_poll_timeout(addr + MIX_FUNC_STAT_OFF, val, - val & FUNC_STAT_PSW_STAT_MASK, 1, 1000); + val & FUNC_STAT_PSW_STAT_MASK, 1, 10000); if (ret) { dev_err(domain->dev, "pd_off timeout: name: %s, stat: %x\n", genpd->name, val); return ret; -- cgit From d33d26036a0274b472299d7dcdaa5fb34329f91b Mon Sep 17 00:00:00 2001 From: Roland Xu Date: Thu, 15 Aug 2024 10:58:13 +0800 Subject: rtmutex: Drop rt_mutex::wait_lock before scheduling rt_mutex_handle_deadlock() is called with rt_mutex::wait_lock held. In the good case it returns with the lock held and in the deadlock case it emits a warning and goes into an endless scheduling loop with the lock held, which triggers the 'scheduling in atomic' warning. Unlock rt_mutex::wait_lock in the dead lock case before issuing the warning and dropping into the schedule for ever loop. [ tglx: Moved unlock before the WARN(), removed the pointless comment, massaged changelog, added Fixes tag ] Fixes: 3d5c9340d194 ("rtmutex: Handle deadlock detection smarter") Signed-off-by: Roland Xu Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/ME0P300MB063599BEF0743B8FA339C2CECC802@ME0P300MB0635.AUSP300.PROD.OUTLOOK.COM --- kernel/locking/rtmutex.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 88d08eeb8bc0..fba1229f1de6 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1644,6 +1644,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, } static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, + struct rt_mutex_base *lock, struct rt_mutex_waiter *w) { /* @@ -1656,10 +1657,10 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, if (build_ww_mutex() && w->ww_ctx) return; - /* - * Yell loudly and stop the task right here. - */ + raw_spin_unlock_irq(&lock->wait_lock); + WARN(1, "rtmutex deadlock detected\n"); + while (1) { set_current_state(TASK_INTERRUPTIBLE); rt_mutex_schedule(); @@ -1713,7 +1714,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, } else { __set_current_state(TASK_RUNNING); remove_waiter(lock, waiter); - rt_mutex_handle_deadlock(ret, chwalk, waiter); + rt_mutex_handle_deadlock(ret, chwalk, lock, waiter); } /* -- cgit From af8e119f52e9c13e556be9e03f27957554a84656 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 15 Aug 2024 17:11:17 +0300 Subject: xhci: Fix Panther point NULL pointer deref at full-speed re-enumeration re-enumerating full-speed devices after a failed address device command can trigger a NULL pointer dereference. Full-speed devices may need to reconfigure the endpoint 0 Max Packet Size value during enumeration. Usb core calls usb_ep0_reinit() in this case, which ends up calling xhci_configure_endpoint(). On Panther point xHC the xhci_configure_endpoint() function will additionally check and reserve bandwidth in software. Other hosts do this in hardware If xHC address device command fails then a new xhci_virt_device structure is allocated as part of re-enabling the slot, but the bandwidth table pointers are not set up properly here. This triggers the NULL pointer dereference the next time usb_ep0_reinit() is called and xhci_configure_endpoint() tries to check and reserve bandwidth [46710.713538] usb 3-1: new full-speed USB device number 5 using xhci_hcd [46710.713699] usb 3-1: Device not responding to setup address. [46710.917684] usb 3-1: Device not responding to setup address. [46711.125536] usb 3-1: device not accepting address 5, error -71 [46711.125594] BUG: kernel NULL pointer dereference, address: 0000000000000008 [46711.125600] #PF: supervisor read access in kernel mode [46711.125603] #PF: error_code(0x0000) - not-present page [46711.125606] PGD 0 P4D 0 [46711.125610] Oops: Oops: 0000 [#1] PREEMPT SMP PTI [46711.125615] CPU: 1 PID: 25760 Comm: kworker/1:2 Not tainted 6.10.3_2 #1 [46711.125620] Hardware name: Gigabyte Technology Co., Ltd. [46711.125623] Workqueue: usb_hub_wq hub_event [usbcore] [46711.125668] RIP: 0010:xhci_reserve_bandwidth (drivers/usb/host/xhci.c Fix this by making sure bandwidth table pointers are set up correctly after a failed address device command, and additionally by avoiding checking for bandwidth in cases like this where no actual endpoints are added or removed, i.e. only context for default control endpoint 0 is evaluated. Reported-by: Karel Balej Closes: https://lore.kernel.org/linux-usb/D3CKQQAETH47.1MUO22RTCH2O3@matfyz.cz/ Cc: stable@vger.kernel.org Fixes: 651aaf36a7d7 ("usb: xhci: Handle USB transaction error on address command") Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240815141117.2702314-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0a8cf6c17f82..efdf4c228b8c 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -2837,7 +2837,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, xhci->num_active_eps); return -ENOMEM; } - if ((xhci->quirks & XHCI_SW_BW_CHECKING) && + if ((xhci->quirks & XHCI_SW_BW_CHECKING) && !ctx_change && xhci_reserve_bandwidth(xhci, virt_dev, command->in_ctx)) { if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) xhci_free_host_resources(xhci, ctrl_ctx); @@ -4200,8 +4200,10 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_unlock(&xhci->mutex); ret = xhci_disable_slot(xhci, udev->slot_id); xhci_free_virt_device(xhci, udev->slot_id); - if (!ret) - xhci_alloc_dev(hcd, udev); + if (!ret) { + if (xhci_alloc_dev(hcd, udev) == 1) + xhci_setup_addressable_virt_dev(xhci, udev); + } kfree(command->completion); kfree(command); return -EPROTO; -- cgit From 164199615ae230ace4519141285f06766d6d8036 Mon Sep 17 00:00:00 2001 From: Yuntao Liu Date: Thu, 15 Aug 2024 08:49:23 +0000 Subject: ASoC: amd: acp: fix module autoloading Add MODULE_DEVICE_TABLE(), so modules could be properly autoloaded based on the alias from platform_device_id table. Fixes: 9d8a7be88b336 ("ASoC: amd: acp: Add legacy sound card support for Chrome audio") Signed-off-by: Yuntao Liu Link: https://patch.msgid.link/20240815084923.756476-1-liuyuntao12@huawei.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-legacy-mach.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/amd/acp/acp-legacy-mach.c b/sound/soc/amd/acp/acp-legacy-mach.c index 47c3b5f167f5..0d529e32e552 100644 --- a/sound/soc/amd/acp/acp-legacy-mach.c +++ b/sound/soc/amd/acp/acp-legacy-mach.c @@ -227,6 +227,8 @@ static const struct platform_device_id board_ids[] = { }, { } }; +MODULE_DEVICE_TABLE(platform, board_ids); + static struct platform_driver acp_asoc_audio = { .driver = { .pm = &snd_soc_pm_ops, -- cgit From 9bb5e74b2bf88fbb024bb15ded3b011e02c673be Mon Sep 17 00:00:00 2001 From: Griffin Kroah-Hartman Date: Thu, 15 Aug 2024 11:49:20 +0200 Subject: Revert "misc: fastrpc: Restrict untrusted app to attach to privileged PD" This reverts commit bab2f5e8fd5d2f759db26b78d9db57412888f187. Joel reported that this commit breaks userspace and stops sensors in SDM845 from working. Also breaks other qcom SoC devices running postmarketOS. Cc: stable Cc: Ekansh Gupta Cc: Dmitry Baryshkov Reported-by: Joel Selvaraj Link: https://lore.kernel.org/r/9a9f5646-a554-4b65-8122-d212bb665c81@umsystem.edu Signed-off-by: Griffin Kroah-Hartman Acked-by: Srinivas Kandagatla Fixes: bab2f5e8fd5d ("misc: fastrpc: Restrict untrusted app to attach to privileged PD") Link: https://lore.kernel.org/r/20240815094920.8242-1-griffin@kroah.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 22 +++------------------- include/uapi/misc/fastrpc.h | 3 --- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 5204fda51da3..339d126414d4 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -2085,16 +2085,6 @@ err_invoke: return err; } -static int is_attach_rejected(struct fastrpc_user *fl) -{ - /* Check if the device node is non-secure */ - if (!fl->is_secure_dev) { - dev_dbg(&fl->cctx->rpdev->dev, "untrusted app trying to attach to privileged DSP PD\n"); - return -EACCES; - } - return 0; -} - static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -2107,19 +2097,13 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, err = fastrpc_invoke(fl, argp); break; case FASTRPC_IOCTL_INIT_ATTACH: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_attach(fl, ROOT_PD); + err = fastrpc_init_attach(fl, ROOT_PD); break; case FASTRPC_IOCTL_INIT_ATTACH_SNS: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_attach(fl, SENSORS_PD); + err = fastrpc_init_attach(fl, SENSORS_PD); break; case FASTRPC_IOCTL_INIT_CREATE_STATIC: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_create_static_process(fl, argp); + err = fastrpc_init_create_static_process(fl, argp); break; case FASTRPC_IOCTL_INIT_CREATE: err = fastrpc_init_create_process(fl, argp); diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index 91583690bddc..f33d914d8f46 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -8,14 +8,11 @@ #define FASTRPC_IOCTL_ALLOC_DMA_BUFF _IOWR('R', 1, struct fastrpc_alloc_dma_buf) #define FASTRPC_IOCTL_FREE_DMA_BUFF _IOWR('R', 2, __u32) #define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_invoke) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH _IO('R', 4) #define FASTRPC_IOCTL_INIT_CREATE _IOWR('R', 5, struct fastrpc_init_create) #define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) #define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static) #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) -- cgit From aae6b81260fd9a7224f7eb4fc440d625852245bb Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 12 Aug 2024 10:43:48 -0400 Subject: Bluetooth: HCI: Invert LE State quirk to be opt-out rather then opt-in This inverts the LE State quirk so by default we assume the controllers would report valid states rather than invalid which is how quirks normally behave, also this would result in HCI command failing it the LE States are really broken thus exposing the controllers that are really broken in this respect. Link: https://github.com/bluez/bluez/issues/584 Fixes: 220915857e29 ("Bluetooth: Adding driver and quirk defs for multi-role LE") Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.c | 10 ---------- drivers/bluetooth/btintel_pcie.c | 3 --- drivers/bluetooth/btmtksdio.c | 3 --- drivers/bluetooth/btrtl.c | 1 - drivers/bluetooth/btusb.c | 4 ++-- drivers/bluetooth/hci_qca.c | 4 ++-- drivers/bluetooth/hci_vhci.c | 2 -- include/net/bluetooth/hci.h | 17 ++++++++++------- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_event.c | 2 +- 10 files changed, 16 insertions(+), 32 deletions(-) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 2ebc970e6573..7d5e4de64e3c 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2945,9 +2945,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) INTEL_ROM_LEGACY_NO_WBS_SUPPORT)) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - if (ver.hw_variant == 0x08 && ver.fw_variant == 0x22) - set_bit(HCI_QUIRK_VALID_LE_STATES, - &hdev->quirks); err = btintel_legacy_rom_setup(hdev, &ver); break; @@ -2956,7 +2953,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) case 0x12: /* ThP */ case 0x13: /* HrP */ case 0x14: /* CcP */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); fallthrough; case 0x0c: /* WsP */ /* Apply the device specific HCI quirks @@ -3048,9 +3044,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) /* These variants don't seem to support LE Coded PHY */ set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks); - /* Set Valid LE States quirk */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, ver.hw_variant); @@ -3076,9 +3069,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) */ set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - /* Apply LE States quirk from solar onwards */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, INTEL_HW_VARIANT(ver_tlv.cnvi_bt)); diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 0d1a0415557b..1c7631f22c52 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1180,9 +1180,6 @@ static int btintel_pcie_setup(struct hci_dev *hdev) */ set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - /* Apply LE States quirk from solar onwards */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, INTEL_HW_VARIANT(ver_tlv.cnvi_bt)); diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index 39d6898497a4..497e4c87f5be 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -1148,9 +1148,6 @@ static int btmtksdio_setup(struct hci_dev *hdev) } } - /* Valid LE States quirk for MediaTek 7921 */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - break; case 0x7663: case 0x7668: diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index f2f37143c454..fd7991ea7672 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -1287,7 +1287,6 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev) case CHIP_ID_8852C: case CHIP_ID_8851B: case CHIP_ID_8852BT: - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); /* RTL8852C needs to transmit mSBC data continuously without diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index acdba5d77694..51d9d4532dda 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3956,8 +3956,8 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_WIDEBAND_SPEECH) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - if (id->driver_info & BTUSB_VALID_LE_STATES) - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); + if (!(id->driver_info & BTUSB_VALID_LE_STATES)) + set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks); if (id->driver_info & BTUSB_DIGIANSWER) { data->cmdreq_type = USB_TYPE_VENDOR; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 45adc1560d94..4b1ad7ea5b95 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2474,8 +2474,8 @@ static int qca_serdev_probe(struct serdev_device *serdev) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - if (data->capabilities & QCA_CAP_VALID_LE_STATES) - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); + if (!(data->capabilities & QCA_CAP_VALID_LE_STATES)) + set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks); } return 0; diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index c4046f8f1985..43e9ac5a3324 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -425,8 +425,6 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) if (opcode & 0x80) set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); hci_free_dev(hdev); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index e372a88e8c3f..d1d073089f38 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -206,14 +206,17 @@ enum { */ HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, - /* When this quirk is set, the controller has validated that - * LE states reported through the HCI_LE_READ_SUPPORTED_STATES are - * valid. This mechanism is necessary as many controllers have - * been seen has having trouble initiating a connectable - * advertisement despite the state combination being reported as - * supported. + /* When this quirk is set, the LE states reported through the + * HCI_LE_READ_SUPPORTED_STATES are invalid/broken. + * + * This mechanism is necessary as many controllers have been seen has + * having trouble initiating a connectable advertisement despite the + * state combination being reported as supported. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. */ - HCI_QUIRK_VALID_LE_STATES, + HCI_QUIRK_BROKEN_LE_STATES, /* When this quirk is set, then erroneous data reporting * is ignored. This is mainly due to the fact that the HCI diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 31020891fc68..e449dba698f3 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -825,7 +825,7 @@ extern struct mutex hci_cb_list_lock; } while (0) #define hci_dev_le_state_simultaneous(hdev) \ - (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && \ + (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) && \ (hdev->le_states[4] & 0x08) && /* Central */ \ (hdev->le_states[4] & 0x40) && /* Peripheral */ \ (hdev->le_states[3] & 0x10)) /* Simultaneous */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d0c118c47f6c..1c82dcdf6e8f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5920,7 +5920,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, * while we have an existing one in peripheral role. */ if (hdev->conn_hash.le_num_peripheral > 0 && - (!test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) || + (test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) || !(hdev->le_states[3] & 0x10))) return NULL; -- cgit From 932021a11805b9da4bd6abf66fe233cccd59fe0e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 12 Aug 2024 11:22:08 -0400 Subject: Bluetooth: hci_core: Fix LE quote calculation Function hci_sched_le needs to update the respective counter variable inplace other the likes of hci_quote_sent would attempt to use the possible outdated value of conn->{le_cnt,acl_cnt}. Link: https://github.com/bluez/bluez/issues/915 Fixes: 73d80deb7bdf ("Bluetooth: prioritizing data over HCI") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 06da8ac13dca..f25a21f532aa 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3664,19 +3664,19 @@ static void hci_sched_le(struct hci_dev *hdev) { struct hci_chan *chan; struct sk_buff *skb; - int quote, cnt, tmp; + int quote, *cnt, tmp; BT_DBG("%s", hdev->name); if (!hci_conn_num(hdev, LE_LINK)) return; - cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt; + cnt = hdev->le_pkts ? &hdev->le_cnt : &hdev->acl_cnt; - __check_timeout(hdev, cnt, LE_LINK); + __check_timeout(hdev, *cnt, LE_LINK); - tmp = cnt; - while (cnt && (chan = hci_chan_sent(hdev, LE_LINK, "e))) { + tmp = *cnt; + while (*cnt && (chan = hci_chan_sent(hdev, LE_LINK, "e))) { u32 priority = (skb_peek(&chan->data_q))->priority; while (quote-- && (skb = skb_peek(&chan->data_q))) { BT_DBG("chan %p skb %p len %d priority %u", chan, skb, @@ -3691,7 +3691,7 @@ static void hci_sched_le(struct hci_dev *hdev) hci_send_frame(hdev, skb); hdev->le_last_tx = jiffies; - cnt--; + (*cnt)--; chan->sent++; chan->conn->sent++; @@ -3701,12 +3701,7 @@ static void hci_sched_le(struct hci_dev *hdev) } } - if (hdev->le_pkts) - hdev->le_cnt = cnt; - else - hdev->acl_cnt = cnt; - - if (cnt != tmp) + if (*cnt != tmp) hci_prio_recalculate(hdev, LE_LINK); } -- cgit From 28cd47f75185c4818b0fb1b46f2f02faaba96376 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 30 Aug 2023 15:08:06 -0700 Subject: Bluetooth: SMP: Fix assumption of Central always being Initiator SMP initiator role shall be considered the one that initiates the pairing procedure with SMP_CMD_PAIRING_REQ: BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part H page 1557: Figure 2.1: LE pairing phases Note that by sending SMP_CMD_SECURITY_REQ it doesn't change the role to be Initiator. Link: https://github.com/bluez/bluez/issues/567 Fixes: b28b4943660f ("Bluetooth: Add strict checks for allowed SMP PDUs") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/smp.c | 144 ++++++++++++++++++++++++++-------------------------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 1e7ea3a4b7ef..4f9fdf400584 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -914,7 +914,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, * Confirms and the responder Enters the passkey. */ if (smp->method == OVERLAP) { - if (hcon->role == HCI_ROLE_MASTER) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) smp->method = CFM_PASSKEY; else smp->method = REQ_PASSKEY; @@ -964,7 +964,7 @@ static u8 smp_confirm(struct smp_chan *smp) smp_send_cmd(smp->conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp); - if (conn->hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM); else SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); @@ -980,7 +980,8 @@ static u8 smp_random(struct smp_chan *smp) int ret; bt_dev_dbg(conn->hcon->hdev, "conn %p %s", conn, - conn->hcon->out ? "initiator" : "responder"); + test_bit(SMP_FLAG_INITIATOR, &smp->flags) ? "initiator" : + "responder"); ret = smp_c1(smp->tk, smp->rrnd, smp->preq, smp->prsp, hcon->init_addr_type, &hcon->init_addr, @@ -994,7 +995,7 @@ static u8 smp_random(struct smp_chan *smp) return SMP_CONFIRM_FAILED; } - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { u8 stk[16]; __le64 rand = 0; __le16 ediv = 0; @@ -1256,14 +1257,15 @@ static void smp_distribute_keys(struct smp_chan *smp) rsp = (void *) &smp->prsp[1]; /* The responder sends its keys first */ - if (hcon->out && (smp->remote_key_dist & KEY_DIST_MASK)) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags) && + (smp->remote_key_dist & KEY_DIST_MASK)) { smp_allow_key_dist(smp); return; } req = (void *) &smp->preq[1]; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { keydist = &rsp->init_key_dist; *keydist &= req->init_key_dist; } else { @@ -1432,7 +1434,7 @@ static int sc_mackey_and_ltk(struct smp_chan *smp, u8 mackey[16], u8 ltk[16]) struct hci_conn *hcon = smp->conn->hcon; u8 *na, *nb, a[7], b[7]; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { na = smp->prnd; nb = smp->rrnd; } else { @@ -1460,7 +1462,7 @@ static void sc_dhkey_check(struct smp_chan *smp) a[6] = hcon->init_addr_type; b[6] = hcon->resp_addr_type; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { local_addr = a; remote_addr = b; memcpy(io_cap, &smp->preq[1], 3); @@ -1539,7 +1541,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) /* The round is only complete when the initiator * receives pairing random. */ - if (!hcon->out) { + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); if (smp->passkey_round == 20) @@ -1567,7 +1569,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); return 0; @@ -1578,7 +1580,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) case SMP_CMD_PUBLIC_KEY: default: /* Initiating device starts the round */ - if (!hcon->out) + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return 0; bt_dev_dbg(hdev, "Starting passkey round %u", @@ -1623,7 +1625,7 @@ static int sc_user_reply(struct smp_chan *smp, u16 mgmt_op, __le32 passkey) } /* Initiator sends DHKey check first */ - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { sc_dhkey_check(smp); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); } else if (test_and_clear_bit(SMP_FLAG_DHKEY_PENDING, &smp->flags)) { @@ -1746,7 +1748,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) struct smp_cmd_pairing rsp, *req = (void *) skb->data; struct l2cap_chan *chan = conn->smp; struct hci_dev *hdev = conn->hcon->hdev; - struct smp_chan *smp; + struct smp_chan *smp = chan->data; u8 key_size, auth, sec_level; int ret; @@ -1755,16 +1757,14 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (skb->len < sizeof(*req)) return SMP_INVALID_PARAMS; - if (conn->hcon->role != HCI_ROLE_SLAVE) + if (smp && test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return SMP_CMD_NOTSUPP; - if (!chan->data) + if (!smp) { smp = smp_chan_create(conn); - else - smp = chan->data; - - if (!smp) - return SMP_UNSPECIFIED; + if (!smp) + return SMP_UNSPECIFIED; + } /* We didn't start the pairing, so match remote */ auth = req->auth_req & AUTH_REQ_MASK(hdev); @@ -1946,7 +1946,7 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) if (skb->len < sizeof(*rsp)) return SMP_INVALID_PARAMS; - if (conn->hcon->role != HCI_ROLE_MASTER) + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return SMP_CMD_NOTSUPP; skb_pull(skb, sizeof(*rsp)); @@ -2041,7 +2041,7 @@ static u8 sc_check_confirm(struct smp_chan *smp) if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY) return sc_passkey_round(smp, SMP_CMD_PAIRING_CONFIRM); - if (conn->hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); @@ -2063,7 +2063,7 @@ static int fixup_sc_false_positive(struct smp_chan *smp) u8 auth; /* The issue is only observed when we're in responder role */ - if (hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return SMP_UNSPECIFIED; if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) { @@ -2099,7 +2099,8 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) struct hci_dev *hdev = hcon->hdev; bt_dev_dbg(hdev, "conn %p %s", conn, - hcon->out ? "initiator" : "responder"); + test_bit(SMP_FLAG_INITIATOR, &smp->flags) ? "initiator" : + "responder"); if (skb->len < sizeof(smp->pcnf)) return SMP_INVALID_PARAMS; @@ -2121,7 +2122,7 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) return ret; } - if (conn->hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); @@ -2156,7 +2157,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) if (!test_bit(SMP_FLAG_SC, &smp->flags)) return smp_random(smp); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { pkax = smp->local_pk; pkbx = smp->remote_pk; na = smp->prnd; @@ -2169,7 +2170,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) } if (smp->method == REQ_OOB) { - if (!hcon->out) + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); @@ -2180,7 +2181,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY) return sc_passkey_round(smp, SMP_CMD_PAIRING_RANDOM); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { u8 cfm[16]; err = smp_f4(smp->tfm_cmac, smp->remote_pk, smp->local_pk, @@ -2221,7 +2222,7 @@ mackey_and_ltk: return SMP_UNSPECIFIED; if (smp->method == REQ_OOB) { - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { sc_dhkey_check(smp); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); } @@ -2295,10 +2296,27 @@ bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, return false; } +static void smp_send_pairing_req(struct smp_chan *smp, __u8 auth) +{ + struct smp_cmd_pairing cp; + + if (smp->conn->hcon->type == ACL_LINK) + build_bredr_pairing_cmd(smp, &cp, NULL); + else + build_pairing_cmd(smp->conn, &cp, NULL, auth); + + smp->preq[0] = SMP_CMD_PAIRING_REQ; + memcpy(&smp->preq[1], &cp, sizeof(cp)); + + smp_send_cmd(smp->conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); + + set_bit(SMP_FLAG_INITIATOR, &smp->flags); +} + static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_security_req *rp = (void *) skb->data; - struct smp_cmd_pairing cp; struct hci_conn *hcon = conn->hcon; struct hci_dev *hdev = hcon->hdev; struct smp_chan *smp; @@ -2347,16 +2365,20 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) skb_pull(skb, sizeof(*rp)); - memset(&cp, 0, sizeof(cp)); - build_pairing_cmd(conn, &cp, NULL, auth); + smp_send_pairing_req(smp, auth); - smp->preq[0] = SMP_CMD_PAIRING_REQ; - memcpy(&smp->preq[1], &cp, sizeof(cp)); + return 0; +} - smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); +static void smp_send_security_req(struct smp_chan *smp, __u8 auth) +{ + struct smp_cmd_security_req cp; - return 0; + cp.auth_req = auth; + smp_send_cmd(smp->conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp); + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_REQ); + + clear_bit(SMP_FLAG_INITIATOR, &smp->flags); } int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) @@ -2427,23 +2449,11 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) authreq |= SMP_AUTH_MITM; } - if (hcon->role == HCI_ROLE_MASTER) { - struct smp_cmd_pairing cp; - - build_pairing_cmd(conn, &cp, NULL, authreq); - smp->preq[0] = SMP_CMD_PAIRING_REQ; - memcpy(&smp->preq[1], &cp, sizeof(cp)); - - smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); - } else { - struct smp_cmd_security_req cp; - cp.auth_req = authreq; - smp_send_cmd(conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_REQ); - } + if (hcon->role == HCI_ROLE_MASTER) + smp_send_pairing_req(smp, authreq); + else + smp_send_security_req(smp, authreq); - set_bit(SMP_FLAG_INITIATOR, &smp->flags); ret = 0; unlock: @@ -2694,8 +2704,6 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb) static u8 sc_select_method(struct smp_chan *smp) { - struct l2cap_conn *conn = smp->conn; - struct hci_conn *hcon = conn->hcon; struct smp_cmd_pairing *local, *remote; u8 local_mitm, remote_mitm, local_io, remote_io, method; @@ -2708,7 +2716,7 @@ static u8 sc_select_method(struct smp_chan *smp) * the "struct smp_cmd_pairing" from them we need to skip the * first byte which contains the opcode. */ - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { local = (void *) &smp->preq[1]; remote = (void *) &smp->prsp[1]; } else { @@ -2777,7 +2785,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) /* Non-initiating device sends its public key after receiving * the key from the initiating device. */ - if (!hcon->out) { + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { err = sc_send_public_key(smp); if (err) return err; @@ -2839,7 +2847,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) } if (smp->method == REQ_OOB) { - if (hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); @@ -2848,7 +2856,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) return 0; } - if (hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM); if (smp->method == REQ_PASSKEY) { @@ -2863,7 +2871,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) /* The Initiating device waits for the non-initiating device to * send the confirm value. */ - if (conn->hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return 0; err = smp_f4(smp->tfm_cmac, smp->local_pk, smp->remote_pk, smp->prnd, @@ -2897,7 +2905,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) a[6] = hcon->init_addr_type; b[6] = hcon->resp_addr_type; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { local_addr = a; remote_addr = b; memcpy(io_cap, &smp->prsp[1], 3); @@ -2922,7 +2930,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) if (crypto_memneq(check->e, e, 16)) return SMP_DHKEY_CHECK_FAILED; - if (!hcon->out) { + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { if (test_bit(SMP_FLAG_WAIT_USER, &smp->flags)) { set_bit(SMP_FLAG_DHKEY_PENDING, &smp->flags); return 0; @@ -2934,7 +2942,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) sc_add_ltk(smp); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { hci_le_start_enc(hcon, 0, 0, smp->tk, smp->enc_key_size); hcon->enc_key_size = smp->enc_key_size; } @@ -3083,7 +3091,6 @@ static void bredr_pairing(struct l2cap_chan *chan) struct l2cap_conn *conn = chan->conn; struct hci_conn *hcon = conn->hcon; struct hci_dev *hdev = hcon->hdev; - struct smp_cmd_pairing req; struct smp_chan *smp; bt_dev_dbg(hdev, "chan %p", chan); @@ -3135,14 +3142,7 @@ static void bredr_pairing(struct l2cap_chan *chan) bt_dev_dbg(hdev, "starting SMP over BR/EDR"); - /* Prepare and send the BR/EDR SMP Pairing Request */ - build_bredr_pairing_cmd(smp, &req, NULL); - - smp->preq[0] = SMP_CMD_PAIRING_REQ; - memcpy(&smp->preq[1], &req, sizeof(req)); - - smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(req), &req); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); + smp_send_pairing_req(smp, 0x00); } static void smp_resume_cb(struct l2cap_chan *chan) -- cgit From 538fd3921afac97158d4177139a0ad39f056dbb2 Mon Sep 17 00:00:00 2001 From: Griffin Kroah-Hartman Date: Thu, 15 Aug 2024 13:51:00 +0200 Subject: Bluetooth: MGMT: Add error handling to pair_device() hci_conn_params_add() never checks for a NULL value and could lead to a NULL pointer dereference causing a crash. Fixed by adding error handling in the function. Cc: Stable Fixes: 5157b8a503fa ("Bluetooth: Fix initializing conn_params in scan phase") Signed-off-by: Griffin Kroah-Hartman Reported-by: Yiwei Zhang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 40d4887c7f79..25979f4283a6 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3456,6 +3456,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, * will be kept and this function does nothing. */ p = hci_conn_params_add(hdev, &cp->addr.bdaddr, addr_type); + if (!p) { + err = -EIO; + goto unlock; + } if (p->auto_connect == HCI_AUTO_CONN_EXPLICIT) p->auto_connect = HCI_AUTO_CONN_DISABLED; -- cgit From 624ab9cde26a9f150b4fd268b0f3dae3184dc40c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 16 Jul 2024 09:06:30 -0700 Subject: drm/msm/adreno: Fix error return if missing firmware-name -ENODEV is used to signify that there is no zap shader for the platform, and the CPU can directly take the GPU out of secure mode. We want to use this return code when there is no zap-shader node. But not when there is, but without a firmware-name property. This case we want to treat as-if the needed fw is not found. Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Reviewed-by: Akhil P Oommen Patchwork: https://patchwork.freedesktop.org/patch/604564/ --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 1c6626747b98..ecc3fc5cec22 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -99,7 +99,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname, * was a bad idea, and is only provided for backwards * compatibility for older targets. */ - return -ENODEV; + return -ENOENT; } if (IS_ERR(fw)) { -- cgit From c0247d289e73e18f6ddb0895de30c09770fbed95 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 13 Aug 2024 12:53:15 +0200 Subject: btrfs: send: annotate struct name_cache_entry with __counted_by() Add the __counted_by compiler attribute to the flexible array member name to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE. Signed-off-by: Thorsten Blum Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 7fc692fc76e1..619fa0b8b3f6 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -347,7 +347,7 @@ struct name_cache_entry { int ret; int need_later_update; int name_len; - char name[]; + char name[] __counted_by(name_len); }; /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */ -- cgit From 3bc2ac2f8f0b78a13140fc72022771efe0c9b778 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 12 Aug 2024 12:30:52 -0400 Subject: btrfs: update target inode's ctime on unlink Unlink changes the link count on the target inode. POSIX mandates that the ctime must also change when this occurs. According to https://pubs.opengroup.org/onlinepubs/9699919799/functions/unlink.html: "Upon successful completion, unlink() shall mark for update the last data modification and last file status change timestamps of the parent directory. Also, if the file's link count is not 0, the last file status change timestamp of the file shall be marked for update." Signed-off-by: Jeff Layton Reviewed-by: David Sterba [ add link to the opengroup docs ] Signed-off-by: David Sterba --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 333b0e8587a2..b1b6564ab68f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4195,6 +4195,7 @@ err: btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2); inode_inc_iversion(&inode->vfs_inode); + inode_set_ctime_current(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); inode_set_mtime_to_ts(&dir->vfs_inode, inode_set_ctime_current(&dir->vfs_inode)); ret = btrfs_update_inode(trans, dir); -- cgit From 008e2512dc5696ab2dc5bf264e98a9fe9ceb830e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 11 Aug 2024 15:00:22 +0930 Subject: btrfs: tree-checker: add dev extent item checks [REPORT] There is a corruption report that btrfs refused to mount a fs that has overlapping dev extents: BTRFS error (device sdc): dev extent devid 4 physical offset 14263979671552 overlap with previous dev extent end 14263980982272 BTRFS error (device sdc): failed to verify dev extents against chunks: -117 BTRFS error (device sdc): open_ctree failed [CAUSE] The direct cause is very obvious, there is a bad dev extent item with incorrect length. With btrfs check reporting two overlapping extents, the second one shows some clue on the cause: ERROR: dev extent devid 4 offset 14263979671552 len 6488064 overlap with previous dev extent end 14263980982272 ERROR: dev extent devid 13 offset 2257707008000 len 6488064 overlap with previous dev extent end 2257707270144 ERROR: errors found in extent allocation tree or chunk allocation The second one looks like a bitflip happened during new chunk allocation: hex(2257707008000) = 0x20da9d30000 hex(2257707270144) = 0x20da9d70000 diff = 0x00000040000 So it looks like a bitflip happened during new dev extent allocation, resulting the second overlap. Currently we only do the dev-extent verification at mount time, but if the corruption is caused by memory bitflip, we really want to catch it before writing the corruption to the storage. Furthermore the dev extent items has the following key definition: ( DEV_EXTENT ) Thus we can not just rely on the generic key order check to make sure there is no overlapping. [ENHANCEMENT] Introduce dedicated dev extent checks, including: - Fixed member checks * chunk_tree should always be BTRFS_CHUNK_TREE_OBJECTID (3) * chunk_objectid should always be BTRFS_FIRST_CHUNK_CHUNK_TREE_OBJECTID (256) - Alignment checks * chunk_offset should be aligned to sectorsize * length should be aligned to sectorsize * key.offset should be aligned to sectorsize - Overlap checks If the previous key is also a dev-extent item, with the same device id, make sure we do not overlap with the previous dev extent. Reported: Stefan N Link: https://lore.kernel.org/linux-btrfs/CA+W5K0rSO3koYTo=nzxxTm1-Pdu1HYgVxEpgJ=aGc7d=E8mGEg@mail.gmail.com/ CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 6f1e2f2215d9..634d69964fe4 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1764,6 +1764,72 @@ static int check_raid_stripe_extent(const struct extent_buffer *leaf, return 0; } +static int check_dev_extent_item(const struct extent_buffer *leaf, + const struct btrfs_key *key, + int slot, + struct btrfs_key *prev_key) +{ + struct btrfs_dev_extent *de; + const u32 sectorsize = leaf->fs_info->sectorsize; + + de = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); + /* Basic fixed member checks. */ + if (unlikely(btrfs_dev_extent_chunk_tree(leaf, de) != + BTRFS_CHUNK_TREE_OBJECTID)) { + generic_err(leaf, slot, + "invalid dev extent chunk tree id, has %llu expect %llu", + btrfs_dev_extent_chunk_tree(leaf, de), + BTRFS_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + if (unlikely(btrfs_dev_extent_chunk_objectid(leaf, de) != + BTRFS_FIRST_CHUNK_TREE_OBJECTID)) { + generic_err(leaf, slot, + "invalid dev extent chunk objectid, has %llu expect %llu", + btrfs_dev_extent_chunk_objectid(leaf, de), + BTRFS_FIRST_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + /* Alignment check. */ + if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent key.offset, has %llu not aligned to %u", + key->offset, sectorsize); + return -EUCLEAN; + } + if (unlikely(!IS_ALIGNED(btrfs_dev_extent_chunk_offset(leaf, de), + sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent chunk offset, has %llu not aligned to %u", + btrfs_dev_extent_chunk_objectid(leaf, de), + sectorsize); + return -EUCLEAN; + } + if (unlikely(!IS_ALIGNED(btrfs_dev_extent_length(leaf, de), + sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent length, has %llu not aligned to %u", + btrfs_dev_extent_length(leaf, de), sectorsize); + return -EUCLEAN; + } + /* Overlap check with previous dev extent. */ + if (slot && prev_key->objectid == key->objectid && + prev_key->type == key->type) { + struct btrfs_dev_extent *prev_de; + u64 prev_len; + + prev_de = btrfs_item_ptr(leaf, slot - 1, struct btrfs_dev_extent); + prev_len = btrfs_dev_extent_length(leaf, prev_de); + if (unlikely(prev_key->offset + prev_len > key->offset)) { + generic_err(leaf, slot, + "dev extent overlap, prev offset %llu len %llu current offset %llu", + prev_key->objectid, prev_len, key->offset); + return -EUCLEAN; + } + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -1800,6 +1866,9 @@ static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf, case BTRFS_DEV_ITEM_KEY: ret = check_dev_item(leaf, key, slot); break; + case BTRFS_DEV_EXTENT_KEY: + ret = check_dev_extent_item(leaf, key, slot, prev_key); + break; case BTRFS_INODE_ITEM_KEY: ret = check_inode_item(leaf, key, slot); break; -- cgit From e30729d4bd4001881be4d1ad4332a5d4985398f8 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 1 Aug 2024 16:47:52 +0900 Subject: btrfs: zoned: properly take lock to read/update block group's zoned variables __btrfs_add_free_space_zoned() references and modifies bg's alloc_offset, ro, and zone_unusable, but without taking the lock. It is mostly safe because they monotonically increase (at least for now) and this function is mostly called by a transaction commit, which is serialized by itself. Still, taking the lock is a safer and correct option and I'm going to add a change to reset zone_unusable while a block group is still alive. So, add locking around the operations. Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f5996a43db24..eaa1dbd31352 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2697,15 +2697,16 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, u64 offset = bytenr - block_group->start; u64 to_free, to_unusable; int bg_reclaim_threshold = 0; - bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0)); + bool initial; u64 reclaimable_unusable; - WARN_ON(!initial && offset + size > block_group->zone_capacity); + spin_lock(&block_group->lock); + initial = ((size == block_group->length) && (block_group->alloc_offset == 0)); + WARN_ON(!initial && offset + size > block_group->zone_capacity); if (!initial) bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold); - spin_lock(&ctl->tree_lock); if (!used) to_free = size; else if (initial) @@ -2718,7 +2719,9 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, to_free = offset + size - block_group->alloc_offset; to_unusable = size - to_free; + spin_lock(&ctl->tree_lock); ctl->free_space += to_free; + spin_unlock(&ctl->tree_lock); /* * If the block group is read-only, we should account freed space into * bytes_readonly. @@ -2727,11 +2730,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, block_group->zone_unusable += to_unusable; WARN_ON(block_group->zone_unusable > block_group->length); } - spin_unlock(&ctl->tree_lock); if (!used) { - spin_lock(&block_group->lock); block_group->alloc_offset -= size; - spin_unlock(&block_group->lock); } reclaimable_unusable = block_group->zone_unusable - @@ -2745,6 +2745,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, btrfs_mark_bg_to_reclaim(block_group); } + spin_unlock(&block_group->lock); + return 0; } -- cgit From 74c2ab6d653b4c2354df65a7f7f2df1925a40a51 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 8 Aug 2024 20:23:32 +0800 Subject: smb/client: avoid possible NULL dereference in cifs_free_subrequest() Clang static checker (scan-build) warning: cifsglob.h:line 890, column 3 Access to field 'ops' results in a dereference of a null pointer. Commit 519be989717c ("cifs: Add a tracepoint to track credits involved in R/W requests") adds a check for 'rdata->server', and let clang throw this warning about NULL dereference. When 'rdata->credits.value != 0 && rdata->server == NULL' happens, add_credits_and_wake_if() will call rdata->server->ops->add_credits(). This will cause NULL dereference problem. Add a check for 'rdata->server' to avoid NULL dereference. Cc: stable@vger.kernel.org Fixes: 69c3c023af25 ("cifs: Implement netfslib hooks") Reviewed-by: David Howells Signed-off-by: Su Hui Signed-off-by: Steve French --- fs/smb/client/file.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index b2405dd4d4d4..45459af5044d 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -315,7 +315,7 @@ static void cifs_free_subrequest(struct netfs_io_subrequest *subreq) #endif } - if (rdata->credits.value != 0) + if (rdata->credits.value != 0) { trace_smb3_rw_credits(rdata->rreq->debug_id, rdata->subreq.debug_index, rdata->credits.value, @@ -323,8 +323,12 @@ static void cifs_free_subrequest(struct netfs_io_subrequest *subreq) rdata->server ? rdata->server->in_flight : 0, -rdata->credits.value, cifs_trace_rw_credits_free_subreq); + if (rdata->server) + add_credits_and_wake_if(rdata->server, &rdata->credits, 0); + else + rdata->credits.value = 0; + } - add_credits_and_wake_if(rdata->server, &rdata->credits, 0); if (rdata->have_xid) free_xid(rdata->xid); } -- cgit From 836bb3268db405cf9021496ac4dbc26d3e4758fe Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 15 Aug 2024 14:03:43 -0500 Subject: smb3: fix lock breakage for cached writes Mandatory locking is enforced for cached writes, which violates default posix semantics, and also it is enforced inconsistently. This apparently breaks recent versions of libreoffice, but can also be demonstrated by opening a file twice from the same client, locking it from handle one and writing to it from handle two (which fails, returning EACCES). Since there was already a mount option "forcemandatorylock" (which defaults to off), with this change only when the user intentionally specifies "forcemandatorylock" on mount will we break posix semantics on write to a locked range (ie we will only fail the write in this case, if the user mounts with "forcemandatorylock"). Fixes: 85160e03a79e ("CIFS: Implement caching mechanism for mandatory brlocks") Cc: stable@vger.kernel.org Cc: Pavel Shilovsky Reported-by: abartlet@samba.org Reported-by: Kevin Ottens Reviewed-by: David Howells Signed-off-by: Steve French --- fs/smb/client/file.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 45459af5044d..06a0667f8ff2 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -2753,6 +2753,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file->f_mapping->host; struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); ssize_t rc; rc = netfs_start_io_write(inode); @@ -2769,12 +2770,16 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) if (rc <= 0) goto out; - if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) && + (cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), server->vals->exclusive_lock_type, 0, - NULL, CIFS_WRITE_OP)) - rc = netfs_buffered_write_iter_locked(iocb, from, NULL); - else + NULL, CIFS_WRITE_OP))) { rc = -EACCES; + goto out; + } + + rc = netfs_buffered_write_iter_locked(iocb, from, NULL); + out: up_read(&cinode->lock_sem); netfs_end_io_write(inode); -- cgit From 5b4f3af39b6588e8de4444d8e1ccf759b40f9414 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 8 Aug 2024 16:04:04 -0600 Subject: smb: smb2pdu.h: Use static_assert() to check struct sizes Commit 9f9bef9bc5c6 ("smb: smb2pdu.h: Avoid -Wflex-array-member-not-at-end warnings") introduced tagged `struct create_context_hdr`. We want to ensure that when new members need to be added to the flexible structure, they are always included within this tagged struct. So, we use `static_assert()` to ensure that the memory layout for both the flexible structure and the tagged struct is the same after any changes. Acked-by: Namjae Jeon Signed-off-by: Gustavo A. R. Silva Signed-off-by: Steve French --- fs/smb/common/smb2pdu.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index c3ee42188d25..c769f9dbc0b4 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1216,6 +1216,8 @@ struct create_context { ); __u8 Buffer[]; } __packed; +static_assert(offsetof(struct create_context, Buffer) == sizeof(struct create_context_hdr), + "struct member likely outside of __struct_group()"); struct smb2_create_req { struct smb2_hdr hdr; -- cgit From 9b340aeb26d50e9a9ec99599e2a39b035fac978e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 16 Aug 2024 06:19:23 +1000 Subject: nouveau/firmware: use dma non-coherent allocator Currently, enabling SG_DEBUG in the kernel will cause nouveau to hit a BUG() on startup, when the iommu is enabled: kernel BUG at include/linux/scatterlist.h:187! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 7 PID: 930 Comm: (udev-worker) Not tainted 6.9.0-rc3Lyude-Test+ #30 Hardware name: MSI MS-7A39/A320M GAMING PRO (MS-7A39), BIOS 1.I0 01/22/2019 RIP: 0010:sg_init_one+0x85/0xa0 Code: 69 88 32 01 83 e1 03 f6 c3 03 75 20 a8 01 75 1e 48 09 cb 41 89 54 24 08 49 89 1c 24 41 89 6c 24 0c 5b 5d 41 5c e9 7b b9 88 00 <0f> 0b 0f 0b 0f 0b 48 8b 05 5e 46 9a 01 eb b2 66 66 2e 0f 1f 84 00 RSP: 0018:ffffa776017bf6a0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffffa77600d87000 RCX: 000000000000002b RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffffa77680d87000 RBP: 000000000000e000 R08: 0000000000000000 R09: 0000000000000000 R10: ffff98f4c46aa508 R11: 0000000000000000 R12: ffff98f4c46aa508 R13: ffff98f4c46aa008 R14: ffffa77600d4a000 R15: ffffa77600d4a018 FS: 00007feeb5aae980(0000) GS:ffff98f5c4dc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f22cb9a4520 CR3: 00000001043ba000 CR4: 00000000003506f0 Call Trace: ? die+0x36/0x90 ? do_trap+0xdd/0x100 ? sg_init_one+0x85/0xa0 ? do_error_trap+0x65/0x80 ? sg_init_one+0x85/0xa0 ? exc_invalid_op+0x50/0x70 ? sg_init_one+0x85/0xa0 ? asm_exc_invalid_op+0x1a/0x20 ? sg_init_one+0x85/0xa0 nvkm_firmware_ctor+0x14a/0x250 [nouveau] nvkm_falcon_fw_ctor+0x42/0x70 [nouveau] ga102_gsp_booter_ctor+0xb4/0x1a0 [nouveau] r535_gsp_oneinit+0xb3/0x15f0 [nouveau] ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? nvkm_udevice_new+0x95/0x140 [nouveau] ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? ktime_get+0x47/0xb0 Fix this by using the non-coherent allocator instead, I think there might be a better answer to this, but it involve ripping up some of APIs using sg lists. Cc: stable@vger.kernel.org Fixes: 2541626cfb79 ("drm/nouveau/acr: use common falcon HS FW code for ACR FWs") Signed-off-by: Dave Airlie Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240815201923.632803-1-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/core/firmware.c | 9 ++++++--- drivers/gpu/drm/nouveau/nvkm/falcon/fw.c | 6 ++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c index adc60b25f8e6..0af01a0ec601 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c @@ -205,7 +205,8 @@ nvkm_firmware_dtor(struct nvkm_firmware *fw) break; case NVKM_FIRMWARE_IMG_DMA: nvkm_memory_unref(&memory); - dma_free_coherent(fw->device->dev, sg_dma_len(&fw->mem.sgl), fw->img, fw->phys); + dma_free_noncoherent(fw->device->dev, sg_dma_len(&fw->mem.sgl), + fw->img, fw->phys, DMA_TO_DEVICE); break; case NVKM_FIRMWARE_IMG_SGT: nvkm_memory_unref(&memory); @@ -236,10 +237,12 @@ nvkm_firmware_ctor(const struct nvkm_firmware_func *func, const char *name, break; case NVKM_FIRMWARE_IMG_DMA: { dma_addr_t addr; - len = ALIGN(fw->len, PAGE_SIZE); - fw->img = dma_alloc_coherent(fw->device->dev, len, &addr, GFP_KERNEL); + fw->img = dma_alloc_noncoherent(fw->device->dev, + len, &addr, + DMA_TO_DEVICE, + GFP_KERNEL); if (fw->img) { memcpy(fw->img, src, fw->len); fw->phys = addr; diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c index 80a480b12174..a1c8545f1249 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c @@ -89,6 +89,12 @@ nvkm_falcon_fw_boot(struct nvkm_falcon_fw *fw, struct nvkm_subdev *user, nvkm_falcon_fw_dtor_sigs(fw); } + /* after last write to the img, sync dma mappings */ + dma_sync_single_for_device(fw->fw.device->dev, + fw->fw.phys, + sg_dma_len(&fw->fw.mem.sgl), + DMA_TO_DEVICE); + FLCNFW_DBG(fw, "resetting"); fw->func->reset(fw); -- cgit From b96ed2c97c791954abc881ef384e773010945aec Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 14 Aug 2024 14:25:00 +0200 Subject: virtio_net: move netdev_tx_reset_queue() call before RX napi enable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During suspend/resume the following BUG was hit: ------------[ cut here ]------------ kernel BUG at lib/dynamic_queue_limits.c:99! Internal error: Oops - BUG: 0 [#1] SMP ARM Modules linked in: bluetooth ecdh_generic ecc libaes CPU: 1 PID: 1282 Comm: rtcwake Not tainted 6.10.0-rc3-00732-gc8bd1f7f3e61 #15240 Hardware name: Generic DT based system PC is at dql_completed+0x270/0x2cc LR is at __free_old_xmit+0x120/0x198 pc : []    lr : []    psr: 80000013 ... Flags: Nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none Control: 10c5387d  Table: 43a4406a  DAC: 00000051 ... Process rtcwake (pid: 1282, stack limit = 0xfbc21278) Stack: (0xe0805e80 to 0xe0806000) ... Call trace:  dql_completed from __free_old_xmit+0x120/0x198  __free_old_xmit from free_old_xmit+0x44/0xe4  free_old_xmit from virtnet_poll_tx+0x88/0x1b4  virtnet_poll_tx from __napi_poll+0x2c/0x1d4  __napi_poll from net_rx_action+0x140/0x2b4  net_rx_action from handle_softirqs+0x11c/0x350  handle_softirqs from call_with_stack+0x18/0x20  call_with_stack from do_softirq+0x48/0x50  do_softirq from __local_bh_enable_ip+0xa0/0xa4  __local_bh_enable_ip from virtnet_open+0xd4/0x21c  virtnet_open from virtnet_restore+0x94/0x120  virtnet_restore from virtio_device_restore+0x110/0x1f4  virtio_device_restore from dpm_run_callback+0x3c/0x100  dpm_run_callback from device_resume+0x12c/0x2a8  device_resume from dpm_resume+0x12c/0x1e0  dpm_resume from dpm_resume_end+0xc/0x18  dpm_resume_end from suspend_devices_and_enter+0x1f0/0x72c  suspend_devices_and_enter from pm_suspend+0x270/0x2a0  pm_suspend from state_store+0x68/0xc8  state_store from kernfs_fop_write_iter+0x10c/0x1cc  kernfs_fop_write_iter from vfs_write+0x2b0/0x3dc  vfs_write from ksys_write+0x5c/0xd4  ksys_write from ret_fast_syscall+0x0/0x54 Exception stack(0xe8bf1fa8 to 0xe8bf1ff0) ... ---[ end trace 0000000000000000 ]--- After virtnet_napi_enable() is called, the following path is hit: __napi_poll() -> virtnet_poll() -> virtnet_poll_cleantx() -> netif_tx_wake_queue() That wakes the TX queue and allows skbs to be submitted and accounted by BQL counters. Then netdev_tx_reset_queue() is called that resets BQL counters and eventually leads to the BUG in dql_completed(). Move virtnet_napi_tx_enable() what does BQL counters reset before RX napi enable to avoid the issue. Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/netdev/e632e378-d019-4de7-8f13-07c572ab37a9@samsung.com/ Fixes: c8bd1f7f3e61 ("virtio_net: add support for Byte Queue Limits") Tested-by: Marek Szyprowski Signed-off-by: Jiri Pirko Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Link: https://patch.msgid.link/20240814122500.1710279-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 3f10c72743e9..c6af18948092 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2867,8 +2867,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) if (err < 0) goto err_xdp_reg_mem_model; - virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index)); + virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); return 0; -- cgit From c948c0973df5db9314459da621342e1170bd9e8e Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Wed, 14 Aug 2024 15:54:29 -0700 Subject: bnxt_en: Don't clear ntuple filters and rss contexts during ethtool ops The driver currently blindly deletes its cache of RSS cotexts and ntuple filters when the ethtool channel count is changing. It also deletes the ntuple filters cache when the default indirection table is changing. The core will not allow ethtool channels to drop below any that have been configured as ntuple destinations since this commit from 2022: 47f3ecf4763d ("ethtool: Fail number of channels change when it conflicts with rxnfc") So there is absolutely no need to delete the ntuple filters and RSS contexts when changing ethtool channels. It is also unnecessary to delete ntuple filters when the default RSS indirection table is changing. Remove bnxt_clear_usr_fltrs() and bnxt_clear_rss_ctxis() from the ethtool ops and change them to static functions. This bug will cause confusion to the end user and causes failure when running the rss_ctx.py selftest. Fixes: 1018319f949c ("bnxt_en: Invalidate user filters when needed") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20240725111912.7bc17cf6@kernel.org/ Reviewed-by: Andy Gospodarek Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://patch.msgid.link/20240814225429.199280-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 -- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 ---- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e27e1082ee33..04a623b3eee2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5056,7 +5056,7 @@ void bnxt_del_one_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr) list_del_init(&fltr->list); } -void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all) +static void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all) { struct bnxt_filter_base *usr_fltr, *tmp; @@ -10248,7 +10248,7 @@ static void bnxt_hwrm_realloc_rss_ctx_vnic(struct bnxt *bp) } } -void bnxt_clear_rss_ctxs(struct bnxt *bp) +static void bnxt_clear_rss_ctxs(struct bnxt *bp) { struct ethtool_rxfh_context *ctx; unsigned long context; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 6bbdc718c3a7..059a6f81c1a8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2790,7 +2790,6 @@ void bnxt_set_ring_params(struct bnxt *); int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode); void bnxt_insert_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr); void bnxt_del_one_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr); -void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all); int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size, bool async_only); int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp); @@ -2842,7 +2841,6 @@ int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); int __bnxt_setup_vnic_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, bool all); -void bnxt_clear_rss_ctxs(struct bnxt *bp); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); void bnxt_half_close_nic(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 9dadc89378f0..4cf9bf8b01b0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -968,9 +968,6 @@ static int bnxt_set_channels(struct net_device *dev, return -EINVAL; } - bnxt_clear_usr_fltrs(bp, true); - if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) - bnxt_clear_rss_ctxs(bp); if (netif_running(dev)) { if (BNXT_PF(bp)) { /* TODO CHIMP_FW: Send message to all VF's @@ -2000,7 +1997,6 @@ static int bnxt_set_rxfh(struct net_device *dev, bnxt_modify_rss(bp, NULL, NULL, rxfh); - bnxt_clear_usr_fltrs(bp, false); if (netif_running(bp->dev)) { bnxt_close_nic(bp, false, false); rc = bnxt_open_nic(bp, false, false); -- cgit From b153b3c747003e1ce312ba205e552db4bd9e8df7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 14 Aug 2024 07:28:32 -0700 Subject: MAINTAINERS: add selftests to network drivers tools/testing/selftests/drivers/net/ is not listed under networking entries. Add it to NETWORKING DRIVERS. Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240814142832.3473685-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f328373463b0..a964a34651f5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15886,6 +15886,7 @@ F: include/linux/netdevice.h F: include/uapi/linux/cn_proc.h F: include/uapi/linux/if_* F: include/uapi/linux/netdevice.h +F: tools/testing/selftests/drivers/net/ X: drivers/net/wireless/ NETWORKING DRIVERS (WIRELESS) -- cgit From e46bc2e7eb90a370bc27fa2fd98cb8251e7da1ec Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Wed, 7 Aug 2024 18:33:35 +0100 Subject: mseal: fix is_madv_discard() is_madv_discard did its check wrong. MADV_ flags are not bitwise, they're normal sequential numbers. So, for instance: behavior & (/* ... */ | MADV_REMOVE) tagged both MADV_REMOVE and MADV_RANDOM (bit 0 set) as discard operations. As a result the kernel could erroneously block certain madvises (e.g MADV_RANDOM or MADV_HUGEPAGE) on sealed VMAs due to them sharing bits with blocked MADV operations (e.g REMOVE or WIPEONFORK). This is obviously incorrect, so use a switch statement instead. Link: https://lkml.kernel.org/r/20240807173336.2523757-1-pedro.falcato@gmail.com Link: https://lkml.kernel.org/r/20240807173336.2523757-2-pedro.falcato@gmail.com Fixes: 8be7258aad44 ("mseal: add mseal syscall") Signed-off-by: Pedro Falcato Tested-by: Jeff Xu Reviewed-by: Jeff Xu Cc: Kees Cook Cc: Liam R. Howlett Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- mm/mseal.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/mm/mseal.c b/mm/mseal.c index bf783bba8ed0..15bba28acc00 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -40,9 +40,17 @@ static bool can_modify_vma(struct vm_area_struct *vma) static bool is_madv_discard(int behavior) { - return behavior & - (MADV_FREE | MADV_DONTNEED | MADV_DONTNEED_LOCKED | - MADV_REMOVE | MADV_DONTFORK | MADV_WIPEONFORK); + switch (behavior) { + case MADV_FREE: + case MADV_DONTNEED: + case MADV_DONTNEED_LOCKED: + case MADV_REMOVE: + case MADV_DONTFORK: + case MADV_WIPEONFORK: + return true; + } + + return false; } static bool is_ro_anon(struct vm_area_struct *vma) -- cgit From 5f75cfbd6bb02295ddaed48adf667b6c828ce07b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 1 Aug 2024 22:47:48 +0200 Subject: mm/hugetlb: fix hugetlb vs. core-mm PT locking We recently made GUP's common page table walking code to also walk hugetlb VMAs without most hugetlb special-casing, preparing for the future of having less hugetlb-specific page table walking code in the codebase. Turns out that we missed one page table locking detail: page table locking for hugetlb folios that are not mapped using a single PMD/PUD. Assume we have hugetlb folio that spans multiple PTEs (e.g., 64 KiB hugetlb folios on arm64 with 4 KiB base page size). GUP, as it walks the page tables, will perform a pte_offset_map_lock() to grab the PTE table lock. However, hugetlb that concurrently modifies these page tables would actually grab the mm->page_table_lock: with USE_SPLIT_PTE_PTLOCKS, the locks would differ. Something similar can happen right now with hugetlb folios that span multiple PMDs when USE_SPLIT_PMD_PTLOCKS. This issue can be reproduced [1], for example triggering: [ 3105.936100] ------------[ cut here ]------------ [ 3105.939323] WARNING: CPU: 31 PID: 2732 at mm/gup.c:142 try_grab_folio+0x11c/0x188 [ 3105.944634] Modules linked in: [...] [ 3105.974841] CPU: 31 PID: 2732 Comm: reproducer Not tainted 6.10.0-64.eln141.aarch64 #1 [ 3105.980406] Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-4.fc40 05/24/2024 [ 3105.986185] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 3105.991108] pc : try_grab_folio+0x11c/0x188 [ 3105.994013] lr : follow_page_pte+0xd8/0x430 [ 3105.996986] sp : ffff80008eafb8f0 [ 3105.999346] x29: ffff80008eafb900 x28: ffffffe8d481f380 x27: 00f80001207cff43 [ 3106.004414] x26: 0000000000000001 x25: 0000000000000000 x24: ffff80008eafba48 [ 3106.009520] x23: 0000ffff9372f000 x22: ffff7a54459e2000 x21: ffff7a546c1aa978 [ 3106.014529] x20: ffffffe8d481f3c0 x19: 0000000000610041 x18: 0000000000000001 [ 3106.019506] x17: 0000000000000001 x16: ffffffffffffffff x15: 0000000000000000 [ 3106.024494] x14: ffffb85477fdfe08 x13: 0000ffff9372ffff x12: 0000000000000000 [ 3106.029469] x11: 1fffef4a88a96be1 x10: ffff7a54454b5f0c x9 : ffffb854771b12f0 [ 3106.034324] x8 : 0008000000000000 x7 : ffff7a546c1aa980 x6 : 0008000000000080 [ 3106.038902] x5 : 00000000001207cf x4 : 0000ffff9372f000 x3 : ffffffe8d481f000 [ 3106.043420] x2 : 0000000000610041 x1 : 0000000000000001 x0 : 0000000000000000 [ 3106.047957] Call trace: [ 3106.049522] try_grab_folio+0x11c/0x188 [ 3106.051996] follow_pmd_mask.constprop.0.isra.0+0x150/0x2e0 [ 3106.055527] follow_page_mask+0x1a0/0x2b8 [ 3106.058118] __get_user_pages+0xf0/0x348 [ 3106.060647] faultin_page_range+0xb0/0x360 [ 3106.063651] do_madvise+0x340/0x598 Let's make huge_pte_lockptr() effectively use the same PT locks as any core-mm page table walker would. Add ptep_lockptr() to obtain the PTE page table lock using a pte pointer -- unfortunately we cannot convert pte_lockptr() because virt_to_page() doesn't work with kmap'ed page tables we can have with CONFIG_HIGHPTE. Handle CONFIG_PGTABLE_LEVELS correctly by checking in reverse order, such that when e.g., CONFIG_PGTABLE_LEVELS==2 with PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE will work as expected. Document why that works. There is one ugly case: powerpc 8xx, whereby we have an 8 MiB hugetlb folio being mapped using two PTE page tables. While hugetlb wants to take the PMD table lock, core-mm would grab the PTE table lock of one of both PTE page tables. In such corner cases, we have to make sure that both locks match, which is (fortunately!) currently guaranteed for 8xx as it does not support SMP and consequently doesn't use split PT locks. [1] https://lore.kernel.org/all/1bbfcc7f-f222-45a5-ac44-c5a1381c596d@redhat.com/ Link: https://lkml.kernel.org/r/20240801204748.99107-1-david@redhat.com Fixes: 9cb28da54643 ("mm/gup: handle hugetlb in the generic follow_page_mask code") Signed-off-by: David Hildenbrand Acked-by: Peter Xu Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Peter Xu Cc: Oscar Salvador Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 33 ++++++++++++++++++++++++++++++--- include/linux/mm.h | 11 +++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c9bf68c239a0..45bf05ad5c53 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -944,10 +944,37 @@ static inline bool htlb_allow_alloc_fallback(int reason) static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { - if (huge_page_size(h) == PMD_SIZE) + const unsigned long size = huge_page_size(h); + + VM_WARN_ON(size == PAGE_SIZE); + + /* + * hugetlb must use the exact same PT locks as core-mm page table + * walkers would. When modifying a PTE table, hugetlb must take the + * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD + * PT lock etc. + * + * The expectation is that any hugetlb folio smaller than a PMD is + * always mapped into a single PTE table and that any hugetlb folio + * smaller than a PUD (but at least as big as a PMD) is always mapped + * into a single PMD table. + * + * If that does not hold for an architecture, then that architecture + * must disable split PT locks such that all *_lockptr() functions + * will give us the same result: the per-MM PT lock. + * + * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where + * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr() + * and core-mm would use pmd_lockptr(). However, in such configurations + * split PMD locks are disabled -- they don't make sense on a single + * PGDIR page table -- and the end result is the same. + */ + if (size >= PUD_SIZE) + return pud_lockptr(mm, (pud_t *) pte); + else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE)) return pmd_lockptr(mm, (pmd_t *) pte); - VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); - return &mm->page_table_lock; + /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */ + return ptep_lockptr(mm, pte); } #ifndef hugepages_supported diff --git a/include/linux/mm.h b/include/linux/mm.h index c4b238a20b76..6549d0979b28 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2920,6 +2920,13 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE)); + BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE); + return ptlock_ptr(virt_to_ptdesc(pte)); +} + static inline bool ptlock_init(struct ptdesc *ptdesc) { /* @@ -2944,6 +2951,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { return &mm->page_table_lock; } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + return &mm->page_table_lock; +} static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} -- cgit From ace0741a55e453c265cbf3d965eea7f687cd6d45 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 8 Aug 2024 21:34:34 +0000 Subject: mm: don't account memmap on failure Patch series "Fixes for memmap accounting", v4. Memmap accounting provides us with observability of how much memory is used for per-page metadata: i.e. "struct page"'s and "struct page_ext". It also provides with information of how much was allocated using boot allocator (i.e. not part of MemTotal), and how much was allocated using buddy allocated (i.e. part of MemTotal). This small series fixes a few problems that were discovered with the original patch. This patch (of 3): When we fail to allocate the mmemmap in alloc_vmemmap_page_list(), do not account any already-allocated pages: we're going to free all them before we return from the function. Link: https://lkml.kernel.org/r/20240809191020.1142142-1-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240808213437.682006-1-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240808213437.682006-2-pasha.tatashin@soleen.com Fixes: 15995a352474 ("mm: report per-page metadata information") Signed-off-by: Pasha Tatashin Reviewed-by: Fan Ni Reviewed-by: Yosry Ahmed Acked-by: David Hildenbrand Tested-by: Alison Schofield Reviewed-by: Muchun Song Acked-by: David Rientjes Cc: Dan Williams Cc: Domenico Cerasuolo Cc: Joel Granados Cc: Johannes Weiner Cc: Li Zhijian Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport Cc: Nhat Pham Cc: Sourav Panda Cc: Vlastimil Babka Cc: Yi Zhang Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 829112b0a914..4f51e0596197 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -392,13 +392,10 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end, for (i = 0; i < nr_pages; i++) { page = alloc_pages_node(nid, gfp_mask, 0); - if (!page) { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, i); + if (!page) goto out; - } list_add(&page->lru, list); } - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, nr_pages); return 0; -- cgit From f4cb78af91e3b2b7aa76dbf8213b898fa8811b12 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 8 Aug 2024 21:34:35 +0000 Subject: mm: add system wide stats items category /proc/vmstat contains events and stats, events can only grow, but stats can grow and shrink. vmstat has the following: ------------------------- NR_VM_ZONE_STAT_ITEMS: per-zone stats NR_VM_NUMA_EVENT_ITEMS: per-numa events NR_VM_NODE_STAT_ITEMS: per-numa stats NR_VM_WRITEBACK_STAT_ITEMS: system-wide background-writeback and dirty-throttling tresholds. NR_VM_EVENT_ITEMS: system-wide events ------------------------- Rename NR_VM_WRITEBACK_STAT_ITEMS to NR_VM_STAT_ITEMS, to track the system-wide stats, we are going to add per-page metadata stats to this category in the next patch. Also delete unused writeback_stat_name(). Link: https://lkml.kernel.org/r/20240809191020.1142142-2-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240808213437.682006-3-pasha.tatashin@soleen.com Fixes: 15995a352474 ("mm: report per-page metadata information") Signed-off-by: Pasha Tatashin Suggested-by: Yosry Ahmed Tested-by: Alison Schofield Acked-by: David Hildenbrand Acked-by: David Rientjes Cc: Dan Williams Cc: Domenico Cerasuolo Cc: Joel Granados Cc: Johannes Weiner Cc: Li Zhijian Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport Cc: Muchun Song Cc: Nhat Pham Cc: Sourav Panda Cc: Vlastimil Babka Cc: Yi Zhang Cc: Fan Ni Signed-off-by: Andrew Morton --- include/linux/vmstat.h | 15 ++++----------- mm/vmstat.c | 6 +++--- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 23cd17942036..9ab4fa5e09b5 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -34,10 +34,11 @@ struct reclaim_stat { unsigned nr_lazyfree_fail; }; -enum writeback_stat_item { +/* Stat data for system wide items */ +enum vm_stat_item { NR_DIRTY_THRESHOLD, NR_DIRTY_BG_THRESHOLD, - NR_VM_WRITEBACK_STAT_ITEMS, + NR_VM_STAT_ITEMS, }; #ifdef CONFIG_VM_EVENT_COUNTERS @@ -514,21 +515,13 @@ static inline const char *lru_list_name(enum lru_list lru) return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } -static inline const char *writeback_stat_name(enum writeback_stat_item item) -{ - return vmstat_text[NR_VM_ZONE_STAT_ITEMS + - NR_VM_NUMA_EVENT_ITEMS + - NR_VM_NODE_STAT_ITEMS + - item]; -} - #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS + NR_VM_NODE_STAT_ITEMS + - NR_VM_WRITEBACK_STAT_ITEMS + + NR_VM_STAT_ITEMS + item]; } #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ diff --git a/mm/vmstat.c b/mm/vmstat.c index 04a1cb6cc636..6f8aa4766f16 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1257,7 +1257,7 @@ const char * const vmstat_text[] = { "pgdemote_khugepaged", "nr_memmap", "nr_memmap_boot", - /* enum writeback_stat_item counters */ + /* system-wide enum vm_stat_item counters */ "nr_dirty_threshold", "nr_dirty_background_threshold", @@ -1790,7 +1790,7 @@ static const struct seq_operations zoneinfo_op = { #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \ NR_VM_NUMA_EVENT_ITEMS + \ NR_VM_NODE_STAT_ITEMS + \ - NR_VM_WRITEBACK_STAT_ITEMS + \ + NR_VM_STAT_ITEMS + \ (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \ NR_VM_EVENT_ITEMS : 0)) @@ -1827,7 +1827,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD, v + NR_DIRTY_THRESHOLD); - v += NR_VM_WRITEBACK_STAT_ITEMS; + v += NR_VM_STAT_ITEMS; #ifdef CONFIG_VM_EVENT_COUNTERS all_vm_events(v); -- cgit From 9d85731110241fb8ca9445ea4177d816041a8825 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 8 Aug 2024 21:34:36 +0000 Subject: mm: don't account memmap per-node Fix invalid access to pgdat during hot-remove operation: ndctl users reported a GPF when trying to destroy a namespace: $ ndctl destroy-namespace all -r all -f Segmentation fault dmesg: Oops: general protection fault, probably for non-canonical address 0xdffffc0000005650: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: probably user-memory-access in range [0x000000000002b280-0x000000000002b287] CPU: 26 UID: 0 PID: 1868 Comm: ndctl Not tainted 6.11.0-rc1 #1 Hardware name: Dell Inc. PowerEdge R640/08HT8T, BIOS 2.20.1 09/13/2023 RIP: 0010:mod_node_page_state+0x2a/0x110 cxl-test users report a GPF when trying to unload the test module: $ modrpobe -r cxl-test dmesg BUG: unable to handle page fault for address: 0000000000004200 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] PREEMPT SMP PTI CPU: 0 UID: 0 PID: 1076 Comm: modprobe Tainted: G O N 6.11.0-rc1 #197 Tainted: [O]=OOT_MODULE, [N]=TEST Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/15 RIP: 0010:mod_node_page_state+0x6/0x90 Currently, when memory is hot-plugged or hot-removed the accounting is done based on the assumption that memmap is allocated from the same node as the hot-plugged/hot-removed memory, which is not always the case. In addition, there are challenges with keeping the node id of the memory that is being remove to the time when memmap accounting is actually performed: since this is done after remove_pfn_range_from_zone(), and also after remove_memory_block_devices(). Meaning that we cannot use pgdat nor walking though memblocks to get the nid. Given all of that, account the memmap overhead system wide instead. For this we are going to be using global atomic counters, but given that memmap size is rarely modified, and normally is only modified either during early boot when there is only one CPU, or under a hotplug global mutex lock, therefore there is no need for per-cpu optimizations. Also, while we are here rename nr_memmap to nr_memmap_pages, and nr_memmap_boot to nr_memmap_boot_pages to be self explanatory that the units are in page count. [pasha.tatashin@soleen.com: address a few nits from David Hildenbrand] Link: https://lkml.kernel.org/r/20240809191020.1142142-4-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240809191020.1142142-4-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20240808213437.682006-4-pasha.tatashin@soleen.com Fixes: 15995a352474 ("mm: report per-page metadata information") Signed-off-by: Pasha Tatashin Reported-by: Yi Zhang Closes: https://lore.kernel.org/linux-cxl/CAHj4cs9Ax1=CoJkgBGP_+sNu6-6=6v=_L-ZBZY0bVLD3wUWZQg@mail.gmail.com Reported-by: Alison Schofield Closes: https://lore.kernel.org/linux-mm/Zq0tPd2h6alFz8XF@aschofie-mobl2/#t Tested-by: Dan Williams Tested-by: Alison Schofield Acked-by: David Hildenbrand Acked-by: David Rientjes Tested-by: Yi Zhang Cc: Domenico Cerasuolo Cc: Fan Ni Cc: Joel Granados Cc: Johannes Weiner Cc: Li Zhijian Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport Cc: Muchun Song Cc: Nhat Pham Cc: Sourav Panda Cc: Vlastimil Babka Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 2 -- include/linux/vmstat.h | 7 ++++--- mm/hugetlb_vmemmap.c | 8 ++++---- mm/mm_init.c | 3 +-- mm/page_alloc.c | 1 - mm/page_ext.c | 18 ++++-------------- mm/sparse-vmemmap.c | 11 ++++------- mm/sparse.c | 5 ++--- mm/vmstat.c | 46 ++++++++++++++++++++++------------------------ 9 files changed, 41 insertions(+), 60 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 41458892bc8a..1dc6248feb83 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -220,8 +220,6 @@ enum node_stat_item { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, - NR_MEMMAP, /* page metadata allocated through buddy allocator */ - NR_MEMMAP_BOOT, /* page metadata allocated through boot allocator */ NR_VM_NODE_STAT_ITEMS }; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 9ab4fa5e09b5..9eb77c9007e6 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -38,6 +38,8 @@ struct reclaim_stat { enum vm_stat_item { NR_DIRTY_THRESHOLD, NR_DIRTY_BG_THRESHOLD, + NR_MEMMAP_PAGES, /* page metadata allocated through buddy allocator */ + NR_MEMMAP_BOOT_PAGES, /* page metadata allocated through boot allocator */ NR_VM_STAT_ITEMS, }; @@ -618,7 +620,6 @@ static inline void lruvec_stat_sub_folio(struct folio *folio, lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } -void __meminit mod_node_early_perpage_metadata(int nid, long delta); -void __meminit store_early_perpage_metadata(void); - +void memmap_boot_pages_add(long delta); +void memmap_pages_add(long delta); #endif /* _LINUX_VMSTAT_H */ diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 4f51e0596197..0c3f56b3578e 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -185,11 +185,11 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end, static inline void free_vmemmap_page(struct page *page) { if (PageReserved(page)) { + memmap_boot_pages_add(-1); free_bootmem_page(page); - mod_node_page_state(page_pgdat(page), NR_MEMMAP_BOOT, -1); } else { + memmap_pages_add(-1); __free_page(page); - mod_node_page_state(page_pgdat(page), NR_MEMMAP, -1); } } @@ -341,7 +341,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end, copy_page(page_to_virt(walk.reuse_page), (void *)walk.reuse_addr); list_add(&walk.reuse_page->lru, vmemmap_pages); - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, 1); + memmap_pages_add(1); } /* @@ -396,7 +396,7 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end, goto out; list_add(&page->lru, list); } - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, nr_pages); + memmap_pages_add(nr_pages); return 0; out: diff --git a/mm/mm_init.c b/mm/mm_init.c index 75c3bd42799b..f9a60ffc5532 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1623,8 +1623,7 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) panic("Failed to allocate %ld bytes for node %d memory map\n", size, pgdat->node_id); pgdat->node_mem_map = map + offset; - mod_node_early_perpage_metadata(pgdat->node_id, - DIV_ROUND_UP(size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", __func__, pgdat->node_id, (unsigned long)pgdat, (unsigned long)pgdat->node_mem_map); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 28f80daf5c04..875d76e8684a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5755,7 +5755,6 @@ void __init setup_per_cpu_pageset(void) for_each_online_pgdat(pgdat) pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat); - store_early_perpage_metadata(); } __meminit void zone_pcp_init(struct zone *zone) diff --git a/mm/page_ext.c b/mm/page_ext.c index c191e490c401..641d93f6af4c 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -214,8 +214,7 @@ static int __init alloc_node_page_ext(int nid) return -ENOMEM; NODE_DATA(nid)->node_page_ext = base; total_usage += table_size; - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT, - DIV_ROUND_UP(table_size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(table_size, PAGE_SIZE)); return 0; } @@ -275,10 +274,8 @@ static void *__meminit alloc_page_ext(size_t size, int nid) else addr = vzalloc_node(size, nid); - if (addr) { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, - DIV_ROUND_UP(size, PAGE_SIZE)); - } + if (addr) + memmap_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); return addr; } @@ -323,25 +320,18 @@ static void free_page_ext(void *addr) { size_t table_size; struct page *page; - struct pglist_data *pgdat; table_size = page_ext_size * PAGES_PER_SECTION; + memmap_pages_add(-1L * (DIV_ROUND_UP(table_size, PAGE_SIZE))); if (is_vmalloc_addr(addr)) { - page = vmalloc_to_page(addr); - pgdat = page_pgdat(page); vfree(addr); } else { page = virt_to_page(addr); - pgdat = page_pgdat(page); BUG_ON(PageReserved(page)); kmemleak_free(addr); free_pages_exact(addr, table_size); } - - mod_node_page_state(pgdat, NR_MEMMAP, - -1L * (DIV_ROUND_UP(table_size, PAGE_SIZE))); - } static void __free_page_ext(unsigned long pfn) diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 1dda6c53370b..edcc7a6b0f6f 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -469,13 +469,10 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn, if (r < 0) return NULL; - if (system_state == SYSTEM_BOOTING) { - mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(end - start, - PAGE_SIZE)); - } else { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, - DIV_ROUND_UP(end - start, PAGE_SIZE)); - } + if (system_state == SYSTEM_BOOTING) + memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); + else + memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); return pfn_to_page(pfn); } diff --git a/mm/sparse.c b/mm/sparse.c index e4b830091d13..0f018c6f9ec5 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -463,7 +463,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid) sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true); sparsemap_buf_end = sparsemap_buf + size; #ifndef CONFIG_SPARSEMEM_VMEMMAP - mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); #endif } @@ -643,8 +643,7 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages, unsigned long start = (unsigned long) pfn_to_page(pfn); unsigned long end = start + nr_pages * sizeof(struct page); - mod_node_page_state(page_pgdat(pfn_to_page(pfn)), NR_MEMMAP, - -1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); + memmap_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); vmemmap_free(start, end, altmap); } static void free_map_bootmem(struct page *memmap) diff --git a/mm/vmstat.c b/mm/vmstat.c index 6f8aa4766f16..e875f2a4915f 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1033,6 +1033,24 @@ unsigned long node_page_state(struct pglist_data *pgdat, } #endif +/* + * Count number of pages "struct page" and "struct page_ext" consume. + * nr_memmap_boot_pages: # of pages allocated by boot allocator + * nr_memmap_pages: # of pages that were allocated by buddy allocator + */ +static atomic_long_t nr_memmap_boot_pages = ATOMIC_LONG_INIT(0); +static atomic_long_t nr_memmap_pages = ATOMIC_LONG_INIT(0); + +void memmap_boot_pages_add(long delta) +{ + atomic_long_add(delta, &nr_memmap_boot_pages); +} + +void memmap_pages_add(long delta) +{ + atomic_long_add(delta, &nr_memmap_pages); +} + #ifdef CONFIG_COMPACTION struct contig_page_info { @@ -1255,11 +1273,11 @@ const char * const vmstat_text[] = { "pgdemote_kswapd", "pgdemote_direct", "pgdemote_khugepaged", - "nr_memmap", - "nr_memmap_boot", /* system-wide enum vm_stat_item counters */ "nr_dirty_threshold", "nr_dirty_background_threshold", + "nr_memmap_pages", + "nr_memmap_boot_pages", #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) /* enum vm_event_item counters */ @@ -1827,6 +1845,8 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD, v + NR_DIRTY_THRESHOLD); + v[NR_MEMMAP_PAGES] = atomic_long_read(&nr_memmap_pages); + v[NR_MEMMAP_BOOT_PAGES] = atomic_long_read(&nr_memmap_boot_pages); v += NR_VM_STAT_ITEMS; #ifdef CONFIG_VM_EVENT_COUNTERS @@ -2285,25 +2305,3 @@ static int __init extfrag_debug_init(void) module_init(extfrag_debug_init); #endif - -/* - * Page metadata size (struct page and page_ext) in pages - */ -static unsigned long early_perpage_metadata[MAX_NUMNODES] __meminitdata; - -void __meminit mod_node_early_perpage_metadata(int nid, long delta) -{ - early_perpage_metadata[nid] += delta; -} - -void __meminit store_early_perpage_metadata(void) -{ - int nid; - struct pglist_data *pgdat; - - for_each_online_pgdat(pgdat) { - nid = pgdat->node_id; - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT, - early_perpage_metadata[nid]); - } -} -- cgit From d75abd0d0bc29e6ebfebbf76d11b4067b35844af Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 6 Aug 2024 12:41:07 -0400 Subject: mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu The memory_failure_cpu structure is a per-cpu structure. Access to its content requires the use of get_cpu_var() to lock in the current CPU and disable preemption. The use of a regular spinlock_t for locking purpose is fine for a non-RT kernel. Since the integration of RT spinlock support into the v5.15 kernel, a spinlock_t in a RT kernel becomes a sleeping lock and taking a sleeping lock in a preemption disabled context is illegal resulting in the following kind of warning. [12135.732244] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 [12135.732248] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 270076, name: kworker/0:0 [12135.732252] preempt_count: 1, expected: 0 [12135.732255] RCU nest depth: 2, expected: 2 : [12135.732420] Hardware name: Dell Inc. PowerEdge R640/0HG0J8, BIOS 2.10.2 02/24/2021 [12135.732423] Workqueue: kacpi_notify acpi_os_execute_deferred [12135.732433] Call Trace: [12135.732436] [12135.732450] dump_stack_lvl+0x57/0x81 [12135.732461] __might_resched.cold+0xf4/0x12f [12135.732479] rt_spin_lock+0x4c/0x100 [12135.732491] memory_failure_queue+0x40/0xe0 [12135.732503] ghes_do_memory_failure+0x53/0x390 [12135.732516] ghes_do_proc.constprop.0+0x229/0x3e0 [12135.732575] ghes_proc+0xf9/0x1a0 [12135.732591] ghes_notify_hed+0x6a/0x150 [12135.732602] notifier_call_chain+0x43/0xb0 [12135.732626] blocking_notifier_call_chain+0x43/0x60 [12135.732637] acpi_ev_notify_dispatch+0x47/0x70 [12135.732648] acpi_os_execute_deferred+0x13/0x20 [12135.732654] process_one_work+0x41f/0x500 [12135.732695] worker_thread+0x192/0x360 [12135.732715] kthread+0x111/0x140 [12135.732733] ret_from_fork+0x29/0x50 [12135.732779] Fix it by using a raw_spinlock_t for locking instead. Also move the pr_err() out of the lock critical section and after put_cpu_ptr() to avoid indeterminate latency and the possibility of sleep with this call. [longman@redhat.com: don't hold percpu ref across pr_err(), per Miaohe] Link: https://lkml.kernel.org/r/20240807181130.1122660-1-longman@redhat.com Link: https://lkml.kernel.org/r/20240806164107.1044956-1-longman@redhat.com Fixes: 0f383b6dc96e ("locking/spinlock: Provide RT variant") Signed-off-by: Waiman Long Acked-by: Miaohe Lin Cc: "Huang, Ying" Cc: Juri Lelli Cc: Len Brown Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton --- mm/memory-failure.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 581d3e5c9117..7066fc84f351 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2417,7 +2417,7 @@ struct memory_failure_entry { struct memory_failure_cpu { DECLARE_KFIFO(fifo, struct memory_failure_entry, MEMORY_FAILURE_FIFO_SIZE); - spinlock_t lock; + raw_spinlock_t lock; struct work_struct work; }; @@ -2443,20 +2443,22 @@ void memory_failure_queue(unsigned long pfn, int flags) { struct memory_failure_cpu *mf_cpu; unsigned long proc_flags; + bool buffer_overflow; struct memory_failure_entry entry = { .pfn = pfn, .flags = flags, }; mf_cpu = &get_cpu_var(memory_failure_cpu); - spin_lock_irqsave(&mf_cpu->lock, proc_flags); - if (kfifo_put(&mf_cpu->fifo, entry)) + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); + buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry); + if (!buffer_overflow) schedule_work_on(smp_processor_id(), &mf_cpu->work); - else + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + put_cpu_var(memory_failure_cpu); + if (buffer_overflow) pr_err("buffer overflow when queuing memory failure at %#lx\n", pfn); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); - put_cpu_var(memory_failure_cpu); } EXPORT_SYMBOL_GPL(memory_failure_queue); @@ -2469,9 +2471,9 @@ static void memory_failure_work_func(struct work_struct *work) mf_cpu = container_of(work, struct memory_failure_cpu, work); for (;;) { - spin_lock_irqsave(&mf_cpu->lock, proc_flags); + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); gotten = kfifo_get(&mf_cpu->fifo, &entry); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); if (!gotten) break; if (entry.flags & MF_SOFT_OFFLINE) @@ -2501,7 +2503,7 @@ static int __init memory_failure_init(void) for_each_possible_cpu(cpu) { mf_cpu = &per_cpu(memory_failure_cpu, cpu); - spin_lock_init(&mf_cpu->lock); + raw_spin_lock_init(&mf_cpu->lock); INIT_KFIFO(mf_cpu->fifo); INIT_WORK(&mf_cpu->work, memory_failure_work_func); } -- cgit From 61ebe5a747da649057c37be1c37eb934b4af79ca Mon Sep 17 00:00:00 2001 From: Hailong Liu Date: Thu, 8 Aug 2024 20:19:56 +0800 Subject: mm/vmalloc: fix page mapping if vm_area_alloc_pages() with high order fallback to order 0 The __vmap_pages_range_noflush() assumes its argument pages** contains pages with the same page shift. However, since commit e9c3cda4d86e ("mm, vmalloc: fix high order __GFP_NOFAIL allocations"), if gfp_flags includes __GFP_NOFAIL with high order in vm_area_alloc_pages() and page allocation failed for high order, the pages** may contain two different page shifts (high order and order-0). This could lead __vmap_pages_range_noflush() to perform incorrect mappings, potentially resulting in memory corruption. Users might encounter this as follows (vmap_allow_huge = true, 2M is for PMD_SIZE): kvmalloc(2M, __GFP_NOFAIL|GFP_X) __vmalloc_node_range_noprof(vm_flags=VM_ALLOW_HUGE_VMAP) vm_area_alloc_pages(order=9) ---> order-9 allocation failed and fallback to order-0 vmap_pages_range() vmap_pages_range_noflush() __vmap_pages_range_noflush(page_shift = 21) ----> wrong mapping happens We can remove the fallback code because if a high-order allocation fails, __vmalloc_node_range_noprof() will retry with order-0. Therefore, it is unnecessary to fallback to order-0 here. Therefore, fix this by removing the fallback code. Link: https://lkml.kernel.org/r/20240808122019.3361-1-hailong.liu@oppo.com Fixes: e9c3cda4d86e ("mm, vmalloc: fix high order __GFP_NOFAIL allocations") Signed-off-by: Hailong Liu Reported-by: Tangquan Zheng Reviewed-by: Baoquan He Reviewed-by: Uladzislau Rezki (Sony) Acked-by: Barry Song Acked-by: Michal Hocko Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton --- mm/vmalloc.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6b783baf12a1..af2de36549d6 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3584,15 +3584,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid, page = alloc_pages_noprof(alloc_gfp, order); else page = alloc_pages_node_noprof(nid, alloc_gfp, order); - if (unlikely(!page)) { - if (!nofail) - break; - - /* fall back to the zero order allocations */ - alloc_gfp |= __GFP_NOFAIL; - order = 0; - continue; - } + if (unlikely(!page)) + break; /* * Higher order allocations must be able to be treated as -- cgit From 40b760cfd44566bca791c80e0720d70d75382b84 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 9 Aug 2024 10:59:04 -0400 Subject: mm/numa: no task_numa_fault() call if PTE is changed When handling a numa page fault, task_numa_fault() should be called by a process that restores the page table of the faulted folio to avoid duplicated stats counting. Commit b99a342d4f11 ("NUMA balancing: reduce TLB flush via delaying mapping on hint page fault") restructured do_numa_page() and did not avoid task_numa_fault() call in the second page table check after a numa migration failure. Fix it by making all !pte_same() return immediately. This issue can cause task_numa_fault() being called more than necessary and lead to unexpected numa balancing results (It is hard to tell whether the issue will cause positive or negative performance impact due to duplicated numa fault counting). Link: https://lkml.kernel.org/r/20240809145906.1513458-2-ziy@nvidia.com Fixes: b99a342d4f11 ("NUMA balancing: reduce TLB flush via delaying mapping on hint page fault") Signed-off-by: Zi Yan Reported-by: "Huang, Ying" Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@yhuang6-desk2.ccr.corp.intel.com/ Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Kefeng Wang Cc: Mel Gorman Cc: Yang Shi Cc: Signed-off-by: Andrew Morton --- mm/memory.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 34f8402d2046..3c01d68065be 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5295,7 +5295,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + return 0; } pte = pte_modify(old_pte, vma->vm_page_prot); @@ -5358,23 +5358,19 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) if (!migrate_misplaced_folio(folio, vma, target_nid)) { nid = target_nid; flags |= TNF_MIGRATED; - } else { - flags |= TNF_MIGRATE_FAIL; - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, - vmf->address, &vmf->ptl); - if (unlikely(!vmf->pte)) - goto out; - if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { - pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; - } - goto out_map; + task_numa_fault(last_cpupid, nid, nr_pages, flags); + return 0; } -out: - if (nid != NUMA_NO_NODE) - task_numa_fault(last_cpupid, nid, nr_pages, flags); - return 0; + flags |= TNF_MIGRATE_FAIL; + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); + if (unlikely(!vmf->pte)) + return 0; + if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { + pte_unmap_unlock(vmf->pte, vmf->ptl); + return 0; + } out_map: /* * Make it present again, depending on how arch implements @@ -5387,7 +5383,10 @@ out_map: numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, writable); pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, nr_pages, flags); + return 0; } static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) -- cgit From fd8c35a92910f4829b7c99841f39b1b952c259d5 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 9 Aug 2024 10:59:05 -0400 Subject: mm/numa: no task_numa_fault() call if PMD is changed When handling a numa page fault, task_numa_fault() should be called by a process that restores the page table of the faulted folio to avoid duplicated stats counting. Commit c5b5a3dd2c1f ("mm: thp: refactor NUMA fault handling") restructured do_huge_pmd_numa_page() and did not avoid task_numa_fault() call in the second page table check after a numa migration failure. Fix it by making all !pmd_same() return immediately. This issue can cause task_numa_fault() being called more than necessary and lead to unexpected numa balancing results (It is hard to tell whether the issue will cause positive or negative performance impact due to duplicated numa fault counting). Link: https://lkml.kernel.org/r/20240809145906.1513458-3-ziy@nvidia.com Fixes: c5b5a3dd2c1f ("mm: thp: refactor NUMA fault handling") Reported-by: "Huang, Ying" Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@yhuang6-desk2.ccr.corp.intel.com/ Signed-off-by: Zi Yan Acked-by: David Hildenbrand Cc: Baolin Wang Cc: "Huang, Ying" Cc: Kefeng Wang Cc: Mel Gorman Cc: Yang Shi Cc: Signed-off-by: Andrew Morton --- mm/huge_memory.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f4be468e06a4..67c86a5d64a6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1685,7 +1685,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { spin_unlock(vmf->ptl); - goto out; + return 0; } pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1728,22 +1728,16 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) if (!migrate_misplaced_folio(folio, vma, target_nid)) { flags |= TNF_MIGRATED; nid = target_nid; - } else { - flags |= TNF_MIGRATE_FAIL; - vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); - if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { - spin_unlock(vmf->ptl); - goto out; - } - goto out_map; - } - -out: - if (nid != NUMA_NO_NODE) task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); + return 0; + } - return 0; - + flags |= TNF_MIGRATE_FAIL; + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { + spin_unlock(vmf->ptl); + return 0; + } out_map: /* Restore the PMD */ pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1753,7 +1747,10 @@ out_map: set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); spin_unlock(vmf->ptl); - goto out; + + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); + return 0; } /* -- cgit From af3b7d09a9934220a8136065a0e6985fe0b67a1b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Aug 2024 15:32:30 +0300 Subject: selftests/mm: compaction_test: fix off by one in check_compaction() The "initial_nr_hugepages" variable is unsigned long so it takes up to 20 characters to print, plus 1 more character for the NUL terminator. Unfortunately, this buffer is not quite large enough for the terminator to fit. Also use snprintf() for a belt and suspenders approach. Link: https://lkml.kernel.org/r/87470c06-b45a-4e83-92ff-aac2e7b9c6ba@stanley.mountain Fixes: fb9293b6b015 ("selftests/mm: compaction_test: fix bogus test success and reduce probability of OOM-killer invocation") Signed-off-by: Dan Carpenter Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/compaction_test.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index e140558e6f53..2c3a0eb6b22d 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -89,9 +89,10 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, int fd, ret = -1; int compaction_index = 0; char nr_hugepages[20] = {0}; - char init_nr_hugepages[20] = {0}; + char init_nr_hugepages[24] = {0}; - sprintf(init_nr_hugepages, "%lu", initial_nr_hugepages); + snprintf(init_nr_hugepages, sizeof(init_nr_hugepages), + "%lu", initial_nr_hugepages); /* We want to test with 80% of available memory. Else, OOM killer comes in to play */ -- cgit From 807174a93d24c456503692dc3f5af322ee0b640a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 9 Aug 2024 14:48:47 +0300 Subject: mm: fix endless reclaim on machines with unaccepted memory Unaccepted memory is considered unusable free memory, which is not counted as free on the zone watermark check. This causes get_page_from_freelist() to accept more memory to hit the high watermark, but it creates problems in the reclaim path. The reclaim path encounters a failed zone watermark check and attempts to reclaim memory. This is usually successful, but if there is little or no reclaimable memory, it can result in endless reclaim with little to no progress. This can occur early in the boot process, just after start of the init process when the only reclaimable memory is the page cache of the init executable and its libraries. Make unaccepted memory free from watermark check point of view. This way unaccepted memory will never be the trigger of memory reclaim. Accept more memory in the get_page_from_freelist() if needed. Link: https://lkml.kernel.org/r/20240809114854.3745464-2-kirill.shutemov@linux.intel.com Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory") Signed-off-by: Kirill A. Shutemov Reported-by: Jianxiong Gao Acked-by: David Hildenbrand Tested-by: Jianxiong Gao Cc: Borislav Petkov Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Mel Gorman Cc: Mike Rapoport (Microsoft) Cc: Tom Lendacky Cc: Vlastimil Babka Cc: [6.5+] Signed-off-by: Andrew Morton --- mm/page_alloc.c | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 875d76e8684a..8747087acee3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes); static bool page_contains_unaccepted(struct page *page, unsigned int order); static void accept_page(struct page *page, unsigned int order); -static bool try_to_accept_memory(struct zone *zone, unsigned int order); +static bool cond_accept_memory(struct zone *zone, unsigned int order); static inline bool has_unaccepted_memory(void); static bool __free_unaccepted(struct page *page); @@ -3072,9 +3072,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z, if (!(alloc_flags & ALLOC_CMA)) unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES); #endif -#ifdef CONFIG_UNACCEPTED_MEMORY - unusable_free += zone_page_state(z, NR_UNACCEPTED); -#endif return unusable_free; } @@ -3368,6 +3365,8 @@ retry: } } + cond_accept_memory(zone, order); + /* * Detect whether the number of free pages is below high * watermark. If so, we will decrease pcp->high and free @@ -3393,10 +3392,8 @@ check_alloc_wmark: gfp_mask)) { int ret; - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* @@ -3450,10 +3447,8 @@ try_this_zone: return page; } else { - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* Try again if zone has deferred pages */ @@ -6950,9 +6945,6 @@ static bool try_to_accept_memory_one(struct zone *zone) struct page *page; bool last; - if (list_empty(&zone->unaccepted_pages)) - return false; - spin_lock_irqsave(&zone->lock, flags); page = list_first_entry_or_null(&zone->unaccepted_pages, struct page, lru); @@ -6978,23 +6970,29 @@ static bool try_to_accept_memory_one(struct zone *zone) return true; } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { long to_accept; - int ret = false; + bool ret = false; + + if (!has_unaccepted_memory()) + return false; + + if (list_empty(&zone->unaccepted_pages)) + return false; /* How much to accept to get to high watermark? */ to_accept = high_wmark_pages(zone) - (zone_page_state(zone, NR_FREE_PAGES) - - __zone_watermark_unusable_free(zone, order, 0)); + __zone_watermark_unusable_free(zone, order, 0) - + zone_page_state(zone, NR_UNACCEPTED)); - /* Accept at least one page */ - do { + while (to_accept > 0) { if (!try_to_accept_memory_one(zone)) break; ret = true; to_accept -= MAX_ORDER_NR_PAGES; - } while (to_accept > 0); + } return ret; } @@ -7037,7 +7035,7 @@ static void accept_page(struct page *page, unsigned int order) { } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { return false; } -- cgit From 7c5e8d212d7d81991a580e7de3904ea213d9a852 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Fri, 9 Aug 2024 12:56:42 +0500 Subject: selftests: memfd_secret: don't build memfd_secret test on unsupported arches [1] mentions that memfd_secret is only supported on arm64, riscv, x86 and x86_64 for now. It doesn't support other architectures. I found the build error on arm and decided to send the fix as it was creating noise on KernelCI: memfd_secret.c: In function 'memfd_secret': memfd_secret.c:42:24: error: '__NR_memfd_secret' undeclared (first use in this function); did you mean 'memfd_secret'? 42 | return syscall(__NR_memfd_secret, flags); | ^~~~~~~~~~~~~~~~~ | memfd_secret Hence I'm adding condition that memfd_secret should only be compiled on supported architectures. Also check in run_vmtests script if memfd_secret binary is present before executing it. Link: https://lkml.kernel.org/r/20240812061522.1933054-1-usama.anjum@collabora.com Link: https://lore.kernel.org/all/20210518072034.31572-7-rppt@kernel.org/ [1] Link: https://lkml.kernel.org/r/20240809075642.403247-1-usama.anjum@collabora.com Fixes: 76fe17ef588a ("secretmem: test: add basic selftest for memfd_secret(2)") Signed-off-by: Muhammad Usama Anjum Reviewed-by: Shuah Khan Acked-by: Mike Rapoport (Microsoft) Cc: Albert Ou Cc: James Bottomley Cc: Mike Rapoport (Microsoft) Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/Makefile | 2 ++ tools/testing/selftests/mm/run_vmtests.sh | 3 +++ 2 files changed, 5 insertions(+) diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 7b8a5def54a1..cfad627e8d94 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -53,7 +53,9 @@ TEST_GEN_FILES += madv_populate TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_populate +ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64)) TEST_GEN_FILES += memfd_secret +endif TEST_GEN_FILES += migration TEST_GEN_FILES += mkdirty TEST_GEN_FILES += mlock-random-test diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 03ac4f2e1cce..36045edb10de 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -374,8 +374,11 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +if [ -x ./memfd_secret ] +then (echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix CATEGORY="memfd_secret" run_test ./memfd_secret +fi # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 CATEGORY="ksm" run_test ./ksm_tests -H -s 100 -- cgit From edb907a6133323e19311901a39dee68b1c6a2ef8 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 12 Aug 2024 14:20:17 +0800 Subject: crash: fix riscv64 crash memory reserve dead loop On RISCV64 Qemu machine with 512MB memory, cmdline "crashkernel=500M,high" will cause system stall as below: Zone ranges: DMA32 [mem 0x0000000080000000-0x000000009fffffff] Normal empty Movable zone start for each node Early memory node ranges node 0: [mem 0x0000000080000000-0x000000008005ffff] node 0: [mem 0x0000000080060000-0x000000009fffffff] Initmem setup node 0 [mem 0x0000000080000000-0x000000009fffffff] (stall here) commit 5d99cadf1568 ("crash: fix x86_32 crash memory reserve dead loop bug") fix this on 32-bit architecture. However, the problem is not completely solved. If `CRASH_ADDR_LOW_MAX = CRASH_ADDR_HIGH_MAX` on 64-bit architecture, for example, when system memory is equal to CRASH_ADDR_LOW_MAX on RISCV64, the following infinite loop will also occur: -> reserve_crashkernel_generic() and high is true -> alloc at [CRASH_ADDR_LOW_MAX, CRASH_ADDR_HIGH_MAX] fail -> alloc at [0, CRASH_ADDR_LOW_MAX] fail and repeatedly (because CRASH_ADDR_LOW_MAX = CRASH_ADDR_HIGH_MAX). As Catalin suggested, do not remove the ",high" reservation fallback to ",low" logic which will change arm64's kdump behavior, but fix it by skipping the above situation similar to commit d2f32f23190b ("crash: fix x86_32 crash memory reserve dead loop"). After this patch, it print: cannot allocate crashkernel (size:0x1f400000) Link: https://lkml.kernel.org/r/20240812062017.2674441-1-ruanjinjie@huawei.com Signed-off-by: Jinjie Ruan Suggested-by: Catalin Marinas Reviewed-by: Catalin Marinas Acked-by: Baoquan He Cc: Albert Ou Cc: Dave Young Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Vivek Goyal Signed-off-by: Andrew Morton --- kernel/crash_reserve.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index d3b4cd12bdd1..64d44a52c011 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -423,7 +423,8 @@ retry: if (high && search_end == CRASH_ADDR_HIGH_MAX) { search_end = CRASH_ADDR_LOW_MAX; search_base = 0; - goto retry; + if (search_end != CRASH_ADDR_HIGH_MAX) + goto retry; } pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); -- cgit From a8fc28dad6d574582cdf2f7e78c73c59c623df30 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 13 Aug 2024 08:07:56 -0700 Subject: alloc_tag: introduce clear_page_tag_ref() helper function In several cases we are freeing pages which were not allocated using common page allocators. For such cases, in order to keep allocation accounting correct, we should clear the page tag to indicate that the page being freed is expected to not have a valid allocation tag. Introduce clear_page_tag_ref() helper function to be used for this. Link: https://lkml.kernel.org/r/20240813150758.855881-1-surenb@google.com Fixes: d224eb0287fb ("codetag: debug: mark codetags for reserved pages as empty") Signed-off-by: Suren Baghdasaryan Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Reviewed-by: Pasha Tatashin Cc: Kees Cook Cc: Kent Overstreet Cc: Sourav Panda Cc: Vlastimil Babka Cc: [6.10] Signed-off-by: Andrew Morton --- include/linux/pgalloc_tag.h | 13 +++++++++++++ mm/mm_init.c | 10 +--------- mm/page_alloc.c | 9 +-------- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 18cd0c0c73d9..207f0c83c8e9 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -43,6 +43,18 @@ static inline void put_page_tag_ref(union codetag_ref *ref) page_ext_put(page_ext_from_codetag_ref(ref)); } +static inline void clear_page_tag_ref(struct page *page) +{ + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + set_codetag_empty(ref); + put_page_tag_ref(ref); + } + } +} + static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) { @@ -126,6 +138,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; } static inline void put_page_tag_ref(union codetag_ref *ref) {} +static inline void clear_page_tag_ref(struct page *page) {} static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} diff --git a/mm/mm_init.c b/mm/mm_init.c index f9a60ffc5532..adc3127573cd 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2459,15 +2459,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn, } /* pages were reserved and not allocated */ - if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); - } - } - + clear_page_tag_ref(page); __free_pages_core(page, order, MEMINIT_EARLY); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8747087acee3..c565de8f48e9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5815,14 +5815,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char void free_reserved_page(struct page *page) { - if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); - } - } + clear_page_tag_ref(page); ClearPageReserved(page); init_page_count(page); __free_page(page); -- cgit From 766c163c2068b45330664fb67df67268e588a22d Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 13 Aug 2024 08:07:57 -0700 Subject: alloc_tag: mark pages reserved during CMA activation as not tagged During CMA activation, pages in CMA area are prepared and then freed without being allocated. This triggers warnings when memory allocation debug config (CONFIG_MEM_ALLOC_PROFILING_DEBUG) is enabled. Fix this by marking these pages not tagged before freeing them. Link: https://lkml.kernel.org/r/20240813150758.855881-2-surenb@google.com Fixes: d224eb0287fb ("codetag: debug: mark codetags for reserved pages as empty") Signed-off-by: Suren Baghdasaryan Acked-by: David Hildenbrand Cc: Kees Cook Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Sourav Panda Cc: Vlastimil Babka Cc: [6.10] Signed-off-by: Andrew Morton --- mm/mm_init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/mm_init.c b/mm/mm_init.c index adc3127573cd..51960079875b 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2244,6 +2244,8 @@ void __init init_cma_reserved_pageblock(struct page *page) set_pageblock_migratetype(page, MIGRATE_CMA); set_page_refcounted(page); + /* pages were reserved and not allocated */ + clear_page_tag_ref(page); __free_pages(page, pageblock_order); adjust_managed_page_count(page, pageblock_nr_pages); -- cgit From 2e6506e1c4eed2676a8412231046f31e10e240da Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 29 Jul 2024 10:13:06 +0800 Subject: mm/migrate: fix deadlock in migrate_pages_batch() on large folios Currently, migrate_pages_batch() can lock multiple locked folios with an arbitrary order. Although folio_trylock() is used to avoid deadlock as commit 2ef7dbb26990 ("migrate_pages: try migrate in batch asynchronously firstly") mentioned, it seems try_split_folio() is still missing. It was found by compaction stress test when I explicitly enable EROFS compressed files to use large folios, which case I cannot reproduce with the same workload if large folio support is off (current mainline). Typically, filesystem reads (with locked file-backed folios) could use another bdev/meta inode to load some other I/Os (e.g. inode extent metadata or caching compressed data), so the locking order will be: file-backed folios (A) bdev/meta folios (B) The following calltrace shows the deadlock: Thread 1 takes (B) lock and tries to take folio (A) lock Thread 2 takes (A) lock and tries to take folio (B) lock [Thread 1] INFO: task stress:1824 blocked for more than 30 seconds. Tainted: G OE 6.10.0-rc7+ #6 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:stress state:D stack:0 pid:1824 tgid:1824 ppid:1822 flags:0x0000000c Call trace: __switch_to+0xec/0x138 __schedule+0x43c/0xcb0 schedule+0x54/0x198 io_schedule+0x44/0x70 folio_wait_bit_common+0x184/0x3f8 <-- folio mapping ffff00036d69cb18 index 996 (**) __folio_lock+0x24/0x38 migrate_pages_batch+0x77c/0xea0 // try_split_folio (mm/migrate.c:1486:2) // migrate_pages_batch (mm/migrate.c:1734:16) <--- LIST_HEAD(unmap_folios) has .. folio mapping 0xffff0000d184f1d8 index 1711; (*) folio mapping 0xffff0000d184f1d8 index 1712; .. migrate_pages+0xb28/0xe90 compact_zone+0xa08/0x10f0 compact_node+0x9c/0x180 sysctl_compaction_handler+0x8c/0x118 proc_sys_call_handler+0x1a8/0x280 proc_sys_write+0x1c/0x30 vfs_write+0x240/0x380 ksys_write+0x78/0x118 __arm64_sys_write+0x24/0x38 invoke_syscall+0x78/0x108 el0_svc_common.constprop.0+0x48/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x3c/0x148 el0t_64_sync_handler+0x100/0x130 el0t_64_sync+0x190/0x198 [Thread 2] INFO: task stress:1825 blocked for more than 30 seconds. Tainted: G OE 6.10.0-rc7+ #6 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:stress state:D stack:0 pid:1825 tgid:1825 ppid:1822 flags:0x0000000c Call trace: __switch_to+0xec/0x138 __schedule+0x43c/0xcb0 schedule+0x54/0x198 io_schedule+0x44/0x70 folio_wait_bit_common+0x184/0x3f8 <-- folio = 0xfffffdffc6b503c0 (mapping == 0xffff0000d184f1d8 index == 1711) (*) __folio_lock+0x24/0x38 z_erofs_runqueue+0x384/0x9c0 [erofs] z_erofs_readahead+0x21c/0x350 [erofs] <-- folio mapping 0xffff00036d69cb18 range from [992, 1024] (**) read_pages+0x74/0x328 page_cache_ra_order+0x26c/0x348 ondemand_readahead+0x1c0/0x3a0 page_cache_sync_ra+0x9c/0xc0 filemap_get_pages+0xc4/0x708 filemap_read+0x104/0x3a8 generic_file_read_iter+0x4c/0x150 vfs_read+0x27c/0x330 ksys_pread64+0x84/0xd0 __arm64_sys_pread64+0x28/0x40 invoke_syscall+0x78/0x108 el0_svc_common.constprop.0+0x48/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x3c/0x148 el0t_64_sync_handler+0x100/0x130 el0t_64_sync+0x190/0x198 Link: https://lkml.kernel.org/r/20240729021306.398286-1-hsiangkao@linux.alibaba.com Fixes: 5dfab109d519 ("migrate_pages: batch _unmap and _move") Signed-off-by: Gao Xiang Reviewed-by: "Huang, Ying" Acked-by: David Hildenbrand Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- mm/migrate.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index e7296c0fb5d5..923ea80ba744 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1479,11 +1479,17 @@ out: return rc; } -static inline int try_split_folio(struct folio *folio, struct list_head *split_folios) +static inline int try_split_folio(struct folio *folio, struct list_head *split_folios, + enum migrate_mode mode) { int rc; - folio_lock(folio); + if (mode == MIGRATE_ASYNC) { + if (!folio_trylock(folio)) + return -EAGAIN; + } else { + folio_lock(folio); + } rc = split_folio_to_list(folio, split_folios); folio_unlock(folio); if (!rc) @@ -1677,7 +1683,7 @@ static int migrate_pages_batch(struct list_head *from, */ if (nr_pages > 2 && !list_empty(&folio->_deferred_list)) { - if (try_split_folio(folio, split_folios) == 0) { + if (!try_split_folio(folio, split_folios, mode)) { nr_failed++; stats->nr_thp_failed += is_thp; stats->nr_thp_split += is_thp; @@ -1699,7 +1705,7 @@ static int migrate_pages_batch(struct list_head *from, if (!thp_migration_supported() && is_thp) { nr_failed++; stats->nr_thp_failed++; - if (!try_split_folio(folio, split_folios)) { + if (!try_split_folio(folio, split_folios, mode)) { stats->nr_thp_split++; stats->nr_split++; continue; @@ -1731,7 +1737,7 @@ static int migrate_pages_batch(struct list_head *from, stats->nr_thp_failed += is_thp; /* Large folio NUMA faulting doesn't split to retry. */ if (is_large && !nosplit) { - int ret = try_split_folio(folio, split_folios); + int ret = try_split_folio(folio, split_folios, mode); if (!ret) { stats->nr_thp_split += is_thp; -- cgit From ad899c301c880766cc709aad277991b3ab671b66 Mon Sep 17 00:00:00 2001 From: Eli Billauer Date: Fri, 16 Aug 2024 10:01:59 +0300 Subject: char: xillybus: Refine workqueue handling As the wakeup work item now runs on a separate workqueue, it needs to be flushed separately along with flushing the device's workqueue. Also, move the destroy_workqueue() call to the end of the exit method, so that deinitialization is done in the opposite order of initialization. Fixes: ccbde4b128ef ("char: xillybus: Don't destroy workqueue from work item running on it") Cc: stable Signed-off-by: Eli Billauer Link: https://lore.kernel.org/r/20240816070200.50695-1-eli.billauer@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/xillybus/xillyusb.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/char/xillybus/xillyusb.c b/drivers/char/xillybus/xillyusb.c index 33ca0f4af390..e12d359194f8 100644 --- a/drivers/char/xillybus/xillyusb.c +++ b/drivers/char/xillybus/xillyusb.c @@ -2093,9 +2093,11 @@ static int xillyusb_discovery(struct usb_interface *interface) * just after responding with the IDT, there is no reason for any * work item to be running now. To be sure that xdev->channels * is updated on anything that might run in parallel, flush the - * workqueue, which rarely does anything. + * device's workqueue and the wakeup work item. This rarely + * does anything. */ flush_workqueue(xdev->workq); + flush_work(&xdev->wakeup_workitem); xdev->num_channels = num_channels; @@ -2274,9 +2276,9 @@ static int __init xillyusb_init(void) static void __exit xillyusb_exit(void) { - destroy_workqueue(wakeup_wq); - usb_deregister(&xillyusb_driver); + + destroy_workqueue(wakeup_wq); } module_init(xillyusb_init); -- cgit From 2374bf7558de915edc6ec8cb10ec3291dfab9594 Mon Sep 17 00:00:00 2001 From: Eli Billauer Date: Fri, 16 Aug 2024 10:02:00 +0300 Subject: char: xillybus: Check USB endpoints when probing device Ensure, as the driver probes the device, that all endpoints that the driver may attempt to access exist and are of the correct type. All XillyUSB devices must have a Bulk IN and Bulk OUT endpoint at address 1. This is verified in xillyusb_setup_base_eps(). On top of that, a XillyUSB device may have additional Bulk OUT endpoints. The information about these endpoints' addresses is deduced from a data structure (the IDT) that the driver fetches from the device while probing it. These endpoints are checked in setup_channels(). A XillyUSB device never has more than one IN endpoint, as all data towards the host is multiplexed in this single Bulk IN endpoint. This is why setup_channels() only checks OUT endpoints. Reported-by: syzbot+eac39cba052f2e750dbe@syzkaller.appspotmail.com Cc: stable Closes: https://lore.kernel.org/all/0000000000001d44a6061f7a54ee@google.com/T/ Fixes: a53d1202aef1 ("char: xillybus: Add driver for XillyUSB (Xillybus variant for USB)"). Signed-off-by: Eli Billauer Link: https://lore.kernel.org/r/20240816070200.50695-2-eli.billauer@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/xillybus/xillyusb.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/char/xillybus/xillyusb.c b/drivers/char/xillybus/xillyusb.c index e12d359194f8..45771b1a3716 100644 --- a/drivers/char/xillybus/xillyusb.c +++ b/drivers/char/xillybus/xillyusb.c @@ -1903,6 +1903,13 @@ static const struct file_operations xillyusb_fops = { static int xillyusb_setup_base_eps(struct xillyusb_dev *xdev) { + struct usb_device *udev = xdev->udev; + + /* Verify that device has the two fundamental bulk in/out endpoints */ + if (usb_pipe_type_check(udev, usb_sndbulkpipe(udev, MSG_EP_NUM)) || + usb_pipe_type_check(udev, usb_rcvbulkpipe(udev, IN_EP_NUM))) + return -ENODEV; + xdev->msg_ep = endpoint_alloc(xdev, MSG_EP_NUM | USB_DIR_OUT, bulk_out_work, 1, 2); if (!xdev->msg_ep) @@ -1932,14 +1939,15 @@ static int setup_channels(struct xillyusb_dev *xdev, __le16 *chandesc, int num_channels) { - struct xillyusb_channel *chan; + struct usb_device *udev = xdev->udev; + struct xillyusb_channel *chan, *new_channels; int i; chan = kcalloc(num_channels, sizeof(*chan), GFP_KERNEL); if (!chan) return -ENOMEM; - xdev->channels = chan; + new_channels = chan; for (i = 0; i < num_channels; i++, chan++) { unsigned int in_desc = le16_to_cpu(*chandesc++); @@ -1968,6 +1976,15 @@ static int setup_channels(struct xillyusb_dev *xdev, */ if ((out_desc & 0x80) && i < 14) { /* Entry is valid */ + if (usb_pipe_type_check(udev, + usb_sndbulkpipe(udev, i + 2))) { + dev_err(xdev->dev, + "Missing BULK OUT endpoint %d\n", + i + 2); + kfree(new_channels); + return -ENODEV; + } + chan->writable = 1; chan->out_synchronous = !!(out_desc & 0x40); chan->out_seekable = !!(out_desc & 0x20); @@ -1977,6 +1994,7 @@ static int setup_channels(struct xillyusb_dev *xdev, } } + xdev->channels = new_channels; return 0; } -- cgit From 8d019b15ddd55d6dc5685b1f51902c4aa8e01939 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:06:54 +0300 Subject: selftests: net: local_termination: refactor macvlan creation/deletion This will be used in other subtests as well; make new macvlan_create() and macvlan_destroy() functions. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../selftests/net/forwarding/local_termination.sh | 30 +++++++++++++--------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 4b364cdf3ef0..36f3d577d0be 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -247,19 +247,29 @@ bridge_destroy() ip link del br0 } -standalone() +macvlan_create() { - h1_create - h2_create + local lower=$1 - ip link add link $h2 name macvlan0 type macvlan mode private + ip link add link $lower name macvlan0 type macvlan mode private ip link set macvlan0 address $MACVLAN_ADDR ip link set macvlan0 up +} - run_test $h2 - +macvlan_destroy() +{ ip link del macvlan0 +} + +standalone() +{ + h1_create + h2_create + macvlan_create $h2 + + run_test $h2 + macvlan_destroy h2_destroy h1_destroy } @@ -268,15 +278,11 @@ bridge() { h1_create bridge_create - - ip link add link br0 name macvlan0 type macvlan mode private - ip link set macvlan0 address $MACVLAN_ADDR - ip link set macvlan0 up + macvlan_create br0 run_test br0 - ip link del macvlan0 - + macvlan_destroy bridge_destroy h1_destroy } -- cgit From 4261fa35185c0112acca0496d3732c8fcfe1dcf2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:06:55 +0300 Subject: selftests: net: local_termination: parameterize sending interface In future changes we will want to subject the DUT, $h2, to additional VLAN-tagged traffic. For that, we need to run the tests using $h1.100 as a sending interface, rather than the currently hardcoded $h1. Add a parameter to run_test() and modify its 2 callers to explicitly pass $h1, as was implicit before. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../selftests/net/forwarding/local_termination.sh | 39 +++++++++++----------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 36f3d577d0be..92f0e242d119 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -104,44 +104,45 @@ mc_route_destroy() run_test() { - local rcv_if_name=$1 - local smac=$(mac_get $h1) + local send_if_name=$1; shift + local rcv_if_name=$1; shift + local smac=$(mac_get $send_if_name) local rcv_dmac=$(mac_get $rcv_if_name) tcpdump_start $rcv_if_name - mc_route_prepare $h1 + mc_route_prepare $send_if_name mc_route_prepare $rcv_if_name - send_uc_ipv4 $h1 $rcv_dmac - send_uc_ipv4 $h1 $MACVLAN_ADDR - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1 + send_uc_ipv4 $send_if_name $rcv_dmac + send_uc_ipv4 $send_if_name $MACVLAN_ADDR + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR1 ip link set dev $rcv_if_name promisc on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR2 ip link set dev $rcv_if_name promisc off mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR - mc_send $h1 $JOINED_IPV4_MC_ADDR + mc_send $send_if_name $JOINED_IPV4_MC_ADDR mc_leave mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR - mc_send $h1 $JOINED_IPV6_MC_ADDR + mc_send $send_if_name $JOINED_IPV6_MC_ADDR mc_leave - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR1 ip link set dev $rcv_if_name allmulticast on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR3 ip link set dev $rcv_if_name allmulticast off mc_route_destroy $rcv_if_name - mc_route_destroy $h1 + mc_route_destroy $send_if_name sleep 1 @@ -267,7 +268,7 @@ standalone() h2_create macvlan_create $h2 - run_test $h2 + run_test $h1 $h2 macvlan_destroy h2_destroy @@ -280,7 +281,7 @@ bridge() bridge_create macvlan_create br0 - run_test br0 + run_test $h1 br0 macvlan_destroy bridge_destroy -- cgit From df7cf5cc551c7c0a92520e91e1184993784c6386 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:06:56 +0300 Subject: selftests: net: local_termination: parameterize test name There are upcoming tests which verify the RX filtering of a bridge (or bridge port), but under differing vlan_filtering conditions. Since we currently print $h2 (the DUT) in the log_test() output, it becomes necessary to make a further distinction between tests, to not give the user the impression that the exact same thing is run twice. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../selftests/net/forwarding/local_termination.sh | 38 ++++++++++++---------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 92f0e242d119..af284edaf401 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -68,10 +68,11 @@ send_uc_ipv4() check_rcv() { - local if_name=$1 - local type=$2 - local pattern=$3 - local should_receive=$4 + local if_name=$1; shift + local type=$1; shift + local pattern=$1; shift + local should_receive=$1; shift + local test_name="$1"; shift local should_fail= [ $should_receive = true ] && should_fail=0 || should_fail=1 @@ -81,7 +82,7 @@ check_rcv() check_err_fail "$should_fail" "$?" "reception" - log_test "$if_name: $type" + log_test "$test_name: $type" } mc_route_prepare() @@ -106,6 +107,7 @@ run_test() { local send_if_name=$1; shift local rcv_if_name=$1; shift + local test_name="$1"; shift local smac=$(mac_get $send_if_name) local rcv_dmac=$(mac_get $rcv_if_name) @@ -150,61 +152,61 @@ run_test() check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \ "$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \ "$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" xfail_on_veth $h1 \ check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ - false + false "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \ "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" xfail_on_veth $h1 \ check_rcv $rcv_if_name \ "Unicast IPv4 to unknown MAC address, allmulti" \ "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to joined group" \ "$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" xfail_on_veth $h1 \ check_rcv $rcv_if_name \ "Multicast IPv4 to unknown group" \ "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to joined group" \ "$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" xfail_on_veth $h1 \ check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \ "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" tcpdump_cleanup $rcv_if_name } -- cgit From 5b8e74182ed3d4f1c38c626e6120275ca9d92bee Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:06:57 +0300 Subject: selftests: net: local_termination: add one more test for VLAN-aware bridges The current bridge() test is for packet reception on a VLAN-unaware bridge. Some things are different enough with VLAN-aware bridges that it's worth renaming this test into vlan_unaware_bridge(), and add a new vlan_aware_bridge() test. The two will share the same implementation: bridge() becomes a common function, which receives $vlan_filtering as an argument. Rename it to test_bridge() at the same time, because just bridge() pollutes the global namespace and we cannot invoke the binary with the same name from the iproute2 package currently. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../selftests/net/forwarding/local_termination.sh | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index af284edaf401..5aa364b40e33 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="standalone bridge" +ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge" NUM_NETIFS=2 PING_COUNT=1 REQUIRE_MTOOLS=yes @@ -233,7 +233,9 @@ h2_destroy() bridge_create() { - ip link add br0 type bridge + local vlan_filtering=$1 + + ip link add br0 type bridge vlan_filtering $vlan_filtering ip link set br0 address $BRIDGE_ADDR ip link set br0 up @@ -277,10 +279,12 @@ standalone() h1_destroy } -bridge() +test_bridge() { + local vlan_filtering=$1 + h1_create - bridge_create + bridge_create $vlan_filtering macvlan_create br0 run_test $h1 br0 @@ -290,6 +294,16 @@ bridge() h1_destroy } +vlan_unaware_bridge() +{ + test_bridge 0 +} + +vlan_aware_bridge() +{ + test_bridge 1 +} + cleanup() { pre_cleanup -- cgit From 5fea8bb009744bbb90b3f6ca41c558429ee4c849 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:06:58 +0300 Subject: selftests: net: local_termination: introduce new tests which capture VLAN behavior Add more coverage to the local termination selftest as follows: - 8021q upper of $h2 - 8021q upper of $h2, where $h2 is a port of a VLAN-unaware bridge - 8021q upper of $h2, where $h2 is a port of a VLAN-aware bridge - 8021q upper of VLAN-unaware br0, which is the upper of $h2 - 8021q upper of VLAN-aware br0, which is the upper of $h2 Especially the cases with traffic sent through the VLAN upper of a VLAN-aware bridge port will be immediately relevant when we will start transmitting PTP packets as an additional kind of traffic. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../selftests/net/forwarding/local_termination.sh | 117 +++++++++++++++++++-- 1 file changed, 110 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 5aa364b40e33..e22c6a693bef 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -1,7 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge" +ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \ + vlan_over_vlan_unaware_bridged_port vlan_over_vlan_aware_bridged_port \ + vlan_over_vlan_unaware_bridge vlan_over_vlan_aware_bridge" NUM_NETIFS=2 PING_COUNT=1 REQUIRE_MTOOLS=yes @@ -231,6 +233,30 @@ h2_destroy() simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 } +h1_vlan_create() +{ + simple_if_init $h1 + vlan_create $h1 100 v$h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_vlan_destroy() +{ + vlan_destroy $h1 100 + simple_if_fini $h1 +} + +h2_vlan_create() +{ + simple_if_init $h2 + vlan_create $h2 100 v$h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_vlan_destroy() +{ + vlan_destroy $h2 100 + simple_if_fini $h2 +} + bridge_create() { local vlan_filtering=$1 @@ -241,14 +267,10 @@ bridge_create() ip link set $h2 master br0 ip link set $h2 up - - simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 } bridge_destroy() { - simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 - ip link del br0 } @@ -272,7 +294,7 @@ standalone() h2_create macvlan_create $h2 - run_test $h1 $h2 + run_test $h1 $h2 "$h2" macvlan_destroy h2_destroy @@ -285,11 +307,13 @@ test_bridge() h1_create bridge_create $vlan_filtering + simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 macvlan_create br0 - run_test $h1 br0 + run_test $h1 br0 "vlan_filtering=$vlan_filtering bridge" macvlan_destroy + simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 bridge_destroy h1_destroy } @@ -304,6 +328,85 @@ vlan_aware_bridge() test_bridge 1 } +test_vlan() +{ + h1_vlan_create + h2_vlan_create + macvlan_create $h2.100 + + run_test $h1.100 $h2.100 "VLAN upper" + + macvlan_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_bridged_port() +{ + local vlan_filtering=$1 + + h1_vlan_create + h2_vlan_create + bridge_create $vlan_filtering + macvlan_create $h2.100 + + run_test $h1.100 $h2.100 "VLAN over vlan_filtering=$vlan_filtering bridged port" + + macvlan_destroy + bridge_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridged_port() +{ + vlan_over_bridged_port 0 +} + +vlan_over_vlan_aware_bridged_port() +{ + vlan_over_bridged_port 1 +} + +vlan_over_bridge() +{ + local vlan_filtering=$1 + + h1_vlan_create + bridge_create $vlan_filtering + simple_if_init br0 + vlan_create br0 100 vbr0 $H2_IPV4/24 $H2_IPV6/64 + macvlan_create br0.100 + + if [ $vlan_filtering = 1 ]; then + bridge vlan add dev $h2 vid 100 master + bridge vlan add dev br0 vid 100 self + fi + + run_test $h1.100 br0.100 "VLAN over vlan_filtering=$vlan_filtering bridge" + + if [ $vlan_filtering = 1 ]; then + bridge vlan del dev br0 vid 100 self + bridge vlan del dev $h2 vid 100 master + fi + + macvlan_destroy + vlan_destroy br0 100 + simple_if_fini br0 + bridge_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridge() +{ + vlan_over_bridge 0 +} + +vlan_over_vlan_aware_bridge() +{ + vlan_over_bridge 1 +} + cleanup() { pre_cleanup -- cgit From 9aa3749ca4a880c1a59720aab3eacf344ed8d68d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:06:59 +0300 Subject: selftests: net: local_termination: don't use xfail_on_veth() xfail_on_veth() for this test is an incorrect approximation which gives false positives and false negatives. When local_termination fails with "reception succeeded, but should have failed", it is because the DUT ($h2) accepts packets even when not configured as promiscuous. This is not something specific to veth; even the bridge behaves that way, but this is not captured by the xfail_on_veth test. The IFF_UNICAST_FLT flag is not explicitly exported to user space, but it can somewhat be determined from the interface's behavior. We have to create a macvlan upper with a different MAC address. This forces a dev_uc_add() call in the kernel. When the unicast filtering list is not empty, but the device doesn't support IFF_UNICAST_FLT, __dev_set_rx_mode() force-enables promiscuity on the interface, to ensure correct behavior (that the requested address is received). We can monitor the change in the promiscuity flag and infer from it whether the device supports unicast filtering. There is no equivalent thing for allmulti, unfortunately. We never know what's hiding behind a device which has allmulti=off. Whether it will actually perform RX multicast filtering of unknown traffic is a strong "maybe". The bridge driver, for example, completely ignores the flag. We'll have to keep the xfail behavior, but instead of XFAIL on just veth, always XFAIL. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/lib.sh | 57 +++++++++++++++++++++ .../selftests/net/forwarding/local_termination.sh | 58 ++++++++++++++++------ 2 files changed, 99 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index ff96bb7535ff..718d04a4f72d 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -500,6 +500,11 @@ check_err_fail() fi } +xfail() +{ + FAIL_TO_XFAIL=yes "$@" +} + xfail_on_slow() { if [[ $KSFT_MACHINE_SLOW = yes ]]; then @@ -1113,6 +1118,39 @@ mac_get() ip -j link show dev $if_name | jq -r '.[]["address"]' } +ether_addr_to_u64() +{ + local addr="$1" + local order="$((1 << 40))" + local val=0 + local byte + + addr="${addr//:/ }" + + for byte in $addr; do + byte="0x$byte" + val=$((val + order * byte)) + order=$((order >> 8)) + done + + printf "0x%x" $val +} + +u64_to_ether_addr() +{ + local val=$1 + local byte + local i + + for ((i = 40; i >= 0; i -= 8)); do + byte=$(((val & (0xff << i)) >> i)) + printf "%02x" $byte + if [ $i -ne 0 ]; then + printf ":" + fi + done +} + ipv6_lladdr_get() { local if_name=$1 @@ -2229,3 +2267,22 @@ absval() echo $((v > 0 ? v : -v)) } + +has_unicast_flt() +{ + local dev=$1; shift + local mac_addr=$(mac_get $dev) + local tmp=$(ether_addr_to_u64 $mac_addr) + local promisc + + ip link set $dev up + ip link add link $dev name macvlan-tmp type macvlan mode private + ip link set macvlan-tmp address $(u64_to_ether_addr $((tmp + 1))) + ip link set macvlan-tmp up + + promisc=$(ip -j -d link show dev $dev | jq -r '.[].promiscuity') + + ip link del macvlan-tmp + + [[ $promisc == 1 ]] && echo "no" || echo "yes" +} diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index e22c6a693bef..80ea4c10d764 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -109,9 +109,11 @@ run_test() { local send_if_name=$1; shift local rcv_if_name=$1; shift + local no_unicast_flt=$1; shift local test_name="$1"; shift local smac=$(mac_get $send_if_name) local rcv_dmac=$(mac_get $rcv_if_name) + local should_receive tcpdump_start $rcv_if_name @@ -160,26 +162,26 @@ run_test() "$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \ true "$test_name" - xfail_on_veth $h1 \ - check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ - "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ - false "$test_name" + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false + check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ + "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ + $should_receive "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \ "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \ true "$test_name" - xfail_on_veth $h1 \ - check_rcv $rcv_if_name \ - "Unicast IPv4 to unknown MAC address, allmulti" \ - "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ - false "$test_name" + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false + check_rcv $rcv_if_name \ + "Unicast IPv4 to unknown MAC address, allmulti" \ + "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ + $should_receive "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to joined group" \ "$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \ true "$test_name" - xfail_on_veth $h1 \ + xfail \ check_rcv $rcv_if_name \ "Multicast IPv4 to unknown group" \ "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \ @@ -197,7 +199,7 @@ run_test() "$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \ true "$test_name" - xfail_on_veth $h1 \ + xfail \ check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \ "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \ false "$test_name" @@ -290,11 +292,17 @@ macvlan_destroy() standalone() { + local no_unicast_flt=true + + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + h1_create h2_create macvlan_create $h2 - run_test $h1 $h2 "$h2" + run_test $h1 $h2 $no_unicast_flt "$h2" macvlan_destroy h2_destroy @@ -303,6 +311,7 @@ standalone() test_bridge() { + local no_unicast_flt=true local vlan_filtering=$1 h1_create @@ -310,7 +319,7 @@ test_bridge() simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 macvlan_create br0 - run_test $h1 br0 "vlan_filtering=$vlan_filtering bridge" + run_test $h1 br0 $no_unicast_flt "vlan_filtering=$vlan_filtering bridge" macvlan_destroy simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 @@ -330,11 +339,17 @@ vlan_aware_bridge() test_vlan() { + local no_unicast_flt=true + + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + h1_vlan_create h2_vlan_create macvlan_create $h2.100 - run_test $h1.100 $h2.100 "VLAN upper" + run_test $h1.100 $h2.100 $no_unicast_flt "VLAN upper" macvlan_destroy h2_vlan_destroy @@ -343,14 +358,23 @@ test_vlan() vlan_over_bridged_port() { + local no_unicast_flt=true local vlan_filtering=$1 + # br_manage_promisc() will not force a single vlan_filtering port to + # promiscuous mode, so we should still expect unicast filtering to take + # place if the device can do it. + if [ $(has_unicast_flt $h2) = yes ] && [ $vlan_filtering = 1 ]; then + no_unicast_flt=false + fi + h1_vlan_create h2_vlan_create bridge_create $vlan_filtering macvlan_create $h2.100 - run_test $h1.100 $h2.100 "VLAN over vlan_filtering=$vlan_filtering bridged port" + run_test $h1.100 $h2.100 $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridged port" macvlan_destroy bridge_destroy @@ -370,6 +394,7 @@ vlan_over_vlan_aware_bridged_port() vlan_over_bridge() { + local no_unicast_flt=true local vlan_filtering=$1 h1_vlan_create @@ -383,7 +408,8 @@ vlan_over_bridge() bridge vlan add dev br0 vid 100 self fi - run_test $h1.100 br0.100 "VLAN over vlan_filtering=$vlan_filtering bridge" + run_test $h1.100 br0.100 $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridge" if [ $vlan_filtering = 1 ]; then bridge vlan del dev br0 vid 100 self -- cgit From 237979504264912a9797dabc0db35126e705fe0d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:00 +0300 Subject: selftests: net: local_termination: add PTP frames to the mix A breakage in the felix DSA driver shows we do not have enough test coverage. More generally, it is sufficiently special that it is likely drivers will treat it differently. This is not meant to be a full PTP test, it just makes sure that PTP packets sent to the different addresses corresponding to their profiles are received correctly. The local_termination selftest seemed like the most appropriate place for this addition. PTP RX/TX in some cases makes no sense (over a bridge) and this is why $skip_ptp exists. And in others - PTP over a bridge port - the IP stack needs convincing through the available bridge netfilter hooks to leave the PTP packets alone and not stolen by the bridge rx_handler. It is safe to assume that users have that figured out already. This is a driver level test, and by using tcpdump, all that extra setup is out of scope here. send_non_ip() was an unfinished idea; written but never used. Replace it with a more generic send_raw(), and send 3 PTP packet types times 3 transports. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../selftests/net/forwarding/local_termination.sh | 161 +++++++++++++++++++-- 1 file changed, 148 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 80ea4c10d764..648868f74604 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -39,9 +39,68 @@ UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05" UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06" UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07" -NON_IP_MC="01:02:03:04:05:06" -NON_IP_PKT="00:04 48:45:4c:4f" -BC="ff:ff:ff:ff:ff:ff" +PTP_1588_L2_SYNC=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00" +PTP_1588_L2_FOLLOW_UP=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c5 f1 17 97 ed f0" +PTP_1588_L2_PDELAY_REQ=" \ +01:80:c2:00:00:0e 00:00:de:ad:be:ef 88:f7 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 06 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00" +PTP_1588_IPV4_SYNC=" \ +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9a 40 00 01 11 cb 88 c0 00 02 01 e0 00 \ +01 81 01 3f 01 3f 00 34 a3 c8 00 02 00 2c 00 00 \ +02 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" +PTP_1588_IPV4_FOLLOW_UP=" +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9b 40 00 01 11 cb 87 c0 00 02 01 e0 00 \ +01 81 01 40 01 40 00 34 a3 c8 08 02 00 2c 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 02 00 00 00 66 83 \ +c6 0f 1d 9a 61 87" +PTP_1588_IPV4_PDELAY_REQ=" \ +01:00:5e:00:00:6b 00:00:de:ad:be:ef 08:00 45 00 \ +00 52 35 a9 40 00 01 11 a1 85 c0 00 02 01 e0 00 \ +00 6b 01 3f 01 3f 00 3e a2 bc 02 02 00 36 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 01 05 7f 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" +PTP_1588_IPV6_SYNC=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 06 \ +7c 2f 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 3f 01 3f 00 36 2e 92 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00" +PTP_1588_IPV6_FOLLOW_UP=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 0a \ +00 bc 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 40 01 40 00 36 2e 92 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c6 2a 32 09 bd 74 00 00" +PTP_1588_IPV6_PDELAY_REQ=" \ +33:33:00:00:00:6b 00:00:de:ad:be:ef 86:dd 60 0c \ +5c fd 00 40 11 01 fe 80 00 00 00 00 00 00 3c 37 \ +63 ff fe cf 17 0e ff 02 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 6b 01 3f 01 3f 00 40 b4 54 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 01 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" # Disable promisc to ensure we don't receive unknown MAC DA packets export TCPDUMP_EXTRA_FLAGS="-pl" @@ -49,13 +108,15 @@ export TCPDUMP_EXTRA_FLAGS="-pl" h1=${NETIFS[p1]} h2=${NETIFS[p2]} -send_non_ip() +send_raw() { - local if_name=$1 - local smac=$2 - local dmac=$3 + local if_name=$1; shift + local pkt="$1"; shift + local smac=$(mac_get $if_name) - $MZ -q $if_name "$dmac $smac $NON_IP_PKT" + pkt="${pkt/00:00:de:ad:be:ef/$smac}" + + $MZ -q $if_name "$pkt" } send_uc_ipv4() @@ -109,6 +170,7 @@ run_test() { local send_if_name=$1; shift local rcv_if_name=$1; shift + local skip_ptp=$1; shift local no_unicast_flt=$1; shift local test_name="$1"; shift local smac=$(mac_get $send_if_name) @@ -150,6 +212,35 @@ run_test() mc_route_destroy $rcv_if_name mc_route_destroy $send_if_name + if [ $skip_ptp = false ]; then + ip maddress add 01:1b:19:00:00:00 dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_SYNC" + send_raw $send_if_name "$PTP_1588_L2_FOLLOW_UP" + ip maddress del 01:1b:19:00:00:00 dev $rcv_if_name + + ip maddress add 01:80:c2:00:00:0e dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_PDELAY_REQ" + ip maddress del 01:80:c2:00:00:0e dev $rcv_if_name + + mc_join $rcv_if_name 224.0.1.129 + send_raw $send_if_name "$PTP_1588_IPV4_SYNC" + send_raw $send_if_name "$PTP_1588_IPV4_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name 224.0.0.107 + send_raw $send_if_name "$PTP_1588_IPV4_PDELAY_REQ" + mc_leave + + mc_join $rcv_if_name ff0e::181 + send_raw $send_if_name "$PTP_1588_IPV6_SYNC" + send_raw $send_if_name "$PTP_1588_IPV6_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name ff02::6b + send_raw $send_if_name "$PTP_1588_IPV6_PDELAY_REQ" + mc_leave + fi + sleep 1 tcpdump_stop $rcv_if_name @@ -212,6 +303,44 @@ run_test() "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \ true "$test_name" + if [ $skip_ptp = false ]; then + check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + fi + tcpdump_cleanup $rcv_if_name } @@ -293,6 +422,7 @@ macvlan_destroy() standalone() { local no_unicast_flt=true + local skip_ptp=false if [ $(has_unicast_flt $h2) = yes ]; then no_unicast_flt=false @@ -302,7 +432,7 @@ standalone() h2_create macvlan_create $h2 - run_test $h1 $h2 $no_unicast_flt "$h2" + run_test $h1 $h2 $skip_ptp $no_unicast_flt "$h2" macvlan_destroy h2_destroy @@ -313,13 +443,15 @@ test_bridge() { local no_unicast_flt=true local vlan_filtering=$1 + local skip_ptp=true h1_create bridge_create $vlan_filtering simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 macvlan_create br0 - run_test $h1 br0 $no_unicast_flt "vlan_filtering=$vlan_filtering bridge" + run_test $h1 br0 $skip_ptp $no_unicast_flt \ + "vlan_filtering=$vlan_filtering bridge" macvlan_destroy simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 @@ -340,6 +472,7 @@ vlan_aware_bridge() test_vlan() { local no_unicast_flt=true + local skip_ptp=false if [ $(has_unicast_flt $h2) = yes ]; then no_unicast_flt=false @@ -349,7 +482,7 @@ test_vlan() h2_vlan_create macvlan_create $h2.100 - run_test $h1.100 $h2.100 $no_unicast_flt "VLAN upper" + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt "VLAN upper" macvlan_destroy h2_vlan_destroy @@ -360,6 +493,7 @@ vlan_over_bridged_port() { local no_unicast_flt=true local vlan_filtering=$1 + local skip_ptp=false # br_manage_promisc() will not force a single vlan_filtering port to # promiscuous mode, so we should still expect unicast filtering to take @@ -373,7 +507,7 @@ vlan_over_bridged_port() bridge_create $vlan_filtering macvlan_create $h2.100 - run_test $h1.100 $h2.100 $no_unicast_flt \ + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt \ "VLAN over vlan_filtering=$vlan_filtering bridged port" macvlan_destroy @@ -396,6 +530,7 @@ vlan_over_bridge() { local no_unicast_flt=true local vlan_filtering=$1 + local skip_ptp=true h1_vlan_create bridge_create $vlan_filtering @@ -408,7 +543,7 @@ vlan_over_bridge() bridge vlan add dev br0 vid 100 self fi - run_test $h1.100 br0.100 $no_unicast_flt \ + run_test $h1.100 br0.100 $skip_ptp $no_unicast_flt \ "VLAN over vlan_filtering=$vlan_filtering bridge" if [ $vlan_filtering = 1 ]; then -- cgit From e29b82ef27616777e21c07dc263a8769cbdaa358 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:01 +0300 Subject: selftests: net: bridge_vlan_aware: test that other TPIDs are seen as untagged The bridge VLAN implementation w.r.t. VLAN protocol is described in merge commit 1a0b20b25732 ("Merge branch 'bridge-next'"). We are only sensitive to those VLAN tags whose TPID is equal to the bridge's vlan_protocol. Thus, an 802.1ad VLAN should be treated as 802.1Q-untagged. Add 3 tests which validate that: - 802.1ad-tagged traffic is learned into the PVID of an 802.1Q-aware bridge - Double-tagged traffic is forwarded when just the PVID of the port is present in the VLAN group of the ports - Double-tagged traffic is not forwarded when the PVID of the port is absent from the VLAN group of the ports The test passes with both veth and ocelot. Signed-off-by: Vladimir Oltean Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/bridge_vlan_aware.sh | 54 +++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 64bd00fe9a4f..90f8a244ea90 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -142,6 +142,58 @@ extern_learn() bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null } +other_tpid() +{ + local mac=de:ad:be:ef:13:37 + + # Test that packets with TPID 802.1ad VID 3 + TPID 802.1Q VID 5 are + # classified as untagged by a bridge with vlan_protocol 802.1Q, and + # are processed in the PVID of the ingress port (here 1). Not VID 3, + # and not VID 5. + RET=0 + + tc qdisc add dev $h2 clsact + tc filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + ip link set $h2 promisc on + ethtool -K $h2 rx-vlan-filter off rx-vlan-stag-filter off + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + # Match on 'self' addresses as well, for those drivers which + # do not push their learned addresses to the bridge software + # database + bridge -j fdb show $swp1 | \ + jq -e ".[] | select(.mac == \"$(mac_get $h1)\") | select(.vlan == 1)" &> /dev/null + check_err $? "FDB entry was not learned when it should" + + log_test "FDB entry in PVID for VLAN-tagged with other TPID" + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was not forwarded when it should" + log_test "Reception of VLAN with other TPID as untagged" + + bridge vlan del dev $swp1 vid 1 + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was forwarded when should not" + log_test "Reception of VLAN with other TPID as untagged (no PVID)" + + bridge vlan add dev $swp1 vid 1 pvid untagged + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + trap cleanup EXIT setup_prepare -- cgit From 67c3ca2c5cfe6a50772514e3349b5e7b3b0fac03 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:02 +0300 Subject: net: mscc: ocelot: use ocelot_xmit_get_vlan_info() also for FDMA and register injection Problem description ------------------- On an NXP LS1028A (felix DSA driver) with the following configuration: - ocelot-8021q tagging protocol - VLAN-aware bridge (with STP) spanning at least swp0 and swp1 - 8021q VLAN upper interfaces on swp0 and swp1: swp0.700, swp1.700 - ptp4l on swp0.700 and swp1.700 we see that the ptp4l instances do not see each other's traffic, and they all go to the grand master state due to the ANNOUNCE_RECEIPT_TIMEOUT_EXPIRES condition. Jumping to the conclusion for the impatient ------------------------------------------- There is a zero-day bug in the ocelot switchdev driver in the way it handles VLAN-tagged packet injection. The correct logic already exists in the source code, in function ocelot_xmit_get_vlan_info() added by commit 5ca721c54d86 ("net: dsa: tag_ocelot: set the classified VLAN during xmit"). But it is used only for normal NPI-based injection with the DSA "ocelot" tagging protocol. The other injection code paths (register-based and FDMA-based) roll their own wrong logic. This affects and was noticed on the DSA "ocelot-8021q" protocol because it uses register-based injection. By moving ocelot_xmit_get_vlan_info() to a place that's common for both the DSA tagger and the ocelot switch library, it can also be called from ocelot_port_inject_frame() in ocelot.c. We need to touch the lines with ocelot_ifh_port_set()'s prototype anyway, so let's rename it to something clearer regarding what it does, and add a kernel-doc. ocelot_ifh_set_basic() should do. Investigation notes ------------------- Debugging reveals that PTP event (aka those carrying timestamps, like Sync) frames injected into swp0.700 (but also swp1.700) hit the wire with two VLAN tags: 00000000: 01 1b 19 00 00 00 00 01 02 03 04 05 81 00 02 bc ~~~~~~~~~~~ 00000010: 81 00 02 bc 88 f7 00 12 00 2c 00 00 02 00 00 00 ~~~~~~~~~~~ 00000020: 00 00 00 00 00 00 00 00 00 00 00 01 02 ff fe 03 00000030: 04 05 00 01 00 04 00 00 00 00 00 00 00 00 00 00 00000040: 00 00 The second (unexpected) VLAN tag makes felix_check_xtr_pkt() -> ptp_classify_raw() fail to see these as PTP packets at the link partner's receiving end, and return PTP_CLASS_NONE (because the BPF classifier is not written to expect 2 VLAN tags). The reason why packets have 2 VLAN tags is because the transmission code treats VLAN incorrectly. Neither ocelot switchdev, nor felix DSA, declare the NETIF_F_HW_VLAN_CTAG_TX feature. Therefore, at xmit time, all VLANs should be in the skb head, and none should be in the hwaccel area. This is done by: static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features) { if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) skb = __vlan_hwaccel_push_inside(skb); return skb; } But ocelot_port_inject_frame() handles things incorrectly: ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); void ocelot_ifh_port_set(struct sk_buff *skb, void *ifh, int port, u32 rew_op) { (...) if (vlan_tag) ocelot_ifh_set_vlan_tci(ifh, vlan_tag); (...) } The way __vlan_hwaccel_push_inside() pushes the tag inside the skb head is by calling: static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) { skb->vlan_present = 0; } which does _not_ zero out skb->vlan_tci as seen by skb_vlan_tag_get(). This means that ocelot, when it calls skb_vlan_tag_get(), sees (and uses) a residual skb->vlan_tci, while the same VLAN tag is _already_ in the skb head. The trivial fix for double VLAN headers is to replace the content of ocelot_ifh_port_set() with: if (skb_vlan_tag_present(skb)) ocelot_ifh_set_vlan_tci(ifh, skb_vlan_tag_get(skb)); but this would not be correct either, because, as mentioned, vlan_hw_offload_capable() is false for us, so we'd be inserting dead code and we'd always transmit packets with VID=0 in the injection frame header. I can't actually test the ocelot switchdev driver and rely exclusively on code inspection, but I don't think traffic from 8021q uppers has ever been injected properly, and not double-tagged. Thus I'm blaming the introduction of VLAN fields in the injection header - early driver code. As hinted at in the early conclusion, what we _want_ to happen for VLAN transmission was already described once in commit 5ca721c54d86 ("net: dsa: tag_ocelot: set the classified VLAN during xmit"). ocelot_xmit_get_vlan_info() intends to ensure that if the port through which we're transmitting is under a VLAN-aware bridge, the outer VLAN tag from the skb head is stripped from there and inserted into the injection frame header (so that the packet is processed in hardware through that actual VLAN). And in all other cases, the packet is sent with VID=0 in the injection frame header, since the port is VLAN-unaware and has logic to strip this VID on egress (making it invisible to the wire). Fixes: 08d02364b12f ("net: mscc: fix the injection header") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 29 +++++++++++++++----- drivers/net/ethernet/mscc/ocelot_fdma.c | 2 +- include/linux/dsa/ocelot.h | 47 +++++++++++++++++++++++++++++++++ include/soc/mscc/ocelot.h | 3 ++- net/dsa/tag_ocelot.c | 37 ++------------------------ 5 files changed, 75 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index ed2fb44500b0..69a4e5a90475 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1193,17 +1193,34 @@ bool ocelot_can_inject(struct ocelot *ocelot, int grp) } EXPORT_SYMBOL(ocelot_can_inject); -void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag) +/** + * ocelot_ifh_set_basic - Set basic information in Injection Frame Header + * @ifh: Pointer to Injection Frame Header memory + * @ocelot: Switch private data structure + * @port: Egress port number + * @rew_op: Egress rewriter operation for PTP + * @skb: Pointer to socket buffer (packet) + * + * Populate the Injection Frame Header with basic information for this skb: the + * analyzer bypass bit, destination port, VLAN info, egress rewriter info. + */ +void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, + u32 rew_op, struct sk_buff *skb) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + u64 vlan_tci, tag_type; + + ocelot_xmit_get_vlan_info(skb, ocelot_port->bridge, &vlan_tci, + &tag_type); + ocelot_ifh_set_bypass(ifh, 1); ocelot_ifh_set_dest(ifh, BIT_ULL(port)); - ocelot_ifh_set_tag_type(ifh, IFH_TAG_TYPE_C); - if (vlan_tag) - ocelot_ifh_set_vlan_tci(ifh, vlan_tag); + ocelot_ifh_set_tag_type(ifh, tag_type); + ocelot_ifh_set_vlan_tci(ifh, vlan_tci); if (rew_op) ocelot_ifh_set_rew_op(ifh, rew_op); } -EXPORT_SYMBOL(ocelot_ifh_port_set); +EXPORT_SYMBOL(ocelot_ifh_set_basic); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb) @@ -1214,7 +1231,7 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) | QS_INJ_CTRL_SOF, QS_INJ_CTRL, grp); - ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); + ocelot_ifh_set_basic(ifh, ocelot, port, rew_op, skb); for (i = 0; i < OCELOT_TAG_LEN / 4; i++) ocelot_write_rix(ocelot, ifh[i], QS_INJ_WR, grp); diff --git a/drivers/net/ethernet/mscc/ocelot_fdma.c b/drivers/net/ethernet/mscc/ocelot_fdma.c index 312a46832154..87b59cc5e441 100644 --- a/drivers/net/ethernet/mscc/ocelot_fdma.c +++ b/drivers/net/ethernet/mscc/ocelot_fdma.c @@ -666,7 +666,7 @@ static int ocelot_fdma_prepare_skb(struct ocelot *ocelot, int port, u32 rew_op, ifh = skb_push(skb, OCELOT_TAG_LEN); skb_put(skb, ETH_FCS_LEN); memset(ifh, 0, OCELOT_TAG_LEN); - ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); + ocelot_ifh_set_basic(ifh, ocelot, port, rew_op, skb); return 0; } diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index dca2969015d8..6fbfbde68a37 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -5,6 +5,8 @@ #ifndef _NET_DSA_TAG_OCELOT_H #define _NET_DSA_TAG_OCELOT_H +#include +#include #include #include #include @@ -273,4 +275,49 @@ static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb) return rew_op; } +/** + * ocelot_xmit_get_vlan_info: Determine VLAN_TCI and TAG_TYPE for injected frame + * @skb: Pointer to socket buffer + * @br: Pointer to bridge device that the port is under, if any + * @vlan_tci: + * @tag_type: + * + * If the port is under a VLAN-aware bridge, remove the VLAN header from the + * payload and move it into the DSA tag, which will make the switch classify + * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero, + * which is the pvid of standalone ports (OCELOT_STANDALONE_PVID), although not + * of VLAN-unaware bridge ports (that would be ocelot_vlan_unaware_pvid()). + * Anyway, VID 0 is fine because it is stripped on egress for these port modes, + * and source address learning is not performed for packets injected from the + * CPU anyway, so it doesn't matter that the VID is "wrong". + */ +static inline void ocelot_xmit_get_vlan_info(struct sk_buff *skb, + struct net_device *br, + u64 *vlan_tci, u64 *tag_type) +{ + struct vlan_ethhdr *hdr; + u16 proto, tci; + + if (!br || !br_vlan_enabled(br)) { + *vlan_tci = 0; + *tag_type = IFH_TAG_TYPE_C; + return; + } + + hdr = (struct vlan_ethhdr *)skb_mac_header(skb); + br_vlan_get_proto(br, &proto); + + if (ntohs(hdr->h_vlan_proto) == proto) { + vlan_remove_tag(skb, &tci); + *vlan_tci = tci; + } else { + rcu_read_lock(); + br_vlan_get_pvid_rcu(br, &tci); + rcu_read_unlock(); + *vlan_tci = tci; + } + + *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C; +} + #endif diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 6a37b29f4b4c..ed18e6bafc8d 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -969,7 +969,8 @@ void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, bool ocelot_can_inject(struct ocelot *ocelot, int grp); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb); -void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag); +void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, + u32 rew_op, struct sk_buff *skb); int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp); void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb, diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index e0e4300bfbd3..bf6608fc6be7 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -8,40 +8,6 @@ #define OCELOT_NAME "ocelot" #define SEVILLE_NAME "seville" -/* If the port is under a VLAN-aware bridge, remove the VLAN header from the - * payload and move it into the DSA tag, which will make the switch classify - * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero, - * which is the pvid of standalone and VLAN-unaware bridge ports. - */ -static void ocelot_xmit_get_vlan_info(struct sk_buff *skb, struct dsa_port *dp, - u64 *vlan_tci, u64 *tag_type) -{ - struct net_device *br = dsa_port_bridge_dev_get(dp); - struct vlan_ethhdr *hdr; - u16 proto, tci; - - if (!br || !br_vlan_enabled(br)) { - *vlan_tci = 0; - *tag_type = IFH_TAG_TYPE_C; - return; - } - - hdr = skb_vlan_eth_hdr(skb); - br_vlan_get_proto(br, &proto); - - if (ntohs(hdr->h_vlan_proto) == proto) { - vlan_remove_tag(skb, &tci); - *vlan_tci = tci; - } else { - rcu_read_lock(); - br_vlan_get_pvid_rcu(br, &tci); - rcu_read_unlock(); - *vlan_tci = tci; - } - - *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C; -} - static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, __be32 ifh_prefix, void **ifh) { @@ -53,7 +19,8 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, u32 rew_op = 0; u64 qos_class; - ocelot_xmit_get_vlan_info(skb, dp, &vlan_tci, &tag_type); + ocelot_xmit_get_vlan_info(skb, dsa_port_bridge_dev_get(dp), &vlan_tci, + &tag_type); qos_class = netdev_get_num_tc(netdev) ? netdev_get_prio_tc_map(netdev, skb->priority) : skb->priority; -- cgit From e1b9e80236c540fa85d76e2d510d1b38e1968c5d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:03 +0300 Subject: net: mscc: ocelot: fix QoS class for injected packets with "ocelot-8021q" There are 2 distinct code paths (listed below) in the source code which set up an injection header for Ocelot(-like) switches. Code path (2) lacks the QoS class and source port being set correctly. Especially the improper QoS classification is a problem for the "ocelot-8021q" alternative DSA tagging protocol, because we support tc-taprio and each packet needs to be scheduled precisely through its time slot. This includes PTP, which is normally assigned to a traffic class other than 0, but would be sent through TC 0 nonetheless. The code paths are: (1) ocelot_xmit_common() from net/dsa/tag_ocelot.c - called only by the standard "ocelot" DSA tagging protocol which uses NPI-based injection - sets up bit fields in the tag manually to account for a small difference (destination port offset) between Ocelot and Seville. Namely, ocelot_ifh_set_dest() is omitted out of ocelot_xmit_common(), because there's also seville_ifh_set_dest(). (2) ocelot_ifh_set_basic(), called by: - ocelot_fdma_prepare_skb() for FDMA transmission of the ocelot switchdev driver - ocelot_port_xmit() -> ocelot_port_inject_frame() for register-based transmission of the ocelot switchdev driver - felix_port_deferred_xmit() -> ocelot_port_inject_frame() for the DSA tagger ocelot-8021q when it must transmit PTP frames (also through register-based injection). sets the bit fields according to its own logic. The problem is that (2) doesn't call ocelot_ifh_set_qos_class(). Copying that logic from ocelot_xmit_common() fixes that. Unfortunately, although desirable, it is not easily possible to de-duplicate code paths (1) and (2), and make net/dsa/tag_ocelot.c directly call ocelot_ifh_set_basic()), because of the ocelot/seville difference. This is the "minimal" fix with some logic duplicated (but at least more consolidated). Fixes: 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 10 +++++++++- drivers/net/ethernet/mscc/ocelot_fdma.c | 1 - 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 69a4e5a90475..9301716e21d5 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1208,13 +1208,21 @@ void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, u32 rew_op, struct sk_buff *skb) { struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct net_device *dev = skb->dev; u64 vlan_tci, tag_type; + int qos_class; ocelot_xmit_get_vlan_info(skb, ocelot_port->bridge, &vlan_tci, &tag_type); + qos_class = netdev_get_num_tc(dev) ? + netdev_get_prio_tc_map(dev, skb->priority) : skb->priority; + + memset(ifh, 0, OCELOT_TAG_LEN); ocelot_ifh_set_bypass(ifh, 1); + ocelot_ifh_set_src(ifh, BIT_ULL(ocelot->num_phys_ports)); ocelot_ifh_set_dest(ifh, BIT_ULL(port)); + ocelot_ifh_set_qos_class(ifh, qos_class); ocelot_ifh_set_tag_type(ifh, tag_type); ocelot_ifh_set_vlan_tci(ifh, vlan_tci); if (rew_op) @@ -1225,7 +1233,7 @@ EXPORT_SYMBOL(ocelot_ifh_set_basic); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb) { - u32 ifh[OCELOT_TAG_LEN / 4] = {0}; + u32 ifh[OCELOT_TAG_LEN / 4]; unsigned int i, count, last; ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) | diff --git a/drivers/net/ethernet/mscc/ocelot_fdma.c b/drivers/net/ethernet/mscc/ocelot_fdma.c index 87b59cc5e441..00326ae8c708 100644 --- a/drivers/net/ethernet/mscc/ocelot_fdma.c +++ b/drivers/net/ethernet/mscc/ocelot_fdma.c @@ -665,7 +665,6 @@ static int ocelot_fdma_prepare_skb(struct ocelot *ocelot, int port, u32 rew_op, ifh = skb_push(skb, OCELOT_TAG_LEN); skb_put(skb, ETH_FCS_LEN); - memset(ifh, 0, OCELOT_TAG_LEN); ocelot_ifh_set_basic(ifh, ocelot, port, rew_op, skb); return 0; -- cgit From c5e12ac3beb0dd3a718296b2d8af5528e9ab728e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:04 +0300 Subject: net: mscc: ocelot: serialize access to the injection/extraction groups As explained by Horatiu Vultur in commit 603ead96582d ("net: sparx5: Add spinlock for frame transmission from CPU") which is for a similar hardware design, multiple CPUs can simultaneously perform injection or extraction. There are only 2 register groups for injection and 2 for extraction, and the driver only uses one of each. So we'd better serialize access using spin locks, otherwise frame corruption is possible. Note that unlike in sparx5, FDMA in ocelot does not have this issue because struct ocelot_fdma_tx_ring already contains an xmit_lock. I guess this is mostly a problem for NXP LS1028A, as that is dual core. I don't think VSC7514 is. So I'm blaming the commit where LS1028A (aka the felix DSA driver) started using register-based packet injection and extraction. Fixes: 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 11 +++++++ drivers/net/ethernet/mscc/ocelot.c | 52 ++++++++++++++++++++++++++++++ drivers/net/ethernet/mscc/ocelot_vsc7514.c | 4 +++ include/soc/mscc/ocelot.h | 9 ++++++ 4 files changed, 76 insertions(+) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index e554699f06d4..8d31ff18c5c7 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -528,7 +528,9 @@ static int felix_tag_8021q_setup(struct dsa_switch *ds) * so we need to be careful that there are no extra frames to be * dequeued over MMIO, since we would never know to discard them. */ + ocelot_lock_xtr_grp_bh(ocelot, 0); ocelot_drain_cpu_queue(ocelot, 0); + ocelot_unlock_xtr_grp_bh(ocelot, 0); return 0; } @@ -1518,6 +1520,8 @@ static void felix_port_deferred_xmit(struct kthread_work *work) int port = xmit_work->dp->index; int retries = 10; + ocelot_lock_inj_grp(ocelot, 0); + do { if (ocelot_can_inject(ocelot, 0)) break; @@ -1526,6 +1530,7 @@ static void felix_port_deferred_xmit(struct kthread_work *work) } while (--retries); if (!retries) { + ocelot_unlock_inj_grp(ocelot, 0); dev_err(ocelot->dev, "port %d failed to inject skb\n", port); ocelot_port_purge_txtstamp_skb(ocelot, port, skb); @@ -1535,6 +1540,8 @@ static void felix_port_deferred_xmit(struct kthread_work *work) ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb); + ocelot_unlock_inj_grp(ocelot, 0); + consume_skb(skb); kfree(xmit_work); } @@ -1694,6 +1701,8 @@ static bool felix_check_xtr_pkt(struct ocelot *ocelot) if (!felix->info->quirk_no_xtr_irq) return false; + ocelot_lock_xtr_grp(ocelot, grp); + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) { struct sk_buff *skb; unsigned int type; @@ -1730,6 +1739,8 @@ out: ocelot_drain_cpu_queue(ocelot, 0); } + ocelot_unlock_xtr_grp(ocelot, grp); + return true; } diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 9301716e21d5..f4e027a6fe95 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1099,6 +1099,48 @@ void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb, } EXPORT_SYMBOL(ocelot_ptp_rx_timestamp); +void ocelot_lock_inj_grp(struct ocelot *ocelot, int grp) + __acquires(&ocelot->inj_lock) +{ + spin_lock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_lock_inj_grp); + +void ocelot_unlock_inj_grp(struct ocelot *ocelot, int grp) + __releases(&ocelot->inj_lock) +{ + spin_unlock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_unlock_inj_grp); + +void ocelot_lock_xtr_grp(struct ocelot *ocelot, int grp) + __acquires(&ocelot->inj_lock) +{ + spin_lock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_lock_xtr_grp); + +void ocelot_unlock_xtr_grp(struct ocelot *ocelot, int grp) + __releases(&ocelot->inj_lock) +{ + spin_unlock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_unlock_xtr_grp); + +void ocelot_lock_xtr_grp_bh(struct ocelot *ocelot, int grp) + __acquires(&ocelot->xtr_lock) +{ + spin_lock_bh(&ocelot->xtr_lock); +} +EXPORT_SYMBOL_GPL(ocelot_lock_xtr_grp_bh); + +void ocelot_unlock_xtr_grp_bh(struct ocelot *ocelot, int grp) + __releases(&ocelot->xtr_lock) +{ + spin_unlock_bh(&ocelot->xtr_lock); +} +EXPORT_SYMBOL_GPL(ocelot_unlock_xtr_grp_bh); + int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb) { u64 timestamp, src_port, len; @@ -1109,6 +1151,8 @@ int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb) u32 val, *buf; int err; + lockdep_assert_held(&ocelot->xtr_lock); + err = ocelot_xtr_poll_xfh(ocelot, grp, xfh); if (err) return err; @@ -1184,6 +1228,8 @@ bool ocelot_can_inject(struct ocelot *ocelot, int grp) { u32 val = ocelot_read(ocelot, QS_INJ_STATUS); + lockdep_assert_held(&ocelot->inj_lock); + if (!(val & QS_INJ_STATUS_FIFO_RDY(BIT(grp)))) return false; if (val & QS_INJ_STATUS_WMARK_REACHED(BIT(grp))) @@ -1236,6 +1282,8 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 ifh[OCELOT_TAG_LEN / 4]; unsigned int i, count, last; + lockdep_assert_held(&ocelot->inj_lock); + ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) | QS_INJ_CTRL_SOF, QS_INJ_CTRL, grp); @@ -1272,6 +1320,8 @@ EXPORT_SYMBOL(ocelot_port_inject_frame); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp) { + lockdep_assert_held(&ocelot->xtr_lock); + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) ocelot_read_rix(ocelot, QS_XTR_RD, grp); } @@ -2954,6 +3004,8 @@ int ocelot_init(struct ocelot *ocelot) mutex_init(&ocelot->fwd_domain_lock); spin_lock_init(&ocelot->ptp_clock_lock); spin_lock_init(&ocelot->ts_id_lock); + spin_lock_init(&ocelot->inj_lock); + spin_lock_init(&ocelot->xtr_lock); ocelot->owq = alloc_ordered_workqueue("ocelot-owq", 0); if (!ocelot->owq) diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 993212c3a7da..c09dd2e3343c 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -51,6 +51,8 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg) struct ocelot *ocelot = arg; int grp = 0, err; + ocelot_lock_xtr_grp(ocelot, grp); + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) { struct sk_buff *skb; @@ -69,6 +71,8 @@ out: if (err < 0) ocelot_drain_cpu_queue(ocelot, 0); + ocelot_unlock_xtr_grp(ocelot, grp); + return IRQ_HANDLED; } diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index ed18e6bafc8d..462c653e1017 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -813,6 +813,9 @@ struct ocelot { const u32 *const *map; struct list_head stats_regions; + spinlock_t inj_lock; + spinlock_t xtr_lock; + u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM]; int packet_buffer_size; int num_frame_refs; @@ -966,6 +969,12 @@ void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, u32 val, u32 reg, u32 offset); /* Packet I/O */ +void ocelot_lock_inj_grp(struct ocelot *ocelot, int grp); +void ocelot_unlock_inj_grp(struct ocelot *ocelot, int grp); +void ocelot_lock_xtr_grp(struct ocelot *ocelot, int grp); +void ocelot_unlock_xtr_grp(struct ocelot *ocelot, int grp); +void ocelot_lock_xtr_grp_bh(struct ocelot *ocelot, int grp); +void ocelot_unlock_xtr_grp_bh(struct ocelot *ocelot, int grp); bool ocelot_can_inject(struct ocelot *ocelot, int grp); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb); -- cgit From 93e4649efa964201c73b0a03c35c04a0d6fc809f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:05 +0300 Subject: net: dsa: provide a software untagging function on RX for VLAN-aware bridges Through code analysis, I realized that the ds->untag_bridge_pvid logic is contradictory - see the newly added FIXME above the kernel-doc for dsa_software_untag_vlan_unaware_bridge(). Moreover, for the Felix driver, I need something very similar, but which is actually _not_ contradictory: untag the bridge PVID on RX, but for VLAN-aware bridges. The existing logic does it for VLAN-unaware bridges. Since I don't want to change the functionality of drivers which were supposedly properly tested with the ds->untag_bridge_pvid flag, I have introduced a new one: ds->untag_vlan_aware_bridge_pvid, and I have refactored the DSA reception code into a common path for both flags. TODO: both flags should be unified under a single ds->software_vlan_untag, which users of both current flags should set. This is not something that can be carried out right away. It needs very careful examination of all drivers which make use of this functionality, since some of them actually get this wrong in the first place. For example, commit 9130c2d30c17 ("net: dsa: microchip: ksz8795: Use software untagging on CPU port") uses this in a driver which has ds->configure_vlan_while_not_filtering = true. The latter mechanism has been known for many years to be broken by design: https://lore.kernel.org/netdev/CABumfLzJmXDN_W-8Z=p9KyKUVi_HhS7o_poBkeKHS2BkAiyYpw@mail.gmail.com/ and we have the situation of 2 bugs canceling each other. There is no private VLAN, and the port follows the PVID of the VLAN-unaware bridge. So, it's kinda ok for that driver to use the ds->untag_bridge_pvid mechanism, in a broken way. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/dsa.h | 16 ++++--- net/dsa/tag.c | 5 +- net/dsa/tag.h | 135 ++++++++++++++++++++++++++++++++++++++++++------------ 3 files changed, 118 insertions(+), 38 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index b06f97ae3da1..d7a6c2930277 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -403,14 +403,18 @@ struct dsa_switch { */ u32 configure_vlan_while_not_filtering:1; - /* If the switch driver always programs the CPU port as egress tagged - * despite the VLAN configuration indicating otherwise, then setting - * @untag_bridge_pvid will force the DSA receive path to pop the - * bridge's default_pvid VLAN tagged frames to offer a consistent - * behavior between a vlan_filtering=0 and vlan_filtering=1 bridge - * device. + /* Pop the default_pvid of VLAN-unaware bridge ports from tagged frames. + * DEPRECATED: Do NOT set this field in new drivers. Instead look at + * the dsa_software_vlan_untag() comments. */ u32 untag_bridge_pvid:1; + /* Pop the default_pvid of VLAN-aware bridge ports from tagged frames. + * Useful if the switch cannot preserve the VLAN tag as seen on the + * wire for user port ingress, and chooses to send all frames as + * VLAN-tagged to the CPU, including those which were originally + * untagged. + */ + u32 untag_vlan_aware_bridge_pvid:1; /* Let DSA manage the FDB entries towards the * CPU, based on the software bridge database. diff --git a/net/dsa/tag.c b/net/dsa/tag.c index 6e402d49afd3..79ad105902d9 100644 --- a/net/dsa/tag.c +++ b/net/dsa/tag.c @@ -105,8 +105,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, p = netdev_priv(skb->dev); - if (unlikely(cpu_dp->ds->untag_bridge_pvid)) { - nskb = dsa_untag_bridge_pvid(skb); + if (unlikely(cpu_dp->ds->untag_bridge_pvid || + cpu_dp->ds->untag_vlan_aware_bridge_pvid)) { + nskb = dsa_software_vlan_untag(skb); if (!nskb) { kfree_skb(skb); return 0; diff --git a/net/dsa/tag.h b/net/dsa/tag.h index f6b9c73718df..d5707870906b 100644 --- a/net/dsa/tag.h +++ b/net/dsa/tag.h @@ -44,46 +44,81 @@ static inline struct net_device *dsa_conduit_find_user(struct net_device *dev, return NULL; } -/* If under a bridge with vlan_filtering=0, make sure to send pvid-tagged - * frames as untagged, since the bridge will not untag them. +/** + * dsa_software_untag_vlan_aware_bridge: Software untagging for VLAN-aware bridge + * @skb: Pointer to received socket buffer (packet) + * @br: Pointer to bridge upper interface of ingress port + * @vid: Parsed VID from packet + * + * The bridge can process tagged packets. Software like STP/PTP may not. The + * bridge can also process untagged packets, to the same effect as if they were + * tagged with the PVID of the ingress port. So packets tagged with the PVID of + * the bridge port must be software-untagged, to support both use cases. */ -static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) +static inline void dsa_software_untag_vlan_aware_bridge(struct sk_buff *skb, + struct net_device *br, + u16 vid) { - struct dsa_port *dp = dsa_user_to_port(skb->dev); - struct net_device *br = dsa_port_bridge_dev_get(dp); - struct net_device *dev = skb->dev; - struct net_device *upper_dev; - u16 vid, pvid, proto; + u16 pvid, proto; int err; - if (!br || br_vlan_enabled(br)) - return skb; - err = br_vlan_get_proto(br, &proto); if (err) - return skb; + return; - /* Move VLAN tag from data to hwaccel */ - if (!skb_vlan_tag_present(skb) && skb->protocol == htons(proto)) { - skb = skb_vlan_untag(skb); - if (!skb) - return NULL; - } + err = br_vlan_get_pvid_rcu(skb->dev, &pvid); + if (err) + return; - if (!skb_vlan_tag_present(skb)) - return skb; + if (vid == pvid && skb->vlan_proto == htons(proto)) + __vlan_hwaccel_clear_tag(skb); +} - vid = skb_vlan_tag_get_id(skb); +/** + * dsa_software_untag_vlan_unaware_bridge: Software untagging for VLAN-unaware bridge + * @skb: Pointer to received socket buffer (packet) + * @br: Pointer to bridge upper interface of ingress port + * @vid: Parsed VID from packet + * + * The bridge ignores all VLAN tags. Software like STP/PTP may not (it may run + * on the plain port, or on a VLAN upper interface). Maybe packets are coming + * to software as tagged with a driver-defined VID which is NOT equal to the + * PVID of the bridge port (since the bridge is VLAN-unaware, its configuration + * should NOT be committed to hardware). DSA needs a method for this private + * VID to be communicated by software to it, and if packets are tagged with it, + * software-untag them. Note: the private VID may be different per bridge, to + * support the FDB isolation use case. + * + * FIXME: this is currently implemented based on the broken assumption that + * the "private VID" used by the driver in VLAN-unaware mode is equal to the + * bridge PVID. It should not be, except for a coincidence; the bridge PVID is + * irrelevant to the data path in the VLAN-unaware mode. Thus, the VID that + * this function removes is wrong. + * + * All users of ds->untag_bridge_pvid should fix their drivers, if necessary, + * to make the two independent. Only then, if there still remains a need to + * strip the private VID from packets, then a new ds->ops->get_private_vid() + * API shall be introduced to communicate to DSA what this VID is, which needs + * to be stripped here. + */ +static inline void dsa_software_untag_vlan_unaware_bridge(struct sk_buff *skb, + struct net_device *br, + u16 vid) +{ + struct net_device *upper_dev; + u16 pvid, proto; + int err; - /* We already run under an RCU read-side critical section since - * we are called from netif_receive_skb_list_internal(). - */ - err = br_vlan_get_pvid_rcu(dev, &pvid); + err = br_vlan_get_proto(br, &proto); if (err) - return skb; + return; - if (vid != pvid) - return skb; + err = br_vlan_get_pvid_rcu(skb->dev, &pvid); + if (err) + return; + + if (vid != pvid || skb->vlan_proto != htons(proto)) + return; /* The sad part about attempting to untag from DSA is that we * don't know, unless we check, if the skb will end up in @@ -95,10 +130,50 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) * definitely keep the tag, to make sure it keeps working. */ upper_dev = __vlan_find_dev_deep_rcu(br, htons(proto), vid); - if (upper_dev) + if (!upper_dev) + __vlan_hwaccel_clear_tag(skb); +} + +/** + * dsa_software_vlan_untag: Software VLAN untagging in DSA receive path + * @skb: Pointer to socket buffer (packet) + * + * Receive path method for switches which cannot avoid tagging all packets + * towards the CPU port. Called when ds->untag_bridge_pvid (legacy) or + * ds->untag_vlan_aware_bridge_pvid is set to true. + * + * As a side effect of this method, any VLAN tag from the skb head is moved + * to hwaccel. + */ +static inline struct sk_buff *dsa_software_vlan_untag(struct sk_buff *skb) +{ + struct dsa_port *dp = dsa_user_to_port(skb->dev); + struct net_device *br = dsa_port_bridge_dev_get(dp); + u16 vid; + + /* software untagging for standalone ports not yet necessary */ + if (!br) return skb; - __vlan_hwaccel_clear_tag(skb); + /* Move VLAN tag from data to hwaccel */ + if (!skb_vlan_tag_present(skb)) { + skb = skb_vlan_untag(skb); + if (!skb) + return NULL; + } + + if (!skb_vlan_tag_present(skb)) + return skb; + + vid = skb_vlan_tag_get_id(skb); + + if (br_vlan_enabled(br)) { + if (dp->ds->untag_vlan_aware_bridge_pvid) + dsa_software_untag_vlan_aware_bridge(skb, br, vid); + } else { + if (dp->ds->untag_bridge_pvid) + dsa_software_untag_vlan_unaware_bridge(skb, br, vid); + } return skb; } -- cgit From f1288fd7293b91442ad7420394c252a252ecaa30 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:06 +0300 Subject: net: dsa: felix: fix VLAN tag loss on CPU reception with ocelot-8021q There is a major design bug with ocelot-8021q, which is that it expects more of the hardware than the hardware can actually do. The short summary of the issue is that when a port is under a VLAN-aware bridge and we use this tagging protocol, VLAN upper interfaces of this port do not see RX traffic. We use VCAP ES0 (egress rewriter) rules towards the tag_8021q CPU port to encapsulate packets with an outer tag, later stripped by software, that depends on the source user port. We do this so that packets can be identified in ocelot_rcv(). To be precise, we create rules with push_outer_tag = OCELOT_ES0_TAG and push_inner_tag = 0. With this configuration, we expect the switch to keep the inner tag configuration as found in the packet (if it was untagged on user port ingress, keep it untagged, otherwise preserve the VLAN tag unmodified as the inner tag towards the tag_8021q CPU port). But this is not what happens. Instead, table "Tagging Combinations" from the user manual suggests that when the ES0 action is "PUSH_OUTER_TAG=1 and PUSH_INNER_TAG=0", there will be "no inner tag". Experimentation further clarifies what this means. It appears that this "inner tag" which is not pushed into the packet on its egress towards the CPU is none other than the classified VLAN. When the ingress user port is standalone or under a VLAN-unaware bridge, the classified VLAN is a discardable quantity: it is a fixed value - the result of ocelot_vlan_unaware_pvid()'s configuration, and actually independent of the VID from any 802.1Q header that may be in the frame. It is actually preferable to discard the "inner tag" in this case. The problem is when the ingress port is under a VLAN-aware bridge. Then, the classified VLAN is taken from the frame's 802.1Q header, with a fallback on the bridge port's PVID. It would be very good to not discard the "inner tag" here, because if we do, we break communication with any 8021q VLAN uppers that the port might have. These have a processing path outside the bridge. There seems to be nothing else we can do except to change the configuration for VCAP ES0 rules, to actually push the inner VLAN into the frame. There are 2 options for that, first is to push a fixed value specified in the rule, and second is to push a fixed value, plus (aka arithmetic +) the classified VLAN. We choose the second option, and we select that fixed value as 0. Thus, what is pushed in the inner tag is just the classified VLAN. From there, we need to perform software untagging, in the receive path, of stuff that was untagged on the wire. Fixes: 7c83a7c539ab ("net: dsa: add a second tagger for Ocelot switches based on tag_8021q") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 115 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 109 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 8d31ff18c5c7..4a705f7333f4 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -61,11 +61,46 @@ static int felix_cpu_port_for_conduit(struct dsa_switch *ds, return cpu_dp->index; } +/** + * felix_update_tag_8021q_rx_rule - Update VCAP ES0 tag_8021q rule after + * vlan_filtering change + * @outer_tagging_rule: Pointer to VCAP filter on which the update is performed + * @vlan_filtering: Current bridge VLAN filtering setting + * + * Source port identification for tag_8021q is done using VCAP ES0 rules on the + * CPU port(s). The ES0 tag B (inner tag from the packet) can be configured as + * either: + * - push_inner_tag=0: the inner tag is never pushed into the frame + * (and we lose info about the classified VLAN). This is + * good when the classified VLAN is a discardable quantity + * for the software RX path: it is either set to + * OCELOT_STANDALONE_PVID, or to + * ocelot_vlan_unaware_pvid(bridge). + * - push_inner_tag=1: the inner tag is always pushed. This is good when the + * classified VLAN is not a discardable quantity (the port + * is under a VLAN-aware bridge, and software needs to + * continue processing the packet in the same VLAN as the + * hardware). + * The point is that what is good for a VLAN-unaware port is not good for a + * VLAN-aware port, and vice versa. Thus, the RX tagging rules must be kept in + * sync with the VLAN filtering state of the port. + */ +static void +felix_update_tag_8021q_rx_rule(struct ocelot_vcap_filter *outer_tagging_rule, + bool vlan_filtering) +{ + if (vlan_filtering) + outer_tagging_rule->action.push_inner_tag = OCELOT_ES0_TAG; + else + outer_tagging_rule->action.push_inner_tag = OCELOT_NO_ES0_TAG; +} + /* Set up VCAP ES0 rules for pushing a tag_8021q VLAN towards the CPU such that * the tagger can perform RX source port identification. */ static int felix_tag_8021q_vlan_add_rx(struct dsa_switch *ds, int port, - int upstream, u16 vid) + int upstream, u16 vid, + bool vlan_filtering) { struct ocelot_vcap_filter *outer_tagging_rule; struct ocelot *ocelot = ds->priv; @@ -96,6 +131,14 @@ static int felix_tag_8021q_vlan_add_rx(struct dsa_switch *ds, int port, outer_tagging_rule->action.tag_a_tpid_sel = OCELOT_TAG_TPID_SEL_8021AD; outer_tagging_rule->action.tag_a_vid_sel = 1; outer_tagging_rule->action.vid_a_val = vid; + felix_update_tag_8021q_rx_rule(outer_tagging_rule, vlan_filtering); + outer_tagging_rule->action.tag_b_tpid_sel = OCELOT_TAG_TPID_SEL_8021Q; + /* Leave TAG_B_VID_SEL at 0 (Classified VID + VID_B_VAL). Since we also + * leave VID_B_VAL at 0, this makes ES0 tag B (the inner tag) equal to + * the classified VID, which we need to see in the DSA tagger's receive + * path. Note: the inner tag is only visible in the packet when pushed + * (push_inner_tag == OCELOT_ES0_TAG). + */ err = ocelot_vcap_filter_add(ocelot, outer_tagging_rule, NULL); if (err) @@ -227,6 +270,7 @@ static int felix_tag_8021q_vlan_del_tx(struct dsa_switch *ds, int port, u16 vid) static int felix_tag_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid, u16 flags) { + struct dsa_port *dp = dsa_to_port(ds, port); struct dsa_port *cpu_dp; int err; @@ -234,11 +278,12 @@ static int felix_tag_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid, * membership, which we aren't. So we don't need to add any VCAP filter * for the CPU port. */ - if (!dsa_is_user_port(ds, port)) + if (!dsa_port_is_user(dp)) return 0; dsa_switch_for_each_cpu_port(cpu_dp, ds) { - err = felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid); + err = felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid, + dsa_port_is_vlan_filtering(dp)); if (err) return err; } @@ -258,10 +303,11 @@ add_tx_failed: static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) { + struct dsa_port *dp = dsa_to_port(ds, port); struct dsa_port *cpu_dp; int err; - if (!dsa_is_user_port(ds, port)) + if (!dsa_port_is_user(dp)) return 0; dsa_switch_for_each_cpu_port(cpu_dp, ds) { @@ -278,11 +324,41 @@ static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) del_tx_failed: dsa_switch_for_each_cpu_port(cpu_dp, ds) - felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid); + felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid, + dsa_port_is_vlan_filtering(dp)); return err; } +static int felix_update_tag_8021q_rx_rules(struct dsa_switch *ds, int port, + bool vlan_filtering) +{ + struct ocelot_vcap_filter *outer_tagging_rule; + struct ocelot_vcap_block *block_vcap_es0; + struct ocelot *ocelot = ds->priv; + struct dsa_port *cpu_dp; + unsigned long cookie; + int err; + + block_vcap_es0 = &ocelot->block[VCAP_ES0]; + + dsa_switch_for_each_cpu_port(cpu_dp, ds) { + cookie = OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port, + cpu_dp->index); + + outer_tagging_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_es0, + cookie, false); + + felix_update_tag_8021q_rx_rule(outer_tagging_rule, vlan_filtering); + + err = ocelot_vcap_filter_replace(ocelot, outer_tagging_rule); + if (err) + return err; + } + + return 0; +} + static int felix_trap_get_cpu_port(struct dsa_switch *ds, const struct ocelot_vcap_filter *trap) { @@ -532,6 +608,16 @@ static int felix_tag_8021q_setup(struct dsa_switch *ds) ocelot_drain_cpu_queue(ocelot, 0); ocelot_unlock_xtr_grp_bh(ocelot, 0); + /* Problem: when using push_inner_tag=1 for ES0 tag B, we lose info + * about whether the received packets were VLAN-tagged on the wire, + * since they are always tagged on egress towards the CPU port. + * + * Since using push_inner_tag=1 is unavoidable for VLAN-aware bridges, + * we must work around the fallout by untagging in software to make + * untagged reception work more or less as expected. + */ + ds->untag_vlan_aware_bridge_pvid = true; + return 0; } @@ -556,6 +642,8 @@ static void felix_tag_8021q_teardown(struct dsa_switch *ds) ocelot_port_teardown_dsa_8021q_cpu(ocelot, dp->index); dsa_tag_8021q_unregister(ds); + + ds->untag_vlan_aware_bridge_pvid = false; } static unsigned long felix_tag_8021q_get_host_fwd_mask(struct dsa_switch *ds) @@ -1010,8 +1098,23 @@ static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, struct netlink_ext_ack *extack) { struct ocelot *ocelot = ds->priv; + bool using_tag_8021q; + struct felix *felix; + int err; - return ocelot_port_vlan_filtering(ocelot, port, enabled, extack); + err = ocelot_port_vlan_filtering(ocelot, port, enabled, extack); + if (err) + return err; + + felix = ocelot_to_felix(ocelot); + using_tag_8021q = felix->tag_proto == DSA_TAG_PROTO_OCELOT_8021Q; + if (using_tag_8021q) { + err = felix_update_tag_8021q_rx_rules(ds, port, enabled); + if (err) + return err; + } + + return 0; } static int felix_vlan_add(struct dsa_switch *ds, int port, -- cgit From 36dd1141be70b5966906919714dc504a24c65ddf Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Aug 2024 03:07:07 +0300 Subject: net: mscc: ocelot: treat 802.1ad tagged traffic as 802.1Q-untagged I was revisiting the topic of 802.1ad treatment in the Ocelot switch [0] and realized that not only is its basic VLAN classification pipeline improper for offloading vlan_protocol 802.1ad bridges, but also improper for offloading regular 802.1Q bridges already. Namely, 802.1ad-tagged traffic should be treated as VLAN-untagged by bridged ports, but this switch treats it as if it was 802.1Q-tagged with the same VID as in the 802.1ad header. This is markedly different to what the Linux bridge expects; see the "other_tpid()" function in tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh. An idea came to me that the VCAP IS1 TCAM is more powerful than I'm giving it credit for, and that it actually overwrites the classified VID before the VLAN Table lookup takes place. In other words, it can be used even to save a packet from being dropped on ingress due to VLAN membership. Add a sophisticated TCAM rule hardcoded into the driver to force the switch to behave like a Linux bridge with vlan_filtering 1 vlan_protocol 802.1Q. Regarding the lifetime of the filter: eventually the bridge will disappear, and vlan_filtering on the port will be restored to 0 for standalone mode. Then the filter will be deleted. [0]: https://lore.kernel.org/netdev/20201009122947.nvhye4hvcha3tljh@skbuf/ Fixes: 7142529f1688 ("net: mscc: ocelot: add VLAN filtering") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 188 ++++++++++++++++++++++++++++++-- drivers/net/ethernet/mscc/ocelot_vcap.c | 1 + include/soc/mscc/ocelot_vcap.h | 2 + 3 files changed, 180 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index f4e027a6fe95..3d72aa7b1305 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -453,9 +453,158 @@ static u16 ocelot_vlan_unaware_pvid(struct ocelot *ocelot, return VLAN_N_VID - bridge_num - 1; } +/** + * ocelot_update_vlan_reclassify_rule() - Make switch aware only to bridge VLAN TPID + * + * @ocelot: Switch private data structure + * @port: Index of ingress port + * + * IEEE 802.1Q-2018 clauses "5.5 C-VLAN component conformance" and "5.6 S-VLAN + * component conformance" suggest that a C-VLAN component should only recognize + * and filter on C-Tags, and an S-VLAN component should only recognize and + * process based on C-Tags. + * + * In Linux, as per commit 1a0b20b25732 ("Merge branch 'bridge-next'"), C-VLAN + * components are largely represented by a bridge with vlan_protocol 802.1Q, + * and S-VLAN components by a bridge with vlan_protocol 802.1ad. + * + * Currently the driver only offloads vlan_protocol 802.1Q, but the hardware + * design is non-conformant, because the switch assigns each frame to a VLAN + * based on an entirely different question, as detailed in figure "Basic VLAN + * Classification Flow" from its manual and reproduced below. + * + * Set TAG_TYPE, PCP, DEI, VID to port-default values in VLAN_CFG register + * if VLAN_AWARE_ENA[port] and frame has outer tag then: + * if VLAN_INNER_TAG_ENA[port] and frame has inner tag then: + * TAG_TYPE = (Frame.InnerTPID <> 0x8100) + * Set PCP, DEI, VID to values from inner VLAN header + * else: + * TAG_TYPE = (Frame.OuterTPID <> 0x8100) + * Set PCP, DEI, VID to values from outer VLAN header + * if VID == 0 then: + * VID = VLAN_CFG.VLAN_VID + * + * Summarized, the switch will recognize both 802.1Q and 802.1ad TPIDs as VLAN + * "with equal rights", and just set the TAG_TYPE bit to 0 (if 802.1Q) or to 1 + * (if 802.1ad). It will classify based on whichever of the tags is "outer", no + * matter what TPID that may have (or "inner", if VLAN_INNER_TAG_ENA[port]). + * + * In the VLAN Table, the TAG_TYPE information is not accessible - just the + * classified VID is - so it is as if each VLAN Table entry is for 2 VLANs: + * C-VLAN X, and S-VLAN X. + * + * Whereas the Linux bridge behavior is to only filter on frames with a TPID + * equal to the vlan_protocol, and treat everything else as VLAN-untagged. + * + * Consider an ingress packet tagged with 802.1ad VID=3 and 802.1Q VID=5, + * received on a bridge vlan_filtering=1 vlan_protocol=802.1Q port. This frame + * should be treated as 802.1Q-untagged, and classified to the PVID of that + * bridge port. Not to VID=3, and not to VID=5. + * + * The VCAP IS1 TCAM has everything we need to overwrite the choices made in + * the basic VLAN classification pipeline: it can match on TAG_TYPE in the key, + * and it can modify the classified VID in the action. Thus, for each port + * under a vlan_filtering bridge, we can insert a rule in VCAP IS1 lookup 0 to + * match on 802.1ad tagged frames and modify their classified VID to the 802.1Q + * PVID of the port. This effectively makes it appear to the outside world as + * if those packets were processed as VLAN-untagged. + * + * The rule needs to be updated each time the bridge PVID changes, and needs + * to be deleted if the bridge PVID is deleted, or if the port becomes + * VLAN-unaware. + */ +static int ocelot_update_vlan_reclassify_rule(struct ocelot *ocelot, int port) +{ + unsigned long cookie = OCELOT_VCAP_IS1_VLAN_RECLASSIFY(ocelot, port); + struct ocelot_vcap_block *block_vcap_is1 = &ocelot->block[VCAP_IS1]; + struct ocelot_port *ocelot_port = ocelot->ports[port]; + const struct ocelot_bridge_vlan *pvid_vlan; + struct ocelot_vcap_filter *filter; + int err, val, pcp, dei; + bool vid_replace_ena; + u16 vid; + + pvid_vlan = ocelot_port->pvid_vlan; + vid_replace_ena = ocelot_port->vlan_aware && pvid_vlan; + + filter = ocelot_vcap_block_find_filter_by_id(block_vcap_is1, cookie, + false); + if (!vid_replace_ena) { + /* If the reclassification filter doesn't need to exist, delete + * it if it was previously installed, and exit doing nothing + * otherwise. + */ + if (filter) + return ocelot_vcap_filter_del(ocelot, filter); + + return 0; + } + + /* The reclassification rule must apply. See if it already exists + * or if it must be created. + */ + + /* Treating as VLAN-untagged means using as classified VID equal to + * the bridge PVID, and PCP/DEI set to the port default QoS values. + */ + vid = pvid_vlan->vid; + val = ocelot_read_gix(ocelot, ANA_PORT_QOS_CFG, port); + pcp = ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL_X(val); + dei = !!(val & ANA_PORT_QOS_CFG_DP_DEFAULT_VAL); + + if (filter) { + bool changed = false; + + /* Filter exists, just update it */ + if (filter->action.vid != vid) { + filter->action.vid = vid; + changed = true; + } + if (filter->action.pcp != pcp) { + filter->action.pcp = pcp; + changed = true; + } + if (filter->action.dei != dei) { + filter->action.dei = dei; + changed = true; + } + + if (!changed) + return 0; + + return ocelot_vcap_filter_replace(ocelot, filter); + } + + /* Filter doesn't exist, create it */ + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + filter->key_type = OCELOT_VCAP_KEY_ANY; + filter->ingress_port_mask = BIT(port); + filter->vlan.tpid = OCELOT_VCAP_BIT_1; + filter->prio = 1; + filter->id.cookie = cookie; + filter->id.tc_offload = false; + filter->block_id = VCAP_IS1; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + filter->lookup = 0; + filter->action.vid_replace_ena = true; + filter->action.pcp_dei_ena = true; + filter->action.vid = vid; + filter->action.pcp = pcp; + filter->action.dei = dei; + + err = ocelot_vcap_filter_add(ocelot, filter, NULL); + if (err) + kfree(filter); + + return err; +} + /* Default vlan to clasify for untagged frames (may be zero) */ -static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, - const struct ocelot_bridge_vlan *pvid_vlan) +static int ocelot_port_set_pvid(struct ocelot *ocelot, int port, + const struct ocelot_bridge_vlan *pvid_vlan) { struct ocelot_port *ocelot_port = ocelot->ports[port]; u16 pvid = ocelot_vlan_unaware_pvid(ocelot, ocelot_port->bridge); @@ -475,15 +624,23 @@ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, * happens automatically), but also 802.1p traffic which gets * classified to VLAN 0, but that is always in our RX filter, so it * would get accepted were it not for this setting. + * + * Also, we only support the bridge 802.1Q VLAN protocol, so + * 802.1ad-tagged frames (carrying S-Tags) should be considered + * 802.1Q-untagged, and also dropped. */ if (!pvid_vlan && ocelot_port->vlan_aware) val = ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA; + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA; ocelot_rmw_gix(ocelot, val, ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA, + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA, ANA_PORT_DROP_CFG, port); + + return ocelot_update_vlan_reclassify_rule(ocelot, port); } static struct ocelot_bridge_vlan *ocelot_bridge_vlan_find(struct ocelot *ocelot, @@ -631,7 +788,10 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M, ANA_PORT_VLAN_CFG, port); - ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); + err = ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); + if (err) + return err; + ocelot_port_manage_port_tag(ocelot, port); return 0; @@ -684,9 +844,12 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, return err; /* Default ingress vlan classification */ - if (pvid) - ocelot_port_set_pvid(ocelot, port, - ocelot_bridge_vlan_find(ocelot, vid)); + if (pvid) { + err = ocelot_port_set_pvid(ocelot, port, + ocelot_bridge_vlan_find(ocelot, vid)); + if (err) + return err; + } /* Untagged egress vlan clasification */ ocelot_port_manage_port_tag(ocelot, port); @@ -712,8 +875,11 @@ int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid) return err; /* Ingress */ - if (del_pvid) - ocelot_port_set_pvid(ocelot, port, NULL); + if (del_pvid) { + err = ocelot_port_set_pvid(ocelot, port, NULL); + if (err) + return err; + } /* Egress */ ocelot_port_manage_port_tag(ocelot, port); @@ -2607,7 +2773,7 @@ int ocelot_port_set_default_prio(struct ocelot *ocelot, int port, u8 prio) ANA_PORT_QOS_CFG, port); - return 0; + return ocelot_update_vlan_reclassify_rule(ocelot, port); } EXPORT_SYMBOL_GPL(ocelot_port_set_default_prio); diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 73cdec5ca6a3..5734b86aed5b 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -695,6 +695,7 @@ static void is1_entry_set(struct ocelot *ocelot, int ix, vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_MC, filter->dmac_mc); vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_BC, filter->dmac_bc); vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_VLAN_TAGGED, tag->tagged); + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_TPID, tag->tpid); vcap_key_set(vcap, &data, VCAP_IS1_HK_VID, tag->vid.value, tag->vid.mask); vcap_key_set(vcap, &data, VCAP_IS1_HK_PCP, diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index c601a4598b0d..eb19668a06db 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -13,6 +13,7 @@ */ #define OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port, upstream) ((upstream) << 16 | (port)) #define OCELOT_VCAP_IS1_TAG_8021Q_TXVLAN(ocelot, port) (port) +#define OCELOT_VCAP_IS1_VLAN_RECLASSIFY(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_TAG_8021Q_TXVLAN(ocelot, port) (port) #define OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_MRP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2) @@ -499,6 +500,7 @@ struct ocelot_vcap_key_vlan { struct ocelot_vcap_u8 pcp; /* PCP (3 bit) */ enum ocelot_vcap_bit dei; /* DEI */ enum ocelot_vcap_bit tagged; /* Tagged/untagged frame */ + enum ocelot_vcap_bit tpid; }; struct ocelot_vcap_key_etype { -- cgit From 27ec3c57fcadb43c79ed05b2ea31bc18c72d798a Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 9 Aug 2024 10:11:33 +0200 Subject: wifi: mwifiex: duplicate static structs used in driver instances mwifiex_band_2ghz and mwifiex_band_5ghz are statically allocated, but used and modified in driver instances. Duplicate them before using them in driver instances so that different driver instances do not influence each other. This was observed on a board which has one PCIe and one SDIO mwifiex adapter. It blew up in mwifiex_setup_ht_caps(). This was called with the statically allocated struct which is modified in this function. Cc: stable@vger.kernel.org Fixes: d6bffe8bb520 ("mwifiex: support for creation of AP interface") Signed-off-by: Sascha Hauer Reviewed-by: Francesco Dolcini Acked-by: Brian Norris Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240809-mwifiex-duplicate-static-structs-v1-1-6837b903b1a4@pengutronix.de --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 32 ++++++++++++++++++++----- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 155eb0fab12a..bf35c92f91d7 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -4363,11 +4363,27 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) if (ISSUPP_ADHOC_ENABLED(adapter->fw_cap_info)) wiphy->interface_modes |= BIT(NL80211_IFTYPE_ADHOC); - wiphy->bands[NL80211_BAND_2GHZ] = &mwifiex_band_2ghz; - if (adapter->config_bands & BAND_A) - wiphy->bands[NL80211_BAND_5GHZ] = &mwifiex_band_5ghz; - else + wiphy->bands[NL80211_BAND_2GHZ] = devm_kmemdup(adapter->dev, + &mwifiex_band_2ghz, + sizeof(mwifiex_band_2ghz), + GFP_KERNEL); + if (!wiphy->bands[NL80211_BAND_2GHZ]) { + ret = -ENOMEM; + goto err; + } + + if (adapter->config_bands & BAND_A) { + wiphy->bands[NL80211_BAND_5GHZ] = devm_kmemdup(adapter->dev, + &mwifiex_band_5ghz, + sizeof(mwifiex_band_5ghz), + GFP_KERNEL); + if (!wiphy->bands[NL80211_BAND_5GHZ]) { + ret = -ENOMEM; + goto err; + } + } else { wiphy->bands[NL80211_BAND_5GHZ] = NULL; + } if (adapter->drcs_enabled && ISSUPP_DRCS_ENABLED(adapter->fw_cap_info)) wiphy->iface_combinations = &mwifiex_iface_comb_ap_sta_drcs; @@ -4461,8 +4477,7 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) if (ret < 0) { mwifiex_dbg(adapter, ERROR, "%s: wiphy_register failed: %d\n", __func__, ret); - wiphy_free(wiphy); - return ret; + goto err; } if (!adapter->regd) { @@ -4504,4 +4519,9 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) adapter->wiphy = wiphy; return ret; + +err: + wiphy_free(wiphy); + + return ret; } -- cgit From a42db293e5983aa1508d12644f23d73f0553b32c Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 16 Aug 2024 12:33:28 +0530 Subject: ASoC: SOF: amd: Fix for acp init sequence When ACP is not powered on by default, acp power on sequence explicitly invoked by programming pgfsm control mask. The existing implementation checks the same PGFSM status mask and programs the same PGFSM control mask in all ACP variants which breaks acp power on sequence for ACP6.0 and ACP6.3 variants. So to fix this issue, update ACP pgfsm control mask and status mask based on acp descriptor rev field, which will vary based on acp variant. Fixes: 846aef1d7cc0 ("ASoC: SOF: amd: Add Renoir ACP HW support") Signed-off-by: Vijendar Mukunda Link: https://patch.msgid.link/20240816070328.610360-1-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/acp.c | 19 +++++++++++++++++-- sound/soc/sof/amd/acp.h | 7 +++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c index d95f865669a6..85b58c8ccd0d 100644 --- a/sound/soc/sof/amd/acp.c +++ b/sound/soc/sof/amd/acp.c @@ -433,6 +433,7 @@ static int acp_power_on(struct snd_sof_dev *sdev) const struct sof_amd_acp_desc *desc = get_chip_info(sdev->pdata); unsigned int base = desc->pgfsm_base; unsigned int val; + unsigned int acp_pgfsm_status_mask, acp_pgfsm_cntl_mask; int ret; val = snd_sof_dsp_read(sdev, ACP_DSP_BAR, base + PGFSM_STATUS_OFFSET); @@ -440,9 +441,23 @@ static int acp_power_on(struct snd_sof_dev *sdev) if (val == ACP_POWERED_ON) return 0; - if (val & ACP_PGFSM_STATUS_MASK) + switch (desc->rev) { + case 3: + case 5: + acp_pgfsm_status_mask = ACP3X_PGFSM_STATUS_MASK; + acp_pgfsm_cntl_mask = ACP3X_PGFSM_CNTL_POWER_ON_MASK; + break; + case 6: + acp_pgfsm_status_mask = ACP6X_PGFSM_STATUS_MASK; + acp_pgfsm_cntl_mask = ACP6X_PGFSM_CNTL_POWER_ON_MASK; + break; + default: + return -EINVAL; + } + + if (val & acp_pgfsm_status_mask) snd_sof_dsp_write(sdev, ACP_DSP_BAR, base + PGFSM_CONTROL_OFFSET, - ACP_PGFSM_CNTL_POWER_ON_MASK); + acp_pgfsm_cntl_mask); ret = snd_sof_dsp_read_poll_timeout(sdev, ACP_DSP_BAR, base + PGFSM_STATUS_OFFSET, val, !val, ACP_REG_POLL_INTERVAL, ACP_REG_POLL_TIMEOUT_US); diff --git a/sound/soc/sof/amd/acp.h b/sound/soc/sof/amd/acp.h index 1af86b5b28db..61b28df8c908 100644 --- a/sound/soc/sof/amd/acp.h +++ b/sound/soc/sof/amd/acp.h @@ -25,8 +25,11 @@ #define ACP_REG_POLL_TIMEOUT_US 2000 #define ACP_DMA_COMPLETE_TIMEOUT_US 5000 -#define ACP_PGFSM_CNTL_POWER_ON_MASK 0x01 -#define ACP_PGFSM_STATUS_MASK 0x03 +#define ACP3X_PGFSM_CNTL_POWER_ON_MASK 0x01 +#define ACP3X_PGFSM_STATUS_MASK 0x03 +#define ACP6X_PGFSM_CNTL_POWER_ON_MASK 0x07 +#define ACP6X_PGFSM_STATUS_MASK 0x0F + #define ACP_POWERED_ON 0x00 #define ACP_ASSERT_RESET 0x01 #define ACP_RELEASE_RESET 0x00 -- cgit From 145082ebfcf08f4fd254c467abf4aa58b4d38505 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Mon, 12 Aug 2024 14:17:21 +0200 Subject: Documentation/llvm: turn make command for ccache into code block The command provided to use ccache with clang is not a literal code block. Once built, the documentation displays the '' symbols as a " character, which is wrong, and the command can not be applied as provided. Turn the command into a literal code block. Signed-off-by: Javier Carrasco Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- Documentation/kbuild/llvm.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst index bb5c44f8bd1c..6dc66b4f31a7 100644 --- a/Documentation/kbuild/llvm.rst +++ b/Documentation/kbuild/llvm.rst @@ -126,7 +126,7 @@ Ccache ``ccache`` can be used with ``clang`` to improve subsequent builds, (though KBUILD_BUILD_TIMESTAMP_ should be set to a deterministic value between builds -in order to avoid 100% cache misses, see Reproducible_builds_ for more info): +in order to avoid 100% cache misses, see Reproducible_builds_ for more info):: KBUILD_BUILD_TIMESTAMP='' make LLVM=1 CC="ccache clang" -- cgit From c2f6e16a6771eaefba6bb35f6803fe7217822d41 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 15 Aug 2024 13:02:55 -0400 Subject: bcachefs: Increase size of cuckoo hash table on too many rehashes Also, improve the calculation of the new table size, so that it can shrink when needed. Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets_waiting_for_journal.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c index ec1b636ef78d..f70eb2127d32 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -93,7 +93,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, .dev_bucket = (u64) dev << 56 | bucket, .journal_seq = journal_seq, }; - size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0; + size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0, nr_rehashes_this_size = 0; int ret = 0; mutex_lock(&b->lock); @@ -106,7 +106,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, for (i = 0; i < size; i++) nr_elements += t->d[i].journal_seq > flushed_seq; - new_bits = t->bits + (nr_elements * 3 > size); + new_bits = ilog2(roundup_pow_of_two(nr_elements * 3)); n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL); if (!n) { @@ -115,7 +115,14 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, } retry_rehash: + if (nr_rehashes_this_size == 3) { + new_bits++; + nr_rehashes_this_size = 0; + } + nr_rehashes++; + nr_rehashes_this_size++; + bucket_table_init(n, new_bits); tmp = new; -- cgit From 075cabf324c3fd790d6ba39ff9db33a30b954fe2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 16 Aug 2024 12:31:29 -0400 Subject: bcachefs: Fix forgetting to pass trans to fsck_err() Reported-by: syzbot+e3938cd6d761b78750e6@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index b69ef4b3de6e..ad517ef744e5 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -925,7 +925,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, return PTR_ERR(a); if (a->v.data_type && type && a->v.data_type != type) { - bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, bucket_metadata_type_mismatch, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", -- cgit From 9482f3b05332a624508a91c2ab2cf3527328a6a4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 16 Aug 2024 12:41:46 -0400 Subject: bcachefs: avoid overflowing LRU_TIME_BITS for cached data lru Reported-by: syzbot+510b0b28f8e6de64d307@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 260e7fa83d05..fd790b03fbe1 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -150,7 +150,9 @@ static inline void alloc_data_type_set(struct bch_alloc_v4 *a, enum bch_data_typ static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a) { - return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; + return a.data_type == BCH_DATA_cached + ? a.io_time[READ] & LRU_TIME_MAX + : 0; } #define DATA_TYPES_MOVABLE \ -- cgit From 99c87fe0f584f8d778a323141504d1ba5c89a4a5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 16 Aug 2024 12:44:49 -0400 Subject: bcachefs: fix incorrect i_state usage Reported-by: syzbot+95e40eae71609e40d851@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 15fc41e63b6c..94c392abef65 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -193,7 +193,7 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino * only insert fully created inodes in the inode hash table. But * discard_new_inode() expects it to be set... */ - inode->v.i_flags |= I_NEW; + inode->v.i_state |= I_NEW; /* * We don't want bch2_evict_inode() to delete the inode on disk, * we just raced and had another inode in cache. Normally new -- cgit From 534f7eff9239c1b0af852fc33f5af2b62c00eddf Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 16 Aug 2024 10:40:38 +0930 Subject: btrfs: only enable extent map shrinker for DEBUG builds Although there are several patches improving the extent map shrinker, there are still reports of too frequent shrinker behavior, taking too much CPU for the kswapd process. So let's only enable extent shrinker for now, until we got more comprehensive understanding and a better solution. Link: https://lore.kernel.org/linux-btrfs/3df4acd616a07ef4d2dc6bad668701504b412ffc.camel@intelfx.name/ Link: https://lore.kernel.org/linux-btrfs/c30fd6b3-ca7a-4759-8a53-d42878bf84f7@gmail.com/ Fixes: 956a17d9d050 ("btrfs: add a shrinker for extent maps") CC: stable@vger.kernel.org # 6.10+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 11044e9e2cb1..98fa0f382480 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2402,7 +2402,13 @@ static long btrfs_nr_cached_objects(struct super_block *sb, struct shrink_contro trace_btrfs_extent_map_shrinker_count(fs_info, nr); - return nr; + /* + * Only report the real number for DEBUG builds, as there are reports of + * serious performance degradation caused by too frequent shrinks. + */ + if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) + return nr; + return 0; } static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_control *sc) -- cgit From f232de7cdb4b99adb2c7f2bc5e0b7e4e1292873b Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 15 Aug 2024 10:16:08 +0300 Subject: net/mlx5e: SHAMPO, Fix page leak When SHAMPO is used, a receive queue currently almost always leaks one page on shutdown. A page has MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (8) headers. These headers are tracked in the SHAMPO bitmap. Each page is released when the last header index in the group is processed. During header allocation, there can be leftovers from a page that will be used in a subsequent allocation. This is normally fine, except for the following scenario (simplified a bit): 1) Allocate N new page fragments, showing only the relevant last 4 fragments: 0: new page 1: new page 2: new page 3: new page 4: page from previous allocation 5: page from previous allocation 6: page from previous allocation 7: page from previous allocation 2) NAPI processes header indices 4-7 because they are the oldest allocated. Bit 7 will be set to 0. 3) Receive queue shutdown occurs. All the remaining bits are being iterated on to release the pages. But the page assigned to header indices 0-3 will not be freed due to what happened in step 2. This patch fixes the issue by making sure that on allocation, header fragments are always allocated in groups of MLX5E_SHAMPO_WQ_HEADER_PER_PAGE so that there is never a partial page left over between allocations. A more appropriate fix would be a refactoring of mlx5e_alloc_rx_hd_mpwqe() and mlx5e_build_shampo_hd_umr(). But this refactoring is too big for net. It will be targeted for net-next. Fixes: e839ac9a89cb ("net/mlx5e: SHAMPO, Simplify header page release in teardown") Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20240815071611.2211873-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 225da8d691fc..23aa555ca0ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -735,6 +735,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) ksm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); + ksm_entries = ALIGN_DOWN(ksm_entries, MLX5E_SHAMPO_WQ_HEADER_PER_PAGE); if (!ksm_entries) return 0; -- cgit From 94e521937839475b83bac46e4d3ccba332e12064 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 15 Aug 2024 10:16:09 +0300 Subject: net/mlx5e: SHAMPO, Release in progress headers The change in the fixes tag cleaned up too much: it removed the part that was releasing header pages that were posted via UMR but haven't been acknowledged yet on the ICOSQ. This patch corrects this omission by setting the bits between pi and ci to on when shutting down a queue with SHAMPO. To be consistent with the Striding RQ code, this action is done in mlx5e_free_rx_missing_descs(). Fixes: e839ac9a89cb ("net/mlx5e: SHAMPO, Simplify header page release in teardown") Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20240815071611.2211873-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 25 ++++++++++++++--------- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index bb5da42edc23..d9e241423bc5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -998,6 +998,7 @@ void mlx5e_build_ptys2ethtool_map(void); bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, enum mlx5e_mpwrq_umr_mode umr_mode); +void mlx5e_shampo_fill_umr(struct mlx5e_rq *rq, int len); void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq); void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5df904639b0c..583fa24a7ae9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1236,6 +1236,14 @@ void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq) rq->mpwqe.actual_wq_head = wq->head; rq->mpwqe.umr_in_progress = 0; rq->mpwqe.umr_completed = 0; + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u16 len; + + len = (shampo->pi - shampo->ci) & shampo->hd_per_wq; + mlx5e_shampo_fill_umr(rq, len); + } } void mlx5e_free_rx_descs(struct mlx5e_rq *rq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 23aa555ca0ae..de9d01036c28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -963,26 +963,31 @@ void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq) sq->cc = sqcc; } -static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr, - struct mlx5e_icosq *sq) +void mlx5e_shampo_fill_umr(struct mlx5e_rq *rq, int len) { - struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq); - struct mlx5e_shampo_hd *shampo; - /* assume 1:1 relationship between RQ and icosq */ - struct mlx5e_rq *rq = &c->rq; - int end, from, len = umr.len; + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + int end, from, full_len = len; - shampo = rq->mpwqe.shampo; end = shampo->hd_per_wq; from = shampo->ci; - if (from + len > shampo->hd_per_wq) { + if (from + len > end) { len -= end - from; bitmap_set(shampo->bitmap, from, end - from); from = 0; } bitmap_set(shampo->bitmap, from, len); - shampo->ci = (shampo->ci + umr.len) & (shampo->hd_per_wq - 1); + shampo->ci = (shampo->ci + full_len) & (shampo->hd_per_wq - 1); +} + +static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr, + struct mlx5e_icosq *sq) +{ + struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq); + /* assume 1:1 relationship between RQ and icosq */ + struct mlx5e_rq *rq = &c->rq; + + mlx5e_shampo_fill_umr(rq, umr.len); } int mlx5e_poll_ico_cq(struct mlx5e_cq *cq) -- cgit From a07e953dafe5ebd88942dc861dfb06eaf055fb07 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 15 Aug 2024 10:16:10 +0300 Subject: net/mlx5e: XPS, Fix oversight of Multi-PF Netdev changes The offending commit overlooked the Multi-PF Netdev changes. Revert mlx5e_set_default_xps_cpumasks to incorporate Multi-PF Netdev changes. Fixes: bcee093751f8 ("net/mlx5e: Modifying channels number and updating TX queues") Signed-off-by: Carolina Jubran Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20240815071611.2211873-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 583fa24a7ae9..16b67c457b60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3028,15 +3028,18 @@ int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, struct mlx5e_params *params) { - struct mlx5_core_dev *mdev = priv->mdev; - int num_comp_vectors, ix, irq; - - num_comp_vectors = mlx5_comp_vectors_max(mdev); + int ix; for (ix = 0; ix < params->num_channels; ix++) { + int num_comp_vectors, irq, vec_ix; + struct mlx5_core_dev *mdev; + + mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix); + num_comp_vectors = mlx5_comp_vectors_max(mdev); cpumask_clear(priv->scratchpad.cpumask); + vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix); - for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) { + for (irq = vec_ix; irq < num_comp_vectors; irq += params->num_channels) { int cpu = mlx5_comp_vector_get_cpu(mdev, irq); cpumask_set_cpu(cpu, priv->scratchpad.cpumask); -- cgit From 607e1df7bd47fe91cab85a97f57870a26d066137 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 15 Aug 2024 10:16:11 +0300 Subject: net/mlx5: Fix IPsec RoCE MPV trace call Prevent the call trace below from happening, by not allowing IPsec creation over a slave, if master device doesn't support IPsec. WARNING: CPU: 44 PID: 16136 at kernel/locking/rwsem.c:240 down_read+0x75/0x94 Modules linked in: esp4_offload esp4 act_mirred act_vlan cls_flower sch_ingress mlx5_vdpa vringh vhost_iotlb vdpa mst_pciconf(OE) nfsv3 nfs_acl nfs lockd grace fscache netfs xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_counter nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 rfkill cuse fuse rpcrdma sunrpc rdma_ucm ib_srpt ib_isert iscsi_target_mod target_core_mod ib_umad ib_iser libiscsi scsi_transport_iscsi rdma_cm ib_ipoib iw_cm ib_cm ipmi_ssif intel_rapl_msr intel_rapl_common amd64_edac edac_mce_amd kvm_amd kvm irqbypass crct10dif_pclmul crc32_pclmul mlx5_ib ghash_clmulni_intel sha1_ssse3 dell_smbios ib_uverbs aesni_intel crypto_simd dcdbas wmi_bmof dell_wmi_descriptor cryptd pcspkr ib_core acpi_ipmi sp5100_tco ccp i2c_piix4 ipmi_si ptdma k10temp ipmi_devintf ipmi_msghandler acpi_power_meter acpi_cpufreq ext4 mbcache jbd2 sd_mod t10_pi sg mgag200 drm_kms_helper syscopyarea sysfillrect mlx5_core sysimgblt fb_sys_fops cec ahci libahci mlxfw drm pci_hyperv_intf libata tg3 sha256_ssse3 tls megaraid_sas i2c_algo_bit psample wmi dm_mirror dm_region_hash dm_log dm_mod [last unloaded: mst_pci] CPU: 44 PID: 16136 Comm: kworker/44:3 Kdump: loaded Tainted: GOE 5.15.0-20240509.el8uek.uek7_u3_update_v6.6_ipsec_bf.x86_64 #2 Hardware name: Dell Inc. PowerEdge R7525/074H08, BIOS 2.0.3 01/15/2021 Workqueue: events xfrm_state_gc_task RIP: 0010:down_read+0x75/0x94 Code: 00 48 8b 45 08 65 48 8b 14 25 80 fc 01 00 83 e0 02 48 09 d0 48 83 c8 01 48 89 45 08 5d 31 c0 89 c2 89 c6 89 c7 e9 cb 88 3b 00 <0f> 0b 48 8b 45 08 a8 01 74 b2 a8 02 75 ae 48 89 c2 48 83 ca 02 f0 RSP: 0018:ffffb26387773da8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffffa08b658af900 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ff886bc5e1366f2f RDI: 0000000000000000 RBP: ffffa08b658af940 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffa0a9bfb31540 R13: ffffa0a9bfb37900 R14: 0000000000000000 R15: ffffa0a9bfb37905 FS: 0000000000000000(0000) GS:ffffa0a9bfb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055a45ed814e8 CR3: 000000109038a000 CR4: 0000000000350ee0 Call Trace: ? show_trace_log_lvl+0x1d6/0x2f9 ? show_trace_log_lvl+0x1d6/0x2f9 ? mlx5_devcom_for_each_peer_begin+0x29/0x60 [mlx5_core] ? down_read+0x75/0x94 ? __warn+0x80/0x113 ? down_read+0x75/0x94 ? report_bug+0xa4/0x11d ? handle_bug+0x35/0x8b ? exc_invalid_op+0x14/0x75 ? asm_exc_invalid_op+0x16/0x1b ? down_read+0x75/0x94 ? down_read+0xe/0x94 mlx5_devcom_for_each_peer_begin+0x29/0x60 [mlx5_core] mlx5_ipsec_fs_roce_tx_destroy+0xb1/0x130 [mlx5_core] tx_destroy+0x1b/0xc0 [mlx5_core] tx_ft_put+0x53/0xc0 [mlx5_core] mlx5e_xfrm_free_state+0x45/0x90 [mlx5_core] ___xfrm_state_destroy+0x10f/0x1a2 xfrm_state_gc_task+0x81/0xa9 process_one_work+0x1f1/0x3c6 worker_thread+0x53/0x3e4 ? process_one_work.cold+0x46/0x3c kthread+0x127/0x144 ? set_kthread_struct+0x60/0x52 ret_from_fork+0x22/0x2d ---[ end trace 5ef7896144d398e1 ]--- Fixes: dfbd229abeee ("net/mlx5: Configure IPsec steering for egress RoCEv2 MPV traffic") Reviewed-by: Leon Romanovsky Signed-off-by: Patrisious Haddad Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20240815071611.2211873-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c index 234cd00f71a1..b7d4b1a2baf2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c @@ -386,7 +386,8 @@ static int ipsec_fs_roce_tx_mpv_create(struct mlx5_core_dev *mdev, return -EOPNOTSUPP; peer_priv = mlx5_devcom_get_next_peer_data(*ipsec_roce->devcom, &tmp); - if (!peer_priv) { + if (!peer_priv || !peer_priv->ipsec) { + mlx5_core_err(mdev, "IPsec not supported on master device\n"); err = -EOPNOTSUPP; goto release_peer; } @@ -455,7 +456,8 @@ static int ipsec_fs_roce_rx_mpv_create(struct mlx5_core_dev *mdev, return -EOPNOTSUPP; peer_priv = mlx5_devcom_get_next_peer_data(*ipsec_roce->devcom, &tmp); - if (!peer_priv) { + if (!peer_priv || !peer_priv->ipsec) { + mlx5_core_err(mdev, "IPsec not supported on master device\n"); err = -EOPNOTSUPP; goto release_peer; } -- cgit From 0e49d3ff12501adaafaf6fdb19699f021d1eda1c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 May 2024 23:48:58 -0400 Subject: bcachefs: Fix locking in __bch2_trans_mark_dev_sb() We run this in full RW mode now, so we have to guard against the superblock buffer being reallocated. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 5 +---- fs/bcachefs/buckets.c | 14 +++++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 6cbf2aa6a947..eb3002c4eae7 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -741,12 +741,9 @@ fsck_err: static int bch2_mark_superblocks(struct bch_fs *c) { - mutex_lock(&c->sb_lock); gc_pos_set(c, gc_phase(GC_PHASE_sb)); - int ret = bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); - mutex_unlock(&c->sb_lock); - return ret; + return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); } static void bch2_gc_free(struct bch_fs *c) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index ad517ef744e5..be2bbd248631 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -915,7 +915,6 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, enum bch_data_type type, unsigned sectors) { - struct bch_fs *c = trans->c; struct btree_iter iter; int ret = 0; @@ -1046,13 +1045,18 @@ static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans, static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *ca, enum btree_iter_update_trigger_flags flags) { - struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; + struct bch_fs *c = trans->c; + + mutex_lock(&c->sb_lock); + struct bch_sb_layout layout = ca->disk_sb.sb->layout; + mutex_unlock(&c->sb_lock); + u64 bucket = 0; unsigned i, bucket_sectors = 0; int ret; - for (i = 0; i < layout->nr_superblocks; i++) { - u64 offset = le64_to_cpu(layout->sb_offset[i]); + for (i = 0; i < layout.nr_superblocks; i++) { + u64 offset = le64_to_cpu(layout.sb_offset[i]); if (offset == BCH_SB_SECTOR) { ret = bch2_trans_mark_metadata_sectors(trans, ca, @@ -1063,7 +1067,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c } ret = bch2_trans_mark_metadata_sectors(trans, ca, offset, - offset + (1 << layout->sb_max_size_bits), + offset + (1 << layout.sb_max_size_bits), BCH_DATA_sb, &bucket, &bucket_sectors, flags); if (ret) return ret; -- cgit From 2fa62ce91a52e704716d08f9a8eb3f9e7e04710d Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Wed, 14 Aug 2024 12:01:24 +0800 Subject: scsi: MAINTAINERS: Update HiSilicon SAS controller driver maintainer Add Yihang Li as the maintainer of the HiSilicon SAS controller driver, replacing Xiang Chen. Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20240814040124.1376195-1-liyihang9@huawei.com Signed-off-by: Martin K. Petersen --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 42decde38320..f96bc870a664 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10173,7 +10173,7 @@ F: Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt F: drivers/infiniband/hw/hns/ HISILICON SAS Controller -M: Xiang Chen +M: Yihang Li S: Supported W: http://www.hisilicon.com F: Documentation/devicetree/bindings/scsi/hisilicon-sas.txt -- cgit From f03e94f23b04c2b71c0044c1534921b3975ef10c Mon Sep 17 00:00:00 2001 From: Chaotian Jing Date: Tue, 13 Aug 2024 13:34:10 +0800 Subject: scsi: core: Fix the return value of scsi_logical_block_count() scsi_logical_block_count() should return the block count of a given SCSI command. The original implementation ended up shifting twice, leading to an incorrect count being returned. Fix the conversion between bytes and logical blocks. Cc: stable@vger.kernel.org Fixes: 6a20e21ae1e2 ("scsi: core: Add helper to return number of logical blocks in a request") Signed-off-by: Chaotian Jing Link: https://lore.kernel.org/r/20240813053534.7720-1-chaotian.jing@mediatek.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/scsi/scsi_cmnd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 45c40d200154..8ecfb94049db 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -234,7 +234,7 @@ static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd) static inline unsigned int scsi_logical_block_count(struct scsi_cmnd *scmd) { - unsigned int shift = ilog2(scmd->device->sector_size) - SECTOR_SHIFT; + unsigned int shift = ilog2(scmd->device->sector_size); return blk_rq_bytes(scsi_cmd_to_rq(scmd)) >> shift; } -- cgit From a0c9fe5eecc97680323ee83780ea3eaf440ba1b7 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 15 Aug 2024 16:37:13 +0100 Subject: tc-testing: don't access non-existent variable on exception Since commit 255c1c7279ab ("tc-testing: Allow test cases to be skipped") the variable test_ordinal doesn't exist in call_pre_case(). So it should not be accessed when an exception occurs. This resolves the following splat: ... During handling of the above exception, another exception occurred: Traceback (most recent call last): File ".../tdc.py", line 1028, in main() File ".../tdc.py", line 1022, in main set_operation_mode(pm, parser, args, remaining) File ".../tdc.py", line 966, in set_operation_mode catresults = test_runner_serial(pm, args, alltests) File ".../tdc.py", line 642, in test_runner_serial (index, tsr) = test_runner(pm, args, alltests) File ".../tdc.py", line 536, in test_runner res = run_one_test(pm, args, index, tidx) File ".../tdc.py", line 419, in run_one_test pm.call_pre_case(tidx) File ".../tdc.py", line 146, in call_pre_case print('test_ordinal is {}'.format(test_ordinal)) NameError: name 'test_ordinal' is not defined Fixes: 255c1c7279ab ("tc-testing: Allow test cases to be skipped") Signed-off-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20240815-tdc-test-ordinal-v1-1-0255c122a427@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tdc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index ee349187636f..4f255cec0c22 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -143,7 +143,6 @@ class PluginMgr: except Exception as ee: print('exception {} in call to pre_case for {} plugin'. format(ee, pgn_inst.__class__)) - print('test_ordinal is {}'.format(test_ordinal)) print('testid is {}'.format(caseinfo['id'])) raise -- cgit From cd06b713a6880997ca5aecac8e33d5f9c541749e Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 16 Aug 2024 11:55:10 +0530 Subject: scsi: ufs: core: Add a quirk for handling broken LSDBS field in controller capabilities register 'Legacy Queue & Single Doorbell Support (LSDBS)' field in the controller capabilities register is supposed to report whether the legacy single doorbell mode is supported in the controller or not. But some controllers report '1' in this field which corresponds to 'LSDB not supported', but they indeed support LSDB. So let's add a quirk to handle those controllers. If the quirk is enabled by the controller driver, then LSDBS register field will be ignored and legacy single doorbell mode is assumed to be enabled always. Tested-by: Amit Pundir Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240816-ufs-bug-fix-v3-1-e6fe0e18e2a3@linaro.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 6 +++++- include/ufs/ufshcd.h | 8 ++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 0b3d0c8e0dda..a6f818cdef0e 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2426,7 +2426,11 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba) * 0h: legacy single doorbell support is available * 1h: indicate that legacy single doorbell support has been removed */ - hba->lsdb_sup = !FIELD_GET(MASK_LSDB_SUPPORT, hba->capabilities); + if (!(hba->quirks & UFSHCD_QUIRK_BROKEN_LSDBS_CAP)) + hba->lsdb_sup = !FIELD_GET(MASK_LSDB_SUPPORT, hba->capabilities); + else + hba->lsdb_sup = true; + if (!hba->mcq_sup) return 0; diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index cac0cdb9a916..0fd2aebac728 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -676,6 +676,14 @@ enum ufshcd_quirks { * the standard best practice for managing keys). */ UFSHCD_QUIRK_KEYS_IN_PRDT = 1 << 24, + + /* + * This quirk indicates that the controller reports the value 1 (not + * supported) in the Legacy Single DoorBell Support (LSDBS) bit of the + * Controller Capabilities register although it supports the legacy + * single doorbell mode. + */ + UFSHCD_QUIRK_BROKEN_LSDBS_CAP = 1 << 25, }; enum ufshcd_caps { -- cgit From ea593e028a9cc523557b4084a61d87ae69e2f270 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 16 Aug 2024 11:55:11 +0530 Subject: scsi: ufs: qcom: Add UFSHCD_QUIRK_BROKEN_LSDBS_CAP for SM8550 SoC SM8550 SoC has the UFSHCI 4.0 compliant UFS controller and only supports legacy single doorbell mode without MCQ. But due to a hardware bug, it reports 1 in the 'Legacy Queue & Single Doorbell Support (LSDBS)' field of the Controller Capabilities register. This field is supposed to read as 0 if legacy single doorbell mode is supported and 1 otherwise. Starting with commit 0c60eb0cc320 ("scsi: ufs: core: Check LSDBS cap when !mcq"), ufshcd driver is now relying on the LSDBS field to decide when to use the legacy doorbell mode if MCQ is not supported. And this ends up breaking UFS on SM8550: ufshcd-qcom 1d84000.ufs: ufshcd_init: failed to initialize (legacy doorbell mode not supported) ufshcd-qcom 1d84000.ufs: error -EINVAL: Initialization failed with error -22 So use the UFSHCD_QUIRK_BROKEN_LSDBS_CAP quirk for SM8550 SoC so that the ufshcd driver could use legacy doorbell mode correctly. Fixes: 0c60eb0cc320 ("scsi: ufs: core: Check LSDBS cap when !mcq") Tested-by: Amit Pundir Reviewed-by: Bart Van Assche Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240816-ufs-bug-fix-v3-2-e6fe0e18e2a3@linaro.org Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-qcom.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 810e637047d0..c87fdc849c62 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -857,6 +857,9 @@ static void ufs_qcom_advertise_quirks(struct ufs_hba *hba) if (host->hw_ver.major > 0x3) hba->quirks |= UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH; + + if (of_device_is_compatible(hba->dev->of_node, "qcom,sm8550-ufshc")) + hba->quirks |= UFSHCD_QUIRK_BROKEN_LSDBS_CAP; } static void ufs_qcom_set_phy_gear(struct ufs_qcom_host *host) @@ -1847,7 +1850,8 @@ static void ufs_qcom_remove(struct platform_device *pdev) } static const struct of_device_id ufs_qcom_of_match[] __maybe_unused = { - { .compatible = "qcom,ufshc"}, + { .compatible = "qcom,ufshc" }, + { .compatible = "qcom,sm8550-ufshc" }, {}, }; MODULE_DEVICE_TABLE(of, ufs_qcom_of_match); -- cgit From cd612b57c3672487ae8565855eaf9e83862eccc5 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 16 Aug 2024 13:59:56 +0100 Subject: scsi: MAINTAINERS: Add header files to SCSI SUBSYSTEM This is part of an effort to assign a section in MAINTAINERS to header files that relate to Networking [1]. In this case the files with "net" in their name. [1] https://lore.kernel.org/netdev/20240816-net-mnt-v1-0-ef946b47ced4@kernel.org/ As part of that effort these files came up: * include/uapi/scsi/scsi_netlink_fc.h * include/uapi/scsi/scsi_netlink.h Unlike all the other matching files, these one seem to relate more closely to SCSI than Networking, so I have added them to the SCSI SUBSYSTEM section. In order to simplify things, and for consistency, I have added the entire include/uapi/scsi rather than the individual files. Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20240816-scsi-mnt-v1-1-439af8b1c28b@kernel.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f96bc870a664..9a33ab69abab 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20351,6 +20351,7 @@ F: Documentation/devicetree/bindings/scsi/ F: drivers/scsi/ F: drivers/ufs/ F: include/scsi/ +F: include/uapi/scsi/ SCSI TAPE DRIVER M: Kai Mäkisara -- cgit From cbaac68987b8699397df29413b33bd51f5255255 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 16 Aug 2024 20:53:10 -0400 Subject: scsi: sd: Do not attempt to configure discard unless LBPME is set Commit f874d7210d88 ("scsi: sd: Keep the discard mode stable") attempted to address an issue where one mode of discard operation got configured prior to the device completing full discovery. Unfortunately this change assumed discard was always enabled on the device. Do not attempt to configure discard unless LBPME is enabled. Link: https://lore.kernel.org/r/20240817005325.3319384-1-martin.petersen@oracle.com Fixes: f874d7210d88 ("scsi: sd: Keep the discard mode stable") Reported-by: Chris Bainbridge Tested-by: Chris Bainbridge Tested-by: Shin'ichiro Kawasaki Tested-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 699f4f9674d9..dad3991397cf 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3308,6 +3308,9 @@ static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer) static unsigned int sd_discard_mode(struct scsi_disk *sdkp) { + if (!sdkp->lbpme) + return SD_LBP_FULL; + if (!sdkp->lbpvpd) { /* LBP VPD page not provided */ if (sdkp->max_unmap_blocks) -- cgit From ab8d66d132bc8f1992d3eb6cab8d32dda6733c84 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 29 Jul 2024 16:01:57 +0200 Subject: soundwire: stream: fix programming slave ports for non-continous port maps Two bitmasks in 'struct sdw_slave_prop' - 'source_ports' and 'sink_ports' - define which ports to program in sdw_program_slave_port_params(). The masks are used to get the appropriate data port properties ('struct sdw_get_slave_dpn_prop') from an array. Bitmasks can be non-continuous or can start from index different than 0, thus when looking for matching port property for given port, we must iterate over mask bits, not from 0 up to number of ports. This fixes allocation and programming slave ports, when a source or sink masks start from further index. Fixes: f8101c74aa54 ("soundwire: Add Master and Slave port programming") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20240729140157.326450-1-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- drivers/soundwire/stream.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index 7aa4900dcf31..f275143d7b18 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -1291,18 +1291,18 @@ struct sdw_dpn_prop *sdw_get_slave_dpn_prop(struct sdw_slave *slave, unsigned int port_num) { struct sdw_dpn_prop *dpn_prop; - u8 num_ports; + unsigned long mask; int i; if (direction == SDW_DATA_DIR_TX) { - num_ports = hweight32(slave->prop.source_ports); + mask = slave->prop.source_ports; dpn_prop = slave->prop.src_dpn_prop; } else { - num_ports = hweight32(slave->prop.sink_ports); + mask = slave->prop.sink_ports; dpn_prop = slave->prop.sink_dpn_prop; } - for (i = 0; i < num_ports; i++) { + for_each_set_bit(i, &mask, 32) { if (dpn_prop[i].num == port_num) return &dpn_prop[i]; } -- cgit From 3c0da3d163eb32f1f91891efaade027fa9b245b9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 6 Aug 2024 21:51:42 +0200 Subject: fuse: Initialize beyond-EOF page contents before setting uptodate fuse_notify_store(), unlike fuse_do_readpage(), does not enable page zeroing (because it can be used to change partial page contents). So fuse_notify_store() must be more careful to fully initialize page contents (including parts of the page that are beyond end-of-file) before marking the page uptodate. The current code can leave beyond-EOF page contents uninitialized, which makes these uninitialized page contents visible to userspace via mmap(). This is an information leak, but only affects systems which do not enable init-on-alloc (via CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y or the corresponding kernel command line parameter). Link: https://bugs.chromium.org/p/project-zero/issues/detail?id=2574 Cc: stable@kernel.org Fixes: a1d75f258230 ("fuse: add store request") Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds --- fs/fuse/dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9eb191b5c4de..7146038b2fe7 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1618,9 +1618,11 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, this_num = min_t(unsigned, num, PAGE_SIZE - offset); err = fuse_copy_page(cs, &page, offset, this_num, 0); - if (!err && offset == 0 && - (this_num == PAGE_SIZE || file_size == end)) + if (!PageUptodate(page) && !err && offset == 0 && + (this_num == PAGE_SIZE || file_size == end)) { + zero_user_segment(page, this_num, PAGE_SIZE); SetPageUptodate(page); + } unlock_page(page); put_page(page); -- cgit From 47ac09b91befbb6a235ab620c32af719f8208399 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 18 Aug 2024 13:17:27 -0700 Subject: Linux 6.11-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5f417808213f..68ebd6d6b444 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* -- cgit From e4be320eeca842a3d7648258ee3673f1755a5a59 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 15 Aug 2024 18:31:36 -0500 Subject: smb3: fix broken cached reads when posix locks Mandatory locking is enforced for cached reads, which violates default posix semantics, and also it is enforced inconsistently. This affected recent versions of libreoffice, and can be demonstrated by opening a file twice from the same client, locking it from handle one and trying to read from it from handle two (which fails, returning EACCES). There is already a mount option "forcemandatorylock" (which defaults to off), so with this change only when the user intentionally specifies "forcemandatorylock" on mount will we break posix semantics on read to a locked range (ie we will only fail in this case, if the user mounts with "forcemandatorylock"). An earlier patch fixed the write path. Fixes: 85160e03a79e ("CIFS: Implement caching mechanism for mandatory brlocks") Cc: stable@vger.kernel.org Cc: Pavel Shilovsky Reviewed-by: David Howells Reported-by: abartlet@samba.org Reported-by: Kevin Ottens Signed-off-by: Steve French --- fs/smb/client/file.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 1fc66bcf49eb..f9b302cb8233 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -2912,9 +2912,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) if (!CIFS_CACHE_READ(cinode)) return netfs_unbuffered_read_iter(iocb, to); - if (cap_unix(tcon->ses) && - (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) { if (iocb->ki_flags & IOCB_DIRECT) return netfs_unbuffered_read_iter(iocb, to); return netfs_buffered_read_iter(iocb, to); -- cgit From dfd046d0ced19b6ff5f11ec4ceab0a83de924771 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 15 Aug 2024 08:56:35 +0900 Subject: ksmbd: Use unsafe_memcpy() for ntlm_negotiate rsp buffer is allocated larger than spnego_blob from smb2_allocate_rsp_buf(). Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 2df1354288e6..3f4c56a10a86 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1370,7 +1370,8 @@ static int ntlm_negotiate(struct ksmbd_work *work, } sz = le16_to_cpu(rsp->SecurityBufferOffset); - memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len); + unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len, + /* alloc is larger than blob, see smb2_allocate_rsp_buf() */); rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len); out: @@ -1453,7 +1454,9 @@ static int ntlm_authenticate(struct ksmbd_work *work, return -ENOMEM; sz = le16_to_cpu(rsp->SecurityBufferOffset); - memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len); + unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, + spnego_blob_len, + /* alloc is larger than blob, see smb2_allocate_rsp_buf() */); rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len); kfree(spnego_blob); } -- cgit From 76e98a158b207771a6c9a0de0a60522a446a3447 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 17 Aug 2024 14:03:49 +0900 Subject: ksmbd: fix race condition between destroy_previous_session() and smb2 operations() If there is ->PreviousSessionId field in the session setup request, The session of the previous connection should be destroyed. During this, if the smb2 operation requests in the previous session are being processed, a racy issue could happen with ksmbd_destroy_file_table(). This patch sets conn->status to KSMBD_SESS_NEED_RECONNECT to block incoming operations and waits until on-going operations are complete (i.e. idle) before desctorying the previous session. Fixes: c8efcc786146 ("ksmbd: add support for durable handles v1/v2") Cc: stable@vger.kernel.org # v6.6+ Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-25040 Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 34 +++++++++++++++++++++++++++++++++- fs/smb/server/connection.h | 3 ++- fs/smb/server/mgmt/user_session.c | 9 +++++++++ fs/smb/server/smb2pdu.c | 2 +- 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 09e1e7771592..7889df8112b4 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -165,11 +165,43 @@ void ksmbd_all_conn_set_status(u64 sess_id, u32 status) up_read(&conn_list_lock); } -void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id) +void ksmbd_conn_wait_idle(struct ksmbd_conn *conn) { wait_event(conn->req_running_q, atomic_read(&conn->req_running) < 2); } +int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id) +{ + struct ksmbd_conn *conn; + int rc, retry_count = 0, max_timeout = 120; + int rcount = 1; + +retry_idle: + if (retry_count >= max_timeout) + return -EIO; + + down_read(&conn_list_lock); + list_for_each_entry(conn, &conn_list, conns_list) { + if (conn->binding || xa_load(&conn->sessions, sess_id)) { + if (conn == curr_conn) + rcount = 2; + if (atomic_read(&conn->req_running) >= rcount) { + rc = wait_event_timeout(conn->req_running_q, + atomic_read(&conn->req_running) < rcount, + HZ); + if (!rc) { + up_read(&conn_list_lock); + retry_count++; + goto retry_idle; + } + } + } + } + up_read(&conn_list_lock); + + return 0; +} + int ksmbd_conn_write(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 5c2845e47cf2..5b947175c048 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -145,7 +145,8 @@ extern struct list_head conn_list; extern struct rw_semaphore conn_list_lock; bool ksmbd_conn_alive(struct ksmbd_conn *conn); -void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id); +void ksmbd_conn_wait_idle(struct ksmbd_conn *conn); +int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id); struct ksmbd_conn *ksmbd_conn_alloc(void); void ksmbd_conn_free(struct ksmbd_conn *conn); bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c); diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 162a12685d2c..99416ce9f501 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -311,6 +311,7 @@ void destroy_previous_session(struct ksmbd_conn *conn, { struct ksmbd_session *prev_sess; struct ksmbd_user *prev_user; + int err; down_write(&sessions_table_lock); down_write(&conn->session_lock); @@ -325,8 +326,16 @@ void destroy_previous_session(struct ksmbd_conn *conn, memcmp(user->passkey, prev_user->passkey, user->passkey_sz)) goto out; + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_RECONNECT); + err = ksmbd_conn_wait_idle_sess_id(conn, id); + if (err) { + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE); + goto out; + } + ksmbd_destroy_file_table(&prev_sess->file_table); prev_sess->state = SMB2_SESSION_EXPIRED; + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE); ksmbd_launch_ksmbd_durable_scavenger(); out: up_write(&conn->session_lock); diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 3f4c56a10a86..cb7f487c96af 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2213,7 +2213,7 @@ int smb2_session_logoff(struct ksmbd_work *work) ksmbd_conn_unlock(conn); ksmbd_close_session_fds(work); - ksmbd_conn_wait_idle(conn, sess_id); + ksmbd_conn_wait_idle(conn); /* * Re-lookup session to validate if session is deleted -- cgit From 4fdd8664c8a94411a01d11d5ed2f083f105f570a Mon Sep 17 00:00:00 2001 From: Victor Timofei Date: Fri, 16 Aug 2024 22:24:52 +0300 Subject: ksmbd: fix spelling mistakes in documentation There are a couple of spelling mistakes in the documentation. This patch fixes them. Signed-off-by: Victor Timofei Acked-by: Namjae Jeon Signed-off-by: Steve French --- Documentation/filesystems/smb/ksmbd.rst | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Documentation/filesystems/smb/ksmbd.rst b/Documentation/filesystems/smb/ksmbd.rst index 6b30e43a0d11..67cb68ea6e68 100644 --- a/Documentation/filesystems/smb/ksmbd.rst +++ b/Documentation/filesystems/smb/ksmbd.rst @@ -13,7 +13,7 @@ KSMBD architecture The subset of performance related operations belong in kernelspace and the other subset which belong to operations which are not really related with performance in userspace. So, DCE/RPC management that has historically resulted -into number of buffer overflow issues and dangerous security bugs and user +into a number of buffer overflow issues and dangerous security bugs and user account management are implemented in user space as ksmbd.mountd. File operations that are related with performance (open/read/write/close etc.) in kernel space (ksmbd). This also allows for easier integration with VFS @@ -24,8 +24,8 @@ ksmbd (kernel daemon) When the server daemon is started, It starts up a forker thread (ksmbd/interface name) at initialization time and open a dedicated port 445 -for listening to SMB requests. Whenever new clients make request, Forker -thread will accept the client connection and fork a new thread for dedicated +for listening to SMB requests. Whenever new clients make a request, the Forker +thread will accept the client connection and fork a new thread for a dedicated communication channel between the client and the server. It allows for parallel processing of SMB requests(commands) from clients as well as allowing for new clients to make new connections. Each instance is named ksmbd/1~n(port number) @@ -34,12 +34,12 @@ thread can decide to pass through the commands to the user space (ksmbd.mountd), currently DCE/RPC commands are identified to be handled through the user space. To further utilize the linux kernel, it has been chosen to process the commands as workitems and to be executed in the handlers of the ksmbd-io kworker threads. -It allows for multiplexing of the handlers as the kernel take care of initiating +It allows for multiplexing of the handlers as the kernel takes care of initiating extra worker threads if the load is increased and vice versa, if the load is -decreased it destroys the extra worker threads. So, after connection is -established with client. Dedicated ksmbd/1..n(port number) takes complete +decreased it destroys the extra worker threads. So, after the connection is +established with the client. Dedicated ksmbd/1..n(port number) takes complete ownership of receiving/parsing of SMB commands. Each received command is worked -in parallel i.e., There can be multiple clients commands which are worked in +in parallel i.e., there can be multiple client commands which are worked in parallel. After receiving each command a separated kernel workitem is prepared for each command which is further queued to be handled by ksmbd-io kworkers. So, each SMB workitem is queued to the kworkers. This allows the benefit of load @@ -49,9 +49,9 @@ performance by handling client commands in parallel. ksmbd.mountd (user space daemon) -------------------------------- -ksmbd.mountd is userspace process to, transfer user account and password that +ksmbd.mountd is a userspace process to, transfer the user account and password that are registered using ksmbd.adduser (part of utils for user space). Further it -allows sharing information parameters that parsed from smb.conf to ksmbd in +allows sharing information parameters that are parsed from smb.conf to ksmbd in kernel. For the execution part it has a daemon which is continuously running and connected to the kernel interface using netlink socket, it waits for the requests (dcerpc and share/user info). It handles RPC calls (at a minimum few @@ -124,7 +124,7 @@ How to run 1. Download ksmbd-tools(https://github.com/cifsd-team/ksmbd-tools/releases) and compile them. - - Refer README(https://github.com/cifsd-team/ksmbd-tools/blob/master/README.md) + - Refer to README(https://github.com/cifsd-team/ksmbd-tools/blob/master/README.md) to know how to use ksmbd.mountd/adduser/addshare/control utils $ ./autogen.sh @@ -133,7 +133,7 @@ How to run 2. Create /usr/local/etc/ksmbd/ksmbd.conf file, add SMB share in ksmbd.conf file. - - Refer ksmbd.conf.example in ksmbd-utils, See ksmbd.conf manpage + - Refer to ksmbd.conf.example in ksmbd-utils, See ksmbd.conf manpage for details to configure shares. $ man ksmbd.conf @@ -145,7 +145,7 @@ How to run $ man ksmbd.adduser $ sudo ksmbd.adduser -a -4. Insert ksmbd.ko module after build your kernel. No need to load module +4. Insert the ksmbd.ko module after you build your kernel. No need to load the module if ksmbd is built into the kernel. - Set ksmbd in menuconfig(e.g. $ make menuconfig) @@ -175,7 +175,7 @@ Each layer 1. Enable all component prints # sudo ksmbd.control -d "all" -2. Enable one of components (smb, auth, vfs, oplock, ipc, conn, rdma) +2. Enable one of the components (smb, auth, vfs, oplock, ipc, conn, rdma) # sudo ksmbd.control -d "smb" 3. Show what prints are enabled. -- cgit From 7c525dddbee71880e654ad44f3917787a4f6042c Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 16 Aug 2024 19:33:39 +0200 Subject: ksmbd: Replace one-element arrays with flexible-array members Replace the deprecated one-element arrays with flexible-array members in the structs filesystem_attribute_info and filesystem_device_info. There are no binary differences after this conversion. Link: https://github.com/KSPP/linux/issues/79 Signed-off-by: Thorsten Blum Reviewed-by: Gustavo A. R. Silva Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 4 ++-- fs/smb/server/smb_common.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index cb7f487c96af..0bc9edf22ba4 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -5360,7 +5360,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, "NTFS", PATH_MAX, conn->local_nls, 0); len = len * 2; info->FileSystemNameLen = cpu_to_le32(len); - sz = sizeof(struct filesystem_attribute_info) - 2 + len; + sz = sizeof(struct filesystem_attribute_info) + len; rsp->OutputBufferLength = cpu_to_le32(sz); break; } @@ -5386,7 +5386,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, len = len * 2; info->VolumeLabelSize = cpu_to_le32(len); info->Reserved = 0; - sz = sizeof(struct filesystem_vol_info) - 2 + len; + sz = sizeof(struct filesystem_vol_info) + len; rsp->OutputBufferLength = cpu_to_le32(sz); break; } diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index 4a3148b0167f..cc1d6dfe29d5 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -213,7 +213,7 @@ struct filesystem_attribute_info { __le32 Attributes; __le32 MaxPathNameComponentLength; __le32 FileSystemNameLen; - __le16 FileSystemName[1]; /* do not have to save this - get subset? */ + __le16 FileSystemName[]; /* do not have to save this - get subset? */ } __packed; struct filesystem_device_info { @@ -226,7 +226,7 @@ struct filesystem_vol_info { __le32 SerialNumber; __le32 VolumeLabelSize; __le16 Reserved; - __le16 VolumeLabel[1]; + __le16 VolumeLabel[]; } __packed; struct filesystem_info { -- cgit From d6d539c9a7ad0655e5ad46b5e869f1b20bce8953 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 Aug 2024 19:51:13 -0400 Subject: bcachefs: Reallocate table when we're increasing size Fixes: c2f6e16a6771 ("bcachefs: Increase size of cuckoo hash table on too many rehashes") Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets_waiting_for_journal.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c index f70eb2127d32..f9fb150eda70 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -107,7 +107,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, nr_elements += t->d[i].journal_seq > flushed_seq; new_bits = ilog2(roundup_pow_of_two(nr_elements * 3)); - +realloc: n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL); if (!n) { ret = -BCH_ERR_ENOMEM_buckets_waiting_for_journal_set; @@ -118,6 +118,8 @@ retry_rehash: if (nr_rehashes_this_size == 3) { new_bits++; nr_rehashes_this_size = 0; + kvfree(n); + goto realloc; } nr_rehashes++; -- cgit From d9f49c3106e404776afcf6c5682357f4fe088beb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 Aug 2024 16:41:39 -0400 Subject: bcachefs: fix field-spanning write warning attempts to retrofit memory safety onto C are increasingly annoying ------------[ cut here ]------------ memcpy: detected field-spanning write (size 4) of single field "&k.replicas" at fs/bcachefs/replicas.c:454 (size 3) WARNING: CPU: 5 PID: 6525 at fs/bcachefs/replicas.c:454 bch2_replicas_gc2+0x2cb/0x400 [bcachefs] bch2_replicas_gc2+0x2cb/0x400: bch2_replicas_gc2 at /home/ojab/src/bcachefs/fs/bcachefs/replicas.c:454 (discriminator 3) Modules linked in: dm_mod tun nf_conntrack_netlink nfnetlink xt_addrtype br_netfilter overlay msr sctp bcachefs lz4hc_compress lz4_compress libcrc32c xor raid6_pq lz4_decompress pps_ldisc pps_core wireguard libchacha20poly1305 chacha_x86_64 poly1305_x86_64 ip6_udp_tunnel udp_tunnel curve25519_x86_64 libcurve25519_generic libchacha sit tunnel4 ip_tunnel af_packet bridge stp llc ip6table_nat ip6table_filter ip6_tables xt_MASQUERADE xt_conntrack iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter ip_tables x_tables tcp_bbr sch_fq_codel efivarfs nls_iso8859_1 nls_cp437 vfat fat cdc_mbim cdc_wdm cdc_ncm cdc_ether usbnet r8152 input_leds joydev mii amdgpu mousedev hid_generic usbhid hid ath10k_pci amd_atl edac_mce_amd ath10k_core kvm_amd ath kvm mac80211 bfq crc32_pclmul crc32c_intel polyval_clmulni polyval_generic sha512_ssse3 sha256_ssse3 sha1_ssse3 snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg i2c_algo_bit drm_exec snd_hda_codec r8169 drm_suballoc_helper aesni_intel gf128mul crypto_simd amdxcp realtek mfd_core tpm_crb drm_buddy snd_hwdep mdio_devres libarc4 cryptd tpm_tis wmi_bmof cfg80211 evdev libphy snd_hda_core tpm_tis_core gpu_sched rapl xhci_pci xhci_hcd snd_pcm drm_display_helper snd_timer tpm sp5100_tco rfkill efi_pstore mpt3sas drm_ttm_helper ahci usbcore libaescfb ccp snd ttm 8250 libahci watchdog soundcore raid_class sha1_generic acpi_cpufreq k10temp 8250_base usb_common scsi_transport_sas i2c_piix4 hwmon video serial_mctrl_gpio serial_base ecdh_generic wmi rtc_cmos backlight ecc gpio_amdpt rng_core gpio_generic button CPU: 5 UID: 0 PID: 6525 Comm: bcachefs Tainted: G W 6.11.0-rc1-ojab-00058-g224bc118aec9 #6 6d5debde398d2a84851f42ab300dae32c2992027 Tainted: [W]=WARN RIP: 0010:bch2_replicas_gc2+0x2cb/0x400 [bcachefs] Code: c7 c2 60 91 d1 c1 48 89 c6 48 c7 c7 98 91 d1 c1 4c 89 14 24 44 89 5c 24 08 48 89 44 24 20 c6 05 fa 68 04 00 01 e8 05 a3 40 e4 <0f> 0b 4c 8b 14 24 44 8b 5c 24 08 48 8b 44 24 20 e9 55 fe ff ff 8b RSP: 0018:ffffb434c9263d60 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff9a8efa79cc00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffb434c9263de0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000005 R13: ffff9a8efa73c300 R14: ffff9a8d9e880000 R15: ffff9a8d9e8806f8 FS: 0000000000000000(0000) GS:ffff9a9410c80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000565423373090 CR3: 0000000164e30000 CR4: 00000000003506f0 Call Trace: ? __warn+0x97/0x150 ? bch2_replicas_gc2+0x2cb/0x400 [bcachefs 9803eca5e131ef28f26250ede34072d5b50d98b3] bch2_replicas_gc2+0x2cb/0x400: bch2_replicas_gc2 at /home/ojab/src/bcachefs/fs/bcachefs/replicas.c:454 (discriminator 3) ? report_bug+0x196/0x1c0 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x17/0x80 ? __wake_up_klogd.part.0+0x4c/0x80 ? asm_exc_invalid_op+0x16/0x20 ? bch2_replicas_gc2+0x2cb/0x400 [bcachefs 9803eca5e131ef28f26250ede34072d5b50d98b3] bch2_replicas_gc2+0x2cb/0x400: bch2_replicas_gc2 at /home/ojab/src/bcachefs/fs/bcachefs/replicas.c:454 (discriminator 3) ? bch2_dev_usage_read+0xa0/0xa0 [bcachefs 9803eca5e131ef28f26250ede34072d5b50d98b3] bch2_dev_usage_read+0xa0/0xa0: discard_in_flight_remove at /home/ojab/src/bcachefs/fs/bcachefs/alloc_background.c:1712 Signed-off-by: Kent Overstreet --- fs/bcachefs/replicas.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 1223b710755d..12b1d28b7eb4 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -451,7 +451,8 @@ retry: .type = BCH_DISK_ACCOUNTING_replicas, }; - memcpy(&k.replicas, e, replicas_entry_bytes(e)); + unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e), + "embedded variable length struct"); struct bpos p = disk_accounting_pos_to_bpos(&k); -- cgit From 47cdc7b14417a40af6a5d5909f1d28a5a23fc11d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 Aug 2024 17:38:43 -0400 Subject: bcachefs: Fix incorrect gfp flags fixes: 00488 WARNING: CPU: 9 PID: 194 at mm/page_alloc.c:4410 __alloc_pages_noprof+0x1818/0x1888 00488 Modules linked in: 00488 CPU: 9 UID: 0 PID: 194 Comm: kworker/u66:1 Not tainted 6.11.0-rc1-ktest-g18fa10d6495f #2931 00488 Hardware name: linux,dummy-virt (DT) 00488 Workqueue: writeback wb_workfn (flush-bcachefs-2) 00488 pstate: 20001005 (nzCv daif -PAN -UAO -TCO -DIT +SSBS BTYPE=--) 00488 pc : __alloc_pages_noprof+0x1818/0x1888 00488 lr : __alloc_pages_noprof+0x5f4/0x1888 00488 sp : ffffff80ccd8ed00 00488 x29: ffffff80ccd8ed00 x28: 0000000000000000 x27: dfffffc000000000 00488 x26: 0000000000000010 x25: 0000000000000002 x24: 0000000000000000 00488 x23: 0000000000000000 x22: 1ffffff0199b1dbe x21: ffffff80cc680900 00488 x20: 0000000000000000 x19: ffffff80ccd8eed0 x18: 0000000000000000 00488 x17: ffffff80cc58a010 x16: dfffffc000000000 x15: 1ffffff00474e518 00488 x14: 1ffffff00474e518 x13: 1ffffff00474e518 x12: ffffffb8104701b9 00488 x11: 1ffffff8104701b8 x10: ffffffb8104701b8 x9 : ffffffc08043cde8 00488 x8 : 00000047efb8fe48 x7 : ffffff80ccd8ee20 x6 : 0000000000048000 00488 x5 : 1ffffff810470138 x4 : 0000000000000050 x3 : 1ffffff0199b1d94 00488 x2 : ffffffb0199b1d94 x1 : 0000000000000001 x0 : ffffffc082387448 00488 Call trace: 00488 __alloc_pages_noprof+0x1818/0x1888 00488 new_slab+0x284/0x2f0 00488 ___slab_alloc+0x208/0x8e0 00488 __kmalloc_noprof+0x328/0x340 00488 __bch2_writepage+0x106c/0x1830 00488 write_cache_pages+0xa0/0xe8 due to __GFP_NOFAIL without allowing reclaim Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io-buffered.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index cc33d763f722..184d03851676 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -534,7 +534,7 @@ do_io: if (f_sectors > w->tmp_sectors) { kfree(w->tmp); - w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), __GFP_NOFAIL); + w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), GFP_NOFS|__GFP_NOFAIL); w->tmp_sectors = f_sectors; } -- cgit From 5b5c96c63d5b6e91c622611e04b2b156bbae53f5 Mon Sep 17 00:00:00 2001 From: Hongzhen Luo Date: Thu, 1 Aug 2024 19:26:22 +0800 Subject: erofs: simplify readdir operation - Use i_size instead of i_size_read() due to immutable fses; - Get rid of an unneeded goto since erofs_fill_dentries() also works; - Remove unnecessary lines. Signed-off-by: Hongzhen Luo Link: https://lore.kernel.org/r/20240801112622.2164029-1-hongzhen@linux.alibaba.com Reviewed-by: Gao Xiang Signed-off-by: Gao Xiang --- fs/erofs/dir.c | 35 ++++++++++++----------------------- fs/erofs/internal.h | 2 +- 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 2193a6710c8f..c3b90abdee37 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -8,19 +8,15 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, void *dentry_blk, struct erofs_dirent *de, - unsigned int nameoff, unsigned int maxsize) + unsigned int nameoff0, unsigned int maxsize) { - const struct erofs_dirent *end = dentry_blk + nameoff; + const struct erofs_dirent *end = dentry_blk + nameoff0; while (de < end) { - const char *de_name; + unsigned char d_type = fs_ftype_to_dtype(de->file_type); + unsigned int nameoff = le16_to_cpu(de->nameoff); + const char *de_name = (char *)dentry_blk + nameoff; unsigned int de_namelen; - unsigned char d_type; - - d_type = fs_ftype_to_dtype(de->file_type); - - nameoff = le16_to_cpu(de->nameoff); - de_name = (char *)dentry_blk + nameoff; /* the last dirent in the block? */ if (de + 1 >= end) @@ -52,21 +48,20 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct super_block *sb = dir->i_sb; unsigned long bsz = sb->s_blocksize; - const size_t dirsize = i_size_read(dir); - unsigned int i = erofs_blknr(sb, ctx->pos); unsigned int ofs = erofs_blkoff(sb, ctx->pos); int err = 0; bool initial = true; buf.mapping = dir->i_mapping; - while (ctx->pos < dirsize) { + while (ctx->pos < dir->i_size) { + erofs_off_t dbstart = ctx->pos - ofs; struct erofs_dirent *de; unsigned int nameoff, maxsize; - de = erofs_bread(&buf, erofs_pos(sb, i), EROFS_KMAP); + de = erofs_bread(&buf, dbstart, EROFS_KMAP); if (IS_ERR(de)) { erofs_err(sb, "fail to readdir of logical block %u of nid %llu", - i, EROFS_I(dir)->nid); + erofs_blknr(sb, dbstart), EROFS_I(dir)->nid); err = PTR_ERR(de); break; } @@ -79,25 +74,19 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) break; } - maxsize = min_t(unsigned int, dirsize - ctx->pos + ofs, bsz); - + maxsize = min_t(unsigned int, dir->i_size - dbstart, bsz); /* search dirents at the arbitrary position */ if (initial) { initial = false; - ofs = roundup(ofs, sizeof(struct erofs_dirent)); - ctx->pos = erofs_pos(sb, i) + ofs; - if (ofs >= nameoff) - goto skip_this; + ctx->pos = dbstart + ofs; } err = erofs_fill_dentries(dir, ctx, de, (void *)de + ofs, nameoff, maxsize); if (err) break; -skip_this: - ctx->pos = erofs_pos(sb, i) + maxsize; - ++i; + ctx->pos = dbstart + maxsize; ofs = 0; } erofs_put_metabuf(&buf); diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 736607675396..45dc15ebd870 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -220,7 +220,7 @@ struct erofs_buf { }; #define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL }) -#define erofs_blknr(sb, addr) ((addr) >> (sb)->s_blocksize_bits) +#define erofs_blknr(sb, addr) ((erofs_blk_t)((addr) >> (sb)->s_blocksize_bits)) #define erofs_blkoff(sb, addr) ((addr) & ((sb)->s_blocksize - 1)) #define erofs_pos(sb, blk) ((erofs_off_t)(blk) << (sb)->s_blocksize_bits) #define erofs_iblks(i) (round_up((i)->i_size, i_blocksize(i)) >> (i)->i_blkbits) -- cgit From 2c534624ae70100aeea0b5800b0f3768b2fd3cf0 Mon Sep 17 00:00:00 2001 From: Hongzhen Luo Date: Tue, 6 Aug 2024 19:22:08 +0800 Subject: erofs: get rid of check_layout_compatibility() Simple enough to just open-code it. Signed-off-by: Hongzhen Luo Reviewed-by: Sandeep Dhavale Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20240806112208.150323-1-hongzhen@linux.alibaba.com Signed-off-by: Gao Xiang --- fs/erofs/super.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 32ce5b35e1df..6cb5c8916174 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -108,22 +108,6 @@ static void erofs_free_inode(struct inode *inode) kmem_cache_free(erofs_inode_cachep, vi); } -static bool check_layout_compatibility(struct super_block *sb, - struct erofs_super_block *dsb) -{ - const unsigned int feature = le32_to_cpu(dsb->feature_incompat); - - EROFS_SB(sb)->feature_incompat = feature; - - /* check if current kernel meets all mandatory requirements */ - if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) { - erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel", - feature & ~EROFS_ALL_FEATURE_INCOMPAT); - return false; - } - return true; -} - /* read variable-sized metadata, offset will be aligned by 4-byte */ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, erofs_off_t *offset, int *lengthp) @@ -279,7 +263,7 @@ static int erofs_scan_devices(struct super_block *sb, static int erofs_read_superblock(struct super_block *sb) { - struct erofs_sb_info *sbi; + struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_super_block *dsb; void *data; @@ -291,9 +275,7 @@ static int erofs_read_superblock(struct super_block *sb) return PTR_ERR(data); } - sbi = EROFS_SB(sb); dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); - ret = -EINVAL; if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) { erofs_err(sb, "cannot find valid erofs superblock"); @@ -318,8 +300,12 @@ static int erofs_read_superblock(struct super_block *sb) } ret = -EINVAL; - if (!check_layout_compatibility(sb, dsb)) + sbi->feature_incompat = le32_to_cpu(dsb->feature_incompat); + if (sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT) { + erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel", + sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT); goto out; + } sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE; if (sbi->sb_size > PAGE_SIZE - EROFS_SUPER_OFFSET) { -- cgit From 7ce7c2283fa6843ab3c2adfeb83dcc504a107858 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 14 Aug 2024 12:06:19 +0200 Subject: Input: i8042 - add Fujitsu Lifebook E756 to i8042 quirk table Yet another quirk entry for Fujitsu laptop. Lifebook E756 requires i8041.nomux for keeping the touchpad working after suspend/resume. Link: https://bugzilla.suse.com/show_bug.cgi?id=1229056 Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20240814100630.2048-1-tiwai@suse.de Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 5b50475ec414..78e5c9c60b8b 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -626,6 +626,15 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, .driver_data = (void *)(SERIO_QUIRK_NOMUX) }, + { + /* Fujitsu Lifebook E756 */ + /* https://bugzilla.suse.com/show_bug.cgi?id=1229056 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E756"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX) + }, { /* Fujitsu Lifebook E5411 */ .matches = { -- cgit From 5d41eeb6725e3e24853629e5d7635e4bc45d736e Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Fri, 9 Aug 2024 17:11:28 +0530 Subject: drm/i915/hdcp: Use correct cp_irq_count We are checking cp_irq_count from the wrong hdcp structure which ends up giving timed out errors. We only increment the cp_irq_count of the primary connector's hdcp structure but here in case of multidisplay setup we end up checking the secondary connector's hdcp structure, which will not have its cp_irq_count incremented. This leads to a timed out at CP_IRQ error even though a CP_IRQ was raised. Extract it from the correct intel_hdcp structure. --v2 -Explain why it was the wrong hdcp structure [Jani] Fixes: 8c9e4f68b861 ("drm/i915/hdcp: Use per-device debugs") Signed-off-by: Suraj Kandpal Reviewed-by: Ankit Nautiyal Link: https://patchwork.freedesktop.org/patch/msgid/20240809114127.3940699-2-suraj.kandpal@intel.com (cherry picked from commit dd925902634def895690426bf10e0a8b3e56f56d) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dp_hdcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index 2edffe62f360..b0101d72b9c1 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -39,7 +39,9 @@ static u32 transcoder_to_stream_enc_status(enum transcoder cpu_transcoder) static void intel_dp_hdcp_wait_for_cp_irq(struct intel_connector *connector, int timeout) { - struct intel_hdcp *hdcp = &connector->hdcp; + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct intel_dp *dp = &dig_port->dp; + struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; long ret; #define C (hdcp->cp_irq_count_cached != atomic_read(&hdcp->cp_irq_count)) -- cgit From 3d765ae2daccc570b3f4fbcb57eb321b12cdded2 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Thu, 4 Jan 2024 19:31:17 +0100 Subject: Input: i8042 - add forcenorestore quirk to leave controller untouched even on s3 On s3 resume the i8042 driver tries to restore the controller to a known state by reinitializing things, however this can confuse the controller with different effects. Mostly occasionally unresponsive keyboards after resume. These issues do not rise on s0ix resume as here the controller is assumed to preserved its state from before suspend. This patch adds a quirk for devices where the reinitialization on s3 resume is not needed and might be harmful as described above. It does this by using the s0ix resume code path at selected locations. This new quirk goes beyond what the preexisting reset=never quirk does, which only skips some reinitialization steps. Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240104183118.779778-2-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 10 +++++++--- drivers/input/serio/i8042.c | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 78e5c9c60b8b..00e804301088 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -83,6 +83,7 @@ static inline void i8042_write_command(int val) #define SERIO_QUIRK_KBDRESET BIT(12) #define SERIO_QUIRK_DRITEK BIT(13) #define SERIO_QUIRK_NOPNP BIT(14) +#define SERIO_QUIRK_FORCENORESTORE BIT(15) /* Quirk table for different mainboards. Options similar or identical to i8042 * module parameters. @@ -1694,6 +1695,8 @@ static void __init i8042_check_quirks(void) if (quirks & SERIO_QUIRK_NOPNP) i8042_nopnp = true; #endif + if (quirks & SERIO_QUIRK_FORCENORESTORE) + i8042_forcenorestore = true; } #else static inline void i8042_check_quirks(void) {} @@ -1727,7 +1730,7 @@ static int __init i8042_platform_init(void) i8042_check_quirks(); - pr_debug("Active quirks (empty means none):%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + pr_debug("Active quirks (empty means none):%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", i8042_nokbd ? " nokbd" : "", i8042_noaux ? " noaux" : "", i8042_nomux ? " nomux" : "", @@ -1747,10 +1750,11 @@ static int __init i8042_platform_init(void) "", #endif #ifdef CONFIG_PNP - i8042_nopnp ? " nopnp" : ""); + i8042_nopnp ? " nopnp" : "", #else - ""); + "", #endif + i8042_forcenorestore ? " forcenorestore" : ""); retval = i8042_pnp_init(); if (retval) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index e0fb1db653b7..8ec4872b4471 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -115,6 +115,10 @@ module_param_named(nopnp, i8042_nopnp, bool, 0); MODULE_PARM_DESC(nopnp, "Do not use PNP to detect controller settings"); #endif +static bool i8042_forcenorestore; +module_param_named(forcenorestore, i8042_forcenorestore, bool, 0); +MODULE_PARM_DESC(forcenorestore, "Force no restore on s3 resume, copying s2idle behaviour"); + #define DEBUG #ifdef DEBUG static bool i8042_debug; @@ -1232,7 +1236,7 @@ static int i8042_pm_suspend(struct device *dev) { int i; - if (pm_suspend_via_firmware()) + if (!i8042_forcenorestore && pm_suspend_via_firmware()) i8042_controller_reset(true); /* Set up serio interrupts for system wakeup. */ @@ -1248,7 +1252,7 @@ static int i8042_pm_suspend(struct device *dev) static int i8042_pm_resume_noirq(struct device *dev) { - if (!pm_resume_via_firmware()) + if (i8042_forcenorestore || !pm_resume_via_firmware()) i8042_interrupt(0, NULL); return 0; @@ -1271,7 +1275,7 @@ static int i8042_pm_resume(struct device *dev) * not restore the controller state to whatever it had been at boot * time, so we do not need to do anything. */ - if (!pm_suspend_via_firmware()) + if (i8042_forcenorestore || !pm_suspend_via_firmware()) return 0; /* -- cgit From aaa4ca873d3da768896ffc909795359a01e853ef Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Thu, 4 Jan 2024 19:31:18 +0100 Subject: Input: i8042 - use new forcenorestore quirk to replace old buggy quirk combination The old quirk combination sometimes cause a laggy keyboard after boot. With the new quirk the initial issue of an unresponsive keyboard after s3 resume is also fixed, but it doesn't have the negative side effect of the sometimes laggy keyboard. Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240104183118.779778-3-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 00e804301088..bad238f69a7a 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1159,18 +1159,10 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, { - /* - * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes - * the keyboard very laggy for ~5 seconds after boot and - * sometimes also after resume. - * However both are required for the keyboard to not fail - * completely sometimes after boot or resume. - */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "N150CU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { -- cgit From a9aaf1ff88a8cb99a1335c9eb76de637f0cf8c10 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 13 Aug 2024 21:07:50 +0200 Subject: power: sequencing: request the WLAN enable GPIO as-is If the WCN module is powered up before linux boots and the ath11k driver probes at the same time as the power sequencing driver, we may end up driving the wlan-enable GPIO low in the latter, breaking the start-up of the WLAN module. Request the wlan-enable GPIO as-is so that if the WLAN module is already starting/started, we leave it alone. Fixes: 2f1630f437df ("power: pwrseq: add a driver for the PMU module on the QCom WCN chipsets") Reported-by: Stephan Gerhold Link: https://lore.kernel.org/r/20240813190751.155035-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/power/sequencing/pwrseq-qcom-wcn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/sequencing/pwrseq-qcom-wcn.c b/drivers/power/sequencing/pwrseq-qcom-wcn.c index 42dacfda745e..d786cbf1b2cd 100644 --- a/drivers/power/sequencing/pwrseq-qcom-wcn.c +++ b/drivers/power/sequencing/pwrseq-qcom-wcn.c @@ -283,7 +283,7 @@ static int pwrseq_qcom_wcn_probe(struct platform_device *pdev) "Failed to get the Bluetooth enable GPIO\n"); ctx->wlan_gpio = devm_gpiod_get_optional(dev, "wlan-enable", - GPIOD_OUT_LOW); + GPIOD_ASIS); if (IS_ERR(ctx->wlan_gpio)) return dev_err_probe(dev, PTR_ERR(ctx->wlan_gpio), "Failed to get the WLAN enable GPIO\n"); -- cgit From e080a26725fb36f535f22ea42694c60ab005fb2e Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 19 Aug 2024 10:52:07 +0800 Subject: erofs: allow large folios for compressed files As commit 2e6506e1c4ee ("mm/migrate: fix deadlock in migrate_pages_batch() on large folios") has landed upstream, large folios can be safely enabled for compressed inodes since all prerequisites have already landed in 6.11-rc1. Stress tests has been running on my fleet for over 20 days without any regression. Additionally, users [1] have requested it for months. Let's allow large folios for EROFS full cases upstream now for wider testing. [1] https://lore.kernel.org/r/CAGsJ_4wtE8OcpinuqVwG4jtdx6Qh5f+TON6wz+4HMCq=A2qFcA@mail.gmail.com Cc: Barry Song <21cnbao@gmail.com> Cc: Matthew Wilcox (Oracle) [ Gao Xiang: minor commit typo fixes. ] Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20240819025207.3808649-1-hsiangkao@linux.alibaba.com --- Documentation/filesystems/erofs.rst | 2 +- fs/erofs/inode.c | 18 ++++++++---------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index cc4626d6ee4f..c293f8e37468 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -75,7 +75,7 @@ Here are the main features of EROFS: - Support merging tail-end data into a special inode as fragments. - - Support large folios for uncompressed files. + - Support large folios to make use of THPs (Transparent Hugepages); - Support direct I/O on uncompressed files to avoid double caching for loop devices; diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 43c09aae2afc..419432be3223 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -257,25 +257,23 @@ static int erofs_fill_inode(struct inode *inode) goto out_unlock; } + mapping_set_large_folios(inode->i_mapping); if (erofs_inode_is_data_compressed(vi->datalayout)) { #ifdef CONFIG_EROFS_FS_ZIP DO_ONCE_LITE_IF(inode->i_blkbits != PAGE_SHIFT, erofs_info, inode->i_sb, "EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!"); inode->i_mapping->a_ops = &z_erofs_aops; - err = 0; - goto out_unlock; -#endif +#else err = -EOPNOTSUPP; - goto out_unlock; - } - inode->i_mapping->a_ops = &erofs_raw_access_aops; - mapping_set_large_folios(inode->i_mapping); +#endif + } else { + inode->i_mapping->a_ops = &erofs_raw_access_aops; #ifdef CONFIG_EROFS_FS_ONDEMAND - if (erofs_is_fscache_mode(inode->i_sb)) - inode->i_mapping->a_ops = &erofs_fscache_access_aops; + if (erofs_is_fscache_mode(inode->i_sb)) + inode->i_mapping->a_ops = &erofs_fscache_access_aops; #endif - + } out_unlock: erofs_put_metabuf(&buf); return err; -- cgit From 56314c0d78d6f5a60c8804c517167991a879e14a Mon Sep 17 00:00:00 2001 From: John Sweeney Date: Sun, 18 Aug 2024 11:30:15 -0400 Subject: ALSA: hda/realtek: Enable mute/micmute LEDs on HP Laptop 14-ey0xxx HP Pavilion Plus 14-ey0xxx needs existing quirk ALC245_FIXUP_HP_X360_MUTE_LEDS to enable its mute/micmute LEDs. Signed-off-by: John Sweeney Cc: Link: https://patch.msgid.link/E1sfhrD-0007TA-HC@rmmprod05.runbox Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d022a25635f9..4eafbcb40120 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10315,6 +10315,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c15, "HP Spectre x360 2-in-1 Laptop 14-eu0xxx", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), SND_PCI_QUIRK(0x103c, 0x8c16, "HP Spectre 16", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8c17, "HP Spectre 16", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8c21, "HP Pavilion Plus Laptop 14-ey0XXX", ALC245_FIXUP_HP_X360_MUTE_LEDS), SND_PCI_QUIRK(0x103c, 0x8c46, "HP EliteBook 830 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c47, "HP EliteBook 840 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c48, "HP EliteBook 860 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), -- cgit From 32108c22ac619c32dd6db594319e259b63bfb387 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 19 Aug 2024 10:41:53 +0200 Subject: ALSA: seq: Skip event type filtering for UMP events UMP events don't use the event type field, hence it's invalid to apply the filter, which may drop the events unexpectedly. Skip the event filtering for UMP events, instead. Fixes: 46397622a3fa ("ALSA: seq: Add UMP support") Cc: Link: https://patch.msgid.link/20240819084156.10286-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 8c4ee5066afe..6be548baa6df 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -537,6 +537,9 @@ static struct snd_seq_client *get_event_dest_client(struct snd_seq_event *event, return NULL; if (! dest->accept_input) goto __not_avail; + if (snd_seq_ev_is_ump(event)) + return dest; /* ok - no filter checks */ + if ((dest->filter & SNDRV_SEQ_FILTER_USE_EVENT) && ! test_bit(event->type, dest->event_filter)) goto __not_avail; -- cgit From 7167395a4be7930ecac6a33b4e54d7e3dd9ee209 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 15 Aug 2024 15:59:50 +0800 Subject: selftests: udpgro: report error when receive failed Currently, we only check the latest senders's exit code. If the receiver report failed, it is not recoreded. Fix it by checking the exit code of all the involved processes. Before: bad GRO lookup ok multiple GRO socks ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 failed $ echo $? 0 After: bad GRO lookup ok multiple GRO socks ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 failed $ echo $? 1 Fixes: 3327a9c46352 ("selftests: add functionals test for UDP GRO") Suggested-by: Paolo Abeni Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgro.sh | 44 +++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 11a1ebda564f..4659cf01e438 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -46,17 +46,19 @@ run_one() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} & + local PID1=$! wait_local_port_listen ${PEER_NS} 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -73,6 +75,7 @@ run_one_nat() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 if [[ ${tx_args} = *-4* ]]; then ipt_cmd=iptables @@ -93,16 +96,17 @@ run_one_nat() { # ... so that GRO will match the UDP_GRO enabled socket, but packets # will land on the 'plain' one ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & - pid=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} && \ - echo "ok" || \ - echo "failed"& + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - kill -INT $pid - wait $(jobs -p) + check_err $? + kill -INT ${PID1} + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -111,20 +115,26 @@ run_one_2sock() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 12345 udp ./udpgso_bench_tx ${tx_args} -p 12345 + check_err $? wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } -- cgit From d7818402b1d80347c764001583f6d63fa68c2e1a Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 15 Aug 2024 15:59:51 +0800 Subject: selftests: udpgro: no need to load xdp for gro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit d7db7775ea2e ("net: veth: do not manipulate GRO when using XDP"), there is no need to load XDP program to enable GRO. On the other hand, the current test is failed due to loading the XDP program. e.g. # selftests: net: udpgro.sh # ipv4 # no GRO ok # no GRO chk cmsg ok # GRO ./udpgso_bench_rx: recv: bad packet len, got 1472, expected 14720 # # failed [...] # bad GRO lookup ok # multiple GRO socks ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 # # ./udpgso_bench_rx: recv: bad packet len, got 1452, expected 14520 # # failed ok 1 selftests: net: udpgro.sh After fix, all the test passed. # ./udpgro.sh ipv4 no GRO ok [...] multiple GRO socks ok Fixes: d7db7775ea2e ("net: veth: do not manipulate GRO when using XDP") Reported-by: Yi Chen Closes: https://issues.redhat.com/browse/RHEL-53858 Reviewed-by: Toke Høiland-Jørgensen Acked-by: Paolo Abeni Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgro.sh | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 4659cf01e438..d5ffd8c9172e 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -7,8 +7,6 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.bpf.o" - # set global exit status, but never reset nonzero one. check_err() { @@ -38,7 +36,7 @@ cfg_veth() { ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp + ip netns exec "${PEER_NS}" ethtool -K veth1 gro on } run_one() { @@ -206,11 +204,6 @@ run_all() { return $ret } -if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Run 'make' first" - exit -1 -fi - if [[ $# -eq 0 ]]; then run_all elif [[ $1 == "__subprocess" ]]; then -- cgit From cf1e515c9a40caa8bddb920970d3257bb01c1421 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 19 Aug 2024 20:00:07 +0800 Subject: iommufd/selftest: Make dirty_ops static The sparse tool complains as follows: drivers/iommu/iommufd/selftest.c:277:30: warning: symbol 'dirty_ops' was not declared. Should it be static? This symbol is not used outside of selftest.c, so marks it static. Fixes: 266ce58989ba ("iommufd/selftest: Test IOMMU_HWPT_ALLOC_DIRTY_TRACKING") Link: https://patch.msgid.link/r/20240819120007.3884868-1-ruanjinjie@huawei.com Signed-off-by: Jinjie Ruan Reviewed-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index f95e32e29133..222cfc11ebfd 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -273,7 +273,7 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, return 0; } -const struct iommu_dirty_ops dirty_ops = { +static const struct iommu_dirty_ops dirty_ops = { .set_dirty_tracking = mock_domain_set_dirty_tracking, .read_and_clear_dirty = mock_domain_read_and_clear_dirty, }; -- cgit From cd8e468efb4fb2742e06328a75b282c35c1abf8d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Aug 2024 21:01:57 +0200 Subject: ACPI: video: Add Dell UART backlight controller detection Dell All In One (AIO) models released after 2017 use a backlight controller board connected to an UART. In DSDT this uart port will be defined as: Name (_HID, "DELL0501") Name (_CID, EisaId ("PNP0501") Commit 484bae9e4d6a ("platform/x86: Add new Dell UART backlight driver") has added support for this, but I neglected to tie this into acpi_video_get_backlight_type(). Now the first AIO has turned up which has not only the DSDT bits for this, but also an actual controller attached to the UART, yet it is not using this controller for backlight control. Add support to acpi_video_get_backlight_type() for a new dell_uart backlight type. So that the existing infra to override the backlight control method on the commandline or with DMI quirks can be used. Fixes: 484bae9e4d6a ("platform/x86: Add new Dell UART backlight driver") Cc: All applicable Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20240814190159.15650-2-hdegoede@redhat.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 7 +++++++ include/acpi/video.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index c11cbe5b6eaa..e509dcbf3090 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -54,6 +54,8 @@ static void acpi_video_parse_cmdline(void) acpi_backlight_cmdline = acpi_backlight_nvidia_wmi_ec; if (!strcmp("apple_gmux", acpi_video_backlight_string)) acpi_backlight_cmdline = acpi_backlight_apple_gmux; + if (!strcmp("dell_uart", acpi_video_backlight_string)) + acpi_backlight_cmdline = acpi_backlight_dell_uart; if (!strcmp("none", acpi_video_backlight_string)) acpi_backlight_cmdline = acpi_backlight_none; } @@ -918,6 +920,7 @@ enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, bool *auto static DEFINE_MUTEX(init_mutex); static bool nvidia_wmi_ec_present; static bool apple_gmux_present; + static bool dell_uart_present; static bool native_available; static bool init_done; static long video_caps; @@ -932,6 +935,7 @@ enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, bool *auto &video_caps, NULL); nvidia_wmi_ec_present = nvidia_wmi_ec_supported(); apple_gmux_present = apple_gmux_detect(NULL, NULL); + dell_uart_present = acpi_dev_present("DELL0501", NULL, -1); init_done = true; } if (native) @@ -962,6 +966,9 @@ enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, bool *auto if (apple_gmux_present) return acpi_backlight_apple_gmux; + if (dell_uart_present) + return acpi_backlight_dell_uart; + /* Use ACPI video if available, except when native should be preferred. */ if ((video_caps & ACPI_VIDEO_BACKLIGHT) && !(native_available && prefer_native_over_acpi_video())) diff --git a/include/acpi/video.h b/include/acpi/video.h index 3d538d4178ab..044c463138df 100644 --- a/include/acpi/video.h +++ b/include/acpi/video.h @@ -50,6 +50,7 @@ enum acpi_backlight_type { acpi_backlight_native, acpi_backlight_nvidia_wmi_ec, acpi_backlight_apple_gmux, + acpi_backlight_dell_uart, }; #if IS_ENABLED(CONFIG_ACPI_VIDEO) -- cgit From b5f0943001339c4d324a1af10470ce0bdd79f966 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Aug 2024 21:01:58 +0200 Subject: platform/x86: dell-uart-backlight: Use acpi_video_get_backlight_type() The dell-uart-backlight driver supports backlight control on Dell All In One (AIO) models using a backlight controller board connected to an UART. In DSDT this uart port will be defined as: Name (_HID, "DELL0501") Name (_CID, EisaId ("PNP0501") Now the first AIO has turned up which has not only the DSDT bits for this, but also an actual controller attached to the UART, yet it is not using this controller for backlight control. Use the acpi_video_get_backlight_type() function from the ACPI video-detect code to check if the dell-uart-backlight driver should actually be used. This allows reusing the existing ACPI video-detect infra to override the backlight control method on the commandline or with DMI quirks. Fixes: 484bae9e4d6a ("platform/x86: Add new Dell UART backlight driver") Cc: All applicable Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20240814190159.15650-3-hdegoede@redhat.com Signed-off-by: Rafael J. Wysocki --- drivers/platform/x86/dell/Kconfig | 1 + drivers/platform/x86/dell/dell-uart-backlight.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/platform/x86/dell/Kconfig b/drivers/platform/x86/dell/Kconfig index 85a78ef91182..309236cecd5a 100644 --- a/drivers/platform/x86/dell/Kconfig +++ b/drivers/platform/x86/dell/Kconfig @@ -161,6 +161,7 @@ config DELL_SMO8800 config DELL_UART_BACKLIGHT tristate "Dell AIO UART Backlight driver" depends on ACPI + depends on ACPI_VIDEO depends on BACKLIGHT_CLASS_DEVICE depends on SERIAL_DEV_BUS help diff --git a/drivers/platform/x86/dell/dell-uart-backlight.c b/drivers/platform/x86/dell/dell-uart-backlight.c index 87d2a20b4cb3..3995f90add45 100644 --- a/drivers/platform/x86/dell/dell-uart-backlight.c +++ b/drivers/platform/x86/dell/dell-uart-backlight.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "../serdev_helpers.h" /* The backlight controller must respond within 1 second */ @@ -332,10 +333,17 @@ struct serdev_device_driver dell_uart_bl_serdev_driver = { static int dell_uart_bl_pdev_probe(struct platform_device *pdev) { + enum acpi_backlight_type bl_type; struct serdev_device *serdev; struct device *ctrl_dev; int ret; + bl_type = acpi_video_get_backlight_type(); + if (bl_type != acpi_backlight_dell_uart) { + dev_dbg(&pdev->dev, "Not loading (ACPI backlight type = %d)\n", bl_type); + return -ENODEV; + } + ctrl_dev = get_serdev_controller("DELL0501", NULL, 0, "serial0"); if (IS_ERR(ctrl_dev)) return PTR_ERR(ctrl_dev); -- cgit From 5c7bb62cb8f53de71d8ab3d619be22740da0b837 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Aug 2024 21:01:59 +0200 Subject: ACPI: video: Add backlight=native quirk for Dell OptiPlex 7760 AIO Dell All In One (AIO) models released after 2017 may use a backlight controller board connected to an UART. In DSDT this uart port will be defined as: Name (_HID, "DELL0501") Name (_CID, EisaId ("PNP0501") The Dell OptiPlex 7760 AIO has an ACPI device for one if its UARTs with the above _HID + _CID. Loading the dell-uart-backlight driver shows that there actually is a backlight controller board attached to the UART, which reports a firmware version of "G&MX01-V15". But the backlight controller board does not actually control the backlight brightness and the GPU's native backlight control method does work. Add a quirk to use the GPU's native backlight control method on this model. Fixes: 484bae9e4d6a ("platform/x86: Add new Dell UART backlight driver") Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2303936 Cc: All applicable Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20240814190159.15650-4-hdegoede@redhat.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index e509dcbf3090..674b9db7a1ef 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -823,6 +823,21 @@ static const struct dmi_system_id video_detect_dmi_table[] = { }, }, + /* + * Dell AIO (All in Ones) which advertise an UART attached backlight + * controller board in their ACPI tables (and may even have one), but + * which need native backlight control nevertheless. + */ + { + /* https://bugzilla.redhat.com/show_bug.cgi?id=2303936 */ + .callback = video_detect_force_native, + /* Dell OptiPlex 7760 AIO */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 7760 AIO"), + }, + }, + /* * Models which have nvidia-ec-wmi support, but should not use it. * Note this indicates a likely firmware bug on these models and should -- cgit From f4b2a0ae1a31fd3d1b5ca18ee08319b479cf9b5f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 24 Jul 2024 14:53:09 -0700 Subject: drm/xe: Fix opregion leak Being part o the display, ideally the setup and cleanup would be done by display itself. However this is a bigger refactor that needs to be done on both i915 and xe. For now, just fix the leak: unreferenced object 0xffff8881a0300008 (size 192): comm "modprobe", pid 4354, jiffies 4295647021 hex dump (first 32 bytes): 00 00 87 27 81 88 ff ff 18 80 9b 00 00 c9 ff ff ...'............ 18 81 9b 00 00 c9 ff ff 00 00 00 00 00 00 00 00 ................ backtrace (crc 99260e31): [] kmemleak_alloc+0x4b/0x80 [] kmalloc_trace_noprof+0x312/0x3d0 [] intel_opregion_setup+0x89/0x700 [xe] [] xe_display_init_noirq+0x2f/0x90 [xe] [] xe_device_probe+0x7a3/0xbf0 [xe] [] xe_pci_probe+0x333/0x5b0 [xe] [] local_pci_probe+0x48/0xb0 [] pci_device_probe+0xc8/0x280 [] really_probe+0xf8/0x390 [] __driver_probe_device+0x8a/0x170 [] driver_probe_device+0x23/0xb0 [] __driver_attach+0xc7/0x190 [] bus_for_each_dev+0x7d/0xd0 [] driver_attach+0x1e/0x30 [] bus_add_driver+0x117/0x250 Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240724215309.644423-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 6f4e43a2f771b737d991142ec4f6d4b7ff31fbb4) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_display.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 8b83dcff72e1..ca4468c82078 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -132,6 +132,7 @@ static void xe_display_fini_noirq(void *arg) return; intel_display_driver_remove_noirq(xe); + intel_opregion_cleanup(xe); } int xe_display_init_noirq(struct xe_device *xe) @@ -157,8 +158,10 @@ int xe_display_init_noirq(struct xe_device *xe) intel_display_device_info_runtime_init(xe); err = intel_display_driver_probe_noirq(xe); - if (err) + if (err) { + intel_opregion_cleanup(xe); return err; + } return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_noirq, xe); } -- cgit From c621f70539cae731d9749c1900cd00bb70ea5c72 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Sun, 4 Aug 2024 23:20:57 -0700 Subject: drm/xe/observation: Drop empty sysctl table entry An empty sysctl table entry was inadvertently left behind for observation sysctl. The breaks on 6.11 with the following errors: [ 219.654850] sysctl table check failed: dev/xe/(null) procname is null [ 219.654862] sysctl table check failed: dev/xe/(null) No proc_handler Drop the empty entry. Fixes: 63347fe031e3 ("drm/xe/uapi: Rename xe perf layer as xe observation layer") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2419 Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20240805062057.3547560-1-ashutosh.dixit@intel.com (cherry picked from commit be1dec570b6f5a29ce9c99334c52bea94c28914b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_observation.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c index fcb584b42a7d..a78c92a44ec2 100644 --- a/drivers/gpu/drm/xe/xe_observation.c +++ b/drivers/gpu/drm/xe/xe_observation.c @@ -66,7 +66,6 @@ static struct ctl_table observation_ctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - {} }; /** -- cgit From 7090d7fc969fcc9985d7e538cfcd8a69a5f9c616 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Jul 2024 08:28:31 -0700 Subject: drm/xe: Move VM dma-resv lock from xe_exec_queue_create to __xe_exec_queue_init The critical section which requires the VM dma-resv is the call xe_lrc_create in __xe_exec_queue_init. Move this lock to __xe_exec_queue_init holding it just around xe_lrc_create. Not only is good practice, this also fixes a locking double of the VM dma-resv in the error paths of __xe_exec_queue_init as xe_lrc_put tries to acquire this too resulting in a deadlock. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Maarten Lankhorst Signed-off-by: Matthew Brost Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240724152831.1848325-1-matthew.brost@intel.com (cherry picked from commit 549dd786b61cd3db903f5d94d07fc5a89ccdbeb9) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index a39384bb9553..16f24f4a7062 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -105,22 +105,35 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, static int __xe_exec_queue_init(struct xe_exec_queue *q) { + struct xe_vm *vm = q->vm; int i, err; + if (vm) { + err = xe_vm_lock(vm, true); + if (err) + return err; + } + for (i = 0; i < q->width; ++i) { q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); if (IS_ERR(q->lrc[i])) { err = PTR_ERR(q->lrc[i]); - goto err_lrc; + goto err_unlock; } } + if (vm) + xe_vm_unlock(vm); + err = q->ops->init(q); if (err) goto err_lrc; return 0; +err_unlock: + if (vm) + xe_vm_unlock(vm); err_lrc: for (i = i - 1; i >= 0; --i) xe_lrc_put(q->lrc[i]); @@ -140,15 +153,7 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (IS_ERR(q)) return q; - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - goto err_post_alloc; - } - err = __xe_exec_queue_init(q); - if (vm) - xe_vm_unlock(vm); if (err) goto err_post_alloc; -- cgit From 15939ca77d4424f736e1e4953b4da2351cc9689d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 16:28:30 -0700 Subject: drm/xe: Fix tile fini sequence Only set tile->mmio.regs to NULL if not the root tile in tile_fini. The root tile mmio regs is setup ealier in MMIO init thus it should be set to NULL in mmio_fini. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240809232830.3302251-1-matthew.brost@intel.com (cherry picked from commit 3396900aa273903639a1792afa4d23dc09bec291) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index f92faad4b96d..83122c77edd9 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -30,7 +30,8 @@ static void tiles_fini(void *arg) int id; for_each_tile(tile, xe, id) - tile->mmio.regs = NULL; + if (tile != xe_device_get_root_tile(xe)) + tile->mmio.regs = NULL; } int xe_mmio_probe_tiles(struct xe_device *xe) @@ -91,9 +92,11 @@ add_mmio_ext: static void mmio_fini(void *arg) { struct xe_device *xe = arg; + struct xe_tile *root_tile = xe_device_get_root_tile(xe); pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); xe->mmio.regs = NULL; + root_tile->mmio.regs = NULL; } int xe_mmio_init(struct xe_device *xe) -- cgit From 730b72480e29f63fd644f5fa57c9d46109428953 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 14 Aug 2024 12:01:30 +0100 Subject: drm/xe: prevent UAF around preempt fence The fence lock is part of the queue, therefore in the current design anything locking the fence should then also hold a ref to the queue to prevent the queue from being freed. However, currently it looks like we signal the fence and then drop the queue ref, but if something is waiting on the fence, the waiter is kicked to wake up at some later point, where upon waking up it first grabs the lock before checking the fence state. But if we have already dropped the queue ref, then the lock might already be freed as part of the queue, leading to uaf. To prevent this, move the fence lock into the fence itself so we don't run into lifetime issues. Alternative might be to have device level lock, or only release the queue in the fence release callback, however that might require pushing to another worker to avoid locking issues. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2454 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2342 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2020 Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240814110129.825847-2-matthew.auld@intel.com (cherry picked from commit 7116c35aacedc38be6d15bd21b2fc936eed0008b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 1 - drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 -- drivers/gpu/drm/xe/xe_preempt_fence.c | 3 ++- drivers/gpu/drm/xe/xe_preempt_fence_types.h | 2 ++ 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 16f24f4a7062..9731dcd0b1bd 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -643,7 +643,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (xe_vm_in_preempt_fence_mode(vm)) { q->lr.context = dma_fence_context_alloc(1); - spin_lock_init(&q->lr.lock); err = xe_vm_add_compute_exec_queue(vm, q); if (XE_IOCTL_DBG(xe, err)) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index a35ce24c9798..f6ee0ae80fd6 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -126,8 +126,6 @@ struct xe_exec_queue { u32 seqno; /** @lr.link: link into VM's list of exec queues */ struct list_head link; - /** @lr.lock: preemption fences lock */ - spinlock_t lock; } lr; /** @ops: submission backend exec queue operations */ diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index e8b8ae5c6485..c453f45328b1 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -128,8 +128,9 @@ xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q, { list_del_init(&pfence->link); pfence->q = xe_exec_queue_get(q); + spin_lock_init(&pfence->lock); dma_fence_init(&pfence->base, &preempt_fence_ops, - &q->lr.lock, context, seqno); + &pfence->lock, context, seqno); return &pfence->base; } diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h index b54b5c29b533..312c3372a49f 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h +++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h @@ -25,6 +25,8 @@ struct xe_preempt_fence { struct xe_exec_queue *q; /** @preempt_work: work struct which issues preemption */ struct work_struct preempt_work; + /** @lock: dma-fence fence lock */ + spinlock_t lock; /** @error: preempt fence is in error state */ int error; }; -- cgit From ddf6492e0e508b7c2b42c8d5a4ac82bd38ef0dd5 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 6 Aug 2024 12:50:44 +0200 Subject: drm/xe/display: Make display suspend/resume work on discrete We should unpin before evicting all memory, and repin after GT resume. This way, we preserve the contents of the framebuffers, and won't hang on resume due to migration engine not being restored yet. Signed-off-by: Maarten Lankhorst Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org # v6.8+ Reviewed-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20240806105044.596842-3-maarten.lankhorst@linux.intel.com Signed-off-by: Maarten Lankhorst,,, (cherry picked from commit cb8f81c1753187995b7a43e79c12959f14eb32d3) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_display.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/xe/xe_pm.c | 11 ++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index ca4468c82078..49de4e4f8a75 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -283,6 +283,27 @@ static bool suspend_to_idle(void) return false; } +static void xe_display_flush_cleanup_work(struct xe_device *xe) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(&xe->drm, crtc) { + struct drm_crtc_commit *commit; + + spin_lock(&crtc->base.commit_lock); + commit = list_first_entry_or_null(&crtc->base.commit_list, + struct drm_crtc_commit, commit_entry); + if (commit) + drm_crtc_commit_get(commit); + spin_unlock(&crtc->base.commit_lock); + + if (commit) { + wait_for_completion(&commit->cleanup_done); + drm_crtc_commit_put(commit); + } + } +} + void xe_display_pm_suspend(struct xe_device *xe, bool runtime) { bool s2idle = suspend_to_idle(); @@ -300,6 +321,8 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) if (!runtime) intel_display_driver_suspend(xe); + xe_display_flush_cleanup_work(xe); + intel_dp_mst_suspend(xe); intel_hpd_cancel_work(xe); diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index de3b5df65e48..9a3f618d22dc 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -91,13 +91,13 @@ int xe_pm_suspend(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); + xe_display_pm_suspend(xe, false); + /* FIXME: Super racey... */ err = xe_bo_evict_all(xe); if (err) goto err; - xe_display_pm_suspend(xe, false); - for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); if (err) { @@ -151,11 +151,11 @@ int xe_pm_resume(struct xe_device *xe) xe_irq_resume(xe); - xe_display_pm_resume(xe, false); - for_each_gt(gt, xe, id) xe_gt_resume(gt); + xe_display_pm_resume(xe, false); + err = xe_bo_restore_user(xe); if (err) goto err; @@ -363,10 +363,11 @@ int xe_pm_runtime_suspend(struct xe_device *xe) mutex_unlock(&xe->mem_access.vram_userfault.lock); if (xe->d3cold.allowed) { + xe_display_pm_suspend(xe, true); + err = xe_bo_evict_all(xe); if (err) goto out; - xe_display_pm_suspend(xe, true); } for_each_gt(gt, xe, id) { -- cgit From ad614a706b1ac83b95b333f44b8f5e70bcb37dc5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 29 Jul 2024 11:26:34 +0200 Subject: drm/xe/oa/uapi: Make bit masks unsigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with gcc-5: In function ‘decode_oa_format.isra.26’, inlined from ‘xe_oa_set_prop_oa_format’ at drivers/gpu/drm/xe/xe_oa.c:1664:6: ././include/linux/compiler_types.h:510:38: error: call to ‘__compiletime_assert_1336’ declared with attribute error: FIELD_GET: mask is not constant [...] ./include/linux/bitfield.h:155:3: note: in expansion of macro ‘__BF_FIELD_CHECK’ __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ ^ drivers/gpu/drm/xe/xe_oa.c:1573:18: note: in expansion of macro ‘FIELD_GET’ u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); ^ Fixes: b6fd51c62119 ("drm/xe/oa/uapi: Define and parse OA stream properties") Signed-off-by: Geert Uytterhoeven Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240729092634.2227611-1-geert+renesas@glider.be Signed-off-by: Lucas De Marchi (cherry picked from commit f2881dfdaaa9ec873dbd383ef5512fc31e576cbb) Signed-off-by: Rodrigo Vivi --- include/uapi/drm/xe_drm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 19619d4952a8..db232a25189e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1590,10 +1590,10 @@ enum drm_xe_oa_property_id { * b. Counter select c. Counter size and d. BC report. Also refer to the * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. */ -#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) -#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xffu << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xffu << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xffu << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xffu << 24) /** * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit -- cgit From 565d121b69980637f040eb4d84289869cdaabedf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 13 Aug 2024 00:28:25 +0200 Subject: tcp: prevent concurrent execution of tcp_sk_exit_batch Its possible that two threads call tcp_sk_exit_batch() concurrently, once from the cleanup_net workqueue, once from a task that failed to clone a new netns. In the latter case, error unwinding calls the exit handlers in reverse order for the 'failed' netns. tcp_sk_exit_batch() calls tcp_twsk_purge(). Problem is that since commit b099ce2602d8 ("net: Batch inet_twsk_purge"), this function picks up twsk in any dying netns, not just the one passed in via exit_batch list. This means that the error unwind of setup_net() can "steal" and destroy timewait sockets belonging to the exiting netns. This allows the netns exit worker to proceed to call WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount)); without the expected 1 -> 0 transition, which then splats. At same time, error unwind path that is also running inet_twsk_purge() will splat as well: WARNING: .. at lib/refcount.c:31 refcount_warn_saturate+0x1ed/0x210 ... refcount_dec include/linux/refcount.h:351 [inline] inet_twsk_kill+0x758/0x9c0 net/ipv4/inet_timewait_sock.c:70 inet_twsk_deschedule_put net/ipv4/inet_timewait_sock.c:221 inet_twsk_purge+0x725/0x890 net/ipv4/inet_timewait_sock.c:304 tcp_sk_exit_batch+0x1c/0x170 net/ipv4/tcp_ipv4.c:3522 ops_exit_list+0x128/0x180 net/core/net_namespace.c:178 setup_net+0x714/0xb40 net/core/net_namespace.c:375 copy_net_ns+0x2f0/0x670 net/core/net_namespace.c:508 create_new_namespaces+0x3ea/0xb10 kernel/nsproxy.c:110 ... because refcount_dec() of tw_refcount unexpectedly dropped to 0. This doesn't seem like an actual bug (no tw sockets got lost and I don't see a use-after-free) but as erroneous trigger of debug check. Add a mutex to force strict ordering: the task that calls tcp_twsk_purge() blocks other task from doing final _dec_and_test before mutex-owner has removed all tw sockets of dying netns. Fixes: e9bd0cca09d1 ("tcp: Don't allocate tcp_death_row outside of struct netns_ipv4.") Reported-by: syzbot+8ea26396ff85d23a8929@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/0000000000003a5292061f5e4e19@google.com/ Link: https://lore.kernel.org/netdev/20240812140104.GA21559@breakpoint.cc/ Signed-off-by: Florian Westphal Reviewed-by: Kuniyuki Iwashima Reviewed-by: Jason Xing Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240812222857.29837-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ipv4.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fd17f25ff288..a4e510846905 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -97,6 +97,8 @@ static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; +static DEFINE_MUTEX(tcp_exit_batch_mutex); + static u32 tcp_v4_init_seq(const struct sk_buff *skb) { return secure_tcp_seq(ip_hdr(skb)->daddr, @@ -3514,6 +3516,16 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) { struct net *net; + /* make sure concurrent calls to tcp_sk_exit_batch from net_cleanup_work + * and failed setup_net error unwinding path are serialized. + * + * tcp_twsk_purge() handles twsk in any dead netns, not just those in + * net_exit_list, the thread that dismantles a particular twsk must + * do so without other thread progressing to refcount_dec_and_test() of + * tcp_death_row.tw_refcount. + */ + mutex_lock(&tcp_exit_batch_mutex); + tcp_twsk_purge(net_exit_list); list_for_each_entry(net, net_exit_list, exit_list) { @@ -3521,6 +3533,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount)); tcp_fastopen_ctx_destroy(net); } + + mutex_unlock(&tcp_exit_batch_mutex); } static struct pernet_operations __net_initdata tcp_sk_ops = { -- cgit From 64b582ca88ca11400467b282d5fa3b870ded1c11 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 15 Aug 2024 16:32:27 +0000 Subject: block: Read max write zeroes once for __blkdev_issue_write_zeroes() As reported in [0], we may get a hang when formatting a XFS FS on a RAID0 drive. Commit 73a768d5f955 ("block: factor out a blk_write_zeroes_limit helper") changed __blkdev_issue_write_zeroes() to read the max write zeroes value in the loop. This is not safe as max write zeroes may change in value. Specifically for the case of [0], the value goes to 0, and we get an infinite loop. Lift the limit reading out of the loop. [0] https://lore.kernel.org/linux-xfs/4d31268f-310b-4220-88a2-e191c3932a82@oracle.com/T/#t Fixes: 73a768d5f955 ("block: factor out a blk_write_zeroes_limit helper") Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240815163228.216051-2-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- block/blk-lib.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index 9f735efa6c94..83eb7761c2bf 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -111,13 +111,20 @@ static sector_t bio_write_zeroes_limit(struct block_device *bdev) (UINT_MAX >> SECTOR_SHIFT) & ~bs_mask); } +/* + * There is no reliable way for the SCSI subsystem to determine whether a + * device supports a WRITE SAME operation without actually performing a write + * to media. As a result, write_zeroes is enabled by default and will be + * disabled if a zeroing operation subsequently fails. This means that this + * queue limit is likely to change at runtime. + */ static void __blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, - struct bio **biop, unsigned flags) + struct bio **biop, unsigned flags, sector_t limit) { + while (nr_sects) { - unsigned int len = min_t(sector_t, nr_sects, - bio_write_zeroes_limit(bdev)); + unsigned int len = min(nr_sects, limit); struct bio *bio; if ((flags & BLKDEV_ZERO_KILLABLE) && @@ -141,12 +148,14 @@ static void __blkdev_issue_write_zeroes(struct block_device *bdev, static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp, unsigned flags) { + sector_t limit = bio_write_zeroes_limit(bdev); struct bio *bio = NULL; struct blk_plug plug; int ret = 0; blk_start_plug(&plug); - __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio, flags); + __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio, + flags, limit); if (bio) { if ((flags & BLKDEV_ZERO_KILLABLE) && fatal_signal_pending(current)) { @@ -165,7 +174,7 @@ static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, * on an I/O error, in which case we'll turn any error into * "not supported" here. */ - if (ret && !bdev_write_zeroes_sectors(bdev)) + if (ret && !limit) return -EOPNOTSUPP; return ret; } @@ -265,12 +274,14 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, unsigned flags) { + sector_t limit = bio_write_zeroes_limit(bdev); + if (bdev_read_only(bdev)) return -EPERM; - if (bdev_write_zeroes_sectors(bdev)) { + if (limit) { __blkdev_issue_write_zeroes(bdev, sector, nr_sects, - gfp_mask, biop, flags); + gfp_mask, biop, flags, limit); } else { if (flags & BLKDEV_ZERO_NOFALLBACK) return -EOPNOTSUPP; -- cgit From 81475beb1b5996505a39cd1d9316ce1e668932a2 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 15 Aug 2024 16:32:28 +0000 Subject: block: Drop NULL check in bdev_write_zeroes_sectors() Function bdev_get_queue() must not return NULL, so drop the check in bdev_write_zeroes_sectors(). Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Reviewed-by: Martin K. Petersen Reviewed-by: Nitesh Shetty Link: https://lore.kernel.org/r/20240815163228.216051-3-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e85ec73a07d5..b7664d593486 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1296,12 +1296,7 @@ bdev_max_secure_erase_sectors(struct block_device *bdev) static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) { - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return q->limits.max_write_zeroes_sectors; - - return 0; + return bdev_get_queue(bdev)->limits.max_write_zeroes_sectors; } static inline bool bdev_nonrot(struct block_device *bdev) -- cgit From de48aad2a8e80ba026ca91c383f590f0bf97b3c0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Aug 2024 11:47:57 -0400 Subject: rpcrdma: Device kref is over-incremented on error from xa_alloc If the device's reference count is too high, the device completion callback never fires. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/ib_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c index a938c19c3490..4d1e9fa89573 100644 --- a/net/sunrpc/xprtrdma/ib_client.c +++ b/net/sunrpc/xprtrdma/ib_client.c @@ -62,9 +62,9 @@ int rpcrdma_rn_register(struct ib_device *device, if (!rd || test_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags)) return -ENETUNREACH; - kref_get(&rd->rd_kref); if (xa_alloc(&rd->rd_xa, &rn->rn_index, rn, xa_limit_32b, GFP_KERNEL) < 0) return -ENOMEM; + kref_get(&rd->rd_kref); rn->rn_done = done; return 0; } -- cgit From 6b3b023e2d0c130235c0e494f77df2a9a64ab6a2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Aug 2024 11:47:58 -0400 Subject: rpcrdma: Use XA_FLAGS_ALLOC instead of XA_FLAGS_ALLOC1 Nit: The built-in xa_limit_32b range starts at 0, but XA_FLAGS_ALLOC1 configures the xarray's allocator to start at 1. Adopt the more conventional XA_FLAGS_ALLOC because there's no mechanical reason to skip 0. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/ib_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c index 4d1e9fa89573..7913d7bad23d 100644 --- a/net/sunrpc/xprtrdma/ib_client.c +++ b/net/sunrpc/xprtrdma/ib_client.c @@ -111,7 +111,7 @@ static int rpcrdma_add_one(struct ib_device *device) return -ENOMEM; kref_init(&rd->rd_kref); - xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC1); + xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC); rd->rd_device = device; init_completion(&rd->rd_done); ib_set_client_data(device, &rpcrdma_ib_client, rd); -- cgit From dc0112e6d8b42b39f9d283bab489a757e9d284f0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Aug 2024 11:47:59 -0400 Subject: rpcrdma: Trace connection registration and unregistration These new trace points record xarray indices and the time of endpoint registration and unregistration, to co-ordinate with device removal events. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 36 ++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/ib_client.c | 2 ++ 2 files changed, 38 insertions(+) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index ba2d6a0e41cc..a96a985c49b3 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -2277,6 +2277,42 @@ DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one); DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_wait_on); DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one_done); +DECLARE_EVENT_CLASS(rpcrdma_client_register_class, + TP_PROTO( + const struct ib_device *device, + const struct rpcrdma_notification *rn + ), + + TP_ARGS(device, rn), + + TP_STRUCT__entry( + __string(name, device->name) + __field(void *, callback) + __field(u32, index) + ), + + TP_fast_assign( + __assign_str(name); + __entry->callback = rn->rn_done; + __entry->index = rn->rn_index; + ), + + TP_printk("device=%s index=%u done callback=%pS\n", + __get_str(name), __entry->index, __entry->callback + ) +); + +#define DEFINE_CLIENT_REGISTER_EVENT(name) \ + DEFINE_EVENT(rpcrdma_client_register_class, name, \ + TP_PROTO( \ + const struct ib_device *device, \ + const struct rpcrdma_notification *rn \ + ), \ + TP_ARGS(device, rn)) + +DEFINE_CLIENT_REGISTER_EVENT(rpcrdma_client_register); +DEFINE_CLIENT_REGISTER_EVENT(rpcrdma_client_unregister); + #endif /* _TRACE_RPCRDMA_H */ #include diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c index 7913d7bad23d..8507cd4d8921 100644 --- a/net/sunrpc/xprtrdma/ib_client.c +++ b/net/sunrpc/xprtrdma/ib_client.c @@ -66,6 +66,7 @@ int rpcrdma_rn_register(struct ib_device *device, return -ENOMEM; kref_get(&rd->rd_kref); rn->rn_done = done; + trace_rpcrdma_client_register(device, rn); return 0; } @@ -91,6 +92,7 @@ void rpcrdma_rn_unregister(struct ib_device *device, if (!rd) return; + trace_rpcrdma_client_unregister(device, rn); xa_erase(&rd->rd_xa, rn->rn_index); kref_put(&rd->rd_kref, rpcrdma_rn_release); } -- cgit From 2240a50e6294214de791729e9dcba6880fa7e44e Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sat, 17 Aug 2024 18:15:41 +0800 Subject: KVM: arm64: vgic-debug: Don't put unmarked LPIs If there were LPIs being mapped behind our back (i.e., between .start() and .stop()), we would put them at iter_unmark_lpis() without checking if they were actually *marked*, which is obviously not good. Switch to use the xa_for_each_marked() iterator to fix it. Cc: stable@vger.kernel.org Fixes: 85d3ccc8b75b ("KVM: arm64: vgic-debug: Use an xarray mark for debug iterator") Signed-off-by: Zenghui Yu Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240817101541.1664-1-yuzenghui@huawei.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/vgic/vgic-debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index bc74d06398ef..e1397ab2072a 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -85,7 +85,7 @@ static void iter_unmark_lpis(struct kvm *kvm) struct vgic_irq *irq; unsigned long intid; - xa_for_each(&dist->lpi_xa, intid, irq) { + xa_for_each_marked(&dist->lpi_xa, intid, irq, LPI_XA_MARK_DEBUG_ITER) { xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); vgic_put_irq(kvm, irq); } -- cgit From f616506754d34bcfdbfbc7508b562e5c98461e9a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Aug 2024 13:50:45 +0100 Subject: KVM: arm64: vgic: Don't hold config_lock while unregistering redistributors We recently moved the teardown of the vgic part of a vcpu inside a critical section guarded by the config_lock. This teardown phase involves calling into kvm_io_bus_unregister_dev(), which takes the kvm->srcu lock. However, this violates the established order where kvm->srcu is taken on a memory fault (such as an MMIO access), possibly followed by taking the config_lock if the GIC emulation requires mutual exclusion from the other vcpus. It therefore results in a bad lockdep splat, as reported by Zenghui. Fix this by moving the call to kvm_io_bus_unregister_dev() outside of the config_lock critical section. At this stage, there shouln't be any need to hold the config_lock. As an additional bonus, document the ordering between kvm->slots_lock, kvm->srcu and kvm->arch.config_lock so that I cannot pretend I didn't know about those anymore. Fixes: 9eb18136af9f ("KVM: arm64: vgic: Hold config_lock while tearing down a CPU interface") Reported-by: Zenghui Yu Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Tested-by: Zenghui Yu Link: https://lore.kernel.org/r/20240819125045.3474845-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/vgic/vgic-init.c | 9 ++++++--- arch/arm64/kvm/vgic/vgic.c | 5 +++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 41feb858ff9a..e7c53e8af3d1 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -417,10 +417,8 @@ static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) kfree(vgic_cpu->private_irqs); vgic_cpu->private_irqs = NULL; - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - vgic_unregister_redist_iodev(vcpu); + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; - } } void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -448,6 +446,11 @@ void kvm_vgic_destroy(struct kvm *kvm) kvm_vgic_dist_destroy(kvm); mutex_unlock(&kvm->arch.config_lock); + + if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + kvm_for_each_vcpu(i, vcpu, kvm) + vgic_unregister_redist_iodev(vcpu); + mutex_unlock(&kvm->slots_lock); } diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 974849ea7101..abe29c7d85d0 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -36,6 +36,11 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * we have to disable IRQs before taking this lock and everything lower * than it. * + * The config_lock has additional ordering requirements: + * kvm->slots_lock + * kvm->srcu + * kvm->arch.config_lock + * * If you need to take multiple locks, always take the upper lock first, * then the lower ones, e.g. first take the its_lock, then the irq_lock. * If you are already holding a lock and need to take a higher one, you -- cgit From 27cb2b7fec2abf310e4128137979124ead920ccb Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 3 Jul 2024 13:43:38 +0100 Subject: drm/xe/bmg: implement Wa_16023588340 This involves enabling l2 caching of host side memory access to VRAM through the CPU BAR. The main fallout here is with display since VRAM writes from CPU can now be cached in GPU l2, and display is never coherent with caches, so needs various manual flushing. In the case of fbc we disable it due to complications in getting this to work correctly (in a later patch). Signed-off-by: Matthew Auld Cc: Jonathan Cavitt Cc: Matt Roper Cc: Lucas De Marchi Cc: Vinod Govindapillai Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240703124338.208220-3-matthew.auld@intel.com (cherry picked from commit 01570b446939c3538b1aa3d059837f49fa14a3ae) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 2 ++ drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 8 +++++ drivers/gpu/drm/xe/display/xe_fb_pin.c | 3 ++ drivers/gpu/drm/xe/regs/xe_gt_regs.h | 8 +++++ drivers/gpu/drm/xe/xe_device.c | 30 +++++++++++++++++ drivers/gpu/drm/xe/xe_device.h | 1 + drivers/gpu/drm/xe/xe_gt.c | 54 ++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_pat.c | 11 +++++- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 9 files changed, 117 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 628c245c4822..e97c9da451b3 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -25,12 +25,14 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ uses_generated_oob := \ $(obj)/xe_ggtt.o \ + $(obj)/xe_device.o \ $(obj)/xe_gsc.o \ $(obj)/xe_gt.o \ $(obj)/xe_guc.o \ $(obj)/xe_guc_ads.o \ $(obj)/xe_guc_pc.o \ $(obj)/xe_migrate.o \ + $(obj)/xe_pat.o \ $(obj)/xe_ring_ops.o \ $(obj)/xe_vm.o \ $(obj)/xe_wa.o \ diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index 9e860c61f4b3..ccd0d87d438a 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -7,6 +7,8 @@ #include "intel_display_types.h" #include "intel_dsb_buffer.h" #include "xe_bo.h" +#include "xe_device.h" +#include "xe_device_types.h" #include "xe_gt.h" u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) @@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); + xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); + xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 423f367c7065..d7db44e79eaf 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -10,6 +10,7 @@ #include "intel_fb.h" #include "intel_fb_pin.h" #include "xe_bo.h" +#include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_pm.h" @@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return vma; err_unpin: diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d44564bad009..fd9d94174efb 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -80,6 +80,9 @@ #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) #define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) +#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194) +#define CG_DIS_CNTLBUS REG_BIT(6) + #define CCS_AUX_INV XE_REG(0x4208) #define VD0_AUX_INV XE_REG(0x4218) @@ -372,6 +375,11 @@ #define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8) +#define XE2_GLOBAL_INVAL XE_REG(0xb404) + +#define SCRATCH1LPFC XE_REG(0xb474) +#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0) + #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) #define XE2_TDF_CTRL XE_REG(0xb418) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index f2f1d8ddb221..6ce44ca2524d 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -54,6 +54,9 @@ #include "xe_vm.h" #include "xe_vram.h" #include "xe_wait_user_fence.h" +#include "xe_wa.h" + +#include static int xe_file_open(struct drm_device *dev, struct drm_file *file) { @@ -820,6 +823,11 @@ void xe_device_td_flush(struct xe_device *xe) if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) return; + if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { + xe_device_l2_flush(xe); + return; + } + for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) continue; @@ -843,6 +851,28 @@ void xe_device_td_flush(struct xe_device *xe) } } +void xe_device_l2_flush(struct xe_device *xe) +{ + struct xe_gt *gt; + int err; + + gt = xe_root_mmio_gt(xe); + + if (!XE_WA(gt, 16023588340)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return; + + xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1); + + if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true)) + xe_gt_err_once(gt, "Global invalidation timeout\n"); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index b3952718b3c1..533ccfb2567a 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -162,6 +162,7 @@ u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address); u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address); void xe_device_td_flush(struct xe_device *xe); +void xe_device_l2_flush(struct xe_device *xe); static inline bool xe_device_wedged(struct xe_device *xe) { diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 31b2e64c70c6..816ecc9e294c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -11,6 +11,8 @@ #include #include +#include + #include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_mi_commands.h" #include "regs/xe_gt_regs.h" @@ -95,6 +97,51 @@ void xe_gt_sanitize(struct xe_gt *gt) gt->uc.guc.submission_state.enabled = false; } +static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) +{ + u32 reg; + int err; + + if (!XE_WA(gt, 16023588340)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (WARN_ON(err)) + return; + + if (!xe_gt_is_media_type(gt)) { + xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH); + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg |= CG_DIS_CNTLBUS; + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + } + + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + +static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) +{ + u32 reg; + int err; + + if (!XE_WA(gt, 16023588340)) + return; + + if (xe_gt_is_media_type(gt)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (WARN_ON(err)) + return; + + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg &= ~CG_DIS_CNTLBUS; + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + /** * xe_gt_remove() - Clean up the GT structures before driver removal * @gt: the GT object @@ -111,6 +158,8 @@ void xe_gt_remove(struct xe_gt *gt) for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) xe_hw_fence_irq_finish(>->fence_irq[i]); + + xe_gt_disable_host_l2_vram(gt); } static void gt_reset_worker(struct work_struct *w); @@ -508,6 +557,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) xe_gt_mcr_init_early(gt); xe_pat_init(gt); + xe_gt_enable_host_l2_vram(gt); err = xe_uc_init(>->uc); if (err) @@ -643,6 +693,8 @@ static int do_gt_restart(struct xe_gt *gt) xe_pat_init(gt); + xe_gt_enable_host_l2_vram(gt); + xe_gt_mcr_set_implicit_defaults(gt); xe_reg_sr_apply_mmio(>->reg_sr, gt); @@ -796,6 +848,8 @@ int xe_gt_suspend(struct xe_gt *gt) xe_gt_idle_disable_pg(gt); + xe_gt_disable_host_l2_vram(gt); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); xe_gt_dbg(gt, "suspended\n"); diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 4ee32ee1cc88..722278cc23fc 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -7,6 +7,8 @@ #include +#include + #include "regs/xe_reg_defs.h" #include "xe_assert.h" #include "xe_device.h" @@ -15,6 +17,7 @@ #include "xe_gt_mcr.h" #include "xe_mmio.h" #include "xe_sriov.h" +#include "xe_wa.h" #define _PAT_ATS 0x47fc #define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \ @@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe) if (GRAPHICS_VER(xe) == 20) { xe->pat.ops = &xe2_pat_ops; xe->pat.table = xe2_pat_table; - xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table); + + /* Wa_16023588340. XXX: Should use XE_WA */ + if (GRAPHICS_VERx100(xe) == 2001) + xe->pat.n_entries = 28; /* Disable CLOS3 */ + else + xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 3; xe->pat.idx[XE_CACHE_WT] = 15; xe->pat.idx[XE_CACHE_WB] = 2; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 26066beb4f6f..08f7336881e3 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -29,3 +29,4 @@ 13011645652 GRAPHICS_VERSION(2004) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) +16023588340 GRAPHICS_VERSION(2001) -- cgit From 03a2dc84f5c4ef31ac0112b29d51ff103f7c8dd4 Mon Sep 17 00:00:00 2001 From: Ngai-Mint Kwan Date: Mon, 1 Jul 2024 11:46:37 -0700 Subject: drm/xe/xe2lpm: Extend Wa_16021639441 Wa_16021639441 applies to Xe2_LPM. Signed-off-by: Ngai-Mint Kwan Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240701184637.531794-1-ngai-mint.kwan@linux.intel.com (cherry picked from commit 74e3076800067c6dc0dcff5b75344cec064c20eb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index c7bf0862b231..6c52d9d02b5f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -539,6 +539,16 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, + /* Xe2_LPM */ + + { XE_RTP_NAME("16021639441"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), + GHWSP_CSB_REPORT_DIS | + PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + /* Xe2_HPM */ { XE_RTP_NAME("16021639441"), -- cgit From b196e6fcc71186134b4cfe756067d87ae41b1ed9 Mon Sep 17 00:00:00 2001 From: Bommu Krishnaiah Date: Wed, 3 Jul 2024 14:37:54 +0530 Subject: drm/xe/xe2lpg: Extend workaround 14021402888 workaround 14021402888 also applies to Xe2_LPG. Replicate the existing entry to one specific for Xe2_LPG. Signed-off-by: Bommu Krishnaiah Cc: Tejas Upadhyay Cc: Matt Roper Cc: Himal Prasad Ghimiray Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240703090754.1323647-1-krishnaiah.bommu@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 56ab6986992ba143aee0bda33e15a764343e271d) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 6c52d9d02b5f..fd009b2c68fa 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -486,6 +486,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE)) }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, /* Xe2_HPG */ -- cgit From 7e81285380743aa5759bb29a388f056c3d326a2c Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 10 Jul 2024 10:57:50 +0530 Subject: drm/xe/xe2: Make subsequent L2 flush sequential Issuing the flush on top of an ongoing flush is not desirable. Lets use lock to make it sequential. Reviewed-by: Nirmoy Das Signed-off-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240710052750.3031586-1-tejas.upadhyay@intel.com Signed-off-by: Nirmoy Das (cherry picked from commit 71733b8d7f50b61403f940c6c9745fb3a9b98dcb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 2 ++ drivers/gpu/drm/xe/xe_gt.c | 1 + drivers/gpu/drm/xe/xe_gt_types.h | 6 ++++++ 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 6ce44ca2524d..c89deffffb6d 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -865,10 +865,12 @@ void xe_device_l2_flush(struct xe_device *xe) if (err) return; + spin_lock(>->global_invl_lock); xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1); if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); + spin_unlock(>->global_invl_lock); xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 816ecc9e294c..b9bcbbe27705 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -388,6 +388,7 @@ int xe_gt_init_early(struct xe_gt *gt) xe_force_wake_init_gt(gt, gt_to_fw(gt)); xe_pcode_init(gt); + spin_lock_init(>->global_invl_lock); return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 6b5e0b45efb0..38a0d0e178c8 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -362,6 +362,12 @@ struct xe_gt { */ spinlock_t mcr_lock; + /** + * @global_invl_lock: protects the register for the duration + * of a global invalidation of l2 cache + */ + spinlock_t global_invl_lock; + /** @wa_active: keep track of active workarounds */ struct { /** @wa_active.gt: bitmap with active GT workarounds */ -- cgit From cbc6e98ab11bea52789d2835e45e8816c39407e1 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Tue, 9 Jul 2024 21:26:06 +0530 Subject: drm/xe/xe2: Add Wa_15015404425 Wa_15015404425 asks us to perform four "dummy" writes to a non-existent register offset before every real register read. Although the specific offset of the writes doesn't directly matter, the workaround suggests offset 0x130030 as a good target so that these writes will be easy to recognize and filter out in debugging traces. V5(MattR): - Avoid negating an equality comparison V4(MattR): - Use writel and remove xe_reg usage V3(MattR): - Define dummy reg local to function - Avoid tracing dummy writes - Update commit message V2: - Add WA to 8/16/32bit reads also - MattR - Corrected dummy reg address - MattR - Use for loop to avoid mental pause - JaniN Reviewed-by: Matt Roper Signed-off-by: Tejas Upadhyay Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240709155606.2998941-1-tejas.upadhyay@intel.com (cherry picked from commit 86c5b70a9c0c3f05f7002ef8b789460c96b54e27) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 83122c77edd9..aa68cac9fdf8 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -124,12 +124,29 @@ int xe_mmio_init(struct xe_device *xe) return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe); } +static void mmio_flush_pending_writes(struct xe_gt *gt) +{ +#define DUMMY_REG_OFFSET 0x130030 + struct xe_tile *tile = gt_to_tile(gt); + int i; + + if (tile->xe->info.platform != XE_LUNARLAKE) + return; + + /* 4 dummy writes */ + for (i = 0; i < 4; i++) + writel(0, tile->mmio.regs + DUMMY_REG_OFFSET); +} + u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); u8 val; + /* Wa_15015404425 */ + mmio_flush_pending_writes(gt); + val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); @@ -142,6 +159,9 @@ u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); u16 val; + /* Wa_15015404425 */ + mmio_flush_pending_writes(gt); + val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); @@ -163,6 +183,9 @@ u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); u32 val; + /* Wa_15015404425 */ + mmio_flush_pending_writes(gt); + if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt))) val = xe_gt_sriov_vf_read32(gt, reg); else -- cgit From f5cb1275c8ce56c7583cb323cfa08a820a7ef6b4 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 7 Aug 2024 16:53:32 -0700 Subject: drm/xe: fix WA 14018094691 This WA is applied while initializing the media GT, but it a primary GT WA (because it modifies a register on the primary GT), so the XE_WA macro is returning false even when the WA should be applied. Fix this by using the primary GT in the macro. Note that this WA only applies to PXP and we don't yet support that in Xe, so there are no negative effects to this bug, which is why we didn't see any errors in testing. v2: use the primary GT in the macro instead of marking the WA as platform-wide (Lucas, Matt). Signed-off-by: Daniele Ceraolo Spurio Cc: Matt Roper Cc: Lucas De Marchi Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240807235333.1370915-1-daniele.ceraolospurio@intel.com (cherry picked from commit e422c0bfd9e47e399e86bcc483f49d8b54064fc2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index f8239a13fa2b..77ce44e845c5 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -260,7 +260,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) struct xe_tile *tile = gt_to_tile(gt); int ret; - if (XE_WA(gt, 14018094691)) { + if (XE_WA(tile->primary_gt, 14018094691)) { ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); /* @@ -278,7 +278,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) ret = gsc_upload(gsc); - if (XE_WA(gt, 14018094691)) + if (XE_WA(tile->primary_gt, 14018094691)) xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); if (ret) -- cgit From 8776b0234e1d008d8f19b26f6c3af1cfa6187070 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Mon, 12 Aug 2024 19:11:17 +0530 Subject: drm/xe/xe2hpg: Add Wa_14021821874 Wa_14021821874 applies to xe2_hpg V2(Himal): - Use space after define Cc: Matt Roper Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Tejas Upadhyay Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240812134117.813670-1-tejas.upadhyay@intel.com (cherry picked from commit 21ff3a16e92e2fa4f906a61d148aca1423c58298) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index fd9d94174efb..3c2865040058 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -437,6 +437,7 @@ #define DIS_FIX_EOT1_FLUSH REG_BIT(9) #define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED) +#define STK_ID_RESTRICT REG_BIT(12) #define SLM_WMTP_RESTORE REG_BIT(11) #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index fd009b2c68fa..e648265d081b 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -542,6 +542,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, + { XE_RTP_NAME("14021821874"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) + }, /* Xe2_LPM */ -- cgit From 8636a5c29be1f05b5162a5c82c874338b6717759 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 9 Aug 2024 16:12:35 -0700 Subject: drm/xe: use devm instead of drmm for managed bo The BO cleanup touches the GGTT and therefore requires the HW to be available, so we need to use devm instead of drmm. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1160 Signed-off-by: Daniele Ceraolo Spurio Cc: Lucas De Marchi Cc: Matthew Auld Reviewed-by: Matthew Auld Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240809231237.1503796-2-daniele.ceraolospurio@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 8d3a2d3d766a823c7510cdc17e6ff7c042c63b61) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 31192d983d9e..261d3d6c8a93 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1575,7 +1575,7 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, return bo; } -static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg) +static void __xe_bo_unpin_map_no_vm(void *arg) { xe_bo_unpin_map_no_vm(arg); } @@ -1590,7 +1590,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile if (IS_ERR(bo)) return bo; - ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo); + ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo); if (ret) return ERR_PTR(ret); @@ -1638,7 +1638,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str if (IS_ERR(bo)) return PTR_ERR(bo); - drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src); + devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src); *src = bo; return 0; -- cgit From a06a7b3429e2548a28bb661f17347b8ffe4a8a15 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 15 Aug 2024 16:05:40 -0700 Subject: drm/xe/uc: Use devm to register cleanup that includes exec_queues Exec_queue cleanup requires HW access, so we need to use devm instead of drmm for it. Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Alan Previn Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240815230541.3828206-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 5a891a0e69f134f53cc91b409f38e5ea1cafaf0a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gsc.c | 4 ++-- drivers/gpu/drm/xe/xe_guc_submit.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 77ce44e845c5..2a612652bb13 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -437,7 +437,7 @@ out: return ret; } -static void free_resources(struct drm_device *drm, void *arg) +static void free_resources(void *arg) { struct xe_gsc *gsc = arg; @@ -501,7 +501,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) gsc->q = q; gsc->wq = wq; - err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc); + err = devm_add_action_or_reset(xe->drm.dev, free_resources, gsc); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 6398629e6b4e..77b0f0d8f729 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -284,7 +284,7 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) free_submit_wq(guc); } -static void guc_submit_wedged_fini(struct drm_device *drm, void *arg) +static void guc_submit_wedged_fini(void *arg) { struct xe_guc *guc = arg; struct xe_exec_queue *q; @@ -877,7 +877,7 @@ void xe_guc_submit_wedge(struct xe_guc *guc) xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); - err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, guc_submit_wedged_fini, guc); if (err) { drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); -- cgit From 4e870e6bbec5c41c0d8b253282dca9465fbf5044 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 19 Aug 2024 17:04:42 -0700 Subject: Input: himax_hx83112b - fix incorrect size when reading product ID We need to read a u32 value (4 bytes), not size of a pointer to that value. Also, himax_read_mcu() wrapper is an overkill, remove it and use himax_bus_read() directly. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408200301.Ujpj7Vov-lkp@intel.com/ Fixes: 0944829d491e ("Input: himax_hx83112b - implement MCU register reading") Tested-by: Felix Kaechele Link: https://lore.kernel.org/r/ZsPdmtfC54R7JVxR@google.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/himax_hx83112b.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/input/touchscreen/himax_hx83112b.c b/drivers/input/touchscreen/himax_hx83112b.c index 9ed3bccde4ac..896a145ddb2b 100644 --- a/drivers/input/touchscreen/himax_hx83112b.c +++ b/drivers/input/touchscreen/himax_hx83112b.c @@ -130,17 +130,6 @@ static int himax_bus_read(struct himax_ts_data *ts, u32 address, void *dst, return 0; } -static int himax_read_mcu(struct himax_ts_data *ts, u32 address, u32 *dst) -{ - int error; - - error = himax_bus_read(ts, address, dst, sizeof(dst)); - if (error) - return error; - - return 0; -} - static void himax_reset(struct himax_ts_data *ts) { gpiod_set_value_cansleep(ts->gpiod_rst, 1); @@ -160,7 +149,8 @@ static int himax_read_product_id(struct himax_ts_data *ts, u32 *product_id) { int error; - error = himax_read_mcu(ts, HIMAX_REG_ADDR_ICID, product_id); + error = himax_bus_read(ts, HIMAX_REG_ADDR_ICID, product_id, + sizeof(*product_id)); if (error) return error; -- cgit From ce335db0621648472f9bb4b7191eb2e13a5793cf Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 16 Aug 2024 18:29:17 +0800 Subject: net: mctp: test: Use correct skb for route input check In the MCTP route input test, we're routing one skb, then (when delivery is expected) checking the resulting routed skb. However, we're currently checking the original skb length, rather than the routed skb. Check the routed skb instead; the original will have been freed at this point. Fixes: 8892c0490779 ("mctp: Add route input to socket tests") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/kernel-janitors/4ad204f0-94cf-46c5-bdab-49592addf315@kili.mountain/ Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240816-mctp-kunit-skb-fix-v1-1-3c367ac89c27@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/test/route-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 77e5dd422258..8551dab1d1e6 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -366,7 +366,7 @@ static void mctp_test_route_input_sk(struct kunit *test) skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); - KUNIT_EXPECT_EQ(test, skb->len, 1); + KUNIT_EXPECT_EQ(test, skb2->len, 1); skb_free_datagram(sock->sk, skb2); -- cgit From 807067bf014d4a3ae2cc55bd3de16f22a01eb580 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 Aug 2024 15:04:37 -0700 Subject: kcm: Serialise kcm_sendmsg() for the same socket. syzkaller reported UAF in kcm_release(). [0] The scenario is 1. Thread A builds a skb with MSG_MORE and sets kcm->seq_skb. 2. Thread A resumes building skb from kcm->seq_skb but is blocked by sk_stream_wait_memory() 3. Thread B calls sendmsg() concurrently, finishes building kcm->seq_skb and puts the skb to the write queue 4. Thread A faces an error and finally frees skb that is already in the write queue 5. kcm_release() does double-free the skb in the write queue When a thread is building a MSG_MORE skb, another thread must not touch it. Let's add a per-sk mutex and serialise kcm_sendmsg(). [0]: BUG: KASAN: slab-use-after-free in __skb_unlink include/linux/skbuff.h:2366 [inline] BUG: KASAN: slab-use-after-free in __skb_dequeue include/linux/skbuff.h:2385 [inline] BUG: KASAN: slab-use-after-free in __skb_queue_purge_reason include/linux/skbuff.h:3175 [inline] BUG: KASAN: slab-use-after-free in __skb_queue_purge include/linux/skbuff.h:3181 [inline] BUG: KASAN: slab-use-after-free in kcm_release+0x170/0x4c8 net/kcm/kcmsock.c:1691 Read of size 8 at addr ffff0000ced0fc80 by task syz-executor329/6167 CPU: 1 PID: 6167 Comm: syz-executor329 Tainted: G B 6.8.0-rc5-syzkaller-g9abbc24128bc #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call trace: dump_backtrace+0x1b8/0x1e4 arch/arm64/kernel/stacktrace.c:291 show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:298 __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd0/0x124 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:377 [inline] print_report+0x178/0x518 mm/kasan/report.c:488 kasan_report+0xd8/0x138 mm/kasan/report.c:601 __asan_report_load8_noabort+0x20/0x2c mm/kasan/report_generic.c:381 __skb_unlink include/linux/skbuff.h:2366 [inline] __skb_dequeue include/linux/skbuff.h:2385 [inline] __skb_queue_purge_reason include/linux/skbuff.h:3175 [inline] __skb_queue_purge include/linux/skbuff.h:3181 [inline] kcm_release+0x170/0x4c8 net/kcm/kcmsock.c:1691 __sock_release net/socket.c:659 [inline] sock_close+0xa4/0x1e8 net/socket.c:1421 __fput+0x30c/0x738 fs/file_table.c:376 ____fput+0x20/0x30 fs/file_table.c:404 task_work_run+0x230/0x2e0 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0x618/0x1f64 kernel/exit.c:871 do_group_exit+0x194/0x22c kernel/exit.c:1020 get_signal+0x1500/0x15ec kernel/signal.c:2893 do_signal+0x23c/0x3b44 arch/arm64/kernel/signal.c:1249 do_notify_resume+0x74/0x1f4 arch/arm64/kernel/entry-common.c:148 exit_to_user_mode_prepare arch/arm64/kernel/entry-common.c:169 [inline] exit_to_user_mode arch/arm64/kernel/entry-common.c:178 [inline] el0_svc+0xac/0x168 arch/arm64/kernel/entry-common.c:713 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 Allocated by task 6166: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_alloc_info+0x70/0x84 mm/kasan/generic.c:626 unpoison_slab_object mm/kasan/common.c:314 [inline] __kasan_slab_alloc+0x74/0x8c mm/kasan/common.c:340 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slub.c:3813 [inline] slab_alloc_node mm/slub.c:3860 [inline] kmem_cache_alloc_node+0x204/0x4c0 mm/slub.c:3903 __alloc_skb+0x19c/0x3d8 net/core/skbuff.c:641 alloc_skb include/linux/skbuff.h:1296 [inline] kcm_sendmsg+0x1d3c/0x2124 net/kcm/kcmsock.c:783 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] sock_sendmsg+0x220/0x2c0 net/socket.c:768 splice_to_socket+0x7cc/0xd58 fs/splice.c:889 do_splice_from fs/splice.c:941 [inline] direct_splice_actor+0xec/0x1d8 fs/splice.c:1164 splice_direct_to_actor+0x438/0xa0c fs/splice.c:1108 do_splice_direct_actor fs/splice.c:1207 [inline] do_splice_direct+0x1e4/0x304 fs/splice.c:1233 do_sendfile+0x460/0xb3c fs/read_write.c:1295 __do_sys_sendfile64 fs/read_write.c:1362 [inline] __se_sys_sendfile64 fs/read_write.c:1348 [inline] __arm64_sys_sendfile64+0x160/0x3b4 fs/read_write.c:1348 __invoke_syscall arch/arm64/kernel/syscall.c:37 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155 el0_svc+0x54/0x168 arch/arm64/kernel/entry-common.c:712 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 Freed by task 6167: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_free_info+0x5c/0x74 mm/kasan/generic.c:640 poison_slab_object+0x124/0x18c mm/kasan/common.c:241 __kasan_slab_free+0x3c/0x78 mm/kasan/common.c:257 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2121 [inline] slab_free mm/slub.c:4299 [inline] kmem_cache_free+0x15c/0x3d4 mm/slub.c:4363 kfree_skbmem+0x10c/0x19c __kfree_skb net/core/skbuff.c:1109 [inline] kfree_skb_reason+0x240/0x6f4 net/core/skbuff.c:1144 kfree_skb include/linux/skbuff.h:1244 [inline] kcm_release+0x104/0x4c8 net/kcm/kcmsock.c:1685 __sock_release net/socket.c:659 [inline] sock_close+0xa4/0x1e8 net/socket.c:1421 __fput+0x30c/0x738 fs/file_table.c:376 ____fput+0x20/0x30 fs/file_table.c:404 task_work_run+0x230/0x2e0 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0x618/0x1f64 kernel/exit.c:871 do_group_exit+0x194/0x22c kernel/exit.c:1020 get_signal+0x1500/0x15ec kernel/signal.c:2893 do_signal+0x23c/0x3b44 arch/arm64/kernel/signal.c:1249 do_notify_resume+0x74/0x1f4 arch/arm64/kernel/entry-common.c:148 exit_to_user_mode_prepare arch/arm64/kernel/entry-common.c:169 [inline] exit_to_user_mode arch/arm64/kernel/entry-common.c:178 [inline] el0_svc+0xac/0x168 arch/arm64/kernel/entry-common.c:713 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 The buggy address belongs to the object at ffff0000ced0fc80 which belongs to the cache skbuff_head_cache of size 240 The buggy address is located 0 bytes inside of freed 240-byte region [ffff0000ced0fc80, ffff0000ced0fd70) The buggy address belongs to the physical page: page:00000000d35f4ae4 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10ed0f flags: 0x5ffc00000000800(slab|node=0|zone=2|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 05ffc00000000800 ffff0000c1cbf640 fffffdffc3423100 dead000000000004 raw: 0000000000000000 00000000000c000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff0000ced0fb80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff0000ced0fc00: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc >ffff0000ced0fc80: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff0000ced0fd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc ffff0000ced0fd80: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+b72d86aa5df17ce74c60@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b72d86aa5df17ce74c60 Tested-by: syzbot+b72d86aa5df17ce74c60@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240815220437.69511-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/kcm.h | 1 + net/kcm/kcmsock.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/include/net/kcm.h b/include/net/kcm.h index 90279e5e09a5..441e993be634 100644 --- a/include/net/kcm.h +++ b/include/net/kcm.h @@ -70,6 +70,7 @@ struct kcm_sock { struct work_struct tx_work; struct list_head wait_psock_list; struct sk_buff *seq_skb; + struct mutex tx_mutex; u32 tx_stopped : 1; /* Don't use bit fields here, these are set under different locks */ diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 2f191e50d4fc..d4118c796290 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -755,6 +755,7 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR); int err = -EPIPE; + mutex_lock(&kcm->tx_mutex); lock_sock(sk); /* Per tcp_sendmsg this should be in poll */ @@ -926,6 +927,7 @@ partial_message: KCM_STATS_ADD(kcm->stats.tx_bytes, copied); release_sock(sk); + mutex_unlock(&kcm->tx_mutex); return copied; out_error: @@ -951,6 +953,7 @@ out_error: sk->sk_write_space(sk); release_sock(sk); + mutex_unlock(&kcm->tx_mutex); return err; } @@ -1204,6 +1207,7 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) spin_unlock_bh(&mux->lock); INIT_WORK(&kcm->tx_work, kcm_tx_work); + mutex_init(&kcm->tx_mutex); spin_lock_bh(&mux->rx_lock); kcm_rcv_ready(kcm); -- cgit From 2102bdac67b55bf2d1df4ff757bced74e94a5f74 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 Aug 2024 21:03:07 -0400 Subject: bcachefs: Extra debug for data move path We don't have sufficient information to debug: https://github.com/koverstreet/bcachefs/issues/726 - print out durability of extent ptrs, when non default - print the number of replicas we need in data_update_to_text() Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 3 +++ fs/bcachefs/extents.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 6a854c918496..1ca628e93e87 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -475,6 +475,9 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, bch2_compression_opt_to_text(out, background_compression(*io_opts)); prt_newline(out); + prt_str(out, "opts.replicas:\t"); + prt_u64(out, io_opts->data_replicas); + prt_str(out, "extra replicas:\t"); prt_u64(out, data_opts->extra_replicas); } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 4419ad3e454e..9406f82fc255 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1017,6 +1017,8 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc prt_printf(out, "ptr: %u:%llu:%u gen %u", ptr->dev, b, offset, ptr->gen); + if (ca->mi.durability != 1) + prt_printf(out, " d=%u", ca->mi.durability); if (ptr->cached) prt_str(out, " cached"); if (ptr->unwritten) -- cgit From 50f2b98dc83de7809a5c5bf0ccf9af2e75c37c13 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 13 Aug 2024 10:59:08 +0100 Subject: MIPS: cevt-r4k: Don't call get_c0_compare_int if timer irq is installed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This avoids warning: [ 0.118053] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:283 Caused by get_c0_compare_int on secondary CPU. We also skipped saving IRQ number to struct clock_event_device *cd as it's never used by clockevent core, as per comments it's only meant for "non CPU local devices". Reported-by: Serge Semin Closes: https://lore.kernel.org/linux-mips/6szkkqxpsw26zajwysdrwplpjvhl5abpnmxgu2xuj3dkzjnvsf@4daqrz4mf44k/ Signed-off-by: Jiaxun Yang Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Serge Semin Tested-by: Serge Semin Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/cevt-r4k.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c index 368e8475870f..5f6e9e2ebbdb 100644 --- a/arch/mips/kernel/cevt-r4k.c +++ b/arch/mips/kernel/cevt-r4k.c @@ -303,13 +303,6 @@ int r4k_clockevent_init(void) if (!c0_compare_int_usable()) return -ENXIO; - /* - * With vectored interrupts things are getting platform specific. - * get_c0_compare_int is a hook to allow a platform to return the - * interrupt number of its liking. - */ - irq = get_c0_compare_int(); - cd = &per_cpu(mips_clockevent_device, cpu); cd->name = "MIPS"; @@ -320,7 +313,6 @@ int r4k_clockevent_init(void) min_delta = calculate_min_delta(); cd->rating = 300; - cd->irq = irq; cd->cpumask = cpumask_of(cpu); cd->set_next_event = mips_next_event; cd->event_handler = mips_event_handler; @@ -332,6 +324,13 @@ int r4k_clockevent_init(void) cp0_timer_irq_installed = 1; + /* + * With vectored interrupts things are getting platform specific. + * get_c0_compare_int is a hook to allow a platform to return the + * interrupt number of its liking. + */ + irq = get_c0_compare_int(); + if (request_irq(irq, c0_compare_interrupt, flags, "timer", c0_compare_interrupt)) pr_err("Failed to request irq %d (timer)\n", irq); -- cgit From 1eacdd71b3436b54d5fc8218c4bb0187d92a6892 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 20 Aug 2024 09:54:30 +0200 Subject: netfilter: nft_counter: Disable BH in nft_counter_offload_stats(). The sequence counter nft_counter_seq is a per-CPU counter. There is no lock associated with it. nft_counter_do_eval() is using the same counter and disables BH which suggest that it can be invoked from a softirq. This in turn means that nft_counter_offload_stats(), which disables only preemption, can be interrupted by nft_counter_do_eval() leading to two writer for one seqcount_t. This can lead to loosing stats or reading statistics while they are updated. Disable BH during stats update in nft_counter_offload_stats() to ensure one writer at a time. Fixes: b72920f6e4a9d ("netfilter: nftables: counter hardware offload support") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_counter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 291ed2026367..16f40b503d37 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -265,7 +265,7 @@ static void nft_counter_offload_stats(struct nft_expr *expr, struct nft_counter *this_cpu; seqcount_t *myseq; - preempt_disable(); + local_bh_disable(); this_cpu = this_cpu_ptr(priv->counter); myseq = this_cpu_ptr(&nft_counter_seq); @@ -273,7 +273,7 @@ static void nft_counter_offload_stats(struct nft_expr *expr, this_cpu->packets += stats->pkts; this_cpu->bytes += stats->bytes; write_seqcount_end(myseq); - preempt_enable(); + local_bh_enable(); } void nft_counter_init_seqcount(void) -- cgit From a0b39e2dc7017ac667b70bdeee5293e410fab2fb Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 20 Aug 2024 09:54:31 +0200 Subject: netfilter: nft_counter: Synchronize nft_counter_reset() against reader. nft_counter_reset() resets the counter by subtracting the previously retrieved value from the counter. This is a write operation on the counter and as such it requires to be performed with a write sequence of nft_counter_seq to serialize against its possible reader. Update the packets/ bytes within write-sequence of nft_counter_seq. Fixes: d84701ecbcd6a ("netfilter: nft_counter: rework atomic dump and reset") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_counter.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 16f40b503d37..eab0dc66bee6 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -107,11 +107,16 @@ static void nft_counter_reset(struct nft_counter_percpu_priv *priv, struct nft_counter *total) { struct nft_counter *this_cpu; + seqcount_t *myseq; local_bh_disable(); this_cpu = this_cpu_ptr(priv->counter); + myseq = this_cpu_ptr(&nft_counter_seq); + + write_seqcount_begin(myseq); this_cpu->packets -= total->packets; this_cpu->bytes -= total->bytes; + write_seqcount_end(myseq); local_bh_enable(); } -- cgit From 46ee21e9f59205e54943dfe51b2dc8a9352ca37d Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 16 Aug 2024 09:36:26 -0700 Subject: platform/x86: ISST: Fix return value on last invalid resource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When only the last resource is invalid, tpmi_sst_dev_add() is returing error even if there are other valid resources before. This function should return error when there are no valid resources. Here tpmi_sst_dev_add() is returning "ret" variable. But this "ret" variable contains the failure status of last call to sst_main(), which failed for the invalid resource. But there may be other valid resources before the last entry. To address this, do not update "ret" variable for sst_main() return status. If there are no valid resources, it is already checked for by !inst below the loop and -ENODEV is returned. Fixes: 9d1d36268f3d ("platform/x86: ISST: Support partitioned systems") Signed-off-by: Srinivas Pandruvada Cc: stable@vger.kernel.org # 6.10+ Link: https://lore.kernel.org/r/20240816163626.415762-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c index 7fa360073f6e..404582307109 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c @@ -1549,8 +1549,7 @@ int tpmi_sst_dev_add(struct auxiliary_device *auxdev) goto unlock_free; } - ret = sst_main(auxdev, &pd_info[i]); - if (ret) { + if (sst_main(auxdev, &pd_info[i])) { /* * This entry is not valid, hardware can partially * populate dies. In this case MMIO will have 0xFFs. -- cgit From 4b3e33fcc38f7750604b065c55a43e94c5bc3145 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 15 Aug 2024 17:14:16 +0200 Subject: ip6_tunnel: Fix broken GRO GRO code checks for matching layer 2 headers to see, if packet belongs to the same flow and because ip6 tunnel set dev->hard_header_len this check fails in cases, where it shouldn't. To fix this don't set hard_header_len, but use needed_headroom like ipv4/ip_tunnel.c does. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Thomas Bogendoerfer Link: https://patch.msgid.link/20240815151419.109864-1-tbogendoerfer@suse.de Signed-off-by: Paolo Abeni --- net/ipv6/ip6_tunnel.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9dee0c127955..87dfb565a9f8 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1507,7 +1507,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) tdev = __dev_get_by_index(t->net, p->link); if (tdev) { - dev->hard_header_len = tdev->hard_header_len + t_hlen; + dev->needed_headroom = tdev->hard_header_len + + tdev->needed_headroom + t_hlen; mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU); mtu = mtu - t_hlen; @@ -1731,7 +1732,9 @@ ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr, int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { struct ip6_tnl *tnl = netdev_priv(dev); + int t_hlen; + t_hlen = tnl->hlen + sizeof(struct ipv6hdr); if (tnl->parms.proto == IPPROTO_IPV6) { if (new_mtu < IPV6_MIN_MTU) return -EINVAL; @@ -1740,10 +1743,10 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; } if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) { - if (new_mtu > IP6_MAX_MTU - dev->hard_header_len) + if (new_mtu > IP6_MAX_MTU - dev->hard_header_len - t_hlen) return -EINVAL; } else { - if (new_mtu > IP_MAX_MTU - dev->hard_header_len) + if (new_mtu > IP_MAX_MTU - dev->hard_header_len - t_hlen) return -EINVAL; } WRITE_ONCE(dev->mtu, new_mtu); @@ -1887,12 +1890,11 @@ ip6_tnl_dev_init_gen(struct net_device *dev) t_hlen = t->hlen + sizeof(struct ipv6hdr); dev->type = ARPHRD_TUNNEL6; - dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; dev->min_mtu = ETH_MIN_MTU; - dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len; + dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len - t_hlen; netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); netdev_lockdep_set_classes(dev); -- cgit From 6275c7bc8dd07644ea8142a1773d826800f0f3f7 Mon Sep 17 00:00:00 2001 From: Ben Whitten Date: Sun, 11 Aug 2024 22:22:11 +0100 Subject: mmc: dw_mmc: allow biu and ciu clocks to defer Fix a race condition if the clock provider comes up after mmc is probed, this causes mmc to fail without retrying. When given the DEFER error from the clk source, pass it on up the chain. Fixes: f90a0612f0e1 ("mmc: dw_mmc: lookup for optional biu and ciu clocks") Signed-off-by: Ben Whitten Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240811212212.123255-1-ben.whitten@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 2333ef4893ee..e9f6e4e62290 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -3299,6 +3299,10 @@ int dw_mci_probe(struct dw_mci *host) host->biu_clk = devm_clk_get(host->dev, "biu"); if (IS_ERR(host->biu_clk)) { dev_dbg(host->dev, "biu clock not available\n"); + ret = PTR_ERR(host->biu_clk); + if (ret == -EPROBE_DEFER) + return ret; + } else { ret = clk_prepare_enable(host->biu_clk); if (ret) { @@ -3310,6 +3314,10 @@ int dw_mci_probe(struct dw_mci *host) host->ciu_clk = devm_clk_get(host->dev, "ciu"); if (IS_ERR(host->ciu_clk)) { dev_dbg(host->dev, "ciu clock not available\n"); + ret = PTR_ERR(host->ciu_clk); + if (ret == -EPROBE_DEFER) + goto err_clk_biu; + host->bus_hz = host->pdata->bus_hz; } else { ret = clk_prepare_enable(host->ciu_clk); -- cgit From ea72ce5da22806d5713f3ffb39a6d5ae73841f93 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Aug 2024 00:29:36 +0200 Subject: x86/kaslr: Expose and use the end of the physical memory address space iounmap() on x86 occasionally fails to unmap because the provided valid ioremap address is not below high_memory. It turned out that this happens due to KASLR. KASLR uses the full address space between PAGE_OFFSET and vaddr_end to randomize the starting points of the direct map, vmalloc and vmemmap regions. It thereby limits the size of the direct map by using the installed memory size plus an extra configurable margin for hot-plug memory. This limitation is done to gain more randomization space because otherwise only the holes between the direct map, vmalloc, vmemmap and vaddr_end would be usable for randomizing. The limited direct map size is not exposed to the rest of the kernel, so the memory hot-plug and resource management related code paths still operate under the assumption that the available address space can be determined with MAX_PHYSMEM_BITS. request_free_mem_region() allocates from (1 << MAX_PHYSMEM_BITS) - 1 downwards. That means the first allocation happens past the end of the direct map and if unlucky this address is in the vmalloc space, which causes high_memory to become greater than VMALLOC_START and consequently causes iounmap() to fail for valid ioremap addresses. MAX_PHYSMEM_BITS cannot be changed for that because the randomization does not align with address bit boundaries and there are other places which actually require to know the maximum number of address bits. All remaining usage sites of MAX_PHYSMEM_BITS have been analyzed and found to be correct. Cure this by exposing the end of the direct map via PHYSMEM_END and use that for the memory hot-plug and resource management related places instead of relying on MAX_PHYSMEM_BITS. In the KASLR case PHYSMEM_END maps to a variable which is initialized by the KASLR initialization and otherwise it is based on MAX_PHYSMEM_BITS as before. To prevent future hickups add a check into add_pages() to catch callers trying to add memory above PHYSMEM_END. Fixes: 0483e1fa6e09 ("x86/mm: Implement ASLR for kernel memory regions") Reported-by: Max Ramanouski Reported-by: Alistair Popple Signed-off-by: Thomas Gleixner Tested-By: Max Ramanouski Tested-by: Alistair Popple Reviewed-by: Dan Williams Reviewed-by: Alistair Popple Reviewed-by: Kees Cook Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/87ed6soy3z.ffs@tglx --- arch/x86/include/asm/page_64.h | 1 + arch/x86/include/asm/pgtable_64_types.h | 4 ++++ arch/x86/mm/init_64.c | 4 ++++ arch/x86/mm/kaslr.c | 32 ++++++++++++++++++++++++++------ include/linux/mm.h | 4 ++++ kernel/resource.c | 6 ++---- mm/memory_hotplug.c | 2 +- mm/sparse.c | 2 +- 8 files changed, 43 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index af4302d79b59..f3d257c45225 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -17,6 +17,7 @@ extern unsigned long phys_base; extern unsigned long page_offset_base; extern unsigned long vmalloc_base; extern unsigned long vmemmap_base; +extern unsigned long physmem_end; static __always_inline unsigned long __phys_addr_nodebug(unsigned long x) { diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 9053dfe9fa03..a98e53491a4e 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -140,6 +140,10 @@ extern unsigned int ptrs_per_p4d; # define VMEMMAP_START __VMEMMAP_BASE_L4 #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ +#ifdef CONFIG_RANDOMIZE_MEMORY +# define PHYSMEM_END physmem_end +#endif + /* * End of the region for which vmalloc page tables are pre-allocated. * For non-KMSAN builds, this is the same as VMALLOC_END. diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d8dbeac8b206..ff253648706f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -958,8 +958,12 @@ static void update_end_of_memory_vars(u64 start, u64 size) int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, struct mhp_params *params) { + unsigned long end = ((start_pfn + nr_pages) << PAGE_SHIFT) - 1; int ret; + if (WARN_ON_ONCE(end > PHYSMEM_END)) + return -ERANGE; + ret = __add_pages(nid, start_pfn, nr_pages, params); WARN_ON_ONCE(ret); diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 37db264866b6..230f1dee4f09 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -47,13 +47,24 @@ static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE; */ static __initdata struct kaslr_memory_region { unsigned long *base; + unsigned long *end; unsigned long size_tb; } kaslr_regions[] = { - { &page_offset_base, 0 }, - { &vmalloc_base, 0 }, - { &vmemmap_base, 0 }, + { + .base = &page_offset_base, + .end = &physmem_end, + }, + { + .base = &vmalloc_base, + }, + { + .base = &vmemmap_base, + }, }; +/* The end of the possible address space for physical memory */ +unsigned long physmem_end __ro_after_init; + /* Get size in bytes used by the memory region */ static inline unsigned long get_padding(struct kaslr_memory_region *region) { @@ -82,6 +93,8 @@ void __init kernel_randomize_memory(void) BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE); BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); + /* Preset the end of the possible address space for physical memory */ + physmem_end = ((1ULL << MAX_PHYSMEM_BITS) - 1); if (!kaslr_memory_enabled()) return; @@ -128,11 +141,18 @@ void __init kernel_randomize_memory(void) vaddr += entropy; *kaslr_regions[i].base = vaddr; + /* Calculate the end of the region */ + vaddr += get_padding(&kaslr_regions[i]); /* - * Jump the region and add a minimum padding based on - * randomization alignment. + * KASLR trims the maximum possible size of the + * direct-map. Update the physmem_end boundary. + * No rounding required as the region starts + * PUD aligned and size is in units of TB. */ - vaddr += get_padding(&kaslr_regions[i]); + if (kaslr_regions[i].end) + *kaslr_regions[i].end = __pa_nodebug(vaddr - 1); + + /* Add a minimum padding based on randomization alignment. */ vaddr = round_up(vaddr + 1, PUD_SIZE); remain_entropy -= entropy; } diff --git a/include/linux/mm.h b/include/linux/mm.h index c4b238a20b76..b3864156eaa4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -97,6 +97,10 @@ extern const int mmap_rnd_compat_bits_max; extern int mmap_rnd_compat_bits __read_mostly; #endif +#ifndef PHYSMEM_END +# define PHYSMEM_END ((1ULL << MAX_PHYSMEM_BITS) - 1) +#endif + #include #include diff --git a/kernel/resource.c b/kernel/resource.c index 14777afb0a99..a83040fde236 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1826,8 +1826,7 @@ static resource_size_t gfr_start(struct resource *base, resource_size_t size, if (flags & GFR_DESCENDING) { resource_size_t end; - end = min_t(resource_size_t, base->end, - (1ULL << MAX_PHYSMEM_BITS) - 1); + end = min_t(resource_size_t, base->end, PHYSMEM_END); return end - size + 1; } @@ -1844,8 +1843,7 @@ static bool gfr_continue(struct resource *base, resource_size_t addr, * @size did not wrap 0. */ return addr > addr - size && - addr <= min_t(resource_size_t, base->end, - (1ULL << MAX_PHYSMEM_BITS) - 1); + addr <= min_t(resource_size_t, base->end, PHYSMEM_END); } static resource_size_t gfr_next(resource_size_t addr, resource_size_t size, diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 66267c26ca1b..951878ab627a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1681,7 +1681,7 @@ struct range __weak arch_get_mappable_range(void) struct range mhp_get_pluggable_range(bool need_mapping) { - const u64 max_phys = (1ULL << MAX_PHYSMEM_BITS) - 1; + const u64 max_phys = PHYSMEM_END; struct range mhp_range; if (need_mapping) { diff --git a/mm/sparse.c b/mm/sparse.c index e4b830091d13..0c3bff882033 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -129,7 +129,7 @@ static inline int sparse_early_nid(struct mem_section *section) static void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn, unsigned long *end_pfn) { - unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT); + unsigned long max_sparsemem_pfn = (PHYSMEM_END + 1) >> PAGE_SHIFT; /* * Sanity checks - do not allow an architecture to pass -- cgit From a1e627af32ed60713941cbfc8075d44cad07f6dd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 20 Aug 2024 11:44:08 +0300 Subject: mmc: mmc_test: Fix NULL dereference on allocation failure If the "test->highmem = alloc_pages()" allocation fails then calling __free_pages(test->highmem) will result in a NULL dereference. Also change the error code to -ENOMEM instead of returning success. Fixes: 2661081f5ab9 ("mmc_test: highmem tests") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/8c90be28-67b4-4b0d-a105-034dc72a0b31@stanley.mountain Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc_test.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index 8f7f587a0025..b7f627a9fdea 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -3125,13 +3125,13 @@ static ssize_t mtf_test_write(struct file *file, const char __user *buf, test->buffer = kzalloc(BUFFER_SIZE, GFP_KERNEL); #ifdef CONFIG_HIGHMEM test->highmem = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, BUFFER_ORDER); + if (!test->highmem) { + count = -ENOMEM; + goto free_test_buffer; + } #endif -#ifdef CONFIG_HIGHMEM - if (test->buffer && test->highmem) { -#else if (test->buffer) { -#endif mutex_lock(&mmc_test_lock); mmc_test_run(test, testcase); mutex_unlock(&mmc_test_lock); @@ -3139,6 +3139,7 @@ static ssize_t mtf_test_write(struct file *file, const char __user *buf, #ifdef CONFIG_HIGHMEM __free_pages(test->highmem, BUFFER_ORDER); +free_test_buffer: #endif kfree(test->buffer); kfree(test); -- cgit From 783bf5d09f86b9736605f3e01a3472e55ef98ff8 Mon Sep 17 00:00:00 2001 From: Carlos Song Date: Tue, 20 Aug 2024 15:06:58 +0800 Subject: spi: spi-fsl-lpspi: limit PRESCALE bit in TCR register Referring to the errata ERR051608 of I.MX93, LPSPI TCR[PRESCALE] can only be configured to be 0 or 1, other values are not valid and will cause LPSPI to not work. Add the prescale limitation for LPSPI in I.MX93. Other platforms are not affected. Signed-off-by: Carlos Song Link: https://patch.msgid.link/20240820070658.672127-1-carlos.song@nxp.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index be261ac09df8..350c5d91d869 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -82,6 +82,10 @@ #define TCR_RXMSK BIT(19) #define TCR_TXMSK BIT(18) +struct fsl_lpspi_devtype_data { + u8 prescale_max; +}; + struct lpspi_config { u8 bpw; u8 chip_select; @@ -119,10 +123,25 @@ struct fsl_lpspi_data { bool usedma; struct completion dma_rx_completion; struct completion dma_tx_completion; + + const struct fsl_lpspi_devtype_data *devtype_data; +}; + +/* + * ERR051608 fixed or not: + * https://www.nxp.com/docs/en/errata/i.MX93_1P87f.pdf + */ +static struct fsl_lpspi_devtype_data imx93_lpspi_devtype_data = { + .prescale_max = 1, +}; + +static struct fsl_lpspi_devtype_data imx7ulp_lpspi_devtype_data = { + .prescale_max = 8, }; static const struct of_device_id fsl_lpspi_dt_ids[] = { - { .compatible = "fsl,imx7ulp-spi", }, + { .compatible = "fsl,imx7ulp-spi", .data = &imx7ulp_lpspi_devtype_data,}, + { .compatible = "fsl,imx93-spi", .data = &imx93_lpspi_devtype_data,}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, fsl_lpspi_dt_ids); @@ -297,9 +316,11 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) { struct lpspi_config config = fsl_lpspi->config; unsigned int perclk_rate, scldiv, div; + u8 prescale_max; u8 prescale; perclk_rate = clk_get_rate(fsl_lpspi->clk_per); + prescale_max = fsl_lpspi->devtype_data->prescale_max; if (!config.speed_hz) { dev_err(fsl_lpspi->dev, @@ -315,7 +336,7 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) div = DIV_ROUND_UP(perclk_rate, config.speed_hz); - for (prescale = 0; prescale < 8; prescale++) { + for (prescale = 0; prescale < prescale_max; prescale++) { scldiv = div / (1 << prescale) - 2; if (scldiv < 256) { fsl_lpspi->config.prescale = prescale; @@ -822,6 +843,7 @@ static int fsl_lpspi_init_rpm(struct fsl_lpspi_data *fsl_lpspi) static int fsl_lpspi_probe(struct platform_device *pdev) { + const struct fsl_lpspi_devtype_data *devtype_data; struct fsl_lpspi_data *fsl_lpspi; struct spi_controller *controller; struct resource *res; @@ -830,6 +852,10 @@ static int fsl_lpspi_probe(struct platform_device *pdev) u32 temp; bool is_target; + devtype_data = of_device_get_match_data(&pdev->dev); + if (!devtype_data) + return -ENODEV; + is_target = of_property_read_bool((&pdev->dev)->of_node, "spi-slave"); if (is_target) controller = devm_spi_alloc_target(&pdev->dev, @@ -848,6 +874,7 @@ static int fsl_lpspi_probe(struct platform_device *pdev) fsl_lpspi->is_target = is_target; fsl_lpspi->is_only_cs1 = of_property_read_bool((&pdev->dev)->of_node, "fsl,spi-only-use-cs1-sel"); + fsl_lpspi->devtype_data = devtype_data; init_completion(&fsl_lpspi->xfer_done); -- cgit From fc59b9a5f7201b9f7272944596113a82cc7773d5 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 16 Aug 2024 14:48:10 +0300 Subject: bonding: fix bond_ipsec_offload_ok return type Fix the return type which should be bool. Fixes: 955b785ec6b3 ("bonding: fix suspicious RCU usage in bond_ipsec_offload_ok()") Signed-off-by: Nikolay Aleksandrov Reviewed-by: Hangbin Liu Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_main.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1cd92c12e782..85b5868deeea 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -599,34 +599,28 @@ static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) struct net_device *real_dev; struct slave *curr_active; struct bonding *bond; - int err; + bool ok = false; bond = netdev_priv(bond_dev); rcu_read_lock(); curr_active = rcu_dereference(bond->curr_active_slave); real_dev = curr_active->dev; - if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { - err = false; + if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) goto out; - } - if (!xs->xso.real_dev) { - err = false; + if (!xs->xso.real_dev) goto out; - } if (!real_dev->xfrmdev_ops || !real_dev->xfrmdev_ops->xdo_dev_offload_ok || - netif_is_bond_master(real_dev)) { - err = false; + netif_is_bond_master(real_dev)) goto out; - } - err = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); + ok = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); out: rcu_read_unlock(); - return err; + return ok; } static const struct xfrmdev_ops bond_xfrmdev_ops = { -- cgit From 95c90e4ad89d493a7a14fa200082e466e2548f9d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 16 Aug 2024 14:48:11 +0300 Subject: bonding: fix null pointer deref in bond_ipsec_offload_ok We must check if there is an active slave before dereferencing the pointer. Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") Signed-off-by: Nikolay Aleksandrov Reviewed-by: Hangbin Liu Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 85b5868deeea..65ddb71eebcd 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -604,6 +604,8 @@ static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) bond = netdev_priv(bond_dev); rcu_read_lock(); curr_active = rcu_dereference(bond->curr_active_slave); + if (!curr_active) + goto out; real_dev = curr_active->dev; if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) -- cgit From f8cde9805981c50d0c029063dc7d82821806fc44 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 16 Aug 2024 14:48:12 +0300 Subject: bonding: fix xfrm real_dev null pointer dereference We shouldn't set real_dev to NULL because packets can be in transit and xfrm might call xdo_dev_offload_ok() in parallel. All callbacks assume real_dev is set. Example trace: kernel: BUG: unable to handle page fault for address: 0000000000001030 kernel: bond0: (slave eni0np1): making interface the new active one kernel: #PF: supervisor write access in kernel mode kernel: #PF: error_code(0x0002) - not-present page kernel: PGD 0 P4D 0 kernel: Oops: 0002 [#1] PREEMPT SMP kernel: CPU: 4 PID: 2237 Comm: ping Not tainted 6.7.7+ #12 kernel: Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-2.fc40 04/01/2014 kernel: RIP: 0010:nsim_ipsec_offload_ok+0xc/0x20 [netdevsim] kernel: bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA kernel: Code: e0 0f 0b 48 83 7f 38 00 74 de 0f 0b 48 8b 47 08 48 8b 37 48 8b 78 40 e9 b2 e5 9a d7 66 90 0f 1f 44 00 00 48 8b 86 80 02 00 00 <83> 80 30 10 00 00 01 b8 01 00 00 00 c3 0f 1f 80 00 00 00 00 0f 1f kernel: bond0: (slave eni0np1): making interface the new active one kernel: RSP: 0018:ffffabde81553b98 EFLAGS: 00010246 kernel: bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA kernel: kernel: RAX: 0000000000000000 RBX: ffff9eb404e74900 RCX: ffff9eb403d97c60 kernel: RDX: ffffffffc090de10 RSI: ffff9eb404e74900 RDI: ffff9eb3c5de9e00 kernel: RBP: ffff9eb3c0a42000 R08: 0000000000000010 R09: 0000000000000014 kernel: R10: 7974203030303030 R11: 3030303030303030 R12: 0000000000000000 kernel: R13: ffff9eb3c5de9e00 R14: ffffabde81553cc8 R15: ffff9eb404c53000 kernel: FS: 00007f2a77a3ad00(0000) GS:ffff9eb43bd00000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 0000000000001030 CR3: 00000001122ab000 CR4: 0000000000350ef0 kernel: bond0: (slave eni0np1): making interface the new active one kernel: Call Trace: kernel: kernel: ? __die+0x1f/0x60 kernel: bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA kernel: ? page_fault_oops+0x142/0x4c0 kernel: ? do_user_addr_fault+0x65/0x670 kernel: ? kvm_read_and_reset_apf_flags+0x3b/0x50 kernel: bond0: (slave eni0np1): making interface the new active one kernel: ? exc_page_fault+0x7b/0x180 kernel: ? asm_exc_page_fault+0x22/0x30 kernel: ? nsim_bpf_uninit+0x50/0x50 [netdevsim] kernel: bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA kernel: ? nsim_ipsec_offload_ok+0xc/0x20 [netdevsim] kernel: bond0: (slave eni0np1): making interface the new active one kernel: bond_ipsec_offload_ok+0x7b/0x90 [bonding] kernel: xfrm_output+0x61/0x3b0 kernel: bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA kernel: ip_push_pending_frames+0x56/0x80 Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") Signed-off-by: Nikolay Aleksandrov Reviewed-by: Hangbin Liu Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 65ddb71eebcd..f74bacf071fc 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -582,7 +582,6 @@ static void bond_ipsec_del_sa_all(struct bonding *bond) } else { slave->dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs); } - ipsec->xs->xso.real_dev = NULL; } spin_unlock_bh(&bond->ipsec_lock); rcu_read_unlock(); -- cgit From c4c5c5d2ef40a9f67a9241dc5422eac9ffe19547 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 16 Aug 2024 14:48:13 +0300 Subject: bonding: fix xfrm state handling when clearing active slave If the active slave is cleared manually the xfrm state is not flushed. This leads to xfrm add/del imbalance and adding the same state multiple times. For example when the device cannot handle anymore states we get: [ 1169.884811] bond0: (slave eni0np1): bond_ipsec_add_sa_all: failed to add SA because it's filled with the same state after multiple active slave clearings. This change also has a few nice side effects: user-space gets a notification for the change, the old device gets its mac address and promisc/mcast adjusted properly. Fixes: 18cb261afd7b ("bonding: support hardware encryption offload to slaves") Signed-off-by: Nikolay Aleksandrov Reviewed-by: Hangbin Liu Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index bc80fb6397dc..95d59a18c022 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -936,7 +936,7 @@ static int bond_option_active_slave_set(struct bonding *bond, /* check to see if we are clearing active */ if (!slave_dev) { netdev_dbg(bond->dev, "Clearing current active slave\n"); - RCU_INIT_POINTER(bond->curr_active_slave, NULL); + bond_change_active_slave(bond, NULL); bond_select_active_slave(bond); } else { struct slave *old_active = rtnl_dereference(bond->curr_active_slave); -- cgit From 4d936f10ff80274841537a26d1fbfe9984de0ef9 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 20 Aug 2024 09:18:50 +0530 Subject: irqchip/sifive-plic: Probe plic driver early for Allwinner D1 platform The latest Linux RISC-V no longer boots on the Allwinner D1 platform because the sun4i_timer driver fails to get an interrupt from PLIC due to the recent conversion of the PLIC to a platform driver. Converting the sun4i timer to a platform driver does not work either because the D1 does not have a SBI timer available so early boot hangs. See the 'Closes:' link for deeper analysis. The real fix requires enabling the SBI time extension in the platform firmware (OpenSBI) and convert sun4i_timer into platform driver. Unfortunately, the real fix involves changing multiple places and can't be achieved in a short duration and aside of that requires users to update firmware. As a work-around, retrofit PLIC probing such that the PLIC is probed early only for the Allwinner D1 platform and probed as a regular platform driver for rest of the RISC-V platforms. In the process, partially revert some of the previous changes because the PLIC device pointer is not available in all probing paths. Fixes: e306a894bd51 ("irqchip/sifive-plic: Chain to parent IRQ after handlers are ready") Fixes: 8ec99b033147 ("irqchip/sifive-plic: Convert PLIC driver into a platform driver") Suggested-by: Thomas Gleixner Signed-off-by: Anup Patel Signed-off-by: Thomas Gleixner Tested-by: Samuel Holland Tested-by: Emil Renner Berthing Tested-by: Charlie Jenkins Reviewed-by: Samuel Holland Reviewed-by: Charlie Jenkins Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240820034850.3189912-1-apatel@ventanamicro.com Closes: https://lore.kernel.org/lkml/20240814145642.344485-1-emil.renner.berthing@canonical.com/ --- drivers/irqchip/irq-sifive-plic.c | 115 +++++++++++++++++++++++--------------- 1 file changed, 71 insertions(+), 44 deletions(-) diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 9e22f7e378f5..4d9ea718086d 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -3,6 +3,7 @@ * Copyright (C) 2017 SiFive * Copyright (C) 2018 Christoph Hellwig */ +#define pr_fmt(fmt) "riscv-plic: " fmt #include #include #include @@ -63,7 +64,7 @@ #define PLIC_QUIRK_EDGE_INTERRUPT 0 struct plic_priv { - struct device *dev; + struct fwnode_handle *fwnode; struct cpumask lmask; struct irq_domain *irqdomain; void __iomem *regs; @@ -378,8 +379,8 @@ static void plic_handle_irq(struct irq_desc *desc) int err = generic_handle_domain_irq(handler->priv->irqdomain, hwirq); if (unlikely(err)) { - dev_warn_ratelimited(handler->priv->dev, - "can't find mapping for hwirq %lu\n", hwirq); + pr_warn_ratelimited("%pfwP: can't find mapping for hwirq %lu\n", + handler->priv->fwnode, hwirq); } } @@ -408,7 +409,8 @@ static int plic_starting_cpu(unsigned int cpu) enable_percpu_irq(plic_parent_irq, irq_get_trigger_type(plic_parent_irq)); else - dev_warn(handler->priv->dev, "cpu%d: parent irq not available\n", cpu); + pr_warn("%pfwP: cpu%d: parent irq not available\n", + handler->priv->fwnode, cpu); plic_set_threshold(handler, PLIC_ENABLE_THRESHOLD); return 0; @@ -424,38 +426,36 @@ static const struct of_device_id plic_match[] = { {} }; -static int plic_parse_nr_irqs_and_contexts(struct platform_device *pdev, +static int plic_parse_nr_irqs_and_contexts(struct fwnode_handle *fwnode, u32 *nr_irqs, u32 *nr_contexts) { - struct device *dev = &pdev->dev; int rc; /* * Currently, only OF fwnode is supported so extend this * function for ACPI support. */ - if (!is_of_node(dev->fwnode)) + if (!is_of_node(fwnode)) return -EINVAL; - rc = of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", nr_irqs); + rc = of_property_read_u32(to_of_node(fwnode), "riscv,ndev", nr_irqs); if (rc) { - dev_err(dev, "riscv,ndev property not available\n"); + pr_err("%pfwP: riscv,ndev property not available\n", fwnode); return rc; } - *nr_contexts = of_irq_count(to_of_node(dev->fwnode)); + *nr_contexts = of_irq_count(to_of_node(fwnode)); if (WARN_ON(!(*nr_contexts))) { - dev_err(dev, "no PLIC context available\n"); + pr_err("%pfwP: no PLIC context available\n", fwnode); return -EINVAL; } return 0; } -static int plic_parse_context_parent(struct platform_device *pdev, u32 context, +static int plic_parse_context_parent(struct fwnode_handle *fwnode, u32 context, u32 *parent_hwirq, int *parent_cpu) { - struct device *dev = &pdev->dev; struct of_phandle_args parent; unsigned long hartid; int rc; @@ -464,10 +464,10 @@ static int plic_parse_context_parent(struct platform_device *pdev, u32 context, * Currently, only OF fwnode is supported so extend this * function for ACPI support. */ - if (!is_of_node(dev->fwnode)) + if (!is_of_node(fwnode)) return -EINVAL; - rc = of_irq_parse_one(to_of_node(dev->fwnode), context, &parent); + rc = of_irq_parse_one(to_of_node(fwnode), context, &parent); if (rc) return rc; @@ -480,48 +480,55 @@ static int plic_parse_context_parent(struct platform_device *pdev, u32 context, return 0; } -static int plic_probe(struct platform_device *pdev) +static int plic_probe(struct fwnode_handle *fwnode) { int error = 0, nr_contexts, nr_handlers = 0, cpu, i; - struct device *dev = &pdev->dev; unsigned long plic_quirks = 0; struct plic_handler *handler; u32 nr_irqs, parent_hwirq; struct plic_priv *priv; irq_hw_number_t hwirq; + void __iomem *regs; - if (is_of_node(dev->fwnode)) { + if (is_of_node(fwnode)) { const struct of_device_id *id; - id = of_match_node(plic_match, to_of_node(dev->fwnode)); + id = of_match_node(plic_match, to_of_node(fwnode)); if (id) plic_quirks = (unsigned long)id->data; + + regs = of_iomap(to_of_node(fwnode), 0); + if (!regs) + return -ENOMEM; + } else { + return -ENODEV; } - error = plic_parse_nr_irqs_and_contexts(pdev, &nr_irqs, &nr_contexts); + error = plic_parse_nr_irqs_and_contexts(fwnode, &nr_irqs, &nr_contexts); if (error) - return error; + goto fail_free_regs; - priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + error = -ENOMEM; + goto fail_free_regs; + } - priv->dev = dev; + priv->fwnode = fwnode; priv->plic_quirks = plic_quirks; priv->nr_irqs = nr_irqs; + priv->regs = regs; - priv->regs = devm_platform_ioremap_resource(pdev, 0); - if (WARN_ON(!priv->regs)) - return -EIO; - - priv->prio_save = devm_bitmap_zalloc(dev, nr_irqs, GFP_KERNEL); - if (!priv->prio_save) - return -ENOMEM; + priv->prio_save = bitmap_zalloc(nr_irqs, GFP_KERNEL); + if (!priv->prio_save) { + error = -ENOMEM; + goto fail_free_priv; + } for (i = 0; i < nr_contexts; i++) { - error = plic_parse_context_parent(pdev, i, &parent_hwirq, &cpu); + error = plic_parse_context_parent(fwnode, i, &parent_hwirq, &cpu); if (error) { - dev_warn(dev, "hwirq for context%d not found\n", i); + pr_warn("%pfwP: hwirq for context%d not found\n", fwnode, i); continue; } @@ -543,7 +550,7 @@ static int plic_probe(struct platform_device *pdev) } if (cpu < 0) { - dev_warn(dev, "Invalid cpuid for context %d\n", i); + pr_warn("%pfwP: Invalid cpuid for context %d\n", fwnode, i); continue; } @@ -554,7 +561,7 @@ static int plic_probe(struct platform_device *pdev) */ handler = per_cpu_ptr(&plic_handlers, cpu); if (handler->present) { - dev_warn(dev, "handler already present for context %d.\n", i); + pr_warn("%pfwP: handler already present for context %d.\n", fwnode, i); plic_set_threshold(handler, PLIC_DISABLE_THRESHOLD); goto done; } @@ -568,8 +575,8 @@ static int plic_probe(struct platform_device *pdev) i * CONTEXT_ENABLE_SIZE; handler->priv = priv; - handler->enable_save = devm_kcalloc(dev, DIV_ROUND_UP(nr_irqs, 32), - sizeof(*handler->enable_save), GFP_KERNEL); + handler->enable_save = kcalloc(DIV_ROUND_UP(nr_irqs, 32), + sizeof(*handler->enable_save), GFP_KERNEL); if (!handler->enable_save) goto fail_cleanup_contexts; done: @@ -581,7 +588,7 @@ done: nr_handlers++; } - priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1, + priv->irqdomain = irq_domain_add_linear(to_of_node(fwnode), nr_irqs + 1, &plic_irqdomain_ops, priv); if (WARN_ON(!priv->irqdomain)) goto fail_cleanup_contexts; @@ -619,13 +626,13 @@ done: } } - dev_info(dev, "mapped %d interrupts with %d handlers for %d contexts.\n", - nr_irqs, nr_handlers, nr_contexts); + pr_info("%pfwP: mapped %d interrupts with %d handlers for %d contexts.\n", + fwnode, nr_irqs, nr_handlers, nr_contexts); return 0; fail_cleanup_contexts: for (i = 0; i < nr_contexts; i++) { - if (plic_parse_context_parent(pdev, i, &parent_hwirq, &cpu)) + if (plic_parse_context_parent(fwnode, i, &parent_hwirq, &cpu)) continue; if (parent_hwirq != RV_IRQ_EXT || cpu < 0) continue; @@ -634,17 +641,37 @@ fail_cleanup_contexts: handler->present = false; handler->hart_base = NULL; handler->enable_base = NULL; + kfree(handler->enable_save); handler->enable_save = NULL; handler->priv = NULL; } - return -ENOMEM; + bitmap_free(priv->prio_save); +fail_free_priv: + kfree(priv); +fail_free_regs: + iounmap(regs); + return error; +} + +static int plic_platform_probe(struct platform_device *pdev) +{ + return plic_probe(pdev->dev.fwnode); } static struct platform_driver plic_driver = { .driver = { .name = "riscv-plic", .of_match_table = plic_match, + .suppress_bind_attrs = true, }, - .probe = plic_probe, + .probe = plic_platform_probe, }; builtin_platform_driver(plic_driver); + +static int __init plic_early_probe(struct device_node *node, + struct device_node *parent) +{ + return plic_probe(&node->fwnode); +} + +IRQCHIP_DECLARE(riscv, "allwinner,sun20i-d1-plic", plic_early_probe); -- cgit From f97fd458763a4801d04dbb4a79d9ca6282d293ec Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 18 Aug 2024 18:16:25 +0100 Subject: irqchip/gic-v4: Fix ordering between vmapp and vpe locks The recently established lock ordering mandates that the per-VM vmapp_lock is acquired before taking the per-VPE lock. As it turns out, its_vpe_set_affinity() takes the VPE lock, and then calls into its_send_vmovp(), which itself takes the vmapp lock. Obviously, this is a lock order violation. As its_send_vmovp() is only called from its_vpe_set_affinity(), hoist the vmapp locking from the former into the latter, restoring the expected order. Fixes: f0eb154c39471 ("irqchip/gic-v4: Substitute vmovp_lock for a per-VM lock") Reported-by: Zhou Wang Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240818171625.3030584-1-maz@kernel.org --- drivers/irqchip/irq-gic-v3-its.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 9b34596b3542..fdec478ba5e7 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1329,12 +1329,6 @@ static void its_send_vmovp(struct its_vpe *vpe) return; } - /* - * Protect against concurrent updates of the mapping state on - * individual VMs. - */ - guard(raw_spinlock_irqsave)(&vpe->its_vm->vmapp_lock); - /* * Yet another marvel of the architecture. If using the * its_list "feature", we need to make sure that all ITSs @@ -3824,7 +3818,14 @@ static int its_vpe_set_affinity(struct irq_data *d, * protect us, and that we must ensure nobody samples vpe->col_idx * during the update, hence the lock below which must also be * taken on any vLPI handling path that evaluates vpe->col_idx. + * + * Finally, we must protect ourselves against concurrent updates of + * the mapping state on this VM should the ITS list be in use (see + * the shortcut in its_send_vmovp() otherewise). */ + if (its_list_map) + raw_spin_lock(&vpe->its_vm->vmapp_lock); + from = vpe_to_cpuid_lock(vpe, &flags); table_mask = gic_data_rdist_cpu(from)->vpe_table_mask; @@ -3854,6 +3855,9 @@ out: irq_data_update_effective_affinity(d, cpumask_of(cpu)); vpe_to_cpuid_unlock(vpe, flags); + if (its_list_map) + raw_spin_unlock(&vpe->its_vm->vmapp_lock); + return IRQ_SET_MASK_OK_DONE; } -- cgit From efe81b7bdf7d882d0ce3d183f1571321046da8f1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 20 Aug 2024 11:42:40 +0300 Subject: irqchip/riscv-aplic: Fix an IS_ERR() vs NULL bug in probe() The devm_platform_ioremap_resource() function doesn't return NULL, it returns error pointers. Fix the error handling to match. Fixes: 2333df5ae51e ("irqchip: Add RISC-V advanced PLIC driver for direct-mode") Signed-off-by: Dan Carpenter Signed-off-by: Thomas Gleixner Reviewed-by: Jinjie Ruan Reviewed-by: Anup Patel Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/a5a628d6-81d8-4933-81a8-64aad4743ec4@stanley.mountain --- drivers/irqchip/irq-riscv-aplic-main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-riscv-aplic-main.c b/drivers/irqchip/irq-riscv-aplic-main.c index 28dd175b5764..981fad6fb8f7 100644 --- a/drivers/irqchip/irq-riscv-aplic-main.c +++ b/drivers/irqchip/irq-riscv-aplic-main.c @@ -175,9 +175,9 @@ static int aplic_probe(struct platform_device *pdev) /* Map the MMIO registers */ regs = devm_platform_ioremap_resource(pdev, 0); - if (!regs) { + if (IS_ERR(regs)) { dev_err(dev, "failed map MMIO registers\n"); - return -ENOMEM; + return PTR_ERR(regs); } /* -- cgit From c5af2c90ba5629f0424a8d315f75fb8d91713c3c Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Tue, 20 Aug 2024 17:28:43 +0800 Subject: irqchip/gic-v2m: Fix refcount leak in gicv2m_of_init() gicv2m_of_init() fails to perform an of_node_put() when of_address_to_resource() fails, leading to a refcount leak. Address this by moving the error handling path outside of the loop and making it common to all failure modes. Fixes: 4266ab1a8ff5 ("irqchip/gic-v2m: Refactor to prepare for ACPI support") Signed-off-by: Ma Ke Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240820092843.1219933-1-make24@iscas.ac.cn --- drivers/irqchip/irq-gic-v2m.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index 51af63c046ed..be35c5349986 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -407,12 +407,12 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle, ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis, &res, 0); - if (ret) { - of_node_put(child); + if (ret) break; - } } + if (ret && child) + of_node_put(child); if (!ret) ret = gicv2m_allocate_domains(parent); if (ret) -- cgit From 50b2143356e888777fc5bca023c39f34f404613a Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 7 Aug 2024 12:53:24 +0200 Subject: ice: fix page reuse when PAGE_SIZE is over 8k Architectures that have PAGE_SIZE >= 8192 such as arm64 should act the same as x86 currently, meaning reuse of a page should only take place when no one else is busy with it. Do two things independently of underlying PAGE_SIZE: - store the page count under ice_rx_buf::pgcnt - then act upon its value vs ice_rx_buf::pagecnt_bias when making the decision regarding page reuse Fixes: 2b245cb29421 ("ice: Implement transmit and NAPI support") Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 8d25b6981269..50211188c1a7 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -837,16 +837,15 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) if (!dev_page_is_reusable(page)) return false; -#if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1)) return false; -#else +#if (PAGE_SIZE >= 8192) #define ICE_LAST_OFFSET \ (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048) if (rx_buf->page_offset > ICE_LAST_OFFSET) return false; -#endif /* PAGE_SIZE < 8192) */ +#endif /* PAGE_SIZE >= 8192) */ /* If we have drained the page fragment pool we need to update * the pagecnt_bias and page count so that we fully restock the @@ -949,12 +948,7 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, struct ice_rx_buf *rx_buf; rx_buf = &rx_ring->rx_buf[ntc]; - rx_buf->pgcnt = -#if (PAGE_SIZE < 8192) - page_count(rx_buf->page); -#else - 0; -#endif + rx_buf->pgcnt = page_count(rx_buf->page); prefetchw(rx_buf->page); if (!size) -- cgit From b966ad832942b5a11e002f9b5ef102b08425b84a Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 7 Aug 2024 12:53:25 +0200 Subject: ice: fix ICE_LAST_OFFSET formula For bigger PAGE_SIZE archs, ice driver works on 3k Rx buffers. Therefore, ICE_LAST_OFFSET should take into account ICE_RXBUF_3072, not ICE_RXBUF_2048. Fixes: 7237f5b0dba4 ("ice: introduce legacy Rx flag") Suggested-by: Luiz Capitulino Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 50211188c1a7..4b690952bb40 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -842,7 +842,7 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) return false; #if (PAGE_SIZE >= 8192) #define ICE_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048) + (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_3072) if (rx_buf->page_offset > ICE_LAST_OFFSET) return false; #endif /* PAGE_SIZE >= 8192) */ -- cgit From d53d4dcce69be5773e2d0878c9899ebfbf58c393 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 7 Aug 2024 12:53:26 +0200 Subject: ice: fix truesize operations for PAGE_SIZE >= 8192 When working on multi-buffer packet on arch that has PAGE_SIZE >= 8192, truesize is calculated and stored in xdp_buff::frame_sz per each processed Rx buffer. This means that frame_sz will contain the truesize based on last received buffer, but commit 1dc1a7e7f410 ("ice: Centrallize Rx buffer recycling") assumed this value will be constant for each buffer, which breaks the page recycling scheme and mess up the way we update the page::page_offset. To fix this, let us work on constant truesize when PAGE_SIZE >= 8192 instead of basing this on size of a packet read from Rx descriptor. This way we can simplify the code and avoid calculating truesize per each received frame and on top of that when using xdp_update_skb_shared_info(), current formula for truesize update will be valid. This means ice_rx_frame_truesize() can be removed altogether. Furthermore, first call to it within ice_clean_rx_irq() for 4k PAGE_SIZE was redundant as xdp_buff::frame_sz is initialized via xdp_init_buff() in ice_vsi_cfg_rxq(). This should have been removed at the point where xdp_buff struct started to be a member of ice_rx_ring and it was no longer a stack based variable. There are two fixes tags as my understanding is that the first one exposed us to broken truesize and page_offset handling and then second introduced broken skb_shared_info update in ice_{construct,build}_skb(). Reported-and-tested-by: Luiz Capitulino Closes: https://lore.kernel.org/netdev/8f9e2a5c-fd30-4206-9311-946a06d031bb@redhat.com/ Fixes: 1dc1a7e7f410 ("ice: Centrallize Rx buffer recycling") Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_base.c | 21 +++++++++++++++++++- drivers/net/ethernet/intel/ice/ice_txrx.c | 33 ------------------------------- 2 files changed, 20 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 1facf179a96f..f448d3a84564 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -512,6 +512,25 @@ static void ice_xsk_pool_fill_cb(struct ice_rx_ring *ring) xsk_pool_fill_cb(ring->xsk_pool, &desc); } +/** + * ice_get_frame_sz - calculate xdp_buff::frame_sz + * @rx_ring: the ring being configured + * + * Return frame size based on underlying PAGE_SIZE + */ +static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring) +{ + unsigned int frame_sz; + +#if (PAGE_SIZE >= 8192) + frame_sz = rx_ring->rx_buf_len; +#else + frame_sz = ice_rx_pg_size(rx_ring) / 2; +#endif + + return frame_sz; +} + /** * ice_vsi_cfg_rxq - Configure an Rx queue * @ring: the ring being configured @@ -576,7 +595,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) } } - xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq); + xdp_init_buff(&ring->xdp, ice_get_frame_sz(ring), &ring->xdp_rxq); ring->xdp.data = NULL; ring->xdp_ext.pkt_ctx = &ring->pkt_ctx; err = ice_setup_rx_ctx(ring); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 4b690952bb40..c9bc3f1add5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -521,30 +521,6 @@ err: return -ENOMEM; } -/** - * ice_rx_frame_truesize - * @rx_ring: ptr to Rx ring - * @size: size - * - * calculate the truesize with taking into the account PAGE_SIZE of - * underlying arch - */ -static unsigned int -ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size) -{ - unsigned int truesize; - -#if (PAGE_SIZE < 8192) - truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ -#else - truesize = rx_ring->rx_offset ? - SKB_DATA_ALIGN(rx_ring->rx_offset + size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : - SKB_DATA_ALIGN(size); -#endif - return truesize; -} - /** * ice_run_xdp - Executes an XDP program on initialized xdp_buff * @rx_ring: Rx ring @@ -1154,11 +1130,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) bool failure; u32 first; - /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ -#if (PAGE_SIZE < 8192) - xdp->frame_sz = ice_rx_frame_truesize(rx_ring, 0); -#endif - xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (xdp_prog) { xdp_ring = rx_ring->xdp_ring; @@ -1217,10 +1188,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) hard_start = page_address(rx_buf->page) + rx_buf->page_offset - offset; xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); -#if (PAGE_SIZE > 4096) - /* At larger PAGE_SIZE, frame_sz depend on len size */ - xdp->frame_sz = ice_rx_frame_truesize(rx_ring, size); -#endif xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { break; -- cgit From 503ab6ee40fc103ea55cc9e50bb879e571d65aac Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Mon, 19 Aug 2024 09:17:42 +0200 Subject: ice: use internal pf id instead of function number Use always the same pf id in devlink port number. When doing pass-through the PF to VM bus info func number can be any value. Fixes: 2ae0aa4758b0 ("ice: Move devlink port to PF/VF struct") Reviewed-by: Wojciech Drewek Suggested-by: Jiri Pirko Signed-off-by: Michal Swiatkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/devlink/devlink_port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c index 00fed5a61d62..62ef8e2fb5f1 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c @@ -337,7 +337,7 @@ int ice_devlink_create_pf_port(struct ice_pf *pf) return -EIO; attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = pf->hw.bus.func; + attrs.phys.port_number = pf->hw.pf_id; /* As FW supports only port split options for whole device, * set port split options only for first PF. @@ -455,7 +455,7 @@ int ice_devlink_create_vf_port(struct ice_vf *vf) return -EINVAL; attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF; - attrs.pci_vf.pf = pf->hw.bus.func; + attrs.pci_vf.pf = pf->hw.pf_id; attrs.pci_vf.vf = vf->vf_id; ice_devlink_set_switch_id(pf, &attrs.switch_id); -- cgit From c50e7475961c36ec4d21d60af055b32f9436b431 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 17 Aug 2024 09:52:46 +0300 Subject: dpaa2-switch: Fix error checking in dpaa2_switch_seed_bp() The dpaa2_switch_add_bufs() function returns the number of bufs that it was able to add. It returns BUFS_PER_CMD (7) for complete success or a smaller number if there are not enough pages available. However, the error checking is looking at the total number of bufs instead of the number which were added on this iteration. Thus the error checking only works correctly for the first iteration through the loop and subsequent iterations are always counted as a success. Fix this by checking only the bufs added in the current iteration. Fixes: 0b1b71370458 ("staging: dpaa2-switch: handle Rx path on control interface") Signed-off-by: Dan Carpenter Reviewed-by: Simon Horman Reviewed-by: Ioana Ciornei Tested-by: Ioana Ciornei Link: https://patch.msgid.link/eec27f30-b43f-42b6-b8ee-04a6f83423b6@stanley.mountain Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index a71f848adc05..a293b08f36d4 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -2638,13 +2638,14 @@ static int dpaa2_switch_refill_bp(struct ethsw_core *ethsw) static int dpaa2_switch_seed_bp(struct ethsw_core *ethsw) { - int *count, i; + int *count, ret, i; for (i = 0; i < DPAA2_ETHSW_NUM_BUFS; i += BUFS_PER_CMD) { + ret = dpaa2_switch_add_bufs(ethsw, ethsw->bpid); count = ðsw->buf_count; - *count += dpaa2_switch_add_bufs(ethsw, ethsw->bpid); + *count += ret; - if (unlikely(*count < BUFS_PER_CMD)) + if (unlikely(ret < BUFS_PER_CMD)) return -ENOMEM; } -- cgit From 80a1e7b83bb1834b5568a3872e64c05795d88f31 Mon Sep 17 00:00:00 2001 From: Nikolay Kuratov Date: Mon, 19 Aug 2024 10:54:08 +0300 Subject: cxgb4: add forgotten u64 ivlan cast before shift It is done everywhere in cxgb4 code, e.g. in is_filter_exact_match() There is no reason it should not be done here Found by Linux Verification Center (linuxtesting.org) with SVACE Signed-off-by: Nikolay Kuratov Cc: stable@vger.kernel.org Fixes: 12b276fbf6e0 ("cxgb4: add support to create hash filters") Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20240819075408.92378-1-kniv@yandex-team.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 786ceae34488..dd9e68465e69 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -1244,7 +1244,8 @@ static u64 hash_filter_ntuple(struct ch_filter_specification *fs, * in the Compressed Filter Tuple. */ if (tp->vlan_shift >= 0 && fs->mask.ivlan) - ntuple |= (FT_VLAN_VLD_F | fs->val.ivlan) << tp->vlan_shift; + ntuple |= (u64)(FT_VLAN_VLD_F | + fs->val.ivlan) << tp->vlan_shift; if (tp->port_shift >= 0 && fs->mask.iport) ntuple |= (u64)fs->val.iport << tp->port_shift; -- cgit From 8aba27c4a5020abdf60149239198297f88338a8d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 16 Aug 2024 17:20:34 +0200 Subject: igb: cope with large MAX_SKB_FRAGS Sabrina reports that the igb driver does not cope well with large MAX_SKB_FRAG values: setting MAX_SKB_FRAG to 45 causes payload corruption on TX. An easy reproducer is to run ssh to connect to the machine. With MAX_SKB_FRAGS=17 it works, with MAX_SKB_FRAGS=45 it fails. This has been reported originally in https://bugzilla.redhat.com/show_bug.cgi?id=2265320 The root cause of the issue is that the driver does not take into account properly the (possibly large) shared info size when selecting the ring layout, and will try to fit two packets inside the same 4K page even when the 1st fraglist will trump over the 2nd head. Address the issue by checking if 2K buffers are insufficient. Fixes: 3948b05950fd ("net: introduce a config option to tweak MAX_SKB_FRAGS") Reported-by: Jan Tluka Reported-by: Jirka Hladky Reported-by: Sabrina Dubroca Tested-by: Sabrina Dubroca Tested-by: Corinna Vinschen Signed-off-by: Paolo Abeni Signed-off-by: Corinna Vinschen Link: https://patch.msgid.link/20240816152034.1453285-1-vinschen@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igb/igb_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 11be39f435f3..33a42b4c21e0 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4808,6 +4808,7 @@ static void igb_set_rx_buffer_len(struct igb_adapter *adapter, #if (PAGE_SIZE < 8192) if (adapter->max_frame_size > IGB_MAX_FRAME_BUILD_SKB || + IGB_2K_TOO_SMALL_WITH_PADDING || rd32(E1000_RCTL) & E1000_RCTL_SBP) set_ring_uses_large_buffer(rx_ring); #endif -- cgit From 6efea5135417ae8194485d1d05ea79a21cf1a11c Mon Sep 17 00:00:00 2001 From: Martin Whitaker Date: Sat, 17 Aug 2024 10:41:41 +0100 Subject: net: dsa: microchip: fix PTP config failure when using multiple ports When performing the port_hwtstamp_set operation, ptp_schedule_worker() will be called if hardware timestamoing is enabled on any of the ports. When using multiple ports for PTP, port_hwtstamp_set is executed for each port. When called for the first time ptp_schedule_worker() returns 0. On subsequent calls it returns 1, indicating the worker is already scheduled. Currently the ksz driver treats 1 as an error and fails to complete the port_hwtstamp_set operation, thus leaving the timestamping configuration for those ports unchanged. This patch fixes this by ignoring the ptp_schedule_worker() return value. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/7aae307a-35ca-4209-a850-7b2749d40f90@martin-whitaker.me.uk Fixes: bb01ad30570b0 ("net: dsa: microchip: ptp: manipulating absolute time using ptp hw clock") Signed-off-by: Martin Whitaker Reviewed-by: Andrew Lunn Acked-by: Arun Ramadoss Link: https://patch.msgid.link/20240817094141.3332-1-foss@martin-whitaker.me.uk Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_ptp.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_ptp.c b/drivers/net/dsa/microchip/ksz_ptp.c index f0bd46e5d4ec..050f17c43ef6 100644 --- a/drivers/net/dsa/microchip/ksz_ptp.c +++ b/drivers/net/dsa/microchip/ksz_ptp.c @@ -266,7 +266,6 @@ static int ksz_ptp_enable_mode(struct ksz_device *dev) struct ksz_port *prt; struct dsa_port *dp; bool tag_en = false; - int ret; dsa_switch_for_each_user_port(dp, dev->ds) { prt = &dev->ports[dp->index]; @@ -277,9 +276,7 @@ static int ksz_ptp_enable_mode(struct ksz_device *dev) } if (tag_en) { - ret = ptp_schedule_worker(ptp_data->clock, 0); - if (ret) - return ret; + ptp_schedule_worker(ptp_data->clock, 0); } else { ptp_cancel_worker_sync(ptp_data->clock); } -- cgit From b8673d56935c32a4e0a1a0b40951fdd313dbf340 Mon Sep 17 00:00:00 2001 From: Andreas Hindborg Date: Thu, 15 Aug 2024 07:49:30 +0000 Subject: rust: kbuild: fix export of bss symbols Symbols in the bss segment are not currently exported. This is a problem for Rust modules that link against statics, that are resident in the kernel image. Thus export symbols in the bss segment. Fixes: 2f7ab1267dc9 ("Kbuild: add Rust support") Signed-off-by: Andreas Hindborg Reviewed-by: Alice Ryhl Tested-by: Alice Ryhl Reviewed-by: Gary Guo Link: https://lore.kernel.org/r/20240815074519.2684107-2-nmi@metaspace.dk [ Reworded slightly. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/Makefile b/rust/Makefile index 8de3ebba9551..f168d2c98a15 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -305,7 +305,7 @@ $(obj)/bindings/bindings_helpers_generated.rs: $(src)/helpers.c FORCE quiet_cmd_exports = EXPORTS $@ cmd_exports = \ $(NM) -p --defined-only $< \ - | awk '/ (T|R|D) / {printf "EXPORT_SYMBOL_RUST_GPL(%s);\n",$$3}' > $@ + | awk '/ (T|R|D|B) / {printf "EXPORT_SYMBOL_RUST_GPL(%s);\n",$$3}' > $@ $(obj)/exports_core_generated.h: $(obj)/core.o FORCE $(call if_changed,exports) -- cgit From 528876d867a23b5198022baf2e388052ca67c952 Mon Sep 17 00:00:00 2001 From: Joseph Huang Date: Mon, 19 Aug 2024 19:52:50 -0400 Subject: net: dsa: mv88e6xxx: Fix out-of-bound access If an ATU violation was caused by a CPU Load operation, the SPID could be larger than DSA_MAX_PORTS (the size of mv88e6xxx_chip.ports[] array). Fixes: 75c05a74e745 ("net: dsa: mv88e6xxx: Fix counting of ATU violations") Signed-off-by: Joseph Huang Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20240819235251.1331763-1-Joseph.Huang@garmin.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/global1_atu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c index ce3b3690c3c0..c47f068f56b3 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_atu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c @@ -457,7 +457,8 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id) trace_mv88e6xxx_atu_full_violation(chip->dev, spid, entry.portvec, entry.mac, fid); - chip->ports[spid].atu_full_violation++; + if (spid < ARRAY_SIZE(chip->ports)) + chip->ports[spid].atu_full_violation++; } return IRQ_HANDLED; -- cgit From c07ff8592d57ed258afee5a5e04991a48dbaf382 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 19 Aug 2024 10:56:45 -0700 Subject: netem: fix return value if duplicate enqueue fails There is a bug in netem_enqueue() introduced by commit 5845f706388a ("net: netem: fix skb length BUG_ON in __skb_to_sgvec") that can lead to a use-after-free. This commit made netem_enqueue() always return NET_XMIT_SUCCESS when a packet is duplicated, which can cause the parent qdisc's q.qlen to be mistakenly incremented. When this happens qlen_notify() may be skipped on the parent during destruction, leaving a dangling pointer for some classful qdiscs like DRR. There are two ways for the bug happen: - If the duplicated packet is dropped by rootq->enqueue() and then the original packet is also dropped. - If rootq->enqueue() sends the duplicated packet to a different qdisc and the original packet is dropped. In both cases NET_XMIT_SUCCESS is returned even though no packets are enqueued at the netem qdisc. The fix is to defer the enqueue of the duplicate packet until after the original packet has been guaranteed to return NET_XMIT_SUCCESS. Fixes: 5845f706388a ("net: netem: fix skb length BUG_ON in __skb_to_sgvec") Reported-by: Budimir Markovic Signed-off-by: Stephen Hemminger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240819175753.5151-1-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 47 +++++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index edc72962ae63..0f8d581438c3 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -446,12 +446,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct netem_sched_data *q = qdisc_priv(sch); /* We don't fill cb now as skb_unshare() may invalidate it */ struct netem_skb_cb *cb; - struct sk_buff *skb2; + struct sk_buff *skb2 = NULL; struct sk_buff *segs = NULL; unsigned int prev_len = qdisc_pkt_len(skb); int count = 1; - int rc = NET_XMIT_SUCCESS; - int rc_drop = NET_XMIT_DROP; /* Do not fool qdisc_drop_all() */ skb->prev = NULL; @@ -480,19 +478,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, skb_orphan_partial(skb); /* - * If we need to duplicate packet, then re-insert at top of the - * qdisc tree, since parent queuer expects that only one - * skb will be queued. + * If we need to duplicate packet, then clone it before + * original is modified. */ - if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { - struct Qdisc *rootq = qdisc_root_bh(sch); - u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ - - q->duplicate = 0; - rootq->enqueue(skb2, rootq, to_free); - q->duplicate = dupsave; - rc_drop = NET_XMIT_SUCCESS; - } + if (count > 1) + skb2 = skb_clone(skb, GFP_ATOMIC); /* * Randomized packet corruption. @@ -504,7 +494,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (skb_is_gso(skb)) { skb = netem_segment(skb, sch, to_free); if (!skb) - return rc_drop; + goto finish_segs; + segs = skb->next; skb_mark_not_on_list(skb); qdisc_skb_cb(skb)->pkt_len = skb->len; @@ -530,7 +521,24 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* re-link segs, so that qdisc_drop_all() frees them all */ skb->next = segs; qdisc_drop_all(skb, sch, to_free); - return rc_drop; + if (skb2) + __qdisc_drop(skb2, to_free); + return NET_XMIT_DROP; + } + + /* + * If doing duplication then re-insert at top of the + * qdisc tree, since parent queuer expects that only one + * skb will be queued. + */ + if (skb2) { + struct Qdisc *rootq = qdisc_root_bh(sch); + u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ + + q->duplicate = 0; + rootq->enqueue(skb2, rootq, to_free); + q->duplicate = dupsave; + skb2 = NULL; } qdisc_qstats_backlog_inc(sch, skb); @@ -601,9 +609,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, } finish_segs: + if (skb2) + __qdisc_drop(skb2, to_free); + if (segs) { unsigned int len, last_len; - int nb; + int rc, nb; len = skb ? skb->len : 0; nb = skb ? 1 : 0; -- cgit From 0005e01e1e875c5e27130c5e2ed0189749d1e08a Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 20 Aug 2024 16:56:19 +0800 Subject: erofs: fix out-of-bound access when z_erofs_gbuf_growsize() partially fails If z_erofs_gbuf_growsize() partially fails on a global buffer due to memory allocation failure or fault injection (as reported by syzbot [1]), new pages need to be freed by comparing to the existing pages to avoid memory leaks. However, the old gbuf->pages[] array may not be large enough, which can lead to null-ptr-deref or out-of-bound access. Fix this by checking against gbuf->nrpages in advance. [1] https://lore.kernel.org/r/000000000000f7b96e062018c6e3@google.com Reported-by: syzbot+242ee56aaa9585553766@syzkaller.appspotmail.com Fixes: d6db47e571dc ("erofs: do not use pagepool in z_erofs_gbuf_growsize()") Cc: # 6.10+ Reviewed-by: Chunhai Guo Reviewed-by: Sandeep Dhavale Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20240820085619.1375963-1-hsiangkao@linux.alibaba.com --- fs/erofs/zutil.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c index 9b53883e5caf..37afe2024840 100644 --- a/fs/erofs/zutil.c +++ b/fs/erofs/zutil.c @@ -111,7 +111,8 @@ int z_erofs_gbuf_growsize(unsigned int nrpages) out: if (i < z_erofs_gbuf_count && tmp_pages) { for (j = 0; j < nrpages; ++j) - if (tmp_pages[j] && tmp_pages[j] != gbuf->pages[j]) + if (tmp_pages[j] && (j >= gbuf->nrpages || + tmp_pages[j] != gbuf->pages[j])) __free_page(tmp_pages[j]); kfree(tmp_pages); } -- cgit From e255683c06df572ead96db5efb5d21be30c0efaa Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:19 +0200 Subject: mptcp: pm: re-using ID of unused removed ADD_ADDR If no subflow is attached to the 'signal' endpoint that is being removed, the addr ID will not be marked as available again. Mark the linked ID as available when removing the address entry from the list to cover this case. Fixes: b6c08380860b ("mptcp: remove addr and subflow in PM netlink") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-1-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 4cae2aa7be5c..26f0329e16bb 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1431,7 +1431,10 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, ret = remove_anno_list_by_saddr(msk, addr); if (ret || force) { spin_lock_bh(&msk->pm.lock); - msk->pm.add_addr_signaled -= ret; + if (ret) { + __set_bit(addr->id, msk->pm.id_avail_bitmap); + msk->pm.add_addr_signaled--; + } mptcp_pm_remove_addr(msk, &list); spin_unlock_bh(&msk->pm.lock); } -- cgit From a13d5aad4dd9a309eecdc33cfd75045bd5f376a3 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:20 +0200 Subject: selftests: mptcp: join: check re-using ID of unused ADD_ADDR This test extends "delete re-add signal" to validate the previous commit. An extra address is announced by the server, but this address cannot be used by the client. The result is that no subflow will be established to this address. Later, the server will delete this extra endpoint, and set a new one, with a valid address, but re-using the same ID. Before the previous commit, the server would not have been able to announce this new address. While at it, extra checks have been added to validate the expected numbers of MPJ, ADD_ADDR and RM_ADDR. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: b6c08380860b ("mptcp: remove addr and subflow in PM netlink") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-2-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 9ea6d698e9d3..25077ccf31d2 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3601,9 +3601,11 @@ endpoint_tests() # remove and re-add if reset "delete re-add signal" && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then - pm_nl_set_limits $ns1 1 1 - pm_nl_set_limits $ns2 1 1 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal test_linkfail=4 speed=20 \ run_tests $ns1 $ns2 10.0.1.1 & local tests_pid=$! @@ -3615,15 +3617,21 @@ endpoint_tests() chk_mptcp_info subflows 1 subflows 1 pm_nl_del_endpoint $ns1 1 10.0.2.1 + pm_nl_del_endpoint $ns1 2 224.0.0.1 sleep 0.5 chk_subflow_nr "after delete" 1 chk_mptcp_info subflows 0 subflows 0 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal wait_mpj $ns2 - chk_subflow_nr "after re-add" 2 - chk_mptcp_info subflows 1 subflows 1 + chk_subflow_nr "after re-add" 3 + chk_mptcp_info subflows 2 subflows 2 mptcp_lib_kill_wait $tests_pid + + chk_join_nr 3 3 3 + chk_add_nr 4 4 + chk_rm_nr 2 1 invert fi } -- cgit From edd8b5d868a4d459f3065493001e293901af758d Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:21 +0200 Subject: mptcp: pm: re-using ID of unused removed subflows If no subflow is attached to the 'subflow' endpoint that is being removed, the addr ID will not be marked as available again. Mark the linked ID as available when removing the 'subflow' endpoint if no subflow is attached to it. While at it, the local_addr_used counter is decremented if the ID was marked as being used to reflect the reality, but also to allow adding new endpoints after that. Fixes: b6c08380860b ("mptcp: remove addr and subflow in PM netlink") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-3-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 26f0329e16bb..8b232a210a06 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1469,8 +1469,17 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr); mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); - if (remove_subflow) + + if (remove_subflow) { mptcp_pm_remove_subflow(msk, &list); + } else if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + /* If the subflow has been used, but now closed */ + spin_lock_bh(&msk->pm.lock); + if (!__test_and_set_bit(entry->addr.id, msk->pm.id_avail_bitmap)) + msk->pm.local_addr_used--; + spin_unlock_bh(&msk->pm.lock); + } + release_sock(sk); next: -- cgit From 65fb58afa341ad68e71e5c4d816b407e6a683a66 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:22 +0200 Subject: selftests: mptcp: join: check re-using ID of closed subflow This test extends "delete and re-add" to validate the previous commit. A new 'subflow' endpoint is added, but the subflow request will be rejected. The result is that no subflow will be established from this address. Later, the endpoint is removed and re-added after having cleared the firewall rule. Before the previous commit, the client would not have been able to create this new subflow. While at it, extra checks have been added to validate the expected numbers of MPJ and RM_ADDR. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: b6c08380860b ("mptcp: remove addr and subflow in PM netlink") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-4-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 27 ++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 25077ccf31d2..fbb0174145ad 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -436,9 +436,10 @@ reset_with_tcp_filter() local ns="${!1}" local src="${2}" local target="${3}" + local chain="${4:-INPUT}" if ! ip netns exec "${ns}" ${iptables} \ - -A INPUT \ + -A "${chain}" \ -s "${src}" \ -p tcp \ -j "${target}"; then @@ -3571,10 +3572,10 @@ endpoint_tests() mptcp_lib_kill_wait $tests_pid fi - if reset "delete and re-add" && + if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then - pm_nl_set_limits $ns1 1 1 - pm_nl_set_limits $ns2 1 1 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow test_linkfail=4 speed=20 \ run_tests $ns1 $ns2 10.0.1.1 & @@ -3591,11 +3592,27 @@ endpoint_tests() chk_subflow_nr "after delete" 1 chk_mptcp_info subflows 0 subflows 0 - pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow wait_mpj $ns2 chk_subflow_nr "after re-add" 2 chk_mptcp_info subflows 1 subflows 1 + + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_attempt_fail $ns2 + chk_subflow_nr "after new reject" 2 + chk_mptcp_info subflows 1 subflows 1 + + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_del_endpoint $ns2 3 10.0.3.2 + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after no reject" 3 + chk_mptcp_info subflows 2 subflows 2 + mptcp_lib_kill_wait $tests_pid + + chk_join_nr 3 3 3 + chk_rm_nr 1 1 fi # remove and re-add -- cgit From ef34a6ea0cab1800f4b3c9c3c2cefd5091e03379 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:23 +0200 Subject: mptcp: pm: re-using ID of unused flushed subflows If no subflows are attached to the 'subflow' endpoints that are being flushed, the corresponding addr IDs will not be marked as available again. Mark all ID as being available when flushing all the 'subflow' endpoints, and reset local_addr_used counter to cover these cases. Note that mptcp_pm_remove_addrs_and_subflows() helper is only called for flushing operations, not to remove a specific set of addresses and subflows. Fixes: 06faa2271034 ("mptcp: remove multi addresses and subflows in PM") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-5-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 8b232a210a06..2c26696b820e 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1623,8 +1623,15 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, mptcp_pm_remove_addr(msk, &alist); spin_unlock_bh(&msk->pm.lock); } + if (slist.nr) mptcp_pm_remove_subflow(msk, &slist); + + /* Reset counters: maybe some subflows have been removed before */ + spin_lock_bh(&msk->pm.lock); + bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + msk->pm.local_addr_used = 0; + spin_unlock_bh(&msk->pm.lock); } static void mptcp_nl_remove_addrs_list(struct net *net, -- cgit From e06959e9eebdfea4654390f53b65cff57691872e Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:24 +0200 Subject: selftests: mptcp: join: test for flush/re-add endpoints After having flushed endpoints that didn't cause the creation of new subflows, it is important to check endpoints can be re-created, re-using previously used IDs. Before the previous commit, the client would not have been able to re-create the subflow that was previously rejected. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 06faa2271034 ("mptcp: remove multi addresses and subflows in PM") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-6-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index fbb0174145ad..f609c02c6123 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3651,6 +3651,36 @@ endpoint_tests() chk_rm_nr 2 1 invert fi + # flush and re-add + if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + test_linkfail=4 speed=20 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + + wait_attempt_fail $ns2 + chk_subflow_nr "before flush" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_flush_endpoint $ns2 + pm_nl_flush_endpoint $ns1 + wait_rm_addr $ns2 0 + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal + wait_mpj $ns2 + mptcp_lib_kill_wait $tests_pid + + chk_join_nr 2 2 2 + chk_add_nr 2 2 + chk_rm_nr 1 0 invert + fi } # [$1: error message] -- cgit From f448451aa62d54be16acb0034223c17e0d12bc69 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:25 +0200 Subject: mptcp: pm: remove mptcp_pm_remove_subflow() This helper is confusing. It is in pm.c, but it is specific to the in-kernel PM and it cannot be used by the userspace one. Also, it simply calls one in-kernel specific function with the PM lock, while the similar mptcp_pm_remove_addr() helper requires the PM lock. What's left is the pr_debug(), which is not that useful, because a similar one is present in the only function called by this helper: mptcp_pm_nl_rm_subflow_received() After these modifications, this helper can be marked as 'static', and the lock can be taken only once in mptcp_pm_flush_addrs_and_subflows(). Note that it is not a bug fix, but it will help backporting the following commits. Fixes: 0ee4261a3681 ("mptcp: implement mptcp_pm_remove_subflow") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-7-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 10 ---------- net/mptcp/pm_netlink.c | 16 +++++++--------- net/mptcp/protocol.h | 3 --- 3 files changed, 7 insertions(+), 22 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 23bb89c94e90..925123e99889 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -60,16 +60,6 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ return 0; } -int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) -{ - pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); - - spin_lock_bh(&msk->pm.lock); - mptcp_pm_nl_rm_subflow_received(msk, rm_list); - spin_unlock_bh(&msk->pm.lock); - return 0; -} - /* path manager event handlers */ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 2c26696b820e..44fc1c5959ac 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -857,8 +857,8 @@ static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); } -void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list) +static void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list) { mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); } @@ -1471,7 +1471,9 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); if (remove_subflow) { - mptcp_pm_remove_subflow(msk, &list); + spin_lock_bh(&msk->pm.lock); + mptcp_pm_nl_rm_subflow_received(msk, &list); + spin_unlock_bh(&msk->pm.lock); } else if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { /* If the subflow has been used, but now closed */ spin_lock_bh(&msk->pm.lock); @@ -1617,18 +1619,14 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, alist.ids[alist.nr++] = entry->addr.id; } + spin_lock_bh(&msk->pm.lock); if (alist.nr) { - spin_lock_bh(&msk->pm.lock); msk->pm.add_addr_signaled -= alist.nr; mptcp_pm_remove_addr(msk, &alist); - spin_unlock_bh(&msk->pm.lock); } - if (slist.nr) - mptcp_pm_remove_subflow(msk, &slist); - + mptcp_pm_nl_rm_subflow_received(msk, &slist); /* Reset counters: maybe some subflows have been removed before */ - spin_lock_bh(&msk->pm.lock); bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); msk->pm.local_addr_used = 0; spin_unlock_bh(&msk->pm.lock); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 60c6b073d65f..a1c1b0ff1ce1 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1026,7 +1026,6 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); -int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); void mptcp_free_local_addr_list(struct mptcp_sock *msk); @@ -1133,8 +1132,6 @@ static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflo void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); -void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list); unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); -- cgit From 322ea3778965da72862cca2a0c50253aacf65fe6 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:26 +0200 Subject: mptcp: pm: only mark 'subflow' endp as available Adding the following warning ... WARN_ON_ONCE(msk->pm.local_addr_used == 0) ... before decrementing the local_addr_used counter helped to find a bug when running the "remove single address" subtest from the mptcp_join.sh selftests. Removing a 'signal' endpoint will trigger the removal of all subflows linked to this endpoint via mptcp_pm_nl_rm_addr_or_subflow() with rm_type == MPTCP_MIB_RMSUBFLOW. This will decrement the local_addr_used counter, which is wrong in this case because this counter is linked to 'subflow' endpoints, and here it is a 'signal' endpoint that is being removed. Now, the counter is decremented, only if the ID is being used outside of mptcp_pm_nl_rm_addr_or_subflow(), only for 'subflow' endpoints, and if the ID is not 0 -- local_addr_used is not taking into account these ones. This marking of the ID as being available, and the decrement is done no matter if a subflow using this ID is currently available, because the subflow could have been closed before. Fixes: 06faa2271034 ("mptcp: remove multi addresses and subflows in PM") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-8-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 44fc1c5959ac..4cf7cc851f80 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -833,10 +833,10 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, if (rm_type == MPTCP_MIB_RMSUBFLOW) __MPTCP_INC_STATS(sock_net(sk), rm_type); } - if (rm_type == MPTCP_MIB_RMSUBFLOW) - __set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap); - else if (rm_type == MPTCP_MIB_RMADDR) + + if (rm_type == MPTCP_MIB_RMADDR) __MPTCP_INC_STATS(sock_net(sk), rm_type); + if (!removed) continue; @@ -846,8 +846,6 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, if (rm_type == MPTCP_MIB_RMADDR) { msk->pm.add_addr_accepted--; WRITE_ONCE(msk->pm.accept_addr, true); - } else if (rm_type == MPTCP_MIB_RMSUBFLOW) { - msk->pm.local_addr_used--; } } } @@ -1441,6 +1439,14 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, return ret; } +static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id) +{ + /* If it was marked as used, and not ID 0, decrement local_addr_used */ + if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) && + id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0)) + msk->pm.local_addr_used--; +} + static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, const struct mptcp_pm_addr_entry *entry) { @@ -1474,11 +1480,11 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, spin_lock_bh(&msk->pm.lock); mptcp_pm_nl_rm_subflow_received(msk, &list); spin_unlock_bh(&msk->pm.lock); - } else if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { - /* If the subflow has been used, but now closed */ + } + + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { spin_lock_bh(&msk->pm.lock); - if (!__test_and_set_bit(entry->addr.id, msk->pm.id_avail_bitmap)) - msk->pm.local_addr_used--; + __mark_subflow_endp_available(msk, list.ids[0]); spin_unlock_bh(&msk->pm.lock); } @@ -1516,6 +1522,7 @@ static int mptcp_nl_remove_id_zero_address(struct net *net, spin_lock_bh(&msk->pm.lock); mptcp_pm_remove_addr(msk, &list); mptcp_pm_nl_rm_subflow_received(msk, &list); + __mark_subflow_endp_available(msk, 0); spin_unlock_bh(&msk->pm.lock); release_sock(sk); @@ -1917,6 +1924,7 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); mptcp_pm_nl_rm_subflow_received(msk, &list); + __mark_subflow_endp_available(msk, list.ids[0]); mptcp_pm_create_subflow_or_signal_addr(msk); spin_unlock_bh(&msk->pm.lock); } -- cgit From 1c1f721375989579e46741f59523e39ec9b2a9bd Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:27 +0200 Subject: mptcp: pm: only decrement add_addr_accepted for MPJ req Adding the following warning ... WARN_ON_ONCE(msk->pm.add_addr_accepted == 0) ... before decrementing the add_addr_accepted counter helped to find a bug when running the "remove single subflow" subtest from the mptcp_join.sh selftest. Removing a 'subflow' endpoint will first trigger a RM_ADDR, then the subflow closure. Before this patch, and upon the reception of the RM_ADDR, the other peer will then try to decrement this add_addr_accepted. That's not correct because the attached subflows have not been created upon the reception of an ADD_ADDR. A way to solve that is to decrement the counter only if the attached subflow was an MP_JOIN to a remote id that was not 0, and initiated by the host receiving the RM_ADDR. Fixes: d0876b2284cf ("mptcp: add the incoming RM_ADDR support") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-9-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 4cf7cc851f80..882781571c7b 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -829,7 +829,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_close_ssk(sk, ssk, subflow); spin_lock_bh(&msk->pm.lock); - removed = true; + removed |= subflow->request_join; if (rm_type == MPTCP_MIB_RMSUBFLOW) __MPTCP_INC_STATS(sock_net(sk), rm_type); } @@ -843,7 +843,11 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, if (!mptcp_pm_is_kernel(msk)) continue; - if (rm_type == MPTCP_MIB_RMADDR) { + if (rm_type == MPTCP_MIB_RMADDR && rm_id && + !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { + /* Note: if the subflow has been closed before, this + * add_addr_accepted counter will not be decremented. + */ msk->pm.add_addr_accepted--; WRITE_ONCE(msk->pm.accept_addr, true); } -- cgit From 0137a3c7c2ea3f9df8ebfc65d78b4ba712a187bb Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:28 +0200 Subject: mptcp: pm: check add_addr_accept_max before accepting new ADD_ADDR The limits might have changed in between, it is best to check them before accepting new ADD_ADDR. Fixes: d0876b2284cf ("mptcp: add the incoming RM_ADDR support") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-10-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 882781571c7b..28a9a3726146 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -848,8 +848,8 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, /* Note: if the subflow has been closed before, this * add_addr_accepted counter will not be decremented. */ - msk->pm.add_addr_accepted--; - WRITE_ONCE(msk->pm.accept_addr, true); + if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) + WRITE_ONCE(msk->pm.accept_addr, true); } } } -- cgit From ca6e55a703ca2894611bb5c5bca8bfd2290fd91e Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:29 +0200 Subject: mptcp: pm: only in-kernel cannot have entries with ID 0 The ID 0 is specific per MPTCP connections. The per netns entries cannot have this special ID 0 then. But that's different for the userspace PM where the entries are per connection, they can then use this special ID 0. Fixes: f40be0db0b76 ("mptcp: unify pm get_flags_and_ifindex_by_id") Cc: stable@vger.kernel.org Acked-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-11-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 3 --- net/mptcp/pm_netlink.c | 4 ++++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 925123e99889..3e6e0f5510bb 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -434,9 +434,6 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id *flags = 0; *ifindex = 0; - if (!id) - return 0; - if (mptcp_pm_is_userspace(msk)) return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 28a9a3726146..d0a80f537fc3 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1395,6 +1395,10 @@ int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int struct sock *sk = (struct sock *)msk; struct net *net = sock_net(sk); + /* No entries with ID 0 */ + if (id == 0) + return 0; + rcu_read_lock(); entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); if (entry) { -- cgit From 09355f7abb9fbfc1a240be029837921ea417bf4f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:30 +0200 Subject: mptcp: pm: fullmesh: select the right ID later When reacting upon the reception of an ADD_ADDR, the in-kernel PM first looks for fullmesh endpoints. If there are some, it will pick them, using their entry ID. It should set the ID 0 when using the endpoint corresponding to the initial subflow, it is a special case imposed by the MPTCP specs. Note that msk->mpc_endpoint_id might not be set when receiving the first ADD_ADDR from the server. So better to compare the addresses. Fixes: 1a0d6136c5f0 ("mptcp: local addresses fullmesh") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-12-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d0a80f537fc3..a2e37ab1c40f 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -636,6 +636,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, { struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *entry; + struct mptcp_addr_info mpc_addr; struct pm_nl_pernet *pernet; unsigned int subflows_max; int i = 0; @@ -643,6 +644,8 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, pernet = pm_nl_get_pernet_from_msk(msk); subflows_max = mptcp_pm_get_subflows_max(msk); + mptcp_local_address((struct sock_common *)msk, &mpc_addr); + rcu_read_lock(); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) @@ -653,7 +656,13 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, if (msk->pm.subflows < subflows_max) { msk->pm.subflows++; - addrs[i++] = entry->addr; + addrs[i] = entry->addr; + + /* Special case for ID0: set the correct ID */ + if (mptcp_addresses_equal(&entry->addr, &mpc_addr, entry->addr.port)) + addrs[i].id = 0; + + i++; } } rcu_read_unlock(); -- cgit From 4878f9f8421f4587bee7b232c1c8a9d3a7d4d782 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:31 +0200 Subject: selftests: mptcp: join: validate fullmesh endp on 1st sf This case was not covered, and the wrong ID was set before the previous commit. The rest is not modified, it is just that it will increase the code coverage. The right address ID can be verified by looking at the packet traces. We could automate that using Netfilter with some cBPF code for example, but that's always a bit cryptic. Packetdrill seems better fitted for that. Fixes: 4f49d63352da ("selftests: mptcp: add fullmesh testcases") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-13-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f609c02c6123..89e553e0e0c2 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3059,6 +3059,7 @@ fullmesh_tests() pm_nl_set_limits $ns1 1 3 pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh fullmesh=1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 -- cgit From 48e50dcbcbaaf713d82bf2da5c16aeced94ad07d Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 19 Aug 2024 21:45:32 +0200 Subject: mptcp: pm: avoid possible UaF when selecting endp select_local_address() and select_signal_address() both select an endpoint entry from the list inside an RCU protected section, but return a reference to it, to be read later on. If the entry is dereferenced after the RCU unlock, reading info could cause a Use-after-Free. A simple solution is to copy the required info while inside the RCU protected section to avoid any risk of UaF later. The address ID might need to be modified later to handle the ID0 case later, so a copy seems OK to deal with. Reported-by: Paolo Abeni Closes: https://lore.kernel.org/45cd30d3-7710-491c-ae4d-a1368c00beb1@redhat.com Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-14-38035d40de5b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 64 +++++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index a2e37ab1c40f..3e4ad801786f 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -143,11 +143,13 @@ static bool lookup_subflow_by_daddr(const struct list_head *list, return false; } -static struct mptcp_pm_addr_entry * +static bool select_local_address(const struct pm_nl_pernet *pernet, - const struct mptcp_sock *msk) + const struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *new_entry) { - struct mptcp_pm_addr_entry *entry, *ret = NULL; + struct mptcp_pm_addr_entry *entry; + bool found = false; msk_owned_by_me(msk); @@ -159,17 +161,21 @@ select_local_address(const struct pm_nl_pernet *pernet, if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) continue; - ret = entry; + *new_entry = *entry; + found = true; break; } rcu_read_unlock(); - return ret; + + return found; } -static struct mptcp_pm_addr_entry * -select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk) +static bool +select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *new_entry) { - struct mptcp_pm_addr_entry *entry, *ret = NULL; + struct mptcp_pm_addr_entry *entry; + bool found = false; rcu_read_lock(); /* do not keep any additional per socket state, just signal @@ -184,11 +190,13 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk) if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) continue; - ret = entry; + *new_entry = *entry; + found = true; break; } rcu_read_unlock(); - return ret; + + return found; } unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) @@ -512,9 +520,10 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { - struct mptcp_pm_addr_entry *local, *signal_and_subflow = NULL; struct sock *sk = (struct sock *)msk; + struct mptcp_pm_addr_entry local; unsigned int add_addr_signal_max; + bool signal_and_subflow = false; unsigned int local_addr_max; struct pm_nl_pernet *pernet; unsigned int subflows_max; @@ -565,23 +574,22 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) return; - local = select_signal_address(pernet, msk); - if (!local) + if (!select_signal_address(pernet, msk, &local)) goto subflow; /* If the alloc fails, we are on memory pressure, not worth * continuing, and trying to create subflows. */ - if (!mptcp_pm_alloc_anno_list(msk, &local->addr)) + if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) return; - __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); + __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; - mptcp_pm_announce_addr(msk, &local->addr, false); + mptcp_pm_announce_addr(msk, &local.addr, false); mptcp_pm_nl_addr_send_ack(msk); - if (local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) - signal_and_subflow = local; + if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) + signal_and_subflow = true; } subflow: @@ -592,26 +600,22 @@ subflow: bool fullmesh; int i, nr; - if (signal_and_subflow) { - local = signal_and_subflow; - signal_and_subflow = NULL; - } else { - local = select_local_address(pernet, msk); - if (!local) - break; - } + if (signal_and_subflow) + signal_and_subflow = false; + else if (!select_local_address(pernet, msk, &local)) + break; - fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); + fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); msk->pm.local_addr_used++; - __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); - nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs); + __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); if (nr == 0) continue; spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) - __mptcp_subflow_connect(sk, &local->addr, &addrs[i]); + __mptcp_subflow_connect(sk, &local.addr, &addrs[i]); spin_lock_bh(&msk->pm.lock); } mptcp_pm_nl_check_work_pending(msk); -- cgit From 0b43312902d165c4c8429cd49e8c91479f52b7c4 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 13 Aug 2024 13:51:48 +0800 Subject: drm/amdgpu: fixing rlc firmware loading failure issue Skip rlc firmware validation to ignore firmware header size mismatch issues. This restores the workaround added in commit 849e133c973c ("drm/amdgpu: Fix the null pointer when load rlc firmware") Fixes: 3af2c80ae2f5 ("drm/amdgpu: refine gfx10 firmware loading") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3551 Signed-off-by: Yang Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 89ec85d16eb8110d88c273d1d34f1fe5a70ba8cc) --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 2957702fca0c..e444e621ddaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4116,6 +4116,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { + char fw_name[53]; char ucode_prefix[30]; const char *wks = ""; int err; @@ -4149,8 +4150,8 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); if (!amdgpu_sriov_vf(adev)) { - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, - "amdgpu/%s_rlc.bin", ucode_prefix); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; -- cgit From e3e4bf58bad1576ac732a1429f53e3d4bfb82b4b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 14 Aug 2024 10:28:24 -0400 Subject: drm/amdgpu/sdma5.2: limit wptr workaround to sdma 5.2.1 The workaround seems to cause stability issues on other SDMA 5.2.x IPs. Fixes: a03ebf116303 ("drm/amdgpu/sdma5.2: Update wptr registers as well as doorbell") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3556 Acked-by: Ruijing Dong Signed-off-by: Alex Deucher (cherry picked from commit 2dc3851ef7d9c5439ea8e9623fc36878f3b40649) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index af1e90159ce3..2e72d445415f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -176,14 +176,16 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - /* SDMA seems to miss doorbells sometimes when powergating kicks in. - * Updating the wptr directly will wake it. This is only safe because - * we disallow gfxoff in begin_use() and then allow it again in end_use(). - */ - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(5, 2, 1)) { + /* SDMA seems to miss doorbells sometimes when powergating kicks in. + * Updating the wptr directly will wake it. This is only safe because + * we disallow gfxoff in begin_use() and then allow it again in end_use(). + */ + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } } else { DRM_DEBUG("Not using doorbell -- " "mmSDMA%i_GFX_RB_WPTR == 0x%08x " -- cgit From c99769bceab4ecb6a067b9af11f9db281eea3e2a Mon Sep 17 00:00:00 2001 From: Candice Li Date: Thu, 15 Aug 2024 11:37:28 +0800 Subject: drm/amdgpu: Validate TA binary size Add TA binary size validation to avoid OOB write. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit c0a04e3570d72aaf090962156ad085e37c62e442) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 0c856005df6b..38face981c3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -166,6 +166,9 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t if (ret) return -EFAULT; + if (ta_bin_len > PSP_1_MEG) + return -EINVAL; + copy_pos += sizeof(uint32_t); ta_bin = kzalloc(ta_bin_len, GFP_KERNEL); -- cgit From 9cead81eff635e3b3cbce51b40228f3bdc6f2b8c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 19 Aug 2024 11:14:29 -0400 Subject: drm/amdgpu: fix eGPU hotplug regression The driver needs to wait for the on board firmware to finish its initialization before probing the card. Commit 959056982a9b ("drm/amdgpu: Fix discovery initialization failure during pci rescan") switched from using msleep() to using usleep_range() which seems to have caused init failures on some navi1x boards. Switch back to msleep(). Fixes: 959056982a9b ("drm/amdgpu: Fix discovery initialization failure during pci rescan") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3559 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3500 Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: Ma Jun (cherry picked from commit c69b07f7bbc905022491c45097923d3487479529) Cc: stable@vger.kernel.org # 6.10.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index ac108fca64fe..7b561e8e3caf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -278,7 +278,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, msg = RREG32(mmMP0_SMN_C2PMSG_33); if (msg & 0x80000000) break; - usleep_range(1000, 1100); + msleep(1); } } -- cgit From 822c8020aebcf5804a143b891e34f29873fee5e2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 20 Aug 2024 13:03:58 +1000 Subject: ata: pata_macio: Fix DMA table overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kolbjørn and Jonáš reported that their 32-bit PowerMacs were crashing in pata-macio since commit 09fe2bfa6b83 ("ata: pata_macio: Fix max_segment_size with PAGE_SIZE == 64K"). For example: kernel BUG at drivers/ata/pata_macio.c:544! Oops: Exception in kernel mode, sig: 5 [#1] BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 DEBUG_PAGEALLOC PowerMac ... NIP pata_macio_qc_prep+0xf4/0x190 LR pata_macio_qc_prep+0xfc/0x190 Call Trace: 0xc1421660 (unreliable) ata_qc_issue+0x14c/0x2d4 __ata_scsi_queuecmd+0x200/0x53c ata_scsi_queuecmd+0x50/0xe0 scsi_queue_rq+0x788/0xb1c __blk_mq_issue_directly+0x58/0xf4 blk_mq_plug_issue_direct+0x8c/0x1b4 blk_mq_flush_plug_list.part.0+0x584/0x5e0 __blk_flush_plug+0xf8/0x194 __submit_bio+0x1b8/0x2e0 submit_bio_noacct_nocheck+0x230/0x304 btrfs_work_helper+0x200/0x338 process_one_work+0x1a8/0x338 worker_thread+0x364/0x4c0 kthread+0x100/0x104 start_kernel_thread+0x10/0x14 That commit increased max_segment_size to 64KB, with the justification that the SCSI core was already using that size when PAGE_SIZE == 64KB, and that there was existing logic to split over-sized requests. However with a sufficiently large request, the splitting logic causes each sg to be split into two commands in the DMA table, leading to overflow of the DMA table, triggering the BUG_ON(). With default settings the bug doesn't trigger, because the request size is limited by max_sectors_kb == 1280, however max_sectors_kb can be increased, and apparently some distros do that by default using udev rules. Fix the bug for 4KB kernels by reverting to the old max_segment_size. For 64KB kernels the sg_tablesize needs to be halved, to allow for the possibility that each sg will be split into two. Fixes: 09fe2bfa6b83 ("ata: pata_macio: Fix max_segment_size with PAGE_SIZE == 64K") Cc: stable@vger.kernel.org # v6.10+ Reported-by: Kolbjørn Barmen Closes: https://lore.kernel.org/all/62d248bb-e97a-25d2-bcf2-9160c518cae5@kolla.no/ Reported-by: Jonáš Vidra Closes: https://lore.kernel.org/all/3b6441b8-06e6-45da-9e55-f92f2c86933e@ufal.mff.cuni.cz/ Tested-by: Kolbjørn Barmen Signed-off-by: Michael Ellerman Signed-off-by: Damien Le Moal --- drivers/ata/pata_macio.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index 1b85e8bf4ef9..1cb8d24b088f 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -208,6 +208,19 @@ static const char* macio_ata_names[] = { /* Don't let a DMA segment go all the way to 64K */ #define MAX_DBDMA_SEG 0xff00 +#ifdef CONFIG_PAGE_SIZE_64KB +/* + * The SCSI core requires the segment size to cover at least a page, so + * for 64K page size kernels it must be at least 64K. However the + * hardware can't handle 64K, so pata_macio_qc_prep() will split large + * requests. To handle the split requests the tablesize must be halved. + */ +#define PATA_MACIO_MAX_SEGMENT_SIZE SZ_64K +#define PATA_MACIO_SG_TABLESIZE (MAX_DCMDS / 2) +#else +#define PATA_MACIO_MAX_SEGMENT_SIZE MAX_DBDMA_SEG +#define PATA_MACIO_SG_TABLESIZE MAX_DCMDS +#endif /* * Wait 1s for disk to answer on IDE bus after a hard reset @@ -912,16 +925,10 @@ static int pata_macio_do_resume(struct pata_macio_priv *priv) static const struct scsi_host_template pata_macio_sht = { __ATA_BASE_SHT(DRV_NAME), - .sg_tablesize = MAX_DCMDS, + .sg_tablesize = PATA_MACIO_SG_TABLESIZE, /* We may not need that strict one */ .dma_boundary = ATA_DMA_BOUNDARY, - /* - * The SCSI core requires the segment size to cover at least a page, so - * for 64K page size kernels this must be at least 64K. However the - * hardware can't handle 64K, so pata_macio_qc_prep() will split large - * requests. - */ - .max_segment_size = SZ_64K, + .max_segment_size = PATA_MACIO_MAX_SEGMENT_SIZE, .device_configure = pata_macio_device_configure, .sdev_groups = ata_common_sdev_groups, .can_queue = ATA_DEF_QUEUE, -- cgit From d4bc0a264fb482b019c84fbc7202dd3cab059087 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 20 Aug 2024 13:04:07 +1000 Subject: ata: pata_macio: Use WARN instead of BUG The overflow/underflow conditions in pata_macio_qc_prep() should never happen. But if they do there's no need to kill the system entirely, a WARN and failing the IO request should be sufficient and might allow the system to keep running. Signed-off-by: Michael Ellerman Signed-off-by: Damien Le Moal --- drivers/ata/pata_macio.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index 1cb8d24b088f..f2f36e55a1f4 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -554,7 +554,8 @@ static enum ata_completion_errors pata_macio_qc_prep(struct ata_queued_cmd *qc) while (sg_len) { /* table overflow should never happen */ - BUG_ON (pi++ >= MAX_DCMDS); + if (WARN_ON_ONCE(pi >= MAX_DCMDS)) + return AC_ERR_SYSTEM; len = (sg_len < MAX_DBDMA_SEG) ? sg_len : MAX_DBDMA_SEG; table->command = cpu_to_le16(write ? OUTPUT_MORE: INPUT_MORE); @@ -566,11 +567,13 @@ static enum ata_completion_errors pata_macio_qc_prep(struct ata_queued_cmd *qc) addr += len; sg_len -= len; ++table; + ++pi; } } /* Should never happen according to Tejun */ - BUG_ON(!pi); + if (WARN_ON_ONCE(!pi)) + return AC_ERR_SYSTEM; /* Convert the last command to an input/output */ table--; -- cgit From fd764e74e5b75512be1b55ec9680a6c35885cc63 Mon Sep 17 00:00:00 2001 From: Andreas Hindborg Date: Thu, 15 Aug 2024 07:49:43 +0000 Subject: rust: block: fix wrong usage of lockdep API When allocating `struct gendisk`, `GenDiskBuilder` is using a dynamic lock class key without registering the key. This is an incorrect use of the API, which causes a `WARN` trace. Fix the issue by using a static lock class key, which is more appropriate for the situation anyway. Fixes: 3253aba3408a ("rust: block: introduce `kernel::block::mq` module") Reported-by: Behme Dirk (XC-CP/ESB5) Closes: https://rust-for-linux.zulipchat.com/#narrow/stream/x/topic/x/near/457090036 Signed-off-by: Andreas Hindborg Reviewed-by: Benno Lossin Reviewed-by: Gary Guo Reviewed-by: Alice Ryhl Tested-by: Dirk Behme Link: https://lore.kernel.org/r/20240815074519.2684107-3-nmi@metaspace.dk [ Applied `rustfmt`, reworded slightly and made Zulip link a permalink. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/block/mq/gen_disk.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs index f548a6199847..708125dce96a 100644 --- a/rust/kernel/block/mq/gen_disk.rs +++ b/rust/kernel/block/mq/gen_disk.rs @@ -6,8 +6,8 @@ //! C header: [`include/linux/blk_mq.h`](srctree/include/linux/blk_mq.h) use crate::block::mq::{raw_writer::RawWriter, Operations, TagSet}; -use crate::error; use crate::{bindings, error::from_err_ptr, error::Result, sync::Arc}; +use crate::{error, static_lock_class}; use core::fmt::{self, Write}; /// A builder for [`GenDisk`]. @@ -93,8 +93,6 @@ impl GenDiskBuilder { name: fmt::Arguments<'_>, tagset: Arc>, ) -> Result> { - let lock_class_key = crate::sync::LockClassKey::new(); - // SAFETY: `bindings::queue_limits` contain only fields that are valid when zeroed. let mut lim: bindings::queue_limits = unsafe { core::mem::zeroed() }; @@ -110,7 +108,7 @@ impl GenDiskBuilder { tagset.raw_tag_set(), &mut lim, core::ptr::null_mut(), - lock_class_key.as_ptr(), + static_lock_class!().as_ptr(), ) })?; -- cgit From 5d88f98b2e73b2928cab7f8bd3d67777cb9ea1e7 Mon Sep 17 00:00:00 2001 From: Vincent Woltmann Date: Fri, 16 Aug 2024 20:01:42 +0000 Subject: docs: rust: remove unintended blockquote in Coding Guidelines An unordered list in coding-guidelines.rst was indented, producing a blockquote around it and making it look more indented than expected. Remove the indentation to only output an unordered list. Reported-by: Miguel Ojeda Closes: https://github.com/Rust-for-Linux/linux/issues/1063 Fixes: d07479b211b7 ("docs: add Rust documentation") Signed-off-by: Vincent Woltmann Link: https://lore.kernel.org/r/20240816200339.2495875-1-vincent@woltmann.art [ Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- Documentation/rust/coding-guidelines.rst | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/Documentation/rust/coding-guidelines.rst b/Documentation/rust/coding-guidelines.rst index 05542840b16c..329b070a1d47 100644 --- a/Documentation/rust/coding-guidelines.rst +++ b/Documentation/rust/coding-guidelines.rst @@ -145,32 +145,32 @@ This is how a well-documented Rust function may look like: This example showcases a few ``rustdoc`` features and some conventions followed in the kernel: - - The first paragraph must be a single sentence briefly describing what - the documented item does. Further explanations must go in extra paragraphs. +- The first paragraph must be a single sentence briefly describing what + the documented item does. Further explanations must go in extra paragraphs. - - Unsafe functions must document their safety preconditions under - a ``# Safety`` section. +- Unsafe functions must document their safety preconditions under + a ``# Safety`` section. - - While not shown here, if a function may panic, the conditions under which - that happens must be described under a ``# Panics`` section. +- While not shown here, if a function may panic, the conditions under which + that happens must be described under a ``# Panics`` section. - Please note that panicking should be very rare and used only with a good - reason. In almost all cases, a fallible approach should be used, typically - returning a ``Result``. + Please note that panicking should be very rare and used only with a good + reason. In almost all cases, a fallible approach should be used, typically + returning a ``Result``. - - If providing examples of usage would help readers, they must be written in - a section called ``# Examples``. +- If providing examples of usage would help readers, they must be written in + a section called ``# Examples``. - - Rust items (functions, types, constants...) must be linked appropriately - (``rustdoc`` will create a link automatically). +- Rust items (functions, types, constants...) must be linked appropriately + (``rustdoc`` will create a link automatically). - - Any ``unsafe`` block must be preceded by a ``// SAFETY:`` comment - describing why the code inside is sound. +- Any ``unsafe`` block must be preceded by a ``// SAFETY:`` comment + describing why the code inside is sound. - While sometimes the reason might look trivial and therefore unneeded, - writing these comments is not just a good way of documenting what has been - taken into account, but most importantly, it provides a way to know that - there are no *extra* implicit constraints. + While sometimes the reason might look trivial and therefore unneeded, + writing these comments is not just a good way of documenting what has been + taken into account, but most importantly, it provides a way to know that + there are no *extra* implicit constraints. To learn more about how to write documentation for Rust and extra features, please take a look at the ``rustdoc`` book at: -- cgit From 0ff8f3f0979559b0d7494d580f2597beab3f159b Mon Sep 17 00:00:00 2001 From: Michael Vetter Date: Mon, 19 Aug 2024 22:57:31 +0200 Subject: rust: kernel: fix typos in code comments Fix spelling mistakes in code comments. Signed-off-by: Michael Vetter Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20240819205731.2163-1-jubalh@iodoru.org [ Reworded slightly. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/init/macros.rs | 4 ++-- rust/kernel/net/phy.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs index 02ecedc4ae7a..9a0c4650ef67 100644 --- a/rust/kernel/init/macros.rs +++ b/rust/kernel/init/macros.rs @@ -145,7 +145,7 @@ //! } //! } //! // Implement the internal `PinData` trait that marks the pin-data struct as a pin-data -//! // struct. This is important to ensure that no user can implement a rouge `__pin_data` +//! // struct. This is important to ensure that no user can implement a rogue `__pin_data` //! // function without using `unsafe`. //! unsafe impl ::kernel::init::__internal::PinData for __ThePinData { //! type Datee = Bar; @@ -156,7 +156,7 @@ //! // case no such fields exist, hence this is almost empty. The two phantomdata fields exist //! // for two reasons: //! // - `__phantom`: every generic must be used, since we cannot really know which generics -//! // are used, we declere all and then use everything here once. +//! // are used, we declare all and then use everything here once. //! // - `__phantom_pin`: uses the `'__pin` lifetime and ensures that this struct is invariant //! // over it. The lifetime is needed to work around the limitation that trait bounds must //! // not be trivial, e.g. the user has a `#[pin] PhantomPinned` field -- this is diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs index fd40b703d224..91dac63ffa17 100644 --- a/rust/kernel/net/phy.rs +++ b/rust/kernel/net/phy.rs @@ -491,7 +491,7 @@ impl Adapter { pub struct DriverVTable(Opaque); // SAFETY: `DriverVTable` doesn't expose any &self method to access internal data, so it's safe to -// share `&DriverVTable` across execution context boundries. +// share `&DriverVTable` across execution context boundaries. unsafe impl Sync for DriverVTable {} /// Creates a [`DriverVTable`] instance from [`Driver`]. -- cgit From e0ee967630c8ee67bb47a5b38d235cd5a8789c48 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 20 Aug 2024 18:31:58 -0600 Subject: io_uring/kbuf: sanitize peek buffer setup Harden the buffer peeking a bit, by adding a sanity check for it having a valid size. Outside of that, arg->max_len is a size_t, though it's only ever set to a 32-bit value (as it's governed by MAX_RW_COUNT). Bump our needed check to a size_t so we know it fits. Finally, cap the calculated needed iov value to the PEEK_MAX_IMPORT, which is the maximum number of segments that should be peeked. Fixes: 35c8711c8fc4 ("io_uring/kbuf: add helpers for getting/peeking multiple buffers") Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index c95dc1736dd9..1af2bd56af44 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -218,10 +218,13 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, buf = io_ring_head_to_buf(br, head, bl->mask); if (arg->max_len) { - int needed; + u32 len = READ_ONCE(buf->len); + size_t needed; - needed = (arg->max_len + buf->len - 1) / buf->len; - needed = min(needed, PEEK_MAX_IMPORT); + if (unlikely(!len)) + return -ENOBUFS; + needed = (arg->max_len + len - 1) / len; + needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT); if (nr_avail > needed) nr_avail = needed; } -- cgit From 91191a6e50a2ff752da244493171037663536768 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Wed, 21 Aug 2024 12:47:11 +0000 Subject: ALSA: hda: cs35l56: Don't use the device index as a calibration index The HDA driver cannot assume that the order that the devices are specified in the cirrus,dev-index matches the order of calibration entries. Only a calibration entry with a matching silicon id will be used. Fixes: cfa43aaa7948 ("ALSA: hda: cs35l56: Apply amp calibration from EFI data") Signed-off-by: Simon Trimmer Link: https://patch.msgid.link/20240821124711.44325-1-simont@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l56_hda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index a9dfd62637cf..e3ac0e23ae32 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -1003,7 +1003,7 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id) goto err; } - cs35l56->base.cal_index = cs35l56->index; + cs35l56->base.cal_index = -1; cs35l56_init_cs_dsp(&cs35l56->base, &cs35l56->cs_dsp); cs35l56->cs_dsp.client_ops = &cs35l56_hda_client_ops; -- cgit From 3568affcddd68743e25aa3ec1647d9b82797757b Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 20 Aug 2024 13:29:30 -0700 Subject: soc: qcom: pmic_glink: Fix race during initialization As pointed out by Stephen Boyd it is possible that during initialization of the pmic_glink child drivers, the protection-domain notifiers fires, and the associated work is scheduled, before the client registration returns and as a result the local "client" pointer has been initialized. The outcome of this is a NULL pointer dereference as the "client" pointer is blindly dereferenced. Timeline provided by Stephen: CPU0 CPU1 ---- ---- ucsi->client = NULL; devm_pmic_glink_register_client() client->pdr_notify(client->priv, pg->client_state) pmic_glink_ucsi_pdr_notify() schedule_work(&ucsi->register_work) pmic_glink_ucsi_register() ucsi_register() pmic_glink_ucsi_read_version() pmic_glink_ucsi_read() pmic_glink_ucsi_read() pmic_glink_send(ucsi->client) ucsi->client = client // Too late! This code is identical across the altmode, battery manager and usci child drivers. Resolve this by splitting the allocation of the "client" object and the registration thereof into two operations. This only happens if the protection domain registry is populated at the time of registration, which by the introduction of commit '1ebcde047c54 ("soc: qcom: add pd-mapper implementation")' became much more likely. Reported-by: Amit Pundir Closes: https://lore.kernel.org/all/CAMi1Hd2_a7TjA7J9ShrAbNOd_CoZ3D87twmO5t+nZxC9sX18tA@mail.gmail.com/ Reported-by: Johan Hovold Closes: https://lore.kernel.org/all/ZqiyLvP0gkBnuekL@hovoldconsulting.com/ Reported-by: Stephen Boyd Closes: https://lore.kernel.org/all/CAE-0n52JgfCBWiFQyQWPji8cq_rCsviBpW-m72YitgNfdaEhQg@mail.gmail.com/ Fixes: 58ef4ece1e41 ("soc: qcom: pmic_glink: Introduce base PMIC GLINK driver") Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Reviewed-by: Neil Armstrong Tested-by: Amit Pundir Reviewed-by: Johan Hovold Acked-by: Sebastian Reichel Tested-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240820-pmic-glink-v6-11-races-v3-1-eec53c750a04@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/power/supply/qcom_battmgr.c | 16 ++++++++++------ drivers/soc/qcom/pmic_glink.c | 28 ++++++++++++++++++---------- drivers/soc/qcom/pmic_glink_altmode.c | 17 +++++++++++------ drivers/usb/typec/ucsi/ucsi_glink.c | 16 ++++++++++------ include/linux/soc/qcom/pmic_glink.h | 11 ++++++----- 5 files changed, 55 insertions(+), 33 deletions(-) diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c index 46f36dcb185c..7cf19a39d986 100644 --- a/drivers/power/supply/qcom_battmgr.c +++ b/drivers/power/supply/qcom_battmgr.c @@ -1385,12 +1385,16 @@ static int qcom_battmgr_probe(struct auxiliary_device *adev, "failed to register wireless charing power supply\n"); } - battmgr->client = devm_pmic_glink_register_client(dev, - PMIC_GLINK_OWNER_BATTMGR, - qcom_battmgr_callback, - qcom_battmgr_pdr_notify, - battmgr); - return PTR_ERR_OR_ZERO(battmgr->client); + battmgr->client = devm_pmic_glink_client_alloc(dev, PMIC_GLINK_OWNER_BATTMGR, + qcom_battmgr_callback, + qcom_battmgr_pdr_notify, + battmgr); + if (IS_ERR(battmgr->client)) + return PTR_ERR(battmgr->client); + + pmic_glink_client_register(battmgr->client); + + return 0; } static const struct auxiliary_device_id qcom_battmgr_id_table[] = { diff --git a/drivers/soc/qcom/pmic_glink.c b/drivers/soc/qcom/pmic_glink.c index 9ebc0ba35947..53b176d04fbd 100644 --- a/drivers/soc/qcom/pmic_glink.c +++ b/drivers/soc/qcom/pmic_glink.c @@ -66,15 +66,14 @@ static void _devm_pmic_glink_release_client(struct device *dev, void *res) spin_unlock_irqrestore(&pg->client_lock, flags); } -struct pmic_glink_client *devm_pmic_glink_register_client(struct device *dev, - unsigned int id, - void (*cb)(const void *, size_t, void *), - void (*pdr)(void *, int), - void *priv) +struct pmic_glink_client *devm_pmic_glink_client_alloc(struct device *dev, + unsigned int id, + void (*cb)(const void *, size_t, void *), + void (*pdr)(void *, int), + void *priv) { struct pmic_glink_client *client; struct pmic_glink *pg = dev_get_drvdata(dev->parent); - unsigned long flags; client = devres_alloc(_devm_pmic_glink_release_client, sizeof(*client), GFP_KERNEL); if (!client) @@ -85,6 +84,18 @@ struct pmic_glink_client *devm_pmic_glink_register_client(struct device *dev, client->cb = cb; client->pdr_notify = pdr; client->priv = priv; + INIT_LIST_HEAD(&client->node); + + devres_add(dev, client); + + return client; +} +EXPORT_SYMBOL_GPL(devm_pmic_glink_client_alloc); + +void pmic_glink_client_register(struct pmic_glink_client *client) +{ + struct pmic_glink *pg = client->pg; + unsigned long flags; mutex_lock(&pg->state_lock); spin_lock_irqsave(&pg->client_lock, flags); @@ -95,11 +106,8 @@ struct pmic_glink_client *devm_pmic_glink_register_client(struct device *dev, spin_unlock_irqrestore(&pg->client_lock, flags); mutex_unlock(&pg->state_lock); - devres_add(dev, client); - - return client; } -EXPORT_SYMBOL_GPL(devm_pmic_glink_register_client); +EXPORT_SYMBOL_GPL(pmic_glink_client_register); int pmic_glink_send(struct pmic_glink_client *client, void *data, size_t len) { diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c index 1e0808b3cb93..463b1c528831 100644 --- a/drivers/soc/qcom/pmic_glink_altmode.c +++ b/drivers/soc/qcom/pmic_glink_altmode.c @@ -520,12 +520,17 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev, return ret; } - altmode->client = devm_pmic_glink_register_client(dev, - altmode->owner_id, - pmic_glink_altmode_callback, - pmic_glink_altmode_pdr_notify, - altmode); - return PTR_ERR_OR_ZERO(altmode->client); + altmode->client = devm_pmic_glink_client_alloc(dev, + altmode->owner_id, + pmic_glink_altmode_callback, + pmic_glink_altmode_pdr_notify, + altmode); + if (IS_ERR(altmode->client)) + return PTR_ERR(altmode->client); + + pmic_glink_client_register(altmode->client); + + return 0; } static const struct auxiliary_device_id pmic_glink_altmode_id_table[] = { diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index 16c328497e0b..f6f4fae40399 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -367,12 +367,16 @@ static int pmic_glink_ucsi_probe(struct auxiliary_device *adev, ucsi->port_orientation[port] = desc; } - ucsi->client = devm_pmic_glink_register_client(dev, - PMIC_GLINK_OWNER_USBC, - pmic_glink_ucsi_callback, - pmic_glink_ucsi_pdr_notify, - ucsi); - return PTR_ERR_OR_ZERO(ucsi->client); + ucsi->client = devm_pmic_glink_client_alloc(dev, PMIC_GLINK_OWNER_USBC, + pmic_glink_ucsi_callback, + pmic_glink_ucsi_pdr_notify, + ucsi); + if (IS_ERR(ucsi->client)) + return PTR_ERR(ucsi->client); + + pmic_glink_client_register(ucsi->client); + + return 0; } static void pmic_glink_ucsi_remove(struct auxiliary_device *adev) diff --git a/include/linux/soc/qcom/pmic_glink.h b/include/linux/soc/qcom/pmic_glink.h index fd124aa18c81..7cddf1027752 100644 --- a/include/linux/soc/qcom/pmic_glink.h +++ b/include/linux/soc/qcom/pmic_glink.h @@ -23,10 +23,11 @@ struct pmic_glink_hdr { int pmic_glink_send(struct pmic_glink_client *client, void *data, size_t len); -struct pmic_glink_client *devm_pmic_glink_register_client(struct device *dev, - unsigned int id, - void (*cb)(const void *, size_t, void *), - void (*pdr)(void *, int), - void *priv); +struct pmic_glink_client *devm_pmic_glink_client_alloc(struct device *dev, + unsigned int id, + void (*cb)(const void *, size_t, void *), + void (*pdr)(void *, int), + void *priv); +void pmic_glink_client_register(struct pmic_glink_client *client); #endif -- cgit From 11bb2ffb679399f99041540cf662409905179e3a Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 20 Aug 2024 13:29:31 -0700 Subject: usb: typec: ucsi: Move unregister out of atomic section Commit '9329933699b3 ("soc: qcom: pmic_glink: Make client-lock non-sleeping")' moved the pmic_glink client list under a spinlock, as it is accessed by the rpmsg/glink callback, which in turn is invoked from IRQ context. This means that ucsi_unregister() is now called from atomic context, which isn't feasible as it's expecting a sleepable context. An effort is under way to get GLINK to invoke its callbacks in a sleepable context, but until then lets schedule the unregistration. A side effect of this is that ucsi_unregister() can now happen after the remote processor, and thereby the communication link with it, is gone. pmic_glink_send() is amended with a check to avoid the resulting NULL pointer dereference. This does however result in the user being informed about this error by the following entry in the kernel log: ucsi_glink.pmic_glink_ucsi pmic_glink.ucsi.0: failed to send UCSI write request: -5 Fixes: 9329933699b3 ("soc: qcom: pmic_glink: Make client-lock non-sleeping") Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Tested-by: Amit Pundir Reviewed-by: Johan Hovold Tested-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240820-pmic-glink-v6-11-races-v3-2-eec53c750a04@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/pmic_glink.c | 10 +++++++++- drivers/usb/typec/ucsi/ucsi_glink.c | 27 ++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/soc/qcom/pmic_glink.c b/drivers/soc/qcom/pmic_glink.c index 53b176d04fbd..b218460219b7 100644 --- a/drivers/soc/qcom/pmic_glink.c +++ b/drivers/soc/qcom/pmic_glink.c @@ -112,8 +112,16 @@ EXPORT_SYMBOL_GPL(pmic_glink_client_register); int pmic_glink_send(struct pmic_glink_client *client, void *data, size_t len) { struct pmic_glink *pg = client->pg; + int ret; - return rpmsg_send(pg->ept, data, len); + mutex_lock(&pg->state_lock); + if (!pg->ept) + ret = -ECONNRESET; + else + ret = rpmsg_send(pg->ept, data, len); + mutex_unlock(&pg->state_lock); + + return ret; } EXPORT_SYMBOL_GPL(pmic_glink_send); diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index f6f4fae40399..6aace19d595b 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -68,6 +68,9 @@ struct pmic_glink_ucsi { struct work_struct notify_work; struct work_struct register_work; + spinlock_t state_lock; + bool ucsi_registered; + bool pd_running; u8 read_buf[UCSI_BUF_SIZE]; }; @@ -244,8 +247,20 @@ static void pmic_glink_ucsi_notify(struct work_struct *work) static void pmic_glink_ucsi_register(struct work_struct *work) { struct pmic_glink_ucsi *ucsi = container_of(work, struct pmic_glink_ucsi, register_work); + unsigned long flags; + bool pd_running; - ucsi_register(ucsi->ucsi); + spin_lock_irqsave(&ucsi->state_lock, flags); + pd_running = ucsi->pd_running; + spin_unlock_irqrestore(&ucsi->state_lock, flags); + + if (!ucsi->ucsi_registered && pd_running) { + ucsi_register(ucsi->ucsi); + ucsi->ucsi_registered = true; + } else if (ucsi->ucsi_registered && !pd_running) { + ucsi_unregister(ucsi->ucsi); + ucsi->ucsi_registered = false; + } } static void pmic_glink_ucsi_callback(const void *data, size_t len, void *priv) @@ -269,11 +284,12 @@ static void pmic_glink_ucsi_callback(const void *data, size_t len, void *priv) static void pmic_glink_ucsi_pdr_notify(void *priv, int state) { struct pmic_glink_ucsi *ucsi = priv; + unsigned long flags; - if (state == SERVREG_SERVICE_STATE_UP) - schedule_work(&ucsi->register_work); - else if (state == SERVREG_SERVICE_STATE_DOWN) - ucsi_unregister(ucsi->ucsi); + spin_lock_irqsave(&ucsi->state_lock, flags); + ucsi->pd_running = (state == SERVREG_SERVICE_STATE_UP); + spin_unlock_irqrestore(&ucsi->state_lock, flags); + schedule_work(&ucsi->register_work); } static void pmic_glink_ucsi_destroy(void *data) @@ -320,6 +336,7 @@ static int pmic_glink_ucsi_probe(struct auxiliary_device *adev, INIT_WORK(&ucsi->register_work, pmic_glink_ucsi_register); init_completion(&ucsi->read_ack); init_completion(&ucsi->write_ack); + spin_lock_init(&ucsi->state_lock); mutex_init(&ucsi->lock); ucsi->ucsi = ucsi_create(dev, &pmic_glink_ucsi_ops); -- cgit From ad51126037a43c05f5f4af5eb262734e3e88ca59 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 20 Aug 2024 13:29:32 -0700 Subject: soc: qcom: pmic_glink: Actually communicate when remote goes down When the pmic_glink state is UP and we either receive a protection- domain (PD) notification indicating that the PD is going down, or that the whole remoteproc is going down, it's expected that the pmic_glink client instances are notified that their function has gone DOWN. This is not what the code does, which results in the client state either not updating, or being wrong in many cases. So let's fix the conditions. Fixes: 58ef4ece1e41 ("soc: qcom: pmic_glink: Introduce base PMIC GLINK driver") Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Tested-by: Amit Pundir Reviewed-by: Johan Hovold Tested-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240820-pmic-glink-v6-11-races-v3-3-eec53c750a04@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/pmic_glink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/qcom/pmic_glink.c b/drivers/soc/qcom/pmic_glink.c index b218460219b7..9606222993fd 100644 --- a/drivers/soc/qcom/pmic_glink.c +++ b/drivers/soc/qcom/pmic_glink.c @@ -191,7 +191,7 @@ static void pmic_glink_state_notify_clients(struct pmic_glink *pg) if (pg->pdr_state == SERVREG_SERVICE_STATE_UP && pg->ept) new_state = SERVREG_SERVICE_STATE_UP; } else { - if (pg->pdr_state == SERVREG_SERVICE_STATE_UP && pg->ept) + if (pg->pdr_state == SERVREG_SERVICE_STATE_DOWN || !pg->ept) new_state = SERVREG_SERVICE_STATE_DOWN; } -- cgit From 8342009efa2a5e75dce56173d7de026bcc6666d8 Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Mon, 29 Jul 2024 21:38:35 -0400 Subject: firmware: qcom: tzmem: disable sdm670 platform The Pixel 3a returns 4291821499 (-3145797 or 0xFFCFFFBB) when attempting to load the GPU firmware if tzmem is allowed. Disable it on SDM670 so the GPU can successfully probe. Signed-off-by: Richard Acayan Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20240730013834.41840-2-mailingradian@gmail.com Signed-off-by: Bjorn Andersson --- drivers/firmware/qcom/qcom_tzmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/qcom/qcom_tzmem.c b/drivers/firmware/qcom/qcom_tzmem.c index caedeef0059c..92b365178235 100644 --- a/drivers/firmware/qcom/qcom_tzmem.c +++ b/drivers/firmware/qcom/qcom_tzmem.c @@ -77,6 +77,7 @@ static bool qcom_tzmem_using_shm_bridge; /* List of machines that are known to not support SHM bridge correctly. */ static const char *const qcom_tzmem_blacklist[] = { "qcom,sc8180x", + "qcom,sdm670", /* failure in GPU firmware loading */ "qcom,sdm845", /* reset in rmtfs memory assignment */ "qcom,sm8150", /* reset in rmtfs memory assignment */ NULL -- cgit From a3ca27c405faad584af6e8e38cdafe5be73230a1 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 9 Aug 2024 08:47:15 +0200 Subject: s390/mm: Prevent lowcore vs identity mapping overlap The identity mapping position in virtual memory is randomized together with the kernel mapping. That position can never overlap with the lowcore even when the lowcore is relocated. Prevent overlapping with the lowcore to allow independent positioning of the identity mapping. With the current value of the alternative lowcore address of 0x70000 the overlap could happen in case the identity mapping is placed at zero. This is a prerequisite for uncoupling of randomization base of kernel image and identity mapping in virtual memory. Acked-by: Vasily Gorbik Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/kernel/setup.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 4ec99f73fa27..a3fea683b227 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -734,7 +734,23 @@ static void __init memblock_add_physmem_info(void) } /* - * Reserve memory used for lowcore/command line/kernel image. + * Reserve memory used for lowcore. + */ +static void __init reserve_lowcore(void) +{ + void *lowcore_start = get_lowcore(); + void *lowcore_end = lowcore_start + sizeof(struct lowcore); + void *start, *end; + + if ((void *)__identity_base < lowcore_end) { + start = max(lowcore_start, (void *)__identity_base); + end = min(lowcore_end, (void *)(__identity_base + ident_map_size)); + memblock_reserve(__pa(start), __pa(end)); + } +} + +/* + * Reserve memory used for absolute lowcore/command line/kernel image. */ static void __init reserve_kernel(void) { @@ -918,6 +934,7 @@ void __init setup_arch(char **cmdline_p) /* Do some memory reservations *before* memory is added to memblock */ reserve_pgtables(); + reserve_lowcore(); reserve_kernel(); reserve_initrd(); reserve_certificate_list(); -- cgit From 32db401965f165f7c44447d0508097f070c8f576 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 9 Aug 2024 08:47:16 +0200 Subject: s390/mm: Pin identity mapping base to zero SIE instruction performs faster when the virtual address of SIE block matches the physical one. Pin the identity mapping base to zero for the benefit of SIE and other instructions that have similar performance impact. Still, randomize the base when DEBUG_VM kernel configuration option is enabled. Suggested-by: Vasily Gorbik Reviewed-by: Christian Borntraeger Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 13 +++++++++++++ arch/s390/boot/startup.c | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a822f952f64a..c60e699e99f5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -604,6 +604,19 @@ config RANDOMIZE_BASE as a security feature that deters exploit attempts relying on knowledge of the location of kernel internals. +config RANDOMIZE_IDENTITY_BASE + bool "Randomize the address of the identity mapping base" + depends on RANDOMIZE_BASE + default DEBUG_VM + help + The identity mapping base address is pinned to zero by default. + Allow randomization of that base to expose otherwise missed + notion of physical and virtual addresses of data structures. + That does not have any impact on the base address at which the + kernel image is loaded. + + If unsure, say N + config KERNEL_IMAGE_BASE hex "Kernel image base address" range 0x100000 0x1FFFFFE0000000 if !KASAN diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index ce232552bc1c..cff34744b5a9 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -341,7 +341,8 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS)); max_mappable = max(ident_map_size, MAX_DCSS_ADDR); max_mappable = min(max_mappable, vmemmap_start); - __identity_base = round_down(vmemmap_start - max_mappable, rte_size); + if (IS_ENABLED(CONFIG_RANDOMIZE_IDENTITY_BASE)) + __identity_base = round_down(vmemmap_start - max_mappable, rte_size); return asce_limit; } -- cgit From b4f5bd60d558f6ba451d7e76aa05782c07a182a3 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 6 Aug 2024 12:06:23 +0200 Subject: s390/ap: Refine AP bus bindings complete processing With the rework of the AP bus scan and the introduction of a bindings complete completion also the timing until the userspace finally receives a AP bus binding complete uevent had increased. Unfortunately this event triggers some important jobs for preparation of KVM guests, for example the modification of card/queue masks to reassign AP resources to the alternate AP queue device driver (vfio_ap) which is the precondition for building mediated devices which may be a precondition for starting KVM guests using AP resources. This small fix now triggers the check for binding complete each time an AP device driver has registered. With this patch the bindings complete may be posted up to 30s earlier as there is no need to wait for the next AP bus scan any more. Fixes: 778412ab915d ("s390/ap: rearm APQNs bindings complete completion") Signed-off-by: Harald Freudenberger Reviewed-by: Holger Dengler Cc: stable@vger.kernel.org Acked-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 0998b17ecb37..f9f682f19415 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -971,11 +971,16 @@ int ap_driver_register(struct ap_driver *ap_drv, struct module *owner, char *name) { struct device_driver *drv = &ap_drv->driver; + int rc; drv->bus = &ap_bus_type; drv->owner = owner; drv->name = name; - return driver_register(drv); + rc = driver_register(drv); + + ap_check_bindings_complete(); + + return rc; } EXPORT_SYMBOL(ap_driver_register); -- cgit From c158ceb826068a8bbe3c9e78df420f47ba53c8a8 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 20 Aug 2024 15:59:34 -0700 Subject: soc: qcom: pd-mapper: Fix singleton refcount The Qualcomm pd-mapper is a refcounted singleton, but the refcount is never incremented, which means the as soon as any remoteproc instance stops the count will hit 0. At this point the pd-mapper QMI service is stopped, leaving firmware without access to the PD information. Stopping any other remoteproc instances will result in a use-after-free, which best case manifest itself as a refcount underflow: refcount_t: underflow; use-after-free. WARNING: CPU: 1 PID: 354 at lib/refcount.c:87 refcount_dec_and_mutex_lock+0xc4/0x148 ... Call trace: refcount_dec_and_mutex_lock+0xc4/0x148 qcom_pdm_remove+0x40/0x118 [qcom_pd_mapper] ... Fix this by incrementing the refcount, so that the pd-mapper is only torn down when the last remoteproc stops, as intended. Fixes: 1ebcde047c54 ("soc: qcom: add pd-mapper implementation") Signed-off-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240820-pd-mapper-refcount-fix-v1-1-03ea65c0309b@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/qcom_pd_mapper.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/soc/qcom/qcom_pd_mapper.c b/drivers/soc/qcom/qcom_pd_mapper.c index 9afa09c3920e..2228595a3dc5 100644 --- a/drivers/soc/qcom/qcom_pd_mapper.c +++ b/drivers/soc/qcom/qcom_pd_mapper.c @@ -635,6 +635,8 @@ static int qcom_pdm_probe(struct auxiliary_device *auxdev, ret = PTR_ERR(data); else __qcom_pdm_data = data; + } else { + refcount_inc(&__qcom_pdm_data->refcnt); } auxiliary_set_drvdata(auxdev, __qcom_pdm_data); -- cgit From 0e9fdab1e8df490354562187cdbb8dec643eae2c Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Wed, 21 Aug 2024 14:19:54 +0800 Subject: ASoC: allow module autoloading for table db1200_pids Add MODULE_DEVICE_TABLE(), so modules could be properly autoloaded based on the alias from platform_device_id table. Signed-off-by: Hongbo Li Link: https://patch.msgid.link/20240821061955.2273782-2-lihongbo22@huawei.com Signed-off-by: Mark Brown --- sound/soc/au1x/db1200.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/au1x/db1200.c b/sound/soc/au1x/db1200.c index 83a75a38705b..81abe2e18402 100644 --- a/sound/soc/au1x/db1200.c +++ b/sound/soc/au1x/db1200.c @@ -44,6 +44,7 @@ static const struct platform_device_id db1200_pids[] = { }, {}, }; +MODULE_DEVICE_TABLE(platform, db1200_pids); /*------------------------- AC97 PART ---------------------------*/ -- cgit From 5f7c98b7519a3a847d9182bd99d57ea250032ca1 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Wed, 21 Aug 2024 14:19:55 +0800 Subject: ASoC: allow module autoloading for table board_ids Add MODULE_DEVICE_TABLE(), so modules could be properly autoloaded based on the alias from platform_device_id table. Signed-off-by: Hongbo Li Link: https://patch.msgid.link/20240821061955.2273782-3-lihongbo22@huawei.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-sof-mach.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/amd/acp/acp-sof-mach.c b/sound/soc/amd/acp/acp-sof-mach.c index fc59ea34e687..b3a702dcd991 100644 --- a/sound/soc/amd/acp/acp-sof-mach.c +++ b/sound/soc/amd/acp/acp-sof-mach.c @@ -158,6 +158,8 @@ static const struct platform_device_id board_ids[] = { }, { } }; +MODULE_DEVICE_TABLE(platform, board_ids); + static struct platform_driver acp_asoc_audio = { .driver = { .name = "sof_mach", -- cgit From a6f78359ac75f24cac3c1bdd753c49c1877bcd82 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Sat, 17 Aug 2024 02:47:30 +0000 Subject: drm/xe: Fix missing workqueue destroy in xe_gt_pagefault On driver reload we never free up the memory for the pagefault and access counter workqueues. Add those destroy calls here. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Stuart Summers Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/c9a951505271dc3a7aee76de7656679f69c11518.1723862633.git.stuart.summers@intel.com (cherry picked from commit 7586fc52b14e0b8edd0d1f8a434e0de2078b7b2b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 9292d5468868..b2a7fa55bd18 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -382,6 +382,18 @@ static void pf_queue_work_func(struct work_struct *w) static void acc_queue_work_func(struct work_struct *w); +static void pagefault_fini(void *arg) +{ + struct xe_gt *gt = arg; + struct xe_device *xe = gt_to_xe(gt); + + if (!xe->info.has_usm) + return; + + destroy_workqueue(gt->usm.acc_wq); + destroy_workqueue(gt->usm.pf_wq); +} + int xe_gt_pagefault_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -409,10 +421,12 @@ int xe_gt_pagefault_init(struct xe_gt *gt) gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", WQ_UNBOUND | WQ_HIGHPRI, NUM_ACC_QUEUE); - if (!gt->usm.acc_wq) + if (!gt->usm.acc_wq) { + destroy_workqueue(gt->usm.pf_wq); return -ENOMEM; + } - return 0; + return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt); } void xe_gt_pagefault_reset(struct xe_gt *gt) -- cgit From dd3e840a33b57b92812fbec26273b3f0b4eb5ae3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 15 Aug 2024 12:35:22 -0700 Subject: drm/xe: Drop HW fence pointer to HW fence ctx The HW fence ctx objects are not ref counted rather tied to the life of an LRC object. HW fences reference the HW fence ctx, HW fences can outlive LRCs thus resulting in UAF. Drop the HW fence pointer to HW fence ctx rather just store what is needed directly in HW fence. v2: - Fix typo in commit (Ashutosh) - Use snprintf (Ashutosh) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240815193522.16008-1-matthew.brost@intel.com (cherry picked from commit 60db6f540af9f93144d5039140aa2ed17171d168) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_fence.c | 9 +++++---- drivers/gpu/drm/xe/xe_hw_fence_types.h | 7 +++++-- drivers/gpu/drm/xe/xe_trace.h | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 45a9789cf501..0b4f12be3692 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -148,20 +148,20 @@ static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev); + return dev_name(fence->xe->drm.dev); } static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return fence->ctx->name; + return fence->name; } static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - struct xe_device *xe = gt_to_xe(fence->ctx->gt); + struct xe_device *xe = fence->xe; u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); return dma_fence->error || @@ -253,7 +253,8 @@ void xe_hw_fence_init(struct dma_fence *fence, struct xe_hw_fence_ctx *ctx, struct xe_hw_fence *hw_fence = container_of(fence, typeof(*hw_fence), dma); - hw_fence->ctx = ctx; + hw_fence->xe = gt_to_xe(ctx->gt); + snprintf(hw_fence->name, sizeof(hw_fence->name), "%s", ctx->name); hw_fence->seqno_map = seqno_map; INIT_LIST_HEAD(&hw_fence->irq_link); diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h index b33c4956e8ea..364a61f4bfda 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence_types.h +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -12,6 +12,7 @@ #include #include +struct xe_device; struct xe_gt; /** @@ -61,8 +62,10 @@ struct xe_hw_fence_ctx { struct xe_hw_fence { /** @dma: base dma fence for hardware fence context */ struct dma_fence dma; - /** @ctx: hardware fence context */ - struct xe_hw_fence_ctx *ctx; + /** @xe: Xe device for hw fence driver name */ + struct xe_device *xe; + /** @name: name of hardware fence context */ + char name[MAX_FENCE_NAME_LEN]; /** @seqno_map: I/O map for seqno */ struct iosys_map seqno_map; /** @irq_link: Link in struct xe_hw_fence_irq.pending */ diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index baba14fb1e32..01837f6f609f 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_ARGS(fence), TP_STRUCT__entry( - __string(dev, __dev_name_gt(fence->ctx->gt)) + __string(dev, __dev_name_xe(fence->xe)) __field(u64, ctx) __field(u32, seqno) __field(struct xe_hw_fence *, fence) -- cgit From 9e7f30563677fbeff62d368d5d2a5ac7aaa9746a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 13:23:09 -0700 Subject: drm/xe: Free job before xe_exec_queue_put Free job depends on job->vm being valid, the last xe_exec_queue_put can destroy the VM. Prevent UAF by freeing job before xe_exec_queue_put. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Nirmoy Das Reviewed-by: Jagmeet Randhawa Link: https://patchwork.freedesktop.org/patch/msgid/20240820202309.1260755-1-matthew.brost@intel.com (cherry picked from commit 32a42c93b74c8ca6d0915ea3eba21bceff53042f) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sched_job.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 44d534e362cd..9628f9deb3c0 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -171,12 +171,13 @@ void xe_sched_job_destroy(struct kref *ref) struct xe_sched_job *job = container_of(ref, struct xe_sched_job, refcount); struct xe_device *xe = job_to_xe(job); + struct xe_exec_queue *q = job->q; xe_sched_job_free_fences(job); - xe_exec_queue_put(job->q); dma_fence_put(job->fence); drm_sched_job_cleanup(&job->drm); job_free(job); + xe_exec_queue_put(q); xe_pm_runtime_put(xe); } -- cgit From a069a22f391019e84390f4e8c1a9c531ba4fb28f Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sun, 18 Aug 2024 21:48:03 +0900 Subject: tracing: fgraph: Fix to add new fgraph_ops to array after ftrace_startup_subops() Since the register_ftrace_graph() assigns a new fgraph_ops to fgraph_array before registring it by ftrace_startup_subops(), the new fgraph_ops can be used in function_graph_enter(). In most cases, it is still OK because those fgraph_ops's hashtable is already initialized by ftrace_set_filter*() etc. But if a user registers a new fgraph_ops which does not initialize the hash list, ftrace_ops_test() in function_graph_enter() causes a NULL pointer dereference BUG because fgraph_ops->ops.func_hash is NULL. This can be reproduced by the below commands because function profiler's fgraph_ops does not initialize the hash list; # cd /sys/kernel/tracing # echo function_graph > current_tracer # echo 1 > function_profile_enabled To fix this problem, add a new fgraph_ops to fgraph_array after ftrace_startup_subops(). Thus, until the new fgraph_ops is initialized, we will see fgraph_stub on the corresponding fgraph_array entry. Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Link: https://lore.kernel.org/172398528350.293426.8347220120333730248.stgit@devnote2 Fixes: c132be2c4fcc ("function_graph: Have the instances use their own ftrace_ops for filtering") Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fgraph.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index d1d5ea2d0a1b..d7d4fb403f6f 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1206,18 +1206,24 @@ static void init_task_vars(int idx) read_unlock(&tasklist_lock); } -static void ftrace_graph_enable_direct(bool enable_branch) +static void ftrace_graph_enable_direct(bool enable_branch, struct fgraph_ops *gops) { trace_func_graph_ent_t func = NULL; trace_func_graph_ret_t retfunc = NULL; int i; - for_each_set_bit(i, &fgraph_array_bitmask, - sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) { - func = fgraph_array[i]->entryfunc; - retfunc = fgraph_array[i]->retfunc; - fgraph_direct_gops = fgraph_array[i]; - } + if (gops) { + func = gops->entryfunc; + retfunc = gops->retfunc; + fgraph_direct_gops = gops; + } else { + for_each_set_bit(i, &fgraph_array_bitmask, + sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) { + func = fgraph_array[i]->entryfunc; + retfunc = fgraph_array[i]->retfunc; + fgraph_direct_gops = fgraph_array[i]; + } + } if (WARN_ON_ONCE(!func)) return; @@ -1256,8 +1262,6 @@ int register_ftrace_graph(struct fgraph_ops *gops) ret = -ENOSPC; goto out; } - - fgraph_array[i] = gops; gops->idx = i; ftrace_graph_active++; @@ -1266,7 +1270,7 @@ int register_ftrace_graph(struct fgraph_ops *gops) ftrace_graph_disable_direct(true); if (ftrace_graph_active == 1) { - ftrace_graph_enable_direct(false); + ftrace_graph_enable_direct(false, gops); register_pm_notifier(&ftrace_suspend_notifier); ret = start_graph_tracing(); if (ret) @@ -1281,14 +1285,15 @@ int register_ftrace_graph(struct fgraph_ops *gops) } else { init_task_vars(gops->idx); } - /* Always save the function, and reset at unregistering */ gops->saved_func = gops->entryfunc; ret = ftrace_startup_subops(&graph_ops, &gops->ops, command); + if (!ret) + fgraph_array[i] = gops; + error: if (ret) { - fgraph_array[i] = &fgraph_stub; ftrace_graph_active--; gops->saved_func = NULL; fgraph_lru_release_index(i); @@ -1324,7 +1329,7 @@ void unregister_ftrace_graph(struct fgraph_ops *gops) ftrace_shutdown_subops(&graph_ops, &gops->ops, command); if (ftrace_graph_active == 1) - ftrace_graph_enable_direct(true); + ftrace_graph_enable_direct(true, NULL); else if (!ftrace_graph_active) ftrace_graph_disable_direct(false); -- cgit From 57df60e1f981fa8c288a49012a4bbb02ae0ecdbc Mon Sep 17 00:00:00 2001 From: Yang Ruibin <11162571@vivo.com> Date: Wed, 21 Aug 2024 03:59:33 -0400 Subject: thermal/debugfs: Fix the NULL vs IS_ERR() confusion in debugfs_create_dir() The debugfs_create_dir() return value is never NULL, it is either a valid pointer or an error one. Use IS_ERR() to check it. Fixes: 7ef01f228c9f ("thermal/debugfs: Add thermal debugfs information for mitigation episodes") Fixes: 755113d76786 ("thermal/debugfs: Add thermal cooling device debugfs information") Signed-off-by: Yang Ruibin <11162571@vivo.com> Link: https://patch.msgid.link/20240821075934.12145-1-11162571@vivo.com [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c index 7dd67bf48571..939d3e5f1817 100644 --- a/drivers/thermal/thermal_debugfs.c +++ b/drivers/thermal/thermal_debugfs.c @@ -178,11 +178,11 @@ struct thermal_debugfs { void thermal_debug_init(void) { d_root = debugfs_create_dir("thermal", NULL); - if (!d_root) + if (IS_ERR(d_root)) return; d_cdev = debugfs_create_dir("cooling_devices", d_root); - if (!d_cdev) + if (IS_ERR(d_cdev)) return; d_tz = debugfs_create_dir("thermal_zones", d_root); @@ -202,7 +202,7 @@ static struct thermal_debugfs *thermal_debugfs_add_id(struct dentry *d, int id) snprintf(ids, IDSLENGTH, "%d", id); thermal_dbg->d_top = debugfs_create_dir(ids, d); - if (!thermal_dbg->d_top) { + if (IS_ERR(thermal_dbg->d_top)) { kfree(thermal_dbg); return NULL; } -- cgit From bc754cc76d1bbc87be5d8b7eee05ceb0ae613bce Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 20 Aug 2024 09:56:38 +0900 Subject: tracing: Fix memory leak in fgraph storage selftest With ftrace boot-time selftest, kmemleak reported some memory leaks in the new test case for function graph storage for multiple tracers. unreferenced object 0xffff888005060080 (size 32): comm "swapper/0", pid 1, jiffies 4294676440 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 20 10 06 05 80 88 ff ff ........ ....... 54 0c 1e 81 ff ff ff ff 00 00 00 00 00 00 00 00 T............... backtrace (crc 7c93416c): [<000000000238ee6f>] __kmalloc_cache_noprof+0x11f/0x2a0 [<0000000033d2b6c5>] enter_record+0xe8/0x150 [<0000000054c38424>] match_records+0x1cd/0x230 [<00000000c775b63d>] ftrace_set_hash+0xff/0x380 [<000000007bf7208c>] ftrace_set_filter+0x70/0x90 [<00000000a5c08dda>] test_graph_storage_multi+0x2e/0xf0 [<000000006ba028ca>] trace_selftest_startup_function_graph+0x1e8/0x260 [<00000000a715d3eb>] run_tracer_selftest+0x111/0x190 [<00000000395cbf90>] register_tracer+0xdf/0x1f0 [<0000000093e67f7b>] do_one_initcall+0x141/0x3b0 [<00000000c591b682>] do_initcall_level+0x82/0xa0 [<000000004e4c6600>] do_initcalls+0x43/0x70 [<0000000034f3c4e4>] kernel_init_freeable+0x170/0x1f0 [<00000000c7a5dab2>] kernel_init+0x1a/0x1a0 [<00000000ea105947>] ret_from_fork+0x3a/0x50 [<00000000a1932e84>] ret_from_fork_asm+0x1a/0x30 ... This means filter hash allocated for the fixtures are not correctly released after the test. Free those hash lists after tests are done and split the loop for initialize fixture and register fixture for rollback. Fixes: dd120af2d5f8 ("ftrace: Add multiple fgraph storage selftest") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/172411539857.28895.13119957560263401102.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_selftest.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 97f1e4bc47dc..c4ad7cd7e778 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -942,7 +942,7 @@ static __init int test_graph_storage_multi(void) { struct fgraph_fixture *fixture; bool printed = false; - int i, ret; + int i, j, ret; pr_cont("PASSED\n"); pr_info("Testing multiple fgraph storage on a function: "); @@ -953,22 +953,35 @@ static __init int test_graph_storage_multi(void) if (ret && ret != -ENODEV) { pr_cont("*Could not set filter* "); printed = true; - goto out; + goto out2; } + } + for (j = 0; j < ARRAY_SIZE(store_bytes); j++) { + fixture = &store_bytes[j]; ret = register_ftrace_graph(&fixture->gops); if (ret) { pr_warn("Failed to init store_bytes fgraph tracing\n"); printed = true; - goto out; + goto out1; } } DYN_FTRACE_TEST_NAME(); -out: +out1: + while (--j >= 0) { + fixture = &store_bytes[j]; + unregister_ftrace_graph(&fixture->gops); + + if (fixture->error_str && !printed) { + pr_cont("*** %s ***", fixture->error_str); + printed = true; + } + } +out2: while (--i >= 0) { fixture = &store_bytes[i]; - unregister_ftrace_graph(&fixture->gops); + ftrace_free_filter(&fixture->gops.ops); if (fixture->error_str && !printed) { pr_cont("*** %s ***", fixture->error_str); -- cgit From 92764e8822d4e7f8efb5ad959fac195a7f8ea0c6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 14 Aug 2024 21:38:21 +0100 Subject: netfs, ceph: Partially revert "netfs: Replace PG_fscache by setting folio->private and marking dirty" This partially reverts commit 2ff1e97587f4d398686f52c07afde3faf3da4e5c. In addition to reverting the removal of PG_private_2 wrangling from the buffered read code[1][2], the removal of the waits for PG_private_2 from netfs_release_folio() and netfs_invalidate_folio() need reverting too. It also adds a wait into ceph_evict_inode() to wait for netfs read and copy-to-cache ops to complete. Fixes: 2ff1e97587f4 ("netfs: Replace PG_fscache by setting folio->private and marking dirty") Signed-off-by: David Howells Link: https://lore.kernel.org/r/3575457.1722355300@warthog.procyon.org.uk [1] Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=8e5ced7804cb9184c4a23f8054551240562a8eda [2] Link: https://lore.kernel.org/r/20240814203850.2240469-2-dhowells@redhat.com cc: Max Kellermann cc: Ilya Dryomov cc: Xiubo Li cc: Jeff Layton cc: Matthew Wilcox cc: ceph-devel@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Christian Brauner --- fs/ceph/inode.c | 1 + fs/netfs/misc.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 71cd70514efa..4a8eec46254b 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -695,6 +695,7 @@ void ceph_evict_inode(struct inode *inode) percpu_counter_dec(&mdsc->metric.total_inodes); + netfs_wait_for_outstanding_io(inode); truncate_inode_pages_final(&inode->i_data); if (inode->i_state & I_PINNING_NETFS_WB) ceph_fscache_unuse_cookie(inode, true); diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 83e644bd518f..554a1a4615ad 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -101,6 +101,8 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) _enter("{%lx},%zx,%zx", folio->index, offset, length); + folio_wait_private_2(folio); /* [DEPRECATED] */ + if (!folio_test_private(folio)) return; @@ -165,6 +167,11 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp) if (folio_test_private(folio)) return false; + if (unlikely(folio_test_private_2(folio))) { /* [DEPRECATED] */ + if (current_is_kswapd() || !(gfp & __GFP_FS)) + return false; + folio_wait_private_2(folio); + } fscache_note_page_release(netfs_i_cookie(ctx)); return true; } -- cgit From 524b2c6dc80d735be9ebcd2decffe2889baab65d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 15 Aug 2024 14:39:33 +0200 Subject: romfs: fix romfs_read_folio() Add the correct offset to folio_zero_tail(). Fixes: d86f2de026c5 ("romfs: Convert romfs_read_folio() to use a folio") Reported-by: Greg Ungerer Link: https://lore.kernel.org/r/Zr0GTnPHfeA0P8nb@casper.infradead.org Signed-off-by: Christian Brauner --- fs/romfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 68758b6fed94..0addcc849ff2 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -126,7 +126,7 @@ static int romfs_read_folio(struct file *file, struct folio *folio) } } - buf = folio_zero_tail(folio, fillsize, buf); + buf = folio_zero_tail(folio, fillsize, buf + fillsize); kunmap_local(buf); folio_end_read(folio, ret == 0); return ret; -- cgit From 232590ea7fc125986a526e03081b98e5783f70d2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 19 Aug 2024 10:38:23 +0200 Subject: Revert "pidfd: prevent creation of pidfds for kthreads" This reverts commit 3b5bbe798b2451820e74243b738268f51901e7d0. Eric reported that systemd-shutdown gets broken by blocking the creating of pidfds for kthreads as older versions seems to rely on being able to create a pidfd for any process in /proc. Reported-by: Eric Biggers Link: https://lore.kernel.org/r/20240818035818.GA1929@sol.localdomain Signed-off-by: Christian Brauner --- kernel/fork.c | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 18bdc87209d0..cc760491f201 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2053,23 +2053,10 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re */ int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret) { - if (!pid) - return -EINVAL; - - scoped_guard(rcu) { - struct task_struct *tsk; - - if (flags & PIDFD_THREAD) - tsk = pid_task(pid, PIDTYPE_PID); - else - tsk = pid_task(pid, PIDTYPE_TGID); - if (!tsk) - return -EINVAL; + bool thread = flags & PIDFD_THREAD; - /* Don't create pidfds for kernel threads for now. */ - if (tsk->flags & PF_KTHREAD) - return -EINVAL; - } + if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID)) + return -EINVAL; return __pidfd_prepare(pid, flags, ret); } @@ -2416,12 +2403,6 @@ __latent_entropy struct task_struct *copy_process( if (clone_flags & CLONE_PIDFD) { int flags = (clone_flags & CLONE_THREAD) ? PIDFD_THREAD : 0; - /* Don't create pidfds for kernel threads for now. */ - if (args->kthread) { - retval = -EINVAL; - goto bad_fork_free_pid; - } - /* Note that no task has been attached to @pid yet. */ retval = __pidfd_prepare(pid, flags, &pidfile); if (retval < 0) -- cgit From 5d6a6c7454ebaefba518e334750b05700131923b Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 8 Aug 2024 12:00:57 +0530 Subject: PCI: qcom-ep: Disable MHI RAM data parity error interrupt for SA8775P SoC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SA8775P SoC has support for the hardware parity check feature on the MHI RAM (entity that holds MHI registers, etc.) But due to a hardware bug in the parity check logic, the data parity error interrupt is getting generated all the time when using MHI. So the hardware team has suggested disabling the parity check error to work around the hardware bug. Mask the parity error interrupt in PARF_INT_ALL_5_MASK register. Fixes: 58d0d3e032b3 ("PCI: qcom-ep: Add support for SA8775P SOC") Link: https://lore.kernel.org/linux-pci/20240808063057.7394-1-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/dwc/pcie-qcom-ep.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c b/drivers/pci/controller/dwc/pcie-qcom-ep.c index 236229f66c80..a9b263f749b6 100644 --- a/drivers/pci/controller/dwc/pcie-qcom-ep.c +++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c @@ -58,6 +58,7 @@ #define PARF_DEBUG_CNT_AUX_CLK_IN_L1SUB_L2 0xc88 #define PARF_DEVICE_TYPE 0x1000 #define PARF_BDF_TO_SID_CFG 0x2c00 +#define PARF_INT_ALL_5_MASK 0x2dcc /* PARF_INT_ALL_{STATUS/CLEAR/MASK} register fields */ #define PARF_INT_ALL_LINK_DOWN BIT(1) @@ -127,6 +128,9 @@ /* PARF_CFG_BITS register fields */ #define PARF_CFG_BITS_REQ_EXIT_L1SS_MSI_LTR_EN BIT(1) +/* PARF_INT_ALL_5_MASK fields */ +#define PARF_INT_ALL_5_MHI_RAM_DATA_PARITY_ERR BIT(0) + /* ELBI registers */ #define ELBI_SYS_STTS 0x08 #define ELBI_CS2_ENABLE 0xa4 @@ -158,10 +162,12 @@ enum qcom_pcie_ep_link_status { * struct qcom_pcie_ep_cfg - Per SoC config struct * @hdma_support: HDMA support on this SoC * @override_no_snoop: Override NO_SNOOP attribute in TLP to enable cache snooping + * @disable_mhi_ram_parity_check: Disable MHI RAM data parity error check */ struct qcom_pcie_ep_cfg { bool hdma_support; bool override_no_snoop; + bool disable_mhi_ram_parity_check; }; /** @@ -480,6 +486,12 @@ static int qcom_pcie_perst_deassert(struct dw_pcie *pci) PARF_INT_ALL_LINK_UP | PARF_INT_ALL_EDMA; writel_relaxed(val, pcie_ep->parf + PARF_INT_ALL_MASK); + if (pcie_ep->cfg && pcie_ep->cfg->disable_mhi_ram_parity_check) { + val = readl_relaxed(pcie_ep->parf + PARF_INT_ALL_5_MASK); + val &= ~PARF_INT_ALL_5_MHI_RAM_DATA_PARITY_ERR; + writel_relaxed(val, pcie_ep->parf + PARF_INT_ALL_5_MASK); + } + ret = dw_pcie_ep_init_registers(&pcie_ep->pci.ep); if (ret) { dev_err(dev, "Failed to complete initialization: %d\n", ret); @@ -901,6 +913,7 @@ static void qcom_pcie_ep_remove(struct platform_device *pdev) static const struct qcom_pcie_ep_cfg cfg_1_34_0 = { .hdma_support = true, .override_no_snoop = true, + .disable_mhi_ram_parity_check = true, }; static const struct of_device_id qcom_pcie_ep_match[] = { -- cgit From db1ec60fba4a995975dc1dc837b408db0d666801 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 22 Jul 2024 18:41:28 +0530 Subject: PCI: qcom: Use OPP only if the platform supports it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With commit 5b6272e0efd5 ("PCI: qcom: Add OPP support to scale performance"), OPP was used to control the interconnect and power domains if the platform supported OPP. Also to maintain the backward compatibility with platforms not supporting OPP but just ICC, the above mentioned commit assumed that if ICC was not available on the platform, it would resort to OPP. Unfortunately, some old platforms don't support either ICC or OPP. On those platforms, resorting to OPP in the absence of ICC throws below errors from OPP core during suspend and resume: qcom-pcie 1c08000.pcie: dev_pm_opp_set_opp: device opp doesn't exist qcom-pcie 1c08000.pcie: _find_key: OPP table not found (-19) Also, it doesn't make sense to invoke the OPP APIs when OPP is not supported by the platform at all. Add a "use_pm_opp" flag to identify whether OPP is supported and use it to control invoking the OPP APIs. Fixes: 5b6272e0efd5 ("PCI: qcom: Add OPP support to scale performance") Link: https://lore.kernel.org/linux-pci/20240722131128.32470-1-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Mayank Rana --- drivers/pci/controller/dwc/pcie-qcom.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 0180edf3310e..6f953e32d990 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -261,6 +261,7 @@ struct qcom_pcie { const struct qcom_pcie_cfg *cfg; struct dentry *debugfs; bool suspended; + bool use_pm_opp; }; #define to_qcom_pcie(x) dev_get_drvdata((x)->dev) @@ -1433,7 +1434,7 @@ static void qcom_pcie_icc_opp_update(struct qcom_pcie *pcie) dev_err(pci->dev, "Failed to set bandwidth for PCIe-MEM interconnect path: %d\n", ret); } - } else { + } else if (pcie->use_pm_opp) { freq_mbps = pcie_dev_speed_mbps(pcie_link_speed[speed]); if (freq_mbps < 0) return; @@ -1592,6 +1593,8 @@ static int qcom_pcie_probe(struct platform_device *pdev) max_freq); goto err_pm_runtime_put; } + + pcie->use_pm_opp = true; } else { /* Skip ICC init if OPP is supported as it is handled by OPP */ ret = qcom_pcie_icc_init(pcie); @@ -1683,7 +1686,7 @@ static int qcom_pcie_suspend_noirq(struct device *dev) if (ret) dev_err(dev, "Failed to disable CPU-PCIe interconnect path: %d\n", ret); - if (!pcie->icc_mem) + if (pcie->use_pm_opp) dev_pm_opp_set_opp(pcie->pci->dev, NULL); } return ret; -- cgit From b128ed5ab27330deeeaf51ea8bb69f1442a96f7f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 19 Aug 2024 17:06:21 +0200 Subject: udp: fix receiving fraglist GSO packets When assembling fraglist GSO packets, udp4_gro_complete does not set skb->csum_start, which makes the extra validation in __udp_gso_segment fail. Fixes: 89add40066f9 ("net: drop bad gso csum_start and offset in virtio_net_hdr") Signed-off-by: Felix Fietkau Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20240819150621.59833-1-nbd@nbd.name Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index b254a5dadfcf..d842303587af 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -279,7 +279,8 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, return ERR_PTR(-EINVAL); if (unlikely(skb_checksum_start(gso_skb) != - skb_transport_header(gso_skb))) + skb_transport_header(gso_skb) && + !(skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST))) return ERR_PTR(-EINVAL); /* We don't know if egress device can segment and checksum the packet -- cgit From f8669d7b5f5d2d88959456ae9123d8bb6fdc1ebe Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 20 Aug 2024 12:53:47 +0200 Subject: selftests: mlxsw: ethtool_lanes: Source ethtool lib from correct path Source the ethtool library from the correct path and avoid the following error: ./ethtool_lanes.sh: line 14: ./../../../net/forwarding/ethtool_lib.sh: No such file or directory Fixes: 40d269c000bd ("selftests: forwarding: Move several selftests") Signed-off-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://patch.msgid.link/2112faff02e536e1ac14beb4c2be09c9574b90ae.1724150067.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh index 877cd6df94a1..fe905a7f34b3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh @@ -2,6 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 lib_dir=$(dirname $0)/../../../net/forwarding +ethtool_lib_dir=$(dirname $0)/../hw ALL_TESTS=" autoneg @@ -11,7 +12,7 @@ ALL_TESTS=" NUM_NETIFS=2 : ${TIMEOUT:=30000} # ms source $lib_dir/lib.sh -source $lib_dir/ethtool_lib.sh +source $ethtool_lib_dir/ethtool_lib.sh setup_prepare() { -- cgit From 007d4271a5f10638cba6f0b99698557ef30014b9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Aug 2024 16:20:53 +0000 Subject: netpoll: do not export netpoll_poll_[disable|enable]() netpoll_poll_disable() and netpoll_poll_enable() are only used from core networking code, there is no need to export them. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240820162053.3870927-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 55bcacf67df3..d657b042d5a0 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -228,7 +228,6 @@ void netpoll_poll_disable(struct net_device *dev) down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); } -EXPORT_SYMBOL(netpoll_poll_disable); void netpoll_poll_enable(struct net_device *dev) { @@ -239,7 +238,6 @@ void netpoll_poll_enable(struct net_device *dev) up(&ni->dev_lock); rcu_read_unlock(); } -EXPORT_SYMBOL(netpoll_poll_enable); static void refill_skbs(void) { -- cgit From faa389b2fbaaec7fd27a390b4896139f9da662e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Aug 2024 16:08:57 +0000 Subject: ipv6: prevent UAF in ip6_send_skb() syzbot reported an UAF in ip6_send_skb() [1] After ip6_local_out() has returned, we no longer can safely dereference rt, unless we hold rcu_read_lock(). A similar issue has been fixed in commit a688caa34beb ("ipv6: take rcu lock in rawv6_send_hdrinc()") Another potential issue in ip6_finish_output2() is handled in a separate patch. [1] BUG: KASAN: slab-use-after-free in ip6_send_skb+0x18d/0x230 net/ipv6/ip6_output.c:1964 Read of size 8 at addr ffff88806dde4858 by task syz.1.380/6530 CPU: 1 UID: 0 PID: 6530 Comm: syz.1.380 Not tainted 6.11.0-rc3-syzkaller-00306-gdf6cbc62cc9b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 Call Trace: __dump_stack lib/dump_stack.c:93 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:119 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 kasan_report+0x143/0x180 mm/kasan/report.c:601 ip6_send_skb+0x18d/0x230 net/ipv6/ip6_output.c:1964 rawv6_push_pending_frames+0x75c/0x9e0 net/ipv6/raw.c:588 rawv6_sendmsg+0x19c7/0x23c0 net/ipv6/raw.c:926 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x1a6/0x270 net/socket.c:745 sock_write_iter+0x2dd/0x400 net/socket.c:1160 do_iter_readv_writev+0x60a/0x890 vfs_writev+0x37c/0xbb0 fs/read_write.c:971 do_writev+0x1b1/0x350 fs/read_write.c:1018 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f936bf79e79 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f936cd7f038 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 00007f936c115f80 RCX: 00007f936bf79e79 RDX: 0000000000000001 RSI: 0000000020000040 RDI: 0000000000000004 RBP: 00007f936bfe7916 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007f936c115f80 R15: 00007fff2860a7a8 Allocated by task 6530: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 unpoison_slab_object mm/kasan/common.c:312 [inline] __kasan_slab_alloc+0x66/0x80 mm/kasan/common.c:338 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slub.c:3988 [inline] slab_alloc_node mm/slub.c:4037 [inline] kmem_cache_alloc_noprof+0x135/0x2a0 mm/slub.c:4044 dst_alloc+0x12b/0x190 net/core/dst.c:89 ip6_blackhole_route+0x59/0x340 net/ipv6/route.c:2670 make_blackhole net/xfrm/xfrm_policy.c:3120 [inline] xfrm_lookup_route+0xd1/0x1c0 net/xfrm/xfrm_policy.c:3313 ip6_dst_lookup_flow+0x13e/0x180 net/ipv6/ip6_output.c:1257 rawv6_sendmsg+0x1283/0x23c0 net/ipv6/raw.c:898 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x1a6/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2597 ___sys_sendmsg net/socket.c:2651 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2680 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 45: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x40/0x50 mm/kasan/generic.c:579 poison_slab_object+0xe0/0x150 mm/kasan/common.c:240 __kasan_slab_free+0x37/0x60 mm/kasan/common.c:256 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2252 [inline] slab_free mm/slub.c:4473 [inline] kmem_cache_free+0x145/0x350 mm/slub.c:4548 dst_destroy+0x2ac/0x460 net/core/dst.c:124 rcu_do_batch kernel/rcu/tree.c:2569 [inline] rcu_core+0xafd/0x1830 kernel/rcu/tree.c:2843 handle_softirqs+0x2c4/0x970 kernel/softirq.c:554 __do_softirq kernel/softirq.c:588 [inline] invoke_softirq kernel/softirq.c:428 [inline] __irq_exit_rcu+0xf4/0x1c0 kernel/softirq.c:637 irq_exit_rcu+0x9/0x30 kernel/softirq.c:649 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1043 [inline] sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1043 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702 Last potentially related work creation: kasan_save_stack+0x3f/0x60 mm/kasan/common.c:47 __kasan_record_aux_stack+0xac/0xc0 mm/kasan/generic.c:541 __call_rcu_common kernel/rcu/tree.c:3106 [inline] call_rcu+0x167/0xa70 kernel/rcu/tree.c:3210 refdst_drop include/net/dst.h:263 [inline] skb_dst_drop include/net/dst.h:275 [inline] nf_ct_frag6_queue net/ipv6/netfilter/nf_conntrack_reasm.c:306 [inline] nf_ct_frag6_gather+0xb9a/0x2080 net/ipv6/netfilter/nf_conntrack_reasm.c:485 ipv6_defrag+0x2c8/0x3c0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:67 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xc3/0x220 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] __ip6_local_out+0x6fa/0x800 net/ipv6/output_core.c:143 ip6_local_out+0x26/0x70 net/ipv6/output_core.c:153 ip6_send_skb+0x112/0x230 net/ipv6/ip6_output.c:1959 rawv6_push_pending_frames+0x75c/0x9e0 net/ipv6/raw.c:588 rawv6_sendmsg+0x19c7/0x23c0 net/ipv6/raw.c:926 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x1a6/0x270 net/socket.c:745 sock_write_iter+0x2dd/0x400 net/socket.c:1160 do_iter_readv_writev+0x60a/0x890 Fixes: 0625491493d9 ("ipv6: ip6_push_pending_frames() should increment IPSTATS_MIB_OUTDISCARDS") Signed-off-by: Eric Dumazet Reported-by: syzbot Reviewed-by: David Ahern Link: https://patch.msgid.link/20240820160859.3786976-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ab504d31f0cd..f7b53effc80f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1956,6 +1956,7 @@ int ip6_send_skb(struct sk_buff *skb) struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); int err; + rcu_read_lock(); err = ip6_local_out(net, skb->sk, skb); if (err) { if (err > 0) @@ -1965,6 +1966,7 @@ int ip6_send_skb(struct sk_buff *skb) IPSTATS_MIB_OUTDISCARDS); } + rcu_read_unlock(); return err; } -- cgit From da273b377ae0d9bd255281ed3c2adb228321687b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Aug 2024 16:08:58 +0000 Subject: ipv6: fix possible UAF in ip6_finish_output2() If skb_expand_head() returns NULL, skb has been freed and associated dst/idev could also have been freed. We need to hold rcu_read_lock() to make sure the dst and associated idev are alive. Fixes: 5796015fa968 ("ipv6: allocate enough headroom in ip6_finish_output2()") Signed-off-by: Eric Dumazet Cc: Vasily Averin Reviewed-by: David Ahern Link: https://patch.msgid.link/20240820160859.3786976-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f7b53effc80f..1b9ebee7308f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -70,11 +70,15 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * /* Be paranoid, rather than too clever. */ if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) { + /* Make sure idev stays alive */ + rcu_read_lock(); skb = skb_expand_head(skb, hh_len); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); return -ENOMEM; } + rcu_read_unlock(); } hdr = ipv6_hdr(skb); -- cgit From 2d5ff7e339d04622d8282661df36151906d0e1c7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Aug 2024 16:08:59 +0000 Subject: ipv6: prevent possible UAF in ip6_xmit() If skb_expand_head() returns NULL, skb has been freed and the associated dst/idev could also have been freed. We must use rcu_read_lock() to prevent a possible UAF. Fixes: 0c9f227bee11 ("ipv6: use skb_expand_head in ip6_xmit") Signed-off-by: Eric Dumazet Cc: Vasily Averin Reviewed-by: David Ahern Link: https://patch.msgid.link/20240820160859.3786976-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1b9ebee7308f..f26841f1490f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -287,11 +287,15 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, head_room += opt->opt_nflen + opt->opt_flen; if (unlikely(head_room > skb_headroom(skb))) { + /* Make sure idev stays alive */ + rcu_read_lock(); skb = skb_expand_head(skb, head_room); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); return -ENOBUFS; } + rcu_read_unlock(); } if (opt) { -- cgit From 8baeef7616d5194045c5a6b97fd1246b87c55b13 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Tue, 20 Aug 2024 13:34:15 -0700 Subject: bnxt_en: Fix double DMA unmapping for XDP_REDIRECT Remove the dma_unmap_page_attrs() call in the driver's XDP_REDIRECT code path. This should have been removed when we let the page pool handle the DMA mapping. This bug causes the warning: WARNING: CPU: 7 PID: 59 at drivers/iommu/dma-iommu.c:1198 iommu_dma_unmap_page+0xd5/0x100 CPU: 7 PID: 59 Comm: ksoftirqd/7 Tainted: G W 6.8.0-1010-gcp #11-Ubuntu Hardware name: Dell Inc. PowerEdge R7525/0PYVT1, BIOS 2.15.2 04/02/2024 RIP: 0010:iommu_dma_unmap_page+0xd5/0x100 Code: 89 ee 48 89 df e8 cb f2 69 ff 48 83 c4 08 5b 41 5c 41 5d 41 5e 41 5f 5d 31 c0 31 d2 31 c9 31 f6 31 ff 45 31 c0 e9 ab 17 71 00 <0f> 0b 48 83 c4 08 5b 41 5c 41 5d 41 5e 41 5f 5d 31 c0 31 d2 31 c9 RSP: 0018:ffffab1fc0597a48 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff99ff838280c8 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffab1fc0597a78 R08: 0000000000000002 R09: ffffab1fc0597c1c R10: ffffab1fc0597cd3 R11: ffff99ffe375acd8 R12: 00000000e65b9000 R13: 0000000000000050 R14: 0000000000001000 R15: 0000000000000002 FS: 0000000000000000(0000) GS:ffff9a06efb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000565c34c37210 CR3: 00000005c7e3e000 CR4: 0000000000350ef0 ? show_regs+0x6d/0x80 ? __warn+0x89/0x150 ? iommu_dma_unmap_page+0xd5/0x100 ? report_bug+0x16a/0x190 ? handle_bug+0x51/0xa0 ? exc_invalid_op+0x18/0x80 ? iommu_dma_unmap_page+0xd5/0x100 ? iommu_dma_unmap_page+0x35/0x100 dma_unmap_page_attrs+0x55/0x220 ? bpf_prog_4d7e87c0d30db711_xdp_dispatcher+0x64/0x9f bnxt_rx_xdp+0x237/0x520 [bnxt_en] bnxt_rx_pkt+0x640/0xdd0 [bnxt_en] __bnxt_poll_work+0x1a1/0x3d0 [bnxt_en] bnxt_poll+0xaa/0x1e0 [bnxt_en] __napi_poll+0x33/0x1e0 net_rx_action+0x18a/0x2f0 Fixes: 578fcfd26e2a ("bnxt_en: Let the page pool manage the DMA mapping") Reviewed-by: Andy Gospodarek Reviewed-by: Kalesh AP Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20240820203415.168178-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 345681d5007e..f88b641533fc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -297,11 +297,6 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, * redirect is coming from a frame received by the * bnxt_en driver. */ - rx_buf = &rxr->rx_buf_ring[cons]; - mapping = rx_buf->mapping - bp->rx_dma_offset; - dma_unmap_page_attrs(&pdev->dev, mapping, - BNXT_RX_PAGE_SIZE, bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); /* if we are unable to allocate a new buffer, abort and reuse */ if (bnxt_alloc_rx_data(bp, rxr, rxr->rx_prod, GFP_ATOMIC)) { -- cgit From 3f53d050416e88122d53aabbadb1fede998004da Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 12:22:23 -0400 Subject: bcachefs: bch2_data_update_init() cleanup Factor out some helpers - this function has gotten much too big. Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 179 ++++++++++++++++++++++++++-------------------- 1 file changed, 101 insertions(+), 78 deletions(-) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 1ca628e93e87..5f49f4953b19 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -20,6 +20,76 @@ #include "subvolume.h" #include "trace.h" +static void bkey_put_dev_refs(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) + bch2_dev_put(bch2_dev_have_ref(c, ptr->dev)); +} + +static bool bkey_get_dev_refs(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) { + if (!bch2_dev_tryget(c, ptr->dev)) { + bkey_for_each_ptr(ptrs, ptr2) { + if (ptr2 == ptr) + break; + bch2_dev_put(bch2_dev_have_ref(c, ptr2->dev)); + } + return false; + } + } + return true; +} + +static void bkey_nocow_unlock(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); + struct bpos bucket = PTR_BUCKET_POS(ca, ptr); + + bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0); + } +} + +static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); + struct bpos bucket = PTR_BUCKET_POS(ca, ptr); + + if (ctxt) { + bool locked; + + move_ctxt_wait_event(ctxt, + (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) || + list_empty(&ctxt->ios)); + + if (!locked) + bch2_bucket_nocow_lock(&c->nocow_locks, bucket, 0); + } else { + if (!bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) { + bkey_for_each_ptr(ptrs, ptr2) { + if (ptr2 == ptr) + break; + + bucket = PTR_BUCKET_POS(ca, ptr2); + bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0); + } + return false; + } + } + } + return true; +} + static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k) { if (trace_move_extent_finish_enabled()) { @@ -355,17 +425,11 @@ void bch2_data_update_read_done(struct data_update *m, void bch2_data_update_exit(struct data_update *update) { struct bch_fs *c = update->op.c; - struct bkey_ptrs_c ptrs = - bch2_bkey_ptrs_c(bkey_i_to_s_c(update->k.k)); - - bkey_for_each_ptr(ptrs, ptr) { - struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); - if (c->opts.nocow_enabled) - bch2_bucket_nocow_unlock(&c->nocow_locks, - PTR_BUCKET_POS(ca, ptr), 0); - bch2_dev_put(ca); - } + struct bkey_s_c k = bkey_i_to_s_c(update->k.k); + if (c->opts.nocow_enabled) + bkey_nocow_unlock(c, k); + bkey_put_dev_refs(c, k); bch2_bkey_buf_exit(&update->k, c); bch2_disk_reservation_put(c, &update->op.res); bch2_bio_free_pages_pool(c, &update->op.wbio.bio); @@ -546,7 +610,6 @@ int bch2_data_update_init(struct btree_trans *trans, const union bch_extent_entry *entry; struct extent_ptr_decoded p; unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas; - unsigned ptrs_locked = 0; int ret = 0; /* @@ -557,6 +620,15 @@ int bch2_data_update_init(struct btree_trans *trans, if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot))) return -BCH_ERR_data_update_done; + if (!bkey_get_dev_refs(c, k)) + return -BCH_ERR_data_update_done; + + if (c->opts.nocow_enabled && + !bkey_nocow_lock(c, ctxt, k)) { + bkey_put_dev_refs(c, k); + return -BCH_ERR_nocow_lock_blocked; + } + bch2_bkey_buf_init(&m->k); bch2_bkey_buf_reassemble(&m->k, c, k); m->btree_id = btree_id; @@ -578,40 +650,24 @@ int bch2_data_update_init(struct btree_trans *trans, m->op.compression_opt = background_compression(io_opts); m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK; - bkey_for_each_ptr(ptrs, ptr) { - if (!bch2_dev_tryget(c, ptr->dev)) { - bkey_for_each_ptr(ptrs, ptr2) { - if (ptr2 == ptr) - break; - bch2_dev_put(bch2_dev_have_ref(c, ptr2->dev)); - } - return -BCH_ERR_data_update_done; - } - } - unsigned durability_have = 0, durability_removing = 0; i = 0; bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - struct bch_dev *ca = bch2_dev_have_ref(c, p.ptr.dev); - struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); - bool locked; - - rcu_read_lock(); - if (((1U << i) & m->data_opts.rewrite_ptrs)) { - BUG_ON(p.ptr.cached); - - if (crc_is_compressed(p.crc)) - reserve_sectors += k.k->size; - - m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p); - durability_removing += bch2_extent_ptr_desired_durability(c, &p); - } else if (!p.ptr.cached && - !((1U << i) & m->data_opts.kill_ptrs)) { - bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); - durability_have += bch2_extent_ptr_durability(c, &p); + if (!p.ptr.cached) { + rcu_read_lock(); + if (BIT(i) & m->data_opts.rewrite_ptrs) { + if (crc_is_compressed(p.crc)) + reserve_sectors += k.k->size; + + m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p); + durability_removing += bch2_extent_ptr_desired_durability(c, &p); + } else if (!(BIT(i) & m->data_opts.kill_ptrs)) { + bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); + durability_have += bch2_extent_ptr_durability(c, &p); + } + rcu_read_unlock(); } - rcu_read_unlock(); /* * op->csum_type is normally initialized from the fs/file's @@ -626,24 +682,6 @@ int bch2_data_update_init(struct btree_trans *trans, if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) m->op.incompressible = true; - if (c->opts.nocow_enabled) { - if (ctxt) { - move_ctxt_wait_event(ctxt, - (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, - bucket, 0)) || - list_empty(&ctxt->ios)); - - if (!locked) - bch2_bucket_nocow_lock(&c->nocow_locks, bucket, 0); - } else { - if (!bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) { - ret = -BCH_ERR_nocow_lock_blocked; - goto err; - } - } - ptrs_locked |= (1U << i); - } - i++; } @@ -664,7 +702,7 @@ int bch2_data_update_init(struct btree_trans *trans, /* if iter == NULL, it's just a promote */ if (iter) ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts); - goto done; + goto out; } m->op.nr_replicas = min(durability_removing, durability_required) + @@ -684,8 +722,7 @@ int bch2_data_update_init(struct btree_trans *trans, bch2_data_update_to_text(&buf, m); WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf); printbuf_exit(&buf); - ret = -BCH_ERR_data_update_done; - goto done; + goto out; } m->op.nr_replicas_required = m->op.nr_replicas; @@ -696,30 +733,16 @@ int bch2_data_update_init(struct btree_trans *trans, ? 0 : BCH_DISK_RESERVATION_NOFAIL); if (ret) - goto err; + goto out; } if (bkey_extent_is_unwritten(k)) { bch2_update_unwritten_extent(trans, m); - goto done; + goto out; } return 0; -err: - i = 0; - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - struct bch_dev *ca = bch2_dev_have_ref(c, p.ptr.dev); - struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); - if ((1U << i) & ptrs_locked) - bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0); - bch2_dev_put(ca); - i++; - } - - bch2_bkey_buf_exit(&m->k, c); - bch2_bio_free_pages_pool(c, &m->op.wbio.bio); - return ret; -done: +out: bch2_data_update_exit(m); return ret ?: -BCH_ERR_data_update_done; } -- cgit From 8cc0e50614520c6c609c6ae32a65d0591b7865a1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 13:13:39 -0400 Subject: bcachefs: Fix "trying to move an extent, but nr_replicas=0" data_update_init() does a bunch of complicated stuff to decide how many replicas to add, since we only want to increase an extent's durability on an explicit rereplicate, but extent pointers may be on devices with different durability settings. There was a corner case when evacuating a device that had been set to durability=0 after data had been written to it, and extents on that device had already been rereplicated - then evacuate only needs to drop pointers on that device, not move them. So the assert for !m->op.nr_replicas was spurious; this was a perfectly legitimate case that needed to be handled. Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 5f49f4953b19..65176d51b502 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -695,16 +695,6 @@ int bch2_data_update_init(struct btree_trans *trans, * Increasing replication is an explicit operation triggered by * rereplicate, currently, so that users don't get an unexpected -ENOSPC */ - if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) && - !durability_required) { - m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs; - m->data_opts.rewrite_ptrs = 0; - /* if iter == NULL, it's just a promote */ - if (iter) - ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts); - goto out; - } - m->op.nr_replicas = min(durability_removing, durability_required) + m->data_opts.extra_replicas; @@ -716,17 +706,22 @@ int bch2_data_update_init(struct btree_trans *trans, if (!(durability_have + durability_removing)) m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1); - if (!m->op.nr_replicas) { - struct printbuf buf = PRINTBUF; + m->op.nr_replicas_required = m->op.nr_replicas; - bch2_data_update_to_text(&buf, m); - WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf); - printbuf_exit(&buf); + /* + * It might turn out that we don't need any new replicas, if the + * replicas or durability settings have been changed since the extent + * was written: + */ + if (!m->op.nr_replicas) { + m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs; + m->data_opts.rewrite_ptrs = 0; + /* if iter == NULL, it's just a promote */ + if (iter) + ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts); goto out; } - m->op.nr_replicas_required = m->op.nr_replicas; - if (reserve_sectors) { ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors, m->data_opts.extra_replicas -- cgit From 548e7f51679bf0ec3cdc2027d780c5d06a2a7ac6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 13:24:26 -0400 Subject: bcachefs: setting bcachefs_effective.* xattrs is a noop bcachefs_effective.* xattrs show the options inherited from parent directories (as well as explicitly set); this namespace is not for setting bcachefs options. Change the .set() handler to a noop so that if e.g. rsync is copying xattrs it'll do the right thing, and only copy xattrs in the bcachefs.* namespace. We don't want to return an error, because that will cause rsync to bail out or get spammy. Signed-off-by: Kent Overstreet --- fs/bcachefs/xattr.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index f2b4c17a0307..331f944d73dc 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -612,10 +612,20 @@ static int bch2_xattr_bcachefs_get_effective( name, buffer, size, true); } +/* Noop - xattrs in the bcachefs_effective namespace are inherited */ +static int bch2_xattr_bcachefs_set_effective(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *dentry, struct inode *vinode, + const char *name, const void *value, + size_t size, int flags) +{ + return 0; +} + static const struct xattr_handler bch_xattr_bcachefs_effective_handler = { .prefix = "bcachefs_effective.", .get = bch2_xattr_bcachefs_get_effective, - .set = bch2_xattr_bcachefs_set, + .set = bch2_xattr_bcachefs_set_effective, }; #endif /* NO_BCACHEFS_FS */ -- cgit From 49203a6b9d12bfd1a223a67847a631a78f1cd782 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 15:08:12 -0400 Subject: bcachefs: Fix failure to relock in btree_node_get() discovered by new trans->locked asserts Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index f5d85b50b6f2..cc778d7e769e 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -974,6 +974,10 @@ retry: bch2_btree_node_wait_on_read(b); + ret = bch2_trans_relock(trans); + if (ret) + return ERR_PTR(ret); + /* * should_be_locked is not set on this path yet, so we need to * relock it specifically: -- cgit From e150a7e89c4727176d07f5a0a8966fc2af05821c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 20:18:34 -0400 Subject: bcachefs: Fix bch2_trigger_alloc assert On testing on an old mangled filesystem, we missed a case. Fixes: bd864bc2d907 ("bcachefs: Fix bch2_trigger_alloc when upgrading from old versions") Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index fd3a2522bc3e..488d0710f7b9 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -829,7 +829,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, if (likely(new.k->type == KEY_TYPE_alloc_v4)) { new_a = bkey_s_to_alloc_v4(new).v; } else { - BUG_ON(!(flags & BTREE_TRIGGER_gc)); + BUG_ON(!(flags & (BTREE_TRIGGER_gc|BTREE_TRIGGER_check_repair))); struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c); ret = PTR_ERR_OR_ZERO(new_ka); -- cgit From c2a503f3e98e191d86738f5438a3a2b69575c830 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Aug 2024 20:38:49 -0400 Subject: bcachefs: Fix bch2_bucket_gens_init() Comparing the wrong bpos - this was missed because normally bucket_gens_init() runs on brand new filesystems, but this bug caused it to overwrite bucket_gens keys with 0s when upgrading ancient filesystems. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 488d0710f7b9..ac933142aeda 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -556,7 +556,7 @@ int bch2_bucket_gens_init(struct bch_fs *c) struct bpos pos = alloc_gens_pos(iter.pos, &offset); int ret2 = 0; - if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) { + if (have_bucket_gens_key && !bkey_eq(g.k.p, pos)) { ret2 = bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?: bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); if (ret2) -- cgit From b8db1bd8020d5fecb3bf46cd8b954a657c20ba14 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 Aug 2024 16:13:16 -0400 Subject: bcachefs: fix time_stats_to_text() Fixes: 7423330e30ab ("bcachefs: prt_printf() now respects \r\n\t") Signed-off-by: Kent Overstreet --- fs/bcachefs/util.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 138320eaa2ad..1b8554460af4 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -416,7 +416,6 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats printbuf_tabstop_push(out, TABSTOP_SIZE + 2); prt_printf(out, "\tsince mount\r\trecent\r\n"); - prt_printf(out, "recent"); printbuf_tabstops_reset(out); printbuf_tabstop_push(out, out->indent + 20); -- cgit From cecc328240609df17395dfd0ea03cc813d8be36d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 20 Aug 2024 11:25:39 -0400 Subject: bcachefs: fix missing bch2_err_str() Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 9138944c5ae6..267c2336b115 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2469,8 +2469,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino : bch2_inode_unpack(inode_k, &inode); if (ret) { /* Should have been caught in dirents pass */ - if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) - bch_err(c, "error looking up parent directory: %i", ret); + bch_err_msg(c, ret, "error looking up parent directory"); break; } -- cgit From 1dceae4cc12aa6389d9a8706f0d2a94d1679e79d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 20 Aug 2024 12:10:33 -0400 Subject: bcachefs: unlock_long() before resort in journal replay Fix another SRCU splat - this one pretty harmless. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d89eb43c5ce9..11368dfa96b2 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -322,6 +322,7 @@ int bch2_journal_replay(struct bch_fs *c) } } + bch2_trans_unlock_long(trans); /* * Now, replay any remaining keys in the order in which they appear in * the journal, unpinning those journal entries as we go: -- cgit From 3c5d0b72a8e8c19c960e8fefb7463067e58b6bc4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 Aug 2024 15:22:55 -0400 Subject: bcachefs: fix failure to relock in bch2_btree_node_mem_alloc() We weren't always so strict about trans->locked state - but now we are, and new assertions are shaking some bugs out. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 17 ++++++++++++++ fs/bcachefs/btree_cache.h | 2 ++ fs/bcachefs/btree_update_interior.c | 46 ++++++++++++++++++++----------------- 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index cc778d7e769e..063725ecb2b3 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -159,6 +159,16 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) return b; } +void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) +{ + mutex_lock(&c->btree_cache.lock); + list_move(&b->list, &c->btree_cache.freeable); + mutex_unlock(&c->btree_cache.lock); + + six_unlock_write(&b->c.lock); + six_unlock_intent(&b->c.lock); +} + /* Btree in memory cache - hash table */ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) @@ -736,6 +746,13 @@ out: start_time); memalloc_nofs_restore(flags); + + int ret = bch2_trans_relock(trans); + if (unlikely(ret)) { + bch2_btree_node_to_freelist(c, b); + return ERR_PTR(ret); + } + return b; err: mutex_lock(&bc->lock); diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index c0eb87a057cc..f82064007127 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -12,6 +12,8 @@ struct btree_iter; void bch2_recalc_btree_reserve(struct bch_fs *); +void bch2_btree_node_to_freelist(struct bch_fs *, struct btree *); + void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *); int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *); int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *, diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index b3454d4619e8..8fd112026e7a 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -317,6 +317,12 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, : 0; int ret; + b = bch2_btree_node_mem_alloc(trans, interior_node); + if (IS_ERR(b)) + return b; + + BUG_ON(b->ob.nr); + mutex_lock(&c->btree_reserve_cache_lock); if (c->btree_reserve_cache_nr > nr_reserve) { struct btree_alloc *a = @@ -325,10 +331,9 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, obs = a->ob; bkey_copy(&tmp.k, &a->k); mutex_unlock(&c->btree_reserve_cache_lock); - goto mem_alloc; + goto out; } mutex_unlock(&c->btree_reserve_cache_lock); - retry: ret = bch2_alloc_sectors_start_trans(trans, c->opts.metadata_target ?: @@ -341,7 +346,7 @@ retry: c->opts.metadata_replicas_required), watermark, 0, cl, &wp); if (unlikely(ret)) - return ERR_PTR(ret); + goto err; if (wp->sectors_free < btree_sectors(c)) { struct open_bucket *ob; @@ -360,19 +365,16 @@ retry: bch2_open_bucket_get(c, wp, &obs); bch2_alloc_sectors_done(c, wp); -mem_alloc: - b = bch2_btree_node_mem_alloc(trans, interior_node); - six_unlock_write(&b->c.lock); - six_unlock_intent(&b->c.lock); - - /* we hold cannibalize_lock: */ - BUG_ON(IS_ERR(b)); - BUG_ON(b->ob.nr); - +out: bkey_copy(&b->key, &tmp.k); b->ob = obs; + six_unlock_write(&b->c.lock); + six_unlock_intent(&b->c.lock); return b; +err: + bch2_btree_node_to_freelist(c, b); + return ERR_PTR(ret); } static struct btree *bch2_btree_node_alloc(struct btree_update *as, @@ -2439,6 +2441,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite } new_hash = bch2_btree_node_mem_alloc(trans, false); + ret = PTR_ERR_OR_ZERO(new_hash); + if (ret) + goto err; } path->intent_ref++; @@ -2446,14 +2451,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite commit_flags, skip_triggers); --path->intent_ref; - if (new_hash) { - mutex_lock(&c->btree_cache.lock); - list_move(&new_hash->list, &c->btree_cache.freeable); - mutex_unlock(&c->btree_cache.lock); - - six_unlock_write(&new_hash->c.lock); - six_unlock_intent(&new_hash->c.lock); - } + if (new_hash) + bch2_btree_node_to_freelist(c, new_hash); +err: closure_sync(&cl); bch2_btree_cache_cannibalize_unlock(trans); return ret; @@ -2522,6 +2522,10 @@ int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id b = bch2_btree_node_mem_alloc(trans, false); bch2_btree_cache_cannibalize_unlock(trans); + ret = PTR_ERR_OR_ZERO(b); + if (ret) + return ret; + set_btree_node_fake(b); set_btree_node_need_rewrite(b); b->c.level = level; @@ -2553,7 +2557,7 @@ int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level) { - bch2_trans_run(c, bch2_btree_root_alloc_fake_trans(trans, id, level)); + bch2_trans_run(c, lockrestart_do(trans, bch2_btree_root_alloc_fake_trans(trans, id, level))); } static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as) -- cgit From 5dbfc4ef72f15508882aff58c307b8425cf037a8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 20 Aug 2024 15:04:15 -0400 Subject: bcachefs: fix failure to relock in btree_node_fill() Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 063725ecb2b3..e52a06d3418c 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -873,6 +873,10 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, bch2_btree_node_read(trans, b, sync); + int ret = bch2_trans_relock(trans); + if (ret) + return ERR_PTR(ret); + if (!sync) return NULL; -- cgit From 6575b8c9877c3dd1f7db1d0d61bd250a0bf18b6d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 20 Aug 2024 19:31:20 -0400 Subject: bcachefs: Fix locking in bch2_ioc_setlabel() Fixes: 7a254053a590 ("bcachefs: support FS_IOC_SETFSLABEL") Reported-by: syzbot+7e9efdfec27fbde0141d@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-ioctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index aea8132d2c40..99c7fe987c74 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -328,9 +328,8 @@ static int bch2_ioc_setlabel(struct bch_fs *c, mutex_lock(&c->sb_lock); strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE); - mutex_unlock(&c->sb_lock); - ret = bch2_write_super(c); + mutex_unlock(&c->sb_lock); mnt_drop_write_file(file); return ret; -- cgit From cab18be6957b6af8cbe3502fd5f6d7b9f02ccceb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 20:49:07 -0400 Subject: bcachefs: Fix replay_now_at() assert Journal replay, in the slowpath where we insert keys in journal order, was inserting keys in the wrong order; keys from early repair come last. Reported-by: syzbot+2c4fcb257ce2b6a29d0e@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 11368dfa96b2..36de1c6fe8c3 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -241,7 +241,13 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) const struct journal_key *l = *((const struct journal_key **)_l); const struct journal_key *r = *((const struct journal_key **)_r); - return cmp_int(l->journal_seq, r->journal_seq); + /* + * Map 0 to U64_MAX, so that keys with journal_seq === 0 come last + * + * journal_seq == 0 means that the key comes from early repair, and + * should be inserted last so as to avoid overflowing the journal + */ + return cmp_int(l->journal_seq - 1, r->journal_seq - 1); } int bch2_journal_replay(struct bch_fs *c) -- cgit From bdbdd4759f081ca2d0a5d9e8af21d742ffaf8439 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 21:10:45 -0400 Subject: bcachefs: Fix missing validation in bch2_sb_journal_v2_validate() Reported-by: syzbot+47ecc948aadfb2ab3efc@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_sb.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c index db80e506e3ab..62b910f2fb27 100644 --- a/fs/bcachefs/journal_sb.c +++ b/fs/bcachefs/journal_sb.c @@ -104,6 +104,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2); struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx); int ret = -BCH_ERR_invalid_sb_journal; + u64 sum = 0; unsigned nr; unsigned i; struct u64_range *b; @@ -119,6 +120,15 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f for (i = 0; i < nr; i++) { b[i].start = le64_to_cpu(journal->d[i].start); b[i].end = b[i].start + le64_to_cpu(journal->d[i].nr); + + if (b[i].end <= b[i].start) { + prt_printf(err, "journal buckets entry with bad nr: %llu+%llu", + le64_to_cpu(journal->d[i].start), + le64_to_cpu(journal->d[i].nr)); + goto err; + } + + sum += le64_to_cpu(journal->d[i].nr); } sort(b, nr, sizeof(*b), u64_range_cmp, NULL); @@ -148,6 +158,11 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f } } + if (sum > UINT_MAX) { + prt_printf(err, "too many journal buckets: %llu > %u", sum, UINT_MAX); + goto err; + } + ret = 0; err: kfree(b); -- cgit From 06f67437ab356e3140f51aea272d33ce28421f66 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 22:06:44 -0400 Subject: fs/super.c: improve get_tree() error message seeing an odd bug where we fail to correctly return an error from .get_tree(): https://syzkaller.appspot.com/bug?extid=c0360e8367d6d8d04a66 we need to be able to distinguish between accidently returning a positive error (as implied by the log) and no error. Cc: David Howells Signed-off-by: Kent Overstreet --- fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/super.c b/fs/super.c index 38d72a3cf6fc..b7913b55debc 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1802,8 +1802,8 @@ int vfs_get_tree(struct fs_context *fc) return error; if (!fc->root) { - pr_err("Filesystem %s get_tree() didn't set fc->root\n", - fc->fs_type->name); + pr_err("Filesystem %s get_tree() didn't set fc->root, returned %i\n", + fc->fs_type->name, error); /* We don't know what the locking state of the superblock is - * if there is a superblock. */ -- cgit From 7f2de6947f92cfa4be8e5eaa1237e962bb8ee65f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 22:27:45 -0400 Subject: bcachefs: Fix warning in bch2_fs_journal_stop() j->last_empty_seq needs to match j->seq when the journal is empty Reported-by: syzbot+4093905737cf289b6b38@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 649e3a01608a..f5f7db50ca31 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1260,7 +1260,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) } if (!had_entries) - j->last_empty_seq = cur_seq; + j->last_empty_seq = cur_seq - 1; /* to match j->seq */ spin_lock(&j->lock); -- cgit From 8ed823b19214e403ca485532f48c0e02035021ae Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 22:57:56 -0400 Subject: bcachefs: Fix compat issue with old alloc_v4 keys we allow new fields to be added to existing key types, and new versions should treat them as being zeroed; this was not handled in alloc_v4_validate. Reported-by: syzbot+3b2968fa4953885dd66a@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 50 ++++++++++++++++++----------------- fs/bcachefs/alloc_background_format.h | 1 + fs/bcachefs/btree_iter.h | 9 +++++++ 3 files changed, 36 insertions(+), 24 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index ac933142aeda..dc97b1f8bc08 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -240,71 +240,73 @@ fsck_err: int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, enum bch_validate_flags flags) { - struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); + struct bch_alloc_v4 a; int ret = 0; - bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), + bkey_val_copy(&a, bkey_s_c_to_alloc_v4(k)); + + bkey_fsck_err_on(alloc_v4_u64s_noerror(&a) > bkey_val_u64s(k.k), c, alloc_v4_val_size_bad, "bad val size (%u > %zu)", - alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k)); + alloc_v4_u64s_noerror(&a), bkey_val_u64s(k.k)); - bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && - BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), + bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(&a) && + BCH_ALLOC_V4_NR_BACKPOINTERS(&a), c, alloc_v4_backpointers_start_bad, "invalid backpointers_start"); - bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, + bkey_fsck_err_on(alloc_data_type(a, a.data_type) != a.data_type, c, alloc_key_data_type_bad, "invalid data type (got %u should be %u)", - a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); + a.data_type, alloc_data_type(a, a.data_type)); for (unsigned i = 0; i < 2; i++) - bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX, + bkey_fsck_err_on(a.io_time[i] > LRU_TIME_MAX, c, alloc_key_io_time_bad, "invalid io_time[%s]: %llu, max %llu", i == READ ? "read" : "write", - a.v->io_time[i], LRU_TIME_MAX); + a.io_time[i], LRU_TIME_MAX); - unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(a.v) * sizeof(u64) > + unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(&a) * sizeof(u64) > offsetof(struct bch_alloc_v4, stripe_sectors) - ? a.v->stripe_sectors + ? a.stripe_sectors : 0; - switch (a.v->data_type) { + switch (a.data_type) { case BCH_DATA_free: case BCH_DATA_need_gc_gens: case BCH_DATA_need_discard: bkey_fsck_err_on(stripe_sectors || - a.v->dirty_sectors || - a.v->cached_sectors || - a.v->stripe, + a.dirty_sectors || + a.cached_sectors || + a.stripe, c, alloc_key_empty_but_have_data, "empty data type free but have data %u.%u.%u %u", stripe_sectors, - a.v->dirty_sectors, - a.v->cached_sectors, - a.v->stripe); + a.dirty_sectors, + a.cached_sectors, + a.stripe); break; case BCH_DATA_sb: case BCH_DATA_journal: case BCH_DATA_btree: case BCH_DATA_user: case BCH_DATA_parity: - bkey_fsck_err_on(!a.v->dirty_sectors && + bkey_fsck_err_on(!a.dirty_sectors && !stripe_sectors, c, alloc_key_dirty_sectors_0, "data_type %s but dirty_sectors==0", - bch2_data_type_str(a.v->data_type)); + bch2_data_type_str(a.data_type)); break; case BCH_DATA_cached: - bkey_fsck_err_on(!a.v->cached_sectors || - a.v->dirty_sectors || + bkey_fsck_err_on(!a.cached_sectors || + a.dirty_sectors || stripe_sectors || - a.v->stripe, + a.stripe, c, alloc_key_cached_inconsistency, "data type inconsistency"); - bkey_fsck_err_on(!a.v->io_time[READ] && + bkey_fsck_err_on(!a.io_time[READ] && c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, c, alloc_key_cached_but_read_time_zero, "cached bucket with read_time == 0"); diff --git a/fs/bcachefs/alloc_background_format.h b/fs/bcachefs/alloc_background_format.h index 47d9d006502c..f754a2951d8a 100644 --- a/fs/bcachefs/alloc_background_format.h +++ b/fs/bcachefs/alloc_background_format.h @@ -69,6 +69,7 @@ struct bch_alloc_v4 { __u64 io_time[2]; __u32 stripe; __u32 nr_external_backpointers; + /* end of fields in original version of alloc_v4 */ __u64 fragmentation_lru; __u32 stripe_sectors; __u32 pad; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index dca62375d7d3..222b7ce8a901 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -569,6 +569,15 @@ static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans, bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \ _btree_id, _pos, _flags, KEY_TYPE_##_type)) +#define bkey_val_copy(_dst_v, _src_k) \ +do { \ + unsigned b = min_t(unsigned, sizeof(*_dst_v), \ + bkey_val_bytes(_src_k.k)); \ + memcpy(_dst_v, _src_k.v, b); \ + if (b < sizeof(*_dst_v)) \ + memset((void *) (_dst_v) + b, 0, sizeof(*_dst_v) - b); \ +} while (0) + static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, unsigned btree_id, struct bpos pos, unsigned flags, unsigned type, -- cgit From 0b50b7313ef2494926df30ce8e2ce284f1b847fc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 Aug 2024 23:21:52 -0400 Subject: bcachefs: Fix refcounting in discard path bch_dev->io_ref does not protect against the filesystem going away; bch_fs->writes does. Thus the filesystem write ref needs to be the last ref we release. Reported-by: syzbot+9e0404b505e604f67e41@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index dc97b1f8bc08..ba46f1c1d78a 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1874,26 +1874,26 @@ static void bch2_do_discards_work(struct work_struct *work) trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); - bch2_write_ref_put(c, BCH_WRITE_REF_discard); percpu_ref_put(&ca->io_ref); + bch2_write_ref_put(c, BCH_WRITE_REF_discard); } void bch2_dev_do_discards(struct bch_dev *ca) { struct bch_fs *c = ca->fs; - if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard)) return; - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard)) - goto put_ioref; + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + goto put_write_ref; if (queue_work(c->write_ref_wq, &ca->discard_work)) return; - bch2_write_ref_put(c, BCH_WRITE_REF_discard); -put_ioref: percpu_ref_put(&ca->io_ref); +put_write_ref: + bch2_write_ref_put(c, BCH_WRITE_REF_discard); } void bch2_do_discards(struct bch_fs *c) -- cgit From dedb2fe37574857c84e9598b9f5272505dedf7af Mon Sep 17 00:00:00 2001 From: Yuesong Li Date: Thu, 22 Aug 2024 14:21:58 +0800 Subject: bcachefs: Fix double assignment in check_dirent_to_subvol() ret was assigned twice in check_dirent_to_subvol(). Reported by cocci. Signed-off-by: Yuesong Li Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 267c2336b115..6801c37ee803 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2006,7 +2006,6 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * if (ret) { bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum); ret = -BCH_ERR_fsck_repair_unimplemented; - ret = 0; goto err; } -- cgit From 87313ac1f134d6ee1e7c858da8bdea9147b537a9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 22 Aug 2024 02:13:02 -0400 Subject: bcachefs: clear path->should_be_locked in bch2_btree_key_cache_drop() bch2_btree_key_cache_drop() evicts the key cache entry - it's used when we're doing an update that bypasses the key cache, because for cache coherency reasons a key can't be in the key cache unless it also exists in the btree - i.e. creates have to bypass the cache. After evicting, the path no longer points to a key cache key, and relock() will always fail if should_be_locked is true. Prep for improving path->should_be_locked assertions Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 79954490627c..9b3ec2a3b8ce 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -726,6 +726,7 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans, mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED); btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); + path->should_be_locked = false; } static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, -- cgit From 9b82ff1362f50914c8292902e07be98a9f59d33d Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 22 Aug 2024 10:54:19 +0800 Subject: ALSA: hda/realtek - Fixed ALC256 headphone no sound Dell platform, plug headphone or headset, it had a chance to get no sound from headphone. Replace depop procedure will solve this issue. Signed-off-by: Kailang Yang Link: https://lore.kernel.org/bb8e2de30d294dc287944efa0667685a@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 50 ++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4eafbcb40120..bf098c6fedb5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4930,6 +4930,30 @@ static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec, } } +static void alc_hp_mute_disable(struct hda_codec *codec, unsigned int delay) +{ + if (delay <= 0) + delay = 75; + snd_hda_codec_write(codec, 0x21, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + msleep(delay); + snd_hda_codec_write(codec, 0x21, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + msleep(delay); +} + +static void alc_hp_enable_unmute(struct hda_codec *codec, unsigned int delay) +{ + if (delay <= 0) + delay = 75; + snd_hda_codec_write(codec, 0x21, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); + msleep(delay); + snd_hda_codec_write(codec, 0x21, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE); + msleep(delay); +} + static const struct coef_fw alc225_pre_hsmode[] = { UPDATE_COEF(0x4a, 1<<8, 0), UPDATE_COEFEX(0x57, 0x05, 1<<14, 0), @@ -5031,6 +5055,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) case 0x10ec0236: case 0x10ec0256: case 0x19e58326: + alc_hp_mute_disable(codec, 75); alc_process_coef_fw(codec, coef0256); break; case 0x10ec0234: @@ -5302,6 +5327,7 @@ static void alc_headset_mode_default(struct hda_codec *codec) alc_write_coef_idx(codec, 0x45, 0xc089); msleep(50); alc_process_coef_fw(codec, coef0256); + alc_hp_enable_unmute(codec, 75); break; case 0x10ec0234: case 0x10ec0274: @@ -5399,6 +5425,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) case 0x10ec0256: case 0x19e58326: alc_process_coef_fw(codec, coef0256); + alc_hp_enable_unmute(codec, 75); break; case 0x10ec0234: case 0x10ec0274: @@ -5514,6 +5541,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) case 0x10ec0256: case 0x19e58326: alc_process_coef_fw(codec, coef0256); + alc_hp_enable_unmute(codec, 75); break; case 0x10ec0234: case 0x10ec0274: @@ -5619,25 +5647,21 @@ static void alc_determine_headset_type(struct hda_codec *codec) alc_write_coef_idx(codec, 0x06, 0x6104); alc_write_coefex_idx(codec, 0x57, 0x3, 0x09a3); - snd_hda_codec_write(codec, 0x21, 0, - AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); - msleep(80); - snd_hda_codec_write(codec, 0x21, 0, - AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); - alc_process_coef_fw(codec, coef0255); msleep(300); val = alc_read_coef_idx(codec, 0x46); is_ctia = (val & 0x0070) == 0x0070; - + if (!is_ctia) { + alc_write_coef_idx(codec, 0x45, 0xe089); + msleep(100); + val = alc_read_coef_idx(codec, 0x46); + if ((val & 0x0070) == 0x0070) + is_ctia = false; + else + is_ctia = true; + } alc_write_coefex_idx(codec, 0x57, 0x3, 0x0da3); alc_update_coefex_idx(codec, 0x57, 0x5, 1<<14, 0); - - snd_hda_codec_write(codec, 0x21, 0, - AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); - msleep(80); - snd_hda_codec_write(codec, 0x21, 0, - AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE); break; case 0x10ec0234: case 0x10ec0274: -- cgit From 1d8c3c23a6bc1527e253b305b4b68c03d833b824 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 22 Aug 2024 07:17:09 +0000 Subject: KVM: arm64: Ensure canonical IPA is hugepage-aligned when handling fault Zenghui reports that VMs backed by hugetlb pages are no longer booting after commit fd276e71d1e7 ("KVM: arm64: nv: Handle shadow stage 2 page faults"). Support for shadow stage-2 MMUs introduced the concept of a fault IPA and canonical IPA to stage-2 fault handling. These are identical in the non-nested case, as the hardware stage-2 context is always that of the canonical IPA space. Both addresses need to be hugepage-aligned when preparing to install a hugepage mapping to ensure that KVM uses the correct GFN->PFN translation and installs that at the correct IPA for the current stage-2. And now I'm feeling thirsty after all this talk of IPAs... Fixes: fd276e71d1e7 ("KVM: arm64: nv: Handle shadow stage 2 page faults") Reported-by: Zenghui Yu Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240822071710.2291690-1-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 6981b1bc0946..a509b63bd4dd 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1540,8 +1540,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_pagesize = min(vma_pagesize, (long)max_map_size); } - if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) + /* + * Both the canonical IPA and fault IPA must be hugepage-aligned to + * ensure we find the right PFN and lay down the mapping in the right + * place. + */ + if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) { fault_ipa &= ~(vma_pagesize - 1); + ipa &= ~(vma_pagesize - 1); + } gfn = ipa >> PAGE_SHIFT; mte_allowed = kvm_vma_mte_allowed(vma); -- cgit From 7dd9c26bd6cf679bcfdef01a8659791aa6487a29 Mon Sep 17 00:00:00 2001 From: Simon Arlott Date: Thu, 22 Aug 2024 08:25:07 +0100 Subject: can: mcp251x: fix deadlock if an interrupt occurs during mcp251x_open The mcp251x_hw_wake() function is called with the mpc_lock mutex held and disables the interrupt handler so that no interrupts can be processed while waking the device. If an interrupt has already occurred then waiting for the interrupt handler to complete will deadlock because it will be trying to acquire the same mutex. CPU0 CPU1 ---- ---- mcp251x_open() mutex_lock(&priv->mcp_lock) request_threaded_irq() mcp251x_can_ist() mutex_lock(&priv->mcp_lock) mcp251x_hw_wake() disable_irq() <-- deadlock Use disable_irq_nosync() instead because the interrupt handler does everything while holding the mutex so it doesn't matter if it's still running. Fixes: 8ce8c0abcba3 ("can: mcp251x: only reset hardware as required") Signed-off-by: Simon Arlott Reviewed-by: Przemek Kitszel Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/4fc08687-1d80-43fe-9f0d-8ef8475e75f6@0882a8b5-c6c3-11e9-b005-00805fc181fe.uuid.home.arpa Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index 3b8736ff0345..ec5c64006a16 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -752,7 +752,7 @@ static int mcp251x_hw_wake(struct spi_device *spi) int ret; /* Force wakeup interrupt to wake device, but don't execute IST */ - disable_irq(spi->irq); + disable_irq_nosync(spi->irq); mcp251x_write_2regs(spi, CANINTE, CANINTE_WAKIE, CANINTF_WAKIF); /* Wait for oscillator startup timer after wake up */ -- cgit From 6e95097b6bb20f0021180b150f41ad9962dcdcc9 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 13 Aug 2024 13:44:47 +0300 Subject: MAINTAINERS: Mark UVC gadget driver as orphan I haven't had time to maintain the UVC gadget driver for a long while. Dan Scally confirmed he is also in a similar -ENOTIME situation with no short term hope of fixing that. Being listed as maintainers doesn't help progress, so mark the driver as orphan to reflect the current state. Signed-off-by: Laurent Pinchart Acked-by: Daniel Scally Link: https://lore.kernel.org/r/20240813104447.25821-1-laurent.pinchart@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index f328373463b0..2b193c9a44ee 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23821,10 +23821,8 @@ F: drivers/media/usb/uvc/ F: include/uapi/linux/uvcvideo.h USB WEBCAM GADGET -M: Laurent Pinchart -M: Daniel Scally L: linux-usb@vger.kernel.org -S: Maintained +S: Orphan F: drivers/usb/gadget/function/*uvc* F: drivers/usb/gadget/legacy/webcam.c F: include/uapi/linux/usb/g_uvc.h -- cgit From 3e6245ebe7ef341639e9a7e402b3ade8ad45a19f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Aug 2024 11:03:38 +0100 Subject: KVM: arm64: Make ICC_*SGI*_EL1 undef in the absence of a vGICv3 On a system with a GICv3, if a guest hasn't been configured with GICv3 and that the host is not capable of GICv2 emulation, a write to any of the ICC_*SGI*_EL1 registers is trapped to EL2. We therefore try to emulate the SGI access, only to hit a NULL pointer as no private interrupt is allocated (no GIC, remember?). The obvious fix is to give the guest what it deserves, in the shape of a UNDEF exception. Reported-by: Alexander Potapenko Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240820100349.3544850-2-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 6 ++++++ arch/arm64/kvm/vgic/vgic.h | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c90324060436..31e49da867ff 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -33,6 +33,7 @@ #include #include "sys_regs.h" +#include "vgic/vgic.h" #include "trace.h" @@ -435,6 +436,11 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, { bool g1; + if (!kvm_has_gicv3(vcpu->kvm)) { + kvm_inject_undefined(vcpu); + return false; + } + if (!p->is_write) return read_from_write_only(vcpu, p, r); diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index ba8f790431bd..8532bfe3fed4 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -346,4 +346,11 @@ void vgic_v4_configure_vsgis(struct kvm *kvm); void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val); int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq); +static inline bool kvm_has_gicv3(struct kvm *kvm) +{ + return (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) && + irqchip_in_kernel(kvm) && + kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3); +} + #endif -- cgit From 14e497183df28c006603cc67fd3797a537eef7b9 Mon Sep 17 00:00:00 2001 From: Selvarasu Ganesan Date: Thu, 15 Aug 2024 12:18:31 +0530 Subject: usb: dwc3: core: Prevent USB core invalid event buffer address access This commit addresses an issue where the USB core could access an invalid event buffer address during runtime suspend, potentially causing SMMU faults and other memory issues in Exynos platforms. The problem arises from the following sequence. 1. In dwc3_gadget_suspend, there is a chance of a timeout when moving the USB core to the halt state after clearing the run/stop bit by software. 2. In dwc3_core_exit, the event buffer is cleared regardless of the USB core's status, which may lead to an SMMU faults and other memory issues. if the USB core tries to access the event buffer address. To prevent this hardware quirk on Exynos platforms, this commit ensures that the event buffer address is not cleared by software when the USB core is active during runtime suspend by checking its status before clearing the buffer address. Cc: stable Signed-off-by: Selvarasu Ganesan Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20240815064836.1491-1-selvarasu.g@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 734de2a8bd21..ccc3895dbd7f 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -564,9 +564,17 @@ int dwc3_event_buffers_setup(struct dwc3 *dwc) void dwc3_event_buffers_cleanup(struct dwc3 *dwc) { struct dwc3_event_buffer *evt; + u32 reg; if (!dwc->ev_buf) return; + /* + * Exynos platforms may not be able to access event buffer if the + * controller failed to halt on dwc3_core_exit(). + */ + reg = dwc3_readl(dwc->regs, DWC3_DSTS); + if (!(reg & DWC3_DSTS_DEVCTRLHLT)) + return; evt = dwc->ev_buf; -- cgit From 0497a356d3c498221eb0c1edc1e8985816092f12 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 20 Aug 2024 08:21:19 +0000 Subject: usb: cdnsp: fix incorrect index in cdnsp_get_hw_deq function Patch fixes the incorrect "stream_id" table index instead of "ep_index" used in cdnsp_get_hw_deq function. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") cc: stable@vger.kernel.org Signed-off-by: Pawel Laszczak Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/PH7PR07MB95381F2182688811D5C711CEDD8D2@PH7PR07MB9538.namprd07.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 02f297f5637d..a60c0cb991cd 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -402,7 +402,7 @@ static u64 cdnsp_get_hw_deq(struct cdnsp_device *pdev, struct cdnsp_stream_ctx *st_ctx; struct cdnsp_ep *pep; - pep = &pdev->eps[stream_id]; + pep = &pdev->eps[ep_index]; if (pep->ep_state & EP_HAS_STREAMS) { st_ctx = &pep->stream_info.stream_ctx_array[stream_id]; -- cgit From 0b00583ecacb0b51712a5ecd34cf7e6684307c67 Mon Sep 17 00:00:00 2001 From: Ian Ray Date: Wed, 14 Aug 2024 10:29:05 +0300 Subject: cdc-acm: Add DISABLE_ECHO quirk for GE HealthCare UI Controller USB_DEVICE(0x1901, 0x0006) may send data before cdc_acm is ready, which may be misinterpreted in the default N_TTY line discipline. Signed-off-by: Ian Ray Acked-by: Oliver Neuku Cc: stable Link: https://lore.kernel.org/r/20240814072905.2501-1-ian.ray@gehealthcare.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 0e7439dba8fe..0c1b69d944ca 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1761,6 +1761,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x11ca, 0x0201), /* VeriFone Mx870 Gadget Serial */ .driver_info = SINGLE_RX_URB, }, + { USB_DEVICE(0x1901, 0x0006), /* GE Healthcare Patient Monitor UI Controller */ + .driver_info = DISABLE_ECHO, /* DISABLE ECHO in termios flag */ + }, { USB_DEVICE(0x1965, 0x0018), /* Uniden UBC125XLT */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, -- cgit From b52a07e07dead777517af3cbda851bb2cc157c9d Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Wed, 14 Aug 2024 19:25:37 +0800 Subject: usb: gadget: uvc: queue pump work in uvcg_video_enable() Since commit "6acba0345b68 usb:gadget:uvc Do not use worker thread to pump isoc usb requests", pump work could only be queued in uvc_video_complete() and uvc_v4l2_qbuf(). If VIDIOC_QBUF is executed before VIDIOC_STREAMON, we can only depend on uvc_video_complete() to queue pump work. However, this requires some free requests in req_ready list. If req_ready list is empty all the time, pump work will never be queued and video datas will never be pumped to usb controller. Actually, this situation could happen when run uvc-gadget with static image: $ ./uvc-gadget -i 1080p.jpg uvc.0 When capture image from this device, the user app will always block there. The issue is uvc driver has queued video buffer before streamon, but the req_ready list is empty all the time after streamon. This will queue pump work in uvcg_video_enable() to fill some request to req_ready list so the uvc device could work properly. Fixes: 6acba0345b68 ("usb:gadget:uvc Do not use worker thread to pump isoc usb requests") Cc: stable@vger.kernel.org Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20240814112537.2608949-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/uvc_video.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index d41f5f31dadd..a9edd60fbbf7 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -753,6 +753,7 @@ int uvcg_video_enable(struct uvc_video *video) video->req_int_count = 0; uvc_video_ep_queue_initial_requests(video); + queue_work(video->async_wq, &video->pump); return ret; } -- cgit From 5b235693ed2a1e4963625717a1598becf97759cc Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 15 Aug 2024 13:31:31 +0200 Subject: dt-bindings: usb: microchip,usb2514: Fix reference USB device schema An USB hub is not a HCD, but an USB device. Fix the referenced schema accordingly. Fixes: bfbf2e4b77e2 ("dt-bindings: usb: Document the Microchip USB2514 hub") Cc: stable@vger.kernel.org Reviewed-by: Krzysztof Kozlowski Signed-off-by: Alexander Stein Link: https://lore.kernel.org/r/20240815113132.372542-1-alexander.stein@ew.tq-group.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/microchip,usb2514.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml b/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml index 245e8c3ce669..b14e6f37b298 100644 --- a/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml +++ b/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml @@ -10,7 +10,7 @@ maintainers: - Fabio Estevam allOf: - - $ref: usb-hcd.yaml# + - $ref: usb-device.yaml# properties: compatible: @@ -36,6 +36,13 @@ required: - compatible - reg +patternProperties: + "^.*@[0-9a-f]{1,2}$": + description: The hard wired USB devices + type: object + $ref: /schemas/usb/usb-device.yaml + additionalProperties: true + unevaluatedProperties: false examples: -- cgit From 2aa765a43817ec8add990f83c8e54a9a5d87aa9c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Aug 2024 09:54:08 +0200 Subject: usb: dwc3: omap: add missing depopulate in probe error path Depopulate device in probe error paths to fix leak of children resources. Fixes: ee249b455494 ("usb: dwc3: omap: remove IRQ_NOAUTOEN used with shared irq") Cc: stable@vger.kernel.org Acked-by: Thinh Nguyen Signed-off-by: Krzysztof Kozlowski Reviewed-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/20240816075409.23080-1-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-omap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index d5c77db4daa9..2a11fc0ee84f 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -522,11 +522,13 @@ static int dwc3_omap_probe(struct platform_device *pdev) if (ret) { dev_err(dev, "failed to request IRQ #%d --> %d\n", omap->irq, ret); - goto err1; + goto err2; } dwc3_omap_enable_irqs(omap); return 0; +err2: + of_platform_depopulate(dev); err1: pm_runtime_put_sync(dev); pm_runtime_disable(dev); -- cgit From 16f2a21d9d7e48e1af02654fe3d926c0ce6cb3e5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 16 Aug 2024 09:54:09 +0200 Subject: usb: dwc3: xilinx: add missing depopulate in probe error path Depopulate device in probe error paths to fix leak of children resources. Fixes: 53b5ff83d893 ("usb: dwc3: xilinx: improve error handling for PM APIs") Cc: stable@vger.kernel.org Reviewed-by: Radhey Shyam Pandey Signed-off-by: Krzysztof Kozlowski Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20240816075409.23080-2-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-xilinx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c index bb4d894c16e9..f1298b1b4f84 100644 --- a/drivers/usb/dwc3/dwc3-xilinx.c +++ b/drivers/usb/dwc3/dwc3-xilinx.c @@ -327,9 +327,14 @@ static int dwc3_xlnx_probe(struct platform_device *pdev) goto err_pm_set_suspended; pm_suspend_ignore_children(dev, false); - return pm_runtime_resume_and_get(dev); + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + goto err_pm_set_suspended; + + return 0; err_pm_set_suspended: + of_platform_depopulate(dev); pm_runtime_set_suspended(dev); err_clk_put: -- cgit From 4f83cae0edb2b13aabb82e8a4852092844d320aa Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Sun, 18 Aug 2024 22:21:01 +0200 Subject: usb: typec: fsa4480: Relax CHIP_ID check Some FSA4480-compatible chips like the OCP96011 used on Fairphone 5 return 0x00 from the CHIP_ID register. Handle that gracefully and only fail probe when the I2C read has failed. With this the dev_dbg will print 0 but otherwise continue working. [ 0.251581] fsa4480 1-0042: Found FSA4480 v0.0 (Vendor ID = 0) Cc: stable@vger.kernel.org Fixes: e885f5f1f2b4 ("usb: typec: fsa4480: Check if the chip is really there") Signed-off-by: Luca Weiss Reviewed-by: Heikki Krogerus Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20240818-fsa4480-chipid-fix-v1-1-17c239435cf7@fairphone.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/fsa4480.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/mux/fsa4480.c b/drivers/usb/typec/mux/fsa4480.c index cd235339834b..f71dba8bf07c 100644 --- a/drivers/usb/typec/mux/fsa4480.c +++ b/drivers/usb/typec/mux/fsa4480.c @@ -274,7 +274,7 @@ static int fsa4480_probe(struct i2c_client *client) return dev_err_probe(dev, PTR_ERR(fsa->regmap), "failed to initialize regmap\n"); ret = regmap_read(fsa->regmap, FSA4480_DEVICE_ID, &val); - if (ret || !val) + if (ret) return dev_err_probe(dev, -ENODEV, "FSA4480 not found\n"); dev_dbg(dev, "Found FSA4480 v%lu.%lu (Vendor ID = %lu)\n", -- cgit From 3a8839bbb86da7968a792123ed2296d063871a52 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Tue, 20 Aug 2024 19:01:27 +0800 Subject: usb: core: sysfs: Unmerge @usb3_hardware_lpm_attr_group in remove_power_attributes() Device attribute group @usb3_hardware_lpm_attr_group is merged by add_power_attributes(), but it is not unmerged explicitly, fixed by unmerging it in remove_power_attributes(). Fixes: 655fe4effe0f ("usbcore: add sysfs support to xHCI usb3 hardware LPM") Cc: stable@vger.kernel.org Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20240820-sysfs_fix-v2-1-a9441487077e@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index d83231d6736a..61b6d978892c 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -670,6 +670,7 @@ static int add_power_attributes(struct device *dev) static void remove_power_attributes(struct device *dev) { + sysfs_unmerge_group(&dev->kobj, &usb3_hardware_lpm_attr_group); sysfs_unmerge_group(&dev->kobj, &usb2_hardware_lpm_attr_group); sysfs_unmerge_group(&dev->kobj, &power_attr_group); } -- cgit From 72fca8371f205d654f95b09cd023a71fd5307041 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Thu, 15 Aug 2024 08:40:29 +0200 Subject: usb: dwc3: ep0: Don't reset resource alloc flag (including ep0) The DWC3_EP_RESOURCE_ALLOCATED flag ensures that the resource of an endpoint is only assigned once. Unless the endpoint is reset, don't clear this flag. Otherwise we may set endpoint resource again, which prevents the driver from initiate transfer after handling a STALL or endpoint halt to the control endpoint. Commit f2e0eee47038 ("usb: dwc3: ep0: Don't reset resource alloc flag") was fixing the initial issue, but did this only for physical ep1. Since the function dwc3_ep0_stall_and_restart is resetting the flags for both physical endpoints, this also has to be done for ep0. Cc: stable@vger.kernel.org Fixes: b311048c174d ("usb: dwc3: gadget: Rewrite endpoint allocation flow") Acked-by: Thinh Nguyen Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20240814-dwc3hwep0reset-v2-1-29e1d7d923ea@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ep0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index d96ffbe52039..c9533a99e47c 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -232,7 +232,8 @@ void dwc3_ep0_stall_and_restart(struct dwc3 *dwc) /* stall is always issued on EP0 */ dep = dwc->eps[0]; __dwc3_gadget_ep_set_halt(dep, 1, false); - dep->flags = DWC3_EP_ENABLED; + dep->flags &= DWC3_EP_RESOURCE_ALLOCATED; + dep->flags |= DWC3_EP_ENABLED; dwc->delayed_status = false; if (!list_empty(&dep->pending_list)) { -- cgit From ddfcfeba891064b88bb844208b43bef2ef970f0c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 14 Aug 2024 11:39:56 +0200 Subject: usb: dwc3: st: fix probed platform device ref count on probe error path The probe function never performs any paltform device allocation, thus error path "undo_platform_dev_alloc" is entirely bogus. It drops the reference count from the platform device being probed. If error path is triggered, this will lead to unbalanced device reference counts and premature release of device resources, thus possible use-after-free when releasing remaining devm-managed resources. Fixes: f83fca0707c6 ("usb: dwc3: add ST dwc3 glue layer to manage dwc3 HC") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Acked-by: Thinh Nguyen Reviewed-by: Patrice Chotard Link: https://lore.kernel.org/r/20240814093957.37940-1-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-st.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c index 211360eee95a..a9cb04043f08 100644 --- a/drivers/usb/dwc3/dwc3-st.c +++ b/drivers/usb/dwc3/dwc3-st.c @@ -219,10 +219,8 @@ static int st_dwc3_probe(struct platform_device *pdev) dwc3_data->regmap = regmap; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "syscfg-reg"); - if (!res) { - ret = -ENXIO; - goto undo_platform_dev_alloc; - } + if (!res) + return -ENXIO; dwc3_data->syscfg_reg_off = res->start; @@ -233,8 +231,7 @@ static int st_dwc3_probe(struct platform_device *pdev) devm_reset_control_get_exclusive(dev, "powerdown"); if (IS_ERR(dwc3_data->rstc_pwrdn)) { dev_err(&pdev->dev, "could not get power controller\n"); - ret = PTR_ERR(dwc3_data->rstc_pwrdn); - goto undo_platform_dev_alloc; + return PTR_ERR(dwc3_data->rstc_pwrdn); } /* Manage PowerDown */ @@ -300,8 +297,6 @@ undo_softreset: reset_control_assert(dwc3_data->rstc_rst); undo_powerdown: reset_control_assert(dwc3_data->rstc_pwrdn); -undo_platform_dev_alloc: - platform_device_put(pdev); return ret; } -- cgit From cd4897bfd14f6a5388b21ba45a066541a0425199 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 14 Aug 2024 11:39:57 +0200 Subject: usb: dwc3: st: add missing depopulate in probe error path Depopulate device in probe error paths to fix leak of children resources. Fixes: f83fca0707c6 ("usb: dwc3: add ST dwc3 glue layer to manage dwc3 HC") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Reviewed-by: Patrice Chotard Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20240814093957.37940-2-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-st.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c index a9cb04043f08..c8c7cd0c1796 100644 --- a/drivers/usb/dwc3/dwc3-st.c +++ b/drivers/usb/dwc3/dwc3-st.c @@ -266,7 +266,7 @@ static int st_dwc3_probe(struct platform_device *pdev) if (!child_pdev) { dev_err(dev, "failed to find dwc3 core device\n"); ret = -ENODEV; - goto err_node_put; + goto depopulate; } dwc3_data->dr_mode = usb_get_dr_mode(&child_pdev->dev); @@ -282,6 +282,7 @@ static int st_dwc3_probe(struct platform_device *pdev) ret = st_dwc3_drd_init(dwc3_data); if (ret) { dev_err(dev, "drd initialisation failed\n"); + of_platform_depopulate(dev); goto undo_softreset; } @@ -291,6 +292,8 @@ static int st_dwc3_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dwc3_data); return 0; +depopulate: + of_platform_depopulate(dev); err_node_put: of_node_put(child); undo_softreset: -- cgit From 6ea14ccb60c8ab829349979b22b58a941ec4a3ee Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 13 Aug 2024 12:39:46 +0200 Subject: netfilter: flowtable: validate vlan header Ensure there is sufficient room to access the protocol field of the VLAN header, validate it once before the flowtable lookup. ===================================================== BUG: KMSAN: uninit-value in nf_flow_offload_inet_hook+0x45a/0x5f0 net/netfilter/nf_flow_table_inet.c:32 nf_flow_offload_inet_hook+0x45a/0x5f0 net/netfilter/nf_flow_table_inet.c:32 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline] nf_ingress net/core/dev.c:5440 [inline] Fixes: 4cd91f7c290f ("netfilter: flowtable: add vlan support") Reported-by: syzbot+8407d9bb88cd4c6bf61a@syzkaller.appspotmail.com Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_inet.c | 3 +++ net/netfilter/nf_flow_table_ip.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 88787b45e30d..8b541a080342 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -17,6 +17,9 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, switch (skb->protocol) { case htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth))) + return NF_ACCEPT; + veth = (struct vlan_ethhdr *)skb_mac_header(skb); proto = veth->h_vlan_encapsulated_proto; break; diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index c2c005234dcd..98edcaa37b38 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -281,6 +281,9 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, switch (skb->protocol) { case htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth))) + return false; + veth = (struct vlan_ethhdr *)skb_mac_header(skb); if (veth->h_vlan_encapsulated_proto == proto) { *offset += VLAN_HLEN; -- cgit From 1fa7b099d60ad64f559bd3b8e3f0d94b2e015514 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 22 Aug 2024 16:46:56 +0800 Subject: ALSA: hda/realtek - FIxed ALC285 headphone no sound Dell platform with ALC215 ALC285 ALC289 ALC225 ALC295 ALC299, plug headphone or headset. It had a chance to get no sound from headphone. Replace depop procedure will solve this issue. Signed-off-by: Kailang Yang Link: https://lore.kernel.org/d0de1b03fd174520945dde216d765223@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bf098c6fedb5..b5cc3417138c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5090,6 +5090,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) case 0x10ec0295: case 0x10ec0289: case 0x10ec0299: + alc_hp_mute_disable(codec, 75); alc_process_coef_fw(codec, alc225_pre_hsmode); alc_process_coef_fw(codec, coef0225); break; @@ -5315,6 +5316,7 @@ static void alc_headset_mode_default(struct hda_codec *codec) case 0x10ec0299: alc_process_coef_fw(codec, alc225_pre_hsmode); alc_process_coef_fw(codec, coef0225); + alc_hp_enable_unmute(codec, 75); break; case 0x10ec0255: alc_process_coef_fw(codec, coef0255); @@ -5474,6 +5476,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) alc_process_coef_fw(codec, coef0225_2); else alc_process_coef_fw(codec, coef0225_1); + alc_hp_enable_unmute(codec, 75); break; case 0x10ec0867: alc_update_coefex_idx(codec, 0x57, 0x5, 1<<14, 0); @@ -5579,6 +5582,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) case 0x10ec0289: case 0x10ec0299: alc_process_coef_fw(codec, coef0225); + alc_hp_enable_unmute(codec, 75); break; } codec_dbg(codec, "Headset jack set to Nokia-style headset mode.\n"); @@ -5738,12 +5742,6 @@ static void alc_determine_headset_type(struct hda_codec *codec) case 0x10ec0295: case 0x10ec0289: case 0x10ec0299: - snd_hda_codec_write(codec, 0x21, 0, - AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); - msleep(80); - snd_hda_codec_write(codec, 0x21, 0, - AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); - alc_process_coef_fw(codec, alc225_pre_hsmode); alc_update_coef_idx(codec, 0x67, 0xf000, 0x1000); val = alc_read_coef_idx(codec, 0x45); @@ -5760,15 +5758,19 @@ static void alc_determine_headset_type(struct hda_codec *codec) val = alc_read_coef_idx(codec, 0x46); is_ctia = (val & 0x00f0) == 0x00f0; } + if (!is_ctia) { + alc_update_coef_idx(codec, 0x45, 0x3f<<10, 0x38<<10); + alc_update_coef_idx(codec, 0x49, 3<<8, 1<<8); + msleep(100); + val = alc_read_coef_idx(codec, 0x46); + if ((val & 0x00f0) == 0x00f0) + is_ctia = false; + else + is_ctia = true; + } alc_update_coef_idx(codec, 0x4a, 7<<6, 7<<6); alc_update_coef_idx(codec, 0x4a, 3<<4, 3<<4); alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000); - - snd_hda_codec_write(codec, 0x21, 0, - AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); - msleep(80); - snd_hda_codec_write(codec, 0x21, 0, - AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE); break; case 0x10ec0867: is_ctia = true; -- cgit From f2916c83d746eb99f50f42c15cf4c47c2ea5f3b3 Mon Sep 17 00:00:00 2001 From: Mengyuan Lou Date: Tue, 20 Aug 2024 11:04:25 +0800 Subject: net: ngbe: Fix phy mode set to external phy The MAC only has add the TX delay and it can not be modified. MAC and PHY are both set the TX delay cause transmission problems. So just disable TX delay in PHY, when use rgmii to attach to external phy, set PHY_INTERFACE_MODE_RGMII_RXID to phy drivers. And it is does not matter to internal phy. Fixes: bc2426d74aa3 ("net: ngbe: convert phylib to phylink") Signed-off-by: Mengyuan Lou Cc: stable@vger.kernel.org # 6.3+ Reviewed-by: Jacob Keller Link: https://patch.msgid.link/E6759CF1387CF84C+20240820030425.93003-1-mengyuanlou@net-swift.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c index ec54b18c5fe7..a5e9b779c44d 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c @@ -124,8 +124,12 @@ static int ngbe_phylink_init(struct wx *wx) MAC_SYM_PAUSE | MAC_ASYM_PAUSE; config->mac_managed_pm = true; - phy_mode = PHY_INTERFACE_MODE_RGMII_ID; - __set_bit(PHY_INTERFACE_MODE_RGMII_ID, config->supported_interfaces); + /* The MAC only has add the Tx delay and it can not be modified. + * So just disable TX delay in PHY, and it is does not matter to + * internal phy. + */ + phy_mode = PHY_INTERFACE_MODE_RGMII_RXID; + __set_bit(PHY_INTERFACE_MODE_RGMII_RXID, config->supported_interfaces); phylink = phylink_create(config, NULL, phy_mode, &ngbe_mac_ops); if (IS_ERR(phylink)) -- cgit From a2f5c505b4378cd6fc7c4a44ff3665ccef2037db Mon Sep 17 00:00:00 2001 From: Sava Jakovljev Date: Wed, 21 Aug 2024 04:16:57 +0200 Subject: net: phy: realtek: Fix setting of PHY LEDs Mode B bit on RTL8211F The current implementation incorrectly sets the mode bit of the PHY chip. Bit 15 (RTL8211F_LEDCR_MODE) should not be shifted together with the configuration nibble of a LED- it should be set independently of the index of the LED being configured. As a consequence, the RTL8211F LED control is actually operating in Mode A. Fix the error by or-ing final register value to write with a const-value of RTL8211F_LEDCR_MODE, thus setting Mode bit explicitly. Fixes: 17784801d888 ("net: phy: realtek: Add support for PHY LEDs on RTL8211F") Signed-off-by: Sava Jakovljev Reviewed-by: Marek Vasut Link: https://patch.msgid.link/PAWP192MB21287372F30C4E55B6DF6158C38E2@PAWP192MB2128.EURP192.PROD.OUTLOOK.COM Signed-off-by: Paolo Abeni --- drivers/net/phy/realtek.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 87865918dab6..25e5bfbb6f89 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -555,7 +555,7 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, unsigned long rules) { const u16 mask = RTL8211F_LEDCR_MASK << (RTL8211F_LEDCR_SHIFT * index); - u16 reg = RTL8211F_LEDCR_MODE; /* Mode B */ + u16 reg = 0; if (index >= RTL8211F_LED_COUNT) return -EINVAL; @@ -575,6 +575,7 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, } reg <<= RTL8211F_LEDCR_SHIFT * index; + reg |= RTL8211F_LEDCR_MODE; /* Mode B */ return phy_modify_paged(phydev, 0xd04, RTL8211F_LEDCR, mask, reg); } -- cgit From af688a99eb1fc7ef69774665d61e6be51cea627a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 21 Aug 2024 12:35:58 +0530 Subject: octeontx2-af: Fix CPT AF register offset calculation Some CPT AF registers are per LF and others are global. Translation of PF/VF local LF slot number to actual LF slot number is required only for accessing perf LF registers. CPT AF global registers access do not require any LF slot number. Also, there is no reason CPT PF/VF to know actual lf's register offset. Without this fix microcode loading will fail, VFs cannot be created and hardware is not usable. Fixes: bc35e28af789 ("octeontx2-af: replace cpt slot with lf id on reg write") Signed-off-by: Bharat Bhushan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240821070558.1020101-1-bbhushan2@marvell.com Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/octeontx2/af/rvu_cpt.c | 23 +++++++++++----------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 3e09d2285814..daf4b951e905 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -632,7 +632,9 @@ int rvu_mbox_handler_cpt_inline_ipsec_cfg(struct rvu *rvu, return ret; } -static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) +static bool validate_and_update_reg_offset(struct rvu *rvu, + struct cpt_rd_wr_reg_msg *req, + u64 *reg_offset) { u64 offset = req->reg_offset; int blkaddr, num_lfs, lf; @@ -663,6 +665,11 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) if (lf < 0) return false; + /* Translate local LF's offset to global CPT LF's offset to + * access LFX register. + */ + *reg_offset = (req->reg_offset & 0xFF000) + (lf << 3); + return true; } else if (!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK)) { /* Registers that can be accessed from PF */ @@ -697,7 +704,7 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, struct cpt_rd_wr_reg_msg *rsp) { u64 offset = req->reg_offset; - int blkaddr, lf; + int blkaddr; blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); if (blkaddr < 0) @@ -708,18 +715,10 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, !is_cpt_vf(rvu, req->hdr.pcifunc)) return CPT_AF_ERR_ACCESS_DENIED; - if (!is_valid_offset(rvu, req)) + if (!validate_and_update_reg_offset(rvu, req, &offset)) return CPT_AF_ERR_ACCESS_DENIED; - /* Translate local LF used by VFs to global CPT LF */ - lf = rvu_get_lf(rvu, &rvu->hw->block[blkaddr], req->hdr.pcifunc, - (offset & 0xFFF) >> 3); - - /* Translate local LF's offset to global CPT LF's offset */ - offset &= 0xFF000; - offset += lf << 3; - - rsp->reg_offset = offset; + rsp->reg_offset = req->reg_offset; rsp->ret_val = req->ret_val; rsp->is_write = req->is_write; -- cgit From 9abf199943a6469a71f6ce5c2266e9364d310f8b Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Tue, 13 Aug 2024 16:38:08 +0800 Subject: wifi: ath11k: fix NULL pointer dereference in ath11k_mac_get_eirp_power() Commit 39dc8b8ea387 ("wifi: mac80211: pass parsed TPE data to drivers") breaks ath11k, leading to kernel crash: BUG: kernel NULL pointer dereference, address: 0000000000000018 RIP: 0010:ath11k_mac_get_eirp_power.isra.0+0x5b/0x80 [ath11k] Call Trace: ath11k_mac_fill_reg_tpc_info+0x3d6/0x800 [ath11k] ath11k_mac_vdev_start_restart+0x412/0x4d0 [ath11k] ath11k_mac_op_sta_state+0x7bc/0xbb0 [ath11k] drv_sta_state+0xf1/0x5f0 [mac80211] sta_info_insert_rcu+0x28d/0x530 [mac80211] sta_info_insert+0xf/0x20 [mac80211] ieee80211_prep_connection+0x3b4/0x4c0 [mac80211] ieee80211_mgd_auth+0x363/0x600 [mac80211] The issue scenario is, AP advertises power spectral density (PSD) values in its transmit power envelope (TPE) IE and supports 160 MHz bandwidth in 6 GHz. When connecting to this AP, in ath11k_mac_parse_tx_pwr_env(), the local variable psd is true and then reg_tpc_info.num_pwr_levels is set to 8 due to 160 MHz bandwidth. Note here ath11k fails to set reg_tpc_info.is_psd_power as TRUE due to above commit. Then in ath11k_mac_fill_reg_tpc_info(), for each of the 8 power levels, for a PSD channel, ath11k_mac_get_psd_channel() is expected to be called to get required information. However due to invalid reg_tpc_info.is_psd_power, it is ath11k_mac_get_eirp_power() that gets called and passed with pwr_lvl_idx as one of the arguments. Note this function implicitly requires pwr_lvl_idx to be no more than 3. So when pwr_lvl_idx is larger than that ath11k_mac_get_seg_freq() returns invalid center frequency, with which as the input ieee80211_get_channel() returns NULL, then kernel crashes due to NULL pointer dereference. Fix it by setting reg_tpc_info.is_psd_power properly. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.30 Fixes: 39dc8b8ea387 ("wifi: mac80211: pass parsed TPE data to drivers") Reported-by: Mikko Tiihonen Tested-by: Mikko Tiihonen Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219131 Signed-off-by: Baochen Qiang Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240813083808.9224-1-quic_bqiang@quicinc.com --- drivers/net/wireless/ath/ath11k/mac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index ba910ae2c676..7c0ef6916dd2 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -7900,6 +7900,7 @@ static void ath11k_mac_parse_tx_pwr_env(struct ath11k *ar, } if (psd) { + arvif->reg_tpc_info.is_psd_power = true; arvif->reg_tpc_info.num_pwr_levels = psd->count; for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++) { -- cgit From 9a8fc292dd93b93db30e01c94c0da4c944852f28 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 22 Aug 2024 14:30:53 +0300 Subject: spi: pxa2xx: Do not override dev->platform_data on probe The platform_data field may be supplied by legacy board code. In other cases we override it, and module remove and probe cycle will crash the kernel since it will carry a stale pointer. Fix this by supplying a third argument to the pxa2xx_spi_probe() and avoid overriding dev->platform_data. Reported-by: Hao Ma Fixes: cc160697a576 ("spi: pxa2xx: Convert PCI driver to use spi-pxa2xx code directly") Fixes: 3d8f037fbcab ("spi: pxa2xx: Move platform driver to a separate file") Fixes: 20ade9b9771c ("spi: pxa2xx: Extract pxa2xx_spi_platform_*() callbacks") Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20240822113408.750831-2-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 2 +- drivers/spi/spi-pxa2xx-platform.c | 6 ++---- drivers/spi/spi-pxa2xx.c | 5 ++--- drivers/spi/spi-pxa2xx.h | 3 ++- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 616d032f1a89..c98bb214b6ae 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -297,7 +297,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, return ret; ssp->irq = pci_irq_vector(dev, 0); - return pxa2xx_spi_probe(&dev->dev, ssp); + return pxa2xx_spi_probe(&dev->dev, ssp, pdata); } static void pxa2xx_spi_pci_remove(struct pci_dev *dev) diff --git a/drivers/spi/spi-pxa2xx-platform.c b/drivers/spi/spi-pxa2xx-platform.c index 98a8ceb7db6f..f9504cddc7ba 100644 --- a/drivers/spi/spi-pxa2xx-platform.c +++ b/drivers/spi/spi-pxa2xx-platform.c @@ -63,7 +63,7 @@ static struct ssp_device *pxa2xx_spi_ssp_request(struct platform_device *pdev) ssp = pxa_ssp_request(pdev->id, pdev->name); if (!ssp) - return ssp; + return NULL; status = devm_add_action_or_reset(&pdev->dev, pxa2xx_spi_ssp_release, ssp); if (status) @@ -148,8 +148,6 @@ static int pxa2xx_spi_platform_probe(struct platform_device *pdev) platform_info = pxa2xx_spi_init_pdata(pdev); if (IS_ERR(platform_info)) return dev_err_probe(dev, PTR_ERR(platform_info), "missing platform data\n"); - - dev->platform_data = platform_info; } ssp = pxa2xx_spi_ssp_request(pdev); @@ -158,7 +156,7 @@ static int pxa2xx_spi_platform_probe(struct platform_device *pdev) if (!ssp) ssp = &platform_info->ssp; - return pxa2xx_spi_probe(dev, ssp); + return pxa2xx_spi_probe(dev, ssp, platform_info); } static void pxa2xx_spi_platform_remove(struct platform_device *pdev) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 16b96eb176cd..e3a95adc5279 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1277,16 +1277,15 @@ static size_t pxa2xx_spi_max_dma_transfer_size(struct spi_device *spi) return MAX_DMA_LEN; } -int pxa2xx_spi_probe(struct device *dev, struct ssp_device *ssp) +int pxa2xx_spi_probe(struct device *dev, struct ssp_device *ssp, + struct pxa2xx_spi_controller *platform_info) { - struct pxa2xx_spi_controller *platform_info; struct spi_controller *controller; struct driver_data *drv_data; const struct lpss_config *config; int status; u32 tmp; - platform_info = dev_get_platdata(dev); if (platform_info->is_target) controller = devm_spi_alloc_target(dev, sizeof(*drv_data)); else diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h index a470d3d634d3..447be0369384 100644 --- a/drivers/spi/spi-pxa2xx.h +++ b/drivers/spi/spi-pxa2xx.h @@ -132,7 +132,8 @@ extern void pxa2xx_spi_dma_stop(struct driver_data *drv_data); extern int pxa2xx_spi_dma_setup(struct driver_data *drv_data); extern void pxa2xx_spi_dma_release(struct driver_data *drv_data); -int pxa2xx_spi_probe(struct device *dev, struct ssp_device *ssp); +int pxa2xx_spi_probe(struct device *dev, struct ssp_device *ssp, + struct pxa2xx_spi_controller *platform_info); void pxa2xx_spi_remove(struct device *dev); extern const struct dev_pm_ops pxa2xx_spi_pm_ops; -- cgit From e17465f78eb92ebb4be17e35d6c0584406f643a0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 22 Aug 2024 14:30:54 +0300 Subject: spi: pxa2xx: Move PM runtime handling to the glue drivers PCI and platform buses have different defaults for runtime PM. In particular PCI probe is assumed to be called when PM runtime is enabled by the PCI core. In this case if we try enable it again the PM runtime complaints with pxa2xx_spi_pci 0000:00:07.0: Unbalanced pm_runtime_enable! Fix this by moving PM runtime handling from the SPI PXA2xx core to the glue drivers. Fixes: cc160697a576 ("spi: pxa2xx: Convert PCI driver to use spi-pxa2xx code directly") Fixes: 3d8f037fbcab ("spi: pxa2xx: Move platform driver to a separate file") Fixes: 20ade9b9771c ("spi: pxa2xx: Extract pxa2xx_spi_platform_*() callbacks") Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20240822113408.750831-3-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 15 ++++++++++++++- drivers/spi/spi-pxa2xx-platform.c | 22 ++++++++++++++++++++-- drivers/spi/spi-pxa2xx.c | 15 +-------------- 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index c98bb214b6ae..cc8dcf782399 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -297,11 +298,23 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, return ret; ssp->irq = pci_irq_vector(dev, 0); - return pxa2xx_spi_probe(&dev->dev, ssp, pdata); + ret = pxa2xx_spi_probe(&dev->dev, ssp, pdata); + if (ret) + return ret; + + pm_runtime_set_autosuspend_delay(&dev->dev, 50); + pm_runtime_use_autosuspend(&dev->dev); + pm_runtime_put_autosuspend(&dev->dev); + pm_runtime_allow(&dev->dev); + + return 0; } static void pxa2xx_spi_pci_remove(struct pci_dev *dev) { + pm_runtime_forbid(&dev->dev); + pm_runtime_get_noresume(&dev->dev); + pxa2xx_spi_remove(&dev->dev); } diff --git a/drivers/spi/spi-pxa2xx-platform.c b/drivers/spi/spi-pxa2xx-platform.c index f9504cddc7ba..595af9fa4e0f 100644 --- a/drivers/spi/spi-pxa2xx-platform.c +++ b/drivers/spi/spi-pxa2xx-platform.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -142,6 +143,7 @@ static int pxa2xx_spi_platform_probe(struct platform_device *pdev) struct pxa2xx_spi_controller *platform_info; struct device *dev = &pdev->dev; struct ssp_device *ssp; + int ret; platform_info = dev_get_platdata(dev); if (!platform_info) { @@ -156,12 +158,28 @@ static int pxa2xx_spi_platform_probe(struct platform_device *pdev) if (!ssp) ssp = &platform_info->ssp; - return pxa2xx_spi_probe(dev, ssp, platform_info); + pm_runtime_set_autosuspend_delay(dev, 50); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + + ret = pxa2xx_spi_probe(dev, ssp, platform_info); + if (ret) + pm_runtime_disable(dev); + + return ret; } static void pxa2xx_spi_platform_remove(struct platform_device *pdev) { - pxa2xx_spi_remove(&pdev->dev); + struct device *dev = &pdev->dev; + + pm_runtime_get_sync(dev); + + pxa2xx_spi_remove(dev); + + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); } static const struct acpi_device_id pxa2xx_spi_acpi_match[] = { diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index e3a95adc5279..bf1f34b0ffc8 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1449,24 +1449,16 @@ int pxa2xx_spi_probe(struct device *dev, struct ssp_device *ssp, } } - pm_runtime_set_autosuspend_delay(dev, 50); - pm_runtime_use_autosuspend(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - /* Register with the SPI framework */ dev_set_drvdata(dev, drv_data); status = spi_register_controller(controller); if (status) { dev_err_probe(dev, status, "problem registering SPI controller\n"); - goto out_error_pm_runtime_enabled; + goto out_error_clock_enabled; } return status; -out_error_pm_runtime_enabled: - pm_runtime_disable(dev); - out_error_clock_enabled: clk_disable_unprepare(ssp->clk); @@ -1483,8 +1475,6 @@ void pxa2xx_spi_remove(struct device *dev) struct driver_data *drv_data = dev_get_drvdata(dev); struct ssp_device *ssp = drv_data->ssp; - pm_runtime_get_sync(dev); - spi_unregister_controller(drv_data->controller); /* Disable the SSP at the peripheral and SOC level */ @@ -1495,9 +1485,6 @@ void pxa2xx_spi_remove(struct device *dev) if (drv_data->controller_info->enable_dma) pxa2xx_spi_dma_release(drv_data); - pm_runtime_put_noidle(dev); - pm_runtime_disable(dev); - /* Release IRQ */ free_irq(ssp->irq, drv_data); } -- cgit From bff980d8d9ca537fd5f3c0e9a99876c1e3713e81 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 22 Aug 2024 12:57:25 +0100 Subject: ASoC: cs-amp-lib-test: Force test calibration blob entries to be valid For a normal calibration blob the calTarget values must be non-zero and unique, and the calTime values must be non-zero. Don't rely on get_random_bytes() to be random enough to guarantee this. Force the calTarget and calTime values to be valid while retaining randomness in the values. Signed-off-by: Richard Fitzgerald Fixes: 177862317a98 ("ASoC: cs-amp-lib: Add KUnit test for calibration helpers") Link: https://patch.msgid.link/20240822115725.259568-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs-amp-lib-test.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/soc/codecs/cs-amp-lib-test.c b/sound/soc/codecs/cs-amp-lib-test.c index 15f991b2e16e..8169ec88a8ba 100644 --- a/sound/soc/codecs/cs-amp-lib-test.c +++ b/sound/soc/codecs/cs-amp-lib-test.c @@ -38,6 +38,7 @@ static void cs_amp_lib_test_init_dummy_cal_blob(struct kunit *test, int num_amps { struct cs_amp_lib_test_priv *priv = test->priv; unsigned int blob_size; + int i; blob_size = offsetof(struct cirrus_amp_efi_data, data) + sizeof(struct cirrus_amp_cal_data) * num_amps; @@ -49,6 +50,14 @@ static void cs_amp_lib_test_init_dummy_cal_blob(struct kunit *test, int num_amps priv->cal_blob->count = num_amps; get_random_bytes(priv->cal_blob->data, sizeof(struct cirrus_amp_cal_data) * num_amps); + + /* Ensure all timestamps are non-zero to mark the entry valid. */ + for (i = 0; i < num_amps; i++) + priv->cal_blob->data[i].calTime[0] |= 1; + + /* Ensure that all UIDs are non-zero and unique. */ + for (i = 0; i < num_amps; i++) + *(u8 *)&priv->cal_blob->data[i].calTarget[0] = i + 1; } static u64 cs_amp_lib_test_get_target_uid(struct kunit *test) -- cgit From 1ac66c4960e1c735eb6edfd3e6d52bebb2aa347e Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 21 Aug 2024 09:46:44 +0100 Subject: MAINTAINERS: Add sonet.h to ATM section of MAINTAINERS This is part of an effort to assign a section in MAINTAINERS to header files that relate to Networking. In this case the files with "net" in their name. It seems that sonet.h is included in ATM related source files, and thus that ATM is the most relevant section for these files. Cc: Chas Williams <3chas3@gmail.com> Signed-off-by: Simon Horman Signed-off-by: Paolo Abeni --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a964a34651f5..c682203915a2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3504,7 +3504,9 @@ S: Maintained W: http://linux-atm.sourceforge.net F: drivers/atm/ F: include/linux/atm* +F: include/linux/sonet.h F: include/uapi/linux/atm* +F: include/uapi/linux/sonet.h ATMEL MACB ETHERNET DRIVER M: Nicolas Ferre -- cgit From eb208fecd77d898709c25af680487289fd5f3e16 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 21 Aug 2024 09:46:45 +0100 Subject: MAINTAINERS: Add net_tstamp.h to SOCKET TIMESTAMPING section This is part of an effort to assign a section in MAINTAINERS to header files that relate to Networking. In this case the files with "net" in their name. Cc: Richard Cochran Cc: Willem de Bruijn Signed-off-by: Simon Horman Acked-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c682203915a2..e5b9a4d9bc21 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21057,6 +21057,7 @@ SOCKET TIMESTAMPING M: Willem de Bruijn S: Maintained F: Documentation/networking/timestamping.rst +F: include/linux/net_tstamp.h F: include/uapi/linux/net_tstamp.h F: tools/testing/selftests/net/so_txtime.c -- cgit From 8cb0a938d90b25f123fcb2e24bbda9eaabd79c9e Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 21 Aug 2024 09:46:46 +0100 Subject: MAINTAINERS: Add limited globs for Networking headers This aims to add limited globs to improve the coverage of header files in the NETWORKING DRIVERS and NETWORKING [GENERAL] sections. It is done so in a minimal way to exclude overlap with other sections. And so as not to require "X" entries to exclude files otherwise matched by these new globs. While imperfect, due to it's limited nature, this does extend coverage of header files by these sections. And aims to automatically cover new files that seem very likely belong to these sections. The include/linux/netdev* glob (both sections) + Subsumes the entries for: - include/linux/netdevice.h + Extends the sections to cover - include/linux/netdevice_xmit.h - include/linux/netdev_features.h The include/uapi/linux/netdev* globs: (both sections) + Subsumes the entries for: - include/linux/netdevice.h + Extends the sections to cover - include/linux/netdev.h The include/linux/skbuff* glob (NETWORKING [GENERAL] section only): + Subsumes the entry for: - include/linux/skbuff.h + Extends the section to cover - include/linux/skbuff_ref.h A include/uapi/linux/net_* glob was not added to the NETWORKING [GENERAL] section. Although it would subsume the entry for include/uapi/linux/net_namespace.h, which is fine, it would also extend coverage to: - include/uapi/linux/net_dropmon.h, which belongs to the NETWORK DROP MONITOR section - include/uapi/linux/net_tstamp.h which, as per an earlier patch in this series, belongs to the SOCKET TIMESTAMPING section Signed-off-by: Simon Horman Signed-off-by: Paolo Abeni --- MAINTAINERS | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e5b9a4d9bc21..03d571b131eb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15884,10 +15884,10 @@ F: include/linux/fddidevice.h F: include/linux/hippidevice.h F: include/linux/if_* F: include/linux/inetdevice.h -F: include/linux/netdevice.h +F: include/linux/netdev* F: include/uapi/linux/cn_proc.h F: include/uapi/linux/if_* -F: include/uapi/linux/netdevice.h +F: include/uapi/linux/netdev* F: tools/testing/selftests/drivers/net/ X: drivers/net/wireless/ @@ -15940,13 +15940,13 @@ F: include/linux/framer/framer.h F: include/linux/in.h F: include/linux/indirect_call_wrapper.h F: include/linux/net.h -F: include/linux/netdevice.h -F: include/linux/skbuff.h +F: include/linux/netdev* +F: include/linux/skbuff* F: include/net/ F: include/uapi/linux/in.h F: include/uapi/linux/net.h F: include/uapi/linux/net_namespace.h -F: include/uapi/linux/netdevice.h +F: include/uapi/linux/netdev* F: lib/net_utils.c F: lib/random32.c F: net/ -- cgit From f2d20c9b97f0df64841b89fa1ad3e9c92f7377ae Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 21 Aug 2024 09:46:47 +0100 Subject: MAINTAINERS: Add header files to NETWORKING sections This is part of an effort to assign a section in MAINTAINERS to header files that relate to Networking. In this case the files with "net" or "skbuff" in their name. This patch adds a number of such files to the NETWORKING DRIVERS and NETWORKING [GENERAL] sections. Signed-off-by: Simon Horman Signed-off-by: Paolo Abeni --- MAINTAINERS | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 03d571b131eb..798f1ffcbbaa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15879,13 +15879,16 @@ F: drivers/net/ F: include/dt-bindings/net/ F: include/linux/cn_proc.h F: include/linux/etherdevice.h +F: include/linux/ethtool_netlink.h F: include/linux/fcdevice.h F: include/linux/fddidevice.h F: include/linux/hippidevice.h F: include/linux/if_* F: include/linux/inetdevice.h F: include/linux/netdev* +F: include/linux/platform_data/wiznet.h F: include/uapi/linux/cn_proc.h +F: include/uapi/linux/ethtool_netlink.h F: include/uapi/linux/if_* F: include/uapi/linux/netdev* F: tools/testing/selftests/drivers/net/ @@ -15939,14 +15942,28 @@ F: include/linux/framer/framer-provider.h F: include/linux/framer/framer.h F: include/linux/in.h F: include/linux/indirect_call_wrapper.h +F: include/linux/inet.h +F: include/linux/inet_diag.h F: include/linux/net.h F: include/linux/netdev* +F: include/linux/netlink.h +F: include/linux/netpoll.h +F: include/linux/rtnetlink.h +F: include/linux/seq_file_net.h F: include/linux/skbuff* F: include/net/ +F: include/uapi/linux/genetlink.h +F: include/uapi/linux/hsr_netlink.h F: include/uapi/linux/in.h +F: include/uapi/linux/inet_diag.h +F: include/uapi/linux/nbd-netlink.h F: include/uapi/linux/net.h F: include/uapi/linux/net_namespace.h +F: include/uapi/linux/netconf.h F: include/uapi/linux/netdev* +F: include/uapi/linux/netlink.h +F: include/uapi/linux/netlink_diag.h +F: include/uapi/linux/rtnetlink.h F: lib/net_utils.c F: lib/random32.c F: net/ -- cgit From 46097a92662496394628cb41138e681d6074cce7 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 21 Aug 2024 09:46:48 +0100 Subject: MAINTAINERS: Mark JME Network Driver as Odd Fixes This driver only appears to have received sporadic clean-ups, typically part of some tree-wide activity, and fixes for quite some time. And according to the maintainer, Guo-Fu Tseng, the device has been EOLed for a long time (see Link). Accordingly, it seems appropriate to mark this driver as odd fixes. Cc: Moon Yeounsu Cc: Guo-Fu Tseng Link: https://lore.kernel.org/netdev/20240805003139.M94125@cooldavid.org/ Signed-off-by: Simon Horman Signed-off-by: Paolo Abeni --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 798f1ffcbbaa..0c94ec0ca478 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11995,7 +11995,7 @@ F: fs/jfs/ JME NETWORK DRIVER M: Guo-Fu Tseng L: netdev@vger.kernel.org -S: Maintained +S: Odd Fixes F: drivers/net/ethernet/jme.* JOURNALLING FLASH FILE SYSTEM V2 (JFFS2) -- cgit From 3e878fe5a0b139838a65f50a3df3caf3299dbc24 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 22 Aug 2024 03:57:39 -0400 Subject: bcachefs: add missing inode_walker_exit() fix a small leak Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 6801c37ee803..83bd31b44aad 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2215,6 +2215,8 @@ int bch2_check_xattrs(struct bch_fs *c) NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_xattr(trans, &iter, k, &hash_info, &inode))); + + inode_walker_exit(&inode); bch_err_fn(c, ret); return ret; } -- cgit From a592cdf5164d3feb821085df71f63e70e8b8b08c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 Aug 2024 16:41:00 -0400 Subject: bcachefs: don't use rht_bucket() in btree_key_cache_scan() rht_bucket() does strange complicated things when a rehash is in progress. Instead, just skip scanning when a rehash is in progress: scanning is going to be more expensive (many more empty slots to cover), and some sort of infinite loop is being observed Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 9b3ec2a3b8ce..fda7998734cb 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -778,6 +778,20 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); + + /* + * Scanning is expensive while a rehash is in progress - most elements + * will be on the new hashtable, if it's in progress + * + * A rehash could still start while we're scanning - that's ok, we'll + * still see most elements. + */ + if (unlikely(tbl->nest)) { + rcu_read_unlock(); + srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); + return SHRINK_STOP; + } + if (bc->shrink_iter >= tbl->size) bc->shrink_iter = 0; start = bc->shrink_iter; @@ -785,7 +799,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, do { struct rhash_head *pos, *next; - pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter)); + pos = rht_ptr_rcu(&tbl->buckets[bc->shrink_iter]); while (!rht_is_a_nulls(pos)) { next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter); @@ -866,12 +880,22 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) while (atomic_long_read(&bc->nr_keys)) { rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); - if (tbl) + if (tbl) { + if (tbl->nest) { + /* wait for in progress rehash */ + rcu_read_unlock(); + mutex_lock(&bc->table.mutex); + mutex_unlock(&bc->table.mutex); + rcu_read_lock(); + continue; + } for (i = 0; i < tbl->size; i++) - rht_for_each_entry_rcu(ck, pos, tbl, i, hash) { + while (pos = rht_ptr_rcu(&tbl->buckets[i]), !rht_is_a_nulls(pos)) { + ck = container_of(pos, struct bkey_cached, hash); bkey_cached_evict(bc, ck); list_add(&ck->list, &items); } + } rcu_read_unlock(); } -- cgit From ce61b605a00502c59311d0a4b1f58d62b48272d0 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 20 Aug 2024 22:07:38 +0900 Subject: ksmbd: the buffer of smb2 query dir response has at least 1 byte When STATUS_NO_MORE_FILES status is set to smb2 query dir response, ->StructureSize is set to 9, which mean buffer has 1 byte. This issue occurs because ->Buffer[1] in smb2_query_directory_rsp to flex-array. Fixes: eb3e28c1e89b ("smb3: Replace smb2pdu 1-element arrays with flex-arrays") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 0bc9edf22ba4..e9204180919e 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -4409,7 +4409,8 @@ int smb2_query_dir(struct ksmbd_work *work) rsp->OutputBufferLength = cpu_to_le32(0); rsp->Buffer[0] = 0; rc = ksmbd_iov_pin_rsp(work, (void *)rsp, - sizeof(struct smb2_query_directory_rsp)); + offsetof(struct smb2_query_directory_rsp, Buffer) + + 1); if (rc) goto err_out; } else { -- cgit From 2186a116538a715b20e15f84fdd3545e5fe0a39b Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 22 Aug 2024 08:20:50 +0000 Subject: smb/server: fix return value of smb2_open() In most error cases, error code is not returned in smb2_open(), __process_request() will not print error message. Fix this by returning the correct value at the end of smb2_open(). Signed-off-by: ChenXiaoSong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index e9204180919e..55d4e69bd9c9 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3713,7 +3713,7 @@ err_out2: kfree(name); kfree(lc); - return 0; + return rc; } static int readdir_info_level_struct_sz(int info_level) -- cgit From 4e8771a3666c8f216eefd6bd2fd50121c6c437db Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 22 Aug 2024 08:20:51 +0000 Subject: smb/server: fix potential null-ptr-deref of lease_ctx_info in smb2_open() null-ptr-deref will occur when (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) and parse_lease_state() return NULL. Fix this by check if 'lease_ctx_info' is NULL. Additionally, remove the redundant parentheses in parse_durable_handle_context(). Signed-off-by: ChenXiaoSong Signed-off-by: Steve French --- fs/smb/server/oplock.c | 2 +- fs/smb/server/smb2pdu.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index a8f52c4ebbda..e546ffa57b55 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1510,7 +1510,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) * parse_lease_state() - parse lease context containted in file open request * @open_req: buffer containing smb2 file open(create) request * - * Return: oplock state, -ENOENT if create lease context not found + * Return: allocated lease context object on success, otherwise NULL */ struct lease_ctx_info *parse_lease_state(void *open_req) { diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 55d4e69bd9c9..5d170ab0817d 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2770,8 +2770,8 @@ static int parse_durable_handle_context(struct ksmbd_work *work, } } - if (((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || - req_op_level == SMB2_OPLOCK_LEVEL_BATCH)) { + if ((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || + req_op_level == SMB2_OPLOCK_LEVEL_BATCH) { dh_info->CreateGuid = durable_v2_blob->CreateGuid; dh_info->persistent = @@ -2791,8 +2791,8 @@ static int parse_durable_handle_context(struct ksmbd_work *work, goto out; } - if (((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || - req_op_level == SMB2_OPLOCK_LEVEL_BATCH)) { + if ((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || + req_op_level == SMB2_OPLOCK_LEVEL_BATCH) { ksmbd_debug(SMB, "Request for durable open\n"); dh_info->type = dh_idx; } @@ -3414,7 +3414,7 @@ int smb2_open(struct ksmbd_work *work) goto err_out1; } } else { - if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) { + if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE && lc) { if (S_ISDIR(file_inode(filp)->i_mode)) { lc->req_state &= ~SMB2_LEASE_WRITE_CACHING_LE; lc->is_dir = true; -- cgit From 0dd771b7d60b8281f10f6721783c60716d22075f Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 22 Aug 2024 08:20:52 +0000 Subject: smb/server: remove useless assignment of 'file_present' in smb2_open() The variable is already true here. Signed-off-by: ChenXiaoSong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 5d170ab0817d..cd23517d9640 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3096,7 +3096,6 @@ int smb2_open(struct ksmbd_work *work) goto err_out; } - file_present = true; idmap = mnt_idmap(path.mnt); } else { if (rc != -ENOENT) -- cgit From 2b7e0573a49064d9c94c114b4471327cd96ae39c Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 22 Aug 2024 08:20:54 +0000 Subject: smb/server: update misguided comment of smb2_allocate_rsp_buf() smb2_allocate_rsp_buf() will return other error code except -ENOMEM. Signed-off-by: ChenXiaoSong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index cd23517d9640..20846a4d3031 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -519,7 +519,7 @@ int init_smb2_rsp_hdr(struct ksmbd_work *work) * smb2_allocate_rsp_buf() - allocate smb2 response buffer * @work: smb work containing smb request buffer * - * Return: 0 on success, otherwise -ENOMEM + * Return: 0 on success, otherwise error */ int smb2_allocate_rsp_buf(struct ksmbd_work *work) { -- cgit From bb4485562f5907708f1c218b5d70dce04165d1e1 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 22 Aug 2024 14:35:44 +0100 Subject: ASoC: cs-amp-lib: Ignore empty UEFI calibration entries If the timestamp of a calibration entry is 0 it is an unused entry and must be ignored. Some end-products reserve EFI space for calibration entries by shipping with a zero-filled EFI file. When searching the file for calibration data the driver must skip the empty entries. The timestamp of a valid entry is always non-zero. Signed-off-by: Richard Fitzgerald Fixes: 1cad8725f2b9 ("ASoC: cs-amp-lib: Add helpers for factory calibration data") Link: https://patch.msgid.link/20240822133544.304421-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs-amp-lib.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs-amp-lib.c b/sound/soc/codecs/cs-amp-lib.c index 605964af8afa..51b128c80671 100644 --- a/sound/soc/codecs/cs-amp-lib.c +++ b/sound/soc/codecs/cs-amp-lib.c @@ -182,6 +182,10 @@ static int _cs_amp_get_efi_calibration_data(struct device *dev, u64 target_uid, for (i = 0; i < efi_data->count; ++i) { u64 cal_target = cs_amp_cal_target_u64(&efi_data->data[i]); + /* Skip empty entries */ + if (!efi_data->data[i].calTime[0] && !efi_data->data[i].calTime[1]) + continue; + /* Skip entries with unpopulated silicon ID */ if (cal_target == 0) continue; @@ -193,7 +197,8 @@ static int _cs_amp_get_efi_calibration_data(struct device *dev, u64 target_uid, } } - if (!cal && (amp_index >= 0) && (amp_index < efi_data->count)) { + if (!cal && (amp_index >= 0) && (amp_index < efi_data->count) && + (efi_data->data[amp_index].calTime[0] || efi_data->data[amp_index].calTime[1])) { u64 cal_target = cs_amp_cal_target_u64(&efi_data->data[amp_index]); /* -- cgit From d7fd2941ae9a67423d1c7bee985f240e4686634f Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 21 Aug 2024 18:55:06 +0200 Subject: s390/boot: Avoid possible physmem_info segment corruption When physical memory for the kernel image is allocated it does not consider extra memory required for offsetting the image start to match it with the lower 20 bits of KASLR virtual base address. That might lead to kernel access beyond its memory range. Suggested-by: Vasily Gorbik Fixes: 693d41f7c938 ("s390/mm: Restore mapping of kernel image using large pages") Signed-off-by: Alexander Gordeev Acked-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/boot/startup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index cff34744b5a9..d69f1dfd3b1e 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -457,9 +457,9 @@ void startup_kernel(void) */ kaslr_large_page_offset = __kaslr_offset & ~_SEGMENT_MASK; if (kaslr_enabled()) { - unsigned long end = ident_map_size - kaslr_large_page_offset; + unsigned long size = kernel_size + kaslr_large_page_offset; - __kaslr_offset_phys = randomize_within_range(kernel_size, _SEGMENT_SIZE, 0, end); + __kaslr_offset_phys = randomize_within_range(size, _SEGMENT_SIZE, 0, ident_map_size); } if (!__kaslr_offset_phys) __kaslr_offset_phys = nokaslr_offset_phys; -- cgit From 1642285e511c2a40b14e87a41aa8feace6123036 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 21 Aug 2024 18:55:07 +0200 Subject: s390/boot: Fix KASLR base offset off by __START_KERNEL bytes Symbol offsets to the KASLR base do not match symbol address in the vmlinux image. That is the result of setting the KASLR base to the beginning of .text section as result of an optimization. Revert that optimization and allocate virtual memory for the whole kernel image including __START_KERNEL bytes as per the linker script. That allows keeping the semantics of the KASLR base offset in sync with other architectures. Rename __START_KERNEL to TEXT_OFFSET, since it represents the offset of the .text section within the kernel image, rather than a virtual address. Still skip mapping TEXT_OFFSET bytes to save memory on pgtables and provoke exceptions in case an attempt to access this area is made, as no kernel symbol may reside there. In case CONFIG_KASAN is enabled the location counter might exceed the value of TEXT_OFFSET, while the decompressor linker script forcefully resets it to TEXT_OFFSET, which leads to a sections overlap link failure. Use MAX() expression to avoid that. Reported-by: Omar Sandoval Closes: https://lore.kernel.org/linux-s390/ZnS8dycxhtXBZVky@telecaster.dhcp.thefacebook.com/ Fixes: 56b1069c40c7 ("s390/boot: Rework deployment of the kernel image") Signed-off-by: Alexander Gordeev Acked-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/boot/startup.c | 55 +++++++++++++++++++++++------------------- arch/s390/boot/vmem.c | 14 +++++++++-- arch/s390/boot/vmlinux.lds.S | 7 +++++- arch/s390/include/asm/page.h | 3 ++- arch/s390/kernel/vmlinux.lds.S | 2 +- arch/s390/tools/relocs.c | 2 +- 6 files changed, 52 insertions(+), 31 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index d69f1dfd3b1e..c73b5118ad42 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -162,7 +162,7 @@ static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, loc = (long)*reloc + phys_offset; if (loc < min_addr || loc > max_addr) error("64-bit relocation outside of kernel!\n"); - *(u64 *)loc += offset - __START_KERNEL; + *(u64 *)loc += offset; } } @@ -177,7 +177,7 @@ static void kaslr_adjust_got(unsigned long offset) */ for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) { if (*entry) - *entry += offset - __START_KERNEL; + *entry += offset; } } @@ -252,7 +252,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page); /* choose kernel address space layout: 4 or 3 levels. */ - BUILD_BUG_ON(!IS_ALIGNED(__START_KERNEL, THREAD_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(TEXT_OFFSET, THREAD_SIZE)); BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE)); BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE); vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE); @@ -389,31 +389,25 @@ static void kaslr_adjust_vmlinux_info(long offset) #endif } -static void fixup_vmlinux_info(void) -{ - vmlinux.entry -= __START_KERNEL; - kaslr_adjust_vmlinux_info(-__START_KERNEL); -} - void startup_kernel(void) { - unsigned long kernel_size = vmlinux.image_size + vmlinux.bss_size; - unsigned long nokaslr_offset_phys, kaslr_large_page_offset; - unsigned long amode31_lma = 0; + unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size; + unsigned long nokaslr_text_lma, text_lma = 0, amode31_lma = 0; + unsigned long kernel_size = TEXT_OFFSET + vmlinux_size; + unsigned long kaslr_large_page_offset; unsigned long max_physmem_end; unsigned long asce_limit; unsigned long safe_addr; psw_t psw; - fixup_vmlinux_info(); setup_lpp(); /* * Non-randomized kernel physical start address must be _SEGMENT_SIZE * aligned (see blow). */ - nokaslr_offset_phys = ALIGN(mem_safe_offset(), _SEGMENT_SIZE); - safe_addr = PAGE_ALIGN(nokaslr_offset_phys + kernel_size); + nokaslr_text_lma = ALIGN(mem_safe_offset(), _SEGMENT_SIZE); + safe_addr = PAGE_ALIGN(nokaslr_text_lma + vmlinux_size); /* * Reserve decompressor memory together with decompression heap, @@ -457,16 +451,27 @@ void startup_kernel(void) */ kaslr_large_page_offset = __kaslr_offset & ~_SEGMENT_MASK; if (kaslr_enabled()) { - unsigned long size = kernel_size + kaslr_large_page_offset; + unsigned long size = vmlinux_size + kaslr_large_page_offset; - __kaslr_offset_phys = randomize_within_range(size, _SEGMENT_SIZE, 0, ident_map_size); + text_lma = randomize_within_range(size, _SEGMENT_SIZE, TEXT_OFFSET, ident_map_size); } - if (!__kaslr_offset_phys) - __kaslr_offset_phys = nokaslr_offset_phys; - __kaslr_offset_phys |= kaslr_large_page_offset; + if (!text_lma) + text_lma = nokaslr_text_lma; + text_lma |= kaslr_large_page_offset; + + /* + * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region is + * never accessed via the kernel image mapping as per the linker script: + * + * . = TEXT_OFFSET; + * + * Therefore, this region could be used for something else and does + * not need to be reserved. See how it is skipped in setup_vmem(). + */ + __kaslr_offset_phys = text_lma - TEXT_OFFSET; kaslr_adjust_vmlinux_info(__kaslr_offset_phys); - physmem_reserve(RR_VMLINUX, __kaslr_offset_phys, kernel_size); - deploy_kernel((void *)__kaslr_offset_phys); + physmem_reserve(RR_VMLINUX, text_lma, vmlinux_size); + deploy_kernel((void *)text_lma); /* vmlinux decompression is done, shrink reserved low memory */ physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end); @@ -489,7 +494,7 @@ void startup_kernel(void) amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G); } if (!amode31_lma) - amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size; + amode31_lma = text_lma - vmlinux.amode31_size; physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size); /* @@ -505,8 +510,8 @@ void startup_kernel(void) * - copy_bootdata() must follow setup_vmem() to propagate changes * to bootdata made by setup_vmem() */ - clear_bss_section(__kaslr_offset_phys); - kaslr_adjust_relocs(__kaslr_offset_phys, __kaslr_offset_phys + vmlinux.image_size, + clear_bss_section(text_lma); + kaslr_adjust_relocs(text_lma, text_lma + vmlinux.image_size, __kaslr_offset, __kaslr_offset_phys); kaslr_adjust_got(__kaslr_offset); setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit); diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 2847cc059ab7..145035f84a0e 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -90,7 +90,7 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern } memgap_start = end; } - kasan_populate(kernel_start, kernel_end, POPULATE_KASAN_MAP_SHADOW); + kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW); kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW); kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW); if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { @@ -475,7 +475,17 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l (unsigned long)__identity_va(end), POPULATE_IDENTITY); } - pgtable_populate(kernel_start, kernel_end, POPULATE_KERNEL); + + /* + * [kernel_start..kernel_start + TEXT_OFFSET] region is never + * accessed as per the linker script: + * + * . = TEXT_OFFSET; + * + * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to + * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region. + */ + pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL); pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT); pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore), POPULATE_ABS_LOWCORE); diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S index a750711d44c8..66670212a361 100644 --- a/arch/s390/boot/vmlinux.lds.S +++ b/arch/s390/boot/vmlinux.lds.S @@ -109,7 +109,12 @@ SECTIONS #ifdef CONFIG_KERNEL_UNCOMPRESSED . = ALIGN(PAGE_SIZE); . += AMODE31_SIZE; /* .amode31 section */ - . = ALIGN(1 << 20); /* _SEGMENT_SIZE */ + + /* + * Make sure the location counter is not less than TEXT_OFFSET. + * _SEGMENT_SIZE is not available, use ALIGN(1 << 20) instead. + */ + . = MAX(TEXT_OFFSET, ALIGN(1 << 20)); #else . = ALIGN(8); #endif diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 06416b3f94f5..16e4caa931f1 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -279,8 +279,9 @@ static inline unsigned long virt_to_pfn(const void *kaddr) #define AMODE31_SIZE (3 * PAGE_SIZE) #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) -#define __START_KERNEL 0x100000 #define __NO_KASLR_START_KERNEL CONFIG_KERNEL_IMAGE_BASE #define __NO_KASLR_END_KERNEL (__NO_KASLR_START_KERNEL + KERNEL_IMAGE_SIZE) +#define TEXT_OFFSET 0x100000 + #endif /* _S390_PAGE_H */ diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index e67cd409b858..ae5d0a9d6911 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -39,7 +39,7 @@ PHDRS { SECTIONS { - . = __START_KERNEL; + . = TEXT_OFFSET; .text : { _stext = .; /* Start of text section */ _text = .; /* Text and read-only data */ diff --git a/arch/s390/tools/relocs.c b/arch/s390/tools/relocs.c index a74dbd5c9896..30a732c808f3 100644 --- a/arch/s390/tools/relocs.c +++ b/arch/s390/tools/relocs.c @@ -280,7 +280,7 @@ static int do_reloc(struct section *sec, Elf_Rel *rel) case R_390_GOTOFF64: break; case R_390_64: - add_reloc(&relocs64, offset - ehdr.e_entry); + add_reloc(&relocs64, offset); break; default: die("Unsupported relocation type: %d\n", r_type); -- cgit From 15179cf2806f91685410e598f82813a7fcf90f6c Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 16 Aug 2024 16:47:39 -0500 Subject: smb3: fix problem unloading module due to leaked refcount on shutdown The shutdown ioctl can leak a refcount on the tlink which can prevent rmmod (unloading the cifs.ko) module from working. Found while debugging xfstest generic/043 Fixes: 69ca1f57555f ("smb3: add dynamic tracepoints for shutdown ioctl") Reviewed-by: Meetakshi Setiya Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/connect.c | 3 +++ fs/smb/client/ioctl.c | 2 ++ fs/smb/client/link.c | 1 + 3 files changed, 6 insertions(+) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index d2307162a2de..c1c14274930a 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -4194,6 +4194,9 @@ tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink) * * If one doesn't exist then insert a new tcon_link struct into the tree and * try to construct a new one. + * + * REMEMBER to call cifs_put_tlink() after successful calls to cifs_sb_tlink, + * to avoid refcount issues */ struct tcon_link * cifs_sb_tlink(struct cifs_sb_info *cifs_sb) diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 44dbaf9929a4..9bb5c869f4db 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -229,9 +229,11 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) shutdown_good: trace_smb3_shutdown_done(flags, tcon->tid); + cifs_put_tlink(tlink); return 0; shutdown_out_err: trace_smb3_shutdown_err(rc, flags, tcon->tid); + cifs_put_tlink(tlink); return rc; } diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index d86da949a919..80099bbb333b 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -588,6 +588,7 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) { rc = PTR_ERR(tlink); + /* BB could be clearer if skipped put_tlink on error here, but harmless */ goto symlink_exit; } pTcon = tlink_tcon(tlink); -- cgit From ec686804117a0421cf31d54427768aaf93aa0069 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 21 Aug 2024 00:45:03 -0300 Subject: smb: client: ignore unhandled reparse tags Just ignore reparse points that the client can't parse rather than bailing out and not opening the file or directory. Reported-by: Marc <1marc1@gmail.com> Closes: https://lore.kernel.org/r/CAMHwNVv-B+Q6wa0FEXrAuzdchzcJRsPKDDRrNaYZJd6X-+iJzw@mail.gmail.com Fixes: 539aad7f14da ("smb: client: introduce ->parse_reparse_point()") Tested-by: Anthony Nandaa (Microsoft) Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/reparse.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 689d8a506d45..48c27581ec51 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -378,6 +378,8 @@ int parse_reparse_point(struct reparse_data_buffer *buf, u32 plen, struct cifs_sb_info *cifs_sb, bool unicode, struct cifs_open_info_data *data) { + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + data->reparse.buf = buf; /* See MS-FSCC 2.1.2 */ @@ -394,12 +396,13 @@ int parse_reparse_point(struct reparse_data_buffer *buf, case IO_REPARSE_TAG_LX_FIFO: case IO_REPARSE_TAG_LX_CHR: case IO_REPARSE_TAG_LX_BLK: - return 0; + break; default: - cifs_dbg(VFS, "%s: unhandled reparse tag: 0x%08x\n", - __func__, le32_to_cpu(buf->ReparseTag)); - return -EOPNOTSUPP; + cifs_tcon_dbg(VFS | ONCE, "unhandled reparse tag: 0x%08x\n", + le32_to_cpu(buf->ReparseTag)); + break; } + return 0; } int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, -- cgit From 8fb4ac1cee88a57e7a56faba49b408a41a4af4db Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 18 Aug 2024 16:07:11 +0900 Subject: kbuild: fix typos "prequisites" to "prerequisites" This typo in scripts/Makefile.build has been present for more than 20 years. It was accidentally copy-pasted to other scripts/Makefile.* files. Fix them all. Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor --- scripts/Makefile.build | 2 +- scripts/Makefile.modfinal | 2 +- scripts/Makefile.vmlinux | 2 +- scripts/Makefile.vmlinux_o | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index efacca63c897..a5ac8ed1936f 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -487,7 +487,7 @@ $(subdir-ym): need-modorder=$(if $(filter $@/modules.order, $(subdir-modorder)),1) \ $(filter $@/%, $(single-subdir-goals)) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 1fa98b5e952b..306a6bb86e4d 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -62,7 +62,7 @@ endif targets += $(modules:%.o=%.ko) $(modules:%.o=%.mod.o) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 49946cb96844..5ceecbed31eb 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -33,7 +33,7 @@ targets += vmlinux vmlinux: scripts/link-vmlinux.sh vmlinux.o $(KBUILD_LDS) FORCE +$(call if_changed_dep,link_vmlinux) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 6de297916ce6..d64070b6b4bc 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -87,7 +87,7 @@ targets += modules.builtin modules.builtin: modules.builtin.modinfo FORCE $(call if_changed,modules_builtin) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE -- cgit From f58bab6fd4063913bd8321e99874b8239e9ba726 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 22 Aug 2024 14:47:01 -0400 Subject: nfsd: ensure that nfsd4_fattr_args.context is zeroed out If nfsd4_encode_fattr4 ends up doing a "goto out" before we get to checking for the security label, then args.context will be set to uninitialized junk on the stack, which we'll then try to free. Initialize it early. Fixes: f59388a579c6 ("NFSD: Add nfsd4_encode_fattr4_sec_label()") Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 42b41d55d4ed..43ccf6119cf1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3545,6 +3545,9 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, args.dentry = dentry; args.ignore_crossmnt = (ignore_crossmnt != 0); args.acl = NULL; +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + args.context = NULL; +#endif /* * Make a local copy of the attribute bitmap that can be modified. @@ -3617,7 +3620,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, args.contextsupport = false; #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - args.context = NULL; if ((attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) || attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { if (exp->ex_flags & NFSEXP_SECURITY_LABEL) -- cgit From afc954fd223ded70b1fa000767e2531db55cce58 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 14 Aug 2024 21:58:21 +0200 Subject: thermal: of: Fix OF node leak in thermal_of_trips_init() error path Terminating for_each_child_of_node() loop requires dropping OF node reference, so bailing out after thermal_of_populate_trip() error misses this. Solve the OF node reference leak with scoped for_each_child_of_node_scoped(). Fixes: d0c75fa2c17f ("thermal/of: Initialize trip points separately") Cc: All applicable Signed-off-by: Krzysztof Kozlowski Reviewed-by: Chen-Yu Tsai Reviewed-by: Daniel Lezcano Link: https://patch.msgid.link/20240814195823.437597-1-krzysztof.kozlowski@linaro.org Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_of.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index aa34b6e82e26..30f8d6e70484 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -125,7 +125,7 @@ static int thermal_of_populate_trip(struct device_node *np, static struct thermal_trip *thermal_of_trips_init(struct device_node *np, int *ntrips) { struct thermal_trip *tt; - struct device_node *trips, *trip; + struct device_node *trips; int ret, count; trips = of_get_child_by_name(np, "trips"); @@ -150,7 +150,7 @@ static struct thermal_trip *thermal_of_trips_init(struct device_node *np, int *n *ntrips = count; count = 0; - for_each_child_of_node(trips, trip) { + for_each_child_of_node_scoped(trips, trip) { ret = thermal_of_populate_trip(trip, &tt[count++]); if (ret) goto out_kfree; -- cgit From 662b52b761bfe0ba970e5823759798faf809b896 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 14 Aug 2024 21:58:22 +0200 Subject: thermal: of: Fix OF node leak in thermal_of_zone_register() thermal_of_zone_register() calls of_thermal_zone_find() which will iterate over OF nodes with for_each_available_child_of_node() to find matching thermal zone node. When it finds such, it exits the loop and returns the node. Prematurely ending for_each_available_child_of_node() loops requires dropping OF node reference, thus success of of_thermal_zone_find() means that caller must drop the reference. Fixes: 3fd6d6e2b4e8 ("thermal/of: Rework the thermal device tree initialization") Cc: All applicable Signed-off-by: Krzysztof Kozlowski Reviewed-by: Chen-Yu Tsai Reviewed-by: Daniel Lezcano Link: https://patch.msgid.link/20240814195823.437597-2-krzysztof.kozlowski@linaro.org Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_of.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 30f8d6e70484..b08a9b64718d 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -491,7 +491,8 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node * trips = thermal_of_trips_init(np, &ntrips); if (IS_ERR(trips)) { pr_err("Failed to find trip points for %pOFn id=%d\n", sensor, id); - return ERR_CAST(trips); + ret = PTR_ERR(trips); + goto out_of_node_put; } ret = thermal_of_monitor_init(np, &delay, &pdelay); @@ -519,6 +520,7 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node * goto out_kfree_trips; } + of_node_put(np); kfree(trips); ret = thermal_zone_device_enable(tz); @@ -533,6 +535,8 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node * out_kfree_trips: kfree(trips); +out_of_node_put: + of_node_put(np); return ERR_PTR(ret); } -- cgit From c0a1ef9c5be72ff28a5413deb1b3e1a066593c13 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 14 Aug 2024 21:58:23 +0200 Subject: thermal: of: Fix OF node leak in of_thermal_zone_find() error paths Terminating for_each_available_child_of_node() loop requires dropping OF node reference, so bailing out on errors misses this. Solve the OF node reference leak with scoped for_each_available_child_of_node_scoped(). Fixes: 3fd6d6e2b4e8 ("thermal/of: Rework the thermal device tree initialization") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Chen-Yu Tsai Reviewed-by: Daniel Lezcano Link: https://patch.msgid.link/20240814195823.437597-3-krzysztof.kozlowski@linaro.org Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_of.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index b08a9b64718d..1f252692815a 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -184,14 +184,14 @@ static struct device_node *of_thermal_zone_find(struct device_node *sensor, int * Search for each thermal zone, a defined sensor * corresponding to the one passed as parameter */ - for_each_available_child_of_node(np, tz) { + for_each_available_child_of_node_scoped(np, child) { int count, i; - count = of_count_phandle_with_args(tz, "thermal-sensors", + count = of_count_phandle_with_args(child, "thermal-sensors", "#thermal-sensor-cells"); if (count <= 0) { - pr_err("%pOFn: missing thermal sensor\n", tz); + pr_err("%pOFn: missing thermal sensor\n", child); tz = ERR_PTR(-EINVAL); goto out; } @@ -200,18 +200,19 @@ static struct device_node *of_thermal_zone_find(struct device_node *sensor, int int ret; - ret = of_parse_phandle_with_args(tz, "thermal-sensors", + ret = of_parse_phandle_with_args(child, "thermal-sensors", "#thermal-sensor-cells", i, &sensor_specs); if (ret < 0) { - pr_err("%pOFn: Failed to read thermal-sensors cells: %d\n", tz, ret); + pr_err("%pOFn: Failed to read thermal-sensors cells: %d\n", child, ret); tz = ERR_PTR(ret); goto out; } if ((sensor == sensor_specs.np) && id == (sensor_specs.args_count ? sensor_specs.args[0] : 0)) { - pr_debug("sensor %pOFn id=%d belongs to %pOFn\n", sensor, id, tz); + pr_debug("sensor %pOFn id=%d belongs to %pOFn\n", sensor, id, child); + tz = no_free_ptr(child); goto out; } } -- cgit From 591940e22e287fb64ac07be275e343d860cb72d6 Mon Sep 17 00:00:00 2001 From: Steve Wilkins Date: Fri, 9 Aug 2024 14:47:44 +0100 Subject: firmware: microchip: fix incorrect error report of programming:timeout on success After successfully programming the SPI flash with an MFPS auto update image, the error sysfs attribute reports programming:timeout. This is caused by an incorrect check on the return value from wait_for_completion_timeout() in mpfs_auto_update_poll_complete(). Fixes: ec5b0f1193ad ("firmware: microchip: add PolarFire SoC Auto Update support") Signed-off-by: Steve Wilkins Signed-off-by: Conor Dooley --- drivers/firmware/microchip/mpfs-auto-update.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/microchip/mpfs-auto-update.c b/drivers/firmware/microchip/mpfs-auto-update.c index 30de47895b1c..9ca5ee58edbd 100644 --- a/drivers/firmware/microchip/mpfs-auto-update.c +++ b/drivers/firmware/microchip/mpfs-auto-update.c @@ -166,7 +166,7 @@ static enum fw_upload_err mpfs_auto_update_poll_complete(struct fw_upload *fw_up */ ret = wait_for_completion_timeout(&priv->programming_complete, msecs_to_jiffies(AUTO_UPDATE_TIMEOUT_MS)); - if (ret) + if (!ret) return FW_UPLOAD_ERR_TIMEOUT; return FW_UPLOAD_ERR_NONE; -- cgit From 4ae738dfef2c0323752ab81786e2d298c9939321 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 22 Aug 2024 11:40:55 -0400 Subject: net: xilinx: axienet: Always disable promiscuous mode If promiscuous mode is disabled when there are fewer than four multicast addresses, then it will not be reflected in the hardware. Fix this by always clearing the promiscuous mode flag even when we program multicast addresses. Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Sean Anderson Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240822154059.1066595-2-sean.anderson@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 02fdf66e07fa..163d05248007 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -450,6 +450,10 @@ static void axienet_set_multicast_list(struct net_device *ndev) } else if (!netdev_mc_empty(ndev)) { struct netdev_hw_addr *ha; + reg = axienet_ior(lp, XAE_FMI_OFFSET); + reg &= ~XAE_FMI_PM_MASK; + axienet_iow(lp, XAE_FMI_OFFSET, reg); + i = 0; netdev_for_each_mc_addr(ha, ndev) { if (i >= XAE_MULTICAST_CAM_TABLE_NUM) -- cgit From 797a68c9de0f5a5447baf4bd3bb9c10a3993435b Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 22 Aug 2024 11:40:56 -0400 Subject: net: xilinx: axienet: Fix dangling multicast addresses If a multicast address is removed but there are still some multicast addresses, that address would remain programmed into the frame filter. Fix this by explicitly setting the enable bit for each filter. Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Sean Anderson Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240822154059.1066595-3-sean.anderson@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xilinx/xilinx_axienet.h | 1 + drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 21 +++++++++------------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index c7d9221fafdc..09c9f9787180 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -170,6 +170,7 @@ #define XAE_UAW0_OFFSET 0x00000700 /* Unicast address word 0 */ #define XAE_UAW1_OFFSET 0x00000704 /* Unicast address word 1 */ #define XAE_FMI_OFFSET 0x00000708 /* Frame Filter Control */ +#define XAE_FFE_OFFSET 0x0000070C /* Frame Filter Enable */ #define XAE_AF0_OFFSET 0x00000710 /* Address Filter 0 */ #define XAE_AF1_OFFSET 0x00000714 /* Address Filter 1 */ diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 163d05248007..9aeb7b9f3ae4 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -432,7 +432,7 @@ static int netdev_set_mac_address(struct net_device *ndev, void *p) */ static void axienet_set_multicast_list(struct net_device *ndev) { - int i; + int i = 0; u32 reg, af0reg, af1reg; struct axienet_local *lp = netdev_priv(ndev); @@ -454,7 +454,6 @@ static void axienet_set_multicast_list(struct net_device *ndev) reg &= ~XAE_FMI_PM_MASK; axienet_iow(lp, XAE_FMI_OFFSET, reg); - i = 0; netdev_for_each_mc_addr(ha, ndev) { if (i >= XAE_MULTICAST_CAM_TABLE_NUM) break; @@ -473,6 +472,7 @@ static void axienet_set_multicast_list(struct net_device *ndev) axienet_iow(lp, XAE_FMI_OFFSET, reg); axienet_iow(lp, XAE_AF0_OFFSET, af0reg); axienet_iow(lp, XAE_AF1_OFFSET, af1reg); + axienet_iow(lp, XAE_FFE_OFFSET, 1); i++; } } else { @@ -480,18 +480,15 @@ static void axienet_set_multicast_list(struct net_device *ndev) reg &= ~XAE_FMI_PM_MASK; axienet_iow(lp, XAE_FMI_OFFSET, reg); - - for (i = 0; i < XAE_MULTICAST_CAM_TABLE_NUM; i++) { - reg = axienet_ior(lp, XAE_FMI_OFFSET) & 0xFFFFFF00; - reg |= i; - - axienet_iow(lp, XAE_FMI_OFFSET, reg); - axienet_iow(lp, XAE_AF0_OFFSET, 0); - axienet_iow(lp, XAE_AF1_OFFSET, 0); - } - dev_info(&ndev->dev, "Promiscuous mode disabled.\n"); } + + for (; i < XAE_MULTICAST_CAM_TABLE_NUM; i++) { + reg = axienet_ior(lp, XAE_FMI_OFFSET) & 0xFFFFFF00; + reg |= i; + axienet_iow(lp, XAE_FMI_OFFSET, reg); + axienet_iow(lp, XAE_FFE_OFFSET, 0); + } } /** -- cgit From 57fb67783c4011581882f32e656d738da1f82042 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 21 Aug 2024 20:32:52 +0800 Subject: net: ovs: fix ovs_drop_reasons error There is something wrong with ovs_drop_reasons. ovs_drop_reasons[0] is "OVS_DROP_LAST_ACTION", but OVS_DROP_LAST_ACTION == __OVS_DROP_REASON + 1, which means that ovs_drop_reasons[1] should be "OVS_DROP_LAST_ACTION". And as Adrian tested, without the patch, adding flow to drop packets results in: drop at: do_execute_actions+0x197/0xb20 [openvsw (0xffffffffc0db6f97) origin: software input port ifindex: 8 timestamp: Tue Aug 20 10:19:17 2024 859853461 nsec protocol: 0x800 length: 98 original length: 98 drop reason: OVS_DROP_ACTION_ERROR With the patch, the same results in: drop at: do_execute_actions+0x197/0xb20 [openvsw (0xffffffffc0db6f97) origin: software input port ifindex: 8 timestamp: Tue Aug 20 10:16:13 2024 475856608 nsec protocol: 0x800 length: 98 original length: 98 drop reason: OVS_DROP_LAST_ACTION Fix this by initializing ovs_drop_reasons with index. Fixes: 9d802da40b7c ("net: openvswitch: add last-action drop reason") Signed-off-by: Menglong Dong Tested-by: Adrian Moreno Reviewed-by: Adrian Moreno Link: https://patch.msgid.link/20240821123252.186305-1-dongml2@chinatelecom.cn Signed-off-by: Jakub Kicinski --- net/openvswitch/datapath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 99d72543abd3..78d9961fcd44 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2706,7 +2706,7 @@ static struct pernet_operations ovs_net_ops = { }; static const char * const ovs_drop_reasons[] = { -#define S(x) (#x), +#define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x), OVS_DROP_REASONS(S) #undef S }; -- cgit From 0124fb0ebf3b0ef89892d42147c9387be3105318 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Wed, 21 Aug 2024 11:13:37 +0200 Subject: s390/iucv: Fix vargs handling in iucv_alloc_device() iucv_alloc_device() gets a format string and a varying number of arguments. This is incorrectly forwarded by calling dev_set_name() with the format string and a va_list, while dev_set_name() expects also a varying number of arguments. Symptoms: Corrupted iucv device names, which can result in log messages like: sysfs: cannot create duplicate filename '/devices/iucv/hvc_iucv1827699952' Fixes: 4452e8ef8c36 ("s390/iucv: Provide iucv_alloc_device() / iucv_release_device()") Link: https://bugzilla.suse.com/show_bug.cgi?id=1228425 Signed-off-by: Alexandra Winter Reviewed-by: Thorsten Winkler Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/20240821091337.3627068-1-wintera@linux.ibm.com Signed-off-by: Jakub Kicinski --- net/iucv/iucv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 1e42e13ad24e..d3e9efab7f4b 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -86,13 +86,15 @@ struct device *iucv_alloc_device(const struct attribute_group **attrs, { struct device *dev; va_list vargs; + char buf[20]; int rc; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) goto out_error; va_start(vargs, fmt); - rc = dev_set_name(dev, fmt, vargs); + vsnprintf(buf, sizeof(buf), fmt, vargs); + rc = dev_set_name(dev, "%s", buf); va_end(vargs); if (rc) goto out_error; -- cgit From a54a93d0e3599b05856971734e15418ac551a14c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 13 Aug 2024 09:35:27 +0800 Subject: nvme: move stopping keep-alive into nvme_uninit_ctrl() Commit 4733b65d82bd ("nvme: start keep-alive after admin queue setup") moves starting keep-alive from nvme_start_ctrl() into nvme_init_ctrl_finish(), but don't move stopping keep-alive into nvme_uninit_ctrl(), so keep-alive work can be started and keep pending after failing to start controller, finally use-after-free is triggered if nvme host driver is unloaded. This patch fixes kernel panic when running nvme/004 in case that connection failure is triggered, by moving stopping keep-alive into nvme_uninit_ctrl(). This way is reasonable because keep-alive is now started in nvme_init_ctrl_finish(). Fixes: 3af755a46881 ("nvme: move nvme_stop_keep_alive() back to original position") Cc: Hannes Reinecke Cc: Mark O'Donovan Reported-by: Changhui Zhong Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 33fa01c599ad..0dc8bcc664f2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4612,7 +4612,6 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_mpath_stop(ctrl); nvme_auth_stop(ctrl); - nvme_stop_keep_alive(ctrl); nvme_stop_failfast_work(ctrl); flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fw_act_work); @@ -4648,6 +4647,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl); void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) { + nvme_stop_keep_alive(ctrl); nvme_hwmon_exit(ctrl); nvme_fault_inject_fini(&ctrl->fault_inject); dev_pm_qos_hide_latency_tolerance(ctrl->device); -- cgit From fe01751347359862c65c715d51c0b3f4fa8ee2f0 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Wed, 14 Aug 2024 19:26:50 +0530 Subject: nvme: Remove unused field The "name" field in struct nvme_ctrl is unsued so removing it. This would help save 12 bytes of space for each nvme_ctrl instance created. Signed-off-by: Nilay Shroff Reviewed-by: Kanchan Joshi Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ae5314d32943..da57947130cc 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -301,7 +301,6 @@ struct nvme_ctrl { struct opal_dev *opal_dev; - char name[12]; u16 cntlid; u16 mtfa; -- cgit From 67d95303c84732c2e1de5730756281f648dbefaf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 13 Aug 2024 15:21:13 +0530 Subject: cpufreq: amd-pstate: Fix uninitialized variable in amd_pstate_cpu_boost_update() Smatch complains that "ret" could be uninitialized: drivers/cpufreq/amd-pstate.c:734 amd_pstate_cpu_boost_update() error: uninitialized symbol 'ret'. This seems like it probably is a real issue. Initialize "ret" to zero to be safe. Fixes: c8c68c38b56f ("cpufreq: amd-pstate: initialize core precision boost state") Signed-off-by: Dan Carpenter Reviewed-by: Perry Yuan Acked-by: Gautham R. Shenoy Link: https://lore.kernel.org/lkml/7ff53543-6c04-48a0-8d99-7dc010b93b3a@stanley.mountain/T/ Signed-off-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 68c616b572f2..358bd88cd0c5 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -692,7 +692,7 @@ static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) struct amd_cpudata *cpudata = policy->driver_data; struct cppc_perf_ctrls perf_ctrls; u32 highest_perf, nominal_perf, nominal_freq, max_freq; - int ret; + int ret = 0; highest_perf = READ_ONCE(cpudata->highest_perf); nominal_perf = READ_ONCE(cpudata->nominal_perf); -- cgit From 0d8584d288a9b4132e945d76bcc04395d158b2e7 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Tue, 13 Aug 2024 15:21:14 +0530 Subject: cpufreq/amd-pstate: Use topology_logical_package_id() instead of logical_die_id() After the commit 63edbaa48a57 ("x86/cpu/topology: Add support for the AMD 0x80000026 leaf"), the topolgy_logical_die_id() function returns the logical Core Chiplet Die (CCD) ID instead of the logical socket ID. Since this is currently used to set MSR_AMD_CPPC_ENABLE, which needs to be set on any one of the threads of the socket, it is prudent to use topology_logical_package_id() in place of topology_logical_die_id(). Fixes: 63edbaa48a57 ("x86/cpu/topology: Add support for the AMD 0x80000026 leaf") cc: stable@vger.kernel.org # 6.10 Signed-off-by: Gautham R. Shenoy Tested-by: Dhananjay Ugwekar Link: https://lore.kernel.org/lkml/20240801124509.3650-1-Dhananjay.Ugwekar@amd.com/ Signed-off-by: Dhananjay Ugwekar Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 358bd88cd0c5..89bda7a2bb8d 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -321,7 +321,7 @@ static inline int pstate_enable(bool enable) return 0; for_each_present_cpu(cpu) { - unsigned long logical_id = topology_logical_die_id(cpu); + unsigned long logical_id = topology_logical_package_id(cpu); if (test_bit(logical_id, &logical_proc_id_mask)) continue; -- cgit From 5e51224d2afbda57f33f47485871ee5532145e18 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Tue, 20 Aug 2024 14:33:15 +0000 Subject: smb/client: fix typo: GlobalMid_Sem -> GlobalMid_Lock The comments have typos, fix that to not confuse readers. Signed-off-by: ChenXiaoSong Reviewed-by: Namjae Jeon --- fs/smb/client/cifsfs.c | 6 +++--- fs/smb/client/cifsglob.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 2c4b357d85e2..d89485235425 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -75,9 +75,9 @@ unsigned int sign_CIFS_PDUs = 1; /* * Global transaction id (XID) information */ -unsigned int GlobalCurrentXid; /* protected by GlobalMid_Sem */ -unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */ -unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */ +unsigned int GlobalCurrentXid; /* protected by GlobalMid_Lock */ +unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Lock */ +unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Lock */ spinlock_t GlobalMid_Lock; /* protects above & list operations on midQ entries */ /* diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 5c9b3e6cd95f..7ebe80a25d04 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -2017,9 +2017,9 @@ extern spinlock_t cifs_tcp_ses_lock; /* * Global transaction id (XID) information */ -extern unsigned int GlobalCurrentXid; /* protected by GlobalMid_Sem */ -extern unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */ -extern unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */ +extern unsigned int GlobalCurrentXid; /* protected by GlobalMid_Lock */ +extern unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Lock */ +extern unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Lock */ extern spinlock_t GlobalMid_Lock; /* protects above & list operations on midQ entries */ /* -- cgit From cb78f9b7d0c0c9f86d8c0ac9c46b8b684d8785a9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 15 Aug 2024 10:18:41 -0400 Subject: nfs: fix the fetch of FATTR4_OPEN_ARGUMENTS The client doesn't properly request FATTR4_OPEN_ARGUMENTS in the initial SERVER_CAPS getattr. Add FATTR4_WORD2_OPEN_ARGUMENTS to the initial request. Fixes: 707f13b3d081 (NFSv4: Add support for the FATTR4_OPEN_ARGUMENTS attribute) Signed-off-by: Jeff Layton Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8883016c551c..06df74362e94 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3931,7 +3931,8 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f FATTR4_WORD0_CASE_INSENSITIVE | FATTR4_WORD0_CASE_PRESERVING; if (minorversion) - bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT; + bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT | + FATTR4_WORD2_OPEN_ARGUMENTS; status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (status == 0) { -- cgit From 95832998fb6edc50d4f2f6a958d9f90142d4be48 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 21 Aug 2024 08:28:25 -0400 Subject: nfs: fix bitmap decoder to handle a 3rd word It only decodes the first two words at this point. Have it decode the third word as well. Without this, the client doesn't send delegated timestamps in the CB_GETATTR response. With this change we also need to expand the on-stack bitmap in decode_recallany_args to 3 elements, in case the server sends a larger bitmap than expected. Fixes: 43df7110f4a9 ("NFSv4: Add CB_GETATTR support for delegated attributes") Signed-off-by: Jeff Layton Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/callback_xdr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 29c49a7e5fe1..6df77f008d3f 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -118,7 +118,9 @@ static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) if (likely(attrlen > 0)) bitmap[0] = ntohl(*p++); if (attrlen > 1) - bitmap[1] = ntohl(*p); + bitmap[1] = ntohl(*p++); + if (attrlen > 2) + bitmap[2] = ntohl(*p); return 0; } @@ -446,7 +448,7 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp, void *argp) { struct cb_recallanyargs *args = argp; - uint32_t bitmap[2]; + uint32_t bitmap[3]; __be32 *p, status; p = xdr_inline_decode(xdr, 4); -- cgit From a017ad1313fc91bdf235097fd0a02f673fc7bb11 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Aug 2024 14:05:00 -0400 Subject: NFSv4: Add missing rescheduling points in nfs_client_return_marked_delegations We're seeing reports of soft lockups when iterating through the loops, so let's add rescheduling points. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index cbbd4866b0b7..97b386032b71 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -228,6 +229,7 @@ static int __nfs_list_for_each_server(struct list_head *head, ret = fn(server, data); if (ret) goto out; + cond_resched(); rcu_read_lock(); } rcu_read_unlock(); -- cgit From d72b7963115bea971a28eaa2cb76722c023f9fdf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Aug 2024 14:05:01 -0400 Subject: NFSv4: Fix clearing of layout segments in layoutreturn Make sure that we clear the layout segments in cases where we see a fatal error, and also in the case where the layout is invalid. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 9 ++++++--- fs/nfs/pnfs.c | 5 ++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 06df74362e94..b8ffbe52ba15 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -9998,6 +9998,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) fallthrough; default: task->tk_status = 0; + lrp->res.lrs_present = 0; fallthrough; case 0: break; @@ -10011,9 +10012,11 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) task->tk_status = 0; break; case -NFS4ERR_DELAY: - if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN) - break; - goto out_restart; + if (nfs4_async_handle_error(task, server, NULL, NULL) == + -EAGAIN) + goto out_restart; + lrp->res.lrs_present = 0; + break; } return; out_restart: diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index aa698481bec8..0d16b383a452 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1284,10 +1284,9 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, LIST_HEAD(freeme); spin_lock(&inode->i_lock); - if (!pnfs_layout_is_valid(lo) || - !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) + if (!nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) goto out_unlock; - if (stateid) { + if (stateid && pnfs_layout_is_valid(lo)) { u32 seq = be32_to_cpu(arg_stateid->seqid); pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq); -- cgit From f92214e4c312f6ea9d78650cc6291d200f17abb6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Aug 2024 14:05:02 -0400 Subject: NFS: Avoid unnecessary rescanning of the per-server delegation list If the call to nfs_delegation_grab_inode() fails, we will not have dropped any locks that require us to rescan the list. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d5edb3b3eeef..20cb2008f9e4 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -647,6 +647,9 @@ restart: prev = delegation; continue; } + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) + continue; if (prev) { struct inode *tmp = nfs_delegation_grab_inode(prev); @@ -657,12 +660,6 @@ restart: } } - inode = nfs_delegation_grab_inode(delegation); - if (inode == NULL) { - rcu_read_unlock(); - iput(to_put); - goto restart; - } delegation = nfs_start_delegation_return_locked(NFS_I(inode)); rcu_read_unlock(); @@ -1184,7 +1181,6 @@ static int nfs_server_reap_unclaimed_delegations(struct nfs_server *server, struct inode *inode; restart: rcu_read_lock(); -restart_locked: list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags) || @@ -1195,7 +1191,7 @@ restart_locked: continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) - goto restart_locked; + continue; delegation = nfs_start_delegation_return_locked(NFS_I(inode)); rcu_read_unlock(); if (delegation != NULL) { @@ -1318,7 +1314,6 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server, restart: rcu_read_lock(); -restart_locked: list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags) || @@ -1330,7 +1325,7 @@ restart_locked: continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) - goto restart_locked; + continue; spin_lock(&delegation->lock); cred = get_cred_rcu(delegation->cred); nfs4_stateid_copy(&stateid, &delegation->stateid); -- cgit From 979b581e4c69257acab1af415ddad6b2d78a2fa5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Aug 2024 17:53:39 +0000 Subject: pktgen: use cpus_read_lock() in pg_net_init() I have seen the WARN_ON(smp_processor_id() != cpu) firing in pktgen_thread_worker() during tests. We must use cpus_read_lock()/cpus_read_unlock() around the for_each_online_cpu(cpu) loop. While we are at it use WARN_ON_ONCE() to avoid a possible syslog flood. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20240821175339.1191779-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/pktgen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index ea55a758a475..197a50ef8e2e 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3654,7 +3654,7 @@ static int pktgen_thread_worker(void *arg) struct pktgen_dev *pkt_dev = NULL; int cpu = t->cpu; - WARN_ON(smp_processor_id() != cpu); + WARN_ON_ONCE(smp_processor_id() != cpu); init_waitqueue_head(&t->queue); complete(&t->start_done); @@ -3989,6 +3989,7 @@ static int __net_init pg_net_init(struct net *net) goto remove; } + cpus_read_lock(); for_each_online_cpu(cpu) { int err; @@ -3997,6 +3998,7 @@ static int __net_init pg_net_init(struct net *net) pr_warn("Cannot create thread for cpu %d (%d)\n", cpu, err); } + cpus_read_unlock(); if (list_empty(&pn->pktgen_threads)) { pr_err("Initialization failed for all threads\n"); -- cgit From 3417c9574e368f0330637505f00d3814ca8854d2 Mon Sep 17 00:00:00 2001 From: Sherry Yang Date: Tue, 20 Aug 2024 23:51:31 -0700 Subject: scsi: lpfc: Fix overflow build issue Build failed while enabling "CONFIG_GCOV_KERNEL=y" and "CONFIG_GCOV_PROFILE_ALL=y" with following error: BUILDSTDERR: drivers/scsi/lpfc/lpfc_bsg.c: In function 'lpfc_get_cgnbuf_info': BUILDSTDERR: ./include/linux/fortify-string.h:114:33: error: '__builtin_memcpy' accessing 18446744073709551615 bytes at offsets 0 and 0 overlaps 9223372036854775807 bytes at offset -9223372036854775808 [-Werror=restrict] BUILDSTDERR: 114 | #define __underlying_memcpy __builtin_memcpy BUILDSTDERR: | ^ BUILDSTDERR: ./include/linux/fortify-string.h:637:9: note: in expansion of macro '__underlying_memcpy' BUILDSTDERR: 637 | __underlying_##op(p, q, __fortify_size); \ BUILDSTDERR: | ^~~~~~~~~~~~~ BUILDSTDERR: ./include/linux/fortify-string.h:682:26: note: in expansion of macro '__fortify_memcpy_chk' BUILDSTDERR: 682 | #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ BUILDSTDERR: | ^~~~~~~~~~~~~~~~~~~~ BUILDSTDERR: drivers/scsi/lpfc/lpfc_bsg.c:5468:9: note: in expansion of macro 'memcpy' BUILDSTDERR: 5468 | memcpy(cgn_buff, cp, cinfosz); BUILDSTDERR: | ^~~~~~ This happens from the commit 06bb7fc0feee ("kbuild: turn on -Wrestrict by default"). Address this issue by using size_t type. Signed-off-by: Sherry Yang Link: https://lore.kernel.org/r/20240821065131.1180791-1-sherry.yang@oracle.com Reviewed-by: Justin Tee Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_bsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 4156419c52c7..4756a3f82531 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -5410,7 +5410,7 @@ lpfc_get_cgnbuf_info(struct bsg_job *job) struct get_cgnbuf_info_req *cgnbuf_req; struct lpfc_cgn_info *cp; uint8_t *cgn_buff; - int size, cinfosz; + size_t size, cinfosz; int rc = 0; if (job->request_len < sizeof(struct fc_bsg_request) + -- cgit From 919ddf8336f0b84c0453bac583808c9f165a85c2 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 22 Aug 2024 00:51:42 +0200 Subject: scsi: aacraid: Fix double-free on probe failure aac_probe_one() calls hardware-specific init functions through the aac_driver_ident::init pointer, all of which eventually call down to aac_init_adapter(). If aac_init_adapter() fails after allocating memory for aac_dev::queues, it frees the memory but does not clear that member. After the hardware-specific init function returns an error, aac_probe_one() goes down an error path that frees the memory pointed to by aac_dev::queues, resulting.in a double-free. Reported-by: Michael Gordon Link: https://bugs.debian.org/1075855 Fixes: 8e0c5ebde82b ("[SCSI] aacraid: Newer adapter communication iterface support") Signed-off-by: Ben Hutchings Link: https://lore.kernel.org/r/ZsZvfqlQMveoL5KQ@decadent.org.uk Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/comminit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index bd99c5492b7d..0f64b0244303 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -642,6 +642,7 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) if (aac_comm_init(dev)<0){ kfree(dev->queues); + dev->queues = NULL; return NULL; } /* @@ -649,6 +650,7 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) */ if (aac_fib_setup(dev) < 0) { kfree(dev->queues); + dev->queues = NULL; return NULL; } -- cgit From 4f9eedfa27ae5806ed10906bcceee7bae49c8941 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Mon, 19 Aug 2024 17:09:34 +0800 Subject: scsi: sd: Ignore command SYNCHRONIZE CACHE error if format in progress If formatting a suspended disk (such as formatting with different DIF type), the disk will be resuming first, and then the format command will submit to the disk through SG_IO ioctl. When the disk is processing the format command, the system does not submit other commands to the disk. Therefore, the system attempts to suspend the disk again and sends the SYNCHRONIZE CACHE command. However, the SYNCHRONIZE CACHE command will fail because the disk is in the formatting process. This will cause the runtime_status of the disk to error and it is difficult for user to recover it. Error info like: [ 669.925325] sd 6:0:6:0: [sdg] Synchronizing SCSI cache [ 670.202371] sd 6:0:6:0: [sdg] Synchronize Cache(10) failed: Result: hostbyte=0x00 driverbyte=DRIVER_OK [ 670.216300] sd 6:0:6:0: [sdg] Sense Key : 0x2 [current] [ 670.221860] sd 6:0:6:0: [sdg] ASC=0x4 ASCQ=0x4 To solve the issue, ignore the error and return success/0 when format is in progress. Cc: stable@vger.kernel.org Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20240819090934.2130592-1-liyihang9@huawei.com Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index dad3991397cf..9db86943d04c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1823,13 +1823,15 @@ static int sd_sync_cache(struct scsi_disk *sdkp) (sshdr.asc == 0x74 && sshdr.ascq == 0x71)) /* drive is password locked */ /* this is no error here */ return 0; + /* - * This drive doesn't support sync and there's not much - * we can do because this is called during shutdown - * or suspend so just return success so those operations - * can proceed. + * If a format is in progress or if the drive does not + * support sync, there is not much we can do because + * this is called during shutdown or suspend so just + * return success so those operations can proceed. */ - if (sshdr.sense_key == ILLEGAL_REQUEST) + if ((sshdr.asc == 0x04 && sshdr.ascq == 0x04) || + sshdr.sense_key == ILLEGAL_REQUEST) return 0; } -- cgit From b58b133e680b20d219940e0fdb6f6132c2b60f38 Mon Sep 17 00:00:00 2001 From: Pranjal Shrivastava Date: Fri, 16 Aug 2024 10:49:06 +0000 Subject: iommu: Handle iommu faults for a bad iopf setup The iommu_report_device_fault function was updated to return void while assuming that drivers only need to call iommu_report_device_fault() for reporting an iopf. This implementation causes following problems: 1. The drivers rely on the core code to call it's page_reponse, however, when a fault is received and no fault capable domain is attached / iopf_param is NULL, the ops->page_response is NOT called causing the device to stall in case the fault type was PAGE_REQ. 2. The arm_smmu_v3 driver relies on the returned value to log errors returning void from iommu_report_device_fault causes these events to be missed while logging. Modify the iommu_report_device_fault function to return -EINVAL for cases where no fault capable domain is attached or iopf_param was NULL and calls back to the driver (ops->page_response) in case the fault type was IOMMU_FAULT_PAGE_REQ. The returned value can be used by the drivers to log the fault/event as needed. Reported-by: Kunkun Jiang Closes: https://lore.kernel.org/all/6147caf0-b9a0-30ca-795e-a1aa502a5c51@huawei.com/ Fixes: 3dfa64aecbaf ("iommu: Make iommu_report_device_fault() return void") Signed-off-by: Jason Gunthorpe Signed-off-by: Pranjal Shrivastava Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20240816104906.1010626-1-praan@google.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +- drivers/iommu/io-pgfault.c | 121 +++++++++++++++++++--------- include/linux/iommu.h | 5 +- 3 files changed, 87 insertions(+), 41 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index a31460f9f3d4..ed2b106e02dd 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1777,7 +1777,7 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) goto out_unlock; } - iommu_report_device_fault(master->dev, &fault_evt); + ret = iommu_report_device_fault(master->dev, &fault_evt); out_unlock: mutex_unlock(&smmu->streams_mutex); return ret; diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 81e9cc6e3164..4674e618797c 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -115,6 +115,59 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param, return group; } +static struct iommu_attach_handle *find_fault_handler(struct device *dev, + struct iopf_fault *evt) +{ + struct iommu_fault *fault = &evt->fault; + struct iommu_attach_handle *attach_handle; + + if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) { + attach_handle = iommu_attach_handle_get(dev->iommu_group, + fault->prm.pasid, 0); + if (IS_ERR(attach_handle)) { + const struct iommu_ops *ops = dev_iommu_ops(dev); + + if (!ops->user_pasid_table) + return NULL; + /* + * The iommu driver for this device supports user- + * managed PASID table. Therefore page faults for + * any PASID should go through the NESTING domain + * attached to the device RID. + */ + attach_handle = iommu_attach_handle_get( + dev->iommu_group, IOMMU_NO_PASID, + IOMMU_DOMAIN_NESTED); + if (IS_ERR(attach_handle)) + return NULL; + } + } else { + attach_handle = iommu_attach_handle_get(dev->iommu_group, + IOMMU_NO_PASID, 0); + + if (IS_ERR(attach_handle)) + return NULL; + } + + if (!attach_handle->domain->iopf_handler) + return NULL; + + return attach_handle; +} + +static void iopf_error_response(struct device *dev, struct iopf_fault *evt) +{ + const struct iommu_ops *ops = dev_iommu_ops(dev); + struct iommu_fault *fault = &evt->fault; + struct iommu_page_response resp = { + .pasid = fault->prm.pasid, + .grpid = fault->prm.grpid, + .code = IOMMU_PAGE_RESP_INVALID + }; + + ops->page_response(dev, evt, &resp); +} + /** * iommu_report_device_fault() - Report fault event to device driver * @dev: the device @@ -153,24 +206,39 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param, * handling framework should guarantee that the iommu domain could only be * freed after the device has stopped generating page faults (or the iommu * hardware has been set to block the page faults) and the pending page faults - * have been flushed. + * have been flushed. In case no page fault handler is attached or no iopf params + * are setup, then the ops->page_response() is called to complete the evt. + * + * Returns 0 on success, or an error in case of a bad/failed iopf setup. */ -void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) +int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { + struct iommu_attach_handle *attach_handle; struct iommu_fault *fault = &evt->fault; struct iommu_fault_param *iopf_param; struct iopf_group abort_group = {}; struct iopf_group *group; + attach_handle = find_fault_handler(dev, evt); + if (!attach_handle) + goto err_bad_iopf; + + /* + * Something has gone wrong if a fault capable domain is attached but no + * iopf_param is setup + */ iopf_param = iopf_get_dev_fault_param(dev); if (WARN_ON(!iopf_param)) - return; + goto err_bad_iopf; if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { - report_partial_fault(iopf_param, fault); + int ret; + + ret = report_partial_fault(iopf_param, fault); iopf_put_dev_fault_param(iopf_param); /* A request that is not the last does not need to be ack'd */ - return; + + return ret; } /* @@ -185,38 +253,7 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) if (group == &abort_group) goto err_abort; - if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) { - group->attach_handle = iommu_attach_handle_get(dev->iommu_group, - fault->prm.pasid, - 0); - if (IS_ERR(group->attach_handle)) { - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (!ops->user_pasid_table) - goto err_abort; - - /* - * The iommu driver for this device supports user- - * managed PASID table. Therefore page faults for - * any PASID should go through the NESTING domain - * attached to the device RID. - */ - group->attach_handle = - iommu_attach_handle_get(dev->iommu_group, - IOMMU_NO_PASID, - IOMMU_DOMAIN_NESTED); - if (IS_ERR(group->attach_handle)) - goto err_abort; - } - } else { - group->attach_handle = - iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0); - if (IS_ERR(group->attach_handle)) - goto err_abort; - } - - if (!group->attach_handle->domain->iopf_handler) - goto err_abort; + group->attach_handle = attach_handle; /* * On success iopf_handler must call iopf_group_response() and @@ -225,7 +262,7 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) if (group->attach_handle->domain->iopf_handler(group)) goto err_abort; - return; + return 0; err_abort: dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n", @@ -235,6 +272,14 @@ err_abort: __iopf_free_group(group); else iopf_free_group(group); + + return 0; + +err_bad_iopf: + if (fault->type == IOMMU_FAULT_PAGE_REQ) + iopf_error_response(dev, evt); + + return -EINVAL; } EXPORT_SYMBOL_GPL(iommu_report_device_fault); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 04cbdae0052e..bd722f473635 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1563,7 +1563,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); void iopf_free_group(struct iopf_group *group); -void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); +int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); void iopf_group_response(struct iopf_group *group, enum iommu_page_response_code status); #else @@ -1601,9 +1601,10 @@ static inline void iopf_free_group(struct iopf_group *group) { } -static inline void +static inline int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) { + return -ENODEV; } static inline void iopf_group_response(struct iopf_group *group, -- cgit From 82b8000c28b56b014ce52a1f1581bef4af148681 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 Aug 2024 11:09:43 +0200 Subject: net: drop special comment style As we discussed in the room at netdevconf earlier this week, drop the requirement for special comment style for netdev. For checkpatch, the general check accepts both right now, so simply drop the special request there as well. Acked-by: Stephen Hemminger Signed-off-by: Johannes Berg Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/process/coding-style.rst | 12 ------------ Documentation/process/maintainer-netdev.rst | 17 ----------------- scripts/checkpatch.pl | 10 ---------- 3 files changed, 39 deletions(-) diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst index 04f6aa377a5d..8e30c8f7697d 100644 --- a/Documentation/process/coding-style.rst +++ b/Documentation/process/coding-style.rst @@ -629,18 +629,6 @@ The preferred style for long (multi-line) comments is: * with beginning and ending almost-blank lines. */ -For files in net/ and drivers/net/ the preferred style for long (multi-line) -comments is a little different. - -.. code-block:: c - - /* The preferred comment style for files in net/ and drivers/net - * looks like this. - * - * It is nearly the same as the generally preferred comment style, - * but there is no initial almost-blank line. - */ - It's also important to comment data, whether they are basic types or derived types. To this end, use just one data declaration per line (no commas for multiple data declarations). This leaves you room for a small comment on each diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index fe8616397d63..30d24eecdaaa 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -355,23 +355,6 @@ just do it. As a result, a sequence of smaller series gets merged quicker and with better review coverage. Re-posting large series also increases the mailing list traffic. -Multi-line comments -~~~~~~~~~~~~~~~~~~~ - -Comment style convention is slightly different for networking and most of -the tree. Instead of this:: - - /* - * foobar blah blah blah - * another line of text - */ - -it is requested that you make it look like this:: - - /* foobar blah blah blah - * another line of text - */ - Local variable ordering ("reverse xmas tree", "RCS") ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 39032224d504..4427572b2477 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4015,16 +4015,6 @@ sub process { } } -# Block comment styles -# Networking with an initial /* - if ($realfile =~ m@^(drivers/net/|net/)@ && - $prevrawline =~ /^\+[ \t]*\/\*[ \t]*$/ && - $rawline =~ /^\+[ \t]*\*/ && - $realline > 3) { # Do not warn about the initial copyright comment block after SPDX-License-Identifier - WARN("NETWORKING_BLOCK_COMMENT_STYLE", - "networking block comments don't use an empty /* line, use /* Comment...\n" . $hereprev); - } - # Block comments use * on subsequent lines if ($prevline =~ /$;[ \t]*$/ && #ends in comment $prevrawline =~ /^\+.*?\/\*/ && #starting /* -- cgit From 71c8e2a7c822ee557b07d9bb49028dd269c87b2e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 22 Aug 2024 11:23:08 +0100 Subject: irqchip/gic-v3: Init SRE before poking sysregs The GICv3 driver pokes GICv3 system registers in gic_prio_init() before gic_cpu_sys_reg_init() ensures that GICv3 system registers have been enabled by writing to ICC_SRE_EL1.SRE. On arm64 this is benign as has_useable_gicv3_cpuif() runs earlier during cpufeature detection, and this enables the GICv3 system registers. On 32-bit arm when booting on an FVP using the boot-wrapper, the accesses in gic_prio_init() end up being UNDEFINED and crashes the kernel during boot. This is a regression introduced by the addition of gic_prio_init(). Fix this by factoring out the SRE initialization into a new function and calling it early in the three paths where SRE may not have been initialized: (1) gic_init_bases(), before the primary CPU pokes GICv3 sysregs in gic_prio_init(). (2) gic_starting_cpu(), before secondary CPUs initialize GICv3 sysregs in gic_cpu_init(). (3) gic_cpu_pm_notifier(), before CPUs re-initialize GICv3 sysregs in gic_cpu_sys_reg_init(). Fixes: d447bf09a4013541 ("irqchip/gic-v3: Detect GICD_CTRL.DS and SCR_EL3.FIQ earlier") Signed-off-by: Mark Rutland Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org --- drivers/irqchip/irq-gic-v3.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index c19083bfb943..74f21e03d4a3 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1154,14 +1154,8 @@ static void gic_update_rdist_properties(void) gic_data.rdists.has_vpend_valid_dirty ? "Valid+Dirty " : ""); } -static void gic_cpu_sys_reg_init(void) +static void gic_cpu_sys_reg_enable(void) { - int i, cpu = smp_processor_id(); - u64 mpidr = gic_cpu_to_affinity(cpu); - u64 need_rss = MPIDR_RS(mpidr); - bool group0; - u32 pribits; - /* * Need to check that the SRE bit has actually been set. If * not, it means that SRE is disabled at EL2. We're going to @@ -1172,6 +1166,16 @@ static void gic_cpu_sys_reg_init(void) if (!gic_enable_sre()) pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n"); +} + +static void gic_cpu_sys_reg_init(void) +{ + int i, cpu = smp_processor_id(); + u64 mpidr = gic_cpu_to_affinity(cpu); + u64 need_rss = MPIDR_RS(mpidr); + bool group0; + u32 pribits; + pribits = gic_get_pribits(); group0 = gic_has_group0(); @@ -1333,6 +1337,7 @@ static int gic_check_rdist(unsigned int cpu) static int gic_starting_cpu(unsigned int cpu) { + gic_cpu_sys_reg_enable(); gic_cpu_init(); if (gic_dist_supports_lpis()) @@ -1498,6 +1503,7 @@ static int gic_cpu_pm_notifier(struct notifier_block *self, if (cmd == CPU_PM_EXIT) { if (gic_dist_security_disabled()) gic_enable_redist(true); + gic_cpu_sys_reg_enable(); gic_cpu_sys_reg_init(); } else if (cmd == CPU_PM_ENTER && gic_dist_security_disabled()) { gic_write_grpen1(0); @@ -2070,6 +2076,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base, gic_update_rdist_properties(); + gic_cpu_sys_reg_enable(); gic_prio_init(); gic_dist_init(); gic_cpu_init(); -- cgit From 996b37da1e0f51314d4186b326742c2a95a9f0dd Mon Sep 17 00:00:00 2001 From: Ed Tsai Date: Mon, 8 Jul 2024 15:22:06 +0800 Subject: backing-file: convert to using fops->splice_write Filesystems may define their own splice write. Therefore, use the file fops instead of invoking iter_file_splice_write() directly. Signed-off-by: Ed Tsai Link: https://lore.kernel.org/r/20240708072208.25244-1-ed.tsai@mediatek.com Fixes: 5ca73468612d ("fuse: implement splice read/write passthrough") Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/backing-file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/backing-file.c b/fs/backing-file.c index afb557446c27..8860dac58c37 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -303,13 +303,16 @@ ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING))) return -EIO; + if (!out->f_op->splice_write) + return -EINVAL; + ret = file_remove_privs(ctx->user_file); if (ret) return ret; old_cred = override_creds(ctx->cred); file_start_write(out); - ret = iter_file_splice_write(pipe, out, ppos, len, flags); + ret = out->f_op->splice_write(pipe, out, ppos, len, flags); file_end_write(out); revert_creds(old_cred); -- cgit From 880799fc7a3a127c43143935c1a8767d77c19cae Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 23 Aug 2024 12:07:12 +0200 Subject: irqchip/irq-msi-lib: Check for NULL ops in msi_lib_irq_domain_select() The irq_domain passed to msi_lib_irq_domain_select() may not have msi_parent_ops set. There is a NULL pointer check for it, but unfortunately there is a dereference of the parent ops pointer before that. Move the NULL pointer test before the first use of that pointer. This was found on a MacchiatoBin (Marvell Armada 8K SoC), which uses the irq-mvebu-sei driver. Fixes: 72e257c6f058 ("irqchip: Provide irq-msi-lib") Signed-off-by: Maxime Chevallier Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240823100733.1900666-1-maxime.chevallier@bootlin.com Closes: https://lore.kernel.org/all/20240821165034.1af97bad@fedora-3.home/ --- drivers/irqchip/irq-msi-lib.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-msi-lib.c b/drivers/irqchip/irq-msi-lib.c index b5b90003311a..d8e29fc0d406 100644 --- a/drivers/irqchip/irq-msi-lib.c +++ b/drivers/irqchip/irq-msi-lib.c @@ -128,6 +128,9 @@ int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, const struct msi_parent_ops *ops = d->msi_parent_ops; u32 busmask = BIT(bus_token); + if (!ops) + return 0; + if (fwspec->fwnode != d->fwnode || fwspec->param_count != 0) return 0; @@ -135,6 +138,6 @@ int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, if (bus_token == ops->bus_select_token) return 1; - return ops && !!(ops->bus_select_mask & busmask); + return !!(ops->bus_select_mask & busmask); } EXPORT_SYMBOL_GPL(msi_lib_irq_domain_select); -- cgit From ba7b6633e9afa6b5a788efd533c4bdc6fb1c606d Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Thu, 22 Aug 2024 15:23:56 +0530 Subject: platform/x86/amd/pmc: Fix SMU command submission path on new AMD platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 426463d94d45 ("platform/x86/amd/pmc: Send OS_HINT command for new AMD platform") was introduced to enable sending mailbox commands to PMFW on newer platforms. However, it was later discovered that the commit did not configure the correct message port ID (i.e., S2D or PMC). Without this configuration, all command submissions to PMFW are treated as invalid, leading to command failures. To address this issue, the CPU ID association for the new platform needs to be added in amd_pmc_get_ip_info(). This ensures that the correct SMU port IDs are selected. Fixes: 426463d94d45 ("platform/x86/amd/pmc: Send OS_HINT command for new AMD platform") Co-developed-by: Sanket Goswami Signed-off-by: Sanket Goswami Signed-off-by: Shyam Sundar S K Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20240822095357.395808-1-Shyam-sundar.S-k@amd.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmc/pmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index c3e51f0a5c33..f0d389cf1ecb 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -359,6 +359,7 @@ static void amd_pmc_get_ip_info(struct amd_pmc_dev *dev) dev->smu_msg = 0x538; break; case PCI_DEVICE_ID_AMD_1AH_M20H_ROOT: + case PCI_DEVICE_ID_AMD_1AH_M60H_ROOT: dev->num_ips = 22; dev->s2d_msg_id = 0xDE; dev->smu_msg = 0x938; -- cgit From a24cd5cfd1d07712a9f192401af638e3c6cc1491 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Thu, 22 Aug 2024 15:23:57 +0530 Subject: platform/x86/amd/pmc: Extend support for PMC features on new AMD platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PMC driver has capability to get the idle mask values and STB data from the PMFW. Extend this support to the platforms that belong to family 1Ah model 60h series. Co-developed-by: Sanket Goswami Signed-off-by: Sanket Goswami Signed-off-by: Shyam Sundar S K Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20240822095357.395808-2-Shyam-sundar.S-k@amd.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmc/pmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index f0d389cf1ecb..bbb8edb62e00 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -598,6 +598,7 @@ static int amd_pmc_idlemask_read(struct amd_pmc_dev *pdev, struct device *dev, val = amd_pmc_reg_read(pdev, AMD_PMC_SCRATCH_REG_YC); break; case PCI_DEVICE_ID_AMD_1AH_M20H_ROOT: + case PCI_DEVICE_ID_AMD_1AH_M60H_ROOT: val = amd_pmc_reg_read(pdev, AMD_PMC_SCRATCH_REG_1AH); break; default: @@ -631,6 +632,7 @@ static bool amd_pmc_is_stb_supported(struct amd_pmc_dev *dev) case AMD_CPU_ID_CB: case AMD_CPU_ID_PS: case PCI_DEVICE_ID_AMD_1AH_M20H_ROOT: + case PCI_DEVICE_ID_AMD_1AH_M60H_ROOT: return true; default: return false; -- cgit From 8af174ea863c72f25ce31cee3baad8a301c0cf0f Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 21 Aug 2024 13:42:29 -0700 Subject: net: mana: Fix race of mana_hwc_post_rx_wqe and new hwc response The mana_hwc_rx_event_handler() / mana_hwc_handle_resp() calls complete(&ctx->comp_event) before posting the wqe back. It's possible that other callers, like mana_create_txq(), start the next round of mana_hwc_send_request() before the posting of wqe. And if the HW is fast enough to respond, it can hit no_wqe error on the HW channel, then the response message is lost. The mana driver may fail to create queues and open, because of waiting for the HW response and timed out. Sample dmesg: [ 528.610840] mana 39d4:00:02.0: HWC: Request timed out! [ 528.614452] mana 39d4:00:02.0: Failed to send mana message: -110, 0x0 [ 528.618326] mana 39d4:00:02.0 enP14804s2: Failed to create WQ object: -110 To fix it, move posting of rx wqe before complete(&ctx->comp_event). Cc: stable@vger.kernel.org Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") Signed-off-by: Haiyang Zhang Reviewed-by: Long Li Signed-off-by: David S. Miller --- drivers/net/ethernet/microsoft/mana/hw_channel.c | 62 +++++++++++++----------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index cafded2f9382..a00f915c5188 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -52,9 +52,33 @@ static int mana_hwc_verify_resp_msg(const struct hwc_caller_ctx *caller_ctx, return 0; } +static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, + struct hwc_work_request *req) +{ + struct device *dev = hwc_rxq->hwc->dev; + struct gdma_sge *sge; + int err; + + sge = &req->sge; + sge->address = (u64)req->buf_sge_addr; + sge->mem_key = hwc_rxq->msg_buf->gpa_mkey; + sge->size = req->buf_len; + + memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); + req->wqe_req.sgl = sge; + req->wqe_req.num_sge = 1; + req->wqe_req.client_data_unit = 0; + + err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL); + if (err) + dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err); + return err; +} + static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, - const struct gdma_resp_hdr *resp_msg) + struct hwc_work_request *rx_req) { + const struct gdma_resp_hdr *resp_msg = rx_req->buf_va; struct hwc_caller_ctx *ctx; int err; @@ -62,6 +86,7 @@ static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, hwc->inflight_msg_res.map)) { dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", resp_msg->response.hwc_msg_id); + mana_hwc_post_rx_wqe(hwc->rxq, rx_req); return; } @@ -75,30 +100,13 @@ static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, memcpy(ctx->output_buf, resp_msg, resp_len); out: ctx->error = err; - complete(&ctx->comp_event); -} - -static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, - struct hwc_work_request *req) -{ - struct device *dev = hwc_rxq->hwc->dev; - struct gdma_sge *sge; - int err; - - sge = &req->sge; - sge->address = (u64)req->buf_sge_addr; - sge->mem_key = hwc_rxq->msg_buf->gpa_mkey; - sge->size = req->buf_len; - memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); - req->wqe_req.sgl = sge; - req->wqe_req.num_sge = 1; - req->wqe_req.client_data_unit = 0; + /* Must post rx wqe before complete(), otherwise the next rx may + * hit no_wqe error. + */ + mana_hwc_post_rx_wqe(hwc->rxq, rx_req); - err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL); - if (err) - dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err); - return err; + complete(&ctx->comp_event); } static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self, @@ -235,14 +243,12 @@ static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id, return; } - mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, resp); + mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, rx_req); - /* Do no longer use 'resp', because the buffer is posted to the HW - * in the below mana_hwc_post_rx_wqe(). + /* Can no longer use 'resp', because the buffer is posted to the HW + * in mana_hwc_handle_resp() above. */ resp = NULL; - - mana_hwc_post_rx_wqe(hwc_rxq, rx_req); } static void mana_hwc_tx_event_handler(void *ctx, u32 gdma_txq_id, -- cgit From c358a809cb58af944d496944391a240e02f5837a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 20 Aug 2024 09:46:00 -0400 Subject: Revert "drm/ttm: increase ttm pre-fault value to PMD size" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 0ddd2ae586d28e521d37393364d989ce118802e0. This patch causes sluggishness and stuttering in graphical apps. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3564 Link: https://www.spinics.net/lists/dri-devel/msg457005.html Signed-off-by: Alex Deucher Cc: Zhu Lingshan Cc: Christian König Reviewed-by: Christian König Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20240820134600.1909370-1-alexander.deucher@amd.com --- include/drm/ttm/ttm_bo.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index ef0f52f56ebc..6ccf96c91f3a 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -39,11 +39,7 @@ #include "ttm_device.h" /* Default number of pre-faulted pages in the TTM fault handler */ -#if CONFIG_PGTABLE_LEVELS > 2 -#define TTM_BO_VM_NUM_PREFAULT (1 << (PMD_SHIFT - PAGE_SHIFT)) -#else #define TTM_BO_VM_NUM_PREFAULT 16 -#endif struct iosys_map; -- cgit From 752f387faaae0ae2e84d3f496922524785e77d60 Mon Sep 17 00:00:00 2001 From: Thomas Blocher Date: Wed, 31 Jul 2024 01:16:26 +0200 Subject: pinctrl: at91: make it work with current gpiolib pinctrl-at91 currently does not support the gpio-groups devicetree property and has no pin-range. Because of this at91 gpios stopped working since patch commit 2ab73c6d8323fa1e ("gpio: Support GPIO controllers without pin-ranges") This was discussed in the patches commit fc328a7d1fcce263 ("gpio: Revert regression in sysfs-gpio (gpiolib.c)") commit 56e337f2cf132632 ("Revert "gpio: Revert regression in sysfs-gpio (gpiolib.c)"") As a workaround manually set pin-range via gpiochip_add_pin_range() until a) pinctrl-at91 is reworked to support devicetree gpio-groups b) another solution as mentioned in commit 56e337f2cf132632 ("Revert "gpio: Revert regression in sysfs-gpio (gpiolib.c)"") is found Signed-off-by: Thomas Blocher Link: https://lore.kernel.org/5b992862-355d-f0de-cd3d-ff99e67a4ff1@ek-dev.de Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-at91.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index b3c3f5fb2e2e..93ab277d9943 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -1403,8 +1403,11 @@ static int at91_pinctrl_probe(struct platform_device *pdev) /* We will handle a range of GPIO pins */ for (i = 0; i < gpio_banks; i++) - if (gpio_chips[i]) + if (gpio_chips[i]) { pinctrl_add_gpio_range(info->pctl, &gpio_chips[i]->range); + gpiochip_add_pin_range(&gpio_chips[i]->chip, dev_name(info->pctl->dev), 0, + gpio_chips[i]->range.pin_base, gpio_chips[i]->range.npins); + } dev_info(dev, "initialized AT91 pinctrl driver\n"); -- cgit From 1c38a62f15e595346a1106025722869e87ffe044 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Thu, 8 Aug 2024 12:13:55 +0800 Subject: pinctrl: single: fix potential NULL dereference in pcs_get_function() pinmux_generic_get_function() can return NULL and the pointer 'function' was dereferenced without checking against NULL. Add checking of pointer 'function' in pcs_get_function(). Found by code review. Cc: stable@vger.kernel.org Fixes: 571aec4df5b7 ("pinctrl: single: Use generic pinmux helpers for managing functions") Signed-off-by: Ma Ke Link: https://lore.kernel.org/20240808041355.2766009-1-make24@iscas.ac.cn Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-single.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 4c6bfabb6bd7..4da3c3f422b6 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -345,6 +345,8 @@ static int pcs_get_function(struct pinctrl_dev *pctldev, unsigned pin, return -ENOTSUPP; fselector = setting->func; function = pinmux_generic_get_function(pctldev, fselector); + if (!function) + return -EINVAL; *func = function->data; if (!(*func)) { dev_err(pcs->dev, "%s could not find function%i\n", -- cgit From 166bf8af91225576f85208a31eaedbadd182d1ea Mon Sep 17 00:00:00 2001 From: "Nícolas F. R. A. Prado" Date: Thu, 8 Aug 2024 19:27:09 -0400 Subject: pinctrl: mediatek: common-v2: Fix broken bias-disable for PULL_PU_PD_RSEL_TYPE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Despite its name, commit fed74d75277d ("pinctrl: mediatek: common-v2: Fix bias-disable for PULL_PU_PD_RSEL_TYPE") actually broke bias-disable for PULL_PU_PD_RSEL_TYPE. mtk_pinconf_bias_set_combo() tries every bias method supported by the pin until one succeeds. For PULL_PU_PD_RSEL_TYPE pins, before the breaking commit, mtk_pinconf_bias_set_rsel() would be called first to try and set the RSEL value (as well as PU and PD), and if that failed, the only other valid option was that bias-disable was specified, which would then be handled by calling mtk_pinconf_bias_set_pu_pd() and disabling both PU and PD. The breaking commit misunderstood this logic and added an early "return 0" in mtk_pinconf_bias_set_rsel(). The result was that in the bias-disable case, the bias was left unchanged, since by returning success, mtk_pinconf_bias_set_combo() no longer tried calling mtk_pinconf_bias_set_pu_pd() to disable the bias. Since the logic for configuring bias-disable on PULL_PU_PD_RSEL_TYPE pins required mtk_pinconf_bias_set_rsel() to fail first, in that case, an error was printed to the log, eg: mt8195-pinctrl 10005000.pinctrl: Not support rsel value 0 Ohm for pin = 29 (GPIO29) This is what the breaking commit actually got rid of, and likely part of the reason why that commit was thought to be fixing functionality, while in reality it was breaking it. Instead of simply reverting that commit, restore the functionality but in a way that avoids the error from being printed and makes the code less confusing: * Return 0 explicitly if a bias method was successful * Introduce an extra function mtk_pinconf_bias_set_pu_pd_rsel() that calls both mtk_pinconf_bias_set_rsel() (only if needed) and mtk_pinconf_bias_set_pu_pd() * And analogously for the corresponding getters Fixes: fed74d75277d ("pinctrl: mediatek: common-v2: Fix bias-disable for PULL_PU_PD_RSEL_TYPE") Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/20240808-mtk-rsel-bias-disable-fix-v1-1-1b4e85bf596c@collabora.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c | 55 +++++++++++++----------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index b7921b59eb7b..54301fbba524 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -709,32 +709,35 @@ static int mtk_pinconf_bias_set_rsel(struct mtk_pinctrl *hw, { int err, rsel_val; - if (!pullup && arg == MTK_DISABLE) - return 0; - if (hw->rsel_si_unit) { /* find pin rsel_index from pin_rsel array*/ err = mtk_hw_pin_rsel_lookup(hw, desc, pullup, arg, &rsel_val); if (err) - goto out; + return err; } else { - if (arg < MTK_PULL_SET_RSEL_000 || - arg > MTK_PULL_SET_RSEL_111) { - err = -EINVAL; - goto out; - } + if (arg < MTK_PULL_SET_RSEL_000 || arg > MTK_PULL_SET_RSEL_111) + return -EINVAL; rsel_val = arg - MTK_PULL_SET_RSEL_000; } - err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_RSEL, rsel_val); - if (err) - goto out; + return mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_RSEL, rsel_val); +} - err = mtk_pinconf_bias_set_pu_pd(hw, desc, pullup, MTK_ENABLE); +static int mtk_pinconf_bias_set_pu_pd_rsel(struct mtk_pinctrl *hw, + const struct mtk_pin_desc *desc, + u32 pullup, u32 arg) +{ + u32 enable = arg == MTK_DISABLE ? MTK_DISABLE : MTK_ENABLE; + int err; -out: - return err; + if (arg != MTK_DISABLE) { + err = mtk_pinconf_bias_set_rsel(hw, desc, pullup, arg); + if (err) + return err; + } + + return mtk_pinconf_bias_set_pu_pd(hw, desc, pullup, enable); } int mtk_pinconf_bias_set_combo(struct mtk_pinctrl *hw, @@ -750,22 +753,22 @@ int mtk_pinconf_bias_set_combo(struct mtk_pinctrl *hw, try_all_type = MTK_PULL_TYPE_MASK; if (try_all_type & MTK_PULL_RSEL_TYPE) { - err = mtk_pinconf_bias_set_rsel(hw, desc, pullup, arg); + err = mtk_pinconf_bias_set_pu_pd_rsel(hw, desc, pullup, arg); if (!err) - return err; + return 0; } if (try_all_type & MTK_PULL_PU_PD_TYPE) { err = mtk_pinconf_bias_set_pu_pd(hw, desc, pullup, arg); if (!err) - return err; + return 0; } if (try_all_type & MTK_PULL_PULLSEL_TYPE) { err = mtk_pinconf_bias_set_pullsel_pullen(hw, desc, pullup, arg); if (!err) - return err; + return 0; } if (try_all_type & MTK_PULL_PUPD_R1R0_TYPE) @@ -803,9 +806,9 @@ static int mtk_rsel_get_si_unit(struct mtk_pinctrl *hw, return 0; } -static int mtk_pinconf_bias_get_rsel(struct mtk_pinctrl *hw, - const struct mtk_pin_desc *desc, - u32 *pullup, u32 *enable) +static int mtk_pinconf_bias_get_pu_pd_rsel(struct mtk_pinctrl *hw, + const struct mtk_pin_desc *desc, + u32 *pullup, u32 *enable) { int pu, pd, rsel, err; @@ -939,22 +942,22 @@ int mtk_pinconf_bias_get_combo(struct mtk_pinctrl *hw, try_all_type = MTK_PULL_TYPE_MASK; if (try_all_type & MTK_PULL_RSEL_TYPE) { - err = mtk_pinconf_bias_get_rsel(hw, desc, pullup, enable); + err = mtk_pinconf_bias_get_pu_pd_rsel(hw, desc, pullup, enable); if (!err) - return err; + return 0; } if (try_all_type & MTK_PULL_PU_PD_TYPE) { err = mtk_pinconf_bias_get_pu_pd(hw, desc, pullup, enable); if (!err) - return err; + return 0; } if (try_all_type & MTK_PULL_PULLSEL_TYPE) { err = mtk_pinconf_bias_get_pullsel_pullen(hw, desc, pullup, enable); if (!err) - return err; + return 0; } if (try_all_type & MTK_PULL_PUPD_R1R0_TYPE) -- cgit From 9983a9cd4d429dc9ca01770083c4c1f366214b65 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 2 Jul 2024 12:15:14 -0500 Subject: cpufreq/amd-pstate-ut: Don't check for highest perf matching on prefcore If a system is using preferred cores the highest perf will be inconsistent as it can change from system events. Skip the checks for it. Fixes: e571a5e2068e ("cpufreq: amd-pstate: Update amd-pstate preferred core ranking dynamically") Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-ut.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index 66b73c308ce6..b7318669485e 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -160,14 +160,17 @@ static void amd_pstate_ut_check_perf(u32 index) lowest_perf = AMD_CPPC_LOWEST_PERF(cap1); } - if ((highest_perf != READ_ONCE(cpudata->highest_perf)) || - (nominal_perf != READ_ONCE(cpudata->nominal_perf)) || + if (highest_perf != READ_ONCE(cpudata->highest_perf) && !cpudata->hw_prefcore) { + pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n", + __func__, cpu, highest_perf, cpudata->highest_perf); + goto skip_test; + } + if ((nominal_perf != READ_ONCE(cpudata->nominal_perf)) || (lowest_nonlinear_perf != READ_ONCE(cpudata->lowest_nonlinear_perf)) || (lowest_perf != READ_ONCE(cpudata->lowest_perf))) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s cpu%d highest=%d %d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n", - __func__, cpu, highest_perf, cpudata->highest_perf, - nominal_perf, cpudata->nominal_perf, + pr_err("%s cpu%d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n", + __func__, cpu, nominal_perf, cpudata->nominal_perf, lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf, lowest_perf, cpudata->lowest_perf); goto skip_test; -- cgit From d3692d95cc4d88114b070ee63cffc976f00f207f Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Fri, 9 Aug 2024 02:22:04 +0200 Subject: pinctrl: qcom: x1e80100: Fix special pin offsets Remove the erroneus 0x100000 offset to prevent the boards from crashing on pin state setting, as well as for the intended state changes to take effect. Fixes: 05e4941d97ef ("pinctrl: qcom: Add X1E80100 pinctrl driver") Signed-off-by: Konrad Dybcio Reviewed-by: Abel Vesa Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/20240809-topic-h_sdc-v1-1-bb421532c531@quicinc.com Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-x1e80100.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-x1e80100.c b/drivers/pinctrl/qcom/pinctrl-x1e80100.c index 6cd4d10e6fd6..65ed933f05ce 100644 --- a/drivers/pinctrl/qcom/pinctrl-x1e80100.c +++ b/drivers/pinctrl/qcom/pinctrl-x1e80100.c @@ -1805,10 +1805,10 @@ static const struct msm_pingroup x1e80100_groups[] = { [235] = PINGROUP(235, aon_cci, qdss_gpio, _, _, _, _, _, _, _), [236] = PINGROUP(236, aon_cci, qdss_gpio, _, _, _, _, _, _, _), [237] = PINGROUP(237, _, _, _, _, _, _, _, _, _), - [238] = UFS_RESET(ufs_reset, 0x1f9000), - [239] = SDC_QDSD_PINGROUP(sdc2_clk, 0x1f2000, 14, 6), - [240] = SDC_QDSD_PINGROUP(sdc2_cmd, 0x1f2000, 11, 3), - [241] = SDC_QDSD_PINGROUP(sdc2_data, 0x1f2000, 9, 0), + [238] = UFS_RESET(ufs_reset, 0xf9000), + [239] = SDC_QDSD_PINGROUP(sdc2_clk, 0xf2000, 14, 6), + [240] = SDC_QDSD_PINGROUP(sdc2_cmd, 0xf2000, 11, 3), + [241] = SDC_QDSD_PINGROUP(sdc2_data, 0xf2000, 9, 0), }; static const struct msm_gpio_wakeirq_map x1e80100_pdc_map[] = { -- cgit From 639766ca10d1e218e257ae7eabe76814bae6ab89 Mon Sep 17 00:00:00 2001 From: Hal Feng Date: Mon, 12 Aug 2024 15:01:08 +0800 Subject: pinctrl: starfive: jh7110: Correct the level trigger configuration of iev register A mistake was made in level trigger register configuration. Correct it. Fixes: 447976ab62c5 ("pinctrl: starfive: Add StarFive JH7110 sys controller driver") Signed-off-by: Hal Feng Link: https://lore.kernel.org/20240812070108.100923-1-hal.feng@starfivetech.com Signed-off-by: Linus Walleij --- drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c index 4ce080caa233..1d0d6c224c10 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c @@ -793,12 +793,12 @@ static int jh7110_irq_set_type(struct irq_data *d, unsigned int trigger) case IRQ_TYPE_LEVEL_HIGH: irq_type = 0; /* 0: level triggered */ edge_both = 0; /* 0: ignored */ - polarity = mask; /* 1: high level */ + polarity = 0; /* 0: high level */ break; case IRQ_TYPE_LEVEL_LOW: irq_type = 0; /* 0: level triggered */ edge_both = 0; /* 0: ignored */ - polarity = 0; /* 0: low level */ + polarity = mask; /* 1: low level */ break; default: return -EINVAL; -- cgit From a204501e1743d695ca2930ed25a2be9f8ced96d3 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 23 Aug 2024 11:51:08 -0400 Subject: nfsd: prevent panic for nfsv4.0 closed files in nfs4_show_open Prior to commit 3f29cc82a84c ("nfsd: split sc_status out of sc_type") states_show() relied on sc_type field to be of valid type before calling into a subfunction to show content of a particular stateid. From that commit, we split the validity of the stateid into sc_status and no longer changed sc_type to 0 while unhashing the stateid. This resulted in kernel oopsing for nfsv4.0 opens that stay around and in nfs4_show_open() would derefence sc_file which was NULL. Instead, for closed open stateids forgo displaying information that relies of having a valid sc_file. To reproduce: mount the server with 4.0, read and close a file and then on the server cat /proc/fs/nfsd/clients/2/states [ 513.590804] Call trace: [ 513.590925] _raw_spin_lock+0xcc/0x160 [ 513.591119] nfs4_show_open+0x78/0x2c0 [nfsd] [ 513.591412] states_show+0x44c/0x488 [nfsd] [ 513.591681] seq_read_iter+0x5d8/0x760 [ 513.591896] seq_read+0x188/0x208 [ 513.592075] vfs_read+0x148/0x470 [ 513.592241] ksys_read+0xcc/0x178 Fixes: 3f29cc82a84c ("nfsd: split sc_status out of sc_type") Signed-off-by: Olga Kornievskaia Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a20c2c9d7d45..dafff707e23a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2789,15 +2789,18 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) deny & NFS4_SHARE_ACCESS_READ ? "r" : "-", deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-"); - spin_lock(&nf->fi_lock); - file = find_any_file_locked(nf); - if (file) { - nfs4_show_superblock(s, file); - seq_puts(s, ", "); - nfs4_show_fname(s, file); - seq_puts(s, ", "); - } - spin_unlock(&nf->fi_lock); + if (nf) { + spin_lock(&nf->fi_lock); + file = find_any_file_locked(nf); + if (file) { + nfs4_show_superblock(s, file); + seq_puts(s, ", "); + nfs4_show_fname(s, file); + seq_puts(s, ", "); + } + spin_unlock(&nf->fi_lock); + } else + seq_puts(s, "closed, "); nfs4_show_owner(s, oo); if (st->sc_status & SC_STATUS_ADMIN_REVOKED) seq_puts(s, ", admin-revoked"); -- cgit From 7eff3453cbd7e0bfc7524d59694119b5ca844778 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 5 Jul 2024 09:15:08 +0800 Subject: ovl: pass string to ovl_parse_layer() So it can be used for parsing the Opt_lowerdir. Signed-off-by: Zhihao Cheng Link: https://lore.kernel.org/r/20240705011510.794025-2-chengzhihao1@huawei.com Signed-off-by: Christian Brauner --- fs/overlayfs/params.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 4860fcc4611b..52e3860973b7 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -365,10 +365,9 @@ static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer, } } -static int ovl_parse_layer(struct fs_context *fc, struct fs_parameter *param, - enum ovl_opt layer) +static int ovl_parse_layer(struct fs_context *fc, const char *layer_name, enum ovl_opt layer) { - char *name = kstrdup(param->string, GFP_KERNEL); + char *name = kstrdup(layer_name, GFP_KERNEL); bool upper = (layer == Opt_upperdir || layer == Opt_workdir); struct path path; int err; @@ -582,7 +581,7 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_datadir_add: case Opt_upperdir: case Opt_workdir: - err = ovl_parse_layer(fc, param, opt); + err = ovl_parse_layer(fc, param->string, opt); break; case Opt_default_permissions: config->default_permissions = true; -- cgit From ca76ac36bb6068866feca185045e7edf2a8f392f Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Fri, 5 Jul 2024 09:15:09 +0800 Subject: ovl: fix wrong lowerdir number check for parameter Opt_lowerdir The max count of lowerdir is OVL_MAX_STACK[500], which is broken by commit 37f32f526438("ovl: fix memory leak in ovl_parse_param()") for parameter Opt_lowerdir. Since commit 819829f0319a("ovl: refactor layer parsing helpers") and commit 24e16e385f22("ovl: add support for appending lowerdirs one by one") added check ovl_mount_dir_check() in function ovl_parse_param_lowerdir(), the 'ctx->nr' should be smaller than OVL_MAX_STACK, after commit 37f32f526438("ovl: fix memory leak in ovl_parse_param()") is applied, the 'ctx->nr' is updated before the check ovl_mount_dir_check(), which leads the max count of lowerdir to become 499 for parameter Opt_lowerdir. Fix it by replacing lower layers parsing code with the existing helper function ovl_parse_layer(). Fixes: 37f32f526438 ("ovl: fix memory leak in ovl_parse_param()") Signed-off-by: Zhihao Cheng Link: https://lore.kernel.org/r/20240705011510.794025-3-chengzhihao1@huawei.com Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/overlayfs/params.c | 40 +++++++--------------------------------- 1 file changed, 7 insertions(+), 33 deletions(-) diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 52e3860973b7..8dd834c7f291 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -353,6 +353,8 @@ static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer, case Opt_datadir_add: ctx->nr_data++; fallthrough; + case Opt_lowerdir: + fallthrough; case Opt_lowerdir_add: WARN_ON(ctx->nr >= ctx->capacity); l = &ctx->lower[ctx->nr++]; @@ -375,7 +377,7 @@ static int ovl_parse_layer(struct fs_context *fc, const char *layer_name, enum o if (!name) return -ENOMEM; - if (upper) + if (upper || layer == Opt_lowerdir) err = ovl_mount_dir(name, &path); else err = ovl_mount_dir_noesc(name, &path); @@ -431,7 +433,6 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) { int err; struct ovl_fs_context *ctx = fc->fs_private; - struct ovl_fs_context_layer *l; char *dup = NULL, *iter; ssize_t nr_lower, nr; bool data_layer = false; @@ -471,35 +472,11 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) goto out_err; } - if (nr_lower > ctx->capacity) { - err = -ENOMEM; - l = krealloc_array(ctx->lower, nr_lower, sizeof(*ctx->lower), - GFP_KERNEL_ACCOUNT); - if (!l) - goto out_err; - - ctx->lower = l; - ctx->capacity = nr_lower; - } - iter = dup; - l = ctx->lower; - for (nr = 0; nr < nr_lower; nr++, l++) { - ctx->nr++; - memset(l, 0, sizeof(*l)); - - err = ovl_mount_dir(iter, &l->path); + for (nr = 0; nr < nr_lower; nr++) { + err = ovl_parse_layer(fc, iter, Opt_lowerdir); if (err) - goto out_put; - - err = ovl_mount_dir_check(fc, &l->path, Opt_lowerdir, iter, false); - if (err) - goto out_put; - - err = -ENOMEM; - l->name = kstrdup(iter, GFP_KERNEL_ACCOUNT); - if (!l->name) - goto out_put; + goto out_err; if (data_layer) ctx->nr_data++; @@ -517,7 +494,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) */ if (ctx->nr_data > 0) { pr_err("regular lower layers cannot follow data lower layers"); - goto out_put; + goto out_err; } data_layer = false; @@ -531,9 +508,6 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) kfree(dup); return 0; -out_put: - ovl_reset_lowerdirs(ctx); - out_err: kfree(dup); -- cgit From 441e36ef5b347d9ab4f54f7b54853266be687556 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Fri, 5 Jul 2024 09:15:10 +0800 Subject: ovl: ovl_parse_param_lowerdir: Add missed '\n' for pr_err Add '\n' for pr_err in function ovl_parse_param_lowerdir(), which ensures that error message is displayed at once. Fixes: b36a5780cb44 ("ovl: modify layer parameter parsing") Signed-off-by: Zhihao Cheng Link: https://lore.kernel.org/r/20240705011510.794025-4-chengzhihao1@huawei.com Signed-off-by: Christian Brauner --- fs/overlayfs/params.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 8dd834c7f291..d0568c091341 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -449,7 +449,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) return 0; if (*name == ':') { - pr_err("cannot append lower layer"); + pr_err("cannot append lower layer\n"); return -EINVAL; } @@ -493,7 +493,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) * there are no data layers. */ if (ctx->nr_data > 0) { - pr_err("regular lower layers cannot follow data lower layers"); + pr_err("regular lower layers cannot follow data lower layers\n"); goto out_err; } -- cgit From eb9e749c0182affafadfbe5ded4503c4b5a9b57c Mon Sep 17 00:00:00 2001 From: Kiran K Date: Thu, 18 Jul 2024 20:18:04 +0530 Subject: Bluetooth: btintel: Allow configuring drive strength of BRI BRI (Bluetooth Radio Interface) traffic from CNVr to CNVi was found causing cross talk step errors to WiFi. To avoid this potential issue OEM platforms can replace BRI resistor to adjust the BRI response line drive strength. During the *setup*, driver reads the drive strength value from uefi variable and passes it to the controller via vendor specific command with opcode 0xfc0a. dmesg: .. [21.982720] Bluetooth: hci0: Bootloader timestamp 2023.33 buildtype 1 build 45995 [21.984250] Bluetooth: hci0: Found device firmware: intel/ibt-0190-0291-iml.sfi [21.984255] Bluetooth: hci0: Boot Address: 0x30099000 [21.984256] Bluetooth: hci0: Firmware Version: 160-24.24 [22.011501] Bluetooth: hci0: Waiting for firmware download to complete [22.011518] Bluetooth: hci0: Firmware loaded in 26624 usecs [22.011584] Bluetooth: hci0: Waiting for device to boot [22.013546] Bluetooth: hci0: Malformed MSFT vendor event: 0x02 [22.013552] Bluetooth: hci0: Device booted in 1967 usecs ... [22.013792] Bluetooth: hci0: dsbr: enable: 0x01 value: 0x0b ... [22.015027] Bluetooth: hci0: Found device firmware: intel/ibt-0190-0291.sfi [22.015041] Bluetooth: hci0: Boot Address: 0x10000800 [22.015043] Bluetooth: hci0: Firmware Version: 160-24.24 [22.395821] Bluetooth: BNEP (Ethernet Emulation) ver 1.3 [22.395828] Bluetooth: BNEP filters: protocol multicast ... Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.c | 124 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 7d5e4de64e3c..1ccbb5157515 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -26,6 +27,8 @@ #define ECDSA_OFFSET 644 #define ECDSA_HEADER_LEN 320 +#define BTINTEL_EFI_DSBR L"UefiCnvCommonDSBR" + enum { DSM_SET_WDISABLE2_DELAY = 1, DSM_SET_RESET_METHOD = 3, @@ -2616,6 +2619,120 @@ static u8 btintel_classify_pkt_type(struct hci_dev *hdev, struct sk_buff *skb) return hci_skb_pkt_type(skb); } +/* + * UefiCnvCommonDSBR UEFI variable provides information from the OEM platforms + * if they have replaced the BRI (Bluetooth Radio Interface) resistor to + * overcome the potential STEP errors on their designs. Based on the + * configauration, bluetooth firmware shall adjust the BRI response line drive + * strength. The below structure represents DSBR data. + * struct { + * u8 header; + * u32 dsbr; + * } __packed; + * + * header - defines revision number of the structure + * dsbr - defines drive strength BRI response + * bit0 + * 0 - instructs bluetooth firmware to use default values + * 1 - instructs bluetooth firmware to override default values + * bit3:1 + * Reserved + * bit7:4 + * DSBR override values (only if bit0 is set. Default value is 0xF + * bit31:7 + * Reserved + * Expected values for dsbr field: + * 1. 0xF1 - indicates that the resistor on board is 33 Ohm + * 2. 0x00 or 0xB1 - indicates that the resistor on board is 10 Ohm + * 3. Non existing UEFI variable or invalid (none of the above) - indicates + * that the resistor on board is 10 Ohm + * Even if uefi variable is not present, driver shall send 0xfc0a command to + * firmware to use default values. + * + */ +static int btintel_uefi_get_dsbr(u32 *dsbr_var) +{ + struct btintel_dsbr { + u8 header; + u32 dsbr; + } __packed data; + + efi_status_t status; + unsigned long data_size = 0; + efi_guid_t guid = EFI_GUID(0xe65d8884, 0xd4af, 0x4b20, 0x8d, 0x03, + 0x77, 0x2e, 0xcc, 0x3d, 0xa5, 0x31); + + if (!IS_ENABLED(CONFIG_EFI)) + return -EOPNOTSUPP; + + if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) + return -EOPNOTSUPP; + + status = efi.get_variable(BTINTEL_EFI_DSBR, &guid, NULL, &data_size, + NULL); + + if (status != EFI_BUFFER_TOO_SMALL || !data_size) + return -EIO; + + status = efi.get_variable(BTINTEL_EFI_DSBR, &guid, NULL, &data_size, + &data); + + if (status != EFI_SUCCESS) + return -ENXIO; + + *dsbr_var = data.dsbr; + return 0; +} + +static int btintel_set_dsbr(struct hci_dev *hdev, struct intel_version_tlv *ver) +{ + struct btintel_dsbr_cmd { + u8 enable; + u8 dsbr; + } __packed; + + struct btintel_dsbr_cmd cmd; + struct sk_buff *skb; + u8 status; + u32 dsbr; + bool apply_dsbr; + int err; + + /* DSBR command needs to be sent for BlazarI + B0 step product after + * downloading IML image. + */ + apply_dsbr = (ver->img_type == BTINTEL_IMG_IML && + ((ver->cnvi_top & 0xfff) == BTINTEL_CNVI_BLAZARI) && + INTEL_CNVX_TOP_STEP(ver->cnvi_top) == 0x01); + + if (!apply_dsbr) + return 0; + + dsbr = 0; + err = btintel_uefi_get_dsbr(&dsbr); + if (err < 0) + bt_dev_dbg(hdev, "Error reading efi: %ls (%d)", + BTINTEL_EFI_DSBR, err); + + cmd.enable = dsbr & BIT(0); + cmd.dsbr = dsbr >> 4 & 0xF; + + bt_dev_info(hdev, "dsbr: enable: 0x%2.2x value: 0x%2.2x", cmd.enable, + cmd.dsbr); + + skb = __hci_cmd_sync(hdev, 0xfc0a, sizeof(cmd), &cmd, HCI_CMD_TIMEOUT); + if (IS_ERR(skb)) + return -bt_to_errno(PTR_ERR(skb)); + + status = skb->data[0]; + kfree_skb(skb); + + if (status) + return -bt_to_errno(status); + + return 0; +} + int btintel_bootloader_setup_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver) { @@ -2650,6 +2767,13 @@ int btintel_bootloader_setup_tlv(struct hci_dev *hdev, if (err) return err; + /* set drive strength of BRI response */ + err = btintel_set_dsbr(hdev, ver); + if (err) { + bt_dev_err(hdev, "Failed to send dsbr command (%d)", err); + return err; + } + /* If image type returned is BTINTEL_IMG_IML, then controller supports * intermediate loader image */ -- cgit From 35237475384ab3622f63c3c09bdf6af6dacfe9c3 Mon Sep 17 00:00:00 2001 From: Neeraj Sanjay Kale Date: Fri, 16 Aug 2024 15:51:13 +0530 Subject: Bluetooth: btnxpuart: Fix random crash seen while removing driver This fixes the random kernel crash seen while removing the driver, when running the load/unload test over multiple iterations. 1) modprobe btnxpuart 2) hciconfig hci0 reset 3) hciconfig (check hci0 interface up with valid BD address) 4) modprobe -r btnxpuart Repeat steps 1 to 4 The ps_wakeup() call in btnxpuart_close() schedules the psdata->work(), which gets scheduled after module is removed, causing a kernel crash. This hidden issue got highlighted after enabling Power Save by default in 4183a7be7700 (Bluetooth: btnxpuart: Enable Power Save feature on startup) The new ps_cleanup() deasserts UART break immediately while closing serdev device, cancels any scheduled ps_work and destroys the ps_lock mutex. [ 85.884604] Unable to handle kernel paging request at virtual address ffffd4a61638f258 [ 85.884624] Mem abort info: [ 85.884625] ESR = 0x0000000086000007 [ 85.884628] EC = 0x21: IABT (current EL), IL = 32 bits [ 85.884633] SET = 0, FnV = 0 [ 85.884636] EA = 0, S1PTW = 0 [ 85.884638] FSC = 0x07: level 3 translation fault [ 85.884642] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000041dd0000 [ 85.884646] [ffffd4a61638f258] pgd=1000000095fff003, p4d=1000000095fff003, pud=100000004823d003, pmd=100000004823e003, pte=0000000000000000 [ 85.884662] Internal error: Oops: 0000000086000007 [#1] PREEMPT SMP [ 85.890932] Modules linked in: algif_hash algif_skcipher af_alg overlay fsl_jr_uio caam_jr caamkeyblob_desc caamhash_desc caamalg_desc crypto_engine authenc libdes crct10dif_ce polyval_ce polyval_generic snd_soc_imx_spdif snd_soc_imx_card snd_soc_ak5558 snd_soc_ak4458 caam secvio error snd_soc_fsl_spdif snd_soc_fsl_micfil snd_soc_fsl_sai snd_soc_fsl_utils gpio_ir_recv rc_core fuse [last unloaded: btnxpuart(O)] [ 85.927297] CPU: 1 PID: 67 Comm: kworker/1:3 Tainted: G O 6.1.36+g937b1be4345a #1 [ 85.936176] Hardware name: FSL i.MX8MM EVK board (DT) [ 85.936182] Workqueue: events 0xffffd4a61638f380 [ 85.936198] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 85.952817] pc : 0xffffd4a61638f258 [ 85.952823] lr : 0xffffd4a61638f258 [ 85.952827] sp : ffff8000084fbd70 [ 85.952829] x29: ffff8000084fbd70 x28: 0000000000000000 x27: 0000000000000000 [ 85.963112] x26: ffffd4a69133f000 x25: ffff4bf1c8540990 x24: ffff4bf215b87305 [ 85.963119] x23: ffff4bf215b87300 x22: ffff4bf1c85409d0 x21: ffff4bf1c8540970 [ 85.977382] x20: 0000000000000000 x19: ffff4bf1c8540880 x18: 0000000000000000 [ 85.977391] x17: 0000000000000000 x16: 0000000000000133 x15: 0000ffffe2217090 [ 85.977399] x14: 0000000000000001 x13: 0000000000000133 x12: 0000000000000139 [ 85.977407] x11: 0000000000000001 x10: 0000000000000a60 x9 : ffff8000084fbc50 [ 85.977417] x8 : ffff4bf215b7d000 x7 : ffff4bf215b83b40 x6 : 00000000000003e8 [ 85.977424] x5 : 00000000410fd030 x4 : 0000000000000000 x3 : 0000000000000000 [ 85.977432] x2 : 0000000000000000 x1 : ffff4bf1c4265880 x0 : 0000000000000000 [ 85.977443] Call trace: [ 85.977446] 0xffffd4a61638f258 [ 85.977451] 0xffffd4a61638f3e8 [ 85.977455] process_one_work+0x1d4/0x330 [ 85.977464] worker_thread+0x6c/0x430 [ 85.977471] kthread+0x108/0x10c [ 85.977476] ret_from_fork+0x10/0x20 [ 85.977488] Code: bad PC value [ 85.977491] ---[ end trace 0000000000000000 ]--- Preset since v6.9.11 Fixes: 86d55f124b52 ("Bluetooth: btnxpuart: Deasset UART break before closing serdev device") Signed-off-by: Neeraj Sanjay Kale Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btnxpuart.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 31d3dd90b672..ad1ec6f3685a 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -449,6 +449,23 @@ static bool ps_wakeup(struct btnxpuart_dev *nxpdev) return false; } +static void ps_cleanup(struct btnxpuart_dev *nxpdev) +{ + struct ps_data *psdata = &nxpdev->psdata; + u8 ps_state; + + mutex_lock(&psdata->ps_lock); + ps_state = psdata->ps_state; + mutex_unlock(&psdata->ps_lock); + + if (ps_state != PS_STATE_AWAKE) + ps_control(psdata->hdev, PS_STATE_AWAKE); + + ps_cancel_timer(nxpdev); + cancel_work_sync(&psdata->work); + mutex_destroy(&psdata->ps_lock); +} + static int send_ps_cmd(struct hci_dev *hdev, void *data) { struct btnxpuart_dev *nxpdev = hci_get_drvdata(hdev); @@ -1363,7 +1380,6 @@ static int btnxpuart_close(struct hci_dev *hdev) { struct btnxpuart_dev *nxpdev = hci_get_drvdata(hdev); - ps_wakeup(nxpdev); serdev_device_close(nxpdev->serdev); skb_queue_purge(&nxpdev->txq); if (!IS_ERR_OR_NULL(nxpdev->rx_skb)) { @@ -1516,8 +1532,8 @@ static void nxp_serdev_remove(struct serdev_device *serdev) nxpdev->new_baudrate = nxpdev->fw_init_baudrate; nxp_set_baudrate_cmd(hdev, NULL); } - ps_cancel_timer(nxpdev); } + ps_cleanup(nxpdev); hci_unregister_dev(hdev); hci_free_dev(hdev); } -- cgit From 18b3256db76bd1130965acd99fbd38f87c3e6950 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 21 Aug 2024 14:41:52 -0400 Subject: Bluetooth: hci_core: Fix not handling hibernation actions This fixes not handling hibernation actions on suspend notifier so they are treated in the same way as regular suspend actions. Fixes: 9952d90ea288 ("Bluetooth: Handle PM_SUSPEND_PREPARE and PM_POST_SUSPEND") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index f25a21f532aa..d6976db02c06 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2406,10 +2406,16 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action, /* To avoid a potential race with hci_unregister_dev. */ hci_dev_hold(hdev); - if (action == PM_SUSPEND_PREPARE) + switch (action) { + case PM_HIBERNATION_PREPARE: + case PM_SUSPEND_PREPARE: ret = hci_suspend_dev(hdev); - else if (action == PM_POST_SUSPEND) + break; + case PM_POST_HIBERNATION: + case PM_POST_SUSPEND: ret = hci_resume_dev(hdev); + break; + } if (ret) bt_dev_err(hdev, "Suspend notifier action (%lu) failed: %d", -- cgit From 740f2e2791b98e47288b3814c83a3f566518fed2 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Wed, 21 Aug 2024 06:07:42 +0000 Subject: usb: cdnsp: fix for Link TRB with TC Stop Endpoint command on LINK TRB with TC bit set to 1 causes that internal cycle bit can have incorrect state after command complete. In consequence empty transfer ring can be incorrectly detected when EP is resumed. NOP TRB before LINK TRB avoid such scenario. Stop Endpoint command is then on NOP TRB and internal cycle bit is not changed and have correct value. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") cc: Signed-off-by: Pawel Laszczak Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/PH7PR07MB953878279F375CCCE6C6F40FDD8E2@PH7PR07MB9538.namprd07.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-gadget.h | 3 +++ drivers/usb/cdns3/cdnsp-ring.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index dbee6f085277..84887dfea763 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -811,6 +811,7 @@ struct cdnsp_stream_info { * generate Missed Service Error Event. * Set skip flag when receive a Missed Service Error Event and * process the missed tds on the endpoint ring. + * @wa1_nop_trb: hold pointer to NOP trb. */ struct cdnsp_ep { struct usb_ep endpoint; @@ -838,6 +839,8 @@ struct cdnsp_ep { #define EP_UNCONFIGURED BIT(7) bool skip; + union cdnsp_trb *wa1_nop_trb; + }; /** diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index a60c0cb991cd..dbd83d321bca 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1904,6 +1904,23 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) if (ret) return ret; + /* + * workaround 1: STOP EP command on LINK TRB with TC bit set to 1 + * causes that internal cycle bit can have incorrect state after + * command complete. In consequence empty transfer ring can be + * incorrectly detected when EP is resumed. + * NOP TRB before LINK TRB avoid such scenario. STOP EP command is + * then on NOP TRB and internal cycle bit is not changed and have + * correct value. + */ + if (pep->wa1_nop_trb) { + field = le32_to_cpu(pep->wa1_nop_trb->trans_event.flags); + field ^= TRB_CYCLE; + + pep->wa1_nop_trb->trans_event.flags = cpu_to_le32(field); + pep->wa1_nop_trb = NULL; + } + /* * Don't give the first TRB to the hardware (by toggling the cycle bit) * until we've finished creating all the other TRBs. The ring's cycle @@ -1999,6 +2016,17 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) send_addr = addr; } + if (cdnsp_trb_is_link(ring->enqueue + 1)) { + field = TRB_TYPE(TRB_TR_NOOP) | TRB_IOC; + if (!ring->cycle_state) + field |= TRB_CYCLE; + + pep->wa1_nop_trb = ring->enqueue; + + cdnsp_queue_trb(pdev, ring, 0, 0x0, 0x0, + TRB_INTR_TARGET(0), field); + } + cdnsp_check_trb_math(preq, enqd_len); ret = cdnsp_giveback_first_trb(pdev, pep, preq->request.stream_id, start_cycle, start_trb); -- cgit From 0aa2e1b2fb7a75aa4b5b4347055ccfea6f091769 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Aug 2024 21:08:09 +0100 Subject: mm: Fix missing folio invalidation calls during truncation When AS_RELEASE_ALWAYS is set on a mapping, the ->release_folio() and ->invalidate_folio() calls should be invoked even if PG_private and PG_private_2 aren't set. This is used by netfslib to keep track of the point above which reads can be skipped in favour of just zeroing pagecache locally. There are a couple of places in truncation in which invalidation is only called when folio_has_private() is true. Fix these to check folio_needs_release() instead. Without this, the generic/075 and generic/112 xfstests (both fsx-based tests) fail with minimum folio size patches applied[1]. Fixes: b4fa966f03b7 ("mm, netfs, fscache: stop read optimisation when folio removed from pagecache") Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240815090849.972355-1-kernel@pankajraghav.com/ [1] Link: https://lore.kernel.org/r/20240823200819.532106-2-dhowells@redhat.com Reviewed-by: Matthew Wilcox (Oracle) cc: Matthew Wilcox (Oracle) cc: Pankaj Raghav cc: Jeff Layton cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: netfs@lists.linux.dev cc: linux-mm@kvack.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- mm/truncate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/truncate.c b/mm/truncate.c index 4d61fbdd4b2f..0668cd340a46 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -157,7 +157,7 @@ static void truncate_cleanup_folio(struct folio *folio) if (folio_mapped(folio)) unmap_mapping_folio(folio); - if (folio_has_private(folio)) + if (folio_needs_release(folio)) folio_invalidate(folio, 0, folio_size(folio)); /* @@ -219,7 +219,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) if (!mapping_inaccessible(folio->mapping)) folio_zero_range(folio, offset, length); - if (folio_has_private(folio)) + if (folio_needs_release(folio)) folio_invalidate(folio, offset, length); if (!folio_test_large(folio)) return true; -- cgit From a74ee0e878e262c0276966528d72d4e887174410 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Aug 2024 21:08:10 +0100 Subject: afs: Fix post-setattr file edit to do truncation correctly At the end of an kAFS RPC operation, there is an "edit" phase (originally intended for post-directory modification ops to edit the local image) that the setattr VFS op uses to fix up the pagecache if the RPC that requested truncation of a file was successful. afs_setattr_edit_file() calls truncate_setsize() which sets i_size, expands the pagecache if needed and truncates the pagecache. The first two of those, however, are redundant as they've already been done by afs_setattr_success() under the io_lock and the first is also done under the callback lock (cb_lock). Fix afs_setattr_edit_file() to call truncate_pagecache() instead (which is called by truncate_setsize(), thereby skipping the redundant parts. Fixes: 100ccd18bb41 ("netfs: Optimise away reads above the point at which there can be no data") Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240823200819.532106-3-dhowells@redhat.com cc: Matthew Wilcox (Oracle) cc: Pankaj Raghav cc: Jeff Layton cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: netfs@lists.linux.dev cc: linux-mm@kvack.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/afs/inode.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 3acf5e050072..a95e77670b49 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -695,13 +695,18 @@ static void afs_setattr_edit_file(struct afs_operation *op) { struct afs_vnode_param *vp = &op->file[0]; struct afs_vnode *vnode = vp->vnode; + struct inode *inode = &vnode->netfs.inode; if (op->setattr.attr->ia_valid & ATTR_SIZE) { loff_t size = op->setattr.attr->ia_size; - loff_t i_size = op->setattr.old_i_size; + loff_t old = op->setattr.old_i_size; + + /* Note: inode->i_size was updated by afs_apply_status() inside + * the I/O and callback locks. + */ - if (size != i_size) { - truncate_setsize(&vnode->netfs.inode, size); + if (size != old) { + truncate_pagecache(inode, size); netfs_resize_file(&vnode->netfs, size, true); fscache_resize_cookie(afs_vnode_cache(vnode), size); } -- cgit From 7dfc8f0c6144c290dbeb01835a67e81b34dda8cd Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Aug 2024 21:08:11 +0100 Subject: netfs: Fix netfs_release_folio() to say no if folio dirty Fix netfs_release_folio() to say no (ie. return false) if the folio is dirty (analogous with iomap's behaviour). Without this, it will say yes to the release of a dirty page by split_huge_page_to_list_to_order(), which will result in the loss of untruncated data in the folio. Without this, the generic/075 and generic/112 xfstests (both fsx-based tests) fail with minimum folio size patches applied[1]. Fixes: c1ec4d7c2e13 ("netfs: Provide invalidate_folio and release_folio calls") Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240815090849.972355-1-kernel@pankajraghav.com/ [1] Link: https://lore.kernel.org/r/20240823200819.532106-4-dhowells@redhat.com cc: Matthew Wilcox (Oracle) cc: Pankaj Raghav cc: Jeff Layton cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: netfs@lists.linux.dev cc: linux-mm@kvack.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/misc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 554a1a4615ad..69324761fcf7 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -161,6 +161,9 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp) struct netfs_inode *ctx = netfs_inode(folio_inode(folio)); unsigned long long end; + if (folio_test_dirty(folio)) + return false; + end = folio_pos(folio) + folio_size(folio); if (end > ctx->zero_point) ctx->zero_point = end; -- cgit From cce6bfa6ca0e30af9927b0074c97fe6a92f28092 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Aug 2024 21:08:12 +0100 Subject: netfs: Fix trimming of streaming-write folios in netfs_inval_folio() When netfslib writes to a folio that it doesn't have data for, but that data exists on the server, it will make a 'streaming write' whereby it stores data in a folio that is marked dirty, but not uptodate. When it does this, it attaches a record to folio->private to track the dirty region. When truncate() or fallocate() wants to invalidate part of such a folio, it will call into ->invalidate_folio(), specifying the part of the folio that is to be invalidated. netfs_invalidate_folio(), on behalf of the filesystem, must then determine how to trim the streaming write record. In a couple of cases, however, it does this incorrectly (the reduce-length and move-start cases are switched over and don't, in any case, calculate the value correctly). Fix this by making the logic tree more obvious and fixing the cases. Fixes: 9ebff83e6481 ("netfs: Prep to use folio->private for write grouping and streaming write") Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240823200819.532106-5-dhowells@redhat.com cc: Matthew Wilcox (Oracle) cc: Pankaj Raghav cc: Jeff Layton cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: netfs@lists.linux.dev cc: linux-mm@kvack.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/misc.c | 50 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 69324761fcf7..c1f321cf5999 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -97,10 +97,20 @@ EXPORT_SYMBOL(netfs_clear_inode_writeback); void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) { struct netfs_folio *finfo; + struct netfs_inode *ctx = netfs_inode(folio_inode(folio)); size_t flen = folio_size(folio); _enter("{%lx},%zx,%zx", folio->index, offset, length); + if (offset == 0 && length == flen) { + unsigned long long i_size = i_size_read(&ctx->inode); + unsigned long long fpos = folio_pos(folio), end; + + end = umin(fpos + flen, i_size); + if (fpos < i_size && end > ctx->zero_point) + ctx->zero_point = end; + } + folio_wait_private_2(folio); /* [DEPRECATED] */ if (!folio_test_private(folio)) @@ -115,18 +125,34 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) /* We have a partially uptodate page from a streaming write. */ unsigned int fstart = finfo->dirty_offset; unsigned int fend = fstart + finfo->dirty_len; - unsigned int end = offset + length; + unsigned int iend = offset + length; if (offset >= fend) return; - if (end <= fstart) + if (iend <= fstart) + return; + + /* The invalidation region overlaps the data. If the region + * covers the start of the data, we either move along the start + * or just erase the data entirely. + */ + if (offset <= fstart) { + if (iend >= fend) + goto erase_completely; + /* Move the start of the data. */ + finfo->dirty_len = fend - iend; + finfo->dirty_offset = offset; + return; + } + + /* Reduce the length of the data if the invalidation region + * covers the tail part. + */ + if (iend >= fend) { + finfo->dirty_len = offset - fstart; return; - if (offset <= fstart && end >= fend) - goto erase_completely; - if (offset <= fstart && end > fstart) - goto reduce_len; - if (offset > fstart && end >= fend) - goto move_start; + } + /* A partial write was split. The caller has already zeroed * it, so just absorb the hole. */ @@ -139,12 +165,6 @@ erase_completely: folio_clear_uptodate(folio); kfree(finfo); return; -reduce_len: - finfo->dirty_len = offset + length - finfo->dirty_offset; - return; -move_start: - finfo->dirty_len -= offset - finfo->dirty_offset; - finfo->dirty_offset = offset; } EXPORT_SYMBOL(netfs_invalidate_folio); @@ -164,7 +184,7 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp) if (folio_test_dirty(folio)) return false; - end = folio_pos(folio) + folio_size(folio); + end = umin(folio_pos(folio) + folio_size(folio), i_size_read(&ctx->inode)); if (end > ctx->zero_point) ctx->zero_point = end; -- cgit From 950b03d0f664a54389a555d79215348ed413161f Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Aug 2024 21:08:13 +0100 Subject: netfs: Fix missing iterator reset on retry of short read Fix netfs_rreq_perform_resubmissions() to reset before retrying a short read, otherwise the wrong part of the output buffer will be used. Fixes: 92b6cc5d1e7c ("netfs: Add iov_iters to (sub)requests to describe various buffers") Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240823200819.532106-6-dhowells@redhat.com cc: Steve French cc: Paulo Alcantara cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/netfs/io.c b/fs/netfs/io.c index 5367caf3fa28..4da0a494e860 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -313,6 +313,7 @@ static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq) netfs_reset_subreq_iter(rreq, subreq); netfs_read_from_server(rreq, subreq); } else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) { + netfs_reset_subreq_iter(rreq, subreq); netfs_rreq_short_read(rreq, subreq); } } -- cgit From e00e99ba6c6b8e5239e75cd6684a6827d93c39a2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 24 Aug 2024 12:56:53 +0100 Subject: netfs: Fix interaction of streaming writes with zero-point tracker When a folio that is marked for streaming write (dirty, but not uptodate, with partial content specified in the private data) is written back, the folio is effectively switched to the blank state upon completion of the write. This means that if we want to read it in future, we need to reread the whole folio. However, if the folio is above the zero_point position, when it is read back, it will just be cleared and the read skipped, leading to apparent local corruption. Fix this by increasing the zero_point to the end of the dirty data in the folio when clearing the folio state after writeback. This is analogous to the folio having ->release_folio() called upon it. This was causing the config.log generated by configuring a cpython tree on a cifs share to get corrupted because the scripts involved were appending text to the file in small pieces. Fixes: 288ace2f57c9 ("netfs: New writeback implementation") Signed-off-by: David Howells Link: https://lore.kernel.org/r/563286.1724500613@warthog.procyon.org.uk cc: Steve French cc: Paulo Alcantara cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/write_collect.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 426cf87aaf2e..ae7a2043f670 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -33,6 +33,7 @@ int netfs_folio_written_back(struct folio *folio) { enum netfs_folio_trace why = netfs_folio_trace_clear; + struct netfs_inode *ictx = netfs_inode(folio->mapping->host); struct netfs_folio *finfo; struct netfs_group *group = NULL; int gcount = 0; @@ -41,6 +42,12 @@ int netfs_folio_written_back(struct folio *folio) /* Streaming writes cannot be redirtied whilst under writeback, * so discard the streaming record. */ + unsigned long long fend; + + fend = folio_pos(folio) + finfo->dirty_offset + finfo->dirty_len; + if (fend > ictx->zero_point) + ictx->zero_point = fend; + folio_detach_private(folio); group = finfo->netfs_group; gcount++; -- cgit From d3204616a67e53fdcad14c7026869330fb382fd4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 23 Aug 2024 17:38:41 -0400 Subject: bcachefs: Fix failure to flush moves before sleeping in copygc This fixes an apparent deadlock - rebalance would get stuck trying to take nocow locks because they weren't being released by copygc. Signed-off-by: Kent Overstreet --- fs/bcachefs/movinggc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index deef4f024d20..d86565bf07c8 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -383,7 +383,7 @@ static int bch2_copygc_thread(void *arg) if (min_member_capacity == U64_MAX) min_member_capacity = 128 * 2048; - bch2_trans_unlock_long(ctxt.trans); + move_buckets_wait(&ctxt, buckets, true); bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6), MAX_SCHEDULE_TIMEOUT); } -- cgit From 49aa7830396bce33b00fa7ee734c35de36521138 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 23 Aug 2024 15:35:22 -0400 Subject: bcachefs: Fix rebalance_work accounting rebalance_work was keying off of the presence of rebelance_opts in the extent - but that was incorrect, we keep those around after rebalance for indirect extents since the inode's options are not directly available Fixes: 20ac515a9cc7 ("bcachefs: bch_acct_rebalance_work") Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 +- fs/bcachefs/buckets.c | 74 ++++++++++++++++++++++++++++--------------- fs/bcachefs/extents.c | 39 +++++++++++++++++++++++ fs/bcachefs/extents.h | 1 + fs/bcachefs/sb-downgrade.c | 8 ++++- 5 files changed, 98 insertions(+), 27 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index c75f2e0f32bb..14ce726bf5a3 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -677,7 +677,8 @@ struct bch_sb_field_ext { x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \ x(disk_accounting_v2, BCH_VERSION(1, 9)) \ x(disk_accounting_v3, BCH_VERSION(1, 10)) \ - x(disk_accounting_inum, BCH_VERSION(1, 11)) + x(disk_accounting_inum, BCH_VERSION(1, 11)) \ + x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index be2bbd248631..a2274429e7f4 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -699,7 +699,8 @@ err: static int __trigger_extent(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c k, - enum btree_iter_update_trigger_flags flags) + enum btree_iter_update_trigger_flags flags, + s64 *replicas_sectors) { bool gc = flags & BTREE_TRIGGER_gc; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); @@ -708,7 +709,6 @@ static int __trigger_extent(struct btree_trans *trans, enum bch_data_type data_type = bkey_is_btree_ptr(k.k) ? BCH_DATA_btree : BCH_DATA_user; - s64 replicas_sectors = 0; int ret = 0; struct disk_accounting_pos acc_replicas_key = { @@ -739,7 +739,7 @@ static int __trigger_extent(struct btree_trans *trans, if (ret) return ret; } else if (!p.has_ec) { - replicas_sectors += disk_sectors; + *replicas_sectors += disk_sectors; acc_replicas_key.replicas.devs[acc_replicas_key.replicas.nr_devs++] = p.ptr.dev; } else { ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags); @@ -777,7 +777,7 @@ static int __trigger_extent(struct btree_trans *trans, } if (acc_replicas_key.replicas.nr_devs) { - ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, &replicas_sectors, 1, gc); + ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, replicas_sectors, 1, gc); if (ret) return ret; } @@ -787,7 +787,7 @@ static int __trigger_extent(struct btree_trans *trans, .type = BCH_DISK_ACCOUNTING_snapshot, .snapshot.id = k.k->p.snapshot, }; - ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, &replicas_sectors, 1, gc); + ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, replicas_sectors, 1, gc); if (ret) return ret; } @@ -807,7 +807,7 @@ static int __trigger_extent(struct btree_trans *trans, .type = BCH_DISK_ACCOUNTING_btree, .btree.id = btree_id, }; - ret = bch2_disk_accounting_mod(trans, &acc_btree_key, &replicas_sectors, 1, gc); + ret = bch2_disk_accounting_mod(trans, &acc_btree_key, replicas_sectors, 1, gc); if (ret) return ret; } else { @@ -819,22 +819,13 @@ static int __trigger_extent(struct btree_trans *trans, s64 v[3] = { insert ? 1 : -1, insert ? k.k->size : -((s64) k.k->size), - replicas_sectors, + *replicas_sectors, }; ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc); if (ret) return ret; } - if (bch2_bkey_rebalance_opts(k)) { - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_rebalance_work, - }; - ret = bch2_disk_accounting_mod(trans, &acc, &replicas_sectors, 1, gc); - if (ret) - return ret; - } - return 0; } @@ -843,6 +834,7 @@ int bch2_trigger_extent(struct btree_trans *trans, struct bkey_s_c old, struct bkey_s new, enum btree_iter_update_trigger_flags flags) { + struct bch_fs *c = trans->c; struct bkey_ptrs_c new_ptrs = bch2_bkey_ptrs_c(new.s_c); struct bkey_ptrs_c old_ptrs = bch2_bkey_ptrs_c(old); unsigned new_ptrs_bytes = (void *) new_ptrs.end - (void *) new_ptrs.start; @@ -858,21 +850,53 @@ int bch2_trigger_extent(struct btree_trans *trans, new_ptrs_bytes)) return 0; - if (flags & BTREE_TRIGGER_transactional) { - struct bch_fs *c = trans->c; - int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) - - (int) bch2_bkey_needs_rebalance(c, old); + if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) { + s64 old_replicas_sectors = 0, new_replicas_sectors = 0; + + if (old.k->type) { + int ret = __trigger_extent(trans, btree, level, old, + flags & ~BTREE_TRIGGER_insert, + &old_replicas_sectors); + if (ret) + return ret; + } + + if (new.k->type) { + int ret = __trigger_extent(trans, btree, level, new.s_c, + flags & ~BTREE_TRIGGER_overwrite, + &new_replicas_sectors); + if (ret) + return ret; + } + + int need_rebalance_delta = 0; + s64 need_rebalance_sectors_delta = 0; + + s64 s = bch2_bkey_sectors_need_rebalance(c, old); + need_rebalance_delta -= s != 0; + need_rebalance_sectors_delta -= s; - if (mod) { + s = bch2_bkey_sectors_need_rebalance(c, old); + need_rebalance_delta += s != 0; + need_rebalance_sectors_delta += s; + + if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) { int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, - new.k->p, mod > 0); + new.k->p, need_rebalance_delta > 0); if (ret) return ret; } - } - if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) - return trigger_run_overwrite_then_insert(__trigger_extent, trans, btree, level, old, new, flags); + if (need_rebalance_sectors_delta) { + struct disk_accounting_pos acc = { + .type = BCH_DISK_ACCOUNTING_rebalance_work, + }; + int ret = bch2_disk_accounting_mod(trans, &acc, &need_rebalance_sectors_delta, 1, + flags & BTREE_TRIGGER_gc); + if (ret) + return ret; + } + } return 0; } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 9406f82fc255..e317df3644a1 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1379,6 +1379,45 @@ bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k) return r != NULL; } +static u64 __bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k, + unsigned target, unsigned compression) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + u64 sectors = 0; + + if (compression) { + unsigned compression_type = bch2_compression_opt_to_type(compression); + + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || + p.ptr.unwritten) { + sectors = 0; + goto incompressible; + } + + if (!p.ptr.cached && p.crc.compression_type != compression_type) + sectors += p.crc.compressed_size; + } + } +incompressible: + if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) + if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, target)) + sectors += p.crc.compressed_size; + } + + return sectors; +} + +u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) +{ + const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); + + return r ? __bch2_bkey_sectors_need_rebalance(c, k, r->target, r->compression) : 0; +} + int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, struct bch_io_opts *opts) { diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 1a6ddee48041..709dd83183be 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -692,6 +692,7 @@ const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c); unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c, unsigned, unsigned); bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c); +u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *, struct bch_io_opts *); diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 650a1f77ca40..c7e4cdd3f6a5 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -74,6 +74,9 @@ BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \ BCH_FSCK_ERR_accounting_key_junk_at_end) \ x(disk_accounting_inum, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_accounting_mismatch) \ + x(rebalance_work_acct_fix, \ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ BCH_FSCK_ERR_accounting_mismatch) @@ -108,7 +111,10 @@ BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ BCH_FSCK_ERR_fs_usage_replicas_wrong, \ BCH_FSCK_ERR_accounting_replicas_not_marked, \ - BCH_FSCK_ERR_bkey_version_in_future) + BCH_FSCK_ERR_bkey_version_in_future) \ + x(rebalance_work_acct_fix, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_accounting_mismatch) struct upgrade_downgrade_entry { u64 recovery_passes; -- cgit From 128f71fe014fc91efa1407ce549f94a9a9f1072c Mon Sep 17 00:00:00 2001 From: Huang-Huang Bao Date: Tue, 9 Jul 2024 18:54:28 +0800 Subject: pinctrl: rockchip: correct RK3328 iomux width flag for GPIO2-B pins The base iomux offsets for each GPIO pin line are accumulatively calculated based off iomux width flag in rockchip_pinctrl_get_soc_data. If the iomux width flag is one of IOMUX_WIDTH_4BIT, IOMUX_WIDTH_3BIT or IOMUX_WIDTH_2BIT, the base offset for next pin line would increase by 8 bytes, otherwise it would increase by 4 bytes. Despite most of GPIO2-B iomux have 2-bit data width, which can be fit into 4 bytes space with write mask, it actually take 8 bytes width for whole GPIO2-B line. Commit e8448a6c817c ("pinctrl: rockchip: fix pinmux bits for RK3328 GPIO2-B pins") wrongly set iomux width flag to 0, causing all base iomux offset for line after GPIO2-B to be calculated wrong. Fix the iomux width flag to IOMUX_WIDTH_2BIT so the offset after GPIO2-B is correctly increased by 8, matching the actual width of GPIO2-B iomux. Fixes: e8448a6c817c ("pinctrl: rockchip: fix pinmux bits for RK3328 GPIO2-B pins") Cc: stable@vger.kernel.org Reported-by: Richard Kojedzinszky Closes: https://lore.kernel.org/linux-rockchip/4f29b743202397d60edfb3c725537415@kojedz.in/ Tested-by: Richard Kojedzinszky Signed-off-by: Huang-Huang Bao Reviewed-by: Heiko Stuebner Tested-by: Daniel Golle Tested-by: Trevor Woerner Link: https://lore.kernel.org/20240709105428.1176375-1-i@eh5.me Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-rockchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 0eacaf10c640..6878bc86faa2 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -3795,7 +3795,7 @@ static struct rockchip_pin_bank rk3328_pin_banks[] = { PIN_BANK_IOMUX_FLAGS(0, 32, "gpio0", 0, 0, 0, 0), PIN_BANK_IOMUX_FLAGS(1, 32, "gpio1", 0, 0, 0, 0), PIN_BANK_IOMUX_FLAGS(2, 32, "gpio2", 0, - 0, + IOMUX_WIDTH_2BIT, IOMUX_WIDTH_3BIT, 0), PIN_BANK_IOMUX_FLAGS(3, 32, "gpio3", -- cgit From 5be63fc19fcaa4c236b307420483578a56986a37 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Aug 2024 19:07:11 +1200 Subject: Linux 6.11-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2c1db7a6f793..7b60eb103c5d 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* -- cgit From c724b2ab6a46435b4e7d58ad2fbbdb7a318823cf Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 21 Aug 2024 17:18:23 +0200 Subject: smb/client: avoid dereferencing rdata=NULL in smb2_new_read_req() This happens when called from SMB2_read() while using rdma and reaching the rdma_readwrite_threshold. Cc: stable@vger.kernel.org Fixes: a6559cc1d35d ("cifs: split out smb3_use_rdma_offload() helper") Reviewed-by: David Howells Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 83facb54276a..8901de199a6b 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4441,7 +4441,7 @@ smb2_new_read_req(void **buf, unsigned int *total_len, * If we want to do a RDMA write, fill in and append * smbd_buffer_descriptor_v1 to the end of read request */ - if (smb3_use_rdma_offload(io_parms)) { + if (rdata && smb3_use_rdma_offload(io_parms)) { struct smbd_buffer_descriptor_v1 *v1; bool need_invalidate = server->dialect == SMB30_PROT_ID; -- cgit From b608e2c318789aeba49055747166e13bee57df4a Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 21 Aug 2024 15:59:12 +0200 Subject: smb/client: remove unused rq_iter_size from struct smb_rqst Reviewed-by: David Howells Fixes: d08089f649a0 ("cifs: Change the I/O paths to use an iterator rather than a page list") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 1 - fs/smb/client/cifssmb.c | 1 - fs/smb/client/smb2ops.c | 2 -- fs/smb/client/smb2pdu.c | 2 -- 4 files changed, 6 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 7ebe80a25d04..f379b9dc93ba 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -254,7 +254,6 @@ struct cifs_open_info_data { struct smb_rqst { struct kvec *rq_iov; /* array of kvecs */ unsigned int rq_nvec; /* number of kvecs in array */ - size_t rq_iter_size; /* Amount of data in ->rq_iter */ struct iov_iter rq_iter; /* Data iterator */ struct xarray rq_buffer; /* Page buffer for encryption */ }; diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 595c4b673707..6dce70f17208 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1713,7 +1713,6 @@ cifs_async_writev(struct cifs_io_subrequest *wdata) rqst.rq_iov = iov; rqst.rq_nvec = 2; rqst.rq_iter = wdata->subreq.io_iter; - rqst.rq_iter_size = iov_iter_count(&wdata->subreq.io_iter); cifs_dbg(FYI, "async write at %llu %zu bytes\n", wdata->subreq.start, wdata->subreq.len); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 322cabc69c6f..ea298456d841 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4446,7 +4446,6 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, } iov_iter_xarray(&new->rq_iter, ITER_SOURCE, buffer, 0, size); - new->rq_iter_size = size; } } @@ -4492,7 +4491,6 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf, rqst.rq_nvec = 2; if (iter) { rqst.rq_iter = *iter; - rqst.rq_iter_size = iov_iter_count(iter); iter_size = iov_iter_count(iter); } diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 8901de199a6b..63a2541d4a05 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4523,7 +4523,6 @@ smb2_readv_callback(struct mid_q_entry *mid) if (rdata->got_bytes) { rqst.rq_iter = rdata->subreq.io_iter; - rqst.rq_iter_size = iov_iter_count(&rdata->subreq.io_iter); } WARN_ONCE(rdata->server != mid->server, @@ -4975,7 +4974,6 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) rqst.rq_iov = iov; rqst.rq_nvec = 1; rqst.rq_iter = wdata->subreq.io_iter; - rqst.rq_iter_size = iov_iter_count(&rqst.rq_iter); if (test_bit(NETFS_SREQ_RETRYING, &wdata->subreq.flags)) smb2_set_replay(server, &rqst); #ifdef CONFIG_CIFS_SMB_DIRECT -- cgit From 017d1701743657fbfaea74397727a9d2b81846b7 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 21 Aug 2024 16:31:39 +0200 Subject: smb/client: fix rdma usage in smb2_async_writev() rqst.rq_iter needs to be truncated otherwise we'll also send the bytes into the stream socket... This is the logic behind rqst.rq_npages = 0, which was removed in "cifs: Change the I/O paths to use an iterator rather than a page list" (d08089f649a0cfb2099c8551ac47eef0cc23fdf2). Cc: stable@vger.kernel.org Fixes: d08089f649a0 ("cifs: Change the I/O paths to use an iterator rather than a page list") Reviewed-by: David Howells Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 63a2541d4a05..2d7e6c42cf18 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4913,6 +4913,13 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) if (rc) goto out; + rqst.rq_iov = iov; + rqst.rq_iter = wdata->subreq.io_iter; + + rqst.rq_iov[0].iov_len = total_len - 1; + rqst.rq_iov[0].iov_base = (char *)req; + rqst.rq_nvec += 1; + if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -4924,6 +4931,7 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) req->WriteChannelInfoOffset = 0; req->WriteChannelInfoLength = 0; req->Channel = SMB2_CHANNEL_NONE; + req->Length = cpu_to_le32(io_parms->length); req->Offset = cpu_to_le64(io_parms->offset); req->DataOffset = cpu_to_le16( offsetof(struct smb2_write_req, Buffer)); @@ -4943,7 +4951,6 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) */ if (smb3_use_rdma_offload(io_parms)) { struct smbd_buffer_descriptor_v1 *v1; - size_t data_size = iov_iter_count(&wdata->subreq.io_iter); bool need_invalidate = server->dialect == SMB30_PROT_ID; wdata->mr = smbd_register_mr(server->smbd_conn, &wdata->subreq.io_iter, @@ -4952,9 +4959,10 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) rc = -EAGAIN; goto async_writev_out; } + /* For RDMA read, I/O size is in RemainingBytes not in Length */ + req->RemainingBytes = req->Length; req->Length = 0; req->DataOffset = 0; - req->RemainingBytes = cpu_to_le32(data_size); req->Channel = SMB2_CHANNEL_RDMA_V1_INVALIDATE; if (need_invalidate) req->Channel = SMB2_CHANNEL_RDMA_V1; @@ -4966,30 +4974,22 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) v1->offset = cpu_to_le64(wdata->mr->mr->iova); v1->token = cpu_to_le32(wdata->mr->mr->rkey); v1->length = cpu_to_le32(wdata->mr->mr->length); + + rqst.rq_iov[0].iov_len += sizeof(*v1); + + /* + * We keep wdata->subreq.io_iter, + * but we have to truncate rqst.rq_iter + */ + iov_iter_truncate(&rqst.rq_iter, 0); } #endif - iov[0].iov_len = total_len - 1; - iov[0].iov_base = (char *)req; - rqst.rq_iov = iov; - rqst.rq_nvec = 1; - rqst.rq_iter = wdata->subreq.io_iter; if (test_bit(NETFS_SREQ_RETRYING, &wdata->subreq.flags)) smb2_set_replay(server, &rqst); -#ifdef CONFIG_CIFS_SMB_DIRECT - if (wdata->mr) - iov[0].iov_len += sizeof(struct smbd_buffer_descriptor_v1); -#endif - cifs_dbg(FYI, "async write at %llu %u bytes iter=%zx\n", - io_parms->offset, io_parms->length, iov_iter_count(&rqst.rq_iter)); -#ifdef CONFIG_CIFS_SMB_DIRECT - /* For RDMA read, I/O size is in RemainingBytes not in Length */ - if (!wdata->mr) - req->Length = cpu_to_le32(io_parms->length); -#else - req->Length = cpu_to_le32(io_parms->length); -#endif + cifs_dbg(FYI, "async write at %llu %u bytes iter=%zx\n", + io_parms->offset, io_parms->length, iov_iter_count(&wdata->subreq.io_iter)); if (wdata->credits.value > 0) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->subreq.len, -- cgit From 416871f4fb84bc96822562e654941d5625a25bf8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Aug 2024 14:22:42 +0100 Subject: cifs: Fix FALLOC_FL_PUNCH_HOLE support The cifs filesystem doesn't quite emulate FALLOC_FL_PUNCH_HOLE correctly (note that due to lack of protocol support, it can't actually implement it directly). Whilst it will (partially) invalidate dirty folios in the pagecache, it doesn't write them back first, and so the EOF marker on the server may be lower than inode->i_size. This presents a problem, however, as if the punched hole invalidates the tail of the locally cached dirty data, writeback won't know it needs to move the EOF over to account for the hole punch (which isn't supposed to move the EOF). We could just write zeroes over the punched out region of the pagecache and write that back - but this is supposed to be a deallocatory operation. Fix this by manually moving the EOF over on the server after the operation if the hole punched would corrupt it. Note that the FSCTL_SET_ZERO_DATA RPC and the setting of the EOF should probably be compounded to stop a third party interfering (or, at least, massively reduce the chance). This was reproducible occasionally by using fsx with the following script: truncate 0x0 0x375e2 0x0 punch_hole 0x2f6d3 0x6ab5 0x375e2 truncate 0x0 0x3a71f 0x375e2 mapread 0xee05 0xcf12 0x3a71f write 0x2078e 0x5604 0x3a71f write 0x3ebdf 0x1421 0x3a71f * punch_hole 0x379d0 0x8630 0x40000 * mapread 0x2aaa2 0x85b 0x40000 fallocate 0x1b401 0x9ada 0x40000 read 0x15f2 0x7d32 0x40000 read 0x32f37 0x7a3b 0x40000 * The second "write" should extend the EOF to 0x40000, and the "punch_hole" should operate inside of that - but that depends on whether the VM gets in and writes back the data first. If it doesn't, the file ends up 0x3a71f in size, not 0x40000. Fixes: 31742c5a3317 ("enable fallocate punch hole ("fallocate -p") for SMB3") Signed-off-by: David Howells cc: Steve French cc: Paulo Alcantara cc: Shyam Prasad N cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index ea298456d841..0b9cb1a60d4a 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3305,6 +3305,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, struct inode *inode = file_inode(file); struct cifsFileInfo *cfile = file->private_data; struct file_zero_data_information fsctl_buf; + unsigned long long end = offset + len, i_size, remote_i_size; long rc; unsigned int xid; __u8 set_sparse = 1; @@ -3336,6 +3337,27 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, (char *)&fsctl_buf, sizeof(struct file_zero_data_information), CIFSMaxBufSize, NULL, NULL); + + if (rc) + goto unlock; + + /* If there's dirty data in the buffer that would extend the EOF if it + * were written, then we need to move the EOF marker over to the lower + * of the high end of the hole and the proposed EOF. The problem is + * that we locally hole-punch the tail of the dirty data, the proposed + * EOF update will end up in the wrong place. + */ + i_size = i_size_read(inode); + remote_i_size = netfs_inode(inode)->remote_i_size; + if (end > remote_i_size && i_size > remote_i_size) { + unsigned long long extend_to = umin(end, i_size); + rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, cfile->pid, extend_to); + if (rc >= 0) + netfs_inode(inode)->remote_i_size = extend_to; + } + +unlock: filemap_invalidate_unlock(inode->i_mapping); out: inode_unlock(inode); -- cgit From 58aec91efb93338d1cc7acc0a93242613a2a4e5f Mon Sep 17 00:00:00 2001 From: Miao Wang Date: Sun, 25 Aug 2024 22:17:39 +0800 Subject: LoongArch: Remove the unused dma-direct.h dma-direct.h is introduced in commit d4b6f1562a3c3284 ("LoongArch: Add Non-Uniform Memory Access (NUMA) support"). In commit c78c43fe7d42524c ("LoongArch: Use acpi_arch_dma_setup() and remove ARCH_HAS_PHYS_TO_DMA"), ARCH_HAS_PHYS_TO_DMA was deselected and the coresponding phys_to_dma()/ dma_to_phys() functions were removed. However, the unused dma-direct.h was left behind, which is removed by this patch. Cc: Fixes: c78c43fe7d42 ("LoongArch: Use acpi_arch_dma_setup() and remove ARCH_HAS_PHYS_TO_DMA") Signed-off-by: Miao Wang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/dma-direct.h | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 arch/loongarch/include/asm/dma-direct.h diff --git a/arch/loongarch/include/asm/dma-direct.h b/arch/loongarch/include/asm/dma-direct.h deleted file mode 100644 index 75ccd808a2af..000000000000 --- a/arch/loongarch/include/asm/dma-direct.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2020-2022 Loongson Technology Corporation Limited - */ -#ifndef _LOONGARCH_DMA_DIRECT_H -#define _LOONGARCH_DMA_DIRECT_H - -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); - -#endif /* _LOONGARCH_DMA_DIRECT_H */ -- cgit From 44ceabdec12f4e5938f5668c5a691aa3aac703d7 Mon Sep 17 00:00:00 2001 From: YOUNGJIN JOO Date: Sun, 25 Aug 2024 18:25:15 +0900 Subject: ALSA: hda/realtek: Fix the speaker output on Samsung Galaxy Book3 Ultra 144d:c1cc requires the same workaround to enable the speaker amp as other Samsung models with the ALC298 codec. Signed-off-by: YOUNGJIN JOO Cc: Link: https://patch.msgid.link/20240825092515.28728-1-neoelec@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b5cc3417138c..c04eac6a5064 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10540,6 +10540,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xca03, "Samsung Galaxy Book2 Pro 360 (NP930QED)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc868, "Samsung Galaxy Book2 Pro (NP930XED)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc1ca, "Samsung Galaxy Book3 Pro 360 (NP960QFG-KB1US)", ALC298_FIXUP_SAMSUNG_AMP2), + SND_PCI_QUIRK(0x144d, 0xc1cc, "Samsung Galaxy Book3 Ultra (NT960XFH-XD92G))", ALC298_FIXUP_SAMSUNG_AMP2), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), -- cgit From 25dfc9e357af8aed1ca79b318a73f2c59c1f0b2b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 19 Aug 2024 11:30:04 -0700 Subject: perf/x86/intel: Limit the period on Haswell Running the ltp test cve-2015-3290 concurrently reports the following warnings. perfevents: irq loop stuck! WARNING: CPU: 31 PID: 32438 at arch/x86/events/intel/core.c:3174 intel_pmu_handle_irq+0x285/0x370 Call Trace: ? __warn+0xa4/0x220 ? intel_pmu_handle_irq+0x285/0x370 ? __report_bug+0x123/0x130 ? intel_pmu_handle_irq+0x285/0x370 ? __report_bug+0x123/0x130 ? intel_pmu_handle_irq+0x285/0x370 ? report_bug+0x3e/0xa0 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x18/0x50 ? asm_exc_invalid_op+0x1a/0x20 ? irq_work_claim+0x1e/0x40 ? intel_pmu_handle_irq+0x285/0x370 perf_event_nmi_handler+0x3d/0x60 nmi_handle+0x104/0x330 Thanks to Thomas Gleixner's analysis, the issue is caused by the low initial period (1) of the frequency estimation algorithm, which triggers the defects of the HW, specifically erratum HSW11 and HSW143. (For the details, please refer https://lore.kernel.org/lkml/87plq9l5d2.ffs@tglx/) The HSW11 requires a period larger than 100 for the INST_RETIRED.ALL event, but the initial period in the freq mode is 1. The erratum is the same as the BDM11, which has been supported in the kernel. A minimum period of 128 is enforced as well on HSW. HSW143 is regarding that the fixed counter 1 may overcount 32 with the Hyper-Threading is enabled. However, based on the test, the hardware has more issues than it tells. Besides the fixed counter 1, the message 'interrupt took too long' can be observed on any counter which was armed with a period < 32 and two events expired in the same NMI. A minimum period of 32 is enforced for the rest of the events. The recommended workaround code of the HSW143 is not implemented. Because it only addresses the issue for the fixed counter. It brings extra overhead through extra MSR writing. No related overcounting issue has been reported so far. Fixes: 3a632cb229bf ("perf/x86/intel: Add simple Haswell PMU support") Reported-by: Li Huafei Suggested-by: Thomas Gleixner Signed-off-by: Kan Liang Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240819183004.3132920-1-kan.liang@linux.intel.com Closes: https://lore.kernel.org/lkml/20240729223328.327835-1-lihuafei1@huawei.com/ --- arch/x86/events/intel/core.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 0c9c2706d4ec..9e519d8a810a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4589,6 +4589,25 @@ static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) return HYBRID_INTEL_CORE; } +static inline bool erratum_hsw11(struct perf_event *event) +{ + return (event->hw.config & INTEL_ARCH_EVENT_MASK) == + X86_CONFIG(.event=0xc0, .umask=0x01); +} + +/* + * The HSW11 requires a period larger than 100 which is the same as the BDM11. + * A minimum period of 128 is enforced as well for the INST_RETIRED.ALL. + * + * The message 'interrupt took too long' can be observed on any counter which + * was armed with a period < 32 and two events expired in the same NMI. + * A minimum period of 32 is enforced for the rest of the events. + */ +static void hsw_limit_period(struct perf_event *event, s64 *left) +{ + *left = max(*left, erratum_hsw11(event) ? 128 : 32); +} + /* * Broadwell: * @@ -4606,8 +4625,7 @@ static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) */ static void bdw_limit_period(struct perf_event *event, s64 *left) { - if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == - X86_CONFIG(.event=0xc0, .umask=0x01)) { + if (erratum_hsw11(event)) { if (*left < 128) *left = 128; *left &= ~0x3fULL; @@ -6766,6 +6784,7 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; + x86_pmu.limit_period = hsw_limit_period; x86_pmu.lbr_double_abort = true; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; -- cgit From 2d3447261031503b181dacc549fe65ffe2d93d65 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 21 Aug 2024 15:53:18 -0400 Subject: btrfs: run delayed iputs when flushing delalloc We have transient failures with btrfs/301, specifically in the part where we do for i in $(seq 0 10); do write 50m to file rm -f file done Sometimes this will result in a transient quota error, and it's because sometimes we start writeback on the file which results in a delayed iput, and thus the rm doesn't actually clean the file up. When we're flushing the quota space we need to run the delayed iputs to make sure all the unlinks that we think have completed have actually completed. This removes the small window where we could fail to find enough space in our quota. CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 5d57a285d59b..7d6f5d9420ec 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4185,6 +4185,8 @@ static int try_flush_qgroup(struct btrfs_root *root) return 0; } + btrfs_run_delayed_iputs(root->fs_info); + btrfs_wait_on_delayed_iputs(root->fs_info); ret = btrfs_start_delalloc_snapshot(root, true); if (ret < 0) goto out; -- cgit From 9efaebc0072b8e95505544bf385c20ee8a29d799 Mon Sep 17 00:00:00 2001 From: Ross Brown Date: Tue, 30 Jul 2024 08:21:42 +0200 Subject: hwmon: (asus-ec-sensors) remove VRM temp X570-E GAMING X570-E GAMING does not have VRM temperature sensor. Signed-off-by: Ross Brown Signed-off-by: Eugene Shalygin Link: https://lore.kernel.org/r/20240730062320.5188-2-eugene.shalygin@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/asus-ec-sensors.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index 6bb8d7b1d219..ee396f21fac5 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -420,7 +420,7 @@ static const struct ec_board_info board_info_strix_b550_i_gaming = { static const struct ec_board_info board_info_strix_x570_e_gaming = { .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | - SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM | + SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE, .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, -- cgit From 0075df288dd8a7abfe03b3766176c393063591dd Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 29 Jul 2024 08:33:27 +0300 Subject: microblaze: don't treat zero reserved memory regions as error Before commit 721f4a6526da ("mm/memblock: remove empty dummy entry") the check for non-zero of memblock.reserved.cnt in mmu_init() would always be true either because memblock.reserved.cnt is initialized to 1 or because there were memory reservations earlier. The removal of dummy empty entry in memblock caused this check to fail because now memblock.reserved.cnt is initialized to 0. Remove the check for non-zero of memblock.reserved.cnt because it's perfectly fine to have an empty memblock.reserved array that early in boot. Reported-by: Guenter Roeck Signed-off-by: Mike Rapoport Reviewed-by: Wei Yang Tested-by: Guenter Roeck Link: https://lore.kernel.org/r/20240729053327.4091459-1-rppt@kernel.org Signed-off-by: Guenter Roeck --- arch/microblaze/mm/init.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 3827dc76edd8..4520c5741579 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -193,11 +193,6 @@ asmlinkage void __init mmu_init(void) { unsigned int kstart, ksize; - if (!memblock.reserved.cnt) { - pr_emerg("Error memory count\n"); - machine_restart(NULL); - } - if ((u32) memblock.memory.regions[0].size < 0x400000) { pr_emerg("Memory must be greater than 4MB\n"); machine_restart(NULL); -- cgit From 5a4c785905fd9361d067127b42564c08893f2a6f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 5 Aug 2024 18:13:41 -0700 Subject: Revert "MIPS: csrc-r4k: Apply verification clocksource flags" This reverts commit 7190401fc56fb5f02ee3d04476778ab000bbaf32. Verifying the clock source sometimes deems the MIPS clock to be unstable, at least in qemu. clocksource: timekeeping watchdog on CPU0: Marking clocksource 'MIPS' as unstable because the skew is too large: clocksource: 'jiffies' wd_nsec: 500000000 wd_now: ffff8bde wd_last: ffff8bac mask: ffffffff clocksource: 'MIPS' cs_nsec: 940634468 cs_now: 310181c4 cs_last: 28090a09 mask: ffffffff clocksource: Clocksource 'MIPS' skewed 440634468 ns (440 ms) over watchdog 'jiffies' interval of 500000000 ns (500 ms) clocksource: 'MIPS' is current clocksource. If this happens, network interfaces fail to come online. Signed-off-by: Guenter Roeck --- arch/mips/kernel/csrc-r4k.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c index bdb1fa8931f4..59eca397f297 100644 --- a/arch/mips/kernel/csrc-r4k.c +++ b/arch/mips/kernel/csrc-r4k.c @@ -21,9 +21,7 @@ static struct clocksource clocksource_mips = { .name = "MIPS", .read = c0_hpt_read, .mask = CLOCKSOURCE_MASK(32), - .flags = CLOCK_SOURCE_IS_CONTINUOUS | - CLOCK_SOURCE_MUST_VERIFY | - CLOCK_SOURCE_VERIFY_PERCPU, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; static u64 __maybe_unused notrace r4k_read_sched_clock(void) -- cgit From 98c0cc48e27e9d269a3e4db2acd72b486c88ec77 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 8 Aug 2024 08:50:03 -0700 Subject: apparmor: fix policy_unpack_test on big endian systems policy_unpack_test fails on big endian systems because data byte order is expected to be little endian but is generated in host byte order. This results in test failures such as: # policy_unpack_test_unpack_array_with_null_name: EXPECTATION FAILED at security/apparmor/policy_unpack_test.c:150 Expected array_size == (u16)16, but array_size == 4096 (0x1000) (u16)16 == 16 (0x10) # policy_unpack_test_unpack_array_with_null_name: pass:0 fail:1 skip:0 total:1 not ok 3 policy_unpack_test_unpack_array_with_null_name # policy_unpack_test_unpack_array_with_name: EXPECTATION FAILED at security/apparmor/policy_unpack_test.c:164 Expected array_size == (u16)16, but array_size == 4096 (0x1000) (u16)16 == 16 (0x10) # policy_unpack_test_unpack_array_with_name: pass:0 fail:1 skip:0 total:1 Add the missing endianness conversions when generating test data. Fixes: 4d944bcd4e73 ("apparmor: add AppArmor KUnit tests for policy unpack") Cc: Brendan Higgins Cc: Kees Cook Signed-off-by: Guenter Roeck --- security/apparmor/policy_unpack_test.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/security/apparmor/policy_unpack_test.c b/security/apparmor/policy_unpack_test.c index 874fcf97794e..c64733d6c98f 100644 --- a/security/apparmor/policy_unpack_test.c +++ b/security/apparmor/policy_unpack_test.c @@ -80,14 +80,14 @@ static struct aa_ext *build_aa_ext_struct(struct policy_unpack_fixture *puf, *(buf + 1) = strlen(TEST_U32_NAME) + 1; strscpy(buf + 3, TEST_U32_NAME, e->end - (void *)(buf + 3)); *(buf + 3 + strlen(TEST_U32_NAME) + 1) = AA_U32; - *((u32 *)(buf + 3 + strlen(TEST_U32_NAME) + 2)) = TEST_U32_DATA; + *((__le32 *)(buf + 3 + strlen(TEST_U32_NAME) + 2)) = cpu_to_le32(TEST_U32_DATA); buf = e->start + TEST_NAMED_U64_BUF_OFFSET; *buf = AA_NAME; *(buf + 1) = strlen(TEST_U64_NAME) + 1; strscpy(buf + 3, TEST_U64_NAME, e->end - (void *)(buf + 3)); *(buf + 3 + strlen(TEST_U64_NAME) + 1) = AA_U64; - *((u64 *)(buf + 3 + strlen(TEST_U64_NAME) + 2)) = TEST_U64_DATA; + *((__le64 *)(buf + 3 + strlen(TEST_U64_NAME) + 2)) = cpu_to_le64(TEST_U64_DATA); buf = e->start + TEST_NAMED_BLOB_BUF_OFFSET; *buf = AA_NAME; @@ -103,7 +103,7 @@ static struct aa_ext *build_aa_ext_struct(struct policy_unpack_fixture *puf, *(buf + 1) = strlen(TEST_ARRAY_NAME) + 1; strscpy(buf + 3, TEST_ARRAY_NAME, e->end - (void *)(buf + 3)); *(buf + 3 + strlen(TEST_ARRAY_NAME) + 1) = AA_ARRAY; - *((u16 *)(buf + 3 + strlen(TEST_ARRAY_NAME) + 2)) = TEST_ARRAY_SIZE; + *((__le16 *)(buf + 3 + strlen(TEST_ARRAY_NAME) + 2)) = cpu_to_le16(TEST_ARRAY_SIZE); return e; } -- cgit From aba07b9a0587f50e5d3346eaa19019cf3f86c0ea Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 16 Aug 2024 14:32:05 -0400 Subject: drm/vmwgfx: Prevent unmapping active read buffers The kms paths keep a persistent map active to read and compare the cursor buffer. These maps can race with each other in simple scenario where: a) buffer "a" mapped for update b) buffer "a" mapped for compare c) do the compare d) unmap "a" for compare e) update the cursor f) unmap "a" for update At step "e" the buffer has been unmapped and the read contents is bogus. Prevent unmapping of active read buffers by simply keeping a count of how many paths have currently active maps and unmap only when the count reaches 0. Fixes: 485d98d472d5 ("drm/vmwgfx: Add support for CursorMob and CursorBypass 4") Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Cc: # v5.19+ Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20240816183332.31961-2-zack.rusin@broadcom.com Reviewed-by: Martin Krastev Reviewed-by: Maaz Mombasawala --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 13 +++++++++++-- drivers/gpu/drm/vmwgfx/vmwgfx_bo.h | 3 +++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index f42ebc4a7c22..a0e433fbcba6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -360,6 +360,8 @@ void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size) void *virtual; int ret; + atomic_inc(&vbo->map_count); + virtual = ttm_kmap_obj_virtual(&vbo->map, ¬_used); if (virtual) return virtual; @@ -383,11 +385,17 @@ void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size) */ void vmw_bo_unmap(struct vmw_bo *vbo) { + int map_count; + if (vbo->map.bo == NULL) return; - ttm_bo_kunmap(&vbo->map); - vbo->map.bo = NULL; + map_count = atomic_dec_return(&vbo->map_count); + + if (!map_count) { + ttm_bo_kunmap(&vbo->map); + vbo->map.bo = NULL; + } } @@ -421,6 +429,7 @@ static int vmw_bo_init(struct vmw_private *dev_priv, vmw_bo->tbo.priority = 3; vmw_bo->res_tree = RB_ROOT; xa_init(&vmw_bo->detached_resources); + atomic_set(&vmw_bo->map_count, 0); params->size = ALIGN(params->size, PAGE_SIZE); drm_gem_private_object_init(vdev, &vmw_bo->tbo.base, params->size); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index 62b4342d5f7c..43b5439ec9f7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -71,6 +71,8 @@ struct vmw_bo_params { * @map: Kmap object for semi-persistent mappings * @res_tree: RB tree of resources using this buffer object as a backing MOB * @res_prios: Eviction priority counts for attached resources + * @map_count: The number of currently active maps. Will differ from the + * cpu_writers because it includes kernel maps. * @cpu_writers: Number of synccpu write grabs. Protected by reservation when * increased. May be decreased without reservation. * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB @@ -90,6 +92,7 @@ struct vmw_bo { u32 res_prios[TTM_MAX_BO_PRIORITY]; struct xarray detached_resources; + atomic_t map_count; atomic_t cpu_writers; /* Not ref-counted. Protected by binding_mutex */ struct vmw_resource *dx_query_ctx; -- cgit From 50f1199250912568606b3778dc56646c10cb7b04 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 16 Aug 2024 14:32:06 -0400 Subject: drm/vmwgfx: Fix prime with external buffers Make sure that for external buffers mapping goes through the dma_buf interface instead of trying to access pages directly. External buffers might not provide direct access to readable/writable pages so to make sure the bo's created from external dma_bufs can be read dma_buf interface has to be used. Fixes crashes in IGT's kms_prime with vgem. Regular desktop usage won't trigger this due to the fact that virtual machines will not have multiple GPUs but it enables better test coverage in IGT. Signed-off-by: Zack Rusin Fixes: b32233acceff ("drm/vmwgfx: Fix prime import/export") Cc: # v6.6+ Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Cc: # v6.9+ Link: https://patchwork.freedesktop.org/patch/msgid/20240816183332.31961-3-zack.rusin@broadcom.com Reviewed-by: Martin Krastev Reviewed-by: Maaz Mombasawala --- drivers/gpu/drm/vmwgfx/vmwgfx_blit.c | 114 +++++++++++++++++++++++++++++++++-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 12 ++-- 3 files changed, 118 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c index 717d624e9a05..890a66a2361f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c @@ -27,6 +27,8 @@ **************************************************************************/ #include "vmwgfx_drv.h" + +#include "vmwgfx_bo.h" #include /* @@ -420,13 +422,105 @@ static int vmw_bo_cpu_blit_line(struct vmw_bo_blit_line_data *d, return 0; } +static void *map_external(struct vmw_bo *bo, struct iosys_map *map) +{ + struct vmw_private *vmw = + container_of(bo->tbo.bdev, struct vmw_private, bdev); + void *ptr = NULL; + int ret; + + if (bo->tbo.base.import_attach) { + ret = dma_buf_vmap(bo->tbo.base.dma_buf, map); + if (ret) { + drm_dbg_driver(&vmw->drm, + "Wasn't able to map external bo!\n"); + goto out; + } + ptr = map->vaddr; + } else { + ptr = vmw_bo_map_and_cache(bo); + } + +out: + return ptr; +} + +static void unmap_external(struct vmw_bo *bo, struct iosys_map *map) +{ + if (bo->tbo.base.import_attach) + dma_buf_vunmap(bo->tbo.base.dma_buf, map); + else + vmw_bo_unmap(bo); +} + +static int vmw_external_bo_copy(struct vmw_bo *dst, u32 dst_offset, + u32 dst_stride, struct vmw_bo *src, + u32 src_offset, u32 src_stride, + u32 width_in_bytes, u32 height, + struct vmw_diff_cpy *diff) +{ + struct vmw_private *vmw = + container_of(dst->tbo.bdev, struct vmw_private, bdev); + size_t dst_size = dst->tbo.resource->size; + size_t src_size = src->tbo.resource->size; + struct iosys_map dst_map = {0}; + struct iosys_map src_map = {0}; + int ret, i; + int x_in_bytes; + u8 *vsrc; + u8 *vdst; + + vsrc = map_external(src, &src_map); + if (!vsrc) { + drm_dbg_driver(&vmw->drm, "Wasn't able to map src\n"); + ret = -ENOMEM; + goto out; + } + + vdst = map_external(dst, &dst_map); + if (!vdst) { + drm_dbg_driver(&vmw->drm, "Wasn't able to map dst\n"); + ret = -ENOMEM; + goto out; + } + + vsrc += src_offset; + vdst += dst_offset; + if (src_stride == dst_stride) { + dst_size -= dst_offset; + src_size -= src_offset; + memcpy(vdst, vsrc, + min(dst_stride * height, min(dst_size, src_size))); + } else { + WARN_ON(dst_stride < width_in_bytes); + for (i = 0; i < height; ++i) { + memcpy(vdst, vsrc, width_in_bytes); + vsrc += src_stride; + vdst += dst_stride; + } + } + + x_in_bytes = (dst_offset % dst_stride); + diff->rect.x1 = x_in_bytes / diff->cpp; + diff->rect.y1 = ((dst_offset - x_in_bytes) / dst_stride); + diff->rect.x2 = diff->rect.x1 + width_in_bytes / diff->cpp; + diff->rect.y2 = diff->rect.y1 + height; + + ret = 0; +out: + unmap_external(src, &src_map); + unmap_external(dst, &dst_map); + + return ret; +} + /** * vmw_bo_cpu_blit - in-kernel cpu blit. * - * @dst: Destination buffer object. + * @vmw_dst: Destination buffer object. * @dst_offset: Destination offset of blit start in bytes. * @dst_stride: Destination stride in bytes. - * @src: Source buffer object. + * @vmw_src: Source buffer object. * @src_offset: Source offset of blit start in bytes. * @src_stride: Source stride in bytes. * @w: Width of blit. @@ -444,13 +538,15 @@ static int vmw_bo_cpu_blit_line(struct vmw_bo_blit_line_data *d, * Neither of the buffer objects may be placed in PCI memory * (Fixed memory in TTM terminology) when using this function. */ -int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, +int vmw_bo_cpu_blit(struct vmw_bo *vmw_dst, u32 dst_offset, u32 dst_stride, - struct ttm_buffer_object *src, + struct vmw_bo *vmw_src, u32 src_offset, u32 src_stride, u32 w, u32 h, struct vmw_diff_cpy *diff) { + struct ttm_buffer_object *src = &vmw_src->tbo; + struct ttm_buffer_object *dst = &vmw_dst->tbo; struct ttm_operation_ctx ctx = { .interruptible = false, .no_wait_gpu = false @@ -460,6 +556,11 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, int ret = 0; struct page **dst_pages = NULL; struct page **src_pages = NULL; + bool src_external = (src->ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0; + bool dst_external = (dst->ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0; + + if (WARN_ON(dst == src)) + return -EINVAL; /* Buffer objects need to be either pinned or reserved: */ if (!(dst->pin_count)) @@ -479,6 +580,11 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, return ret; } + if (src_external || dst_external) + return vmw_external_bo_copy(vmw_dst, dst_offset, dst_stride, + vmw_src, src_offset, src_stride, + w, h, diff); + if (!src->ttm->pages && src->ttm->sg) { src_pages = kvmalloc_array(src->ttm->num_pages, sizeof(struct page *), GFP_KERNEL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 32f50e595809..3f4719b3c268 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1353,9 +1353,9 @@ void vmw_diff_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src, void vmw_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src, size_t n); -int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, +int vmw_bo_cpu_blit(struct vmw_bo *dst, u32 dst_offset, u32 dst_stride, - struct ttm_buffer_object *src, + struct vmw_bo *src, u32 src_offset, u32 src_stride, u32 w, u32 h, struct vmw_diff_cpy *diff); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index 5453f7cf0e2d..fab155a68054 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -502,7 +502,7 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty) container_of(dirty->unit, typeof(*stdu), base); s32 width, height; s32 src_pitch, dst_pitch; - struct ttm_buffer_object *src_bo, *dst_bo; + struct vmw_bo *src_bo, *dst_bo; u32 src_offset, dst_offset; struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(stdu->cpp); @@ -517,11 +517,11 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty) /* Assume we are blitting from Guest (bo) to Host (display_srf) */ src_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp; - src_bo = &stdu->display_srf->res.guest_memory_bo->tbo; + src_bo = stdu->display_srf->res.guest_memory_bo; src_offset = ddirty->top * src_pitch + ddirty->left * stdu->cpp; dst_pitch = ddirty->pitch; - dst_bo = &ddirty->buf->tbo; + dst_bo = ddirty->buf; dst_offset = ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp; (void) vmw_bo_cpu_blit(dst_bo, dst_offset, dst_pitch, @@ -1170,7 +1170,7 @@ vmw_stdu_bo_populate_update_cpu(struct vmw_du_update_plane *update, void *cmd, struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(0); struct vmw_stdu_update_gb_image *cmd_img = cmd; struct vmw_stdu_update *cmd_update; - struct ttm_buffer_object *src_bo, *dst_bo; + struct vmw_bo *src_bo, *dst_bo; u32 src_offset, dst_offset; s32 src_pitch, dst_pitch; s32 width, height; @@ -1184,11 +1184,11 @@ vmw_stdu_bo_populate_update_cpu(struct vmw_du_update_plane *update, void *cmd, diff.cpp = stdu->cpp; - dst_bo = &stdu->display_srf->res.guest_memory_bo->tbo; + dst_bo = stdu->display_srf->res.guest_memory_bo; dst_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp; dst_offset = bb->y1 * dst_pitch + bb->x1 * stdu->cpp; - src_bo = &vfbbo->buffer->tbo; + src_bo = vfbbo->buffer; src_pitch = update->vfb->base.pitches[0]; src_offset = bo_update->fb_top * src_pitch + bo_update->fb_left * stdu->cpp; -- cgit From e9fd436bb8fb9b9d31fdf07bbcdba6d30290c5e4 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 16 Aug 2024 14:32:07 -0400 Subject: drm/vmwgfx: Disable coherent dumb buffers without 3d Coherent surfaces make only sense if the host renders to them using accelerated apis. Without 3d the entire content of dumb buffers stays in the guest making all of the extra work they're doing to synchronize between guest and host useless. Configurations without 3d also tend to run with very low graphics memory limits. The pinned console fb, mob cursors and graphical login manager tend to run out of 16MB graphics memory that those guests use. Fix it by making sure the coherent dumb buffers are only used on configs with 3d enabled. Signed-off-by: Zack Rusin Fixes: d6667f0ddf46 ("drm/vmwgfx: Fix handling of dumb buffers") Reported-by: Christian Heusel Closes: https://lore.kernel.org/all/0d0330f3-2ac0-4cd5-8075-7f1cbaf72a8e@heusel.eu Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Cc: # v6.9+ Link: https://patchwork.freedesktop.org/patch/msgid/20240816183332.31961-4-zack.rusin@broadcom.com Reviewed-by: Martin Krastev Reviewed-by: Maaz Mombasawala Tested-by: Benjamin Coddington --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 8ae6a761c900..1625b30d9970 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -2283,9 +2283,11 @@ int vmw_dumb_create(struct drm_file *file_priv, /* * Without mob support we're just going to use raw memory buffer * because we wouldn't be able to support full surface coherency - * without mobs + * without mobs. There also no reason to support surface coherency + * without 3d (i.e. gpu usage on the host) because then all the + * contents is going to be rendered guest side. */ - if (!dev_priv->has_mob) { + if (!dev_priv->has_mob || !vmw_supports_3d(dev_priv)) { int cpp = DIV_ROUND_UP(args->bpp, 8); switch (cpp) { -- cgit From e21fea4ac3cf12eba1921fbbf7764bf69c6d4b2c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 22 Aug 2024 16:59:01 -0700 Subject: xfs: fix di_onlink checking for V1/V2 inodes "KjellR" complained on IRC that an old V4 filesystem suddenly stopped mounting after upgrading from 6.9.11 to 6.10.3, with the following splat when trying to read the rt bitmap inode: 00000000: 49 4e 80 00 01 02 00 01 00 00 00 00 00 00 00 00 IN.............. 00000010: 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000020: 00 00 00 00 00 00 00 00 43 d2 a9 da 21 0f d6 30 ........C...!..0 00000030: 43 d2 a9 da 21 0f d6 30 00 00 00 00 00 00 00 00 C...!..0........ 00000040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000050: 00 00 00 02 00 00 00 00 00 00 00 04 00 00 00 00 ................ 00000060: ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ As Dave Chinner points out, this is a V1 inode with both di_onlink and di_nlink set to 1 and di_flushiter == 0. In other words, this inode was formatted this way by mkfs and hasn't been touched since then. Back in the old days of xfsprogs 3.2.3, I observed that libxfs_ialloc would set di_nlink, but if the filesystem didn't have NLINK, it would then set di_version = 1. libxfs_iflush_int later sees the V1 inode and copies the value of di_nlink to di_onlink without zeroing di_onlink. Eventually this filesystem must have been upgraded to support NLINK because 6.10 doesn't support !NLINK filesystems, which is how we tripped over this old behavior. The filesystem doesn't have a realtime section, so that's why the rtbitmap inode has never been touched. Fix this by removing the di_onlink/di_nlink checking for all V1/V2 inodes because this is a muddy mess. The V3 inode handling code has always supported NLINK and written di_onlink==0 so keep that check. The removal of the V1 inode handling code when we dropped support for !NLINK obscured this old behavior. Reported-by: kjell.m.randa@gmail.com Fixes: 40cb8613d612 ("xfs: check unused nlink fields in the ondisk inode") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/libxfs/xfs_inode_buf.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 513b50da6215..79babeac9d75 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -514,12 +514,18 @@ xfs_dinode_verify( return __this_address; } - if (dip->di_version > 1) { + /* + * Historical note: xfsprogs in the 3.2 era set up its incore inodes to + * have di_nlink track the link count, even if the actual filesystem + * only supported V1 inodes (i.e. di_onlink). When writing out the + * ondisk inode, it would set both the ondisk di_nlink and di_onlink to + * the the incore di_nlink value, which is why we cannot check for + * di_nlink==0 on a V1 inode. V2/3 inodes would get written out with + * di_onlink==0, so we can check that. + */ + if (dip->di_version >= 2) { if (dip->di_onlink) return __this_address; - } else { - if (dip->di_nlink) - return __this_address; } /* don't allow invalid i_size */ -- cgit From 5335affcff91b53cfc45694171f911cb23257c8b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 22 Aug 2024 16:59:17 -0700 Subject: xfs: fix folio dirtying for XFILE_ALLOC callers willy pointed out that folio_mark_dirty is the correct function to use to mark an xfile folio dirty because it calls out to the mapping's aops to mark it dirty. For tmpfs this likely doesn't matter much since it currently uses nop_dirty_folio, but let's use the abstractions properly. Reported-by: willy@infradead.org Fixes: 6907e3c00a40 ("xfs: add file_{get,put}_folio") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/scrub/xfile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c index d848222f802b..9b5d98fe1f8a 100644 --- a/fs/xfs/scrub/xfile.c +++ b/fs/xfs/scrub/xfile.c @@ -293,7 +293,7 @@ xfile_get_folio( * (potentially last) reference in xfile_put_folio. */ if (flags & XFILE_ALLOC) - folio_set_dirty(folio); + folio_mark_dirty(folio); return folio; } -- cgit From 95179935beadccaf0f0bb461adb778731e293da4 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 22 Aug 2024 16:59:33 -0700 Subject: xfs: xfs_finobt_count_blocks() walks the wrong btree As a result of the factoring in commit 14dd46cf31f4 ("xfs: split xfs_inobt_init_cursor"), mount started taking a long time on a user's filesystem. For Anders, this made mount times regress from under a second to over 15 minutes for a filesystem with only 30 million inodes in it. Anders bisected it down to the above commit, but even then the bug was not obvious. In this commit, over 20 calls to xfs_inobt_init_cursor() were modified, and some we modified to call a new function named xfs_finobt_init_cursor(). If that takes you a moment to reread those function names to see what the rename was, then you have realised why this bug wasn't spotted during review. And it wasn't spotted on inspection even after the bisect pointed at this commit - a single missing "f" isn't the easiest thing for a human eye to notice.... The result is that xfs_finobt_count_blocks() now incorrectly calls xfs_inobt_init_cursor() so it is now walking the inobt instead of the finobt. Hence when there are lots of allocated inodes in a filesystem, mount takes a -long- time run because it now walks a massive allocated inode btrees instead of the small, nearly empty free inode btrees. It also means all the finobt space reservations are wrong, so mount could potentially given ENOSPC on kernel upgrade. In hindsight, commit 14dd46cf31f4 should have been two commits - the first to convert the finobt callers to the new API, the second to modify the xfs_inobt_init_cursor() API for the inobt callers. That would have made the bug very obvious during review. Fixes: 14dd46cf31f4 ("xfs: split xfs_inobt_init_cursor") Reported-by: Anders Blomdell Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Chandan Babu R --- fs/xfs/libxfs/xfs_ialloc_btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 496e2f72a85b..797d5b5f7b72 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -749,7 +749,7 @@ xfs_finobt_count_blocks( if (error) return error; - cur = xfs_inobt_init_cursor(pag, tp, agbp); + cur = xfs_finobt_init_cursor(pag, tp, agbp); error = xfs_btree_count_blocks(cur, tree_blocks); xfs_btree_del_cursor(cur, error); xfs_trans_brelse(tp, agbp); -- cgit From 410e8a18f8e9311c6bf29ae47f32ad46f0219569 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 22 Aug 2024 16:59:48 -0700 Subject: xfs: don't bother reporting blocks trimmed via FITRIM Don't bother reporting the number of bytes that we "trimmed" because the underlying storage isn't required to do anything(!) and failed discard IOs aren't reported to the caller anyway. It's not like userspace can use the reported value for anything useful like adjusting the offset parameter of the next call, and it's not like anyone ever wrote a manpage about FITRIM's out parameters. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Tested-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/xfs_discard.c | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 6f0fc7fe1f2b..25f5dffeab2a 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -158,8 +158,7 @@ static int xfs_trim_gather_extents( struct xfs_perag *pag, struct xfs_trim_cur *tcur, - struct xfs_busy_extents *extents, - uint64_t *blocks_trimmed) + struct xfs_busy_extents *extents) { struct xfs_mount *mp = pag->pag_mount; struct xfs_trans *tp; @@ -280,7 +279,6 @@ xfs_trim_gather_extents( xfs_extent_busy_insert_discard(pag, fbno, flen, &extents->extent_list); - *blocks_trimmed += flen; next_extent: if (tcur->by_bno) error = xfs_btree_increment(cur, 0, &i); @@ -327,8 +325,7 @@ xfs_trim_perag_extents( struct xfs_perag *pag, xfs_agblock_t start, xfs_agblock_t end, - xfs_extlen_t minlen, - uint64_t *blocks_trimmed) + xfs_extlen_t minlen) { struct xfs_trim_cur tcur = { .start = start, @@ -354,8 +351,7 @@ xfs_trim_perag_extents( extents->owner = extents; INIT_LIST_HEAD(&extents->extent_list); - error = xfs_trim_gather_extents(pag, &tcur, extents, - blocks_trimmed); + error = xfs_trim_gather_extents(pag, &tcur, extents); if (error) { kfree(extents); break; @@ -389,8 +385,7 @@ xfs_trim_datadev_extents( struct xfs_mount *mp, xfs_daddr_t start, xfs_daddr_t end, - xfs_extlen_t minlen, - uint64_t *blocks_trimmed) + xfs_extlen_t minlen) { xfs_agnumber_t start_agno, end_agno; xfs_agblock_t start_agbno, end_agbno; @@ -411,8 +406,7 @@ xfs_trim_datadev_extents( if (start_agno == end_agno) agend = end_agbno; - error = xfs_trim_perag_extents(pag, start_agbno, agend, minlen, - blocks_trimmed); + error = xfs_trim_perag_extents(pag, start_agbno, agend, minlen); if (error) last_error = error; @@ -431,9 +425,6 @@ struct xfs_trim_rtdev { /* list of rt extents to free */ struct list_head extent_list; - /* pointer to count of blocks trimmed */ - uint64_t *blocks_trimmed; - /* minimum length that caller allows us to trim */ xfs_rtblock_t minlen_fsb; @@ -551,7 +542,6 @@ xfs_trim_gather_rtextent( busyp->length = rlen; INIT_LIST_HEAD(&busyp->list); list_add_tail(&busyp->list, &tr->extent_list); - *tr->blocks_trimmed += rlen; tr->restart_rtx = rec->ar_startext + rec->ar_extcount; return 0; @@ -562,13 +552,11 @@ xfs_trim_rtdev_extents( struct xfs_mount *mp, xfs_daddr_t start, xfs_daddr_t end, - xfs_daddr_t minlen, - uint64_t *blocks_trimmed) + xfs_daddr_t minlen) { struct xfs_rtalloc_rec low = { }; struct xfs_rtalloc_rec high = { }; struct xfs_trim_rtdev tr = { - .blocks_trimmed = blocks_trimmed, .minlen_fsb = XFS_BB_TO_FSB(mp, minlen), }; struct xfs_trans *tp; @@ -634,7 +622,7 @@ xfs_trim_rtdev_extents( return error; } #else -# define xfs_trim_rtdev_extents(m,s,e,n,b) (-EOPNOTSUPP) +# define xfs_trim_rtdev_extents(...) (-EOPNOTSUPP) #endif /* CONFIG_XFS_RT */ /* @@ -661,7 +649,6 @@ xfs_ioc_trim( xfs_daddr_t start, end; xfs_extlen_t minlen; xfs_rfsblock_t max_blocks; - uint64_t blocks_trimmed = 0; int error, last_error = 0; if (!capable(CAP_SYS_ADMIN)) @@ -706,15 +693,13 @@ xfs_ioc_trim( end = start + BTOBBT(range.len) - 1; if (bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev)) { - error = xfs_trim_datadev_extents(mp, start, end, minlen, - &blocks_trimmed); + error = xfs_trim_datadev_extents(mp, start, end, minlen); if (error) last_error = error; } if (rt_bdev && !xfs_trim_should_stop()) { - error = xfs_trim_rtdev_extents(mp, start, end, minlen, - &blocks_trimmed); + error = xfs_trim_rtdev_extents(mp, start, end, minlen); if (error) last_error = error; } @@ -722,7 +707,8 @@ xfs_ioc_trim( if (last_error) return last_error; - range.len = XFS_FSB_TO_B(mp, blocks_trimmed); + range.len = min_t(unsigned long long, range.len, + XFS_FSB_TO_B(mp, max_blocks)); if (copy_to_user(urange, &range, sizeof(range))) return -EFAULT; return 0; -- cgit From 68415b349f3f16904f006275757f4fcb34b8ee43 Mon Sep 17 00:00:00 2001 From: Zizhi Wo Date: Thu, 22 Aug 2024 17:00:04 -0700 Subject: xfs: Fix the owner setting issue for rmap query in xfs fsmap I notice a rmap query bug in xfs_io fsmap: [root@fedora ~]# xfs_io -c 'fsmap -vvvv' /mnt EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL 0: 253:16 [0..7]: static fs metadata 0 (0..7) 8 1: 253:16 [8..23]: per-AG metadata 0 (8..23) 16 2: 253:16 [24..39]: inode btree 0 (24..39) 16 3: 253:16 [40..47]: per-AG metadata 0 (40..47) 8 4: 253:16 [48..55]: refcount btree 0 (48..55) 8 5: 253:16 [56..103]: per-AG metadata 0 (56..103) 48 6: 253:16 [104..127]: free space 0 (104..127) 24 ...... Bug: [root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 0 3' /mnt [root@fedora ~]# Normally, we should be able to get one record, but we got nothing. The root cause of this problem lies in the incorrect setting of rm_owner in the rmap query. In the case of the initial query where the owner is not set, __xfs_getfsmap_datadev() first sets info->high.rm_owner to ULLONG_MAX. This is done to prevent any omissions when comparing rmap items. However, if the current ag is detected to be the last one, the function sets info's high_irec based on the provided key. If high->rm_owner is not specified, it should continue to be set to ULLONG_MAX; otherwise, there will be issues with interval omissions. For example, consider "start" and "end" within the same block. If high->rm_owner == 0, it will be smaller than the founded record in rmapbt, resulting in a query with no records. The main call stack is as follows: xfs_ioc_getfsmap xfs_getfsmap xfs_getfsmap_datadev_rmapbt __xfs_getfsmap_datadev info->high.rm_owner = ULLONG_MAX if (pag->pag_agno == end_ag) xfs_fsmap_owner_to_rmap // set info->high.rm_owner = 0 because fmr_owner == -1ULL dest->rm_owner = 0 // get nothing xfs_getfsmap_datadev_rmapbt_query The problem can be resolved by simply modify the xfs_fsmap_owner_to_rmap function internal logic to achieve. After applying this patch, the above problem have been solved: [root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 0 3' /mnt EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL 0: 253:16 [0..7]: static fs metadata 0 (0..7) 8 Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl") Signed-off-by: Zizhi Wo Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Chandan Babu R --- fs/xfs/xfs_fsmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 85dbb46452ca..3a30b36779db 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -71,7 +71,7 @@ xfs_fsmap_owner_to_rmap( switch (src->fmr_owner) { case 0: /* "lowest owner id possible" */ case -1ULL: /* "highest owner id possible" */ - dest->rm_owner = 0; + dest->rm_owner = src->fmr_owner; break; case XFS_FMR_OWN_FREE: dest->rm_owner = XFS_RMAP_OWN_NULL; -- cgit From 7af6c720417f21f015f46baa33e182f349ddc93b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 15 Aug 2024 20:48:57 +0800 Subject: iommu/vt-d: Fix incorrect domain ID in context flush helper The helper intel_context_flush_present() is designed to flush all related caches when a context entry with the present bit set is modified. It currently retrieves the domain ID from the context entry and uses it to flush the IOTLB and context caches. This is incorrect when the context entry transitions from present to non-present, as the domain ID field is cleared before calling the helper. Fix it by passing the domain ID programmed in the context entry before the change to intel_context_flush_present(). This ensures that the correct domain ID is used for cache invalidation. Fixes: f90584f4beb8 ("iommu/vt-d: Add helper to flush caches for context change") Reported-by: Alex Williamson Closes: https://lore.kernel.org/linux-iommu/20240814162726.5efe1a6e.alex.williamson@redhat.com/ Signed-off-by: Lu Baolu Tested-by: Alex Williamson Reviewed-by: Alex Williamson Reviewed-by: Jerry Snitselaar Reviewed-by: Jacob Pan Link: https://lore.kernel.org/r/20240815124857.70038-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 8 ++++++-- drivers/iommu/intel/iommu.h | 2 +- drivers/iommu/intel/pasid.c | 7 ++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 9ff8b83c19a3..4aa070cf56e7 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1944,6 +1944,7 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 { struct intel_iommu *iommu = info->iommu; struct context_entry *context; + u16 did; spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, 0); @@ -1952,10 +1953,11 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 return; } + did = context_domain_id(context); context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - intel_context_flush_present(info, context, true); + intel_context_flush_present(info, context, did, true); } static int domain_setup_first_level(struct intel_iommu *iommu, @@ -4249,6 +4251,7 @@ static int context_flip_pri(struct device_domain_info *info, bool enable) struct intel_iommu *iommu = info->iommu; u8 bus = info->bus, devfn = info->devfn; struct context_entry *context; + u16 did; spin_lock(&iommu->lock); if (context_copied(iommu, bus, devfn)) { @@ -4261,6 +4264,7 @@ static int context_flip_pri(struct device_domain_info *info, bool enable) spin_unlock(&iommu->lock); return -ENODEV; } + did = context_domain_id(context); if (enable) context_set_sm_pre(context); @@ -4269,7 +4273,7 @@ static int context_flip_pri(struct device_domain_info *info, bool enable) if (!ecap_coherent(iommu->ecap)) clflush_cache_range(context, sizeof(*context)); - intel_context_flush_present(info, context, true); + intel_context_flush_present(info, context, did, true); spin_unlock(&iommu->lock); return 0; diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index b67c14da1240..a969be2258b1 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1154,7 +1154,7 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, void intel_context_flush_present(struct device_domain_info *info, struct context_entry *context, - bool affect_domains); + u16 did, bool affect_domains); #ifdef CONFIG_INTEL_IOMMU_SVM void intel_svm_check(struct intel_iommu *iommu); diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 5792c817cefa..b51fc268dc84 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -683,6 +683,7 @@ static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn) struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; struct context_entry *context; + u16 did; spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, false); @@ -691,10 +692,11 @@ static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn) return; } + did = context_domain_id(context); context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - intel_context_flush_present(info, context, false); + intel_context_flush_present(info, context, did, false); } static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data) @@ -885,10 +887,9 @@ static void __context_flush_dev_iotlb(struct device_domain_info *info) */ void intel_context_flush_present(struct device_domain_info *info, struct context_entry *context, - bool flush_domains) + u16 did, bool flush_domains) { struct intel_iommu *iommu = info->iommu; - u16 did = context_domain_id(context); struct pasid_entry *pte; int i; -- cgit From 996dc53ac289b81957aa70d62ccadc6986d26a87 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 22 Aug 2024 11:45:54 -0300 Subject: iommufd: Do not allow creating areas without READ or WRITE This results in passing 0 or just IOMMU_CACHE to iommu_map(). Most of the page table formats don't like this: amdv1 - -EINVAL armv7s - returns 0, doesn't update mapped arm-lpae - returns 0 doesn't update mapped dart - returns 0, doesn't update mapped VT-D - returns -EINVAL Unfortunately the three formats that return 0 cause serious problems: - Returning ret = but not uppdating mapped from domain->map_pages() causes an infinite loop in __iommu_map() - Not writing ioptes means that VFIO/iommufd have no way to recover them and we will have memory leaks and worse during unmap Since almost nothing can support this, and it is a useless thing to do, block it early in iommufd. Cc: stable@kernel.org Fixes: aad37e71d5c4 ("iommufd: IOCTLs for the io_pagetable") Signed-off-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/1-v1-1211e1294c27+4b1-iommu_no_prot_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommufd/ioas.c | 8 ++++++++ tools/testing/selftests/iommu/iommufd.c | 6 +++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c index 742248276548..157a89b993e4 100644 --- a/drivers/iommu/iommufd/ioas.c +++ b/drivers/iommu/iommufd/ioas.c @@ -213,6 +213,10 @@ int iommufd_ioas_map(struct iommufd_ucmd *ucmd) if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX) return -EOVERFLOW; + if (!(cmd->flags & + (IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE))) + return -EINVAL; + ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id); if (IS_ERR(ioas)) return PTR_ERR(ioas); @@ -253,6 +257,10 @@ int iommufd_ioas_copy(struct iommufd_ucmd *ucmd) cmd->dst_iova >= ULONG_MAX) return -EOVERFLOW; + if (!(cmd->flags & + (IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE))) + return -EINVAL; + src_ioas = iommufd_get_ioas(ucmd->ictx, cmd->src_ioas_id); if (IS_ERR(src_ioas)) return PTR_ERR(src_ioas); diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 6343f4053bd4..4927b9add5ad 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -825,7 +825,7 @@ TEST_F(iommufd_ioas, copy_area) { struct iommu_ioas_copy copy_cmd = { .size = sizeof(copy_cmd), - .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE, .dst_ioas_id = self->ioas_id, .src_ioas_id = self->ioas_id, .length = PAGE_SIZE, @@ -1318,7 +1318,7 @@ TEST_F(iommufd_ioas, copy_sweep) { struct iommu_ioas_copy copy_cmd = { .size = sizeof(copy_cmd), - .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE, .src_ioas_id = self->ioas_id, .dst_iova = MOCK_APERTURE_START, .length = MOCK_PAGE_SIZE, @@ -1608,7 +1608,7 @@ TEST_F(iommufd_mock_domain, user_copy) }; struct iommu_ioas_copy copy_cmd = { .size = sizeof(copy_cmd), - .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE, .dst_ioas_id = self->ioas_id, .dst_iova = MOCK_APERTURE_START, .length = BUFFER_SIZE, -- cgit From 6093cd582f8e027117a8d4ad5d129a1aacdc53d2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 22 Aug 2024 11:45:55 -0300 Subject: iommu: Do not return 0 from map_pages if it doesn't do anything These three implementations of map_pages() all succeed if a mapping is requested with no read or write. Since they return back to __iommu_map() leaving the mapped output as 0 it triggers an infinite loop. Therefore nothing is using no-access protection bits. Further, VFIO and iommufd rely on iommu_iova_to_phys() to get back PFNs stored by map, if iommu_map() succeeds but iommu_iova_to_phys() fails that will create serious bugs. Thus remove this never used "nothing to do" concept and just fail map immediately. Fixes: e5fc9753b1a8 ("iommu/io-pgtable: Add ARMv7 short descriptor support") Fixes: e1d3c0fd701d ("iommu: add ARM LPAE page table allocator") Fixes: 745ef1092bcf ("iommu/io-pgtable: Move Apple DART support to its own file") Signed-off-by: Jason Gunthorpe Acked-by: Will Deacon Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/2-v1-1211e1294c27+4b1-iommu_no_prot_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm-v7s.c | 3 +-- drivers/iommu/io-pgtable-arm.c | 3 +-- drivers/iommu/io-pgtable-dart.c | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 75f244a3e12d..06ffc683b28f 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -552,9 +552,8 @@ static int arm_v7s_map_pages(struct io_pgtable_ops *ops, unsigned long iova, paddr >= (1ULL << data->iop.cfg.oas))) return -ERANGE; - /* If no access, then nothing to do */ if (!(prot & (IOMMU_READ | IOMMU_WRITE))) - return 0; + return -EINVAL; while (pgcount--) { ret = __arm_v7s_map(data, iova, paddr, pgsize, prot, 1, data->pgd, diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index f5d9fd1f45bf..ff4149ae1751 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -515,9 +515,8 @@ static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova, if (WARN_ON(iaext || paddr >> cfg->oas)) return -ERANGE; - /* If no access, then nothing to do */ if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) - return 0; + return -EINVAL; prot = arm_lpae_prot_to_pte(data, iommu_prot); ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl, diff --git a/drivers/iommu/io-pgtable-dart.c b/drivers/iommu/io-pgtable-dart.c index ad28031e1e93..c004640640ee 100644 --- a/drivers/iommu/io-pgtable-dart.c +++ b/drivers/iommu/io-pgtable-dart.c @@ -245,9 +245,8 @@ static int dart_map_pages(struct io_pgtable_ops *ops, unsigned long iova, if (WARN_ON(paddr >> cfg->oas)) return -ERANGE; - /* If no access, then nothing to do */ if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) - return 0; + return -EINVAL; tbl = dart_get_table(data, iova); -- cgit From 51eeef9a482bcb00f6f75eda4de9bd013092b76f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 23 Aug 2024 17:54:54 +0100 Subject: MAINTAINERS: Add Jean-Philippe as SMMUv3 SVA reviewer Add Jean-Philippe as a reviewer for the Arm SMMUv3 SVA support, since he's been a consistent contributor to that code over the years and understands the relevant parts of the architecture much better than me. Cc: Robin Murphy Cc: Jean-Philippe Brucker Cc: Mostafa Saleh Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20240823165454.1064-1-will@kernel.org Signed-off-by: Joerg Roedel --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index f328373463b0..445cec2ae4ed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1880,6 +1880,10 @@ F: Documentation/devicetree/bindings/iommu/arm,smmu* F: drivers/iommu/arm/ F: drivers/iommu/io-pgtable-arm* +ARM SMMU SVA SUPPORT +R: Jean-Philippe Brucker +F: drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c + ARM SUB-ARCHITECTURES L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -- cgit From 28f5df210d06beb5920cf80446f1c27456c14b92 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Sun, 25 Aug 2024 16:47:50 +0200 Subject: random: vDSO: reject unknown getrandom() flags Like the getrandom() syscall, vDSO getrandom() must also reject unknown flags. [1] It would be possible to return -EINVAL from vDSO itself, but in the possible case that a new flag is added to getrandom() syscall in the future, it would be easier to get the behavior from the syscall, instead of erroring until the vDSO is extended to support the new flag or explicitly falling back. [1] Designing the API: Planning for Extension https://docs.kernel.org/process/adding-syscalls.html#designing-the-api-planning-for-extension Signed-off-by: Yann Droneaud [Jason: reworded commit message] Signed-off-by: Jason A. Donenfeld --- lib/vdso/getrandom.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/vdso/getrandom.c b/lib/vdso/getrandom.c index b230f0b10832..e1db228bc4f0 100644 --- a/lib/vdso/getrandom.c +++ b/lib/vdso/getrandom.c @@ -85,6 +85,10 @@ __cvdso_getrandom_data(const struct vdso_rng_data *rng_info, void *buffer, size_ if (unlikely(((unsigned long)opaque_state & ~PAGE_MASK) + sizeof(*state) > PAGE_SIZE)) return -EFAULT; + /* Handle unexpected flags by falling back to the kernel. */ + if (unlikely(flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE))) + goto fallback_syscall; + /* If the caller passes the wrong size, which might happen due to CRIU, fallback. */ if (unlikely(opaque_len != sizeof(*state))) goto fallback_syscall; -- cgit From 2dc43c5e212036458ed7c5586fb82ee183fee504 Mon Sep 17 00:00:00 2001 From: Hendrik Borghorst Date: Sun, 25 Aug 2024 19:43:47 +0200 Subject: ALSA: hda/realtek: support HP Pavilion Aero 13-bg0xxx Mute LED This patch adds the HP Pavilion Aero 13 (13-bg0xxx) (year 2024) to list of quirks for keyboard LED mute indication. The laptop has two LEDs (one for speaker and one for mic mute). The pre-existing quirk ALC245_FIXUP_HP_X360_MUTE_LEDS chains both the quirk for mic and speaker mute. Tested on 6.11.0-rc4 with the aforementioned laptop. Signed-off-by: Hendrik Borghorst Cc: Link: https://patch.msgid.link/20240825174351.5687-1-hendrikborghorst@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c04eac6a5064..588738ce7380 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10380,6 +10380,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8cbd, "HP Pavilion Aero Laptop 13-bg0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS), SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8cde, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), -- cgit From 28b329f431cef840fddd9a9b493bc3eff1aa06c0 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 26 Aug 2024 10:49:40 +0100 Subject: ALSA: hda: hda_component: Fix mutex crash if nothing ever binds Move the initialization of parent->mutex into hda_component_manager_init() so that it is always valid. In hda_component_manager_bind() do not clear the parent information. Only zero-fill the per-component data ready for it to be filled in by the components as they bind. Previously parent->mutex was being initialized only in hda_component_manager_bind(). This meant that it was only initialized if all components appeared and there was a bind callback. If there wasn't a bind the mutex object was not valid when the Realtek driver called any of the other functions. Signed-off-by: Richard Fitzgerald Fixes: 047b9cbbaa8e ("ALSA: hda: hda_component: Protect shared data with a mutex") Link: https://patch.msgid.link/20240826094940.45563-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_component.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_component.c b/sound/pci/hda/hda_component.c index 7b19cb38b4e0..b7dfdb10d156 100644 --- a/sound/pci/hda/hda_component.c +++ b/sound/pci/hda/hda_component.c @@ -141,8 +141,7 @@ int hda_component_manager_bind(struct hda_codec *cdc, int ret; /* Init shared and component specific data */ - memset(parent, 0, sizeof(*parent)); - mutex_init(&parent->mutex); + memset(parent->comps, 0, sizeof(parent->comps)); parent->codec = cdc; mutex_lock(&parent->mutex); @@ -164,6 +163,8 @@ int hda_component_manager_init(struct hda_codec *cdc, struct hda_scodec_match *sm; int ret, i; + mutex_init(&parent->mutex); + for (i = 0; i < count; i++) { sm = devm_kmalloc(dev, sizeof(*sm), GFP_KERNEL); if (!sm) -- cgit From 5fd0628918977a0afdc2e6bc562d8751b5d3b8c5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 26 Aug 2024 12:45:22 +0200 Subject: netfilter: nf_tables: restore IP sanity checks for netdev/egress Subtract network offset to skb->len before performing IPv4 header sanity checks, then adjust transport offset from offset from mac header. Jorge Ortiz says: When small UDP packets (< 4 bytes payload) are sent from eth0, `meta l4proto udp` condition is not met because `NFT_PKTINFO_L4PROTO` is not set. This happens because there is a comparison that checks if the transport header offset exceeds the total length. This comparison does not take into account the fact that the skb network offset might be non-zero in egress mode (e.g., 14 bytes for Ethernet header). Fixes: 0ae8e4cca787 ("netfilter: nf_tables: set transport offset from mac header for netdev/egress") Reported-by: Jorge Ortiz Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv4.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index 60a7d0ce3080..fcf967286e37 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -19,7 +19,7 @@ static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt) static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) { struct iphdr *iph, _iph; - u32 len, thoff; + u32 len, thoff, skb_len; iph = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb), sizeof(*iph), &_iph); @@ -30,8 +30,10 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) return -1; len = iph_totlen(pkt->skb, iph); - thoff = skb_network_offset(pkt->skb) + (iph->ihl * 4); - if (pkt->skb->len < len) + thoff = iph->ihl * 4; + skb_len = pkt->skb->len - skb_network_offset(pkt->skb); + + if (skb_len < len) return -1; else if (len < thoff) return -1; @@ -40,7 +42,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = iph->protocol; - pkt->thoff = thoff; + pkt->thoff = skb_network_offset(pkt->skb) + thoff; pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; return 0; -- cgit From 9286dfd5735b9cceb6a14bdf15e13400ccb60fe7 Mon Sep 17 00:00:00 2001 From: Mathieu Fenniak Date: Fri, 23 Aug 2024 15:56:28 +0200 Subject: platform/x86: asus-wmi: Fix spurious rfkill on UX8406MA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Asus Zenbook Duo (UX8406MA) has a keyboard which can be placed on the laptop to connect it via USB, or can be removed from the laptop to reveal a hidden secondary display in which case the keyboard operates via Bluetooth. When it is placed on the secondary display to connect via USB, it emits a keypress for a wireless disable. This causes the rfkill system to be activated disconnecting the current wifi connection, which doesn't reflect the user's true intention. Detect this hardware and suppress any wireless switches from the keyboard; this keyboard does not have a wireless toggle capability so these presses are always spurious. Signed-off-by: Mathieu Fenniak Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240823135630.128447-1-mathieu@fenniak.net Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-nb-wmi.c | 20 +++++++++++++++++++- drivers/platform/x86/asus-wmi.h | 1 + 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index fceffe2082ec..ed3633c5955d 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -145,6 +145,10 @@ static struct quirk_entry quirk_asus_ignore_fan = { .wmi_ignore_fan = true, }; +static struct quirk_entry quirk_asus_zenbook_duo_kbd = { + .ignore_key_wlan = true, +}; + static int dmi_matched(const struct dmi_system_id *dmi) { pr_info("Identified laptop model '%s'\n", dmi->ident); @@ -516,6 +520,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_ignore_fan, }, + { + .callback = dmi_matched, + .ident = "ASUS Zenbook Duo UX8406MA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "UX8406MA"), + }, + .driver_data = &quirk_asus_zenbook_duo_kbd, + }, {}, }; @@ -630,7 +643,12 @@ static void asus_nb_wmi_key_filter(struct asus_wmi_driver *asus_wmi, int *code, case 0x32: /* Volume Mute */ if (atkbd_reports_vol_keys) *code = ASUS_WMI_KEY_IGNORE; - + break; + case 0x5D: /* Wireless console Toggle */ + case 0x5E: /* Wireless console Enable */ + case 0x5F: /* Wireless console Disable */ + if (quirks->ignore_key_wlan) + *code = ASUS_WMI_KEY_IGNORE; break; } } diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index cc30f1853847..d02f15fd3482 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -40,6 +40,7 @@ struct quirk_entry { bool wmi_force_als_set; bool wmi_ignore_fan; bool filter_i8042_e1_extended_codes; + bool ignore_key_wlan; enum asus_wmi_tablet_switch_mode tablet_switch_mode; int wapf; /* -- cgit From a3379eca24a7da5118a7d090da6f8eb8611acac8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 25 Aug 2024 15:24:15 +0200 Subject: platform/x86: x86-android-tablets: Make Lenovo Yoga Tab 3 X90F DMI match less strict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are 2G and 4G RAM versions of the Lenovo Yoga Tab 3 X90F and it turns out that the 2G version has a DMI product name of "CHERRYVIEW D1 PLATFORM" where as the 4G version has "CHERRYVIEW C0 PLATFORM". The sys-vendor + product-version check are unique enough that the product-name check is not necessary. Drop the product-name check so that the existing DMI match for the 4G RAM version also matches the 2G RAM version. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240825132415.8307-1-hdegoede@redhat.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/x86-android-tablets/dmi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/platform/x86/x86-android-tablets/dmi.c b/drivers/platform/x86/x86-android-tablets/dmi.c index 141a2d25e83b..387dd092c4dd 100644 --- a/drivers/platform/x86/x86-android-tablets/dmi.c +++ b/drivers/platform/x86/x86-android-tablets/dmi.c @@ -140,7 +140,6 @@ const struct dmi_system_id x86_android_tablet_ids[] __initconst = { /* Lenovo Yoga Tab 3 Pro YT3-X90F */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"), }, .driver_data = (void *)&lenovo_yt3_info, -- cgit From 052f3951640fd96d2e777b3272a925ec6c0c8100 Mon Sep 17 00:00:00 2001 From: Ryan Sullivan Date: Thu, 22 Aug 2024 13:31:22 -0400 Subject: selftests/livepatch: wait for atomic replace to occur On some machines with a large number of CPUs there is a sizable delay between an atomic replace occurring and when sysfs updates accordingly. This fix uses 'loop_until' to wait for the atomic replace to unload all previous livepatches. Reported-by: CKI Project Closes: https://datawarehouse.cki-project.org/kcidb/tests/redhat:1413102084-x86_64-kernel_upt_28 Signed-off-by: Ryan Sullivan Reviewed-by: Petr Mladek Acked-by: Joe Lawrence Link: https://lore.kernel.org/r/20240822173122.14760-1-rysulliv@redhat.com Signed-off-by: Petr Mladek --- tools/testing/selftests/livepatch/test-livepatch.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh index 65c9c058458d..bd13257bfdfe 100755 --- a/tools/testing/selftests/livepatch/test-livepatch.sh +++ b/tools/testing/selftests/livepatch/test-livepatch.sh @@ -139,11 +139,8 @@ load_lp $MOD_REPLACE replace=1 grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg -mods=(/sys/kernel/livepatch/*) -nmods=${#mods[@]} -if [ "$nmods" -ne 1 ]; then - die "Expecting only one moduled listed, found $nmods" -fi +loop_until 'mods=(/sys/kernel/livepatch/*); nmods=${#mods[@]}; [[ "$nmods" -eq 1 ]]' || + die "Expecting only one moduled listed, found $nmods" # These modules were disabled by the atomic replace for mod in $MOD_LIVEPATCH3 $MOD_LIVEPATCH2 $MOD_LIVEPATCH1; do -- cgit From f2c6dbd220170c2396fb019ead67fbada1e23ebd Mon Sep 17 00:00:00 2001 From: David Gow Date: Fri, 16 Aug 2024 12:51:22 +0800 Subject: kunit: Device wrappers should also manage driver name kunit_driver_create() accepts a name for the driver, but does not copy it, so if that name is either on the stack, or otherwise freed, we end up with a use-after-free when the driver is cleaned up. Instead, strdup() the name, and manage it as another KUnit allocation. As there was no existing kunit_kstrdup(), we add one. Further, add a kunit_ variant of strdup_const() and kfree_const(), so we don't need to allocate and manage the string in the majority of cases where it's a constant. However, these are inline functions, and is_kernel_rodata() only works for built-in code. This causes problems in two cases: - If kunit is built as a module, __{start,end}_rodata is not defined. - If a kunit test using these functions is built as a module, it will suffer the same fate. This fixes a KASAN splat with overflow.overflow_allocation_test, when built as a module. Restrict the is_kernel_rodata() case to when KUnit is built as a module, which fixes the first case, at the cost of losing the optimisation. Also, make kunit_{kstrdup,kfree}_const non-inline, so that other modules using them will not accidentally depend on is_kernel_rodata(). If KUnit is built-in, they'll benefit from the optimisation, if KUnit is not, they won't, but the string will be properly duplicated. Fixes: d03c720e03bd ("kunit: Add APIs for managing devices") Reported-by: Nico Pache Closes: https://groups.google.com/g/kunit-dev/c/81V9b9QYON0 Reviewed-by: Kees Cook Reviewed-by: Maxime Ripard Reviewed-by: Rae Moar Signed-off-by: David Gow Tested-by: Rae Moar Signed-off-by: Shuah Khan --- include/kunit/test.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/kunit/device.c | 7 +++++-- lib/kunit/test.c | 19 +++++++++++++++++++ 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/include/kunit/test.h b/include/kunit/test.h index e2a1f0928e8b..5ac237c949a0 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -28,6 +28,7 @@ #include #include +#include /* Static key: true if any KUnit tests are currently running */ DECLARE_STATIC_KEY_FALSE(kunit_running); @@ -480,6 +481,53 @@ static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp return kunit_kmalloc_array(test, n, size, gfp | __GFP_ZERO); } + +/** + * kunit_kfree_const() - conditionally free test managed memory + * @x: pointer to the memory + * + * Calls kunit_kfree() only if @x is not in .rodata section. + * See kunit_kstrdup_const() for more information. + */ +void kunit_kfree_const(struct kunit *test, const void *x); + +/** + * kunit_kstrdup() - Duplicates a string into a test managed allocation. + * + * @test: The test context object. + * @str: The NULL-terminated string to duplicate. + * @gfp: flags passed to underlying kmalloc(). + * + * See kstrdup() and kunit_kmalloc_array() for more information. + */ +static inline char *kunit_kstrdup(struct kunit *test, const char *str, gfp_t gfp) +{ + size_t len; + char *buf; + + if (!str) + return NULL; + + len = strlen(str) + 1; + buf = kunit_kmalloc(test, len, gfp); + if (buf) + memcpy(buf, str, len); + return buf; +} + +/** + * kunit_kstrdup_const() - Conditionally duplicates a string into a test managed allocation. + * + * @test: The test context object. + * @str: The NULL-terminated string to duplicate. + * @gfp: flags passed to underlying kmalloc(). + * + * Calls kunit_kstrdup() only if @str is not in the rodata section. Must be freed with + * kunit_kfree_const() -- not kunit_kfree(). + * See kstrdup_const() and kunit_kmalloc_array() for more information. + */ +const char *kunit_kstrdup_const(struct kunit *test, const char *str, gfp_t gfp); + /** * kunit_vm_mmap() - Allocate KUnit-tracked vm_mmap() area * @test: The test context object. diff --git a/lib/kunit/device.c b/lib/kunit/device.c index 25c81ed465fb..520c1fccee8a 100644 --- a/lib/kunit/device.c +++ b/lib/kunit/device.c @@ -89,7 +89,7 @@ struct device_driver *kunit_driver_create(struct kunit *test, const char *name) if (!driver) return ERR_PTR(err); - driver->name = name; + driver->name = kunit_kstrdup_const(test, name, GFP_KERNEL); driver->bus = &kunit_bus_type; driver->owner = THIS_MODULE; @@ -192,8 +192,11 @@ void kunit_device_unregister(struct kunit *test, struct device *dev) const struct device_driver *driver = to_kunit_device(dev)->driver; kunit_release_action(test, device_unregister_wrapper, dev); - if (driver) + if (driver) { + const char *driver_name = driver->name; kunit_release_action(test, driver_unregister_wrapper, (void *)driver); + kunit_kfree_const(test, driver_name); + } } EXPORT_SYMBOL_GPL(kunit_device_unregister); diff --git a/lib/kunit/test.c b/lib/kunit/test.c index e8b1b52a19ab..089c832e3cdb 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -874,6 +874,25 @@ void kunit_kfree(struct kunit *test, const void *ptr) } EXPORT_SYMBOL_GPL(kunit_kfree); +void kunit_kfree_const(struct kunit *test, const void *x) +{ +#if !IS_MODULE(CONFIG_KUNIT) + if (!is_kernel_rodata((unsigned long)x)) +#endif + kunit_kfree(test, x); +} +EXPORT_SYMBOL_GPL(kunit_kfree_const); + +const char *kunit_kstrdup_const(struct kunit *test, const char *str, gfp_t gfp) +{ +#if !IS_MODULE(CONFIG_KUNIT) + if (is_kernel_rodata((unsigned long)str)) + return str; +#endif + return kunit_kstrdup(test, str, gfp); +} +EXPORT_SYMBOL_GPL(kunit_kstrdup_const); + void kunit_cleanup(struct kunit *test) { struct kunit_resource *res; -- cgit From 4186c8d9e6af57bab0687b299df10ebd47534a0a Mon Sep 17 00:00:00 2001 From: Jacky Chou Date: Thu, 22 Aug 2024 15:30:06 +0800 Subject: net: ftgmac100: Ensure tx descriptor updates are visible The driver must ensure TX descriptor updates are visible before updating TX pointer and TX clear pointer. This resolves TX hangs observed on AST2600 when running iperf3. Signed-off-by: Jacky Chou Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index fddfd1dd5070..4c546c3aef0f 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -572,7 +572,7 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed) (*processed)++; return true; - drop: +drop: /* Clean rxdes0 (which resets own bit) */ rxdes->rxdes0 = cpu_to_le32(status & priv->rxdes0_edorr_mask); priv->rx_pointer = ftgmac100_next_rx_pointer(priv, pointer); @@ -656,6 +656,11 @@ static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv) ftgmac100_free_tx_packet(priv, pointer, skb, txdes, ctl_stat); txdes->txdes0 = cpu_to_le32(ctl_stat & priv->txdes0_edotr_mask); + /* Ensure the descriptor config is visible before setting the tx + * pointer. + */ + smp_wmb(); + priv->tx_clean_pointer = ftgmac100_next_tx_pointer(priv, pointer); return true; @@ -809,6 +814,11 @@ static netdev_tx_t ftgmac100_hard_start_xmit(struct sk_buff *skb, dma_wmb(); first->txdes0 = cpu_to_le32(f_ctl_stat); + /* Ensure the descriptor config is visible before setting the tx + * pointer. + */ + smp_wmb(); + /* Update next TX pointer */ priv->tx_pointer = pointer; @@ -829,7 +839,7 @@ static netdev_tx_t ftgmac100_hard_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; - dma_err: +dma_err: if (net_ratelimit()) netdev_err(netdev, "map tx fragment failed\n"); @@ -851,7 +861,7 @@ static netdev_tx_t ftgmac100_hard_start_xmit(struct sk_buff *skb, * last fragment, so we know ftgmac100_free_tx_packet() * hasn't freed the skb yet. */ - drop: +drop: /* Drop the packet */ dev_kfree_skb_any(skb); netdev->stats.tx_dropped++; @@ -1344,7 +1354,7 @@ static void ftgmac100_reset(struct ftgmac100 *priv) ftgmac100_init_all(priv, true); netdev_dbg(netdev, "Reset done !\n"); - bail: +bail: if (priv->mii_bus) mutex_unlock(&priv->mii_bus->mdio_lock); if (netdev->phydev) @@ -1543,15 +1553,15 @@ static int ftgmac100_open(struct net_device *netdev) return 0; - err_ncsi: +err_ncsi: napi_disable(&priv->napi); netif_stop_queue(netdev); - err_alloc: +err_alloc: ftgmac100_free_buffers(priv); free_irq(netdev->irq, netdev); - err_irq: +err_irq: netif_napi_del(&priv->napi); - err_hw: +err_hw: iowrite32(0, priv->base + FTGMAC100_OFFSET_IER); ftgmac100_free_rings(priv); return err; -- cgit From 33f58a0480bb9e2479ccdf556f61363723a50d47 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 21 Aug 2024 01:19:57 +0200 Subject: btrfs: initialize last_extent_end to fix -Wmaybe-uninitialized warning in extent_fiemap() There's a warning (probably on some older compiler version): fs/btrfs/fiemap.c: warning: 'last_extent_end' may be used uninitialized in this function [-Wmaybe-uninitialized]: => 822:19 Initialize the variable to 0 although it's not necessary as it's either properly set or not used after an error. The called function is in the same file so this is a false alert but we want to fix all -Wmaybe-uninitialized reports. Link: https://lore.kernel.org/all/20240819070639.2558629-1-geert@linux-m68k.org/ Reported-by: Geert Uytterhoeven Signed-off-by: David Sterba --- fs/btrfs/fiemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/fiemap.c b/fs/btrfs/fiemap.c index 8f95f3e44e99..df7f09f3b02e 100644 --- a/fs/btrfs/fiemap.c +++ b/fs/btrfs/fiemap.c @@ -637,7 +637,7 @@ static int extent_fiemap(struct btrfs_inode *inode, struct btrfs_path *path; struct fiemap_cache cache = { 0 }; struct btrfs_backref_share_check_ctx *backref_ctx; - u64 last_extent_end; + u64 last_extent_end = 0; u64 prev_extent_end; u64 range_start; u64 range_end; -- cgit From 274ea3563e5ab9f468c15bfb9d2492803a66d9be Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 26 Aug 2024 23:11:32 +0800 Subject: LoongArch: Define ARCH_IRQ_INIT_FLAGS as IRQ_NOPROBE Currently we call irq_set_noprobe() in a loop for all IRQs, but indeed it only works for IRQs below NR_IRQS_LEGACY because at init_IRQ() only legacy interrupts have been allocated. Instead, we can define ARCH_IRQ_INIT_FLAGS as IRQ_NOPROBE in asm/hwirq.h and the core will automatically set the flag for all interrupts. Reviewed-by: Thomas Gleixner Signed-off-by: Huacai Chen Signed-off-by: Tianyang Zhang --- arch/loongarch/include/asm/hw_irq.h | 2 ++ arch/loongarch/kernel/irq.c | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/include/asm/hw_irq.h b/arch/loongarch/include/asm/hw_irq.h index af4f4e8fbd85..8156ffb67415 100644 --- a/arch/loongarch/include/asm/hw_irq.h +++ b/arch/loongarch/include/asm/hw_irq.h @@ -9,6 +9,8 @@ extern atomic_t irq_err_count; +#define ARCH_IRQ_INIT_FLAGS IRQ_NOPROBE + /* * interrupt-retrigger: NOP for now. This may not be appropriate for all * machines, we'll see ... diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c index f4991c03514f..adac8fcbb2ac 100644 --- a/arch/loongarch/kernel/irq.c +++ b/arch/loongarch/kernel/irq.c @@ -102,9 +102,6 @@ void __init init_IRQ(void) mp_ops.init_ipi(); #endif - for (i = 0; i < NR_IRQS; i++) - irq_set_noprobe(i); - for_each_possible_cpu(i) { page = alloc_pages_node(cpu_to_node(i), GFP_KERNEL, order); -- cgit From 80376323e2b6a4559f86b2b4d864848ac25cb054 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 26 Aug 2024 23:11:32 +0800 Subject: LoongArch: Add ifdefs to fix LSX and LASX related warnings There exist some warnings when building kernel if CONFIG_CPU_HAS_LBT is set but CONFIG_CPU_HAS_LSX and CONFIG_CPU_HAS_LASX are not set. In this case, there are no definitions of _restore_lsx & _restore_lasx and there are also no definitions of kvm_restore_lsx & kvm_restore_lasx in fpu.S and switch.S respectively, just add some ifdefs to fix these warnings. AS arch/loongarch/kernel/fpu.o arch/loongarch/kernel/fpu.o: warning: objtool: unexpected relocation symbol type in .rela.discard.func_stack_frame_non_standard: 0 arch/loongarch/kernel/fpu.o: warning: objtool: unexpected relocation symbol type in .rela.discard.func_stack_frame_non_standard: 0 AS [M] arch/loongarch/kvm/switch.o arch/loongarch/kvm/switch.o: warning: objtool: unexpected relocation symbol type in .rela.discard.func_stack_frame_non_standard: 0 arch/loongarch/kvm/switch.o: warning: objtool: unexpected relocation symbol type in .rela.discard.func_stack_frame_non_standard: 0 MODPOST Module.symvers ERROR: modpost: "kvm_restore_lsx" [arch/loongarch/kvm/kvm.ko] undefined! ERROR: modpost: "kvm_restore_lasx" [arch/loongarch/kvm/kvm.ko] undefined! Cc: stable@vger.kernel.org # 6.9+ Fixes: cb8a2ef0848c ("LoongArch: Add ORC stack unwinder support") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408120955.qls5oNQY-lkp@intel.com/ Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/fpu.S | 4 ++++ arch/loongarch/kvm/switch.S | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 69a85f2479fb..6ab640101457 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -530,6 +530,10 @@ SYM_FUNC_END(_restore_lasx_context) #ifdef CONFIG_CPU_HAS_LBT STACK_FRAME_NON_STANDARD _restore_fp +#ifdef CONFIG_CPU_HAS_LSX STACK_FRAME_NON_STANDARD _restore_lsx +#endif +#ifdef CONFIG_CPU_HAS_LASX STACK_FRAME_NON_STANDARD _restore_lasx #endif +#endif diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index 80e988985a6a..0c292f818492 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S @@ -277,6 +277,10 @@ SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest) #ifdef CONFIG_CPU_HAS_LBT STACK_FRAME_NON_STANDARD kvm_restore_fpu +#ifdef CONFIG_CPU_HAS_LSX STACK_FRAME_NON_STANDARD kvm_restore_lsx +#endif +#ifdef CONFIG_CPU_HAS_LASX STACK_FRAME_NON_STANDARD kvm_restore_lasx #endif +#endif -- cgit From 4956e07f05e239b274d042618a250c9fa3e92629 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Mon, 26 Aug 2024 23:11:32 +0800 Subject: LoongArch: KVM: Invalidate guest steal time address on vCPU reset If ParaVirt steal time feature is enabled, there is a percpu gpa address passed from guest vCPU and host modifies guest memory space with this gpa address. When vCPU is reset normally, it will notify host and invalidate gpa address. However if VM is crashed and VMM reboots VM forcely, the vCPU reboot notification callback will not be called in VM. Host needs invalidate the gpa address, else host will modify guest memory during VM reboots. Here it is invalidated from the vCPU KVM_REG_LOONGARCH_VCPU_RESET ioctl interface. Also funciton kvm_reset_timer() is removed at vCPU reset stage, since SW emulated timer is only used in vCPU block state. When a vCPU is removed from the block waiting queue, kvm_restore_timer() is called and SW timer is cancelled. And the timer register is also cleared at VMM when a vCPU is reset. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_vcpu.h | 1 - arch/loongarch/kvm/timer.c | 7 ------- arch/loongarch/kvm/vcpu.c | 2 +- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index c416cb7125c0..86570084e05a 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -76,7 +76,6 @@ static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { } #endif void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz); -void kvm_reset_timer(struct kvm_vcpu *vcpu); void kvm_save_timer(struct kvm_vcpu *vcpu); void kvm_restore_timer(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index bcc6b6d063d9..74a4b5c272d6 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -188,10 +188,3 @@ void kvm_save_timer(struct kvm_vcpu *vcpu) kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ESTAT); preempt_enable(); } - -void kvm_reset_timer(struct kvm_vcpu *vcpu) -{ - write_gcsr_timercfg(0); - kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TCFG, 0); - hrtimer_cancel(&vcpu->arch.swtimer); -} diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 16756ffb55e8..6905283f535b 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -647,7 +647,7 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, vcpu->kvm->arch.time_offset = (signed long)(v - drdtime()); break; case KVM_REG_LOONGARCH_VCPU_RESET: - kvm_reset_timer(vcpu); + vcpu->arch.st.guest_addr = 0; memset(&vcpu->arch.irq_pending, 0, sizeof(vcpu->arch.irq_pending)); memset(&vcpu->arch.irq_clear, 0, sizeof(vcpu->arch.irq_clear)); break; -- cgit From 3e9b4021fedf92c11233ae1a8615327d0cbbecd5 Mon Sep 17 00:00:00 2001 From: Daniel Gabay Date: Fri, 23 Aug 2024 10:55:46 +0200 Subject: wifi: mac80211: fix beacon SSID mismatch handling Return false when memcmp with zero_ssid returns 0 to correctly handle hidden SSIDs case. Fixes: 9cc88678db5b ("wifi: mac80211: check SSID in beacon") Reviewed-by: Andrei Otcheretianski Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Daniel Gabay Link: https://patch.msgid.link/20240823105546.7ab29ae287a6.I7f98e57e1ab6597614703fdd138cc88ad253d986@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4779a18ab75d..f9526bbc3633 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -6664,7 +6664,7 @@ static bool ieee80211_mgd_ssid_mismatch(struct ieee80211_sub_if_data *sdata, return true; /* hidden SSID: zeroed out */ - if (memcmp(elems->ssid, zero_ssid, elems->ssid_len)) + if (!memcmp(elems->ssid, zero_ssid, elems->ssid_len)) return false; return memcmp(elems->ssid, cfg->ssid, cfg->ssid_len); -- cgit From cb347bd29d0d106213a0cf4f86b72dffd08d3454 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 25 Aug 2024 19:17:02 +0300 Subject: wifi: iwlwifi: mvm: fix hibernation Fast resume is a feature that was recently introduced to speed up the resume time. It basically keeps the firmware alive while the system is suspended and that avoids starting again the whole device. This flow can't work for hibernation, since when the system boots, before the frozen image is loaded, the kernel may touch the device. As a result, we can't assume the device is in the exact same state as before the hibernation. Detect that we are resuming from hibernation through the PCI device and forbid the fast resume flow. We also need to shut down the device cleanly when that happens. In addition, in case the device is power gated during S3, we won't be able to keep the device alive. Detect this situation with BE200 at least with the help of the CSR_FUNC_SCRATCH register and reset the device upon resume if it was power gated during S3. Fixes: e8bb19c1d590 ("wifi: iwlwifi: support fast resume") Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240825191257.24eb3b19e74f.I3837810318dbef0a0a773cf4c4fcf89cdc6fdbd3@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h | 12 +++++++ drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 10 ++++++ drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 17 +++++++++- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 41 ++++++++++++++++++++++-- 4 files changed, 76 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h b/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h index 595fa6ddf084..8ef5ed2db051 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h @@ -85,6 +85,10 @@ struct iwl_cfg; * May sleep * @wimax_active: invoked when WiMax becomes active. May sleep * @time_point: called when transport layer wants to collect debug data + * @device_powered_off: called upon resume from hibernation but not only. + * Op_mode needs to reset its internal state because the device did not + * survive the system state transition. The firmware is no longer running, + * etc... */ struct iwl_op_mode_ops { struct iwl_op_mode *(*start)(struct iwl_trans *trans, @@ -107,6 +111,7 @@ struct iwl_op_mode_ops { void (*time_point)(struct iwl_op_mode *op_mode, enum iwl_fw_ini_time_point tp_id, union iwl_dbg_tlv_tp_data *tp_data); + void (*device_powered_off)(struct iwl_op_mode *op_mode); }; int iwl_opmode_register(const char *name, const struct iwl_op_mode_ops *ops); @@ -204,4 +209,11 @@ static inline void iwl_op_mode_time_point(struct iwl_op_mode *op_mode, op_mode->ops->time_point(op_mode, tp_id, tp_data); } +static inline void iwl_op_mode_device_powered_off(struct iwl_op_mode *op_mode) +{ + if (!op_mode || !op_mode->ops || !op_mode->ops->device_powered_off) + return; + op_mode->ops->device_powered_off(op_mode); +} + #endif /* __iwl_op_mode_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index b4d650583ac2..99a541d442bb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -3439,6 +3439,16 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) mutex_lock(&mvm->mutex); + /* Apparently, the device went away and device_powered_off() was called, + * don't even try to read the rt_status, the device is currently + * inaccessible. + */ + if (!test_bit(IWL_MVM_STATUS_IN_D3, &mvm->status)) { + IWL_INFO(mvm, + "Can't resume, device_powered_off() was called during wowlan\n"); + goto err; + } + mvm->last_reset_or_resume_time_jiffies = jiffies; /* get the BSS vif pointer again */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index b7dcae76a05d..75fc60a4808c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -2090,6 +2090,20 @@ static void iwl_op_mode_mvm_time_point(struct iwl_op_mode *op_mode, iwl_dbg_tlv_time_point(&mvm->fwrt, tp_id, tp_data); } +static void iwl_op_mode_mvm_device_powered_off(struct iwl_op_mode *op_mode) +{ + struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); + + mutex_lock(&mvm->mutex); + clear_bit(IWL_MVM_STATUS_IN_D3, &mvm->status); + mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED; + iwl_mvm_stop_device(mvm); +#ifdef CONFIG_PM + mvm->fast_resume = false; +#endif + mutex_unlock(&mvm->mutex); +} + #define IWL_MVM_COMMON_OPS \ /* these could be differentiated */ \ .queue_full = iwl_mvm_stop_sw_queue, \ @@ -2102,7 +2116,8 @@ static void iwl_op_mode_mvm_time_point(struct iwl_op_mode *op_mode, /* as we only register one, these MUST be common! */ \ .start = iwl_op_mode_mvm_start, \ .stop = iwl_op_mode_mvm_stop, \ - .time_point = iwl_op_mode_mvm_time_point + .time_point = iwl_op_mode_mvm_time_point, \ + .device_powered_off = iwl_op_mode_mvm_device_powered_off static const struct iwl_op_mode_ops iwl_mvm_ops = { IWL_MVM_COMMON_OPS, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 9ad43464b702..84fd93278450 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1577,11 +1577,12 @@ static int iwl_pci_suspend(struct device *device) return 0; } -static int iwl_pci_resume(struct device *device) +static int _iwl_pci_resume(struct device *device, bool restore) { struct pci_dev *pdev = to_pci_dev(device); struct iwl_trans *trans = pci_get_drvdata(pdev); struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + bool device_was_powered_off = false; /* Before you put code here, think about WoWLAN. You cannot check here * whether WoWLAN is enabled or not, and your code will run even if @@ -1597,6 +1598,26 @@ static int iwl_pci_resume(struct device *device) if (!trans->op_mode) return 0; + /* + * Scratch value was altered, this means the device was powered off, we + * need to reset it completely. + * Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan, + * so assume that any bits there mean that the device is usable. + */ + if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ && + !iwl_read32(trans, CSR_FUNC_SCRATCH)) + device_was_powered_off = true; + + if (restore || device_was_powered_off) { + trans->state = IWL_TRANS_NO_FW; + /* Hope for the best here ... If one of those steps fails we + * won't really know how to recover. + */ + iwl_pcie_prepare_card_hw(trans); + iwl_finish_nic_init(trans); + iwl_op_mode_device_powered_off(trans->op_mode); + } + /* In WOWLAN, let iwl_trans_pcie_d3_resume do the rest of the work */ if (test_bit(STATUS_DEVICE_ENABLED, &trans->status)) return 0; @@ -1617,9 +1638,23 @@ static int iwl_pci_resume(struct device *device) return 0; } +static int iwl_pci_restore(struct device *device) +{ + return _iwl_pci_resume(device, true); +} + +static int iwl_pci_resume(struct device *device) +{ + return _iwl_pci_resume(device, false); +} + static const struct dev_pm_ops iwl_dev_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(iwl_pci_suspend, - iwl_pci_resume) + .suspend = pm_sleep_ptr(iwl_pci_suspend), + .resume = pm_sleep_ptr(iwl_pci_resume), + .freeze = pm_sleep_ptr(iwl_pci_suspend), + .thaw = pm_sleep_ptr(iwl_pci_resume), + .poweroff = pm_sleep_ptr(iwl_pci_suspend), + .restore = pm_sleep_ptr(iwl_pci_restore), }; #define IWL_PM_OPS (&iwl_dev_pm_ops) -- cgit From f8a129c1e10256c785164ed5efa5d17d45fbd81b Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Sun, 25 Aug 2024 19:17:13 +0300 Subject: wifi: iwlwifi: lower message level for FW buffer destination An invalid buffer destination is not a problem for the driver and it does not make sense to report it with the KERN_ERR message level. As such, change the message to use IWL_DEBUG_FW. Reported-by: Len Brown Closes: https://lore.kernel.org/r/CAJvTdKkcxJss=DM2sxgv_MR5BeZ4_OC-3ad6tA40TYH2yqHCWw@mail.gmail.com Signed-off-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240825191257.20abf78f05bc.Ifbcecc2ae9fb40b9698302507dcba8b922c8d856@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c index e63efbf809f0..ae93a72542b2 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c @@ -89,7 +89,8 @@ iwl_pcie_ctxt_info_dbg_enable(struct iwl_trans *trans, } break; default: - IWL_ERR(trans, "WRT: Invalid buffer destination\n"); + IWL_DEBUG_FW(trans, "WRT: Invalid buffer destination (%d)\n", + le32_to_cpu(fw_mon_cfg->buf_location)); } out: if (dbg_flags) -- cgit From d44162280899c3fc2c6700e21e491e71c3c96e3d Mon Sep 17 00:00:00 2001 From: Daniel Gabay Date: Sun, 25 Aug 2024 19:17:05 +0300 Subject: wifi: iwlwifi: mvm: fix iwl_mvm_scan_fits() calculation The calculation should consider also the 6GHz IE's len, fix that. In addition, in iwl_mvm_sched_scan_start() the scan_fits helper is called only in case non_psc_incldued is true, but it should be called regardless, fix that as well. Signed-off-by: Daniel Gabay Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240825191257.7db825442fd2.I99f4d6587709de02072fd57957ec7472331c6b1d@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index 8e0df31f1b3e..ecd9d301e88b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -837,8 +837,8 @@ static inline bool iwl_mvm_scan_fits(struct iwl_mvm *mvm, int n_ssids, return ((n_ssids <= PROBE_OPTION_MAX) && (n_channels <= mvm->fw->ucode_capa.n_scan_channels) & (ies->common_ie_len + - ies->len[NL80211_BAND_2GHZ] + - ies->len[NL80211_BAND_5GHZ] <= + ies->len[NL80211_BAND_2GHZ] + ies->len[NL80211_BAND_5GHZ] + + ies->len[NL80211_BAND_6GHZ] <= iwl_mvm_max_scan_ie_fw_cmd_room(mvm))); } @@ -3168,18 +3168,16 @@ int iwl_mvm_sched_scan_start(struct iwl_mvm *mvm, params.n_channels = j; } - if (non_psc_included && - !iwl_mvm_scan_fits(mvm, req->n_ssids, ies, params.n_channels)) { - kfree(params.channels); - return -ENOBUFS; + if (!iwl_mvm_scan_fits(mvm, req->n_ssids, ies, params.n_channels)) { + ret = -ENOBUFS; + goto out; } uid = iwl_mvm_build_scan_cmd(mvm, vif, &hcmd, ¶ms, type); - - if (non_psc_included) - kfree(params.channels); - if (uid < 0) - return uid; + if (uid < 0) { + ret = uid; + goto out; + } ret = iwl_mvm_send_cmd(mvm, &hcmd); if (!ret) { @@ -3197,6 +3195,9 @@ int iwl_mvm_sched_scan_start(struct iwl_mvm *mvm, mvm->sched_scan_pass_all = SCHED_SCAN_PASS_ALL_DISABLED; } +out: + if (non_psc_included) + kfree(params.channels); return ret; } -- cgit From 916a5d9c5354c426220a0a6533a5e8ea1287d6ea Mon Sep 17 00:00:00 2001 From: Daniel Gabay Date: Sun, 25 Aug 2024 19:17:06 +0300 Subject: wifi: iwlwifi: mvm: fix iwl_mvm_max_scan_ie_fw_cmd_room() Driver creates also the WFA TPC element, consider that in the calculation. Signed-off-by: Daniel Gabay Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240825191257.e710ce446b7f.I2715c6742e9c3d160e2ba41bc4b35de370d2ce34@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index ecd9d301e88b..bae6aec8295c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -48,6 +48,8 @@ /* Number of iterations on the channel for mei filtered scan */ #define IWL_MEI_SCAN_NUM_ITER 5U +#define WFA_TPC_IE_LEN 9 + struct iwl_mvm_scan_timing_params { u32 suspend_time; u32 max_out_time; @@ -303,8 +305,8 @@ static int iwl_mvm_max_scan_ie_fw_cmd_room(struct iwl_mvm *mvm) max_probe_len = SCAN_OFFLOAD_PROBE_REQ_SIZE; - /* we create the 802.11 header and SSID element */ - max_probe_len -= 24 + 2; + /* we create the 802.11 header SSID element and WFA TPC element */ + max_probe_len -= 24 + 2 + WFA_TPC_IE_LEN; /* DS parameter set element is added on 2.4GHZ band if required */ if (iwl_mvm_rrm_scan_needed(mvm)) @@ -731,8 +733,6 @@ static u8 *iwl_mvm_copy_and_insert_ds_elem(struct iwl_mvm *mvm, const u8 *ies, return newpos; } -#define WFA_TPC_IE_LEN 9 - static void iwl_mvm_add_tpc_report_ie(u8 *pos) { pos[0] = WLAN_EID_VENDOR_SPECIFIC; -- cgit From cd6f46c2fdb82e80ca248549c1f3ebe08b4a63ab Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 25 Aug 2024 19:17:07 +0300 Subject: wifi: iwlwifi: mvm: take the mutex before running link selection iwl_mvm_select_links is called by the link selection worker and it requires the mutex. Take it in the link selection worker. This logic used to run from iwl_mvm_rx_umac_scan_complete_notif which had the mvm->mutex held. This was changed to run in a worker holding the wiphy mutex, but we also need the mvm->mutex. Fixes: 2e194efa3809 ("wifi: iwlwifi: mvm: Fix race in scan completion") Signed-off-by: Emmanuel Grumbach Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240825191257.0cacecd5db1e.Iaca38a078592b69bdd06549daf63408ccf1810e4@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 75fc60a4808c..f7ff8b02def4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1198,10 +1198,12 @@ static void iwl_mvm_trig_link_selection(struct wiphy *wiphy, struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, trig_link_selection_wk); + mutex_lock(&mvm->mutex); ieee80211_iterate_active_interfaces(mvm->hw, IEEE80211_IFACE_ITER_NORMAL, iwl_mvm_find_link_selection_vif, NULL); + mutex_unlock(&mvm->mutex); } static struct iwl_op_mode * -- cgit From 3ee22f07a35b76939c5b8d17d6af292f5fafb509 Mon Sep 17 00:00:00 2001 From: Anjaneyulu Date: Sun, 25 Aug 2024 19:17:08 +0300 Subject: wifi: iwlwifi: fw: fix wgds rev 3 exact size Check size of WGDS revision 3 is equal to 8 entries size with some header, but doesn't depend on the number of used entries. Check that used entries are between min and max but allow more to be present than are used to fix operation with some BIOSes that have such data. Fixes: 97f8a3d1610b ("iwlwifi: ACPI: support revision 3 WGDS tables") Signed-off-by: Anjaneyulu Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240825191257.cc71dfc67ec3.Ic27ee15ac6128b275c210b6de88f2145bd83ca7b@changeid [edit commit message] Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 79774c8c7ff4..8c8880b44827 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -725,22 +725,25 @@ int iwl_acpi_get_wgds_table(struct iwl_fw_runtime *fwrt) entry = &wifi_pkg->package.elements[entry_idx]; entry_idx++; if (entry->type != ACPI_TYPE_INTEGER || - entry->integer.value > num_profiles) { + entry->integer.value > num_profiles || + entry->integer.value < + rev_data[idx].min_profiles) { ret = -EINVAL; goto out_free; } - num_profiles = entry->integer.value; /* - * this also validates >= min_profiles since we - * otherwise wouldn't have gotten the data when - * looking up in ACPI + * Check to see if we received package count + * same as max # of profiles */ if (wifi_pkg->package.count != hdr_size + profile_size * num_profiles) { ret = -EINVAL; goto out_free; } + + /* Number of valid profiles */ + num_profiles = entry->integer.value; } goto read_table; } -- cgit From 0668ebc8c2282ca1e7eb96092a347baefffb5fe7 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 25 Aug 2024 19:17:10 +0300 Subject: wifi: iwlwifi: mvm: pause TCM when the firmware is stopped Not doing so will make us send a host command to the transport while the firmware is not alive, which will trigger a WARNING. bad state = 0 WARNING: CPU: 2 PID: 17434 at drivers/net/wireless/intel/iwlwifi/iwl-trans.c:115 iwl_trans_send_cmd+0x1cb/0x1e0 [iwlwifi] RIP: 0010:iwl_trans_send_cmd+0x1cb/0x1e0 [iwlwifi] Call Trace: iwl_mvm_send_cmd+0x40/0xc0 [iwlmvm] iwl_mvm_config_scan+0x198/0x260 [iwlmvm] iwl_mvm_recalc_tcm+0x730/0x11d0 [iwlmvm] iwl_mvm_tcm_work+0x1d/0x30 [iwlmvm] process_one_work+0x29e/0x640 worker_thread+0x2df/0x690 ? rescuer_thread+0x540/0x540 kthread+0x192/0x1e0 ? set_kthread_struct+0x90/0x90 ret_from_fork+0x22/0x30 Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240825191257.5abe71ca1b6b.I97a968cb8be1f24f94652d9b110ecbf6af73f89e@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index f7ff8b02def4..b9daaffd9c7f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1513,6 +1513,8 @@ void iwl_mvm_stop_device(struct iwl_mvm *mvm) clear_bit(IWL_MVM_STATUS_FIRMWARE_RUNNING, &mvm->status); + iwl_mvm_pause_tcm(mvm, false); + iwl_fw_dbg_stop_sync(&mvm->fwrt); iwl_trans_stop_device(mvm->trans); iwl_free_fw_paging(&mvm->fwrt); -- cgit From 454f6306a31248cf972f5f16d4c145ad5b33bfdc Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Sun, 25 Aug 2024 19:17:12 +0300 Subject: wifi: iwlwifi: mvm: allow 6 GHz channels in MLO scan MLO internal scan may include 6 GHz channels. Since the 6 GHz scan indication is not set, the channel flags are set incorrectly, which leads to a firmware assert. Since the MLO scan may include 6 GHz and non 6 GHz channels in one request, add support for non-PSC 6 GHz channels (PSC channels are already supported) when the 6 GHz indication is not set. Fixes: 38b3998dfba3 ("wifi: iwlwifi: mvm: Introduce internal MLO passive scan") Signed-off-by: Avraham Stern Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240825191257.04807f8213b2.Idd09d4366df92a74853649c1a520b7f0f752d1ac@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index bae6aec8295c..1cc9c426bb15 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1659,6 +1659,17 @@ iwl_mvm_umac_scan_cfg_channels_v7(struct iwl_mvm *mvm, cfg->v2.channel_num = channels[i]->hw_value; if (cfg80211_channel_is_psc(channels[i])) cfg->flags = 0; + + if (band == NL80211_BAND_6GHZ) { + /* 6 GHz channels should only appear in a scan request + * that has scan_6ghz set. The only exception is MLO + * scan, which has to be passive. + */ + WARN_ON_ONCE(cfg->flags != 0); + cfg->flags = + cpu_to_le32(IWL_UHB_CHAN_CFG_FLAG_FORCE_PASSIVE); + } + cfg->v2.iter_count = 1; cfg->v2.iter_interval = 0; if (version < 17) -- cgit From 3a84454f5204718ca5b4ad2c1f0bf2031e2403d1 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 25 Aug 2024 19:17:04 +0300 Subject: wifi: iwlwifi: mvm: don't wait for tx queues if firmware is dead There is a WARNING in iwl_trans_wait_tx_queues_empty() (that was recently converted from just a message), that can be hit if we wait for TX queues to become empty after firmware died. Clearly, we can't expect anything from the firmware after it's declared dead. Don't call iwl_trans_wait_tx_queues_empty() in this case. While it could be a good idea to stop the flow earlier, the flush functions do some maintenance work that is not related to the firmware, so keep that part of the code running even when the firmware is not running. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240825191257.a7cbd794cee9.I44a739fbd4ffcc46b83844dd1c7b2eb0c7b270f6@changeid [edit commit message] Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 835a05b91833..625ccf566e1c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -5818,6 +5818,10 @@ static void iwl_mvm_flush_no_vif(struct iwl_mvm *mvm, u32 queues, bool drop) int i; if (!iwl_mvm_has_new_tx_api(mvm)) { + /* we can't ask the firmware anything if it is dead */ + if (test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, + &mvm->status)) + return; if (drop) { guard(mvm)(mvm); iwl_mvm_flush_tx_path(mvm, @@ -5911,8 +5915,11 @@ void iwl_mvm_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, /* this can take a while, and we may need/want other operations * to succeed while doing this, so do it without the mutex held + * If the firmware is dead, this can't work... */ - if (!drop && !iwl_mvm_has_new_tx_api(mvm)) + if (!drop && !iwl_mvm_has_new_tx_api(mvm) && + !test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, + &mvm->status)) iwl_trans_wait_tx_queues_empty(mvm->trans, msk); } -- cgit From 786c5be9ac29a39b6f37f1fdd2ea59d0fe35d525 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Mon, 5 Aug 2024 17:20:35 +0300 Subject: wifi: mac80211: free skb on error path in ieee80211_beacon_get_ap() In 'ieee80211_beacon_get_ap()', free allocated skb in case of error returned by 'ieee80211_beacon_protect()'. Compile tested only. Signed-off-by: Dmitry Antipov Link: https://patch.msgid.link/20240805142035.227847-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index edba4a31844f..bca7b341dd77 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5348,8 +5348,10 @@ ieee80211_beacon_get_ap(struct ieee80211_hw *hw, if (beacon->tail) skb_put_data(skb, beacon->tail, beacon->tail_len); - if (ieee80211_beacon_protect(skb, local, sdata, link) < 0) + if (ieee80211_beacon_protect(skb, local, sdata, link) < 0) { + dev_kfree_skb(skb); return NULL; + } ieee80211_beacon_get_finish(hw, vif, link, offs, beacon, skb, chanctx_conf, csa_off_base); -- cgit From f25d1b5f1be13a6de341b1d26e0cf4275e5908d2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 17 Aug 2024 11:33:27 -0400 Subject: MAINTAINERS: Update Olga Kornievskaia's email address Signed-off-by: Chuck Lever --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 93238316b6c0..174d65623a8b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11974,7 +11974,7 @@ KERNEL NFSD, SUNRPC, AND LOCKD SERVERS M: Chuck Lever M: Jeff Layton R: Neil Brown -R: Olga Kornievskaia +R: Olga Kornievskaia R: Dai Ngo R: Tom Talpey L: linux-nfs@vger.kernel.org -- cgit From da05ba23d4c8d3e8a45846b952e53dd76c4b5e36 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 23 Aug 2024 18:27:38 -0400 Subject: nfsd: hold reference to delegation when updating it for cb_getattr Once we've dropped the flc_lock, there is nothing that ensures that the delegation that was found will still be around later. Take a reference to it while holding the lock and then drop it when we've finished with the delegation. Fixes: c5967721e106 ("NFSD: handle GETATTR conflict with write delegation") Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index dafff707e23a..19d39872be32 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -8837,7 +8837,6 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode, struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct file_lock_context *ctx; struct file_lease *fl; - struct nfs4_delegation *dp; struct iattr attrs; struct nfs4_cb_fattr *ncf; @@ -8862,7 +8861,8 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode, goto break_lease; } if (type == F_WRLCK) { - dp = fl->c.flc_owner; + struct nfs4_delegation *dp = fl->c.flc_owner; + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) { spin_unlock(&ctx->flc_lock); return 0; @@ -8870,6 +8870,7 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode, break_lease: nfsd_stats_wdeleg_getattr_inc(nn); dp = fl->c.flc_owner; + refcount_inc(&dp->dl_stid.sc_count); ncf = &dp->dl_cb_fattr; nfs4_cb_getattr(&dp->dl_cb_fattr); spin_unlock(&ctx->flc_lock); @@ -8879,8 +8880,10 @@ break_lease: /* Recall delegation only if client didn't respond */ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); if (status != nfserr_jukebox || - !nfsd_wait_for_delegreturn(rqstp, inode)) + !nfsd_wait_for_delegreturn(rqstp, inode)) { + nfs4_put_stid(&dp->dl_stid); return status; + } } if (!ncf->ncf_file_modified && (ncf->ncf_initial_cinfo != ncf->ncf_cb_change || @@ -8900,6 +8903,7 @@ break_lease: *size = ncf->ncf_cur_fsize; *modified = true; } + nfs4_put_stid(&dp->dl_stid); return 0; } break; -- cgit From 1116e0e372eb16dd907ec571ce5d4af325c55c10 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 23 Aug 2024 18:27:39 -0400 Subject: nfsd: fix potential UAF in nfsd4_cb_getattr_release Once we drop the delegation reference, the fields embedded in it are no longer safe to access. Do that last. Fixes: c5967721e106 ("NFSD: handle GETATTR conflict with write delegation") Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 19d39872be32..02d43f95146e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3078,9 +3078,9 @@ nfsd4_cb_getattr_release(struct nfsd4_callback *cb) struct nfs4_delegation *dp = container_of(ncf, struct nfs4_delegation, dl_cb_fattr); - nfs4_put_stid(&dp->dl_stid); clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags); wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY); + nfs4_put_stid(&dp->dl_stid); } static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { -- cgit From b6fb565a2d15277896583d471b21bc14a0c99661 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 26 Aug 2024 15:53:04 +0300 Subject: x86/tdx: Fix data leak in mmio_read() The mmio_read() function makes a TDVMCALL to retrieve MMIO data for an address from the VMM. Sean noticed that mmio_read() unintentionally exposes the value of an initialized variable (val) on the stack to the VMM. This variable is only needed as an output value. It did not need to be passed to the VMM in the first place. Do not send the original value of *val to the VMM. [ dhansen: clarify what 'val' is used for. ] Fixes: 31d58c4e557d ("x86/tdx: Handle in-kernel MMIO") Reported-by: Sean Christopherson Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20240826125304.1566719-1-kirill.shutemov%40linux.intel.com --- arch/x86/coco/tdx/tdx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 078e2bac2553..da8b66dce0da 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -389,7 +389,6 @@ static bool mmio_read(int size, unsigned long addr, unsigned long *val) .r12 = size, .r13 = EPT_READ, .r14 = addr, - .r15 = *val, }; if (__tdx_hypercall(&args)) -- cgit From c6a09e342f8e6d3cac7f7c5c14085236aca284b9 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 25 Aug 2024 20:27:45 -0700 Subject: binfmt_elf_fdpic: fix AUXV size calculation when ELF_HWCAP2 is defined create_elf_fdpic_tables() does not correctly account the space for the AUX vector when an architecture has ELF_HWCAP2 defined. Prior to the commit 10e29251be0e ("binfmt_elf_fdpic: fix /proc//auxv") it resulted in the last entry of the AUX vector being set to zero, but with that change it results in a kernel BUG. Fix that by adding one to the number of AUXV entries (nitems) when ELF_HWCAP2 is defined. Fixes: 10e29251be0e ("binfmt_elf_fdpic: fix /proc//auxv") Cc: stable@vger.kernel.org Reported-by: Greg Ungerer Closes: https://lore.kernel.org/lkml/5b51975f-6d0b-413c-8b38-39a6a45e8821@westnet.com.au/ Signed-off-by: Max Filippov Tested-by: Greg Ungerer Link: https://lore.kernel.org/r/20240826032745.3423812-1-jcmvbkbc@gmail.com Signed-off-by: Kees Cook --- fs/binfmt_elf_fdpic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 28a3439f163a..4fe5bb9f1b1f 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -589,6 +589,9 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, if (bprm->have_execfd) nitems++; +#ifdef ELF_HWCAP2 + nitems++; +#endif csp = sp; sp -= nitems * 2 * sizeof(unsigned long); -- cgit From a699781c79ecf6cfe67fb00a0331b4088c7c8466 Mon Sep 17 00:00:00 2001 From: Jamie Bainbridge Date: Fri, 23 Aug 2024 16:26:58 +1000 Subject: ethtool: check device is present when getting link settings A sysfs reader can race with a device reset or removal, attempting to read device state when the device is not actually present. eg: [exception RIP: qed_get_current_link+17] #8 [ffffb9e4f2907c48] qede_get_link_ksettings at ffffffffc07a994a [qede] #9 [ffffb9e4f2907cd8] __rh_call_get_link_ksettings at ffffffff992b01a3 #10 [ffffb9e4f2907d38] __ethtool_get_link_ksettings at ffffffff992b04e4 #11 [ffffb9e4f2907d90] duplex_show at ffffffff99260300 #12 [ffffb9e4f2907e38] dev_attr_show at ffffffff9905a01c #13 [ffffb9e4f2907e50] sysfs_kf_seq_show at ffffffff98e0145b #14 [ffffb9e4f2907e68] seq_read at ffffffff98d902e3 #15 [ffffb9e4f2907ec8] vfs_read at ffffffff98d657d1 #16 [ffffb9e4f2907f00] ksys_read at ffffffff98d65c3f #17 [ffffb9e4f2907f38] do_syscall_64 at ffffffff98a052fb crash> struct net_device.state ffff9a9d21336000 state = 5, state 5 is __LINK_STATE_START (0b1) and __LINK_STATE_NOCARRIER (0b100). The device is not present, note lack of __LINK_STATE_PRESENT (0b10). This is the same sort of panic as observed in commit 4224cfd7fb65 ("net-sysfs: add check for netdevice being present to speed_show"). There are many other callers of __ethtool_get_link_ksettings() which don't have a device presence check. Move this check into ethtool to protect all callers. Fixes: d519e17e2d01 ("net: export device speed and duplex via sysfs") Fixes: 4224cfd7fb65 ("net-sysfs: add check for netdevice being present to speed_show") Signed-off-by: Jamie Bainbridge Link: https://patch.msgid.link/8bae218864beaa44ed01628140475b9bf641c5b0.1724393671.git.jamie.bainbridge@gmail.com Signed-off-by: Jakub Kicinski --- net/core/net-sysfs.c | 2 +- net/ethtool/ioctl.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 0e2084ce7b75..444f23e74f8e 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -235,7 +235,7 @@ static ssize_t speed_show(struct device *dev, if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev) && netif_device_present(netdev)) { + if (netif_running(netdev)) { struct ethtool_link_ksettings cmd; if (!__ethtool_get_link_ksettings(netdev, &cmd)) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index e18823bf2330..ae041f51cd2d 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -442,6 +442,9 @@ int __ethtool_get_link_ksettings(struct net_device *dev, if (!dev->ethtool_ops->get_link_ksettings) return -EOPNOTSUPP; + if (!netif_device_present(dev)) + return -ENODEV; + memset(link_ksettings, 0, sizeof(*link_ksettings)); return dev->ethtool_ops->get_link_ksettings(dev, link_ksettings); } -- cgit From 284b75a3d83c7631586d98f6dede1d90f128f0db Mon Sep 17 00:00:00 2001 From: Zheng Qixing Date: Thu, 22 Aug 2024 11:30:50 +0800 Subject: ata: libata: Fix memory leak for error path in ata_host_alloc() In ata_host_alloc(), if devres_alloc() fails to allocate the device host resource data pointer, the already allocated ata_host structure is not freed before returning from the function. This results in a potential memory leak. Call kfree(host) before jumping to the error handling path to ensure that the ata_host structure is properly freed if devres_alloc() fails. Fixes: 2623c7a5f279 ("libata: add refcounting to ata_host") Cc: stable@vger.kernel.org Signed-off-by: Zheng Qixing Reviewed-by: Yu Kuai Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c7752dc80028..30932552437a 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5593,8 +5593,10 @@ struct ata_host *ata_host_alloc(struct device *dev, int n_ports) } dr = devres_alloc(ata_devres_release, 0, GFP_KERNEL); - if (!dr) + if (!dr) { + kfree(host); goto err_out; + } devres_add(dev, dr); dev_set_drvdata(dev, host); -- cgit From 0903b9e2a46cb6252a13d6b19d0502da9be191cf Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 22 Aug 2024 22:03:59 -0700 Subject: rust: alloc: eschew `Box>::write` Upstream Rust's libs-api team has consensus for stabilizing some of `feature(new_uninit)`, but not for `Box>::write`. Instead, we can use `MaybeUninit::write`, so Rust for Linux can drop the feature after stabilization. That will happen after merging, as the FCP has completed [1]. This is required before stabilization because remaining-unstable API will be divided into new features. This code doesn't know about those yet. It can't: they haven't landed, as the relevant PR is blocked on rustc's CI testing Rust-for-Linux without this patch. [ The PR has landed [2] and will be released in Rust 1.82.0 (expected on 2024-10-17), so we could conditionally enable the new unstable feature (`box_uninit_write` [3]) instead, but just for a single `unsafe` block it is probably not worth it. For the time being, I added it to the "nice to have" section of our unstable features list. - Miguel ] Link: https://github.com/rust-lang/rust/issues/63291#issuecomment-2183022955 [1] Link: https://github.com/rust-lang/rust/pull/129416 [2] Link: https://github.com/rust-lang/rust/issues/129397 [3] Signed-off-by: Jubilee Young Reviewed-by: Alice Ryhl Reviewed-by: Trevor Gross [ Reworded slightly. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/alloc/box_ext.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rust/kernel/alloc/box_ext.rs b/rust/kernel/alloc/box_ext.rs index 829cb1c1cf9e..9f1c1c489189 100644 --- a/rust/kernel/alloc/box_ext.rs +++ b/rust/kernel/alloc/box_ext.rs @@ -21,8 +21,10 @@ pub trait BoxExt: Sized { impl BoxExt for Box { fn new(x: T, flags: Flags) -> Result { - let b = >::new_uninit(flags)?; - Ok(Box::write(b, x)) + let mut b = >::new_uninit(flags)?; + b.write(x); + // SAFETY: We just wrote to it. + Ok(unsafe { b.assume_init() }) } #[cfg(any(test, testlib))] -- cgit From e846be0fba85603d2ad6fc8db6810958d7b6bed1 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Fri, 23 Aug 2024 17:34:12 +0530 Subject: net: ti: icssg-prueth: Fix 10M Link issue on AM64x Crash is seen on AM64x 10M link when connecting / disconnecting multiple times. The fix for this is to enable quirk_10m_link_issue for AM64x. Fixes: b256e13378a9 ("net: ti: icssg-prueth: Add AM64x icssg support") Signed-off-by: MD Danish Anwar Reviewed-by: Roger Quadros Link: https://patch.msgid.link/20240823120412.1262536-1-danishanwar@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 3e51b3a9b0a5..e3451beed323 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1452,6 +1452,7 @@ static const struct prueth_pdata am654_icssg_pdata = { static const struct prueth_pdata am64x_icssg_pdata = { .fdqring_mode = K3_RINGACC_RING_MODE_RING, + .quirk_10m_link_issue = 1, .switch_mode = 1, }; -- cgit From 7e8ae8486e4471513e2111aba6ac29f2357bed2a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 26 Aug 2024 10:32:34 -0400 Subject: fs/nfsd: fix update of inode attrs in CB_GETATTR Currently, we copy the mtime and ctime to the in-core inode and then mark the inode dirty. This is fine for certain types of filesystems, but not all. Some require a real setattr to properly change these values (e.g. ceph or reexported NFS). Fix this code to call notify_change() instead, which is the proper way to effect a setattr. There is one problem though: In this case, the client is holding a write delegation and has sent us attributes to update our cache. We don't want to break the delegation for this since that would defeat the purpose. Add a new ATTR_DELEG flag that makes notify_change bypass the try_break_deleg call. Fixes: c5967721e106 ("NFSD: handle GETATTR conflict with write delegation") Reviewed-by: Christian Brauner Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/attr.c | 14 +++++++++++--- fs/nfsd/nfs4state.c | 18 +++++++++++++----- fs/nfsd/nfs4xdr.c | 2 +- fs/nfsd/state.h | 2 +- include/linux/fs.h | 1 + 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/fs/attr.c b/fs/attr.c index 960a310581eb..0dbf43b6555c 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -489,9 +489,17 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry, error = security_inode_setattr(idmap, dentry, attr); if (error) return error; - error = try_break_deleg(inode, delegated_inode); - if (error) - return error; + + /* + * If ATTR_DELEG is set, then these attributes are being set on + * behalf of the holder of a write delegation. We want to avoid + * breaking the delegation in this case. + */ + if (!(ia_valid & ATTR_DELEG)) { + error = try_break_deleg(inode, delegated_inode); + if (error) + return error; + } if (inode->i_op->setattr) error = inode->i_op->setattr(idmap, dentry, attr); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 02d43f95146e..07f2496850c4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -8815,7 +8815,7 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, /** * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict * @rqstp: RPC transaction context - * @inode: file to be checked for a conflict + * @dentry: dentry of inode to be checked for a conflict * @modified: return true if file was modified * @size: new size of file if modified is true * @@ -8830,7 +8830,7 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, * code is returned. */ __be32 -nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode, +nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, bool *modified, u64 *size) { __be32 status; @@ -8839,6 +8839,7 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode, struct file_lease *fl; struct iattr attrs; struct nfs4_cb_fattr *ncf; + struct inode *inode = d_inode(dentry); *modified = false; ctx = locks_inode_context(inode); @@ -8890,15 +8891,22 @@ break_lease: ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) ncf->ncf_file_modified = true; if (ncf->ncf_file_modified) { + int err; + /* * Per section 10.4.3 of RFC 8881, the server would * not update the file's metadata with the client's * modified size */ attrs.ia_mtime = attrs.ia_ctime = current_time(inode); - attrs.ia_valid = ATTR_MTIME | ATTR_CTIME; - setattr_copy(&nop_mnt_idmap, inode, &attrs); - mark_inode_dirty(inode); + attrs.ia_valid = ATTR_MTIME | ATTR_CTIME | ATTR_DELEG; + inode_lock(inode); + err = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL); + inode_unlock(inode); + if (err) { + nfs4_put_stid(&dp->dl_stid); + return nfserrno(err); + } ncf->ncf_cur_fsize = ncf->ncf_cb_fsize; *size = ncf->ncf_cur_fsize; *modified = true; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 43ccf6119cf1..97f583777972 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3565,7 +3565,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, } args.size = 0; if (attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { - status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry), + status = nfsd4_deleg_getattr_conflict(rqstp, dentry, &file_modified, &size); if (status) goto out; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index ffc217099d19..ec4559ecd193 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -781,5 +781,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) } extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, - struct inode *inode, bool *file_modified, u64 *size); + struct dentry *dentry, bool *file_modified, u64 *size); #endif /* NFSD4_STATE_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 0283cf366c2a..bafc1d134b94 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -208,6 +208,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ #define ATTR_TIMES_SET (1 << 16) #define ATTR_TOUCH (1 << 17) +#define ATTR_DELEG (1 << 18) /* Delegated attrs. Don't break write delegations */ /* * Whiteout is represented by a char device. The following constants define the -- cgit From b49420d6a1aeb399e5b107fc6eb8584d0860fbd7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 21 Aug 2024 15:11:35 -0400 Subject: video/aperture: optionally match the device in sysfb_disable() In aperture_remove_conflicting_pci_devices(), we currently only call sysfb_disable() on vga class devices. This leads to the following problem when the pimary device is not VGA compatible: 1. A PCI device with a non-VGA class is the boot display 2. That device is probed first and it is not a VGA device so sysfb_disable() is not called, but the device resources are freed by aperture_detach_platform_device() 3. Non-primary GPU has a VGA class and it ends up calling sysfb_disable() 4. NULL pointer dereference via sysfb_disable() since the resources have already been freed by aperture_detach_platform_device() when it was called by the other device. Fix this by passing a device pointer to sysfb_disable() and checking the device to determine if we should execute it or not. v2: Fix build when CONFIG_SCREEN_INFO is not set v3: Move device check into the mutex Drop primary variable in aperture_remove_conflicting_pci_devices() Drop __init on pci sysfb_pci_dev_is_enabled() Fixes: 5ae3716cfdcd ("video/aperture: Only remove sysfb on the default vga pci device") Cc: Javier Martinez Canillas Cc: Thomas Zimmermann Cc: Helge Deller Cc: Sam Ravnborg Cc: Daniel Vetter Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20240821191135.829765-1-alexander.deucher@amd.com --- drivers/firmware/sysfb.c | 19 +++++++++++++------ drivers/of/platform.c | 2 +- drivers/video/aperture.c | 11 +++-------- include/linux/sysfb.h | 4 ++-- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 921f61507ae8..02a07d3d0d40 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -39,6 +39,8 @@ static struct platform_device *pd; static DEFINE_MUTEX(disable_lock); static bool disabled; +static struct device *sysfb_parent_dev(const struct screen_info *si); + static bool sysfb_unregister(void) { if (IS_ERR_OR_NULL(pd)) @@ -52,6 +54,7 @@ static bool sysfb_unregister(void) /** * sysfb_disable() - disable the Generic System Framebuffers support + * @dev: the device to check if non-NULL * * This disables the registration of system framebuffer devices that match the * generic drivers that make use of the system framebuffer set up by firmware. @@ -61,17 +64,21 @@ static bool sysfb_unregister(void) * Context: The function can sleep. A @disable_lock mutex is acquired to serialize * against sysfb_init(), that registers a system framebuffer device. */ -void sysfb_disable(void) +void sysfb_disable(struct device *dev) { + struct screen_info *si = &screen_info; + mutex_lock(&disable_lock); - sysfb_unregister(); - disabled = true; + if (!dev || dev == sysfb_parent_dev(si)) { + sysfb_unregister(); + disabled = true; + } mutex_unlock(&disable_lock); } EXPORT_SYMBOL_GPL(sysfb_disable); #if defined(CONFIG_PCI) -static __init bool sysfb_pci_dev_is_enabled(struct pci_dev *pdev) +static bool sysfb_pci_dev_is_enabled(struct pci_dev *pdev) { /* * TODO: Try to integrate this code into the PCI subsystem @@ -87,13 +94,13 @@ static __init bool sysfb_pci_dev_is_enabled(struct pci_dev *pdev) return true; } #else -static __init bool sysfb_pci_dev_is_enabled(struct pci_dev *pdev) +static bool sysfb_pci_dev_is_enabled(struct pci_dev *pdev) { return false; } #endif -static __init struct device *sysfb_parent_dev(const struct screen_info *si) +static struct device *sysfb_parent_dev(const struct screen_info *si) { struct pci_dev *pdev; diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 389d4ea6bfc1..ef622d41eb5b 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -592,7 +592,7 @@ static int __init of_platform_default_populate_init(void) * This can happen for example on DT systems that do EFI * booting and may provide a GOP handle to the EFI stub. */ - sysfb_disable(); + sysfb_disable(NULL); of_platform_device_create(node, NULL, NULL); of_node_put(node); } diff --git a/drivers/video/aperture.c b/drivers/video/aperture.c index 561be8feca96..2b5a1e666e9b 100644 --- a/drivers/video/aperture.c +++ b/drivers/video/aperture.c @@ -293,7 +293,7 @@ int aperture_remove_conflicting_devices(resource_size_t base, resource_size_t si * ask for this, so let's assume that a real driver for the display * was already probed and prevent sysfb to register devices later. */ - sysfb_disable(); + sysfb_disable(NULL); aperture_detach_devices(base, size); @@ -346,15 +346,10 @@ EXPORT_SYMBOL(__aperture_remove_legacy_vga_devices); */ int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, const char *name) { - bool primary = false; resource_size_t base, size; int bar, ret = 0; - if (pdev == vga_default_device()) - primary = true; - - if (primary) - sysfb_disable(); + sysfb_disable(&pdev->dev); for (bar = 0; bar < PCI_STD_NUM_BARS; ++bar) { if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) @@ -370,7 +365,7 @@ int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, const char *na * that consumes the VGA framebuffer I/O range. Remove this * device as well. */ - if (primary) + if (pdev == vga_default_device()) ret = __aperture_remove_legacy_vga_devices(pdev); return ret; diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index c9cb657dad08..bef5f06a91de 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -58,11 +58,11 @@ struct efifb_dmi_info { #ifdef CONFIG_SYSFB -void sysfb_disable(void); +void sysfb_disable(struct device *dev); #else /* CONFIG_SYSFB */ -static inline void sysfb_disable(void) +static inline void sysfb_disable(struct device *dev) { } -- cgit From 10d9d8c3512f16cad47b2ff81ec6fc4b27d8ee10 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 17 Aug 2024 18:34:30 +0930 Subject: btrfs: fix a use-after-free when hitting errors inside btrfs_submit_chunk() [BUG] There is an internal report that KASAN is reporting use-after-free, with the following backtrace: BUG: KASAN: slab-use-after-free in btrfs_check_read_bio+0xa68/0xb70 [btrfs] Read of size 4 at addr ffff8881117cec28 by task kworker/u16:2/45 CPU: 1 UID: 0 PID: 45 Comm: kworker/u16:2 Not tainted 6.11.0-rc2-next-20240805-default+ #76 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 Workqueue: btrfs-endio btrfs_end_bio_work [btrfs] Call Trace: dump_stack_lvl+0x61/0x80 print_address_description.constprop.0+0x5e/0x2f0 print_report+0x118/0x216 kasan_report+0x11d/0x1f0 btrfs_check_read_bio+0xa68/0xb70 [btrfs] process_one_work+0xce0/0x12a0 worker_thread+0x717/0x1250 kthread+0x2e3/0x3c0 ret_from_fork+0x2d/0x70 ret_from_fork_asm+0x11/0x20 Allocated by task 20917: kasan_save_stack+0x37/0x60 kasan_save_track+0x10/0x30 __kasan_slab_alloc+0x7d/0x80 kmem_cache_alloc_noprof+0x16e/0x3e0 mempool_alloc_noprof+0x12e/0x310 bio_alloc_bioset+0x3f0/0x7a0 btrfs_bio_alloc+0x2e/0x50 [btrfs] submit_extent_page+0x4d1/0xdb0 [btrfs] btrfs_do_readpage+0x8b4/0x12a0 [btrfs] btrfs_readahead+0x29a/0x430 [btrfs] read_pages+0x1a7/0xc60 page_cache_ra_unbounded+0x2ad/0x560 filemap_get_pages+0x629/0xa20 filemap_read+0x335/0xbf0 vfs_read+0x790/0xcb0 ksys_read+0xfd/0x1d0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Freed by task 20917: kasan_save_stack+0x37/0x60 kasan_save_track+0x10/0x30 kasan_save_free_info+0x37/0x50 __kasan_slab_free+0x4b/0x60 kmem_cache_free+0x214/0x5d0 bio_free+0xed/0x180 end_bbio_data_read+0x1cc/0x580 [btrfs] btrfs_submit_chunk+0x98d/0x1880 [btrfs] btrfs_submit_bio+0x33/0x70 [btrfs] submit_one_bio+0xd4/0x130 [btrfs] submit_extent_page+0x3ea/0xdb0 [btrfs] btrfs_do_readpage+0x8b4/0x12a0 [btrfs] btrfs_readahead+0x29a/0x430 [btrfs] read_pages+0x1a7/0xc60 page_cache_ra_unbounded+0x2ad/0x560 filemap_get_pages+0x629/0xa20 filemap_read+0x335/0xbf0 vfs_read+0x790/0xcb0 ksys_read+0xfd/0x1d0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 [CAUSE] Although I cannot reproduce the error, the report itself is good enough to pin down the cause. The call trace is the regular endio workqueue context, but the free-by-task trace is showing that during btrfs_submit_chunk() we already hit a critical error, and is calling btrfs_bio_end_io() to error out. And the original endio function called bio_put() to free the whole bio. This means a double freeing thus causing use-after-free, e.g.: 1. Enter btrfs_submit_bio() with a read bio The read bio length is 128K, crossing two 64K stripes. 2. The first run of btrfs_submit_chunk() 2.1 Call btrfs_map_block(), which returns 64K 2.2 Call btrfs_split_bio() Now there are two bios, one referring to the first 64K, the other referring to the second 64K. 2.3 The first half is submitted. 3. The second run of btrfs_submit_chunk() 3.1 Call btrfs_map_block(), which by somehow failed Now we call btrfs_bio_end_io() to handle the error 3.2 btrfs_bio_end_io() calls the original endio function Which is end_bbio_data_read(), and it calls bio_put() for the original bio. Now the original bio is freed. 4. The submitted first 64K bio finished Now we call into btrfs_check_read_bio() and tries to advance the bio iter. But since the original bio (thus its iter) is already freed, we trigger the above use-after free. And even if the memory is not poisoned/corrupted, we will later call the original endio function, causing a double freeing. [FIX] Instead of calling btrfs_bio_end_io(), call btrfs_orig_bbio_end_io(), which has the extra check on split bios and do the proper refcounting for cloned bios. Furthermore there is already one extra btrfs_cleanup_bio() call, but that is duplicated to btrfs_orig_bbio_end_io() call, so remove that label completely. Reported-by: David Sterba Fixes: 852eee62d31a ("btrfs: allow btrfs_submit_bio to split bios") CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Josef Bacik Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/bio.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index f04d93109960..b4e31ae17cd9 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -668,7 +668,6 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) { struct btrfs_inode *inode = bbio->inode; struct btrfs_fs_info *fs_info = bbio->fs_info; - struct btrfs_bio *orig_bbio = bbio; struct bio *bio = &bbio->bio; u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT; u64 length = bio->bi_iter.bi_size; @@ -706,7 +705,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) bbio->saved_iter = bio->bi_iter; ret = btrfs_lookup_bio_sums(bbio); if (ret) - goto fail_put_bio; + goto fail; } if (btrfs_op(bio) == BTRFS_MAP_WRITE) { @@ -740,13 +739,13 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) ret = btrfs_bio_csum(bbio); if (ret) - goto fail_put_bio; + goto fail; } else if (use_append || (btrfs_is_zoned(fs_info) && inode && inode->flags & BTRFS_INODE_NODATASUM)) { ret = btrfs_alloc_dummy_sum(bbio); if (ret) - goto fail_put_bio; + goto fail; } } @@ -754,12 +753,23 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) done: return map_length == length; -fail_put_bio: - if (map_length < length) - btrfs_cleanup_bio(bbio); fail: btrfs_bio_counter_dec(fs_info); - btrfs_bio_end_io(orig_bbio, ret); + /* + * We have split the original bbio, now we have to end both the current + * @bbio and remaining one, as the remaining one will never be submitted. + */ + if (map_length < length) { + struct btrfs_bio *remaining = bbio->private; + + ASSERT(bbio->bio.bi_pool == &btrfs_clone_bioset); + ASSERT(remaining); + + remaining->bio.bi_status = ret; + btrfs_orig_bbio_end_io(remaining); + } + bbio->bio.bi_status = ret; + btrfs_orig_bbio_end_io(bbio); /* Do not submit another chunk */ return true; } -- cgit From 66927b89289974dab6d3b3cdd7706d0376034114 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 26 Aug 2024 15:11:38 -0400 Subject: bcachefs: Fix failure to return error in data_update_index_update() This fixes an assertion pop in io_write.c - if we don't return an error we're supposed to have completed all the btree updates. Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 65176d51b502..004894ad4147 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -337,6 +337,7 @@ restart_drop_extra_replicas: printbuf_exit(&buf); bch2_fatal_error(c); + ret = -EIO; goto out; } -- cgit From d26935690c03fe8159d42358bed1c56252700cd1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 26 Aug 2024 19:11:00 -0400 Subject: bcachefs: Fix bch2_extents_match() false positive This was caught as a very rare nonce inconsistency, on systems with encryption and replication (and tiering, or some form of rebalance operation running): [Wed Jul 17 13:30:03 2024] about to insert invalid key in data update path [Wed Jul 17 13:30:03 2024] old: u64s 10 type extent 671283510:6392:U32_MAX len 16 ver 106595503: durability: 2 crc: c_size 8 size 16 offset 0 nonce 0 csum chacha20_poly1305_80 compress zstd ptr: 3:355968:104 gen 7 ptr: 4:513244:48 gen 6 rebalance: target hdd compression zstd [Wed Jul 17 13:30:03 2024] k: u64s 10 type extent 671283510:6400:U32_MAX len 16 ver 106595508: durability: 2 crc: c_size 8 size 16 offset 0 nonce 0 csum chacha20_poly1305_80 compress zstd ptr: 3:355968:112 gen 7 ptr: 4:513244:56 gen 6 rebalance: target hdd compression zstd [Wed Jul 17 13:30:03 2024] new: u64s 14 type extent 671283510:6392:U32_MAX len 8 ver 106595508: durability: 2 crc: c_size 8 size 16 offset 0 nonce 0 csum chacha20_poly1305_80 compress zstd ptr: 3:355968:112 gen 7 cached ptr: 4:513244:56 gen 6 cached rebalance: target hdd compression zstd crc: c_size 8 size 16 offset 8 nonce 0 csum chacha20_poly1305_80 compress zstd ptr: 1:10860085:32 gen 0 ptr: 0:17285918:408 gen 0 [Wed Jul 17 13:30:03 2024] bcachefs (cca5bc65-fe77-409d-a9fa-465a6e7f4eae): fatal error - emergency read only bch2_extents_match() was reporting true for extents that did not actually point to the same data. bch2_extent_match() iterates over pairs of pointers, looking for pointers that point to the same location on disk (with matching generation numbers). However one or both extents may have been trimmed (or merged) and they might not have the same disk offset: it corrects for this by subtracting the key offset and the checksum entry offset. However, this failed when an extent was immediately partially overwritten, and the new overwrite was allocated the next adjacent disk space. Normally, with compression off, this would never cause a bug, since the new extent would have to be immediately after the old extent for the pointer offsets to match, and the rebalance index update path is not looking for an extent outside the range of the extent it moved. However with compression enabled, extents take up less space on disk than they do in the btree index space - and spuriously matching after partial overwrite is possible. To fix this, add a secondary check, that strictly checks that the regions pointed to on disk overlap. https://github.com/koverstreet/bcachefs/issues/717 Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index e317df3644a1..eb31bda19544 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -929,8 +929,29 @@ bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2) bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2) if (p1.ptr.dev == p2.ptr.dev && p1.ptr.gen == p2.ptr.gen && + + /* + * This checks that the two pointers point + * to the same region on disk - adjusting + * for the difference in where the extents + * start, since one may have been trimmed: + */ (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) == - (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k)) + (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k) && + + /* + * This additionally checks that the + * extents overlap on disk, since the + * previous check may trigger spuriously + * when one extent is immediately partially + * overwritten with another extent (so that + * on disk they are adjacent) and + * compression is in use: + */ + ((p1.ptr.offset >= p2.ptr.offset && + p1.ptr.offset < p2.ptr.offset + p2.crc.compressed_size) || + (p2.ptr.offset >= p1.ptr.offset && + p2.ptr.offset < p1.ptr.offset + p1.crc.compressed_size))) return true; return false; -- cgit From 7bbc079531fc38d401e1c4088d4981435a8828e3 Mon Sep 17 00:00:00 2001 From: Cosmo Chou Date: Mon, 19 Aug 2024 18:46:30 +0800 Subject: hwmon: (pt5161l) Fix invalid temperature reading The temperature reading function was using a signed long for the ADC code, which could lead to mishandling of invalid codes on 32-bit platforms. This allowed out-of-range ADC codes to be incorrectly interpreted as valid values and used in temperature calculations. Change adc_code to u32 to ensure that invalid ADC codes are correctly identified on all platforms. Fixes: 1b2ca93cd059 ("hwmon: Add driver for Astera Labs PT5161L retimer") Signed-off-by: Cosmo Chou Message-ID: <20240819104630.2375441-1-chou.cosmo@gmail.com> Signed-off-by: Guenter Roeck --- drivers/hwmon/pt5161l.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/pt5161l.c b/drivers/hwmon/pt5161l.c index b0d58a26d499..a9f0b23f9e76 100644 --- a/drivers/hwmon/pt5161l.c +++ b/drivers/hwmon/pt5161l.c @@ -427,7 +427,7 @@ static int pt5161l_read(struct device *dev, enum hwmon_sensor_types type, struct pt5161l_data *data = dev_get_drvdata(dev); int ret; u8 buf[8]; - long adc_code; + u32 adc_code; switch (attr) { case hwmon_temp_input: @@ -449,7 +449,7 @@ static int pt5161l_read(struct device *dev, enum hwmon_sensor_types type, adc_code = buf[3] << 24 | buf[2] << 16 | buf[1] << 8 | buf[0]; if (adc_code == 0 || adc_code >= 0x3ff) { - dev_dbg(dev, "Invalid adc_code %lx\n", adc_code); + dev_dbg(dev, "Invalid adc_code %x\n", adc_code); return -EIO; } -- cgit From 9a471de516c35219d1722c13367191ce1f120fe9 Mon Sep 17 00:00:00 2001 From: ZHANG Yuntian Date: Sat, 3 Aug 2024 15:46:07 +0800 Subject: USB: serial: option: add MeiG Smart SRM825L Add support for MeiG Smart SRM825L which is based on Qualcomm 315 chip. T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2dee ProdID=4d22 Rev= 4.14 S: Manufacturer=MEIG S: Product=LTE-A Module S: SerialNumber=6f345e48 C:* #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=896mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=05(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=88(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms Signed-off-by: ZHANG Yuntian Link: https://lore.kernel.org/0041DFA5200EFB1B+20240803074619.563116-1-yt@radxa.com/ Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 311040f9b935..176f38750ad5 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -619,6 +619,8 @@ static void option_instat_callback(struct urb *urb); /* MeiG Smart Technology products */ #define MEIGSMART_VENDOR_ID 0x2dee +/* MeiG Smart SRM825L based on Qualcomm 315 */ +#define MEIGSMART_PRODUCT_SRM825L 0x4d22 /* MeiG Smart SLM320 based on UNISOC UIS8910 */ #define MEIGSMART_PRODUCT_SLM320 0x4d41 @@ -2366,6 +2368,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SLM320, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x60) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); -- cgit From 6d30bb88f623526197c0e18a366e68a4254a2c83 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 23 Aug 2024 15:15:20 +0200 Subject: wifi: wfx: repair open network AP mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RSN IE missing in beacon is normal in open networks. Avoid returning -EINVAL in this case. Steps to reproduce: $ cat /etc/wpa_supplicant.conf network={ ssid="testNet" mode=2 key_mgmt=NONE } $ wpa_supplicant -iwlan0 -c /etc/wpa_supplicant.conf nl80211: Beacon set failed: -22 (Invalid argument) Failed to set beacon parameters Interface initialization failed wlan0: interface state UNINITIALIZED->DISABLED wlan0: AP-DISABLED wlan0: Unable to setup interface. Failed to initialize AP interface After the change: $ wpa_supplicant -iwlan0 -c /etc/wpa_supplicant.conf Successfully initialized wpa_supplicant wlan0: interface state UNINITIALIZED->ENABLED wlan0: AP-ENABLED Cc: stable@vger.kernel.org Fixes: fe0a7776d4d1 ("wifi: wfx: fix possible NULL pointer dereference in wfx_set_mfp_ap()") Signed-off-by: Alexander Sverdlin Reviewed-by: Jérôme Pouiller Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240823131521.3309073-1-alexander.sverdlin@siemens.com --- drivers/net/wireless/silabs/wfx/sta.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/silabs/wfx/sta.c b/drivers/net/wireless/silabs/wfx/sta.c index 216d43c8bd6e..7c04810dbf3d 100644 --- a/drivers/net/wireless/silabs/wfx/sta.c +++ b/drivers/net/wireless/silabs/wfx/sta.c @@ -352,8 +352,11 @@ static int wfx_set_mfp_ap(struct wfx_vif *wvif) ptr = (u16 *)cfg80211_find_ie(WLAN_EID_RSN, skb->data + ieoffset, skb->len - ieoffset); - if (unlikely(!ptr)) + if (!ptr) { + /* No RSN IE is fine in open networks */ + ret = 0; goto free_skb; + } ptr += pairwise_cipher_suite_count_offset; if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) -- cgit From 094513f8a2fbddee51b055d8035f995551f98fce Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 25 Aug 2024 19:17:01 +0300 Subject: wifi: iwlwifi: clear trans->state earlier upon error When the firmware crashes, we first told the op_mode and only then, changed the transport's state. This is a problem if the op_mode's nic_error() handler needs to send a host command: it'll see that the transport's state still reflects that the firmware is alive. Today, this has no consequences since we set the STATUS_FW_ERROR bit and that will prevent sending host commands. iwl_fw_dbg_stop_restart_recording looks at this bit to know not to send a host command for example. To fix the hibernation, we needed to reset the firmware without having an error and checking STATUS_FW_ERROR to see whether the firmware is alive will no longer hold, so this change is necessary as well. Change the flow a bit. Change trans->state before calling the op_mode's nic_error() method and check trans->state instead of STATUS_FW_ERROR. This will keep the current behavior of iwl_fw_dbg_stop_restart_recording upon firmware error, and it'll allow us to call iwl_fw_dbg_stop_restart_recording safely even if STATUS_FW_ERROR is clear, but yet, the firmware is not alive. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240825191257.9d7427fbdfd7.Ia056ca57029a382c921d6f7b6a6b28fc480f2f22@changeid [I missed this was a dependency for the hibernation fix, changed the commit message a bit accordingly] Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 2 +- drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index fa57df336785..fb2ea38e89ac 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -3348,7 +3348,7 @@ void iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt, { int ret __maybe_unused = 0; - if (test_bit(STATUS_FW_ERROR, &fwrt->trans->status)) + if (!iwl_trans_fw_running(fwrt->trans)) return; if (fw_has_capa(&fwrt->fw->ucode_capa, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 6148acbac6af..0ef48effeefb 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -1128,8 +1128,8 @@ static inline void iwl_trans_fw_error(struct iwl_trans *trans, bool sync) /* prevent double restarts due to the same erroneous FW */ if (!test_and_set_bit(STATUS_FW_ERROR, &trans->status)) { - iwl_op_mode_nic_error(trans->op_mode, sync); trans->state = IWL_TRANS_NO_FW; + iwl_op_mode_nic_error(trans->op_mode, sync); } } -- cgit From 7d058e6bac9afab6a406e34344ebbfd3068bb2d5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 23 Aug 2024 09:50:55 +0200 Subject: drm/i915/dsi: Make Lenovo Yoga Tab 3 X90F DMI match less strict There are 2G and 4G RAM versions of the Lenovo Yoga Tab 3 X90F and it turns out that the 2G version has a DMI product name of "CHERRYVIEW D1 PLATFORM" where as the 4G version has "CHERRYVIEW C0 PLATFORM". The sys-vendor + product-version check are unique enough that the product-name check is not necessary. Drop the product-name check so that the existing DMI match for the 4G RAM version also matches the 2G RAM version. Fixes: f6f4a0862bde ("drm/i915/vlv_dsi: Add DMI quirk for backlight control issues on Lenovo Yoga Tab 3 (v2)") Cc: stable@vger.kernel.org Acked-by: Jani Nikula Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20240823075055.17198-1-hdegoede@redhat.com (cherry picked from commit a4dbe45c4c14edc316ae94b9af86a28f8c5d8123) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/vlv_dsi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index eae5b5e09aa8..931d2cf74ed8 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1870,7 +1870,6 @@ static const struct dmi_system_id vlv_dsi_dmi_quirk_table[] = { /* Lenovo Yoga Tab 3 Pro YT3-X90F */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"), }, .driver_data = (void *)vlv_dsi_lenovo_yoga_tab3_backlight_fixup, -- cgit From 2955ae8186c8a6f029e429f7890e0c7e5f6e215e Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 1 Aug 2024 20:10:51 -0700 Subject: drm/i915: ARL requires a newer GSC firmware ARL and MTL share a single GSC firmware blob. However, ARL requires a newer version of it. So add differentiate of the PCI ids for ARL from MTL and create ARL as a sub-platform of MTL. That way, all the existing workarounds and such still treat ARL as MTL exactly as before. However, now the GSC code can check for ARL and do an extra version check on the firmware before committing to it. Also, the version extraction code has various ways of failing but the return code was being ignore and so the firmware load would attempt to continue anyway. Fix that by propagating the return code to the next level out. Signed-off-by: John Harrison Fixes: 213c43676beb ("drm/i915/mtl: Remove the 'force_probe' requirement for Meteor Lake") Reviewed-by: Daniele Ceraolo Spurio Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240802031051.3816392-1-John.C.Harrison@Intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 67733d7a71503fd3e32eeada371f8aa2516c5c95) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 31 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++++++++-- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/intel_device_info.c | 7 +++++++ drivers/gpu/drm/i915/intel_device_info.h | 3 +++ include/drm/intel/i915_pciids.h | 11 +++++++---- 6 files changed, 58 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index 3b69bc6616bd..551b0d7974ff 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -212,6 +212,37 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s } } + if (IS_ARROWLAKE(gt->i915)) { + bool too_old = false; + + /* + * ARL requires a newer firmware than MTL did (102.0.10.1878) but the + * firmware is actually common. So, need to do an explicit version check + * here rather than using a separate table entry. And if the older + * MTL-only version is found, then just don't use GSC rather than aborting + * the driver load. + */ + if (gsc->release.major < 102) { + too_old = true; + } else if (gsc->release.major == 102) { + if (gsc->release.minor == 0) { + if (gsc->release.patch < 10) { + too_old = true; + } else if (gsc->release.patch == 10) { + if (gsc->release.build < 1878) + too_old = true; + } + } + } + + if (too_old) { + gt_info(gt, "GSC firmware too old for ARL, got %d.%d.%d.%d but need at least 102.0.10.1878", + gsc->release.major, gsc->release.minor, + gsc->release.patch, gsc->release.build); + return -EINVAL; + } + } + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index d80278eb45d7..ec33ad942115 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -698,12 +698,18 @@ static int check_gsc_manifest(struct intel_gt *gt, const struct firmware *fw, struct intel_uc_fw *uc_fw) { + int ret; + switch (uc_fw->type) { case INTEL_UC_FW_TYPE_HUC: - intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size); + ret = intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size); + if (ret) + return ret; break; case INTEL_UC_FW_TYPE_GSC: - intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size); + ret = intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size); + if (ret) + return ret; break; default: MISSING_CASE(uc_fw->type); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d7723dd11c80..110340e02a02 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -546,6 +546,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_LUNARLAKE(i915) (0 && i915) #define IS_BATTLEMAGE(i915) (0 && i915) +#define IS_ARROWLAKE(i915) \ + IS_SUBPLATFORM(i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_ARL) #define IS_DG2_G10(i915) \ IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_G10) #define IS_DG2_G11(i915) \ diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index d26de37719a7..eede5417cb3f 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -203,6 +203,10 @@ static const u16 subplatform_g12_ids[] = { INTEL_DG2_G12_IDS(ID), }; +static const u16 subplatform_arl_ids[] = { + INTEL_ARL_IDS(ID), +}; + static bool find_devid(u16 id, const u16 *p, unsigned int num) { for (; num; num--, p++) { @@ -260,6 +264,9 @@ static void intel_device_info_subplatform_init(struct drm_i915_private *i915) } else if (find_devid(devid, subplatform_g12_ids, ARRAY_SIZE(subplatform_g12_ids))) { mask = BIT(INTEL_SUBPLATFORM_G12); + } else if (find_devid(devid, subplatform_arl_ids, + ARRAY_SIZE(subplatform_arl_ids))) { + mask = BIT(INTEL_SUBPLATFORM_ARL); } GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK); diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index d1a2abc7e513..df73ef94615d 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -127,6 +127,9 @@ enum intel_platform { #define INTEL_SUBPLATFORM_N 1 #define INTEL_SUBPLATFORM_RPLU 2 +/* MTL */ +#define INTEL_SUBPLATFORM_ARL 0 + enum intel_ppgtt_type { INTEL_PPGTT_NONE = I915_GEM_PPGTT_NONE, INTEL_PPGTT_ALIASING = I915_GEM_PPGTT_ALIASING, diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h index b21374f76df2..2bf03ebfcf73 100644 --- a/include/drm/intel/i915_pciids.h +++ b/include/drm/intel/i915_pciids.h @@ -772,15 +772,18 @@ INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) /* MTL */ +#define INTEL_ARL_IDS(MACRO__, ...) \ + MACRO__(0x7D41, ## __VA_ARGS__), \ + MACRO__(0x7D51, ## __VA_ARGS__), \ + MACRO__(0x7D67, ## __VA_ARGS__), \ + MACRO__(0x7DD1, ## __VA_ARGS__) + #define INTEL_MTL_IDS(MACRO__, ...) \ + INTEL_ARL_IDS(MACRO__, ## __VA_ARGS__), \ MACRO__(0x7D40, ## __VA_ARGS__), \ - MACRO__(0x7D41, ## __VA_ARGS__), \ MACRO__(0x7D45, ## __VA_ARGS__), \ - MACRO__(0x7D51, ## __VA_ARGS__), \ MACRO__(0x7D55, ## __VA_ARGS__), \ MACRO__(0x7D60, ## __VA_ARGS__), \ - MACRO__(0x7D67, ## __VA_ARGS__), \ - MACRO__(0x7DD1, ## __VA_ARGS__), \ MACRO__(0x7DD5, ## __VA_ARGS__) /* LNL */ -- cgit From 4786fe29f5a0dd74d9ccdce8c734bde1fb88cf37 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 22 Aug 2024 12:25:57 -0700 Subject: ionic: Prevent tx_timeout due to frequent doorbell ringing With recent work to the doorbell workaround code a small hole was introduced that could cause a tx_timeout. This happens if the rx dbell_deadline goes beyond the netdev watchdog timeout set by the driver (i.e. 2 seconds). Fix this by changing the netdev watchdog timeout to 5 seconds and reduce the max rx dbell_deadline to 4 seconds. The test that can reproduce the issue being fixed is a multi-queue send test via pktgen with the "burst" setting to 1. This causes the queue's doorbell to be rung on every packet sent to the driver, which may result in the device missing doorbells due to the high doorbell rate. Cc: stable@vger.kernel.org Fixes: 4ded136c78f8 ("ionic: add work item for missed-doorbell check") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Link: https://patch.msgid.link/20240822192557.9089-1-brett.creeley@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/pensando/ionic/ionic_dev.h | 2 +- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index c647033f3ad2..f2f07bf88545 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -32,7 +32,7 @@ #define IONIC_ADMIN_DOORBELL_DEADLINE (HZ / 2) /* 500ms */ #define IONIC_TX_DOORBELL_DEADLINE (HZ / 100) /* 10ms */ #define IONIC_RX_MIN_DOORBELL_DEADLINE (HZ / 100) /* 10ms */ -#define IONIC_RX_MAX_DOORBELL_DEADLINE (HZ * 5) /* 5s */ +#define IONIC_RX_MAX_DOORBELL_DEADLINE (HZ * 4) /* 4s */ struct ionic_dev_bar { void __iomem *vaddr; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index aa0cc31dfe6e..86774d9922d8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3220,7 +3220,7 @@ int ionic_lif_alloc(struct ionic *ionic) netdev->netdev_ops = &ionic_netdev_ops; ionic_ethtool_set_ops(netdev); - netdev->watchdog_timeo = 2 * HZ; + netdev->watchdog_timeo = 5 * HZ; netif_carrier_off(netdev); lif->identity = lid; -- cgit From 6b35cc8d9239569700cc7cc737c8ed40b8b9cfdb Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 22 Aug 2024 17:00:20 -0700 Subject: xfs: use XFS_BUF_DADDR_NULL for daddrs in getfsmap code Use XFS_BUF_DADDR_NULL (instead of a magic sentinel value) to mean "this field is null" like the rest of xfs. Cc: wozizhi@huawei.com Fixes: e89c041338ed6 ("xfs: implement the GETFSMAP ioctl") Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Signed-off-by: Chandan Babu R --- fs/xfs/xfs_fsmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 3a30b36779db..613a0ec20412 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -252,7 +252,7 @@ xfs_getfsmap_rec_before_start( const struct xfs_rmap_irec *rec, xfs_daddr_t rec_daddr) { - if (info->low_daddr != -1ULL) + if (info->low_daddr != XFS_BUF_DADDR_NULL) return rec_daddr < info->low_daddr; if (info->low.rm_blockcount) return xfs_rmap_compare(rec, &info->low) < 0; @@ -983,7 +983,7 @@ xfs_getfsmap( info.dev = handlers[i].dev; info.last = false; info.pag = NULL; - info.low_daddr = -1ULL; + info.low_daddr = XFS_BUF_DADDR_NULL; info.low.rm_blockcount = 0; error = handlers[i].fn(tp, dkeys, &info); if (error) -- cgit From ca6448aed4f10ad88eba79055f181eb9a589a7b3 Mon Sep 17 00:00:00 2001 From: Zizhi Wo Date: Thu, 22 Aug 2024 17:00:35 -0700 Subject: xfs: Fix missing interval for missing_owner in xfs fsmap In the fsmap query of xfs, there is an interval missing problem: [root@fedora ~]# xfs_io -c 'fsmap -vvvv' /mnt EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL 0: 253:16 [0..7]: static fs metadata 0 (0..7) 8 1: 253:16 [8..23]: per-AG metadata 0 (8..23) 16 2: 253:16 [24..39]: inode btree 0 (24..39) 16 3: 253:16 [40..47]: per-AG metadata 0 (40..47) 8 4: 253:16 [48..55]: refcount btree 0 (48..55) 8 5: 253:16 [56..103]: per-AG metadata 0 (56..103) 48 6: 253:16 [104..127]: free space 0 (104..127) 24 ...... BUG: [root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt [root@fedora ~]# Normally, we should be able to get [104, 107), but we got nothing. The problem is caused by shifting. The query for the problem-triggered scenario is for the missing_owner interval (e.g. freespace in rmapbt/ unknown space in bnobt), which is obtained by subtraction (gap). For this scenario, the interval is obtained by info->last. However, rec_daddr is calculated based on the start_block recorded in key[1], which is converted by calling XFS_BB_TO_FSBT. Then if rec_daddr does not exceed info->next_daddr, which means keys[1].fmr_physical >> (mp)->m_blkbb_log <= info->next_daddr, no records will be displayed. In the above example, 104 >> (mp)->m_blkbb_log = 12 and 107 >> (mp)->m_blkbb_log = 12, so the two are reduced to 0 and the gap is ignored: before calculate ----------------> after shifting 104(st) 107(ed) 12(st/ed) |---------| | sector size block size Resolve this issue by introducing the "end_daddr" field in xfs_getfsmap_info. This records |key[1].fmr_physical + key[1].length| at the granularity of sector. If the current query is the last, the rec_daddr is end_daddr to prevent missing interval problems caused by shifting. We only need to focus on the last query, because xfs disks are internally aligned with disk blocksize that are powers of two and minimum 512, so there is no problem with shifting in previous queries. After applying this patch, the above problem have been solved: [root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 104 107' /mnt EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL 0: 253:16 [104..106]: free space 0 (104..106) 3 Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl") Signed-off-by: Zizhi Wo Reviewed-by: Darrick J. Wong [djwong: limit the range of end_addr correctly] Signed-off-by: Darrick J. Wong Signed-off-by: Chandan Babu R --- fs/xfs/xfs_fsmap.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 613a0ec20412..71f32354944e 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -162,6 +162,7 @@ struct xfs_getfsmap_info { xfs_daddr_t next_daddr; /* next daddr we expect */ /* daddr of low fsmap key when we're using the rtbitmap */ xfs_daddr_t low_daddr; + xfs_daddr_t end_daddr; /* daddr of high fsmap key */ u64 missing_owner; /* owner of holes */ u32 dev; /* device id */ /* @@ -182,6 +183,7 @@ struct xfs_getfsmap_dev { int (*fn)(struct xfs_trans *tp, const struct xfs_fsmap *keys, struct xfs_getfsmap_info *info); + sector_t nr_sectors; }; /* Compare two getfsmap device handlers. */ @@ -294,6 +296,18 @@ xfs_getfsmap_helper( return 0; } + /* + * For an info->last query, we're looking for a gap between the last + * mapping emitted and the high key specified by userspace. If the + * user's query spans less than 1 fsblock, then info->high and + * info->low will have the same rm_startblock, which causes rec_daddr + * and next_daddr to be the same. Therefore, use the end_daddr that + * we calculated from userspace's high key to synthesize the record. + * Note that if the btree query found a mapping, there won't be a gap. + */ + if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL) + rec_daddr = info->end_daddr; + /* Are we just counting mappings? */ if (info->head->fmh_count == 0) { if (info->head->fmh_entries == UINT_MAX) @@ -904,17 +918,21 @@ xfs_getfsmap( /* Set up our device handlers. */ memset(handlers, 0, sizeof(handlers)); + handlers[0].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev); if (use_rmap) handlers[0].fn = xfs_getfsmap_datadev_rmapbt; else handlers[0].fn = xfs_getfsmap_datadev_bnobt; if (mp->m_logdev_targp != mp->m_ddev_targp) { + handlers[1].nr_sectors = XFS_FSB_TO_BB(mp, + mp->m_sb.sb_logblocks); handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev); handlers[1].fn = xfs_getfsmap_logdev; } #ifdef CONFIG_XFS_RT if (mp->m_rtdev_targp) { + handlers[2].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev); handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap; } @@ -946,6 +964,7 @@ xfs_getfsmap( info.next_daddr = head->fmh_keys[0].fmr_physical + head->fmh_keys[0].fmr_length; + info.end_daddr = XFS_BUF_DADDR_NULL; info.fsmap_recs = fsmap_recs; info.head = head; @@ -966,8 +985,11 @@ xfs_getfsmap( * low key, zero out the low key so that we get * everything from the beginning. */ - if (handlers[i].dev == head->fmh_keys[1].fmr_device) + if (handlers[i].dev == head->fmh_keys[1].fmr_device) { dkeys[1] = head->fmh_keys[1]; + info.end_daddr = min(handlers[i].nr_sectors - 1, + dkeys[1].fmr_physical); + } if (handlers[i].dev > head->fmh_keys[0].fmr_device) memset(&dkeys[0], 0, sizeof(struct xfs_fsmap)); -- cgit From 16e1fbdce9c8d084863fd63cdaff8fb2a54e2f88 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 22 Aug 2024 17:00:51 -0700 Subject: xfs: take m_growlock when running growfsrt Take the grow lock when we're expanding the realtime volume, like we do for the other growfs calls. Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Signed-off-by: Chandan Babu R --- fs/xfs/xfs_rtalloc.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 0c3e96c621a6..776d6c401f62 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -821,34 +821,39 @@ xfs_growfs_rt( /* Needs to have been mounted with an rt device. */ if (!XFS_IS_REALTIME_MOUNT(mp)) return -EINVAL; + + if (!mutex_trylock(&mp->m_growlock)) + return -EWOULDBLOCK; /* * Mount should fail if the rt bitmap/summary files don't load, but * we'll check anyway. */ + error = -EINVAL; if (!mp->m_rbmip || !mp->m_rsumip) - return -EINVAL; + goto out_unlock; /* Shrink not supported. */ if (in->newblocks <= sbp->sb_rblocks) - return -EINVAL; + goto out_unlock; /* Can only change rt extent size when adding rt volume. */ if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize) - return -EINVAL; + goto out_unlock; /* Range check the extent size. */ if (XFS_FSB_TO_B(mp, in->extsize) > XFS_MAX_RTEXTSIZE || XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE) - return -EINVAL; + goto out_unlock; /* Unsupported realtime features. */ + error = -EOPNOTSUPP; if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp)) - return -EOPNOTSUPP; + goto out_unlock; nrblocks = in->newblocks; error = xfs_sb_validate_fsb_count(sbp, nrblocks); if (error) - return error; + goto out_unlock; /* * Read in the last block of the device, make sure it exists. */ @@ -856,7 +861,7 @@ xfs_growfs_rt( XFS_FSB_TO_BB(mp, nrblocks - 1), XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); if (error) - return error; + goto out_unlock; xfs_buf_relse(bp); /* @@ -864,8 +869,10 @@ xfs_growfs_rt( */ nrextents = nrblocks; do_div(nrextents, in->extsize); - if (!xfs_validate_rtextents(nrextents)) - return -EINVAL; + if (!xfs_validate_rtextents(nrextents)) { + error = -EINVAL; + goto out_unlock; + } nrbmblocks = xfs_rtbitmap_blockcount(mp, nrextents); nrextslog = xfs_compute_rextslog(nrextents); nrsumlevels = nrextslog + 1; @@ -876,8 +883,11 @@ xfs_growfs_rt( * the log. This prevents us from getting a log overflow, * since we'll log basically the whole summary file at once. */ - if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1)) - return -EINVAL; + if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1)) { + error = -EINVAL; + goto out_unlock; + } + /* * Get the old block counts for bitmap and summary inodes. * These can't change since other growfs callers are locked out. @@ -889,10 +899,10 @@ xfs_growfs_rt( */ error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip); if (error) - return error; + goto out_unlock; error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip); if (error) - return error; + goto out_unlock; rsum_cache = mp->m_rsum_cache; if (nrbmblocks != sbp->sb_rbmblocks) @@ -1059,6 +1069,8 @@ out_free: } } +out_unlock: + mutex_unlock(&mp->m_growlock); return error; } -- cgit From a24cae8fc1f13f6f6929351309f248fd2e9351ce Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 22 Aug 2024 17:01:07 -0700 Subject: xfs: reset rootdir extent size hint after growfsrt If growfsrt is run on a filesystem that doesn't have a rt volume, it's possible to change the rt extent size. If the root directory was previously set up with an inherited extent size hint and rtinherit, it's possible that the hint is no longer a multiple of the rt extent size. Although the verifiers don't complain about this, xfs_repair will, so if we detect this situation, log the root directory to clean it up. This is still racy, but it's better than nothing. Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Signed-off-by: Chandan Babu R --- fs/xfs/xfs_rtalloc.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 776d6c401f62..ebeab8e4dab1 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -784,6 +784,39 @@ xfs_alloc_rsum_cache( xfs_warn(mp, "could not allocate realtime summary cache"); } +/* + * If we changed the rt extent size (meaning there was no rt volume previously) + * and the root directory had EXTSZINHERIT and RTINHERIT set, it's possible + * that the extent size hint on the root directory is no longer congruent with + * the new rt extent size. Log the rootdir inode to fix this. + */ +static int +xfs_growfs_rt_fixup_extsize( + struct xfs_mount *mp) +{ + struct xfs_inode *ip = mp->m_rootip; + struct xfs_trans *tp; + int error = 0; + + xfs_ilock(ip, XFS_IOLOCK_EXCL); + if (!(ip->i_diflags & XFS_DIFLAG_RTINHERIT) || + !(ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT)) + goto out_iolock; + + error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_ichange, 0, 0, false, + &tp); + if (error) + goto out_iolock; + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + +out_iolock: + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return error; +} + /* * Visible (exported) functions. */ @@ -812,6 +845,7 @@ xfs_growfs_rt( xfs_extlen_t rsumblocks; /* current number of rt summary blks */ xfs_sb_t *sbp; /* old superblock */ uint8_t *rsum_cache; /* old summary cache */ + xfs_agblock_t old_rextsize = mp->m_sb.sb_rextsize; sbp = &mp->m_sb; @@ -1046,6 +1080,12 @@ error_cancel: if (error) goto out_free; + if (old_rextsize != in->extsize) { + error = xfs_growfs_rt_fixup_extsize(mp); + if (error) + goto out_free; + } + /* Update secondary superblocks now the physical grow has completed */ error = xfs_update_secondary_sbs(mp); -- cgit From 1eb52589a299f8b29df0f214206da6616e33a8b6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 09:01:29 -0700 Subject: drm/xe: Invalidate media_gt TLBs Testing on LNL has shown media TLBs need to be invalidated via the GuC, update xe_vm_invalidate_vma appropriately. v2: Fix 2 tile case v3: Include missing local change Fixes: 3330361543fc ("drm/xe/lnl: Add LNL platform definition") Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240820160129.986889-1-matthew.brost@intel.com (cherry picked from commit 77cc3f6c58b1b28cee73904946c46a1415187d04) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c7561a56abaf..50e8fc49ba6c 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3341,9 +3341,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = xe_vma_vm(vma)->xe; struct xe_tile *tile; - struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE]; - u32 tile_needs_invalidate = 0; + struct xe_gt_tlb_invalidation_fence + fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; u8 id; + u32 fence_id = 0; int ret = 0; xe_assert(xe, !xe_vma_is_null(vma)); @@ -3371,27 +3372,37 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) if (xe_pt_zap_ptes(tile, vma)) { xe_device_wmb(xe); xe_gt_tlb_invalidation_fence_init(tile->primary_gt, - &fence[id], true); + &fence[fence_id], + true); - /* - * FIXME: We potentially need to invalidate multiple - * GTs within the tile - */ ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, - &fence[id], vma); + &fence[fence_id], vma); if (ret < 0) { - xe_gt_tlb_invalidation_fence_fini(&fence[id]); + xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); goto wait; } + ++fence_id; - tile_needs_invalidate |= BIT(id); + if (!tile->media_gt) + continue; + + xe_gt_tlb_invalidation_fence_init(tile->media_gt, + &fence[fence_id], + true); + + ret = xe_gt_tlb_invalidation_vma(tile->media_gt, + &fence[fence_id], vma); + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); + goto wait; + } + ++fence_id; } } wait: - for_each_tile(tile, xe, id) - if (tile_needs_invalidate & BIT(id)) - xe_gt_tlb_invalidation_fence_wait(&fence[id]); + for (id = 0; id < fence_id; ++id) + xe_gt_tlb_invalidation_fence_wait(&fence[id]); vma->tile_invalidated = vma->tile_mask; -- cgit From ecb54277cb63c273e8d74272e5b9bfd80c2185d9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 27 Aug 2024 11:30:10 +0100 Subject: btrfs: fix uninitialized return value from btrfs_reclaim_sweep() The return variable 'ret' at btrfs_reclaim_sweep() is never assigned if none of the space infos is reclaimable (for example if periodic reclaim is disabled, which is the default), so we return an undefined value. This can be fixed my making btrfs_reclaim_sweep() not return any value as well as do_reclaim_sweep() because: 1) do_reclaim_sweep() always returns 0, so we can make it return void; 2) The only caller of btrfs_reclaim_sweep() (btrfs_reclaim_bgs()) doesn't care about its return value, and in its context there's nothing to do about any errors anyway. Therefore remove the return value from btrfs_reclaim_sweep() and do_reclaim_sweep(). Fixes: e4ca3932ae90 ("btrfs: periodic block_group reclaim") Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/space-info.c | 17 +++++------------ fs/btrfs/space-info.h | 2 +- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 68e14fd48638..c691784b4660 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -1985,8 +1985,8 @@ static bool is_reclaim_urgent(struct btrfs_space_info *space_info) return unalloc < data_chunk_size; } -static int do_reclaim_sweep(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info, int raid) +static void do_reclaim_sweep(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, int raid) { struct btrfs_block_group *bg; int thresh_pct; @@ -2031,7 +2031,6 @@ again: } up_read(&space_info->groups_sem); - return 0; } void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes) @@ -2074,21 +2073,15 @@ bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info) return ret; } -int btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info) +void btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info) { - int ret; int raid; struct btrfs_space_info *space_info; list_for_each_entry(space_info, &fs_info->space_info, list) { if (!btrfs_should_periodic_reclaim(space_info)) continue; - for (raid = 0; raid < BTRFS_NR_RAID_TYPES; raid++) { - ret = do_reclaim_sweep(fs_info, space_info, raid); - if (ret) - return ret; - } + for (raid = 0; raid < BTRFS_NR_RAID_TYPES; raid++) + do_reclaim_sweep(fs_info, space_info, raid); } - - return ret; } diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index 88b44221ce97..5602026c5e14 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -294,6 +294,6 @@ void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s6 void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool ready); bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info); int btrfs_calc_reclaim_threshold(struct btrfs_space_info *space_info); -int btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info); +void btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info); #endif /* BTRFS_SPACE_INFO_H */ -- cgit From bc21000e99f92a6b5540d7267c6b22806c5c33d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 24 Aug 2024 18:19:01 +0000 Subject: net_sched: sch_fq: fix incorrect behavior for small weights fq_dequeue() has a complex logic to find packets in one of the 3 bands. As Neal found out, it is possible that one band has a deficit smaller than its weight. fq_dequeue() can return NULL while some packets are elligible for immediate transmit. In this case, more than one iteration is needed to refill pband->credit. With default parameters (weights 589824 196608 65536) bug can trigger if large BIG TCP packets are sent to the lowest priority band. Bisected-by: John Sperbeck Diagnosed-by: Neal Cardwell Fixes: 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20240824181901.953776-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_fq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 238974725679..19a49af5a9e5 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -663,7 +663,9 @@ begin: pband = &q->band_flows[q->band_nr]; pband->credit = min(pband->credit + pband->quantum, pband->quantum); - goto begin; + if (pband->credit > 0) + goto begin; + retry = 0; } if (q->time_next_delayed_flow != ~0ULL) qdisc_watchdog_schedule_range_ns(&q->watchdog, -- cgit From 70c261d500951cf3ea0fcf32651aab9a65a91471 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 26 Aug 2024 15:03:23 +0200 Subject: netfilter: nf_tables_ipv6: consider network offset in netdev/egress validation From netdev/egress, skb->len can include the ethernet header, therefore, subtract network offset from skb->len when validating IPv6 packet length. Fixes: 42df6e1d221d ("netfilter: Introduce egress hook") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv6.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 467d59b9e533..a0633eeaec97 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -31,8 +31,8 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) struct ipv6hdr *ip6h, _ip6h; unsigned int thoff = 0; unsigned short frag_off; + u32 pkt_len, skb_len; int protohdr; - u32 pkt_len; ip6h = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb), sizeof(*ip6h), &_ip6h); @@ -43,7 +43,8 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) return -1; pkt_len = ntohs(ip6h->payload_len); - if (pkt_len + sizeof(*ip6h) > pkt->skb->len) + skb_len = pkt->skb->len - skb_network_offset(pkt->skb); + if (pkt_len + sizeof(*ip6h) > skb_len) return -1; protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); -- cgit From 08d08e2e9f0ad1af0044e4747723f66677c35ee9 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Mon, 29 Jul 2024 09:29:34 -0400 Subject: tpm: ibmvtpm: Call tpm2_sessions_init() to initialize session support Commit d2add27cf2b8 ("tpm: Add NULL primary creation") introduced CONFIG_TCG_TPM2_HMAC. When this option is enabled on ppc64 then the following message appears in the kernel log due to a missing call to tpm2_sessions_init(). [ 2.654549] tpm tpm0: auth session is not active Add the missing call to tpm2_session_init() to the ibmvtpm driver to resolve this issue. Cc: stable@vger.kernel.org # v6.10+ Fixes: d2add27cf2b8 ("tpm: Add NULL primary creation") Signed-off-by: Stefan Berger Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm_ibmvtpm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index d3989b257f42..1e5b107d1f3b 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -698,6 +698,10 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, rc = tpm2_get_cc_attrs_tbl(chip); if (rc) goto init_irq_cleanup; + + rc = tpm2_sessions_init(chip); + if (rc) + goto init_irq_cleanup; } return tpm_chip_register(chip); -- cgit From e8497d6951ee8541d73784f9aac9942a7f239980 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 23 Aug 2024 18:25:37 +0200 Subject: selftests: forwarding: no_forwarding: Down ports on cleanup This test neglects to put ports down on cleanup. Fix it. Fixes: 476a4f05d9b8 ("selftests: forwarding: add a no_forwarding.sh test") Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://patch.msgid.link/0baf91dc24b95ae0cadfdf5db05b74888e6a228a.1724430120.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/no_forwarding.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh index af3b398d13f0..9e677aa64a06 100755 --- a/tools/testing/selftests/net/forwarding/no_forwarding.sh +++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh @@ -233,6 +233,9 @@ cleanup() { pre_cleanup + ip link set dev $swp2 down + ip link set dev $swp1 down + h2_destroy h1_destroy -- cgit From 65a3cce43d5b4c53cf16b0be1a03991f665a0806 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 26 Aug 2024 19:15:11 +0200 Subject: selftests: forwarding: local_termination: Down ports on cleanup This test neglects to put ports down on cleanup. Fix it. Fixes: 90b9566aa5cd ("selftests: forwarding: add a test for local_termination.sh") Signed-off-by: Petr Machata Link: https://patch.msgid.link/bf9b79f45de378f88344d44550f0a5052b386199.1724692132.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/local_termination.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 648868f74604..c35548767756 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -571,6 +571,10 @@ vlan_over_vlan_aware_bridge() cleanup() { pre_cleanup + + ip link set $h2 down + ip link set $h1 down + vrf_cleanup } -- cgit From ec13009472f4a756288eb4e18e20a7845da98d10 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Fri, 23 Aug 2024 06:10:54 +0300 Subject: bonding: implement xdo_dev_state_free and call it after deletion Add this implementation for bonding, so hardware resources can be freed from the active slave after xfrm state is deleted. The netdev used to invoke xdo_dev_state_free callback, is saved in the xfrm state (xs->xso.real_dev), which is also the bond's active slave. To prevent it from being freed, acquire netdev reference before leaving RCU read-side critical section, and release it after callback is done. And call it when deleting all SAs from old active real interface while switching current active slave. Fixes: 9a5605505d9c ("bonding: Add struct bond_ipesc to manage SA") Signed-off-by: Jianbo Liu Signed-off-by: Tariq Toukan Reviewed-by: Hangbin Liu Acked-by: Jay Vosburgh Link: https://patch.msgid.link/20240823031056.110999-2-jianbol@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f74bacf071fc..2b4b7ad9cd2d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -581,12 +581,47 @@ static void bond_ipsec_del_sa_all(struct bonding *bond) __func__); } else { slave->dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs); + if (slave->dev->xfrmdev_ops->xdo_dev_state_free) + slave->dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs); } } spin_unlock_bh(&bond->ipsec_lock); rcu_read_unlock(); } +static void bond_ipsec_free_sa(struct xfrm_state *xs) +{ + struct net_device *bond_dev = xs->xso.dev; + struct net_device *real_dev; + netdevice_tracker tracker; + struct bonding *bond; + struct slave *slave; + + if (!bond_dev) + return; + + rcu_read_lock(); + bond = netdev_priv(bond_dev); + slave = rcu_dereference(bond->curr_active_slave); + real_dev = slave ? slave->dev : NULL; + netdev_hold(real_dev, &tracker, GFP_ATOMIC); + rcu_read_unlock(); + + if (!slave) + goto out; + + if (!xs->xso.real_dev) + goto out; + + WARN_ON(xs->xso.real_dev != real_dev); + + if (real_dev && real_dev->xfrmdev_ops && + real_dev->xfrmdev_ops->xdo_dev_state_free) + real_dev->xfrmdev_ops->xdo_dev_state_free(xs); +out: + netdev_put(real_dev, &tracker); +} + /** * bond_ipsec_offload_ok - can this packet use the xfrm hw offload * @skb: current data packet @@ -627,6 +662,7 @@ out: static const struct xfrmdev_ops bond_xfrmdev_ops = { .xdo_dev_state_add = bond_ipsec_add_sa, .xdo_dev_state_delete = bond_ipsec_del_sa, + .xdo_dev_state_free = bond_ipsec_free_sa, .xdo_dev_offload_ok = bond_ipsec_offload_ok, }; #endif /* CONFIG_XFRM_OFFLOAD */ -- cgit From 907ed83a7583e8ffede88c5ac088392701a7d458 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Fri, 23 Aug 2024 06:10:55 +0300 Subject: bonding: extract the use of real_device into local variable Add a local variable for slave->dev, to prepare for the lock change in the next patch. There is no functionality change. Fixes: 9a5605505d9c ("bonding: Add struct bond_ipesc to manage SA") Signed-off-by: Jianbo Liu Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Reviewed-by: Hangbin Liu Acked-by: Jay Vosburgh Link: https://patch.msgid.link/20240823031056.110999-3-jianbol@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 58 +++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2b4b7ad9cd2d..f98491748420 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -427,6 +427,7 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs, struct netlink_ext_ack *extack) { struct net_device *bond_dev = xs->xso.dev; + struct net_device *real_dev; struct bond_ipsec *ipsec; struct bonding *bond; struct slave *slave; @@ -443,9 +444,10 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs, return -ENODEV; } - if (!slave->dev->xfrmdev_ops || - !slave->dev->xfrmdev_ops->xdo_dev_state_add || - netif_is_bond_master(slave->dev)) { + real_dev = slave->dev; + if (!real_dev->xfrmdev_ops || + !real_dev->xfrmdev_ops->xdo_dev_state_add || + netif_is_bond_master(real_dev)) { NL_SET_ERR_MSG_MOD(extack, "Slave does not support ipsec offload"); rcu_read_unlock(); return -EINVAL; @@ -456,9 +458,9 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs, rcu_read_unlock(); return -ENOMEM; } - xs->xso.real_dev = slave->dev; - err = slave->dev->xfrmdev_ops->xdo_dev_state_add(xs, extack); + xs->xso.real_dev = real_dev; + err = real_dev->xfrmdev_ops->xdo_dev_state_add(xs, extack); if (!err) { ipsec->xs = xs; INIT_LIST_HEAD(&ipsec->list); @@ -475,6 +477,7 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs, static void bond_ipsec_add_sa_all(struct bonding *bond) { struct net_device *bond_dev = bond->dev; + struct net_device *real_dev; struct bond_ipsec *ipsec; struct slave *slave; @@ -483,12 +486,13 @@ static void bond_ipsec_add_sa_all(struct bonding *bond) if (!slave) goto out; - if (!slave->dev->xfrmdev_ops || - !slave->dev->xfrmdev_ops->xdo_dev_state_add || - netif_is_bond_master(slave->dev)) { + real_dev = slave->dev; + if (!real_dev->xfrmdev_ops || + !real_dev->xfrmdev_ops->xdo_dev_state_add || + netif_is_bond_master(real_dev)) { spin_lock_bh(&bond->ipsec_lock); if (!list_empty(&bond->ipsec_list)) - slave_warn(bond_dev, slave->dev, + slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_add\n", __func__); spin_unlock_bh(&bond->ipsec_lock); @@ -497,9 +501,9 @@ static void bond_ipsec_add_sa_all(struct bonding *bond) spin_lock_bh(&bond->ipsec_lock); list_for_each_entry(ipsec, &bond->ipsec_list, list) { - ipsec->xs->xso.real_dev = slave->dev; - if (slave->dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs, NULL)) { - slave_warn(bond_dev, slave->dev, "%s: failed to add SA\n", __func__); + ipsec->xs->xso.real_dev = real_dev; + if (real_dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs, NULL)) { + slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__); ipsec->xs->xso.real_dev = NULL; } } @@ -515,6 +519,7 @@ out: static void bond_ipsec_del_sa(struct xfrm_state *xs) { struct net_device *bond_dev = xs->xso.dev; + struct net_device *real_dev; struct bond_ipsec *ipsec; struct bonding *bond; struct slave *slave; @@ -532,16 +537,17 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) if (!xs->xso.real_dev) goto out; - WARN_ON(xs->xso.real_dev != slave->dev); + real_dev = slave->dev; + WARN_ON(xs->xso.real_dev != real_dev); - if (!slave->dev->xfrmdev_ops || - !slave->dev->xfrmdev_ops->xdo_dev_state_delete || - netif_is_bond_master(slave->dev)) { - slave_warn(bond_dev, slave->dev, "%s: no slave xdo_dev_state_delete\n", __func__); + if (!real_dev->xfrmdev_ops || + !real_dev->xfrmdev_ops->xdo_dev_state_delete || + netif_is_bond_master(real_dev)) { + slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_delete\n", __func__); goto out; } - slave->dev->xfrmdev_ops->xdo_dev_state_delete(xs); + real_dev->xfrmdev_ops->xdo_dev_state_delete(xs); out: spin_lock_bh(&bond->ipsec_lock); list_for_each_entry(ipsec, &bond->ipsec_list, list) { @@ -558,6 +564,7 @@ out: static void bond_ipsec_del_sa_all(struct bonding *bond) { struct net_device *bond_dev = bond->dev; + struct net_device *real_dev; struct bond_ipsec *ipsec; struct slave *slave; @@ -568,21 +575,22 @@ static void bond_ipsec_del_sa_all(struct bonding *bond) return; } + real_dev = slave->dev; spin_lock_bh(&bond->ipsec_lock); list_for_each_entry(ipsec, &bond->ipsec_list, list) { if (!ipsec->xs->xso.real_dev) continue; - if (!slave->dev->xfrmdev_ops || - !slave->dev->xfrmdev_ops->xdo_dev_state_delete || - netif_is_bond_master(slave->dev)) { - slave_warn(bond_dev, slave->dev, + if (!real_dev->xfrmdev_ops || + !real_dev->xfrmdev_ops->xdo_dev_state_delete || + netif_is_bond_master(real_dev)) { + slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_delete\n", __func__); } else { - slave->dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs); - if (slave->dev->xfrmdev_ops->xdo_dev_state_free) - slave->dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs); + real_dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs); + if (real_dev->xfrmdev_ops->xdo_dev_state_free) + real_dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs); } } spin_unlock_bh(&bond->ipsec_lock); -- cgit From 2aeeef906d5a526dc60cf4af92eda69836c39b1f Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Fri, 23 Aug 2024 06:10:56 +0300 Subject: bonding: change ipsec_lock from spin lock to mutex In the cited commit, bond->ipsec_lock is added to protect ipsec_list, hence xdo_dev_state_add and xdo_dev_state_delete are called inside this lock. As ipsec_lock is a spin lock and such xfrmdev ops may sleep, "scheduling while atomic" will be triggered when changing bond's active slave. [ 101.055189] BUG: scheduling while atomic: bash/902/0x00000200 [ 101.055726] Modules linked in: [ 101.058211] CPU: 3 PID: 902 Comm: bash Not tainted 6.9.0-rc4+ #1 [ 101.058760] Hardware name: [ 101.059434] Call Trace: [ 101.059436] [ 101.060873] dump_stack_lvl+0x51/0x60 [ 101.061275] __schedule_bug+0x4e/0x60 [ 101.061682] __schedule+0x612/0x7c0 [ 101.062078] ? __mod_timer+0x25c/0x370 [ 101.062486] schedule+0x25/0xd0 [ 101.062845] schedule_timeout+0x77/0xf0 [ 101.063265] ? asm_common_interrupt+0x22/0x40 [ 101.063724] ? __bpf_trace_itimer_state+0x10/0x10 [ 101.064215] __wait_for_common+0x87/0x190 [ 101.064648] ? usleep_range_state+0x90/0x90 [ 101.065091] cmd_exec+0x437/0xb20 [mlx5_core] [ 101.065569] mlx5_cmd_do+0x1e/0x40 [mlx5_core] [ 101.066051] mlx5_cmd_exec+0x18/0x30 [mlx5_core] [ 101.066552] mlx5_crypto_create_dek_key+0xea/0x120 [mlx5_core] [ 101.067163] ? bonding_sysfs_store_option+0x4d/0x80 [bonding] [ 101.067738] ? kmalloc_trace+0x4d/0x350 [ 101.068156] mlx5_ipsec_create_sa_ctx+0x33/0x100 [mlx5_core] [ 101.068747] mlx5e_xfrm_add_state+0x47b/0xaa0 [mlx5_core] [ 101.069312] bond_change_active_slave+0x392/0x900 [bonding] [ 101.069868] bond_option_active_slave_set+0x1c2/0x240 [bonding] [ 101.070454] __bond_opt_set+0xa6/0x430 [bonding] [ 101.070935] __bond_opt_set_notify+0x2f/0x90 [bonding] [ 101.071453] bond_opt_tryset_rtnl+0x72/0xb0 [bonding] [ 101.071965] bonding_sysfs_store_option+0x4d/0x80 [bonding] [ 101.072567] kernfs_fop_write_iter+0x10c/0x1a0 [ 101.073033] vfs_write+0x2d8/0x400 [ 101.073416] ? alloc_fd+0x48/0x180 [ 101.073798] ksys_write+0x5f/0xe0 [ 101.074175] do_syscall_64+0x52/0x110 [ 101.074576] entry_SYSCALL_64_after_hwframe+0x4b/0x53 As bond_ipsec_add_sa_all and bond_ipsec_del_sa_all are only called from bond_change_active_slave, which requires holding the RTNL lock. And bond_ipsec_add_sa and bond_ipsec_del_sa are xfrm state xdo_dev_state_add and xdo_dev_state_delete APIs, which are in user context. So ipsec_lock doesn't have to be spin lock, change it to mutex, and thus the above issue can be resolved. Fixes: 9a5605505d9c ("bonding: Add struct bond_ipesc to manage SA") Signed-off-by: Jianbo Liu Signed-off-by: Tariq Toukan Reviewed-by: Hangbin Liu Acked-by: Jay Vosburgh Link: https://patch.msgid.link/20240823031056.110999-4-jianbol@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 79 ++++++++++++++++++++++------------------- include/net/bonding.h | 2 +- 2 files changed, 44 insertions(+), 37 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f98491748420..bb9c3d6ef435 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -428,6 +428,7 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs, { struct net_device *bond_dev = xs->xso.dev; struct net_device *real_dev; + netdevice_tracker tracker; struct bond_ipsec *ipsec; struct bonding *bond; struct slave *slave; @@ -439,24 +440,26 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs, rcu_read_lock(); bond = netdev_priv(bond_dev); slave = rcu_dereference(bond->curr_active_slave); - if (!slave) { - rcu_read_unlock(); - return -ENODEV; + real_dev = slave ? slave->dev : NULL; + netdev_hold(real_dev, &tracker, GFP_ATOMIC); + rcu_read_unlock(); + if (!real_dev) { + err = -ENODEV; + goto out; } - real_dev = slave->dev; if (!real_dev->xfrmdev_ops || !real_dev->xfrmdev_ops->xdo_dev_state_add || netif_is_bond_master(real_dev)) { NL_SET_ERR_MSG_MOD(extack, "Slave does not support ipsec offload"); - rcu_read_unlock(); - return -EINVAL; + err = -EINVAL; + goto out; } - ipsec = kmalloc(sizeof(*ipsec), GFP_ATOMIC); + ipsec = kmalloc(sizeof(*ipsec), GFP_KERNEL); if (!ipsec) { - rcu_read_unlock(); - return -ENOMEM; + err = -ENOMEM; + goto out; } xs->xso.real_dev = real_dev; @@ -464,13 +467,14 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs, if (!err) { ipsec->xs = xs; INIT_LIST_HEAD(&ipsec->list); - spin_lock_bh(&bond->ipsec_lock); + mutex_lock(&bond->ipsec_lock); list_add(&ipsec->list, &bond->ipsec_list); - spin_unlock_bh(&bond->ipsec_lock); + mutex_unlock(&bond->ipsec_lock); } else { kfree(ipsec); } - rcu_read_unlock(); +out: + netdev_put(real_dev, &tracker); return err; } @@ -481,35 +485,35 @@ static void bond_ipsec_add_sa_all(struct bonding *bond) struct bond_ipsec *ipsec; struct slave *slave; - rcu_read_lock(); - slave = rcu_dereference(bond->curr_active_slave); - if (!slave) - goto out; + slave = rtnl_dereference(bond->curr_active_slave); + real_dev = slave ? slave->dev : NULL; + if (!real_dev) + return; - real_dev = slave->dev; + mutex_lock(&bond->ipsec_lock); if (!real_dev->xfrmdev_ops || !real_dev->xfrmdev_ops->xdo_dev_state_add || netif_is_bond_master(real_dev)) { - spin_lock_bh(&bond->ipsec_lock); if (!list_empty(&bond->ipsec_list)) slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_add\n", __func__); - spin_unlock_bh(&bond->ipsec_lock); goto out; } - spin_lock_bh(&bond->ipsec_lock); list_for_each_entry(ipsec, &bond->ipsec_list, list) { + /* If new state is added before ipsec_lock acquired */ + if (ipsec->xs->xso.real_dev == real_dev) + continue; + ipsec->xs->xso.real_dev = real_dev; if (real_dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs, NULL)) { slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__); ipsec->xs->xso.real_dev = NULL; } } - spin_unlock_bh(&bond->ipsec_lock); out: - rcu_read_unlock(); + mutex_unlock(&bond->ipsec_lock); } /** @@ -520,6 +524,7 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) { struct net_device *bond_dev = xs->xso.dev; struct net_device *real_dev; + netdevice_tracker tracker; struct bond_ipsec *ipsec; struct bonding *bond; struct slave *slave; @@ -530,6 +535,9 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) rcu_read_lock(); bond = netdev_priv(bond_dev); slave = rcu_dereference(bond->curr_active_slave); + real_dev = slave ? slave->dev : NULL; + netdev_hold(real_dev, &tracker, GFP_ATOMIC); + rcu_read_unlock(); if (!slave) goto out; @@ -537,7 +545,6 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) if (!xs->xso.real_dev) goto out; - real_dev = slave->dev; WARN_ON(xs->xso.real_dev != real_dev); if (!real_dev->xfrmdev_ops || @@ -549,7 +556,8 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) real_dev->xfrmdev_ops->xdo_dev_state_delete(xs); out: - spin_lock_bh(&bond->ipsec_lock); + netdev_put(real_dev, &tracker); + mutex_lock(&bond->ipsec_lock); list_for_each_entry(ipsec, &bond->ipsec_list, list) { if (ipsec->xs == xs) { list_del(&ipsec->list); @@ -557,8 +565,7 @@ out: break; } } - spin_unlock_bh(&bond->ipsec_lock); - rcu_read_unlock(); + mutex_unlock(&bond->ipsec_lock); } static void bond_ipsec_del_sa_all(struct bonding *bond) @@ -568,15 +575,12 @@ static void bond_ipsec_del_sa_all(struct bonding *bond) struct bond_ipsec *ipsec; struct slave *slave; - rcu_read_lock(); - slave = rcu_dereference(bond->curr_active_slave); - if (!slave) { - rcu_read_unlock(); + slave = rtnl_dereference(bond->curr_active_slave); + real_dev = slave ? slave->dev : NULL; + if (!real_dev) return; - } - real_dev = slave->dev; - spin_lock_bh(&bond->ipsec_lock); + mutex_lock(&bond->ipsec_lock); list_for_each_entry(ipsec, &bond->ipsec_list, list) { if (!ipsec->xs->xso.real_dev) continue; @@ -593,8 +597,7 @@ static void bond_ipsec_del_sa_all(struct bonding *bond) real_dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs); } } - spin_unlock_bh(&bond->ipsec_lock); - rcu_read_unlock(); + mutex_unlock(&bond->ipsec_lock); } static void bond_ipsec_free_sa(struct xfrm_state *xs) @@ -5921,7 +5924,7 @@ void bond_setup(struct net_device *bond_dev) /* set up xfrm device ops (only supported in active-backup right now) */ bond_dev->xfrmdev_ops = &bond_xfrmdev_ops; INIT_LIST_HEAD(&bond->ipsec_list); - spin_lock_init(&bond->ipsec_lock); + mutex_init(&bond->ipsec_lock); #endif /* CONFIG_XFRM_OFFLOAD */ /* don't acquire bond device's netif_tx_lock when transmitting */ @@ -5970,6 +5973,10 @@ static void bond_uninit(struct net_device *bond_dev) __bond_release_one(bond_dev, slave->dev, true, true); netdev_info(bond_dev, "Released all slaves\n"); +#ifdef CONFIG_XFRM_OFFLOAD + mutex_destroy(&bond->ipsec_lock); +#endif /* CONFIG_XFRM_OFFLOAD */ + bond_set_slave_arr(bond, NULL, NULL); list_del_rcu(&bond->bond_list); diff --git a/include/net/bonding.h b/include/net/bonding.h index b61fb1aa3a56..8bb5f016969f 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -260,7 +260,7 @@ struct bonding { #ifdef CONFIG_XFRM_OFFLOAD struct list_head ipsec_list; /* protecting ipsec_list */ - spinlock_t ipsec_lock; + struct mutex ipsec_lock; #endif /* CONFIG_XFRM_OFFLOAD */ struct bpf_prog *xdp_prog; }; -- cgit From 7d2fc5a4038df307393769e198a8b1bf189fd6e5 Mon Sep 17 00:00:00 2001 From: Jon Mulder Date: Mon, 26 Aug 2024 17:10:32 -0400 Subject: docs: rust: remove unintended blockquote in Quick Start Remove indentation within the "Hacking" section of the Rust Quick Start guide, i.e. remove a `
` HTML element from the rendered documentation. Reported-by: Miguel Ojeda Closes: https://github.com/Rust-for-Linux/linux/issues/1103 Fixes: d07479b211b7 ("docs: add Rust documentation") Signed-off-by: Jon Mulder Link: https://lore.kernel.org/r/20240826-pr-docs-rust-remove-quickstart-blockquote-v1-1-c51317d8d71a@gmail.com [ Added Fixes tag, reworded slightly and matched title to a previous, similar commit. - Miguel ] Signed-off-by: Miguel Ojeda --- Documentation/rust/quick-start.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst index d06a36106cd4..8e3ad9678719 100644 --- a/Documentation/rust/quick-start.rst +++ b/Documentation/rust/quick-start.rst @@ -305,7 +305,7 @@ If GDB/Binutils is used and Rust symbols are not getting demangled, the reason is the toolchain does not support Rust's new v0 mangling scheme yet. There are a few ways out: - - Install a newer release (GDB >= 10.2, Binutils >= 2.36). +- Install a newer release (GDB >= 10.2, Binutils >= 2.36). - - Some versions of GDB (e.g. vanilla GDB 10.1) are able to use - the pre-demangled names embedded in the debug info (``CONFIG_DEBUG_INFO``). +- Some versions of GDB (e.g. vanilla GDB 10.1) are able to use + the pre-demangled names embedded in the debug info (``CONFIG_DEBUG_INFO``). -- cgit From 8e95e53ca379a03d7f5bfc567a610baa85e15424 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Tue, 27 Aug 2024 12:04:03 +0200 Subject: rust: allow `stable_features` lint Support for several Rust compiler versions started in commit 63b27f4a0074 ("rust: start supporting several compiler versions"). Since we currently need to use a number of unstable features in the kernel, it is a matter of time until one gets stabilized and the `stable_features` lint warns. For instance, the `new_uninit` feature may become stable soon, which would give us multiple warnings like the following: warning: the feature `new_uninit` has been stable since 1.82.0-dev and no longer requires an attribute to enable --> rust/kernel/lib.rs:17:12 | 17 | #![feature(new_uninit)] | ^^^^^^^^^^ | = note: `#[warn(stable_features)]` on by default Thus allow the `stable_features` lint to avoid such warnings. This is the simplest approach -- we do not have that many cases (and the goal is to stop using unstable features anyway) and cleanups can be easily done when we decide to update the minimum version. An alternative would be to conditionally enable them based on the compiler version (with the upcoming `RUSTC_VERSION` or maybe with the unstable `cfg(version(...))`, but that one apparently will not work for the nightly case). However, doing so is more complex and may not work well for different nightlies of the same version, unless we do not care about older nightlies. Another alternative is using explicit tests of the feature calling `rustc`, but that is also more complex and slower. Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20240827100403.376389-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 68ebd6d6b444..cf1111476f46 100644 --- a/Makefile +++ b/Makefile @@ -445,6 +445,7 @@ KBUILD_USERLDFLAGS := $(USERLDFLAGS) # host programs. export rust_common_flags := --edition=2021 \ -Zbinary_dep_depinfo=y \ + -Astable_features \ -Dunsafe_op_in_unsafe_fn \ -Dnon_ascii_idents \ -Wrust_2018_idioms \ -- cgit From defd8b3c37b0f9cb3e0f60f47d3d78d459d57fda Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 25 Aug 2024 12:16:38 -0700 Subject: gtp: fix a potential NULL pointer dereference When sockfd_lookup() fails, gtp_encap_enable_socket() returns a NULL pointer, but its callers only check for error pointers thus miss the NULL pointer case. Fix it by returning an error pointer with the error code carried from sockfd_lookup(). (I found this bug during code inspection.) Fixes: 1e3a3abd8b28 ("gtp: make GTP sockets in gtp_newlink optional") Cc: Andreas Schultz Cc: Harald Welte Signed-off-by: Cong Wang Reviewed-by: Simon Horman Reviewed-by: Pablo Neira Ayuso Link: https://patch.msgid.link/20240825191638.146748-1-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/gtp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 0696faf60013..2e94d10348cc 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1653,7 +1653,7 @@ static struct sock *gtp_encap_enable_socket(int fd, int type, sock = sockfd_lookup(fd, &err); if (!sock) { pr_debug("gtp socket fd=%d not found\n", fd); - return NULL; + return ERR_PTR(err); } sk = sock->sk; -- cgit From bac76cf89816bff06c4ec2f3df97dc34e150a1c4 Mon Sep 17 00:00:00 2001 From: Xueming Feng Date: Mon, 26 Aug 2024 18:23:27 +0800 Subject: tcp: fix forever orphan socket caused by tcp_abort We have some problem closing zero-window fin-wait-1 tcp sockets in our environment. This patch come from the investigation. Previously tcp_abort only sends out reset and calls tcp_done when the socket is not SOCK_DEAD, aka orphan. For orphan socket, it will only purging the write queue, but not close the socket and left it to the timer. While purging the write queue, tp->packets_out and sk->sk_write_queue is cleared along the way. However tcp_retransmit_timer have early return based on !tp->packets_out and tcp_probe_timer have early return based on !sk->sk_write_queue. This caused ICSK_TIME_RETRANS and ICSK_TIME_PROBE0 not being resched and socket not being killed by the timers, converting a zero-windowed orphan into a forever orphan. This patch removes the SOCK_DEAD check in tcp_abort, making it send reset to peer and close the socket accordingly. Preventing the timer-less orphan from happening. According to Lorenzo's email in the v1 thread, the check was there to prevent force-closing the same socket twice. That situation is handled by testing for TCP_CLOSE inside lock, and returning -ENOENT if it is already closed. The -ENOENT code comes from the associate patch Lorenzo made for iproute2-ss; link attached below, which also conform to RFC 9293. At the end of the patch, tcp_write_queue_purge(sk) is removed because it was already called in tcp_done_with_error(). p.s. This is the same patch with v2. Resent due to mis-labeled "changes requested" on patchwork.kernel.org. Link: https://patchwork.ozlabs.org/project/netdev/patch/1450773094-7978-3-git-send-email-lorenzo@google.com/ Fixes: c1e64e298b8c ("net: diag: Support destroying TCP sockets.") Signed-off-by: Xueming Feng Tested-by: Lorenzo Colitti Reviewed-by: Jason Xing Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240826102327.1461482-1-kuro@kuroa.me Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e03a342c9162..831a18dc7aa6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4637,6 +4637,13 @@ int tcp_abort(struct sock *sk, int err) /* Don't race with userspace socket closes such as tcp_close. */ lock_sock(sk); + /* Avoid closing the same socket twice. */ + if (sk->sk_state == TCP_CLOSE) { + if (!has_current_bpf_ctx()) + release_sock(sk); + return -ENOENT; + } + if (sk->sk_state == TCP_LISTEN) { tcp_set_state(sk, TCP_CLOSE); inet_csk_listen_stop(sk); @@ -4646,16 +4653,13 @@ int tcp_abort(struct sock *sk, int err) local_bh_disable(); bh_lock_sock(sk); - if (!sock_flag(sk, SOCK_DEAD)) { - if (tcp_need_reset(sk->sk_state)) - tcp_send_active_reset(sk, GFP_ATOMIC, - SK_RST_REASON_NOT_SPECIFIED); - tcp_done_with_error(sk, err); - } + if (tcp_need_reset(sk->sk_state)) + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); + tcp_done_with_error(sk, err); bh_unlock_sock(sk); local_bh_enable(); - tcp_write_queue_purge(sk); if (!has_current_bpf_ctx()) release_sock(sk); return 0; -- cgit From f09b0ad55a1196f5891663f8888463c0541059cb Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 26 Aug 2024 19:11:18 +0200 Subject: mptcp: close subflow when receiving TCP+FIN When a peer decides to close one subflow in the middle of a connection having multiple subflows, the receiver of the first FIN should accept that, and close the subflow on its side as well. If not, the subflow will stay half closed, and would even continue to be used until the end of the MPTCP connection or a reset from the network. The issue has not been seen before, probably because the in-kernel path-manager always sends a RM_ADDR before closing the subflow. Upon the reception of this RM_ADDR, the other peer will initiate the closure on its side as well. On the other hand, if the RM_ADDR is lost, or if the path-manager of the other peer only closes the subflow without sending a RM_ADDR, the subflow would switch to TCP_CLOSE_WAIT, but that's it, leaving the subflow half-closed. So now, when the subflow switches to the TCP_CLOSE_WAIT state, and if the MPTCP connection has not been closed before with a DATA_FIN, the kernel owning the subflow schedules its worker to initiate the closure on its side as well. This issue can be easily reproduced with packetdrill, as visible in [1], by creating an additional subflow, injecting a FIN+ACK before sending the DATA_FIN, and expecting a FIN+ACK in return. Fixes: 40947e13997a ("mptcp: schedule worker when subflow is closed") Cc: stable@vger.kernel.org Link: https://github.com/multipath-tcp/packetdrill/pull/154 [1] Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240826-net-mptcp-close-extra-sf-fin-v1-1-905199fe1172@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 5 ++++- net/mptcp/subflow.c | 8 ++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 0d536b183a6c..151e82e2ff2e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2533,8 +2533,11 @@ static void __mptcp_close_subflow(struct sock *sk) mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int ssk_state = inet_sk_state_load(ssk); - if (inet_sk_state_load(ssk) != TCP_CLOSE) + if (ssk_state != TCP_CLOSE && + (ssk_state != TCP_CLOSE_WAIT || + inet_sk_state_load(sk) != TCP_ESTABLISHED)) continue; /* 'subflow_data_ready' will re-sched once rx queue is empty */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index a21c712350c3..4834e7fc2fb6 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1255,12 +1255,16 @@ out: /* sched mptcp worker to remove the subflow if no more data is pending */ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk) { - if (likely(ssk->sk_state != TCP_CLOSE)) + struct sock *sk = (struct sock *)msk; + + if (likely(ssk->sk_state != TCP_CLOSE && + (ssk->sk_state != TCP_CLOSE_WAIT || + inet_sk_state_load(sk) != TCP_ESTABLISHED))) return; if (skb_queue_empty(&ssk->sk_receive_queue) && !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) - mptcp_schedule_work((struct sock *)msk); + mptcp_schedule_work(sk); } static bool subflow_can_fallback(struct mptcp_subflow_context *subflow) -- cgit From e93681afcb96864ec26c3b2ce94008ce93577373 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 26 Aug 2024 19:11:19 +0200 Subject: selftests: mptcp: join: cannot rm sf if closed Thanks to the previous commit, the MPTCP subflows are now closed on both directions even when only the MPTCP path-manager of one peer asks for their closure. In the two tests modified here -- "userspace pm add & remove address" and "userspace pm create destroy subflow" -- one peer is controlled by the userspace PM, and the other one by the in-kernel PM. When the userspace PM sends a RM_ADDR notification, the in-kernel PM will automatically react by closing all subflows using this address. Now, thanks to the previous commit, the subflows are properly closed on both directions, the userspace PM can then no longer closes the same subflows if they are already closed. Before, it was OK to do that, because the subflows were still half-opened, still OK to send a RM_ADDR. In other words, thanks to the previous commit closing the subflows, an error will be returned to the userspace if it tries to close a subflow that has already been closed. So no need to run this command, which mean that the linked counters will then not be incremented. These tests are then no longer sending both a RM_ADDR, then closing the linked subflow just after. The test with the userspace PM on the server side is now removing one subflow linked to one address, then sending a RM_ADDR for another address. The test with the userspace PM on the client side is now only removing the subflow that was previously created. Fixes: 4369c198e599 ("selftests: mptcp: test userspace pm out of transfer") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240826-net-mptcp-close-extra-sf-fin-v1-2-905199fe1172@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 89e553e0e0c2..264040a760c6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3429,14 +3429,12 @@ userspace_tests() "signal" userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1" userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1" - userspace_pm_rm_addr $ns1 10 userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns1}" \ - "id 20 flags signal 10.0.3.1" "after rm_addr 10" + "id 20 flags signal 10.0.3.1" "after rm_sf 10" userspace_pm_rm_addr $ns1 20 - userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20" - chk_rm_nr 2 2 invert + chk_rm_nr 1 1 invert chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids @@ -3460,12 +3458,11 @@ userspace_tests() "id 20 flags subflow 10.0.3.2" \ "subflow" userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2" - userspace_pm_rm_addr $ns2 20 userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns2}" \ "" \ - "after rm_addr 20" - chk_rm_nr 1 1 + "after rm_sf 20" + chk_rm_nr 0 1 chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids -- cgit From 2a1f596ebb23eadc0f9b95a8012e18ef76295fc8 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 26 Aug 2024 19:11:20 +0200 Subject: mptcp: sched: check both backup in retrans The 'mptcp_subflow_context' structure has two items related to the backup flags: - 'backup': the subflow has been marked as backup by the other peer - 'request_bkup': the backup flag has been set by the host Looking only at the 'backup' flag can make sense in some cases, but it is not the behaviour of the default packet scheduler when selecting paths. As explained in the commit b6a66e521a20 ("mptcp: sched: check both directions for backup"), the packet scheduler should look at both flags, because that was the behaviour from the beginning: the 'backup' flag was set by accident instead of the 'request_bkup' one. Now that the latter has been fixed, get_retrans() needs to be adapted as well. Fixes: b6a66e521a20 ("mptcp: sched: check both directions for backup") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240826-net-mptcp-close-extra-sf-fin-v1-3-905199fe1172@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 151e82e2ff2e..34fec753b9c1 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2326,7 +2326,7 @@ struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk) continue; } - if (subflow->backup) { + if (subflow->backup || subflow->request_bkup) { if (!backup) backup = ssk; continue; -- cgit From cb41b195e634d3f1ecfcd845314e64fd4bb3c7aa Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 26 Aug 2024 19:11:21 +0200 Subject: mptcp: pr_debug: add missing \n at the end pr_debug() have been added in various places in MPTCP code to help developers to debug some situations. With the dynamic debug feature, it is easy to enable all or some of them, and asks users to reproduce issues with extra debug. Many of these pr_debug() don't end with a new line, while no 'pr_cont()' are used in MPTCP code. So the goal was not to display multiple debug messages on one line: they were then not missing the '\n' on purpose. Not having the new line at the end causes these messages to be printed with a delay, when something else needs to be printed. This issue is not visible when many messages need to be printed, but it is annoying and confusing when only specific messages are expected, e.g. # echo "func mptcp_pm_add_addr_echoed +fmp" \ > /sys/kernel/debug/dynamic_debug/control # ./mptcp_join.sh "signal address"; \ echo "$(awk '{print $1}' /proc/uptime) - end"; \ sleep 5s; \ echo "$(awk '{print $1}' /proc/uptime) - restart"; \ ./mptcp_join.sh "signal address" 013 signal address (...) 10.75 - end 15.76 - restart 013 signal address [ 10.367935] mptcp:mptcp_pm_add_addr_echoed: MPTCP: msk=(...) (...) => a delay of 5 seconds: printed with a 10.36 ts, but after 'restart' which was printed at the 15.76 ts. The 'Fixes' tag here below points to the first pr_debug() used without '\n' in net/mptcp. This patch could be split in many small ones, with different Fixes tag, but it doesn't seem worth it, because it is easy to re-generate this patch with this simple 'sed' command: git grep -l pr_debug -- net/mptcp | xargs sed -i "s/\(pr_debug(\".*[^n]\)\(\"[,)]\)/\1\\\n\2/g" So in case of conflicts, simply drop the modifications, and launch this command. Fixes: f870fa0b5768 ("mptcp: Add MPTCP socket stubs") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20240826-net-mptcp-close-extra-sf-fin-v1-4-905199fe1172@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/fastopen.c | 4 ++-- net/mptcp/options.c | 50 ++++++++++++++++++++++++------------------------ net/mptcp/pm.c | 28 +++++++++++++-------------- net/mptcp/pm_netlink.c | 20 +++++++++---------- net/mptcp/protocol.c | 52 +++++++++++++++++++++++++------------------------- net/mptcp/protocol.h | 4 ++-- net/mptcp/sched.c | 4 ++-- net/mptcp/sockopt.c | 4 ++-- net/mptcp/subflow.c | 48 +++++++++++++++++++++++----------------------- 9 files changed, 107 insertions(+), 107 deletions(-) diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index ad28da655f8b..a29ff901df75 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -68,12 +68,12 @@ void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflo skb = skb_peek_tail(&sk->sk_receive_queue); if (skb) { WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq); - pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx", sk, + pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx\n", sk, MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq, MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq); MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq; MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq; } - pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq); + pr_debug("msk=%p ack_seq=%llx\n", msk, msk->ack_seq); } diff --git a/net/mptcp/options.c b/net/mptcp/options.c index ac2f1a54cc43..370c3836b771 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -117,7 +117,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD; ptr += 2; } - pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u", + pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u\n", version, flags, opsize, mp_opt->sndr_key, mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum); break; @@ -131,7 +131,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 4; mp_opt->nonce = get_unaligned_be32(ptr); ptr += 4; - pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u", + pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u\n", mp_opt->backup, mp_opt->join_id, mp_opt->token, mp_opt->nonce); } else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) { @@ -142,19 +142,19 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 8; mp_opt->nonce = get_unaligned_be32(ptr); ptr += 4; - pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u", + pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u\n", mp_opt->backup, mp_opt->join_id, mp_opt->thmac, mp_opt->nonce); } else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) { mp_opt->suboptions |= OPTION_MPTCP_MPJ_ACK; ptr += 2; memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN); - pr_debug("MP_JOIN hmac"); + pr_debug("MP_JOIN hmac\n"); } break; case MPTCPOPT_DSS: - pr_debug("DSS"); + pr_debug("DSS\n"); ptr++; /* we must clear 'mpc_map' be able to detect MP_CAPABLE @@ -169,7 +169,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->ack64 = (flags & MPTCP_DSS_ACK64) != 0; mp_opt->use_ack = (flags & MPTCP_DSS_HAS_ACK); - pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d", + pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d\n", mp_opt->data_fin, mp_opt->dsn64, mp_opt->use_map, mp_opt->ack64, mp_opt->use_ack); @@ -207,7 +207,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 4; } - pr_debug("data_ack=%llu", mp_opt->data_ack); + pr_debug("data_ack=%llu\n", mp_opt->data_ack); } if (mp_opt->use_map) { @@ -231,7 +231,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 2; } - pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", + pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u\n", mp_opt->data_seq, mp_opt->subflow_seq, mp_opt->data_len, !!(mp_opt->suboptions & OPTION_MPTCP_CSUMREQD), mp_opt->csum); @@ -293,7 +293,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->ahmac = get_unaligned_be64(ptr); ptr += 8; } - pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d", + pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d\n", (mp_opt->addr.family == AF_INET6) ? "6" : "", mp_opt->addr.id, mp_opt->ahmac, mp_opt->echo, ntohs(mp_opt->addr.port)); break; @@ -309,7 +309,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE; for (i = 0; i < mp_opt->rm_list.nr; i++) mp_opt->rm_list.ids[i] = *ptr++; - pr_debug("RM_ADDR: rm_list_nr=%d", mp_opt->rm_list.nr); + pr_debug("RM_ADDR: rm_list_nr=%d\n", mp_opt->rm_list.nr); break; case MPTCPOPT_MP_PRIO: @@ -318,7 +318,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->suboptions |= OPTION_MPTCP_PRIO; mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP; - pr_debug("MP_PRIO: prio=%d", mp_opt->backup); + pr_debug("MP_PRIO: prio=%d\n", mp_opt->backup); break; case MPTCPOPT_MP_FASTCLOSE: @@ -329,7 +329,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->rcvr_key = get_unaligned_be64(ptr); ptr += 8; mp_opt->suboptions |= OPTION_MPTCP_FASTCLOSE; - pr_debug("MP_FASTCLOSE: recv_key=%llu", mp_opt->rcvr_key); + pr_debug("MP_FASTCLOSE: recv_key=%llu\n", mp_opt->rcvr_key); break; case MPTCPOPT_RST: @@ -343,7 +343,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, flags = *ptr++; mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT; mp_opt->reset_reason = *ptr; - pr_debug("MP_RST: transient=%u reason=%u", + pr_debug("MP_RST: transient=%u reason=%u\n", mp_opt->reset_transient, mp_opt->reset_reason); break; @@ -354,7 +354,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 2; mp_opt->suboptions |= OPTION_MPTCP_FAIL; mp_opt->fail_seq = get_unaligned_be64(ptr); - pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq); + pr_debug("MP_FAIL: data_seq=%llu\n", mp_opt->fail_seq); break; default: @@ -417,7 +417,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, *size = TCPOLEN_MPTCP_MPC_SYN; return true; } else if (subflow->request_join) { - pr_debug("remote_token=%u, nonce=%u", subflow->remote_token, + pr_debug("remote_token=%u, nonce=%u\n", subflow->remote_token, subflow->local_nonce); opts->suboptions = OPTION_MPTCP_MPJ_SYN; opts->join_id = subflow->local_id; @@ -500,7 +500,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, *size = TCPOLEN_MPTCP_MPC_ACK; } - pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d", + pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d\n", subflow, subflow->local_key, subflow->remote_key, data_len); @@ -509,7 +509,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, opts->suboptions = OPTION_MPTCP_MPJ_ACK; memcpy(opts->hmac, subflow->hmac, MPTCPOPT_HMAC_LEN); *size = TCPOLEN_MPTCP_MPJ_ACK; - pr_debug("subflow=%p", subflow); + pr_debug("subflow=%p\n", subflow); /* we can use the full delegate action helper only from BH context * If we are in process context - sk is flushing the backlog at @@ -675,7 +675,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * *size = len; if (drop_other_suboptions) { - pr_debug("drop other suboptions"); + pr_debug("drop other suboptions\n"); opts->suboptions = 0; /* note that e.g. DSS could have written into the memory @@ -695,7 +695,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * } else { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX); } - pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d", + pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d\n", opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port)); return true; @@ -726,7 +726,7 @@ static bool mptcp_established_options_rm_addr(struct sock *sk, opts->rm_list = rm_list; for (i = 0; i < opts->rm_list.nr; i++) - pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]); + pr_debug("rm_list_ids[%d]=%d\n", i, opts->rm_list.ids[i]); MPTCP_ADD_STATS(sock_net(sk), MPTCP_MIB_RMADDRTX, opts->rm_list.nr); return true; } @@ -752,7 +752,7 @@ static bool mptcp_established_options_mp_prio(struct sock *sk, opts->suboptions |= OPTION_MPTCP_PRIO; opts->backup = subflow->request_bkup; - pr_debug("prio=%d", opts->backup); + pr_debug("prio=%d\n", opts->backup); return true; } @@ -794,7 +794,7 @@ static bool mptcp_established_options_fastclose(struct sock *sk, opts->suboptions |= OPTION_MPTCP_FASTCLOSE; opts->rcvr_key = READ_ONCE(msk->remote_key); - pr_debug("FASTCLOSE key=%llu", opts->rcvr_key); + pr_debug("FASTCLOSE key=%llu\n", opts->rcvr_key); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX); return true; } @@ -816,7 +816,7 @@ static bool mptcp_established_options_mp_fail(struct sock *sk, opts->suboptions |= OPTION_MPTCP_FAIL; opts->fail_seq = subflow->map_seq; - pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq); + pr_debug("MP_FAIL fail_seq=%llu\n", opts->fail_seq); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX); return true; @@ -904,7 +904,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, opts->csum_reqd = subflow_req->csum_reqd; opts->allow_join_id0 = subflow_req->allow_join_id0; *size = TCPOLEN_MPTCP_MPC_SYNACK; - pr_debug("subflow_req=%p, local_key=%llu", + pr_debug("subflow_req=%p, local_key=%llu\n", subflow_req, subflow_req->local_key); return true; } else if (subflow_req->mp_join) { @@ -913,7 +913,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, opts->join_id = subflow_req->local_id; opts->thmac = subflow_req->thmac; opts->nonce = subflow_req->local_nonce; - pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u", + pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u\n", subflow_req, opts->backup, opts->join_id, opts->thmac, opts->nonce); *size = TCPOLEN_MPTCP_MPJ_SYNACK; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 3e6e0f5510bb..3f8dbde243f1 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -19,7 +19,7 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, { u8 add_addr = READ_ONCE(msk->pm.addr_signal); - pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo); + pr_debug("msk=%p, local_id=%d, echo=%d\n", msk, addr->id, echo); lockdep_assert_held(&msk->pm.lock); @@ -45,7 +45,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ { u8 rm_addr = READ_ONCE(msk->pm.addr_signal); - pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); + pr_debug("msk=%p, rm_list_nr=%d\n", msk, rm_list->nr); if (rm_addr) { MPTCP_ADD_STATS(sock_net((struct sock *)msk), @@ -66,7 +66,7 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int { struct mptcp_pm_data *pm = &msk->pm; - pr_debug("msk=%p, token=%u side=%d", msk, READ_ONCE(msk->token), server_side); + pr_debug("msk=%p, token=%u side=%d\n", msk, READ_ONCE(msk->token), server_side); WRITE_ONCE(pm->server_side, server_side); mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC); @@ -90,7 +90,7 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) subflows_max = mptcp_pm_get_subflows_max(msk); - pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows, + pr_debug("msk=%p subflows=%d max=%d allow=%d\n", msk, pm->subflows, subflows_max, READ_ONCE(pm->accept_subflow)); /* try to avoid acquiring the lock below */ @@ -114,7 +114,7 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) static bool mptcp_pm_schedule_work(struct mptcp_sock *msk, enum mptcp_pm_status new_status) { - pr_debug("msk=%p status=%x new=%lx", msk, msk->pm.status, + pr_debug("msk=%p status=%x new=%lx\n", msk, msk->pm.status, BIT(new_status)); if (msk->pm.status & BIT(new_status)) return false; @@ -129,7 +129,7 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk) struct mptcp_pm_data *pm = &msk->pm; bool announce = false; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); spin_lock_bh(&pm->lock); @@ -153,14 +153,14 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk) void mptcp_pm_connection_closed(struct mptcp_sock *msk) { - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); } void mptcp_pm_subflow_established(struct mptcp_sock *msk) { struct mptcp_pm_data *pm = &msk->pm; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); if (!READ_ONCE(pm->work_pending)) return; @@ -212,7 +212,7 @@ void mptcp_pm_add_addr_received(const struct sock *ssk, struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_pm_data *pm = &msk->pm; - pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, + pr_debug("msk=%p remote_id=%d accept=%d\n", msk, addr->id, READ_ONCE(pm->accept_addr)); mptcp_event_addr_announced(ssk, addr); @@ -243,7 +243,7 @@ void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, { struct mptcp_pm_data *pm = &msk->pm; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); spin_lock_bh(&pm->lock); @@ -267,7 +267,7 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, struct mptcp_pm_data *pm = &msk->pm; u8 i; - pr_debug("msk=%p remote_ids_nr=%d", msk, rm_list->nr); + pr_debug("msk=%p remote_ids_nr=%d\n", msk, rm_list->nr); for (i = 0; i < rm_list->nr; i++) mptcp_event_addr_removed(msk, rm_list->ids[i]); @@ -299,19 +299,19 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); - pr_debug("fail_seq=%llu", fail_seq); + pr_debug("fail_seq=%llu\n", fail_seq); if (!READ_ONCE(msk->allow_infinite_fallback)) return; if (!subflow->fail_tout) { - pr_debug("send MP_FAIL response and infinite map"); + pr_debug("send MP_FAIL response and infinite map\n"); subflow->send_mp_fail = 1; subflow->send_infinite_map = 1; tcp_send_ack(sk); } else { - pr_debug("MP_FAIL response received"); + pr_debug("MP_FAIL response received\n"); WRITE_ONCE(subflow->fail_tout, 0); } } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 3e4ad801786f..8d2f97854c64 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -287,7 +287,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) struct mptcp_sock *msk = entry->sock; struct sock *sk = (struct sock *)msk; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); if (!msk) return; @@ -306,7 +306,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) spin_lock_bh(&msk->pm.lock); if (!mptcp_pm_should_add_signal_addr(msk)) { - pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id); + pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id); mptcp_pm_announce_addr(msk, &entry->addr, false); mptcp_pm_add_addr_send_ack(msk); entry->retrans_times++; @@ -387,7 +387,7 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) struct sock *sk = (struct sock *)msk; LIST_HEAD(free_list); - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); spin_lock_bh(&msk->pm.lock); list_splice_init(&msk->pm.anno_list, &free_list); @@ -473,7 +473,7 @@ static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_con struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow; - pr_debug("send ack for %s", + pr_debug("send ack for %s\n", prio ? "mp_prio" : (mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr")); slow = lock_sock_fast(ssk); @@ -708,7 +708,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); subflows_max = mptcp_pm_get_subflows_max(msk); - pr_debug("accepted %d:%d remote family %d", + pr_debug("accepted %d:%d remote family %d\n", msk->pm.add_addr_accepted, add_addr_accept_max, msk->pm.remote.family); @@ -767,7 +767,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, { struct mptcp_subflow_context *subflow; - pr_debug("bkup=%d", bkup); + pr_debug("bkup=%d\n", bkup); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -803,7 +803,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, struct sock *sk = (struct sock *)msk; u8 i; - pr_debug("%s rm_list_nr %d", + pr_debug("%s rm_list_nr %d\n", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr); msk_owned_by_me(msk); @@ -832,7 +832,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) continue; - pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", + pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u\n", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", i, rm_id, id, remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); @@ -889,7 +889,7 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk) spin_lock_bh(&msk->pm.lock); - pr_debug("msk=%p status=%x", msk, pm->status); + pr_debug("msk=%p status=%x\n", msk, pm->status); if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); mptcp_pm_nl_add_addr_received(msk); @@ -1476,7 +1476,7 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, long s_slot = 0, s_num = 0; struct mptcp_sock *msk; - pr_debug("remove_id=%d", addr->id); + pr_debug("remove_id=%d\n", addr->id); list.ids[list.nr++] = addr->id; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 34fec753b9c1..b571fba88a2f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -139,7 +139,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, !skb_try_coalesce(to, from, &fragstolen, &delta)) return false; - pr_debug("colesced seq %llx into %llx new len %d new end seq %llx", + pr_debug("colesced seq %llx into %llx new len %d new end seq %llx\n", MPTCP_SKB_CB(from)->map_seq, MPTCP_SKB_CB(to)->map_seq, to->len, MPTCP_SKB_CB(from)->end_seq); MPTCP_SKB_CB(to)->end_seq = MPTCP_SKB_CB(from)->end_seq; @@ -217,7 +217,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb) end_seq = MPTCP_SKB_CB(skb)->end_seq; max_seq = atomic64_read(&msk->rcv_wnd_sent); - pr_debug("msk=%p seq=%llx limit=%llx empty=%d", msk, seq, max_seq, + pr_debug("msk=%p seq=%llx limit=%llx empty=%d\n", msk, seq, max_seq, RB_EMPTY_ROOT(&msk->out_of_order_queue)); if (after64(end_seq, max_seq)) { /* out of window */ @@ -643,7 +643,7 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, } } - pr_debug("msk=%p ssk=%p", msk, ssk); + pr_debug("msk=%p ssk=%p\n", msk, ssk); tp = tcp_sk(ssk); do { u32 map_remaining, offset; @@ -724,7 +724,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) u64 end_seq; p = rb_first(&msk->out_of_order_queue); - pr_debug("msk=%p empty=%d", msk, RB_EMPTY_ROOT(&msk->out_of_order_queue)); + pr_debug("msk=%p empty=%d\n", msk, RB_EMPTY_ROOT(&msk->out_of_order_queue)); while (p) { skb = rb_to_skb(p); if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) @@ -746,7 +746,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) int delta = msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq; /* skip overlapping data, if any */ - pr_debug("uncoalesced seq=%llx ack seq=%llx delta=%d", + pr_debug("uncoalesced seq=%llx ack seq=%llx delta=%d\n", MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq, delta); MPTCP_SKB_CB(skb)->offset += delta; @@ -1240,7 +1240,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, size_t copy; int i; - pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u", + pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u\n", msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent); if (WARN_ON_ONCE(info->sent > info->limit || @@ -1341,7 +1341,7 @@ alloc_skb: mpext->use_map = 1; mpext->dsn64 = 1; - pr_debug("data_seq=%llu subflow_seq=%u data_len=%u dsn64=%d", + pr_debug("data_seq=%llu subflow_seq=%u data_len=%u dsn64=%d\n", mpext->data_seq, mpext->subflow_seq, mpext->data_len, mpext->dsn64); @@ -1892,7 +1892,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!msk->first_pending) WRITE_ONCE(msk->first_pending, dfrag); } - pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d", msk, + pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d\n", msk, dfrag->data_seq, dfrag->data_len, dfrag->already_sent, !dfrag_collapsed); @@ -2248,7 +2248,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, } } - pr_debug("block timeout %ld", timeo); + pr_debug("block timeout %ld\n", timeo); sk_wait_data(sk, &timeo, NULL); } @@ -2264,7 +2264,7 @@ out_err: } } - pr_debug("msk=%p rx queue empty=%d:%d copied=%d", + pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n", msk, skb_queue_empty_lockless(&sk->sk_receive_queue), skb_queue_empty(&msk->receive_queue), copied); if (!(flags & MSG_PEEK)) @@ -2717,7 +2717,7 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) if (!ssk) return; - pr_debug("MP_FAIL doesn't respond, reset the subflow"); + pr_debug("MP_FAIL doesn't respond, reset the subflow\n"); slow = lock_sock_fast(ssk); mptcp_subflow_reset(ssk); @@ -2891,7 +2891,7 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) break; default: if (__mptcp_check_fallback(mptcp_sk(sk))) { - pr_debug("Fallback"); + pr_debug("Fallback\n"); ssk->sk_shutdown |= how; tcp_shutdown(ssk, how); @@ -2901,7 +2901,7 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt); mptcp_schedule_work(sk); } else { - pr_debug("Sending DATA_FIN on subflow %p", ssk); + pr_debug("Sending DATA_FIN on subflow %p\n", ssk); tcp_send_ack(ssk); if (!mptcp_rtx_timer_pending(sk)) mptcp_reset_rtx_timer(sk); @@ -2967,7 +2967,7 @@ static void mptcp_check_send_data_fin(struct sock *sk) struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); - pr_debug("msk=%p snd_data_fin_enable=%d pending=%d snd_nxt=%llu write_seq=%llu", + pr_debug("msk=%p snd_data_fin_enable=%d pending=%d snd_nxt=%llu write_seq=%llu\n", msk, msk->snd_data_fin_enable, !!mptcp_send_head(sk), msk->snd_nxt, msk->write_seq); @@ -2991,7 +2991,7 @@ static void __mptcp_wr_shutdown(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - pr_debug("msk=%p snd_data_fin_enable=%d shutdown=%x state=%d pending=%d", + pr_debug("msk=%p snd_data_fin_enable=%d shutdown=%x state=%d pending=%d\n", msk, msk->snd_data_fin_enable, sk->sk_shutdown, sk->sk_state, !!mptcp_send_head(sk)); @@ -3006,7 +3006,7 @@ static void __mptcp_destroy_sock(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); might_sleep(); @@ -3114,7 +3114,7 @@ cleanup: mptcp_set_state(sk, TCP_CLOSE); sock_hold(sk); - pr_debug("msk=%p state=%d", sk, sk->sk_state); + pr_debug("msk=%p state=%d\n", sk, sk->sk_state); if (msk->token) mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); @@ -3546,7 +3546,7 @@ static int mptcp_get_port(struct sock *sk, unsigned short snum) { struct mptcp_sock *msk = mptcp_sk(sk); - pr_debug("msk=%p, ssk=%p", msk, msk->first); + pr_debug("msk=%p, ssk=%p\n", msk, msk->first); if (WARN_ON_ONCE(!msk->first)) return -EINVAL; @@ -3563,7 +3563,7 @@ void mptcp_finish_connect(struct sock *ssk) sk = subflow->conn; msk = mptcp_sk(sk); - pr_debug("msk=%p, token=%u", sk, subflow->token); + pr_debug("msk=%p, token=%u\n", sk, subflow->token); subflow->map_seq = subflow->iasn; subflow->map_subflow_seq = 1; @@ -3592,7 +3592,7 @@ bool mptcp_finish_join(struct sock *ssk) struct sock *parent = (void *)msk; bool ret = true; - pr_debug("msk=%p, subflow=%p", msk, subflow); + pr_debug("msk=%p, subflow=%p\n", msk, subflow); /* mptcp socket already closing? */ if (!mptcp_is_fully_established(parent)) { @@ -3638,7 +3638,7 @@ err_prohibited: static void mptcp_shutdown(struct sock *sk, int how) { - pr_debug("sk=%p, how=%d", sk, how); + pr_debug("sk=%p, how=%d\n", sk, how); if ((how & SEND_SHUTDOWN) && mptcp_close_state(sk)) __mptcp_wr_shutdown(sk); @@ -3859,7 +3859,7 @@ static int mptcp_listen(struct socket *sock, int backlog) struct sock *ssk; int err; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); lock_sock(sk); @@ -3898,7 +3898,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, struct mptcp_sock *msk = mptcp_sk(sock->sk); struct sock *ssk, *newsk; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); /* Buggy applications can call accept on socket states other then LISTEN * but no need to allocate the first subflow just to error out. @@ -3907,12 +3907,12 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssk) return -EINVAL; - pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); + pr_debug("ssk=%p, listener=%p\n", ssk, mptcp_subflow_ctx(ssk)); newsk = inet_csk_accept(ssk, arg); if (!newsk) return arg->err; - pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); + pr_debug("newsk=%p, subflow is mptcp=%d\n", newsk, sk_is_mptcp(newsk)); if (sk_is_mptcp(newsk)) { struct mptcp_subflow_context *subflow; struct sock *new_mptcp_sock; @@ -4005,7 +4005,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, sock_poll_wait(file, sock, wait); state = inet_sk_state_load(sk); - pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags); + pr_debug("msk=%p state=%d flags=%lx\n", msk, state, msk->flags); if (state == TCP_LISTEN) { struct sock *ssk = READ_ONCE(msk->first); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a1c1b0ff1ce1..240d7c2ea551 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1177,7 +1177,7 @@ static inline bool mptcp_check_fallback(const struct sock *sk) static inline void __mptcp_do_fallback(struct mptcp_sock *msk) { if (__mptcp_check_fallback(msk)) { - pr_debug("TCP fallback already done (msk=%p)", msk); + pr_debug("TCP fallback already done (msk=%p)\n", msk); return; } set_bit(MPTCP_FALLBACK_DONE, &msk->flags); @@ -1213,7 +1213,7 @@ static inline void mptcp_do_fallback(struct sock *ssk) } } -#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a) +#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a) static inline bool mptcp_check_infinite_map(struct sk_buff *skb) { diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index 4a7fd0508ad2..78ed508ebc1b 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -86,7 +86,7 @@ int mptcp_register_scheduler(struct mptcp_sched_ops *sched) list_add_tail_rcu(&sched->list, &mptcp_sched_list); spin_unlock(&mptcp_sched_list_lock); - pr_debug("%s registered", sched->name); + pr_debug("%s registered\n", sched->name); return 0; } @@ -118,7 +118,7 @@ int mptcp_init_sched(struct mptcp_sock *msk, if (msk->sched->init) msk->sched->init(msk); - pr_debug("sched=%s", msk->sched->name); + pr_debug("sched=%s\n", msk->sched->name); return 0; } diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 2026a9a36f80..505445a9598f 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -873,7 +873,7 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname, struct mptcp_sock *msk = mptcp_sk(sk); struct sock *ssk; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); if (level == SOL_SOCKET) return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); @@ -1453,7 +1453,7 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname, struct mptcp_sock *msk = mptcp_sk(sk); struct sock *ssk; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); /* @@ the meaning of setsockopt() when the socket is connected and * there are multiple subflows is not yet defined. It is up to the diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4834e7fc2fb6..064ab3235893 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -39,7 +39,7 @@ static void subflow_req_destructor(struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - pr_debug("subflow_req=%p", subflow_req); + pr_debug("subflow_req=%p\n", subflow_req); if (subflow_req->msk) sock_put((struct sock *)subflow_req->msk); @@ -146,7 +146,7 @@ static int subflow_check_req(struct request_sock *req, struct mptcp_options_received mp_opt; bool opt_mp_capable, opt_mp_join; - pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); + pr_debug("subflow_req=%p, listener=%p\n", subflow_req, listener); #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -221,7 +221,7 @@ again: } if (subflow_use_different_sport(subflow_req->msk, sk_listener)) { - pr_debug("syn inet_sport=%d %d", + pr_debug("syn inet_sport=%d %d\n", ntohs(inet_sk(sk_listener)->inet_sport), ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport)); if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) { @@ -243,7 +243,7 @@ again: subflow_init_req_cookie_join_save(subflow_req, skb); } - pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token, + pr_debug("token=%u, remote_nonce=%u msk=%p\n", subflow_req->token, subflow_req->remote_nonce, subflow_req->msk); } @@ -527,7 +527,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->rel_write_seq = 1; subflow->conn_finished = 1; subflow->ssn_offset = TCP_SKB_CB(skb)->seq; - pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset); + pr_debug("subflow=%p synack seq=%x\n", subflow, subflow->ssn_offset); mptcp_get_options(skb, &mp_opt); if (subflow->request_mptcp) { @@ -559,7 +559,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; WRITE_ONCE(subflow->remote_id, mp_opt.join_id); - pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", + pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d\n", subflow, subflow->thmac, subflow->remote_nonce, subflow->backup); @@ -585,7 +585,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKBACKUPRX); if (subflow_use_different_dport(msk, sk)) { - pr_debug("synack inet_dport=%d %d", + pr_debug("synack inet_dport=%d %d\n", ntohs(inet_sk(sk)->inet_dport), ntohs(inet_sk(parent)->inet_dport)); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX); @@ -655,7 +655,7 @@ static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - pr_debug("subflow=%p", subflow); + pr_debug("subflow=%p\n", subflow); /* Never answer to SYNs sent to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) @@ -686,7 +686,7 @@ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - pr_debug("subflow=%p", subflow); + pr_debug("subflow=%p\n", subflow); if (skb->protocol == htons(ETH_P_IP)) return subflow_v4_conn_request(sk, skb); @@ -807,7 +807,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_sock *owner; struct sock *child; - pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); + pr_debug("listener=%p, req=%p, conn=%p\n", listener, req, listener->conn); /* After child creation we must look for MPC even when options * are not parsed @@ -898,7 +898,7 @@ create_child: ctx->conn = (struct sock *)owner; if (subflow_use_different_sport(owner, sk)) { - pr_debug("ack inet_sport=%d %d", + pr_debug("ack inet_sport=%d %d\n", ntohs(inet_sk(sk)->inet_sport), ntohs(inet_sk((struct sock *)owner)->inet_sport)); if (!mptcp_pm_sport_in_anno_list(owner, sk)) { @@ -961,7 +961,7 @@ enum mapping_status { static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) { - pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d", + pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d\n", ssn, subflow->map_subflow_seq, subflow->map_data_len); } @@ -1121,7 +1121,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, data_len = mpext->data_len; if (data_len == 0) { - pr_debug("infinite mapping received"); + pr_debug("infinite mapping received\n"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); subflow->map_data_len = 0; return MAPPING_INVALID; @@ -1133,7 +1133,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, if (data_len == 1) { bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq, mpext->dsn64); - pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq); + pr_debug("DATA_FIN with no payload seq=%llu\n", mpext->data_seq); if (subflow->map_valid) { /* A DATA_FIN might arrive in a DSS * option before the previous mapping @@ -1159,7 +1159,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, data_fin_seq &= GENMASK_ULL(31, 0); mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64); - pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d", + pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d\n", data_fin_seq, mpext->dsn64); /* Adjust for DATA_FIN using 1 byte of sequence space */ @@ -1205,7 +1205,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, if (unlikely(subflow->map_csum_reqd != csum_reqd)) return MAPPING_INVALID; - pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", + pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u\n", subflow->map_seq, subflow->map_subflow_seq, subflow->map_data_len, subflow->map_csum_reqd, subflow->map_data_csum); @@ -1240,7 +1240,7 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, avail_len = skb->len - offset; incr = limit >= avail_len ? avail_len + fin : limit; - pr_debug("discarding=%d len=%d offset=%d seq=%d", incr, skb->len, + pr_debug("discarding=%d len=%d offset=%d seq=%d\n", incr, skb->len, offset, subflow->map_subflow_seq); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA); tcp_sk(ssk)->copied_seq += incr; @@ -1341,7 +1341,7 @@ static bool subflow_check_data_avail(struct sock *ssk) old_ack = READ_ONCE(msk->ack_seq); ack_seq = mptcp_subflow_get_mapped_dsn(subflow); - pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack, + pr_debug("msk ack_seq=%llx subflow ack_seq=%llx\n", old_ack, ack_seq); if (unlikely(before64(ack_seq, old_ack))) { mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq); @@ -1413,7 +1413,7 @@ bool mptcp_subflow_data_available(struct sock *sk) subflow->map_valid = 0; WRITE_ONCE(subflow->data_avail, false); - pr_debug("Done with mapping: seq=%u data_len=%u", + pr_debug("Done with mapping: seq=%u data_len=%u\n", subflow->map_subflow_seq, subflow->map_data_len); } @@ -1523,7 +1523,7 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped) target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk); - pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d", + pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d\n", subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped); if (likely(icsk->icsk_af_ops == target)) @@ -1616,7 +1616,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, goto failed; mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL); - pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, + pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d\n", msk, remote_token, local_id, remote_id); subflow->remote_token = remote_token; WRITE_ONCE(subflow->remote_id, remote_id); @@ -1751,7 +1751,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid; subflow = mptcp_subflow_ctx(sf->sk); - pr_debug("subflow=%p", subflow); + pr_debug("subflow=%p\n", subflow); *new_sock = sf; sock_hold(sk); @@ -1780,7 +1780,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, INIT_LIST_HEAD(&ctx->node); INIT_LIST_HEAD(&ctx->delegated_node); - pr_debug("subflow=%p", ctx); + pr_debug("subflow=%p\n", ctx); ctx->tcp_sock = sk; WRITE_ONCE(ctx->local_id, -1); @@ -1931,7 +1931,7 @@ static int subflow_ulp_init(struct sock *sk) goto out; } - pr_debug("subflow=%p, family=%d", ctx, sk->sk_family); + pr_debug("subflow=%p, family=%d\n", ctx, sk->sk_family); tp->is_mptcp = 1; ctx->icsk_af_ops = icsk->icsk_af_ops; -- cgit From 3a0504d54b3b57f0d7bf3d9184a00c9f8887f6d7 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Mon, 26 Aug 2024 15:07:11 +0200 Subject: sctp: fix association labeling in the duplicate COOKIE-ECHO case sctp_sf_do_5_2_4_dupcook() currently calls security_sctp_assoc_request() on new_asoc, but as it turns out, this association is always discarded and the LSM labels never get into the final association (asoc). This can be reproduced by having two SCTP endpoints try to initiate an association with each other at approximately the same time and then peel off the association into a new socket, which exposes the unitialized labels and triggers SELinux denials. Fix it by calling security_sctp_assoc_request() on asoc instead of new_asoc. Xin Long also suggested limit calling the hook only to cases A, B, and D, since in cases C and E the COOKIE ECHO chunk is discarded and the association doesn't enter the ESTABLISHED state, so rectify that as well. One related caveat with SELinux and peer labeling: When an SCTP connection is set up simultaneously in this way, we will end up with an association that is initialized with security_sctp_assoc_request() on both sides, so the MLS component of the security context of the association will get swapped between the peers, instead of just one side setting it to the other's MLS component. However, at that point security_sctp_assoc_request() had already been called on both sides in sctp_sf_do_unexpected_init() (on a temporary association) and thus if the exchange didn't fail before due to MLS, it won't fail now either (most likely both endpoints have the same MLS range). Tested by: - reproducer from https://src.fedoraproject.org/tests/selinux/pull-request/530 - selinux-testsuite (https://github.com/SELinuxProject/selinux-testsuite/) - sctp-tests (https://github.com/sctp/sctp-tests) - no tests failed that wouldn't fail also without the patch applied Fixes: c081d53f97a1 ("security: pass asoc to sctp_assoc_request and sctp_sk_clone") Suggested-by: Xin Long Signed-off-by: Ondrej Mosnacek Acked-by: Xin Long Acked-by: Paul Moore (LSM/SELinux) Link: https://patch.msgid.link/20240826130711.141271-1-omosnace@redhat.com Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 5adf0c0a6c1a..7d315a18612b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2260,12 +2260,6 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook( } } - /* Update socket peer label if first association. */ - if (security_sctp_assoc_request(new_asoc, chunk->head_skb ?: chunk->skb)) { - sctp_association_free(new_asoc); - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - } - /* Set temp so that it won't be added into hashtable */ new_asoc->temp = 1; @@ -2274,6 +2268,22 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook( */ action = sctp_tietags_compare(new_asoc, asoc); + /* In cases C and E the association doesn't enter the ESTABLISHED + * state, so there is no need to call security_sctp_assoc_request(). + */ + switch (action) { + case 'A': /* Association restart. */ + case 'B': /* Collision case B. */ + case 'D': /* Collision case D. */ + /* Update socket peer label if first association. */ + if (security_sctp_assoc_request((struct sctp_association *)asoc, + chunk->head_skb ?: chunk->skb)) { + sctp_association_free(new_asoc); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + } + break; + } + switch (action) { case 'A': /* Association restart. */ retval = sctp_sf_do_dupcook_a(net, ep, asoc, chunk, commands, -- cgit From a2ccc33b88e2953a6bf0b309e7e8849cc5320018 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 23 Aug 2024 19:29:18 +0300 Subject: drm/i915/dp_mst: Fix MST state after a sink reset In some cases the sink can reset itself after it was configured into MST mode, without the driver noticing the disconnected state. For instance the reset may happen in the middle of a modeset, or the (long) HPD pulse generated may be not long enough for the encoder detect handler to observe the HPD's deasserted state. In this case the sink's DPCD register programmed to enable MST will be reset, while the driver still assumes MST is still enabled. Detect this condition, which will tear down and recreate/re-enable the MST topology. v2: - Add a code comment about adjusting the expected DP_MSTM_CTRL register value for SST + SideBand. (Suraj, Jani) - Print a debug message about detecting the link reset. (Jani) - Verify the DPCD MST state only if it wasn't already determined that the sink is disconnected. Cc: stable@vger.kernel.org Cc: Jani Nikula Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11195 Reviewed-by: Suraj Kandpal (v1) Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240823162918.1211875-1-imre.deak@intel.com (cherry picked from commit 594cf78dc36f31c0c7e0de4567e644f406d46bae) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dp.c | 12 +++++++++ drivers/gpu/drm/i915/display/intel_dp_mst.c | 40 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dp_mst.h | 1 + 3 files changed, 53 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 59f11af3b0a1..dc75a929d3ed 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5935,6 +5935,18 @@ intel_dp_detect(struct drm_connector *connector, else status = connector_status_disconnected; + if (status != connector_status_disconnected && + !intel_dp_mst_verify_dpcd_state(intel_dp)) + /* + * This requires retrying detection for instance to re-enable + * the MST mode that got reset via a long HPD pulse. The retry + * will happen either via the hotplug handler's retry logic, + * ensured by setting the connector here to SST/disconnected, + * or via a userspace connector probing in response to the + * hotplug uevent sent when removing the MST connectors. + */ + status = connector_status_disconnected; + if (status == connector_status_disconnected) { memset(&intel_dp->compliance, 0, sizeof(intel_dp->compliance)); memset(intel_connector->dp.dsc_dpcd, 0, sizeof(intel_connector->dp.dsc_dpcd)); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 27ce5c3f5951..17978a1f9ab0 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1998,3 +1998,43 @@ bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state, return false; } + +/* + * intel_dp_mst_verify_dpcd_state - verify the MST SW enabled state wrt. the DPCD + * @intel_dp: DP port object + * + * Verify if @intel_dp's MST enabled SW state matches the corresponding DPCD + * state. A long HPD pulse - not long enough to be detected as a disconnected + * state - could've reset the DPCD state, which requires tearing + * down/recreating the MST topology. + * + * Returns %true if the SW MST enabled and DPCD states match, %false + * otherwise. + */ +bool intel_dp_mst_verify_dpcd_state(struct intel_dp *intel_dp) +{ + struct intel_display *display = to_intel_display(intel_dp); + struct intel_connector *connector = intel_dp->attached_connector; + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct intel_encoder *encoder = &dig_port->base; + int ret; + u8 val; + + if (!intel_dp->is_mst) + return true; + + ret = drm_dp_dpcd_readb(intel_dp->mst_mgr.aux, DP_MSTM_CTRL, &val); + + /* Adjust the expected register value for SST + SideBand. */ + if (ret < 0 || val != (DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC)) { + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s][ENCODER:%d:%s] MST mode got reset, removing topology (ret=%d, ctrl=0x%02x)\n", + connector->base.base.id, connector->base.name, + encoder->base.base.id, encoder->base.name, + ret, val); + + return false; + } + + return true; +} diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.h b/drivers/gpu/drm/i915/display/intel_dp_mst.h index 8ca1d599091c..9e4c7679f1c3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.h +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.h @@ -27,5 +27,6 @@ int intel_dp_mst_atomic_check_link(struct intel_atomic_state *state, struct intel_link_bw_limits *limits); bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state, struct intel_crtc *crtc); +bool intel_dp_mst_verify_dpcd_state(struct intel_dp *intel_dp); #endif /* __INTEL_DP_MST_H__ */ -- cgit From a547a5880cba6f287179135381f1b484b251be31 Mon Sep 17 00:00:00 2001 From: Peter Newman Date: Thu, 22 Aug 2024 12:02:11 -0700 Subject: x86/resctrl: Fix arch_mbm_* array overrun on SNC When using resctrl on systems with Sub-NUMA Clustering enabled, monitoring groups may be allocated RMID values which would overrun the arch_mbm_{local,total} arrays. This is due to inconsistencies in whether the SNC-adjusted num_rmid value or the unadjusted value in resctrl_arch_system_num_rmid_idx() is used. The num_rmid value for the L3 resource is currently: resctrl_arch_system_num_rmid_idx() / snc_nodes_per_l3_cache As a simple fix, make resctrl_arch_system_num_rmid_idx() return the SNC-adjusted, L3 num_rmid value on x86. Fixes: e13db55b5a0d ("x86/resctrl: Introduce snc_nodes_per_l3_cache") Signed-off-by: Peter Newman Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/r/20240822190212.1848788-1-peternewman@google.com --- arch/x86/include/asm/resctrl.h | 6 ------ arch/x86/kernel/cpu/resctrl/core.c | 8 ++++++++ include/linux/resctrl.h | 1 + 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 12dbd2588ca7..8b1b6ce1e51b 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -156,12 +156,6 @@ static inline void resctrl_sched_in(struct task_struct *tsk) __resctrl_sched_in(tsk); } -static inline u32 resctrl_arch_system_num_rmid_idx(void) -{ - /* RMID are independent numbers for x86. num_rmid_idx == num_rmid */ - return boot_cpu_data.x86_cache_max_rmid + 1; -} - static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid) { *rmid = idx; diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 1930fce9dfe9..8591d53c144b 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -119,6 +119,14 @@ struct rdt_hw_resource rdt_resources_all[] = { }, }; +u32 resctrl_arch_system_num_rmid_idx(void) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + + /* RMID are independent numbers for x86. num_rmid_idx == num_rmid */ + return r->num_rmid; +} + /* * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs * as they do not have CPUID enumeration support for Cache allocation. diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index b0875b99e811..d94abba1c716 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -248,6 +248,7 @@ struct resctrl_schema { /* The number of closid supported by this resource regardless of CDP */ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); +u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); /* -- cgit From 6a5dcd487791e0c2d86622064602a5c7459941ed Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Aug 2024 23:06:48 +0100 Subject: cifs: Fix lack of credit renegotiation on read retry When netfslib asks cifs to issue a read operation, it prefaces this with a call to ->clamp_length() which cifs uses to negotiate credits, providing receive capacity on the server; however, in the event that a read op needs reissuing, netfslib doesn't call ->clamp_length() again as that could shorten the subrequest, leaving a gap. This causes the retried read to be done with zero credits which causes the server to reject it with STATUS_INVALID_PARAMETER. This is a problem for a DIO read that is requested that would go over the EOF. The short read will be retried, causing EINVAL to be returned to the user when it fails. Fix this by making cifs_req_issue_read() negotiate new credits if retrying (NETFS_SREQ_RETRYING now gets set in the read side as well as the write side in this instance). This isn't sufficient, however: the new credits might not be sufficient to complete the remainder of the read, so also add an additional field, rreq->actual_len, that holds the actual size of the op we want to perform without having to alter subreq->len. We then rely on repeated short reads being retried until we finish the read or reach the end of file and make a zero-length read. Also fix a couple of places where the subrequest start and length need to be altered by the amount so far transferred when being used. Fixes: 69c3c023af25 ("cifs: Implement netfslib hooks") Signed-off-by: David Howells cc: Steve French cc: Paulo Alcantara cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/netfs/io.c | 2 ++ fs/smb/client/cifsglob.h | 1 + fs/smb/client/file.c | 37 +++++++++++++++++++++++++++++++++---- fs/smb/client/smb2ops.c | 2 +- fs/smb/client/smb2pdu.c | 28 +++++++++++++++++----------- fs/smb/client/trace.h | 1 + 6 files changed, 55 insertions(+), 16 deletions(-) diff --git a/fs/netfs/io.c b/fs/netfs/io.c index 4da0a494e860..3303b515b536 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -306,6 +306,7 @@ static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq) break; subreq->source = NETFS_DOWNLOAD_FROM_SERVER; subreq->error = 0; + __set_bit(NETFS_SREQ_RETRYING, &subreq->flags); netfs_stat(&netfs_n_rh_download_instead); trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead); netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); @@ -313,6 +314,7 @@ static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq) netfs_reset_subreq_iter(rreq, subreq); netfs_read_from_server(rreq, subreq); } else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) { + __set_bit(NETFS_SREQ_RETRYING, &subreq->flags); netfs_reset_subreq_iter(rreq, subreq); netfs_rreq_short_read(rreq, subreq); } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index f379b9dc93ba..9eae8649f90c 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1485,6 +1485,7 @@ struct cifs_io_subrequest { struct cifs_io_request *req; }; ssize_t got_bytes; + size_t actual_len; unsigned int xid; int result; bool have_xid; diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index f9b302cb8233..2d387485f05b 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -111,6 +111,7 @@ static void cifs_issue_write(struct netfs_io_subrequest *subreq) goto fail; } + wdata->actual_len = wdata->subreq.len; rc = adjust_credits(wdata->server, wdata, cifs_trace_rw_credits_issue_write_adjust); if (rc) goto fail; @@ -153,7 +154,7 @@ static bool cifs_clamp_length(struct netfs_io_subrequest *subreq) struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); struct TCP_Server_Info *server = req->server; struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); - size_t rsize = 0; + size_t rsize; int rc; rdata->xid = get_xid(); @@ -166,8 +167,8 @@ static bool cifs_clamp_length(struct netfs_io_subrequest *subreq) cifs_sb->ctx); - rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &rsize, - &rdata->credits); + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, + &rsize, &rdata->credits); if (rc) { subreq->error = rc; return false; @@ -183,7 +184,8 @@ static bool cifs_clamp_length(struct netfs_io_subrequest *subreq) server->credits, server->in_flight, 0, cifs_trace_rw_credits_read_submit); - subreq->len = min_t(size_t, subreq->len, rsize); + subreq->len = umin(subreq->len, rsize); + rdata->actual_len = subreq->len; #ifdef CONFIG_CIFS_SMB_DIRECT if (server->smbd_conn) @@ -203,12 +205,39 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq) struct netfs_io_request *rreq = subreq->rreq; struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); + struct TCP_Server_Info *server = req->server; + struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); int rc = 0; cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n", __func__, rreq->debug_id, subreq->debug_index, rreq->mapping, subreq->transferred, subreq->len); + if (test_bit(NETFS_SREQ_RETRYING, &subreq->flags)) { + /* + * As we're issuing a retry, we need to negotiate some new + * credits otherwise the server may reject the op with + * INVALID_PARAMETER. Note, however, we may get back less + * credit than we need to complete the op, in which case, we + * shorten the op and rely on additional rounds of retry. + */ + size_t rsize = umin(subreq->len - subreq->transferred, + cifs_sb->ctx->rsize); + + rc = server->ops->wait_mtu_credits(server, rsize, &rdata->actual_len, + &rdata->credits); + if (rc) + goto out; + + rdata->credits.in_flight_check = 1; + + trace_smb3_rw_credits(rdata->rreq->debug_id, + rdata->subreq.debug_index, + rdata->credits.value, + server->credits, server->in_flight, 0, + cifs_trace_rw_credits_read_resubmit); + } + if (req->cfile->invalidHandle) { do { rc = cifs_reopen_file(req->cfile, true); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 0b9cb1a60d4a..a6f00b157275 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -301,7 +301,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server, unsigned int /*enum smb3_rw_credits_trace*/ trace) { struct cifs_credits *credits = &subreq->credits; - int new_val = DIV_ROUND_UP(subreq->subreq.len, SMB2_MAX_BUFFER_SIZE); + int new_val = DIV_ROUND_UP(subreq->actual_len, SMB2_MAX_BUFFER_SIZE); int scredits, in_flight; if (!credits->value || credits->value == new_val) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 2d7e6c42cf18..be7a1a9c691d 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4529,9 +4529,9 @@ smb2_readv_callback(struct mid_q_entry *mid) "rdata server %p != mid server %p", rdata->server, mid->server); - cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu\n", + cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu/%zu\n", __func__, mid->mid, mid->mid_state, rdata->result, - rdata->subreq.len); + rdata->actual_len, rdata->subreq.len - rdata->subreq.transferred); switch (mid->mid_state) { case MID_RESPONSE_RECEIVED: @@ -4585,15 +4585,18 @@ smb2_readv_callback(struct mid_q_entry *mid) rdata->subreq.debug_index, rdata->xid, rdata->req->cfile->fid.persistent_fid, - tcon->tid, tcon->ses->Suid, rdata->subreq.start, - rdata->subreq.len, rdata->result); + tcon->tid, tcon->ses->Suid, + rdata->subreq.start + rdata->subreq.transferred, + rdata->actual_len, + rdata->result); } else trace_smb3_read_done(rdata->rreq->debug_id, rdata->subreq.debug_index, rdata->xid, rdata->req->cfile->fid.persistent_fid, tcon->tid, tcon->ses->Suid, - rdata->subreq.start, rdata->got_bytes); + rdata->subreq.start + rdata->subreq.transferred, + rdata->got_bytes); if (rdata->result == -ENODATA) { /* We may have got an EOF error because fallocate @@ -4621,6 +4624,7 @@ smb2_async_readv(struct cifs_io_subrequest *rdata) { int rc, flags = 0; char *buf; + struct netfs_io_subrequest *subreq = &rdata->subreq; struct smb2_hdr *shdr; struct cifs_io_parms io_parms; struct smb_rqst rqst = { .rq_iov = rdata->iov, @@ -4631,15 +4635,15 @@ smb2_async_readv(struct cifs_io_subrequest *rdata) int credit_request; cifs_dbg(FYI, "%s: offset=%llu bytes=%zu\n", - __func__, rdata->subreq.start, rdata->subreq.len); + __func__, subreq->start, subreq->len); if (!rdata->server) rdata->server = cifs_pick_channel(tcon->ses); io_parms.tcon = tlink_tcon(rdata->req->cfile->tlink); io_parms.server = server = rdata->server; - io_parms.offset = rdata->subreq.start; - io_parms.length = rdata->subreq.len; + io_parms.offset = subreq->start + subreq->transferred; + io_parms.length = rdata->actual_len; io_parms.persistent_fid = rdata->req->cfile->fid.persistent_fid; io_parms.volatile_fid = rdata->req->cfile->fid.volatile_fid; io_parms.pid = rdata->req->pid; @@ -4654,11 +4658,13 @@ smb2_async_readv(struct cifs_io_subrequest *rdata) rdata->iov[0].iov_base = buf; rdata->iov[0].iov_len = total_len; + rdata->got_bytes = 0; + rdata->result = 0; shdr = (struct smb2_hdr *)buf; if (rdata->credits.value > 0) { - shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->subreq.len, + shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->actual_len, SMB2_MAX_BUFFER_SIZE)); credit_request = le16_to_cpu(shdr->CreditCharge) + 8; if (server->credits >= server->max_credits) @@ -4682,11 +4688,11 @@ smb2_async_readv(struct cifs_io_subrequest *rdata) if (rc) { cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE); trace_smb3_read_err(rdata->rreq->debug_id, - rdata->subreq.debug_index, + subreq->debug_index, rdata->xid, io_parms.persistent_fid, io_parms.tcon->tid, io_parms.tcon->ses->Suid, - io_parms.offset, io_parms.length, rc); + io_parms.offset, rdata->actual_len, rc); } async_readv_out: diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 0f0c10c7ada7..8e9964001e2a 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -30,6 +30,7 @@ EM(cifs_trace_rw_credits_old_session, "old-session") \ EM(cifs_trace_rw_credits_read_response_add, "rd-resp-add") \ EM(cifs_trace_rw_credits_read_response_clear, "rd-resp-clr") \ + EM(cifs_trace_rw_credits_read_resubmit, "rd-resubmit") \ EM(cifs_trace_rw_credits_read_submit, "rd-submit ") \ EM(cifs_trace_rw_credits_write_prepare, "wr-prepare ") \ EM(cifs_trace_rw_credits_write_response_add, "wr-resp-add") \ -- cgit From 1da29f2c39b67b846b74205c81bf0ccd96d34727 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Aug 2024 23:06:49 +0100 Subject: netfs, cifs: Fix handling of short DIO read Short DIO reads, particularly in relation to cifs, are not being handled correctly by cifs and netfslib. This can be tested by doing a DIO read of a file where the size of read is larger than the size of the file. When it crosses the EOF, it gets a short read and this gets retried, and in the case of cifs, the retry read fails, with the failure being translated to ENODATA. Fix this by the following means: (1) Add a flag, NETFS_SREQ_HIT_EOF, for the filesystem to set when it detects that the read did hit the EOF. (2) Make the netfslib read assessment stop processing subrequests when it encounters one with that flag set. (3) Return rreq->transferred, the accumulated contiguous amount read to that point, to userspace for a DIO read. (4) Make cifs set the flag and clear the error if the read RPC returned ENODATA. (5) Make cifs set the flag and clear the error if a short read occurred without error and the read-to file position is now at the remote inode size. Fixes: 69c3c023af25 ("cifs: Implement netfslib hooks") Signed-off-by: David Howells cc: Steve French cc: Paulo Alcantara cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/netfs/io.c | 17 +++++++++++------ fs/smb/client/smb2pdu.c | 13 +++++++++---- include/linux/netfs.h | 1 + 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/fs/netfs/io.c b/fs/netfs/io.c index 3303b515b536..943128507af5 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -368,7 +368,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) if (subreq->error || subreq->transferred == 0) break; transferred += subreq->transferred; - if (subreq->transferred < subreq->len) + if (subreq->transferred < subreq->len || + test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) break; } @@ -503,7 +504,8 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq, subreq->error = 0; subreq->transferred += transferred_or_error; - if (subreq->transferred < subreq->len) + if (subreq->transferred < subreq->len && + !test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) goto incomplete; complete: @@ -782,10 +784,13 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) TASK_UNINTERRUPTIBLE); ret = rreq->error; - if (ret == 0 && rreq->submitted < rreq->len && - rreq->origin != NETFS_DIO_READ) { - trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); - ret = -EIO; + if (ret == 0) { + if (rreq->origin == NETFS_DIO_READ) { + ret = rreq->transferred; + } else if (rreq->submitted < rreq->len) { + trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); + ret = -EIO; + } } } else { /* If we decrement nr_outstanding to 0, the ref belongs to us. */ diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index be7a1a9c691d..88dc49d67037 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4507,6 +4507,7 @@ static void smb2_readv_callback(struct mid_q_entry *mid) { struct cifs_io_subrequest *rdata = mid->callback_data; + struct netfs_inode *ictx = netfs_inode(rdata->rreq->inode); struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink); struct TCP_Server_Info *server = rdata->server; struct smb2_hdr *shdr = @@ -4599,11 +4600,15 @@ smb2_readv_callback(struct mid_q_entry *mid) rdata->got_bytes); if (rdata->result == -ENODATA) { - /* We may have got an EOF error because fallocate - * failed to enlarge the file. - */ - if (rdata->subreq.start < rdata->subreq.rreq->i_size) + __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags); + rdata->result = 0; + } else { + if (rdata->got_bytes < rdata->actual_len && + rdata->subreq.start + rdata->subreq.transferred + rdata->got_bytes == + ictx->remote_i_size) { + __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags); rdata->result = 0; + } } trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, rdata->credits.value, server->credits, server->in_flight, diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 983816608f15..c47443e7a97e 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -198,6 +198,7 @@ struct netfs_io_subrequest { #define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */ #define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */ #define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */ +#define NETFS_SREQ_HIT_EOF 12 /* Set if we hit the EOF */ }; enum netfs_io_origin { -- cgit From 8101d6e112e2524e967368f920c404ae445a9757 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 27 Aug 2024 15:47:27 +0100 Subject: cifs: Fix copy offload to flush destination region Fix cifs_file_copychunk_range() to flush the destination region before invalidating it to avoid potential loss of data should the copy fail, in whole or in part, in some way. Fixes: 7b2404a886f8 ("cifs: Fix flushing, invalidation and file size with copy_file_range()") Signed-off-by: David Howells cc: Steve French cc: Paulo Alcantara cc: Shyam Prasad N cc: Rohith Surabattula cc: Matthew Wilcox cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: linux-mm@kvack.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index d89485235425..2a2523c93944 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1341,7 +1341,6 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, struct cifsFileInfo *smb_file_target; struct cifs_tcon *src_tcon; struct cifs_tcon *target_tcon; - unsigned long long destend, fstart, fend; ssize_t rc; cifs_dbg(FYI, "copychunk range\n"); @@ -1391,25 +1390,13 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, goto unlock; } - destend = destoff + len - 1; - - /* Flush the folios at either end of the destination range to prevent - * accidental loss of dirty data outside of the range. + /* Flush and invalidate all the folios in the destination region. If + * the copy was successful, then some of the flush is extra overhead, + * but we need to allow for the copy failing in some way (eg. ENOSPC). */ - fstart = destoff; - fend = destend; - - rc = cifs_flush_folio(target_inode, destoff, &fstart, &fend, true); + rc = filemap_invalidate_inode(target_inode, true, destoff, destoff + len - 1); if (rc) goto unlock; - rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false); - if (rc) - goto unlock; - if (fend > target_cifsi->netfs.zero_point) - target_cifsi->netfs.zero_point = fend + 1; - - /* Discard all the folios that overlap the destination region. */ - truncate_inode_pages_range(&target_inode->i_data, fstart, fend); fscache_invalidate(cifs_inode_cookie(target_inode), NULL, i_size_read(target_inode), 0); -- cgit From 383baf5c8f062091af34c63f28d37642a8f188ae Mon Sep 17 00:00:00 2001 From: Mrinmay Sarkar Date: Mon, 26 Aug 2024 17:41:00 +0530 Subject: dmaengine: dw-edma: Fix unmasking STOP and ABORT interrupts for HDMA The current logic is enabling both STOP_INT_MASK and ABORT_INT_MASK bit. This is apparently masking those particular interrupts rather than unmasking the same. If the interrupts are masked, they would never get triggered. So fix the issue by unmasking the STOP and ABORT interrupts properly. Fixes: e74c39573d35 ("dmaengine: dw-edma: Add support for native HDMA") cc: stable@vger.kernel.org Signed-off-by: Mrinmay Sarkar Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1724674261-3144-2-git-send-email-quic_msarkar@quicinc.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-hdma-v0-core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c index 10e8f0715114..2addaca3b694 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-core.c +++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c @@ -247,10 +247,11 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first) if (first) { /* Enable engine */ SET_CH_32(dw, chan->dir, chan->id, ch_en, BIT(0)); - /* Interrupt enable&unmask - done, abort */ - tmp = GET_CH_32(dw, chan->dir, chan->id, int_setup) | - HDMA_V0_STOP_INT_MASK | HDMA_V0_ABORT_INT_MASK | - HDMA_V0_LOCAL_STOP_INT_EN | HDMA_V0_LOCAL_ABORT_INT_EN; + /* Interrupt unmask - stop, abort */ + tmp = GET_CH_32(dw, chan->dir, chan->id, int_setup); + tmp &= ~(HDMA_V0_STOP_INT_MASK | HDMA_V0_ABORT_INT_MASK); + /* Interrupt enable - stop, abort */ + tmp |= HDMA_V0_LOCAL_STOP_INT_EN | HDMA_V0_LOCAL_ABORT_INT_EN; if (!(dw->chip->flags & DW_EDMA_CHIP_LOCAL)) tmp |= HDMA_V0_REMOTE_STOP_INT_EN | HDMA_V0_REMOTE_ABORT_INT_EN; SET_CH_32(dw, chan->dir, chan->id, int_setup, tmp); -- cgit From 9f646ff25c09c52cebe726601db27a60f876f15e Mon Sep 17 00:00:00 2001 From: Mrinmay Sarkar Date: Mon, 26 Aug 2024 17:41:01 +0530 Subject: dmaengine: dw-edma: Do not enable watermark interrupts for HDMA DW_HDMA_V0_LIE and DW_HDMA_V0_RIE are initialized as BIT(3) and BIT(4) respectively in dw_hdma_control enum. But as per HDMA register these bits are corresponds to LWIE and RWIE bit i.e local watermark interrupt enable and remote watermarek interrupt enable. In linked list mode LWIE and RWIE bits only enable the local and remote watermark interrupt. Since the watermark interrupts are not used but enabled, this leads to spurious interrupts getting generated. So remove the code that enables them to avoid generating spurious watermark interrupts. And also rename DW_HDMA_V0_LIE to DW_HDMA_V0_LWIE and DW_HDMA_V0_RIE to DW_HDMA_V0_RWIE as there is no LIE and RIE bits in HDMA and those bits are corresponds to LWIE and RWIE bits. Fixes: e74c39573d35 ("dmaengine: dw-edma: Add support for native HDMA") cc: stable@vger.kernel.org Signed-off-by: Mrinmay Sarkar Reviewed-by: Manivannan Sadhasivam Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/1724674261-3144-3-git-send-email-quic_msarkar@quicinc.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-hdma-v0-core.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c index 2addaca3b694..e3f8db4fe909 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-core.c +++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c @@ -17,8 +17,8 @@ enum dw_hdma_control { DW_HDMA_V0_CB = BIT(0), DW_HDMA_V0_TCB = BIT(1), DW_HDMA_V0_LLP = BIT(2), - DW_HDMA_V0_LIE = BIT(3), - DW_HDMA_V0_RIE = BIT(4), + DW_HDMA_V0_LWIE = BIT(3), + DW_HDMA_V0_RWIE = BIT(4), DW_HDMA_V0_CCS = BIT(8), DW_HDMA_V0_LLE = BIT(9), }; @@ -195,25 +195,14 @@ static void dw_hdma_v0_write_ll_link(struct dw_edma_chunk *chunk, static void dw_hdma_v0_core_write_chunk(struct dw_edma_chunk *chunk) { struct dw_edma_burst *child; - struct dw_edma_chan *chan = chunk->chan; u32 control = 0, i = 0; - int j; if (chunk->cb) control = DW_HDMA_V0_CB; - j = chunk->bursts_alloc; - list_for_each_entry(child, &chunk->burst->list, list) { - j--; - if (!j) { - control |= DW_HDMA_V0_LIE; - if (!(chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL)) - control |= DW_HDMA_V0_RIE; - } - + list_for_each_entry(child, &chunk->burst->list, list) dw_hdma_v0_write_ll_data(chunk, i++, control, child->sz, child->sar, child->dar); - } control = DW_HDMA_V0_LLP | DW_HDMA_V0_TCB; if (!chunk->cb) -- cgit From 8f614469de248a4bc55fb07e55d5f4c340c75b11 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 21 Aug 2024 14:32:02 -0400 Subject: drm/amdgpu: align pp_power_profile_mode with kernel docs The kernel doc says you need to select manual mode to adjust this, but the code only allows you to adjust it when manual mode is not selected. Remove the manual mode check. Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher (cherry picked from commit bbb05f8a9cd87f5046d05a0c596fddfb714ee457) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 9d7454b3c314..bc83cd89f8a0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2265,8 +2265,7 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu, smu_dpm_ctx->dpm_level = level; } - if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && - smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { + if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { index = fls(smu->workload_mask); index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; workload[0] = smu->workload_setting[index]; @@ -2343,8 +2342,7 @@ static int smu_switch_power_profile(void *handle, workload[0] = smu->workload_setting[index]; } - if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && - smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) + if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) smu_bump_power_profile_mode(smu, workload, 0); return 0; -- cgit From 948f279dc48a6db17204f9b23f76b67abcd5d702 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 22 Aug 2024 16:20:10 -0400 Subject: drm/amdgpu/smu13.0.7: print index for profiles Print the index for the profiles. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3543 Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher (cherry picked from commit b86a6a57b8ad1699ba8b1c270a79678383baf632) --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index a7d0231727e8..7bc95c404377 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2378,7 +2378,7 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf size += sysfs_emit_at(buf, size, " "); for (i = 0; i <= PP_SMC_POWER_PROFILE_WINDOW3D; i++) - size += sysfs_emit_at(buf, size, "%-14s%s", amdgpu_pp_profile_name[i], + size += sysfs_emit_at(buf, size, "%d %-14s%s", i, amdgpu_pp_profile_name[i], (i == smu->power_profile_mode) ? "* " : " "); size += sysfs_emit_at(buf, size, "\n"); @@ -2408,7 +2408,7 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf do { \ size += sysfs_emit_at(buf, size, "%-30s", #field); \ for (j = 0; j <= PP_SMC_POWER_PROFILE_WINDOW3D; j++) \ - size += sysfs_emit_at(buf, size, "%-16d", activity_monitor_external[j].DpmActivityMonitorCoeffInt.field); \ + size += sysfs_emit_at(buf, size, "%-18d", activity_monitor_external[j].DpmActivityMonitorCoeffInt.field); \ size += sysfs_emit_at(buf, size, "\n"); \ } while (0) -- cgit From d420c857d85777663e8d16adfc24463f5d5c2dbc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 22 Aug 2024 21:54:24 -0400 Subject: drm/amdgpu/swsmu: always force a state reprogram on init Always reprogram the hardware state on init. This ensures the PMFW state is explicitly programmed and we are not relying on the default PMFW state. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3131 Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher (cherry picked from commit c50fe289ed7207f71df3b5f1720512a9620e84fb) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index bc83cd89f8a0..74e35f8ddefc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2224,8 +2224,9 @@ static int smu_bump_power_profile_mode(struct smu_context *smu, } static int smu_adjust_power_state_dynamic(struct smu_context *smu, - enum amd_dpm_forced_level level, - bool skip_display_settings) + enum amd_dpm_forced_level level, + bool skip_display_settings, + bool force_update) { int ret = 0; int index = 0; @@ -2254,7 +2255,7 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu, } } - if (smu_dpm_ctx->dpm_level != level) { + if (force_update || smu_dpm_ctx->dpm_level != level) { ret = smu_asic_set_performance_level(smu, level); if (ret) { dev_err(smu->adev->dev, "Failed to set performance level!"); @@ -2270,7 +2271,7 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu, index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; workload[0] = smu->workload_setting[index]; - if (smu->power_profile_mode != workload[0]) + if (force_update || smu->power_profile_mode != workload[0]) smu_bump_power_profile_mode(smu, workload, 0); } @@ -2291,11 +2292,13 @@ static int smu_handle_task(struct smu_context *smu, ret = smu_pre_display_config_changed(smu); if (ret) return ret; - ret = smu_adjust_power_state_dynamic(smu, level, false); + ret = smu_adjust_power_state_dynamic(smu, level, false, false); break; case AMD_PP_TASK_COMPLETE_INIT: + ret = smu_adjust_power_state_dynamic(smu, level, true, true); + break; case AMD_PP_TASK_READJUST_POWER_STATE: - ret = smu_adjust_power_state_dynamic(smu, level, true); + ret = smu_adjust_power_state_dynamic(smu, level, true, false); break; default: break; -- cgit From 37a45fb8db2619e03d26de59dbdb4ae2b0b02d7d Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Tue, 20 Aug 2024 08:57:15 +0800 Subject: drm/amd/pm: update message interface for smu v14.0.2/3 update message interface for smu v14.0.2/3 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher (cherry picked from commit 01bfabc2d1d8aaffe5268f8df0843a6d916dcbaa) --- .../drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h | 18 ++++++++++++++---- drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 1 - 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h index de2e442281ff..87ca5ceb1ece 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h @@ -92,7 +92,6 @@ //Resets #define PPSMC_MSG_PrepareMp1ForUnload 0x2E -#define PPSMC_MSG_Mode1Reset 0x2F //Set SystemVirtual DramAddrHigh #define PPSMC_MSG_SetSystemVirtualDramAddrHigh 0x30 @@ -119,11 +118,12 @@ //STB to dram log #define PPSMC_MSG_DumpSTBtoDram 0x3D -#define PPSMC_MSG_STBtoDramLogSetDramAddrHigh 0x3E -#define PPSMC_MSG_STBtoDramLogSetDramAddrLow 0x3F +#define PPSMC_MSG_STBtoDramLogSetDramAddress 0x3E +#define PPSMC_MSG_DummyUndefined 0x3F #define PPSMC_MSG_STBtoDramLogSetDramSize 0x40 #define PPSMC_MSG_SetOBMTraceBufferLogging 0x41 +#define PPSMC_MSG_UseProfilingMode 0x42 #define PPSMC_MSG_AllowGfxDcs 0x43 #define PPSMC_MSG_DisallowGfxDcs 0x44 #define PPSMC_MSG_EnableAudioStutterWA 0x45 @@ -135,6 +135,16 @@ #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4B #define PPSMC_MSG_SetPriorityDeltaGain 0x4C #define PPSMC_MSG_AllowIHHostInterrupt 0x4D +#define PPSMC_MSG_EnableShadowDpm 0x4E #define PPSMC_MSG_Mode3Reset 0x4F -#define PPSMC_Message_Count 0x50 +#define PPSMC_MSG_SetDriverDramAddr 0x50 +#define PPSMC_MSG_SetToolsDramAddr 0x51 +#define PPSMC_MSG_TransferTableSmu2DramWithAddr 0x52 +#define PPSMC_MSG_TransferTableDram2SmuWithAddr 0x53 +#define PPSMC_MSG_GetAllRunningSmuFeatures 0x54 +#define PPSMC_MSG_GetSvi3Voltage 0x55 +#define PPSMC_MSG_UpdatePolicy 0x56 +#define PPSMC_MSG_ExtPwrConnSupport 0x57 +#define PPSMC_MSG_PreloadSwPstateForUclkOverDrive 0x58 +#define PPSMC_Message_Count 0x59 #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index e1a27903c80a..e291137176bf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -115,7 +115,6 @@ static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0), MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 0), - MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), -- cgit From 959fc102ff4c39f5ab021da311c2cfd1d5602a0c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 20 Aug 2024 13:11:22 -0400 Subject: drm/amdgpu/gfx12: set UNORD_DISPATCH in compute MQDs This needs to be set to 1 to avoid a potential deadlock in the GC 10.x and newer. On GC 9.x and older, this needs to be set to 0. This can lead to hangs in some mixed graphics and compute workloads. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3575 Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit 40318a2406bd426c6f4591269669c04e8eda571d) --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 2c611b8577a7..e45d23e82878 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3005,7 +3005,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c index b7a08e7a4423..d163d92a692f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c @@ -187,6 +187,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); -- cgit From 3b9a33235c773c7a3768060cf1d2cf8a9153bc37 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Wed, 21 Aug 2024 12:27:24 +0800 Subject: drm/amd/display: avoid using null object of framebuffer Instead of using state->fb->obj[0] directly, get object from framebuffer by calling drm_gem_fb_get_obj() and return error code when object is null to avoid using null object of framebuffer. Fixes: 5d945cbcd4b1 ("drm/amd/display: Create a file dedicated to planes") Signed-off-by: Ma Ke Signed-off-by: Alex Deucher (cherry picked from commit 73dd0ad9e5dad53766ea3e631303430116f834b3) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index a83bd0331c3b..5cb11cc2d063 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "amdgpu.h" @@ -935,10 +936,14 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, } afb = to_amdgpu_framebuffer(new_state->fb); - obj = new_state->fb->obj[0]; + obj = drm_gem_fb_get_obj(new_state->fb, 0); + if (!obj) { + DRM_ERROR("Failed to get obj from framebuffer\n"); + return -EINVAL; + } + rbo = gem_to_amdgpu_bo(obj); adev = amdgpu_ttm_adev(rbo->tbo.bdev); - r = amdgpu_bo_reserve(rbo, true); if (r) { dev_err(adev->dev, "fail to reserve bo (%d)\n", r); -- cgit From 6d5064c379557d92832b51d247b385bb8bd6aa5b Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Thu, 22 Aug 2024 11:44:12 +0800 Subject: drm/amdgpu: support for gc_info table v1.3 Add gc_info table v1.3 for IP discovery. Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit 875ff9a7ee8824200885384effa7743892a34ed6) --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 11 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 ++++ drivers/gpu/drm/amd/include/discovery.h | 42 +++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 7b561e8e3caf..4bd61c169ca8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1500,6 +1500,7 @@ union gc_info { struct gc_info_v1_0 v1; struct gc_info_v1_1 v1_1; struct gc_info_v1_2 v1_2; + struct gc_info_v1_3 v1_3; struct gc_info_v2_0 v2; struct gc_info_v2_1 v2_1; }; @@ -1558,6 +1559,16 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) adev->gfx.config.gc_gl1c_size_per_instance = le32_to_cpu(gc_info->v1_2.gc_gl1c_size_per_instance); adev->gfx.config.gc_gl2c_per_gpu = le32_to_cpu(gc_info->v1_2.gc_gl2c_per_gpu); } + if (le16_to_cpu(gc_info->v1.header.version_minor) >= 3) { + adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v1_3.gc_tcp_size_per_cu); + adev->gfx.config.gc_tcp_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcp_cache_line_size); + adev->gfx.config.gc_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_size_per_sqc); + adev->gfx.config.gc_instruction_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_line_size); + adev->gfx.config.gc_scalar_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_size_per_sqc); + adev->gfx.config.gc_scalar_data_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_line_size); + adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v1_3.gc_tcc_size); + adev->gfx.config.gc_tcc_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcc_cache_line_size); + } break; case 2: adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index ddda94e49db4..56cc58edbb4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -240,6 +240,12 @@ struct amdgpu_gfx_config { uint32_t gc_tcp_size_per_cu; uint32_t gc_num_cu_per_sqc; uint32_t gc_tcc_size; + uint32_t gc_tcp_cache_line_size; + uint32_t gc_instruction_cache_size_per_sqc; + uint32_t gc_instruction_cache_line_size; + uint32_t gc_scalar_data_cache_size_per_sqc; + uint32_t gc_scalar_data_cache_line_size; + uint32_t gc_tcc_cache_line_size; }; struct amdgpu_cu_info { diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index 46bf19c9c5c4..710e328fad48 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -258,6 +258,48 @@ struct gc_info_v1_2 { uint32_t gc_gl2c_per_gpu; }; +struct gc_info_v1_3 { + struct gpu_info_header header; + uint32_t gc_num_se; + uint32_t gc_num_wgp0_per_sa; + uint32_t gc_num_wgp1_per_sa; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_gl2c; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_sa_per_se; + uint32_t gc_num_packer_per_sc; + uint32_t gc_num_gl2a; + uint32_t gc_num_tcp_per_sa; + uint32_t gc_num_sdp_interface; + uint32_t gc_num_tcps; + uint32_t gc_num_tcp_per_wpg; + uint32_t gc_tcp_l1_size; + uint32_t gc_num_sqc_per_wgp; + uint32_t gc_l1_instruction_cache_size_per_sqc; + uint32_t gc_l1_data_cache_size_per_sqc; + uint32_t gc_gl1c_per_sa; + uint32_t gc_gl1c_size_per_instance; + uint32_t gc_gl2c_per_gpu; + uint32_t gc_tcp_size_per_cu; + uint32_t gc_tcp_cache_line_size; + uint32_t gc_instruction_cache_size_per_sqc; + uint32_t gc_instruction_cache_line_size; + uint32_t gc_scalar_data_cache_size_per_sqc; + uint32_t gc_scalar_data_cache_line_size; + uint32_t gc_tcc_size; + uint32_t gc_tcc_cache_line_size; +}; + struct gc_info_v2_0 { struct gpu_info_header header; -- cgit From badfdc6211f27803bc805fb56629f7d418670870 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 16 Aug 2024 14:34:17 +0530 Subject: drm/amd/pm: Add support for new P2S table revision Add p2s table support for a new revision of SMUv13.0.6. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Asad Kamal Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 010cc730ace807c6d267481b5fb6ff99acc35c46) --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 78c3f94bb3ff..9974c9f8135e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -121,6 +121,7 @@ struct mca_ras_info { #define P2S_TABLE_ID_A 0x50325341 #define P2S_TABLE_ID_X 0x50325358 +#define P2S_TABLE_ID_3 0x50325303 // clang-format off static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = { @@ -271,14 +272,18 @@ static int smu_v13_0_6_init_microcode(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; uint32_t p2s_table_id = P2S_TABLE_ID_A; int ret = 0, i, p2stable_count; + int var = (adev->pdev->device & 0xF); char ucode_prefix[15]; /* No need to load P2S tables in IOV mode */ if (amdgpu_sriov_vf(adev)) return 0; - if (!(adev->flags & AMD_IS_APU)) + if (!(adev->flags & AMD_IS_APU)) { p2s_table_id = P2S_TABLE_ID_X; + if (var == 0x5) + p2s_table_id = P2S_TABLE_ID_3; + } amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); -- cgit From 849f0d5880b7494030c4ee1e4fbaf2ca5422bca9 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Wed, 21 Aug 2024 13:10:58 +0800 Subject: drm/amd/pm: Drop unsupported features on smu v14_0_2 Drop unsupported features on smu v14_0_2. Signed-off-by: Candice Li Reviewed-by: Yang Wang Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 3376f922bfe070eff762164b3fc66981e3079417) --- .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 47 ---------------------- 1 file changed, 47 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index e291137176bf..0c09b8c4ff49 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1823,50 +1823,6 @@ static void smu_v14_0_2_set_smu_mailbox_registers(struct smu_context *smu) smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, regMP1_SMN_C2PMSG_54); } -static int smu_v14_0_2_smu_send_bad_mem_page_num(struct smu_context *smu, - uint32_t size) -{ - int ret = 0; - - /* message SMU to update the bad page number on SMUBUS */ - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetNumBadMemoryPagesRetired, - size, NULL); - if (ret) - dev_err(smu->adev->dev, - "[%s] failed to message SMU to update bad memory pages number\n", - __func__); - - return ret; -} - -static int smu_v14_0_2_send_bad_mem_channel_flag(struct smu_context *smu, - uint32_t size) -{ - int ret = 0; - - /* message SMU to update the bad channel info on SMUBUS */ - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, - size, NULL); - if (ret) - dev_err(smu->adev->dev, - "[%s] failed to message SMU to update bad memory pages channel info\n", - __func__); - - return ret; -} - -static ssize_t smu_v14_0_2_get_ecc_info(struct smu_context *smu, - void *table) -{ - int ret = 0; - - // TODO - - return ret; -} - static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, void **table) { @@ -2014,12 +1970,9 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .enable_gfx_features = smu_v14_0_2_enable_gfx_features, .set_mp1_state = smu_v14_0_2_set_mp1_state, .set_df_cstate = smu_v14_0_2_set_df_cstate, - .send_hbm_bad_pages_num = smu_v14_0_2_smu_send_bad_mem_page_num, - .send_hbm_bad_channel_flag = smu_v14_0_2_send_bad_mem_channel_flag, #if 0 .gpo_control = smu_v14_0_gpo_control, #endif - .get_ecc_info = smu_v14_0_2_get_ecc_info, }; void smu_v14_0_2_set_ppt_funcs(struct smu_context *smu) -- cgit From 9d824c7fce58f59982228aa85b0376b113cdfa35 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 13 Aug 2024 11:25:04 +0100 Subject: drm/v3d: Disable preemption while updating GPU stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We forgot to disable preemption around the write_seqcount_begin/end() pair while updating GPU stats: [ ] WARNING: CPU: 2 PID: 12 at include/linux/seqlock.h:221 __seqprop_assert.isra.0+0x128/0x150 [v3d] [ ] Workqueue: v3d_bin drm_sched_run_job_work [gpu_sched] <...snip...> [ ] Call trace: [ ] __seqprop_assert.isra.0+0x128/0x150 [v3d] [ ] v3d_job_start_stats.isra.0+0x90/0x218 [v3d] [ ] v3d_bin_job_run+0x23c/0x388 [v3d] [ ] drm_sched_run_job_work+0x520/0x6d0 [gpu_sched] [ ] process_one_work+0x62c/0xb48 [ ] worker_thread+0x468/0x5b0 [ ] kthread+0x1c4/0x1e0 [ ] ret_from_fork+0x10/0x20 Fix it. Cc: Maíra Canal Cc: stable@vger.kernel.org # v6.10+ Fixes: 6abe93b621ab ("drm/v3d: Fix race-condition between sysfs/fdinfo and interrupt handler") Signed-off-by: Tvrtko Ursulin Acked-by: Maíra Canal Signed-off-by: Maíra Canal Link: https://patchwork.freedesktop.org/patch/msgid/20240813102505.80512-1-tursulin@igalia.com --- drivers/gpu/drm/v3d/v3d_sched.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index b8682818bafa..ad1e6236ff6f 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -134,6 +134,8 @@ v3d_job_start_stats(struct v3d_job *job, enum v3d_queue queue) struct v3d_stats *local_stats = &file->stats[queue]; u64 now = local_clock(); + preempt_disable(); + write_seqcount_begin(&local_stats->lock); local_stats->start_ns = now; write_seqcount_end(&local_stats->lock); @@ -141,6 +143,8 @@ v3d_job_start_stats(struct v3d_job *job, enum v3d_queue queue) write_seqcount_begin(&global_stats->lock); global_stats->start_ns = now; write_seqcount_end(&global_stats->lock); + + preempt_enable(); } static void @@ -162,8 +166,10 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue) struct v3d_stats *local_stats = &file->stats[queue]; u64 now = local_clock(); + preempt_disable(); v3d_stats_update(local_stats, now); v3d_stats_update(global_stats, now); + preempt_enable(); } static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) -- cgit From e33a97a830b230b79a98dbbb4121d4741a2be619 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 27 Aug 2024 10:53:40 -0700 Subject: block: fix detection of unsupported WRITE SAME in blkdev_issue_write_zeroes On error, blkdev_issue_write_zeroes used to recheck the block device's WRITE SAME queue limits after submitting WRITE SAME bios. As stated in the comment, the purpose of this was to collapse all IO errors to EOPNOTSUPP if the effect of issuing bios was that WRITE SAME got turned off in the queue limits. Therefore, it does not make sense to reuse the zeroes limit that was read earlier in the function because we only care about the queue limit *now*, not what it was at the start of the function. Found by running generic/351 from fstests. Fixes: 64b582ca88ca1 ("block: Read max write zeroes once for __blkdev_issue_write_zeroes()") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: John Garry Link: https://lore.kernel.org/r/20240827175340.GB1977952@frogsfrogsfrogs Signed-off-by: Jens Axboe --- block/blk-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index 83eb7761c2bf..4c9f20a689f7 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -174,7 +174,7 @@ static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, * on an I/O error, in which case we'll turn any error into * "not supported" here. */ - if (ret && !limit) + if (ret && !bdev_write_zeroes_sectors(bdev)) return -EOPNOTSUPP; return ret; } -- cgit From b18915248a15eae7d901262f108d6ff0ffb4ffc1 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 19 Aug 2024 19:52:30 +0200 Subject: fuse: use unsigned type for getxattr/listxattr size truncation The existing code uses min_t(ssize_t, outarg.size, XATTR_LIST_MAX) when parsing the FUSE daemon's response to a zero-length getxattr/listxattr request. On 32-bit kernels, where ssize_t and outarg.size are the same size, this is wrong: The min_t() will pass through any size values that are negative when interpreted as signed. fuse_listxattr() will then return this userspace-supplied negative value, which callers will treat as an error value. This kind of bug pattern can lead to fairly bad security bugs because of how error codes are used in the Linux kernel. If a caller were to convert the numeric error into an error pointer, like so: struct foo *func(...) { int len = fuse_getxattr(..., NULL, 0); if (len < 0) return ERR_PTR(len); ... } then it would end up returning this userspace-supplied negative value cast to a pointer - but the caller of this function wouldn't recognize it as an error pointer (IS_ERR_VALUE() only detects values in the narrow range in which legitimate errno values are), and so it would just be treated as a kernel pointer. I think there is at least one theoretical codepath where this could happen, but that path would involve virtio-fs with submounts plus some weird SELinux configuration, so I think it's probably not a concern in practice. Cc: stable@vger.kernel.org # v4.9 Fixes: 63401ccdb2ca ("fuse: limit xattr returned size") Signed-off-by: Jann Horn Signed-off-by: Miklos Szeredi --- fs/fuse/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 5b423fdbb13f..9f568d345c51 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -81,7 +81,7 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value, } ret = fuse_simple_request(fm, &args); if (!ret && !size) - ret = min_t(ssize_t, outarg.size, XATTR_SIZE_MAX); + ret = min_t(size_t, outarg.size, XATTR_SIZE_MAX); if (ret == -ENOSYS) { fm->fc->no_getxattr = 1; ret = -EOPNOTSUPP; @@ -143,7 +143,7 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) } ret = fuse_simple_request(fm, &args); if (!ret && !size) - ret = min_t(ssize_t, outarg.size, XATTR_LIST_MAX); + ret = min_t(size_t, outarg.size, XATTR_LIST_MAX); if (ret > 0 && size) ret = fuse_verify_xattr_list(list, ret); if (ret == -ENOSYS) { -- cgit From 97f30876c94382d1b01d45c2c76be8911b196527 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 25 Jul 2024 10:53:34 -0700 Subject: fuse: check aborted connection before adding requests to pending list for resending There is a race condition where inflight requests will not be aborted if they are in the middle of being re-sent when the connection is aborted. If fuse_resend has already moved all the requests in the fpq->processing lists to its private queue ("to_queue") and then the connection starts and finishes aborting, these requests will be added to the pending queue and remain on it indefinitely. Fixes: 760eac73f9f6 ("fuse: Introduce a new notification type for resend pending requests") Signed-off-by: Joanne Koong Reviewed-by: Josef Bacik Reviewed-by: Jingbo Xu Cc: # v6.9 Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9eb191b5c4de..a11461ef6022 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -31,6 +31,8 @@ MODULE_ALIAS("devname:fuse"); static struct kmem_cache *fuse_req_cachep; +static void end_requests(struct list_head *head); + static struct fuse_dev *fuse_get_dev(struct file *file) { /* @@ -1820,6 +1822,13 @@ static void fuse_resend(struct fuse_conn *fc) } spin_lock(&fiq->lock); + if (!fiq->connected) { + spin_unlock(&fiq->lock); + list_for_each_entry(req, &to_queue, list) + clear_bit(FR_PENDING, &req->flags); + end_requests(&to_queue); + return; + } /* iq and pq requests are both oldest to newest */ list_splice(&to_queue, &fiq->pending); fiq->ops->wake_pending_and_unlock(fiq); -- cgit From 3002240d16494d798add0575e8ba1f284258ab34 Mon Sep 17 00:00:00 2001 From: yangyun Date: Fri, 23 Aug 2024 16:51:46 +0800 Subject: fuse: fix memory leak in fuse_create_open The memory of struct fuse_file is allocated but not freed when get_create_ext return error. Fixes: 3e2b6fdbdc9a ("fuse: send security context of inode on file") Cc: stable@vger.kernel.org # v5.17 Signed-off-by: yangyun Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 2b0d4781f394..8e96df9fd76c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -670,7 +670,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, err = get_create_ext(&args, dir, entry, mode); if (err) - goto out_put_forget_req; + goto out_free_ff; err = fuse_simple_request(fm, &args); free_ext_value(&args); -- cgit From 76a51ac00ca2a72fe3e168b7fb0e70f75ba6f512 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 28 Aug 2024 15:55:17 +0200 Subject: fuse: clear PG_uptodate when using a stolen page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Originally when a stolen page was inserted into fuse's page cache by fuse_try_move_page(), it would be marked uptodate. Then fuse_readpages_end() would call SetPageUptodate() again on the already uptodate page. Commit 413e8f014c8b ("fuse: Convert fuse_readpages_end() to use folio_end_read()") changed that by replacing the SetPageUptodate() + unlock_page() combination with folio_end_read(), which does mostly the same, except it sets the uptodate flag with an xor operation, which in the above scenario resulted in the uptodate flag being cleared, which in turn resulted in EIO being returned on the read. Fix by clearing PG_uptodate instead of setting it in fuse_try_move_page(), conforming to the expectation of folio_end_read(). Reported-by: Jürg Billeter Debugged-by: Matthew Wilcox Fixes: 413e8f014c8b ("fuse: Convert fuse_readpages_end() to use folio_end_read()") Cc: # v6.10 Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index a11461ef6022..67443ef07285 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -775,7 +775,6 @@ static int fuse_check_folio(struct folio *folio) (folio->flags & PAGE_FLAGS_CHECK_AT_PREP & ~(1 << PG_locked | 1 << PG_referenced | - 1 << PG_uptodate | 1 << PG_lru | 1 << PG_active | 1 << PG_workingset | @@ -820,9 +819,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) newfolio = page_folio(buf->page); - if (!folio_test_uptodate(newfolio)) - folio_mark_uptodate(newfolio); - + folio_clear_uptodate(newfolio); folio_clear_mappedtodisk(newfolio); if (fuse_check_folio(newfolio) != 0) -- cgit From f7790d67785302b3116bbbfda62a5a44524601a3 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 26 Aug 2024 14:19:04 -0700 Subject: fuse: update stats for pages in dropped aux writeback list In the case where the aux writeback list is dropped (e.g. the pages have been truncated or the connection is broken), the stats for its pages and backing device info need to be updated as well. Fixes: e2653bd53a98 ("fuse: fix leaked aux requests") Signed-off-by: Joanne Koong Reviewed-by: Josef Bacik Cc: # v5.1 Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f39456c65ed7..ed76121f73f2 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1832,10 +1832,16 @@ __acquires(fi->lock) fuse_writepage_finish(fm, wpa); spin_unlock(&fi->lock); - /* After fuse_writepage_finish() aux request list is private */ + /* After rb_erase() aux request list is private */ for (aux = wpa->next; aux; aux = next) { + struct backing_dev_info *bdi = inode_to_bdi(aux->inode); + next = aux->next; aux->next = NULL; + + dec_wb_stat(&bdi->wb, WB_WRITEBACK); + dec_node_page_state(aux->ia.ap.pages[0], NR_WRITEBACK_TEMP); + wb_writeout_inc(&bdi->wb); fuse_writepage_free(aux); } -- cgit From be721b451affbecc4ba4eaac3b71cdbdcade1b1b Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 27 Aug 2024 10:11:16 -0700 Subject: spi: rockchip: Resolve unbalanced runtime PM / system PM handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit e882575efc77 ("spi: rockchip: Suspend and resume the bus during NOIRQ_SYSTEM_SLEEP_PM ops") stopped respecting runtime PM status and simply disabled clocks unconditionally when suspending the system. This causes problems when the device is already runtime suspended when we go to sleep -- in which case we double-disable clocks and produce a WARNing. Switch back to pm_runtime_force_{suspend,resume}(), because that still seems like the right thing to do, and the aforementioned commit makes no explanation why it stopped using it. Also, refactor some of the resume() error handling, because it's not actually a good idea to re-disable clocks on failure. Fixes: e882575efc77 ("spi: rockchip: Suspend and resume the bus during NOIRQ_SYSTEM_SLEEP_PM ops") Cc: stable@vger.kernel.org Reported-by: Ondřej Jirman Closes: https://lore.kernel.org/lkml/20220621154218.sau54jeij4bunf56@core/ Signed-off-by: Brian Norris Link: https://patch.msgid.link/20240827171126.1115748-1-briannorris@chromium.org Signed-off-by: Mark Brown --- drivers/spi/spi-rockchip.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index e1ecd96c7858..0bb33c43b1b4 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -945,14 +945,16 @@ static int rockchip_spi_suspend(struct device *dev) { int ret; struct spi_controller *ctlr = dev_get_drvdata(dev); - struct rockchip_spi *rs = spi_controller_get_devdata(ctlr); ret = spi_controller_suspend(ctlr); if (ret < 0) return ret; - clk_disable_unprepare(rs->spiclk); - clk_disable_unprepare(rs->apb_pclk); + ret = pm_runtime_force_suspend(dev); + if (ret < 0) { + spi_controller_resume(ctlr); + return ret; + } pinctrl_pm_select_sleep_state(dev); @@ -963,25 +965,14 @@ static int rockchip_spi_resume(struct device *dev) { int ret; struct spi_controller *ctlr = dev_get_drvdata(dev); - struct rockchip_spi *rs = spi_controller_get_devdata(ctlr); pinctrl_pm_select_default_state(dev); - ret = clk_prepare_enable(rs->apb_pclk); + ret = pm_runtime_force_resume(dev); if (ret < 0) return ret; - ret = clk_prepare_enable(rs->spiclk); - if (ret < 0) - clk_disable_unprepare(rs->apb_pclk); - - ret = spi_controller_resume(ctlr); - if (ret < 0) { - clk_disable_unprepare(rs->spiclk); - clk_disable_unprepare(rs->apb_pclk); - } - - return 0; + return spi_controller_resume(ctlr); } #endif /* CONFIG_PM_SLEEP */ -- cgit From 91d1dfae464987aaf6c79ff51d8674880fb3be77 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 28 Aug 2024 21:08:25 +0100 Subject: cifs: Fix FALLOC_FL_ZERO_RANGE to preflush buffered part of target region Under certain conditions, the range to be cleared by FALLOC_FL_ZERO_RANGE may only be buffered locally and not yet have been flushed to the server. For example: xfs_io -f -t -c "pwrite -S 0x41 0 4k" \ -c "pwrite -S 0x42 4k 4k" \ -c "fzero 0 4k" \ -c "pread -v 0 8k" /xfstest.test/foo will write two 4KiB blocks of data, which get buffered in the pagecache, and then fallocate() is used to clear the first 4KiB block on the server - but we don't flush the data first, which means the EOF position on the server is wrong, and so the FSCTL_SET_ZERO_DATA RPC fails (and xfs_io ignores the error), but then when we try to read it, we see the old data. Fix this by preflushing any part of the target region that above the server's idea of the EOF position to force the server to update its EOF position. Note, however, that we don't want to simply expand the file by moving the EOF before doing the FSCTL_SET_ZERO_DATA[*] because someone else might see the zeroed region or if the RPC fails we then have to try to clean it up or risk getting corruption. [*] And we have to move the EOF first otherwise FSCTL_SET_ZERO_DATA won't do what we want. This fixes the generic/008 xfstest. [!] Note: A better way to do this might be to split the operation into two parts: we only do FSCTL_SET_ZERO_DATA for the part of the range below the server's EOF and then, if that worked, invalidate the buffered pages for the part above the range. Fixes: 6b69040247e1 ("cifs/smb3: Fix data inconsistent when zero file range") Signed-off-by: David Howells cc: Steve French cc: Zhang Xiaoxu cc: Pavel Shilovsky cc: Paulo Alcantara cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index a6f00b157275..4df84ebe8dbe 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3237,13 +3237,15 @@ static long smb3_zero_data(struct file *file, struct cifs_tcon *tcon, } static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, - loff_t offset, loff_t len, bool keep_size) + unsigned long long offset, unsigned long long len, + bool keep_size) { struct cifs_ses *ses = tcon->ses; struct inode *inode = file_inode(file); struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifsFileInfo *cfile = file->private_data; - unsigned long long new_size; + struct netfs_inode *ictx = netfs_inode(inode); + unsigned long long i_size, new_size, remote_size; long rc; unsigned int xid; @@ -3255,6 +3257,16 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, inode_lock(inode); filemap_invalidate_lock(inode->i_mapping); + i_size = i_size_read(inode); + remote_size = ictx->remote_i_size; + if (offset + len >= remote_size && offset < i_size) { + unsigned long long top = umin(offset + len, i_size); + + rc = filemap_write_and_wait_range(inode->i_mapping, offset, top - 1); + if (rc < 0) + goto zero_range_exit; + } + /* * We zero the range through ioctl, so we need remove the page caches * first, otherwise the data may be inconsistent with the server. -- cgit From ba8cf80724dbc09825b52498e4efacb563935408 Mon Sep 17 00:00:00 2001 From: Daiwei Li Date: Tue, 13 Aug 2024 21:55:53 -0700 Subject: igb: Fix not clearing TimeSync interrupts for 82580 82580 NICs have a hardware bug that makes it necessary to write into the TSICR (TimeSync Interrupt Cause) register to clear it: https://lore.kernel.org/all/CDCB8BE0.1EC2C%25matthew.vick@intel.com/ Add a conditional so only for 82580 we write into the TSICR register, so we don't risk losing events for other models. Without this change, when running ptp4l with an Intel 82580 card, I get the following output: > timed out while polling for tx timestamp increasing tx_timestamp_timeout or > increasing kworker priority may correct this issue, but a driver bug likely > causes it This goes away with this change. This (partially) reverts commit ee14cc9ea19b ("igb: Fix missing time sync events"). Fixes: ee14cc9ea19b ("igb: Fix missing time sync events") Closes: https://lore.kernel.org/intel-wired-lan/CAN0jFd1kO0MMtOh8N2Ztxn6f7vvDKp2h507sMryobkBKe=xk=w@mail.gmail.com/ Tested-by: Daiwei Li Suggested-by: Vinicius Costa Gomes Signed-off-by: Daiwei Li Acked-by: Vinicius Costa Gomes Reviewed-by: Kurt Kanzenbach Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 33a42b4c21e0..9dc7c60838ed 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6960,10 +6960,20 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) static void igb_tsync_interrupt(struct igb_adapter *adapter) { + const u32 mask = (TSINTR_SYS_WRAP | E1000_TSICR_TXTS | + TSINTR_TT0 | TSINTR_TT1 | + TSINTR_AUTT0 | TSINTR_AUTT1); struct e1000_hw *hw = &adapter->hw; u32 tsicr = rd32(E1000_TSICR); struct ptp_clock_event event; + if (hw->mac.type == e1000_82580) { + /* 82580 has a hardware bug that requires an explicit + * write to clear the TimeSync interrupt cause. + */ + wr32(E1000_TSICR, tsicr & mask); + } + if (tsicr & TSINTR_SYS_WRAP) { event.type = PTP_CLOCK_PPS; if (adapter->ptp_caps.pps) -- cgit From d11a67634227f9f9da51938af085fb41a733848f Mon Sep 17 00:00:00 2001 From: Dawid Osuchowski Date: Wed, 21 Aug 2024 18:06:40 +0200 Subject: ice: Add netif_device_attach/detach into PF reset flow Ethtool callbacks can be executed while reset is in progress and try to access deleted resources, e.g. getting coalesce settings can result in a NULL pointer dereference seen below. Reproduction steps: Once the driver is fully initialized, trigger reset: # echo 1 > /sys/class/net//device/reset when reset is in progress try to get coalesce settings using ethtool: # ethtool -c BUG: kernel NULL pointer dereference, address: 0000000000000020 PGD 0 P4D 0 Oops: Oops: 0000 [#1] PREEMPT SMP PTI CPU: 11 PID: 19713 Comm: ethtool Tainted: G S 6.10.0-rc7+ #7 RIP: 0010:ice_get_q_coalesce+0x2e/0xa0 [ice] RSP: 0018:ffffbab1e9bcf6a8 EFLAGS: 00010206 RAX: 000000000000000c RBX: ffff94512305b028 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9451c3f2e588 RDI: ffff9451c3f2e588 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: ffff9451c3f2e580 R11: 000000000000001f R12: ffff945121fa9000 R13: ffffbab1e9bcf760 R14: 0000000000000013 R15: ffffffff9e65dd40 FS: 00007faee5fbe740(0000) GS:ffff94546fd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000020 CR3: 0000000106c2e005 CR4: 00000000001706f0 Call Trace: ice_get_coalesce+0x17/0x30 [ice] coalesce_prepare_data+0x61/0x80 ethnl_default_doit+0xde/0x340 genl_family_rcv_msg_doit+0xf2/0x150 genl_rcv_msg+0x1b3/0x2c0 netlink_rcv_skb+0x5b/0x110 genl_rcv+0x28/0x40 netlink_unicast+0x19c/0x290 netlink_sendmsg+0x222/0x490 __sys_sendto+0x1df/0x1f0 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x82/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7faee60d8e27 Calling netif_device_detach() before reset makes the net core not call the driver when ethtool command is issued, the attempt to execute an ethtool command during reset will result in the following message: netlink error: No such device instead of NULL pointer dereference. Once reset is done and ice_rebuild() is executing, the netif_device_attach() is called to allow for ethtool operations to occur again in a safe manner. Fixes: fcea6f3da546 ("ice: Add stats and ethtool support") Suggested-by: Jakub Kicinski Reviewed-by: Igor Bagnucki Signed-off-by: Dawid Osuchowski Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Reviewed-by: Michal Schmidt Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 6f97ed471fe9..46d3c5a34d6a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -608,6 +608,9 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt)); } } + + if (vsi->netdev) + netif_device_detach(vsi->netdev); skip: /* clear SW filtering DB */ @@ -7589,6 +7592,7 @@ static void ice_update_pf_netdev_link(struct ice_pf *pf) */ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) { + struct ice_vsi *vsi = ice_get_main_vsi(pf); struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; bool dvm; @@ -7731,6 +7735,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ice_rebuild_arfs(pf); } + if (vsi && vsi->netdev) + netif_device_attach(vsi->netdev); + ice_update_pf_netdev_link(pf); /* tell the firmware we are up */ -- cgit From 76a0e79bc84f466999fa501fce5bf7a07641b8a7 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Wed, 28 Aug 2024 15:51:29 -0400 Subject: selinux,smack: don't bypass permissions check in inode_setsecctx hook Marek Gresko reports that the root user on an NFS client is able to change the security labels on files on an NFS filesystem that is exported with root squashing enabled. The end of the kerneldoc comment for __vfs_setxattr_noperm() states: * This function requires the caller to lock the inode's i_mutex before it * is executed. It also assumes that the caller will make the appropriate * permission checks. nfsd_setattr() does do permissions checking via fh_verify() and nfsd_permission(), but those don't do all the same permissions checks that are done by security_inode_setxattr() and its related LSM hooks do. Since nfsd_setattr() is the only consumer of security_inode_setsecctx(), simplest solution appears to be to replace the call to __vfs_setxattr_noperm() with a call to __vfs_setxattr_locked(). This fixes the above issue and has the added benefit of causing nfsd to recall conflicting delegations on a file when a client tries to change its security label. Cc: stable@kernel.org Reported-by: Marek Gresko Link: https://bugzilla.kernel.org/show_bug.cgi?id=218809 Signed-off-by: Scott Mayhew Tested-by: Stephen Smalley Reviewed-by: Stephen Smalley Reviewed-by: Chuck Lever Reviewed-by: Jeff Layton Acked-by: Casey Schaufler Signed-off-by: Paul Moore --- security/selinux/hooks.c | 4 ++-- security/smack/smack_lsm.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 55c78c318ccd..90afdfc48c0f 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6650,8 +6650,8 @@ static int selinux_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen */ static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen) { - return __vfs_setxattr_noperm(&nop_mnt_idmap, dentry, XATTR_NAME_SELINUX, - ctx, ctxlen, 0); + return __vfs_setxattr_locked(&nop_mnt_idmap, dentry, XATTR_NAME_SELINUX, + ctx, ctxlen, 0, NULL); } static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 4164699cd4f6..002a1b9ed83a 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4880,8 +4880,8 @@ static int smack_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen) static int smack_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen) { - return __vfs_setxattr_noperm(&nop_mnt_idmap, dentry, XATTR_NAME_SMACK, - ctx, ctxlen, 0); + return __vfs_setxattr_locked(&nop_mnt_idmap, dentry, XATTR_NAME_SMACK, + ctx, ctxlen, 0, NULL); } static int smack_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) -- cgit From 0870b0d8b393dde53106678a1e2cec9dfa52f9b7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Aug 2024 11:49:16 +0000 Subject: net: busy-poll: use ktime_get_ns() instead of local_clock() Typically, busy-polling durations are below 100 usec. When/if the busy-poller thread migrates to another cpu, local_clock() can be off by +/-2msec or more for small values of HZ, depending on the platform. Use ktimer_get_ns() to ensure deterministic behavior, which is the whole point of busy-polling. Fixes: 060212928670 ("net: add low latency socket poll") Fixes: 9a3c71aa8024 ("net: convert low latency sockets to sched_clock()") Fixes: 37089834528b ("sched, net: Fixup busy_loop_us_clock()") Signed-off-by: Eric Dumazet Cc: Mina Almasry Cc: Willem de Bruijn Reviewed-by: Joe Damato Link: https://patch.msgid.link/20240827114916.223377-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/busy_poll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 9b09acac538e..522f1da8b747 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -68,7 +68,7 @@ static inline bool sk_can_busy_loop(struct sock *sk) static inline unsigned long busy_loop_current_time(void) { #ifdef CONFIG_NET_RX_BUSY_POLL - return (unsigned long)(local_clock() >> 10); + return (unsigned long)(ktime_get_ns() >> 10); #else return 0; #endif -- cgit From 8b8ed1b429f8fa7ebd5632555e7b047bc0620075 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Aug 2024 08:14:24 +0200 Subject: mptcp: pm: reuse ID 0 after delete and re-add When the endpoint used by the initial subflow is removed and re-added later, the PM has to force the ID 0, it is a special case imposed by the MPTCP specs. Note that the endpoint should then need to be re-added reusing the same ID. Fixes: 3ad14f54bd74 ("mptcp: more accurate MPC endpoint tracking") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- net/mptcp/pm_netlink.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 8d2f97854c64..ec45ab4c66ab 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -585,6 +585,11 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; + + /* Special case for ID0: set the correct ID */ + if (local.addr.id == msk->mpc_endpoint_id) + local.addr.id = 0; + mptcp_pm_announce_addr(msk, &local.addr, false); mptcp_pm_nl_addr_send_ack(msk); @@ -609,6 +614,11 @@ subflow: msk->pm.local_addr_used++; __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + + /* Special case for ID0: set the correct ID */ + if (local.addr.id == msk->mpc_endpoint_id) + local.addr.id = 0; + nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); if (nr == 0) continue; -- cgit From 87b5896f3f7848130095656739b05881904e2697 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Aug 2024 08:14:25 +0200 Subject: mptcp: pm: fix RM_ADDR ID for the initial subflow The initial subflow has a special local ID: 0. When an endpoint is being deleted, it is then important to check if its address is not linked to the initial subflow to send the right ID. If there was an endpoint linked to the initial subflow, msk's mpc_endpoint_id field will be set. We can then use this info when an endpoint is being removed to see if it is linked to the initial subflow. So now, the correct IDs are passed to mptcp_pm_nl_rm_addr_or_subflow(), it is no longer needed to use mptcp_local_id_match(). Fixes: 3ad14f54bd74 ("mptcp: more accurate MPC endpoint tracking") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- net/mptcp/pm_netlink.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index ec45ab4c66ab..42d4e7b5f65d 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -800,11 +800,6 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, return -EINVAL; } -static bool mptcp_local_id_match(const struct mptcp_sock *msk, u8 local_id, u8 id) -{ - return local_id == id || (!local_id && msk->mpc_endpoint_id == id); -} - static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list, enum linux_mptcp_mib_field rm_type) @@ -839,7 +834,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) continue; - if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) + if (rm_type == MPTCP_MIB_RMSUBFLOW && id != rm_id) continue; pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u\n", @@ -1448,6 +1443,12 @@ static bool remove_anno_list_by_saddr(struct mptcp_sock *msk, return false; } +static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr) +{ + return msk->mpc_endpoint_id == addr->id ? 0 : addr->id; +} + static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool force) @@ -1455,7 +1456,7 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, struct mptcp_rm_list list = { .nr = 0 }; bool ret; - list.ids[list.nr++] = addr->id; + list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); ret = remove_anno_list_by_saddr(msk, addr); if (ret || force) { @@ -1482,14 +1483,12 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, const struct mptcp_pm_addr_entry *entry) { const struct mptcp_addr_info *addr = &entry->addr; - struct mptcp_rm_list list = { .nr = 0 }; + struct mptcp_rm_list list = { .nr = 1 }; long s_slot = 0, s_num = 0; struct mptcp_sock *msk; pr_debug("remove_id=%d\n", addr->id); - list.ids[list.nr++] = addr->id; - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; bool remove_subflow; @@ -1507,6 +1506,7 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); + list.ids[0] = mptcp_endp_get_local_id(msk, addr); if (remove_subflow) { spin_lock_bh(&msk->pm.lock); mptcp_pm_nl_rm_subflow_received(msk, &list); @@ -1613,6 +1613,7 @@ int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) return ret; } +/* Called from the userspace PM only */ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }; @@ -1641,6 +1642,7 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) } } +/* Called from the in-kernel PM only */ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list) { @@ -1650,11 +1652,11 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, list_for_each_entry(entry, rm_list, list) { if (slist.nr < MPTCP_RM_IDS_MAX && lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) - slist.ids[slist.nr++] = entry->addr.id; + slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); if (alist.nr < MPTCP_RM_IDS_MAX && remove_anno_list_by_saddr(msk, &entry->addr)) - alist.ids[alist.nr++] = entry->addr.id; + alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); } spin_lock_bh(&msk->pm.lock); @@ -1951,7 +1953,7 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, { struct mptcp_rm_list list = { .nr = 0 }; - list.ids[list.nr++] = addr->id; + list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); spin_lock_bh(&msk->pm.lock); mptcp_pm_nl_rm_subflow_received(msk, &list); -- cgit From 5f94b08c001290acda94d9d8868075590931c198 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Aug 2024 08:14:26 +0200 Subject: selftests: mptcp: join: check removing ID 0 endpoint Removing the endpoint linked to the initial subflow should trigger a RM_ADDR for the right ID, and the removal of the subflow. That's what is now being verified in the "delete and re-add" test. Note that removing the initial subflow will not decrement the 'subflows' counters, which corresponds to the *additional* subflows. On the other hand, when the same endpoint is re-added, it will increment this counter, as it will be seen as an additional subflow this time. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 3ad14f54bd74 ("mptcp: more accurate MPC endpoint tracking") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 264040a760c6..8b4529ff15e5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3572,8 +3572,9 @@ endpoint_tests() if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then - pm_nl_set_limits $ns1 0 2 - pm_nl_set_limits $ns2 0 2 + pm_nl_set_limits $ns1 0 3 + pm_nl_set_limits $ns2 0 3 + pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow test_linkfail=4 speed=20 \ run_tests $ns1 $ns2 10.0.1.1 & @@ -3582,17 +3583,17 @@ endpoint_tests() wait_mpj $ns2 pm_nl_check_endpoint "creation" \ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2 - chk_subflow_nr "before delete" 2 + chk_subflow_nr "before delete id 2" 2 chk_mptcp_info subflows 1 subflows 1 pm_nl_del_endpoint $ns2 2 10.0.2.2 sleep 0.5 - chk_subflow_nr "after delete" 1 + chk_subflow_nr "after delete id 2" 1 chk_mptcp_info subflows 0 subflows 0 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow wait_mpj $ns2 - chk_subflow_nr "after re-add" 2 + chk_subflow_nr "after re-add id 2" 2 chk_mptcp_info subflows 1 subflows 1 pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow @@ -3607,10 +3608,20 @@ endpoint_tests() chk_subflow_nr "after no reject" 3 chk_mptcp_info subflows 2 subflows 2 + pm_nl_del_endpoint $ns2 1 10.0.1.2 + sleep 0.5 + chk_subflow_nr "after delete id 0" 2 + chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf + + pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after re-add id 0" 3 + chk_mptcp_info subflows 3 subflows 3 + mptcp_lib_kill_wait $tests_pid - chk_join_nr 3 3 3 - chk_rm_nr 1 1 + chk_join_nr 4 4 4 + chk_rm_nr 2 2 fi # remove and re-add -- cgit From c07cc3ed895f9bfe0c53b5ed6be710c133b4271c Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Aug 2024 08:14:27 +0200 Subject: mptcp: pm: send ACK on an active subflow Taking the first one on the list doesn't work in some cases, e.g. if the initial subflow is being removed. Pick another one instead of not sending anything. Fixes: 84dfe3677a6f ("mptcp: send out dedicated ADD_ADDR packet") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- net/mptcp/pm_netlink.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 42d4e7b5f65d..ed2205ef7208 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -765,9 +765,12 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) !mptcp_pm_should_rm_signal(msk)) return; - subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node); - if (subflow) - mptcp_pm_send_ack(msk, subflow, false, false); + mptcp_for_each_subflow(msk, subflow) { + if (__mptcp_subflow_active(subflow)) { + mptcp_pm_send_ack(msk, subflow, false, false); + break; + } + } } int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, -- cgit From bc19ff57637ff563d2bdf2b385b48c41e6509e0d Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Aug 2024 08:14:28 +0200 Subject: mptcp: pm: skip connecting to already established sf The lookup_subflow_by_daddr() helper checks if there is already a subflow connected to this address. But there could be a subflow that is closing, but taking time due to some reasons: latency, losses, data to process, etc. If an ADD_ADDR is received while the endpoint is being closed, it is better to try connecting to it, instead of rejecting it: the peer which has sent the ADD_ADDR will not be notified that the ADD_ADDR has been rejected for this reason, and the expected subflow will not be created at the end. This helper should then only look for subflows that are established, or going to be, but not the ones being closed. Fixes: d84ad04941c3 ("mptcp: skip connecting the connected address") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- net/mptcp/pm_netlink.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index ed2205ef7208..0134b6273c54 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -130,12 +130,15 @@ static bool lookup_subflow_by_daddr(const struct list_head *list, { struct mptcp_subflow_context *subflow; struct mptcp_addr_info cur; - struct sock_common *skc; list_for_each_entry(subflow, list, node) { - skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + if (!((1 << inet_sk_state_load(ssk)) & + (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV))) + continue; - remote_address(skc, &cur); + remote_address((struct sock_common *)ssk, &cur); if (mptcp_addresses_equal(&cur, daddr, daddr->port)) return true; } -- cgit From dce1c6d1e92535f165219695a826caedcca4e9b9 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Aug 2024 08:14:29 +0200 Subject: mptcp: pm: reset MPC endp ID when re-added The initial subflow has a special local ID: 0. It is specific per connection. When a global endpoint is deleted and re-added later, it can have a different ID -- most services managing the endpoints automatically don't force the ID to be the same as before. It is then important to track these modifications to be consistent with the ID being used for the address used by the initial subflow, not to confuse the other peer or to send the ID 0 for the wrong address. Now when removing an endpoint, msk->mpc_endpoint_id is reset if it corresponds to this endpoint. When adding a new endpoint, the same variable is updated if the address match the one of the initial subflow. Fixes: 3ad14f54bd74 ("mptcp: more accurate MPC endpoint tracking") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- net/mptcp/pm_netlink.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 0134b6273c54..5a84a55e37cc 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1318,20 +1318,27 @@ static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) return pm_nl_get_pernet(genl_info_net(info)); } -static int mptcp_nl_add_subflow_or_signal_addr(struct net *net) +static int mptcp_nl_add_subflow_or_signal_addr(struct net *net, + struct mptcp_addr_info *addr) { struct mptcp_sock *msk; long s_slot = 0, s_num = 0; while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; + struct mptcp_addr_info mpc_addr; if (!READ_ONCE(msk->fully_established) || mptcp_pm_is_userspace(msk)) goto next; + /* if the endp linked to the init sf is re-added with a != ID */ + mptcp_local_address((struct sock_common *)msk, &mpc_addr); + lock_sock(sk); spin_lock_bh(&msk->pm.lock); + if (mptcp_addresses_equal(addr, &mpc_addr, addr->port)) + msk->mpc_endpoint_id = addr->id; mptcp_pm_create_subflow_or_signal_addr(msk); spin_unlock_bh(&msk->pm.lock); release_sock(sk); @@ -1404,7 +1411,7 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) goto out_free; } - mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk)); + mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk), &entry->addr); return 0; out_free: @@ -1525,6 +1532,8 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, spin_unlock_bh(&msk->pm.lock); } + if (msk->mpc_endpoint_id == entry->addr.id) + msk->mpc_endpoint_id = 0; release_sock(sk); next: -- cgit From 1c2326fcae4f0c5de8ad0d734ced43a8e5f17dac Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Aug 2024 08:14:30 +0200 Subject: selftests: mptcp: join: check re-adding init endp with != id The initial subflow has a special local ID: 0. It is specific per connection. When a global endpoint is deleted and re-added later, it can have a different ID, but the kernel should still use the ID 0 if it corresponds to the initial address. This test validates this behaviour: the endpoint linked to the initial subflow is removed, and re-added with a different ID. Note that removing the initial subflow will not decrement the 'subflows' counters, which corresponds to the *additional* subflows. On the other hand, when the same endpoint is re-added, it will increment this counter, as it will be seen as an additional subflow this time. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 3ad14f54bd74 ("mptcp: more accurate MPC endpoint tracking") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 8b4529ff15e5..75458ade32c7 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3627,11 +3627,12 @@ endpoint_tests() # remove and re-add if reset "delete re-add signal" && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then - pm_nl_set_limits $ns1 0 2 - pm_nl_set_limits $ns2 2 2 + pm_nl_set_limits $ns1 0 3 + pm_nl_set_limits $ns2 3 3 pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal + pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal test_linkfail=4 speed=20 \ run_tests $ns1 $ns2 10.0.1.1 & local tests_pid=$! @@ -3653,11 +3654,21 @@ endpoint_tests() wait_mpj $ns2 chk_subflow_nr "after re-add" 3 chk_mptcp_info subflows 2 subflows 2 + + pm_nl_del_endpoint $ns1 42 10.0.1.1 + sleep 0.5 + chk_subflow_nr "after delete ID 0" 2 + chk_mptcp_info subflows 2 subflows 2 + + pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-add" 3 + chk_mptcp_info subflows 3 subflows 3 mptcp_lib_kill_wait $tests_pid - chk_join_nr 3 3 3 - chk_add_nr 4 4 - chk_rm_nr 2 1 invert + chk_join_nr 4 4 4 + chk_add_nr 5 5 + chk_rm_nr 3 2 invert fi # flush and re-add -- cgit From 76a2d8394cc183df872adf04bf636eaf42746449 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Aug 2024 08:14:31 +0200 Subject: selftests: mptcp: join: no extra msg if no counter The checksum and fail counters might not be available. Then no need to display an extra message with missing info. While at it, fix the indentation around, which is wrong since the same commit. Fixes: 47867f0a7e83 ("selftests: mptcp: join: skip check if MIB counter not supported") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 75458ade32c7..a10714b6952f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1112,26 +1112,26 @@ chk_csum_nr() print_check "sum" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr") - if [ "$count" != "$csum_ns1" ]; then + if [ -n "$count" ] && [ "$count" != "$csum_ns1" ]; then extra_msg+=" ns1=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } || - { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then + { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then fail_test "got $count data checksum error[s] expected $csum_ns1" else print_ok fi print_check "csum" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr") - if [ "$count" != "$csum_ns2" ]; then + if [ -n "$count" ] && [ "$count" != "$csum_ns2" ]; then extra_msg+=" ns2=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } || - { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then + { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then fail_test "got $count data checksum error[s] expected $csum_ns2" else print_ok @@ -1169,13 +1169,13 @@ chk_fail_nr() print_check "ftx" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx") - if [ "$count" != "$fail_tx" ]; then + if [ -n "$count" ] && [ "$count" != "$fail_tx" ]; then extra_msg+=",tx=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } || - { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then + { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then fail_test "got $count MP_FAIL[s] TX expected $fail_tx" else print_ok @@ -1183,13 +1183,13 @@ chk_fail_nr() print_check "failrx" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx") - if [ "$count" != "$fail_rx" ]; then + if [ -n "$count" ] && [ "$count" != "$fail_rx" ]; then extra_msg+=",rx=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } || - { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then + { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then fail_test "got $count MP_FAIL[s] RX expected $fail_rx" else print_ok -- cgit From 58e1b66b4e4b8a602d3f2843e8eba00a969ecce2 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Aug 2024 08:14:32 +0200 Subject: mptcp: pm: do not remove already closed subflows It is possible to have in the list already closed subflows, e.g. the initial subflow has been already closed, but still in the list. No need to try to close it again, and increments the related counters again. Fixes: 0ee4261a3681 ("mptcp: implement mptcp_pm_remove_subflow") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- net/mptcp/pm_netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 5a84a55e37cc..3ff273e219f2 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -838,6 +838,8 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, int how = RCV_SHUTDOWN | SEND_SHUTDOWN; u8 id = subflow_get_local_id(subflow); + if (inet_sk_state_load(ssk) == TCP_CLOSE) + continue; if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) continue; if (rm_type == MPTCP_MIB_RMSUBFLOW && id != rm_id) -- cgit From 9366922adc6a71378ca01f898c41be295309f044 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Aug 2024 08:14:33 +0200 Subject: mptcp: pm: fix ID 0 endp usage after multiple re-creations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'local_addr_used' and 'add_addr_accepted' are decremented for addresses not related to the initial subflow (ID0), because the source and destination addresses of the initial subflows are known from the beginning: they don't count as "additional local address being used" or "ADD_ADDR being accepted". It is then required not to increment them when the entrypoint used by the initial subflow is removed and re-added during a connection. Without this modification, this entrypoint cannot be removed and re-added more than once. Reported-by: Arınç ÜNAL Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/512 Fixes: 3ad14f54bd74 ("mptcp: more accurate MPC endpoint tracking") Reported-by: syzbot+455d38ecd5f655fc45cf@syzkaller.appspotmail.com Closes: https://lore.kernel.org/00000000000049861306209237f4@google.com Cc: stable@vger.kernel.org Tested-by: Arınç ÜNAL Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- net/mptcp/pm_netlink.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 3ff273e219f2..a93450ded50a 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -615,12 +615,13 @@ subflow: fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); - msk->pm.local_addr_used++; __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); /* Special case for ID0: set the correct ID */ if (local.addr.id == msk->mpc_endpoint_id) local.addr.id = 0; + else /* local_addr_used is not decr for ID 0 */ + msk->pm.local_addr_used++; nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); if (nr == 0) @@ -750,7 +751,9 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) spin_lock_bh(&msk->pm.lock); if (sf_created) { - msk->pm.add_addr_accepted++; + /* add_addr_accepted is not decr for ID 0 */ + if (remote.id) + msk->pm.add_addr_accepted++; if (msk->pm.add_addr_accepted >= add_addr_accept_max || msk->pm.subflows >= subflows_max) WRITE_ONCE(msk->pm.accept_addr, false); -- cgit From d397d7246c11ca36c33c932bc36d38e3a79e9aa0 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Aug 2024 08:14:34 +0200 Subject: selftests: mptcp: join: check re-re-adding ID 0 endp This test extends "delete and re-add" to validate the previous commit: when the endpoint linked to the initial subflow (ID 0) is re-added multiple times, it was no longer being used, because the internal linked counters are not decremented for this special endpoint: it is not an additional endpoint. Here, the "del/add id 0" steps are done 3 times to unsure this case is validated. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 3ad14f54bd74 ("mptcp: more accurate MPC endpoint tracking") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 27 ++++++++++++++----------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index a10714b6952f..965b614e4b16 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3576,7 +3576,7 @@ endpoint_tests() pm_nl_set_limits $ns2 0 3 pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - test_linkfail=4 speed=20 \ + test_linkfail=4 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & local tests_pid=$! @@ -3608,20 +3608,23 @@ endpoint_tests() chk_subflow_nr "after no reject" 3 chk_mptcp_info subflows 2 subflows 2 - pm_nl_del_endpoint $ns2 1 10.0.1.2 - sleep 0.5 - chk_subflow_nr "after delete id 0" 2 - chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf - - pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow - wait_mpj $ns2 - chk_subflow_nr "after re-add id 0" 3 - chk_mptcp_info subflows 3 subflows 3 + local i + for i in $(seq 3); do + pm_nl_del_endpoint $ns2 1 10.0.1.2 + sleep 0.5 + chk_subflow_nr "after delete id 0 ($i)" 2 + chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf + + pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after re-add id 0 ($i)" 3 + chk_mptcp_info subflows 3 subflows 3 + done mptcp_lib_kill_wait $tests_pid - chk_join_nr 4 4 4 - chk_rm_nr 2 2 + chk_join_nr 6 6 6 + chk_rm_nr 4 4 fi # remove and re-add -- cgit From d82809b6c5f2676b382f77a5cbeb1a5d91ed2235 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Aug 2024 08:14:35 +0200 Subject: mptcp: avoid duplicated SUB_CLOSED events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The initial subflow might have already been closed, but still in the connection list. When the worker is instructed to close the subflows that have been marked as closed, it might then try to close the initial subflow again. A consequence of that is that the SUB_CLOSED event can be seen twice: # ip mptcp endpoint 1.1.1.1 id 1 subflow dev eth0 2.2.2.2 id 2 subflow dev eth1 # ip mptcp monitor & [ CREATED] remid=0 locid=0 saddr4=1.1.1.1 daddr4=9.9.9.9 [ ESTABLISHED] remid=0 locid=0 saddr4=1.1.1.1 daddr4=9.9.9.9 [ SF_ESTABLISHED] remid=0 locid=2 saddr4=2.2.2.2 daddr4=9.9.9.9 # ip mptcp endpoint delete id 1 [ SF_CLOSED] remid=0 locid=0 saddr4=1.1.1.1 daddr4=9.9.9.9 [ SF_CLOSED] remid=0 locid=0 saddr4=1.1.1.1 daddr4=9.9.9.9 The first one is coming from mptcp_pm_nl_rm_subflow_received(), and the second one from __mptcp_close_subflow(). To avoid doing the post-closed processing twice, the subflow is now marked as closed the first time. Note that it is not enough to check if we are dealing with the first subflow and check its sk_state: the subflow might have been reset or closed before calling mptcp_close_ssk(). Fixes: b911c97c7dc7 ("mptcp: add netlink event support") Cc: stable@vger.kernel.org Tested-by: Arınç ÜNAL Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 6 ++++++ net/mptcp/protocol.h | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b571fba88a2f..37ebcb7640eb 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2508,6 +2508,12 @@ out: void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow) { + /* The first subflow can already be closed and still in the list */ + if (subflow->close_event_done) + return; + + subflow->close_event_done = true; + if (sk->sk_state == TCP_ESTABLISHED) mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 240d7c2ea551..26eb898a202b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -524,7 +524,8 @@ struct mptcp_subflow_context { stale : 1, /* unable to snd/rcv data, do not use for xmit */ valid_csum_seen : 1, /* at least one csum validated */ is_mptfo : 1, /* subflow is doing TFO */ - __unused : 10; + close_event_done : 1, /* has done the post-closed part */ + __unused : 9; bool data_avail; bool scheduled; u32 remote_nonce; -- cgit From 20ccc7c5f7a3aa48092441a4b182f9f40418392e Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Aug 2024 08:14:36 +0200 Subject: selftests: mptcp: join: validate event numbers This test extends "delete and re-add" and "delete re-add signal" to validate the previous commit: the number of MPTCP events are checked to make sure there are no duplicated or unexpected ones. A new helper has been introduced to easily check these events. The missing events have been added to the lib. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: b911c97c7dc7 ("mptcp: add netlink event support") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 74 ++++++++++++++++++++++++- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 4 ++ 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 965b614e4b16..a8ea0fe200fb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -420,12 +420,17 @@ reset_with_fail() fi } +start_events() +{ + mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid + mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid +} + reset_with_events() { reset "${1}" || return 1 - mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid - mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid + start_events } reset_with_tcp_filter() @@ -3333,6 +3338,36 @@ userspace_pm_chk_get_addr() fi } +# $1: ns ; $2: event type ; $3: count +chk_evt_nr() +{ + local ns=${1} + local evt_name="${2}" + local exp="${3}" + + local evts="${evts_ns1}" + local evt="${!evt_name}" + local count + + evt_name="${evt_name:16}" # without MPTCP_LIB_EVENT_ + [ "${ns}" == "ns2" ] && evts="${evts_ns2}" + + print_check "event ${ns} ${evt_name} (${exp})" + + if [[ "${evt_name}" = "LISTENER_"* ]] && + ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then + print_skip "event not supported" + return + fi + + count=$(grep -cw "type:${evt}" "${evts}") + if [ "${count}" != "${exp}" ]; then + fail_test "got ${count} events, expected ${exp}" + else + print_ok + fi +} + userspace_tests() { # userspace pm type prevents add_addr @@ -3572,6 +3607,7 @@ endpoint_tests() if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + start_events pm_nl_set_limits $ns1 0 3 pm_nl_set_limits $ns2 0 3 pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow @@ -3623,12 +3659,28 @@ endpoint_tests() mptcp_lib_kill_wait $tests_pid + kill_events_pids + chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0 + chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 4 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 4 + + chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 0 + chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 0 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 5 # one has been closed before estab + chk_join_nr 6 6 6 chk_rm_nr 4 4 fi # remove and re-add - if reset "delete re-add signal" && + if reset_with_events "delete re-add signal" && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 3 pm_nl_set_limits $ns2 3 3 @@ -3669,6 +3721,22 @@ endpoint_tests() chk_mptcp_info subflows 3 subflows 3 mptcp_lib_kill_wait $tests_pid + kill_events_pids + chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0 + chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 0 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 4 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 2 + + chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 5 + chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 3 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 4 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 2 + chk_join_nr 4 4 4 chk_add_nr 5 5 chk_rm_nr 3 2 invert diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 438280e68434..4578a331041e 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -12,10 +12,14 @@ readonly KSFT_SKIP=4 readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}" # These variables are used in some selftests, read-only +declare -rx MPTCP_LIB_EVENT_CREATED=1 # MPTCP_EVENT_CREATED +declare -rx MPTCP_LIB_EVENT_ESTABLISHED=2 # MPTCP_EVENT_ESTABLISHED +declare -rx MPTCP_LIB_EVENT_CLOSED=3 # MPTCP_EVENT_CLOSED declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED declare -rx MPTCP_LIB_EVENT_REMOVED=7 # MPTCP_EVENT_REMOVED declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED +declare -rx MPTCP_LIB_EVENT_SUB_PRIORITY=13 # MPTCP_EVENT_SUB_PRIORITY declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16 # MPTCP_EVENT_LISTENER_CLOSED -- cgit From 57f86203b41c98b322119dfdbb1ec54ce5e3369b Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Aug 2024 08:14:37 +0200 Subject: mptcp: pm: ADD_ADDR 0 is not a new address The ADD_ADDR 0 with the address from the initial subflow should not be considered as a new address: this is not something new. If the host receives it, it simply means that the address is available again. When receiving an ADD_ADDR for the ID 0, the PM already doesn't consider it as new by not incrementing the 'add_addr_accepted' counter. But the 'accept_addr' might not be set if the limit has already been reached: this can be bypassed in this case. But before, it is important to check that this ADD_ADDR for the ID 0 is for the same address as the initial subflow. If not, it is not something that should happen, and the ADD_ADDR can be ignored. Note that if an ADD_ADDR is received while there is already a subflow opened using the same address, this ADD_ADDR is ignored as well. It means that if multiple ADD_ADDR for ID 0 are received, there will not be any duplicated subflows created by the client. Fixes: d0876b2284cf ("mptcp: add the incoming RM_ADDR support") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- net/mptcp/pm.c | 4 +++- net/mptcp/pm_netlink.c | 9 +++++++++ net/mptcp/protocol.h | 2 ++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 3f8dbde243f1..37f6dbcd8434 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -226,7 +226,9 @@ void mptcp_pm_add_addr_received(const struct sock *ssk, } else { __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP); } - } else if (!READ_ONCE(pm->accept_addr)) { + /* id0 should not have a different address */ + } else if ((addr->id == 0 && !mptcp_pm_nl_is_init_remote_addr(msk, addr)) || + (addr->id > 0 && !READ_ONCE(pm->accept_addr))) { mptcp_pm_announce_addr(msk, addr, true); mptcp_pm_add_addr_send_ack(msk); } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index a93450ded50a..f891bc714668 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -760,6 +760,15 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) } } +bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, + const struct mptcp_addr_info *remote) +{ + struct mptcp_addr_info mpc_remote; + + remote_address((struct sock_common *)msk, &mpc_remote); + return mptcp_addresses_equal(&mpc_remote, remote, remote->port); +} + void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 26eb898a202b..3b22313d1b86 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -993,6 +993,8 @@ void mptcp_pm_add_addr_received(const struct sock *ssk, void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); +bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, + const struct mptcp_addr_info *remote); void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); -- cgit From f18fa2abf81099d822d842a107f8c9889c86043c Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 28 Aug 2024 08:14:38 +0200 Subject: selftests: mptcp: join: check re-re-adding ID 0 signal This test extends "delete re-add signal" to validate the previous commit: when the 'signal' endpoint linked to the initial subflow (ID 0) is re-added multiple times, it will re-send the ADD_ADDR with id 0. The client should still be able to re-create this subflow, even if the add_addr_accepted limit has been reached as this special address is not considered as a new address. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: d0876b2284cf ("mptcp: add the incoming RM_ADDR support") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 32 ++++++++++++++++--------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index a8ea0fe200fb..a4762c49a878 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3688,7 +3688,7 @@ endpoint_tests() # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal - test_linkfail=4 speed=20 \ + test_linkfail=4 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & local tests_pid=$! @@ -3717,7 +3717,17 @@ endpoint_tests() pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal wait_mpj $ns2 - chk_subflow_nr "after re-add" 3 + chk_subflow_nr "after re-add ID 0" 3 + chk_mptcp_info subflows 3 subflows 3 + + pm_nl_del_endpoint $ns1 99 10.0.1.1 + sleep 0.5 + chk_subflow_nr "after re-delete ID 0" 2 + chk_mptcp_info subflows 2 subflows 2 + + pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-re-add ID 0" 3 chk_mptcp_info subflows 3 subflows 3 mptcp_lib_kill_wait $tests_pid @@ -3727,19 +3737,19 @@ endpoint_tests() chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1 chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0 chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 0 - chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 4 - chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 2 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 3 chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1 chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 - chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 5 - chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 3 - chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 4 - chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 2 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 6 + chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 4 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 3 - chk_join_nr 4 4 4 - chk_add_nr 5 5 - chk_rm_nr 3 2 invert + chk_join_nr 5 5 5 + chk_add_nr 6 6 + chk_rm_nr 4 3 invert fi # flush and re-add -- cgit From 6213dcc752f5d605cc50e08597f47fcbe658a40e Mon Sep 17 00:00:00 2001 From: Sriram Yagnaraman Date: Wed, 28 Aug 2024 09:24:17 +0200 Subject: mailmap: update entry for Sriram Yagnaraman Link my old est.tech address to my active mail address Signed-off-by: Sriram Yagnaraman Reviewed-by: Kurt Kanzenbach Link: https://patch.msgid.link/20240828072417.4111996-1-sriram.yagnaraman@ericsson.com Signed-off-by: Paolo Abeni --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 8ee01d9d7046..53ebff0e268a 100644 --- a/.mailmap +++ b/.mailmap @@ -614,6 +614,7 @@ Simon Kelley Sricharan Ramabadhran Srinivas Ramana Sriram R +Sriram Yagnaraman Stanislav Fomichev Stefan Wahren Stéphane Witzmann -- cgit From 3ab394b363c5fd14b231e335fb6746ddfb93aaaa Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Wed, 3 Jul 2024 19:30:20 +0200 Subject: fuse: disable the combination of passthrough and writeback cache Current design and handling of passthrough is without fuse caching and with that FUSE_WRITEBACK_CACHE is conflicting. Fixes: 7dc4e97a4f9a ("fuse: introduce FUSE_PASSTHROUGH capability") Cc: stable@kernel.org # v6.9 Signed-off-by: Bernd Schubert Acked-by: Amir Goldstein Reviewed-by: Josef Bacik Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d8ab4e93916f..bebd89002328 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1332,11 +1332,16 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, * on a stacked fs (e.g. overlayfs) themselves and with * max_stack_depth == 1, FUSE fs can be stacked as the * underlying fs of a stacked fs (e.g. overlayfs). + * + * Also don't allow the combination of FUSE_PASSTHROUGH + * and FUSE_WRITEBACK_CACHE, current design doesn't handle + * them together. */ if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) && (flags & FUSE_PASSTHROUGH) && arg->max_stack_depth > 0 && - arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH) { + arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH && + !(flags & FUSE_WRITEBACK_CACHE)) { fc->passthrough = 1; fc->max_stack_depth = arg->max_stack_depth; fm->sb->s_stack_depth = arg->max_stack_depth; -- cgit From febccb39255f9df35527b88c953b2e0deae50e53 Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Tue, 27 Aug 2024 11:48:22 +0300 Subject: nfc: pn533: Add poll mod list filling check In case of im_protocols value is 1 and tm_protocols value is 0 this combination successfully passes the check 'if (!im_protocols && !tm_protocols)' in the nfc_start_poll(). But then after pn533_poll_create_mod_list() call in pn533_start_poll() poll mod list will remain empty and dev->poll_mod_count will remain 0 which lead to division by zero. Normally no im protocol has value 1 in the mask, so this combination is not expected by driver. But these protocol values actually come from userspace via Netlink interface (NFC_CMD_START_POLL operation). So a broken or malicious program may pass a message containing a "bad" combination of protocol parameter values so that dev->poll_mod_count is not incremented inside pn533_poll_create_mod_list(), thus leading to division by zero. Call trace looks like: nfc_genl_start_poll() nfc_start_poll() ->start_poll() pn533_start_poll() Add poll mod list filling check. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: dfccd0f58044 ("NFC: pn533: Add some polling entropy") Signed-off-by: Aleksandr Mishin Acked-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240827084822.18785-1-amishin@t-argos.ru Signed-off-by: Paolo Abeni --- drivers/nfc/pn533/pn533.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c index b19c39dcfbd9..e2bc67300a91 100644 --- a/drivers/nfc/pn533/pn533.c +++ b/drivers/nfc/pn533/pn533.c @@ -1723,6 +1723,11 @@ static int pn533_start_poll(struct nfc_dev *nfc_dev, } pn533_poll_create_mod_list(dev, im_protocols, tm_protocols); + if (!dev->poll_mod_count) { + nfc_err(dev->dev, + "Poll mod list is empty\n"); + return -EINVAL; + } /* Do not always start polling from the same modulation */ get_random_bytes(&rand_mod, sizeof(rand_mod)); -- cgit From 5f3eee1eef5d0edd23d8ac0974f56283649a1512 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 28 Aug 2024 15:00:56 -0300 Subject: spi: spidev: Add an entry for elgin,jg10309-01 The rv1108-elgin-r1 board has an LCD controlled via SPI in userspace. The marking on the LCD is JG10309-01. Add the "elgin,jg10309-01" compatible string. Signed-off-by: Fabio Estevam Reviewed-by: Heiko Stuebner Link: https://patch.msgid.link/20240828180057.3167190-2-festevam@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spidev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 5304728c68c2..14bf0fa65bef 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -731,6 +731,7 @@ static int spidev_of_check(struct device *dev) static const struct of_device_id spidev_dt_ids[] = { { .compatible = "cisco,spi-petra", .data = &spidev_of_check }, { .compatible = "dh,dhcom-board", .data = &spidev_of_check }, + { .compatible = "elgin,jg10309-01", .data = &spidev_of_check }, { .compatible = "lineartechnology,ltc2488", .data = &spidev_of_check }, { .compatible = "lwn,bk4", .data = &spidev_of_check }, { .compatible = "menlo,m53cpld", .data = &spidev_of_check }, -- cgit From 59d237c8a241168c7ae34c48244059b7bafaff38 Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Tue, 27 Aug 2024 21:23:01 +0530 Subject: drm/xe/hwmon: Fix WRITE_I1 param from u32 to u16 WRITE_I1 sub-command of the POWER_SETUP pcode command accepts a u16 parameter instead of u32. This change prevents potential illegal sub-command errors. v2: Mask uval instead of changing the prototype. (Badal) v3: Rephrase commit message. (Badal) Signed-off-by: Karthik Poosa Fixes: 92d44a422d0d ("drm/xe/hwmon: Expose card reactive critical power") Reviewed-by: Badal Nilawar Link: https://patchwork.freedesktop.org/patch/msgid/20240827155301.183383-1-karthik.poosa@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit a7f657097e96d8fa745c74bb1a239ebd5a8c971c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hwmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 832ea81faeee..1faeca70900e 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -450,7 +450,7 @@ static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval) { return xe_pcode_write(gt, PCODE_MBOX(PCODE_POWER_SETUP, POWER_SETUP_SUBCOMMAND_WRITE_I1, 0), - uval); + (uval & POWER_SETUP_I1_DATA_MASK)); } static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, -- cgit From c472d33bcbf7a1ed3710efe93822b5e94eabe18c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 29 Aug 2024 08:38:54 -0700 Subject: Input: cypress_ps2 - fix waiting for command response Commit 8bccf667f62a ("Input: cypress_ps2 - report timeouts when reading command status") uncovered an existing problem with cypress_ps2 driver: it tries waiting on a PS/2 device waitqueue without using the rest of libps2. Unfortunately without it nobody signals wakeup for the waiting process, and each "extended" command was timing out. But the rest of the code simply did not notice it. Fix this by switching from homegrown way of sending request to get command response and reading it to standard ps2_command() which does the right thing. Reported-by: Woody Suwalski Tested-by: Woody Suwalski Fixes: 8bccf667f62a ("Input: cypress_ps2 - report timeouts when reading command status") Link: https://lore.kernel.org/r/a8252e0f-dab4-ef5e-2aa1-407a6f4c7204@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/cypress_ps2.c | 58 +++++++++------------------------------ 1 file changed, 13 insertions(+), 45 deletions(-) diff --git a/drivers/input/mouse/cypress_ps2.c b/drivers/input/mouse/cypress_ps2.c index b3c34ebcc4ef..9446657a5f35 100644 --- a/drivers/input/mouse/cypress_ps2.c +++ b/drivers/input/mouse/cypress_ps2.c @@ -91,48 +91,6 @@ static int cypress_ps2_ext_cmd(struct psmouse *psmouse, u8 prefix, u8 nibble) return rc; } -static int cypress_ps2_read_cmd_status(struct psmouse *psmouse, - u8 cmd, u8 *param) -{ - struct ps2dev *ps2dev = &psmouse->ps2dev; - enum psmouse_state old_state; - int pktsize; - int rc; - - ps2_begin_command(ps2dev); - - old_state = psmouse->state; - psmouse->state = PSMOUSE_CMD_MODE; - psmouse->pktcnt = 0; - - pktsize = (cmd == CYTP_CMD_READ_TP_METRICS) ? 8 : 3; - memset(param, 0, pktsize); - - rc = cypress_ps2_sendbyte(psmouse, PSMOUSE_CMD_GETINFO & 0xff); - if (rc) - goto out; - - if (!wait_event_timeout(ps2dev->wait, - psmouse->pktcnt >= pktsize, - msecs_to_jiffies(CYTP_CMD_TIMEOUT))) { - rc = -ETIMEDOUT; - goto out; - } - - memcpy(param, psmouse->packet, pktsize); - - psmouse_dbg(psmouse, "Command 0x%02x response data (0x): %*ph\n", - cmd, pktsize, param); - -out: - psmouse->state = old_state; - psmouse->pktcnt = 0; - - ps2_end_command(ps2dev); - - return rc; -} - static bool cypress_verify_cmd_state(struct psmouse *psmouse, u8 cmd, u8* param) { bool rate_match = false; @@ -166,6 +124,8 @@ static bool cypress_verify_cmd_state(struct psmouse *psmouse, u8 cmd, u8* param) static int cypress_send_ext_cmd(struct psmouse *psmouse, u8 cmd, u8 *param) { u8 cmd_prefix = PSMOUSE_CMD_SETRES & 0xff; + unsigned int resp_size = cmd == CYTP_CMD_READ_TP_METRICS ? 8 : 3; + unsigned int ps2_cmd = (PSMOUSE_CMD_GETINFO & 0xff) | (resp_size << 8); int tries = CYTP_PS2_CMD_TRIES; int error; @@ -179,10 +139,18 @@ static int cypress_send_ext_cmd(struct psmouse *psmouse, u8 cmd, u8 *param) cypress_ps2_ext_cmd(psmouse, cmd_prefix, DECODE_CMD_BB(cmd)); cypress_ps2_ext_cmd(psmouse, cmd_prefix, DECODE_CMD_AA(cmd)); - error = cypress_ps2_read_cmd_status(psmouse, cmd, param); - if (!error && cypress_verify_cmd_state(psmouse, cmd, param)) - return 0; + error = ps2_command(&psmouse->ps2dev, param, ps2_cmd); + if (error) { + psmouse_dbg(psmouse, "Command 0x%02x failed: %d\n", + cmd, error); + } else { + psmouse_dbg(psmouse, + "Command 0x%02x response data (0x): %*ph\n", + cmd, resp_size, param); + if (cypress_verify_cmd_state(psmouse, cmd, param)) + return 0; + } } while (--tries > 0); return -EIO; -- cgit From b57d643a673ce54bc1437d1cca25e1909f553a7e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 28 Aug 2024 10:58:21 -0700 Subject: MAINTAINERS: exclude bluetooth and wireless DT bindings from netdev ML We exclude wireless drivers from the netdev@ traffic, to delegate it to linux-wireless@, and avoid overwhelming netdev@. Bluetooth drivers are implicitly excluded because they live under drivers/bluetooth, not drivers/net. In both cases DT bindings sit under Documentation/devicetree/bindings/net/ and aren't excluded. So if a patch series touches DT bindings netdev@ ends up getting CCed, and these are usually fairly boring series. Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240828175821.2960423-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a70b7c9c3533..5e5e85841a2f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15892,6 +15892,8 @@ F: include/uapi/linux/ethtool_netlink.h F: include/uapi/linux/if_* F: include/uapi/linux/netdev* F: tools/testing/selftests/drivers/net/ +X: Documentation/devicetree/bindings/net/bluetooth/ +X: Documentation/devicetree/bindings/net/wireless/ X: drivers/net/wireless/ NETWORKING DRIVERS (WIRELESS) -- cgit From 04c8abae1b7b2abeb638a3d5d5950fa2a031c244 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Thu, 29 Aug 2024 11:20:49 -0700 Subject: dcache: keep dentry_hashtable or d_hash_shift even when not used The runtime constant feature removes all the users of these variables, allowing the compiler to optimize them away. It's quite difficult to extract their values from the kernel text, and the memory saved by removing them is tiny, and it was never the point of this optimization. Since the dentry_hashtable is a core data structure, it's valuable for debugging tools to be able to read it easily. For instance, scripts built on drgn, like the dentrycache script[1], rely on it to be able to perform diagnostics on the contents of the dcache. Annotate it as used, so the compiler doesn't discard it. Link: https://github.com/oracle-samples/drgn-tools/blob/3afc56146f54d09dfd1f6d3c1b7436eda7e638be/drgn_tools/dentry.py#L325-L355 [1] Fixes: e3c92e81711d ("runtime constants: add x86 architecture support") Signed-off-by: Stephen Brennan Signed-off-by: Linus Torvalds --- fs/dcache.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 3d8daaecb6d1..6386b9b625dd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -96,11 +96,16 @@ EXPORT_SYMBOL(dotdot_name); * * This hash-function tries to avoid losing too many bits of hash * information, yet avoid using a prime hash-size or similar. + * + * Marking the variables "used" ensures that the compiler doesn't + * optimize them away completely on architectures with runtime + * constant infrastructure, this allows debuggers to see their + * values. But updating these values has no effect on those arches. */ -static unsigned int d_hash_shift __ro_after_init; +static unsigned int d_hash_shift __ro_after_init __used; -static struct hlist_bl_head *dentry_hashtable __ro_after_init; +static struct hlist_bl_head *dentry_hashtable __ro_after_init __used; static inline struct hlist_bl_head *d_hash(unsigned long hashlen) { -- cgit From 8d8d244726c8436c50f84092616c92bf551ea89a Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 23 Aug 2024 13:47:05 +0200 Subject: smb: Annotate struct xattr_smb_acl with __counted_by() Add the __counted_by compiler attribute to the flexible array member entries to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE. Signed-off-by: Thorsten Blum Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/xattr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/xattr.h b/fs/smb/server/xattr.h index 16499ca5c82d..fa3e27d6971b 100644 --- a/fs/smb/server/xattr.h +++ b/fs/smb/server/xattr.h @@ -76,7 +76,7 @@ struct xattr_acl_entry { struct xattr_smb_acl { int count; int next; - struct xattr_acl_entry entries[]; + struct xattr_acl_entry entries[] __counted_by(count); }; /* 64bytes hash in xattr_ntacl is computed with sha256 */ -- cgit From 78c5a6f1f630172b19af4912e755e1da93ef0ab5 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 27 Aug 2024 21:44:41 +0900 Subject: ksmbd: unset the binding mark of a reused connection Steve French reported null pointer dereference error from sha256 lib. cifs.ko can send session setup requests on reused connection. If reused connection is used for binding session, conn->binding can still remain true and generate_preauth_hash() will not set sess->Preauth_HashValue and it will be NULL. It is used as a material to create an encryption key in ksmbd_gen_smb311_encryptionkey. ->Preauth_HashValue cause null pointer dereference error from crypto_shash_update(). BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 8 PID: 429254 Comm: kworker/8:39 Hardware name: LENOVO 20MAS08500/20MAS08500, BIOS N2CET69W (1.52 ) Workqueue: ksmbd-io handle_ksmbd_work [ksmbd] RIP: 0010:lib_sha256_base_do_update.isra.0+0x11e/0x1d0 [sha256_ssse3] ? show_regs+0x6d/0x80 ? __die+0x24/0x80 ? page_fault_oops+0x99/0x1b0 ? do_user_addr_fault+0x2ee/0x6b0 ? exc_page_fault+0x83/0x1b0 ? asm_exc_page_fault+0x27/0x30 ? __pfx_sha256_transform_rorx+0x10/0x10 [sha256_ssse3] ? lib_sha256_base_do_update.isra.0+0x11e/0x1d0 [sha256_ssse3] ? __pfx_sha256_transform_rorx+0x10/0x10 [sha256_ssse3] ? __pfx_sha256_transform_rorx+0x10/0x10 [sha256_ssse3] _sha256_update+0x77/0xa0 [sha256_ssse3] sha256_avx2_update+0x15/0x30 [sha256_ssse3] crypto_shash_update+0x1e/0x40 hmac_update+0x12/0x20 crypto_shash_update+0x1e/0x40 generate_key+0x234/0x380 [ksmbd] generate_smb3encryptionkey+0x40/0x1c0 [ksmbd] ksmbd_gen_smb311_encryptionkey+0x72/0xa0 [ksmbd] ntlm_authenticate.isra.0+0x423/0x5d0 [ksmbd] smb2_sess_setup+0x952/0xaa0 [ksmbd] __process_request+0xa3/0x1d0 [ksmbd] __handle_ksmbd_work+0x1c4/0x2f0 [ksmbd] handle_ksmbd_work+0x2d/0xa0 [ksmbd] process_one_work+0x16c/0x350 worker_thread+0x306/0x440 ? __pfx_worker_thread+0x10/0x10 kthread+0xef/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x44/0x70 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Fixes: f5a544e3bab7 ("ksmbd: add support for SMB3 multichannel") Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 20846a4d3031..8bdc59251418 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1690,6 +1690,8 @@ int smb2_sess_setup(struct ksmbd_work *work) rc = ksmbd_session_register(conn, sess); if (rc) goto out_err; + + conn->binding = false; } else if (conn->dialect >= SMB30_PROT_ID && (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) && req->Flags & SMB2_SESSION_REQ_FLAG_BINDING) { @@ -1768,6 +1770,8 @@ int smb2_sess_setup(struct ksmbd_work *work) sess = NULL; goto out_err; } + + conn->binding = false; } work->sess = sess; -- cgit From 844436e045ac2ab7895d8b281cb784a24de1d14d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 29 Aug 2024 22:22:35 +0300 Subject: ksmbd: Unlock on in ksmbd_tcp_set_interfaces() Unlock before returning an error code if this allocation fails. Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Dan Carpenter Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_tcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index a84788396daa..aaed9e293b2e 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -624,8 +624,10 @@ int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz) for_each_netdev(&init_net, netdev) { if (netif_is_bridge_port(netdev)) continue; - if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL))) + if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL))) { + rtnl_unlock(); return -ENOMEM; + } } rtnl_unlock(); bind_additional_ifaces = 1; -- cgit From ffc17e1479e8e9459b7afa80e5d9d40d0dd78abb Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Fri, 30 Aug 2024 09:54:28 +0300 Subject: platform/x86: dell-smbios: Fix error path in dell_smbios_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case of error in build_tokens_sysfs(), all the memory that has been allocated is freed at end of this function. But then free_group() is called which performs memory deallocation again. Also, instead of free_group() call, there should be exit_dell_smbios_smm() and exit_dell_smbios_wmi() calls, since there is initialization, but there is no release of resources in case of an error. Fix these issues by replacing free_group() call with exit_dell_smbios_wmi() and exit_dell_smbios_smm(). Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 33b9ca1e53b4 ("platform/x86: dell-smbios: Add a sysfs interface for SMBIOS tokens") Signed-off-by: Aleksandr Mishin Link: https://lore.kernel.org/r/20240830065428.9544-1-amishin@t-argos.ru Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/dell-smbios-base.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/dell/dell-smbios-base.c b/drivers/platform/x86/dell/dell-smbios-base.c index 6565fac24cde..73e41eb69cb5 100644 --- a/drivers/platform/x86/dell/dell-smbios-base.c +++ b/drivers/platform/x86/dell/dell-smbios-base.c @@ -622,7 +622,10 @@ static int __init dell_smbios_init(void) return 0; fail_sysfs: - free_group(platform_device); + if (!wmi) + exit_dell_smbios_wmi(); + if (!smm) + exit_dell_smbios_smm(); fail_create_group: platform_device_del(platform_device); -- cgit From 2920294686ec23211637998f3ec386dfd3d784a6 Mon Sep 17 00:00:00 2001 From: Charles Han Date: Fri, 30 Aug 2024 15:41:06 +0800 Subject: spi: intel: Add check devm_kasprintf() returned value intel_spi_populate_chip() use devm_kasprintf() to set pdata->name. This can return a NULL pointer on failure but this returned value is not checked. Fixes: e58db3bcd93b ("spi: intel: Add default partition and name to the second chip") Signed-off-by: Charles Han Reviewed-by: Mika Westerberg Link: https://patch.msgid.link/20240830074106.8744-1-hanchunchao@inspur.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-intel.c b/drivers/spi/spi-intel.c index 3e5dcf2b3c8a..795b7e72baea 100644 --- a/drivers/spi/spi-intel.c +++ b/drivers/spi/spi-intel.c @@ -1390,6 +1390,9 @@ static int intel_spi_populate_chip(struct intel_spi *ispi) pdata->name = devm_kasprintf(ispi->dev, GFP_KERNEL, "%s-chip1", dev_name(ispi->dev)); + if (!pdata->name) + return -ENOMEM; + pdata->nr_parts = 1; parts = devm_kcalloc(ispi->dev, pdata->nr_parts, sizeof(*parts), GFP_KERNEL); -- cgit From 98d4435efcbf37801a3246fb53856c4b934a2613 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Thu, 29 Aug 2024 12:56:48 +0900 Subject: net/smc: prevent NULL pointer dereference in txopt_get Since smc_inet6_prot does not initialize ipv6_pinfo_offset, inet6_create() copies an incorrect address value, sk + 0 (offset), to inet_sk(sk)->pinet6. In addition, since inet_sk(sk)->pinet6 and smc_sk(sk)->clcsock practically point to the same address, when smc_create_clcsk() stores the newly created clcsock in smc_sk(sk)->clcsock, inet_sk(sk)->pinet6 is corrupted into clcsock. This causes NULL pointer dereference and various other memory corruptions. To solve this problem, you need to initialize ipv6_pinfo_offset, add a smc6_sock structure, and then add ipv6_pinfo as the second member of the smc_sock structure. Reported-by: syzkaller Fixes: d25a92ccae6b ("net/smc: Introduce IPPROTO_SMC") Signed-off-by: Jeongjun Park Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/smc/smc.h | 3 +++ net/smc/smc_inet.c | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/net/smc/smc.h b/net/smc/smc.h index 34b781e463c4..ad77d6b6b8d3 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -284,6 +284,9 @@ struct smc_connection { struct smc_sock { /* smc sock container */ struct sock sk; +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6_pinfo *pinet6; +#endif struct socket *clcsock; /* internal tcp socket */ void (*clcsk_state_change)(struct sock *sk); /* original stat_change fct. */ diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c index bece346dd8e9..a5b2041600f9 100644 --- a/net/smc/smc_inet.c +++ b/net/smc/smc_inet.c @@ -60,6 +60,11 @@ static struct inet_protosw smc_inet_protosw = { }; #if IS_ENABLED(CONFIG_IPV6) +struct smc6_sock { + struct smc_sock smc; + struct ipv6_pinfo inet6; +}; + static struct proto smc_inet6_prot = { .name = "INET6_SMC", .owner = THIS_MODULE, @@ -67,9 +72,10 @@ static struct proto smc_inet6_prot = { .hash = smc_hash_sk, .unhash = smc_unhash_sk, .release_cb = smc_release_cb, - .obj_size = sizeof(struct smc_sock), + .obj_size = sizeof(struct smc6_sock), .h.smc_hash = &smc_v6_hashinfo, .slab_flags = SLAB_TYPESAFE_BY_RCU, + .ipv6_pinfo_offset = offsetof(struct smc6_sock, inet6), }; static const struct proto_ops smc_inet6_stream_ops = { -- cgit From c26096ee0278c5e765009c5eee427bbafe6dc090 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 28 Aug 2024 22:02:45 +0100 Subject: mm: Fix filemap_invalidate_inode() to use invalidate_inode_pages2_range() Fix filemap_invalidate_inode() to use invalidate_inode_pages2_range() rather than truncate_inode_pages_range(). The latter clears the invalidated bit of a partial pages rather than discarding it entirely. This causes copy_file_range() to fail on cifs because the partial pages at either end of the destination range aren't evicted and reread, but rather just partly cleared. This causes generic/075 and generic/112 xfstests to fail. Fixes: 74e797d79cf1 ("mm: Provide a means of invalidation without using launder_folio") Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240828210249.1078637-5-dhowells@redhat.com cc: Matthew Wilcox cc: Miklos Szeredi cc: Trond Myklebust cc: Christoph Hellwig cc: Andrew Morton cc: Alexander Viro cc: Christian Brauner cc: Jeff Layton cc: linux-mm@kvack.org cc: linux-fsdevel@vger.kernel.org cc: netfs@lists.linux.dev cc: v9fs@lists.linux.dev cc: linux-afs@lists.infradead.org cc: ceph-devel@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: linux-nfs@vger.kernel.org cc: devel@lists.orangefs.org Signed-off-by: Christian Brauner --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index d62150418b91..0ca9c1377b68 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -4231,7 +4231,7 @@ int filemap_invalidate_inode(struct inode *inode, bool flush, } /* Wait for writeback to complete on all folios and discard. */ - truncate_inode_pages_range(mapping, start, end); + invalidate_inode_pages2_range(mapping, start / PAGE_SIZE, end / PAGE_SIZE); unlock: filemap_invalidate_unlock(mapping); -- cgit From 1c47c0d6014c832ad8e2ba04fc2c5b7070d999f7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 28 Aug 2024 09:42:33 -0600 Subject: io_uring/rsrc: ensure compat iovecs are copied correctly For buffer registration (or updates), a userspace iovec is copied in and updated. If the application is within a compat syscall, then the iovec type is compat_iovec rather than iovec. However, the type used in __io_sqe_buffers_update() and io_sqe_buffers_register() is always struct iovec, and hence the source is incremented by the size of a non-compat iovec in the loop. This misses every other iovec in the source, and will run into garbage half way through the copies and return -EFAULT to the application. Maintain the source address separately and assign to our user vec pointer, so that copies always happen from the right source address. While in there, correct a bad placement of __user which triggered the following sparse warning prior to this fix: io_uring/rsrc.c:981:33: warning: cast removes address space '__user' of expression io_uring/rsrc.c:981:30: warning: incorrect type in assignment (different address spaces) io_uring/rsrc.c:981:30: expected struct iovec const [noderef] __user *uvec io_uring/rsrc.c:981:30: got struct iovec *[noderef] __user Fixes: f4eaf8eda89e ("io_uring/rsrc: Drop io_copy_iov in favor of iovec API") Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index a860516bf448..453867add7ca 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -394,10 +394,11 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, struct io_uring_rsrc_update2 *up, unsigned int nr_args) { - struct iovec __user *uvec = u64_to_user_ptr(up->data); u64 __user *tags = u64_to_user_ptr(up->tags); struct iovec fast_iov, *iov; struct page *last_hpage = NULL; + struct iovec __user *uvec; + u64 user_data = up->data; __u32 done; int i, err; @@ -410,7 +411,8 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu; u64 tag = 0; - iov = iovec_from_user(&uvec[done], 1, 1, &fast_iov, ctx->compat); + uvec = u64_to_user_ptr(user_data); + iov = iovec_from_user(uvec, 1, 1, &fast_iov, ctx->compat); if (IS_ERR(iov)) { err = PTR_ERR(iov); break; @@ -443,6 +445,10 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, ctx->user_bufs[i] = imu; *io_get_tag_slot(ctx->buf_data, i) = tag; + if (ctx->compat) + user_data += sizeof(struct compat_iovec); + else + user_data += sizeof(struct iovec); } return done ? done : err; } @@ -949,7 +955,7 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, struct page *last_hpage = NULL; struct io_rsrc_data *data; struct iovec fast_iov, *iov = &fast_iov; - const struct iovec __user *uvec = (struct iovec * __user) arg; + const struct iovec __user *uvec; int i, ret; BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16)); @@ -972,7 +978,8 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) { if (arg) { - iov = iovec_from_user(&uvec[i], 1, 1, &fast_iov, ctx->compat); + uvec = (struct iovec __user *) arg; + iov = iovec_from_user(uvec, 1, 1, &fast_iov, ctx->compat); if (IS_ERR(iov)) { ret = PTR_ERR(iov); break; @@ -980,6 +987,10 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, ret = io_buffer_validate(iov); if (ret) break; + if (ctx->compat) + arg += sizeof(struct compat_iovec); + else + arg += sizeof(struct iovec); } if (!iov->iov_base && *io_get_tag_slot(data, i)) { -- cgit From 1a5caec7f80ca2e659c03f45378ee26915f4eda2 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 30 Aug 2024 07:35:12 -0700 Subject: regulator: core: Stub devm_regulator_bulk_get_const() if !CONFIG_REGULATOR When adding devm_regulator_bulk_get_const() I missed adding a stub for when CONFIG_REGULATOR is not enabled. Under certain conditions (like randconfig testing) this can cause the compiler to reports errors like: error: implicit declaration of function 'devm_regulator_bulk_get_const'; did you mean 'devm_regulator_bulk_get_enable'? Add the stub. Fixes: 1de452a0edda ("regulator: core: Allow drivers to define their init data as const") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408301813.TesFuSbh-lkp@intel.com/ Cc: Neil Armstrong Signed-off-by: Douglas Anderson Link: https://patch.msgid.link/20240830073511.1.Ib733229a8a19fad8179213c05e1af01b51e42328@changeid Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index d986ec13092e..b9ce521910a0 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -452,6 +452,14 @@ static inline int of_regulator_bulk_get_all(struct device *dev, struct device_no return 0; } +static inline int devm_regulator_bulk_get_const( + struct device *dev, int num_consumers, + const struct regulator_bulk_data *in_consumers, + struct regulator_bulk_data **out_consumers) +{ + return 0; +} + static inline int regulator_bulk_enable(int num_consumers, struct regulator_bulk_data *consumers) { -- cgit From 40927f3d0972bf86357a32a5749be71a551241b6 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 29 Aug 2024 09:06:28 +1000 Subject: nfsd: fix nfsd4_deleg_getattr_conflict in presence of third party lease It is not safe to dereference fl->c.flc_owner without first confirming fl->fl_lmops is the expected manager. nfsd4_deleg_getattr_conflict() tests fl_lmops but largely ignores the result and assumes that flc_owner is an nfs4_delegation anyway. This is wrong. With this patch we restore the "!= &nfsd_lease_mng_ops" case to behave as it did before the change mentioned below. This is the same as the current code, but without any reference to a possible delegation. Fixes: c5967721e106 ("NFSD: handle GETATTR conflict with write delegation") Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 07f2496850c4..a366fb1c1b9b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -8859,7 +8859,15 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, */ if (type == F_RDLCK) break; - goto break_lease; + + nfsd_stats_wdeleg_getattr_inc(nn); + spin_unlock(&ctx->flc_lock); + + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); + if (status != nfserr_jukebox || + !nfsd_wait_for_delegreturn(rqstp, inode)) + return status; + return 0; } if (type == F_WRLCK) { struct nfs4_delegation *dp = fl->c.flc_owner; @@ -8868,7 +8876,6 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, spin_unlock(&ctx->flc_lock); return 0; } -break_lease: nfsd_stats_wdeleg_getattr_inc(nn); dp = fl->c.flc_owner; refcount_inc(&dp->dl_stid.sc_count); -- cgit From f274495aea7b15225b3d83837121b22ef96e560c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 30 Aug 2024 10:45:54 -0600 Subject: io_uring/kbuf: return correct iovec count from classic buffer peek io_provided_buffers_select() returns 0 to indicate success, but it should be returning 1 to indicate that 1 vec was mapped. This causes peeking to fail with classic provided buffers, and while that's not a use case that anyone should use, it should still work correctly. The end result is that no buffer will be selected, and hence a completion with '0' as the result will be posted, without a buffer attached. Fixes: 35c8711c8fc4 ("io_uring/kbuf: add helpers for getting/peeking multiple buffers") Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 1af2bd56af44..bdfa30b38321 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -129,7 +129,7 @@ static int io_provided_buffers_select(struct io_kiocb *req, size_t *len, iov[0].iov_base = buf; iov[0].iov_len = *len; - return 0; + return 1; } static struct io_uring_buf *io_ring_head_to_buf(struct io_uring_buf_ring *br, -- cgit From b408473ea01b2e499d23503e2bf898416da9d7ac Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 29 Aug 2024 18:22:14 -0700 Subject: bpf: Fix a crash when btf_parse_base() returns an error pointer The pointer returned by btf_parse_base could be an error pointer. IS_ERR() check is needed before calling btf_free(base_btf). Fixes: 8646db238997 ("libbpf,bpf: Share BTF relocate-related code with kernel") Signed-off-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Reviewed-by: Alan Maguire Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240830012214.1646005-1-martin.lau@linux.dev --- kernel/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 520f49f422fe..e3377dd61f7e 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6283,7 +6283,7 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, errout: btf_verifier_env_free(env); - if (base_btf != vmlinux_btf) + if (!IS_ERR(base_btf) && base_btf != vmlinux_btf) btf_free(base_btf); if (btf) { kvfree(btf->data); -- cgit From 150b572a7c1df30f5d32d87ad96675200cca7b80 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 26 Aug 2024 16:27:39 -0400 Subject: MAINTAINERS: PCI: Add NXP PCI controller mailing list imx@lists.linux.dev Add imx mailing list imx@lists.linux.dev for PCI controller of NXP chips (Layerscape and iMX). Link: https://lore.kernel.org/r/20240826202740.970015-1-Frank.Li@nxp.com Signed-off-by: Frank Li Signed-off-by: Bjorn Helgaas Acked-by: Richard Zhu --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3fb27f41515d..1b7a6a8073bb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17412,6 +17412,7 @@ M: Roy Zang L: linuxppc-dev@lists.ozlabs.org L: linux-pci@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +L: imx@lists.linux.dev S: Maintained F: drivers/pci/controller/dwc/*layerscape* @@ -17438,6 +17439,7 @@ M: Richard Zhu M: Lucas Stach L: linux-pci@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +L: imx@lists.linux.dev S: Maintained F: Documentation/devicetree/bindings/pci/fsl,imx6q-pcie-common.yaml F: Documentation/devicetree/bindings/pci/fsl,imx6q-pcie-ep.yaml -- cgit From fe1910f9337bd46a9343967b547ccab26b4b2c6e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 20 Aug 2024 20:07:44 -0700 Subject: tcp_bpf: fix return value of tcp_bpf_sendmsg() When we cork messages in psock->cork, the last message triggers the flushing will result in sending a sk_msg larger than the current message size. In this case, in tcp_bpf_send_verdict(), 'copied' becomes negative at least in the following case: 468 case __SK_DROP: 469 default: 470 sk_msg_free_partial(sk, msg, tosend); 471 sk_msg_apply_bytes(psock, tosend); 472 *copied -= (tosend + delta); // <==== HERE 473 return -EACCES; Therefore, it could lead to the following BUG with a proper value of 'copied' (thanks to syzbot). We should not use negative 'copied' as a return value here. ------------[ cut here ]------------ kernel BUG at net/socket.c:733! Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 0 UID: 0 PID: 3265 Comm: syz-executor510 Not tainted 6.11.0-rc3-syzkaller-00060-gd07b43284ab3 #0 Hardware name: linux,dummy-virt (DT) pstate: 61400009 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) pc : sock_sendmsg_nosec net/socket.c:733 [inline] pc : sock_sendmsg_nosec net/socket.c:728 [inline] pc : __sock_sendmsg+0x5c/0x60 net/socket.c:745 lr : sock_sendmsg_nosec net/socket.c:730 [inline] lr : __sock_sendmsg+0x54/0x60 net/socket.c:745 sp : ffff800088ea3b30 x29: ffff800088ea3b30 x28: fbf00000062bc900 x27: 0000000000000000 x26: ffff800088ea3bc0 x25: ffff800088ea3bc0 x24: 0000000000000000 x23: f9f00000048dc000 x22: 0000000000000000 x21: ffff800088ea3d90 x20: f9f00000048dc000 x19: ffff800088ea3d90 x18: 0000000000000001 x17: 0000000000000000 x16: 0000000000000000 x15: 000000002002ffaf x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: ffff8000815849c0 x9 : ffff8000815b49c0 x8 : 0000000000000000 x7 : 000000000000003f x6 : 0000000000000000 x5 : 00000000000007e0 x4 : fff07ffffd239000 x3 : fbf00000062bc900 x2 : 0000000000000000 x1 : 0000000000000000 x0 : 00000000fffffdef Call trace: sock_sendmsg_nosec net/socket.c:733 [inline] __sock_sendmsg+0x5c/0x60 net/socket.c:745 ____sys_sendmsg+0x274/0x2ac net/socket.c:2597 ___sys_sendmsg+0xac/0x100 net/socket.c:2651 __sys_sendmsg+0x84/0xe0 net/socket.c:2680 __do_sys_sendmsg net/socket.c:2689 [inline] __se_sys_sendmsg net/socket.c:2687 [inline] __arm64_sys_sendmsg+0x24/0x30 net/socket.c:2687 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x48/0x110 arch/arm64/kernel/syscall.c:49 el0_svc_common.constprop.0+0x40/0xe0 arch/arm64/kernel/syscall.c:132 do_el0_svc+0x1c/0x28 arch/arm64/kernel/syscall.c:151 el0_svc+0x34/0xec arch/arm64/kernel/entry-common.c:712 el0t_64_sync_handler+0x100/0x12c arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x19c/0x1a0 arch/arm64/kernel/entry.S:598 Code: f9404463 d63f0060 3108441f 54fffe81 (d4210000) ---[ end trace 0000000000000000 ]--- Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") Reported-by: syzbot+58c03971700330ce14d8@syzkaller.appspotmail.com Cc: Jakub Sitnicki Signed-off-by: Cong Wang Reviewed-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/20240821030744.320934-1-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 53b0d62fd2c2..fe6178715ba0 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -577,7 +577,7 @@ out_err: err = sk_stream_error(sk, msg->msg_flags, err); release_sock(sk); sk_psock_put(sk, psock); - return copied ? copied : err; + return copied > 0 ? copied : err; } enum { -- cgit From dd885d90c047dbdd2773c1d33954cbd8747d81e2 Mon Sep 17 00:00:00 2001 From: Martin Jocic Date: Fri, 30 Aug 2024 17:31:13 +0200 Subject: can: kvaser_pciefd: Use a single write when releasing RX buffers Kvaser's PCIe cards uses the KCAN FPGA IP block which has dual 4K buffers for incoming messages shared by all (currently up to eight) channels. While the driver processes messages in one buffer, new incoming messages are stored in the other and so on. The design of KCAN is such that a buffer must be fully read and then released. Releasing a buffer will make the FPGA switch buffers. If the other buffer contains at least one incoming message the FPGA will also instantly issue a new interrupt, if not the interrupt will be issued after receiving the first new message. With IRQx interrupts, it takes a little time for the interrupt to happen, enough for any previous ISR call to do it's business and return, but MSI interrupts are way faster so this time is reduced to almost nothing. So with MSI, releasing the buffer HAS to be the very last action of the ISR before returning, otherwise the new interrupt might be "masked" by the kernel because the previous ISR call hasn't returned. And the interrupts are edge-triggered so we cannot loose one, or the ping-pong reading process will stop. This is why this patch modifies the driver to use a single write to the SRB_CMD register before returning. Signed-off-by: Martin Jocic Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20240830153113.2081440-1-martin.jocic@kvaser.com Fixes: 26ad340e582d ("can: kvaser_pciefd: Add driver for Kvaser PCIEcan devices") Signed-off-by: Marc Kleine-Budde --- drivers/net/can/kvaser_pciefd.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index a60d9efd5f8d..9ffc3ffb4e8f 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -1686,6 +1686,7 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev) const struct kvaser_pciefd_irq_mask *irq_mask = pcie->driver_data->irq_mask; u32 pci_irq = ioread32(KVASER_PCIEFD_PCI_IRQ_ADDR(pcie)); u32 srb_irq = 0; + u32 srb_release = 0; int i; if (!(pci_irq & irq_mask->all)) @@ -1699,17 +1700,14 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev) kvaser_pciefd_transmit_irq(pcie->can[i]); } - if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD0) { - /* Reset DMA buffer 0, may trigger new interrupt */ - iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0, - KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); - } + if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD0) + srb_release |= KVASER_PCIEFD_SRB_CMD_RDB0; - if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD1) { - /* Reset DMA buffer 1, may trigger new interrupt */ - iowrite32(KVASER_PCIEFD_SRB_CMD_RDB1, - KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); - } + if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD1) + srb_release |= KVASER_PCIEFD_SRB_CMD_RDB1; + + if (srb_release) + iowrite32(srb_release, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); return IRQ_HANDLED; } -- cgit From 8ae22de9d2eae3c432de64bf2b3a5a69cf1d1124 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 21 Aug 2024 15:43:40 -0700 Subject: Bluetooth: qca: If memdump doesn't work, re-enable IBS On systems in the field, we are seeing this sometimes in the kernel logs: Bluetooth: qca_controller_memdump() hci0: hci_devcd_init Return:-95 This means that _something_ decided that it wanted to get a memdump but then hci_devcd_init() returned -EOPNOTSUPP (AKA -95). The cleanup code in qca_controller_memdump() when we get back an error from hci_devcd_init() undoes most things but forgets to clear QCA_IBS_DISABLED. One side effect of this is that, during the next suspend, qca_suspend() will always get a timeout. Let's fix it so that we clear the bit. Fixes: 06d3fdfcdf5c ("Bluetooth: hci_qca: Add qcom devcoredump support") Reviewed-by: Guenter Roeck Reviewed-by: Stephen Boyd Signed-off-by: Douglas Anderson Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 4b1ad7ea5b95..678f150229e7 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1091,6 +1091,7 @@ static void qca_controller_memdump(struct work_struct *work) qca->memdump_state = QCA_MEMDUMP_COLLECTED; cancel_delayed_work(&qca->ctrl_memdump_timeout); clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); + clear_bit(QCA_IBS_DISABLED, &qca->flags); mutex_unlock(&qca->hci_memdump_lock); return; } -- cgit From c898f6d7b093bd71e66569cd6797c87d4056f44b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 26 Aug 2024 15:47:30 -0400 Subject: Bluetooth: hci_sync: Introduce hci_cmd_sync_run/hci_cmd_sync_run_once This introduces hci_cmd_sync_run/hci_cmd_sync_run_once which acts like hci_cmd_sync_queue/hci_cmd_sync_queue_once but runs immediately when already on hdev->cmd_sync_work context. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 4 ++++ net/bluetooth/hci_sync.c | 42 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 75e052909b5f..f3052cb252ef 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -73,6 +73,10 @@ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); +int hci_cmd_sync_run(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); +int hci_cmd_sync_run_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); struct hci_cmd_sync_work_entry * hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index e79cd40bd079..5533e6f561b3 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -112,7 +112,7 @@ static void hci_cmd_sync_add(struct hci_request *req, u16 opcode, u32 plen, skb_queue_tail(&req->cmd_q, skb); } -static int hci_cmd_sync_run(struct hci_request *req) +static int hci_req_sync_run(struct hci_request *req) { struct hci_dev *hdev = req->hdev; struct sk_buff *skb; @@ -169,7 +169,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, hdev->req_status = HCI_REQ_PEND; - err = hci_cmd_sync_run(&req); + err = hci_req_sync_run(&req); if (err < 0) return ERR_PTR(err); @@ -782,6 +782,44 @@ int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, } EXPORT_SYMBOL(hci_cmd_sync_queue_once); +/* Run HCI command: + * + * - hdev must be running + * - if on cmd_sync_work then run immediately otherwise queue + */ +int hci_cmd_sync_run(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + /* Only queue command if hdev is running which means it had been opened + * and is either on init phase or is already up. + */ + if (!test_bit(HCI_RUNNING, &hdev->flags)) + return -ENETDOWN; + + /* If on cmd_sync_work then run immediately otherwise queue */ + if (current_work() == &hdev->cmd_sync_work) + return func(hdev, data); + + return hci_cmd_sync_submit(hdev, func, data, destroy); +} +EXPORT_SYMBOL(hci_cmd_sync_run); + +/* Run HCI command entry once: + * + * - Lookup if an entry already exist and only if it doesn't creates a new entry + * and run it. + * - if on cmd_sync_work then run immediately otherwise queue + */ +int hci_cmd_sync_run_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + if (hci_cmd_sync_lookup_entry(hdev, func, data, destroy)) + return 0; + + return hci_cmd_sync_run(hdev, func, data, destroy); +} +EXPORT_SYMBOL(hci_cmd_sync_run_once); + /* Lookup HCI command entry: * * - Return first entry that matches by function callback or data or -- cgit From 227a0cdf4a028a73dc256d0f5144b4808d718893 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 26 Aug 2024 16:14:04 -0400 Subject: Bluetooth: MGMT: Fix not generating command complete for MGMT_OP_DISCONNECT MGMT_OP_DISCONNECT can be called while mgmt_device_connected has not been called yet, which will cause the connection procedure to be aborted, so mgmt_device_disconnected shall still respond with command complete to MGMT_OP_DISCONNECT and just not emit MGMT_EV_DEVICE_DISCONNECTED since MGMT_EV_DEVICE_CONNECTED was never sent. To fix this MGMT_OP_DISCONNECT is changed to work similarly to other command which do use hci_cmd_sync_queue and then use hci_conn_abort to disconnect and returns the result, in order for hci_conn_abort to be used from hci_cmd_sync context it now uses hci_cmd_sync_run_once. Link: https://github.com/bluez/bluez/issues/932 Fixes: 12d4a3b2ccb3 ("Bluetooth: Move check for MGMT_CONNECTED flag into mgmt.c") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 6 +++- net/bluetooth/mgmt.c | 84 ++++++++++++++++++++++++------------------------ 2 files changed, 47 insertions(+), 43 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 8e48ccd2af30..c82502e213a8 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2952,5 +2952,9 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) return 0; } - return hci_cmd_sync_queue_once(hdev, abort_conn_sync, conn, NULL); + /* Run immediately if on cmd_sync_work since this may be called + * as a result to MGMT_OP_DISCONNECT/MGMT_OP_UNPAIR which does + * already queue its callback on cmd_sync_work. + */ + return hci_cmd_sync_run_once(hdev, abort_conn_sync, conn, NULL); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 25979f4283a6..4c20dbf92c71 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2921,7 +2921,12 @@ static int unpair_device_sync(struct hci_dev *hdev, void *data) if (!conn) return 0; - return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM); + /* Disregard any possible error since the likes of hci_abort_conn_sync + * will clean up the connection no matter the error. + */ + hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); + + return 0; } static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, @@ -3053,13 +3058,44 @@ unlock: return err; } +static void disconnect_complete(struct hci_dev *hdev, void *data, int err) +{ + struct mgmt_pending_cmd *cmd = data; + + cmd->cmd_complete(cmd, mgmt_status(err)); + mgmt_pending_free(cmd); +} + +static int disconnect_sync(struct hci_dev *hdev, void *data) +{ + struct mgmt_pending_cmd *cmd = data; + struct mgmt_cp_disconnect *cp = cmd->param; + struct hci_conn *conn; + + if (cp->addr.type == BDADDR_BREDR) + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, + &cp->addr.bdaddr); + else + conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr, + le_addr_type(cp->addr.type)); + + if (!conn) + return -ENOTCONN; + + /* Disregard any possible error since the likes of hci_abort_conn_sync + * will clean up the connection no matter the error. + */ + hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); + + return 0; +} + static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_disconnect *cp = data; struct mgmt_rp_disconnect rp; struct mgmt_pending_cmd *cmd; - struct hci_conn *conn; int err; bt_dev_dbg(hdev, "sock %p", sk); @@ -3082,27 +3118,7 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - if (pending_find(MGMT_OP_DISCONNECT, hdev)) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_BUSY, &rp, sizeof(rp)); - goto failed; - } - - if (cp->addr.type == BDADDR_BREDR) - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, - &cp->addr.bdaddr); - else - conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr, - le_addr_type(cp->addr.type)); - - if (!conn || conn->state == BT_OPEN || conn->state == BT_CLOSED) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_NOT_CONNECTED, &rp, - sizeof(rp)); - goto failed; - } - - cmd = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, hdev, data, len); + cmd = mgmt_pending_new(sk, MGMT_OP_DISCONNECT, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -3110,9 +3126,10 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, cmd->cmd_complete = generic_cmd_complete; - err = hci_disconnect(conn, HCI_ERROR_REMOTE_USER_TERM); + err = hci_cmd_sync_queue(hdev, disconnect_sync, cmd, + disconnect_complete); if (err < 0) - mgmt_pending_remove(cmd); + mgmt_pending_free(cmd); failed: hci_dev_unlock(hdev); @@ -9689,18 +9706,6 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, mgmt_event_skb(skb, NULL); } -static void disconnect_rsp(struct mgmt_pending_cmd *cmd, void *data) -{ - struct sock **sk = data; - - cmd->cmd_complete(cmd, 0); - - *sk = cmd->sk; - sock_hold(*sk); - - mgmt_pending_remove(cmd); -} - static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data) { struct hci_dev *hdev = data; @@ -9744,8 +9749,6 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, if (link_type != ACL_LINK && link_type != LE_LINK) return; - mgmt_pending_foreach(MGMT_OP_DISCONNECT, hdev, disconnect_rsp, &sk); - bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = link_to_bdaddr(link_type, addr_type); ev.reason = reason; @@ -9758,9 +9761,6 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, if (sk) sock_put(sk); - - mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp, - hdev); } void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, -- cgit From 532f8bcd1c2c4e8112f62e1922fd1703bc0ffce0 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 27 Aug 2024 14:37:22 -0400 Subject: Revert "Bluetooth: MGMT/SMP: Fix address type when using SMP over BREDR/LE" This reverts commit 59b047bc98084f8af2c41483e4d68a5adf2fa7f7 which breaks compatibility with commands like: bluetoothd[46328]: @ MGMT Command: Load.. (0x0013) plen 74 {0x0001} [hci0] Keys: 2 BR/EDR Address: C0:DC:DA:A5:E5:47 (Samsung Electronics Co.,Ltd) Key type: Authenticated key from P-256 (0x03) Central: 0x00 Encryption size: 16 Diversifier[2]: 0000 Randomizer[8]: 0000000000000000 Key[16]: 6ed96089bd9765be2f2c971b0b95f624 LE Address: D7:2A:DE:1E:73:A2 (Static) Key type: Unauthenticated key from P-256 (0x02) Central: 0x00 Encryption size: 16 Diversifier[2]: 0000 Randomizer[8]: 0000000000000000 Key[16]: 87dd2546ededda380ffcdc0a8faa4597 @ MGMT Event: Command Status (0x0002) plen 3 {0x0001} [hci0] Load Long Term Keys (0x0013) Status: Invalid Parameters (0x0d) Cc: stable@vger.kernel.org Link: https://github.com/bluez/bluez/issues/875 Fixes: 59b047bc9808 ("Bluetooth: MGMT/SMP: Fix address type when using SMP over BREDR/LE") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 5 ----- net/bluetooth/mgmt.c | 25 +++++++------------------ net/bluetooth/smp.c | 7 ------- 3 files changed, 7 insertions(+), 30 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index e449dba698f3..1a32e602630e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -186,7 +186,6 @@ struct blocked_key { struct smp_csrk { bdaddr_t bdaddr; u8 bdaddr_type; - u8 link_type; u8 type; u8 val[16]; }; @@ -196,7 +195,6 @@ struct smp_ltk { struct rcu_head rcu; bdaddr_t bdaddr; u8 bdaddr_type; - u8 link_type; u8 authenticated; u8 type; u8 enc_size; @@ -211,7 +209,6 @@ struct smp_irk { bdaddr_t rpa; bdaddr_t bdaddr; u8 addr_type; - u8 link_type; u8 val[16]; }; @@ -219,8 +216,6 @@ struct link_key { struct list_head list; struct rcu_head rcu; bdaddr_t bdaddr; - u8 bdaddr_type; - u8 link_type; u8 type; u8 val[HCI_LINK_KEY_SIZE]; u8 pin_len; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 4c20dbf92c71..240dd8cf7c7d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2833,8 +2833,7 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, for (i = 0; i < key_count; i++) { struct mgmt_link_key_info *key = &cp->keys[i]; - /* Considering SMP over BREDR/LE, there is no need to check addr_type */ - if (key->type > 0x08) + if (key->addr.type != BDADDR_BREDR || key->type > 0x08) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_INVALID_PARAMS); @@ -7089,7 +7088,6 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, for (i = 0; i < irk_count; i++) { struct mgmt_irk_info *irk = &cp->irks[i]; - u8 addr_type = le_addr_type(irk->addr.type); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_IRK, @@ -7099,12 +7097,8 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, continue; } - /* When using SMP over BR/EDR, the addr type should be set to BREDR */ - if (irk->addr.type == BDADDR_BREDR) - addr_type = BDADDR_BREDR; - hci_add_irk(hdev, &irk->addr.bdaddr, - addr_type, irk->val, + le_addr_type(irk->addr.type), irk->val, BDADDR_ANY); } @@ -7185,7 +7179,6 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, for (i = 0; i < key_count; i++) { struct mgmt_ltk_info *key = &cp->keys[i]; u8 type, authenticated; - u8 addr_type = le_addr_type(key->addr.type); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_LTK, @@ -7220,12 +7213,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, continue; } - /* When using SMP over BR/EDR, the addr type should be set to BREDR */ - if (key->addr.type == BDADDR_BREDR) - addr_type = BDADDR_BREDR; - hci_add_ltk(hdev, &key->addr.bdaddr, - addr_type, type, authenticated, + le_addr_type(key->addr.type), type, authenticated, key->val, key->enc_size, key->ediv, key->rand); } @@ -9519,7 +9508,7 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); - ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type); + ev.key.addr.type = BDADDR_BREDR; ev.key.type = key->type; memcpy(ev.key.val, key->val, HCI_LINK_KEY_SIZE); ev.key.pin_len = key->pin_len; @@ -9570,7 +9559,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); - ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type); + ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type); ev.key.type = mgmt_ltk_type(key); ev.key.enc_size = key->enc_size; ev.key.ediv = key->ediv; @@ -9599,7 +9588,7 @@ void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent) bacpy(&ev.rpa, &irk->rpa); bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr); - ev.irk.addr.type = link_to_bdaddr(irk->link_type, irk->addr_type); + ev.irk.addr.type = link_to_bdaddr(LE_LINK, irk->addr_type); memcpy(ev.irk.val, irk->val, sizeof(irk->val)); mgmt_event(MGMT_EV_NEW_IRK, hdev, &ev, sizeof(ev), NULL); @@ -9628,7 +9617,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr); - ev.key.addr.type = link_to_bdaddr(csrk->link_type, csrk->bdaddr_type); + ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type); ev.key.type = csrk->type; memcpy(ev.key.val, csrk->val, sizeof(csrk->val)); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 4f9fdf400584..8b9724fd752a 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1060,7 +1060,6 @@ static void smp_notify_keys(struct l2cap_conn *conn) } if (smp->remote_irk) { - smp->remote_irk->link_type = hcon->type; mgmt_new_irk(hdev, smp->remote_irk, persistent); /* Now that user space can be considered to know the @@ -1080,28 +1079,24 @@ static void smp_notify_keys(struct l2cap_conn *conn) } if (smp->csrk) { - smp->csrk->link_type = hcon->type; smp->csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->csrk->bdaddr, &hcon->dst); mgmt_new_csrk(hdev, smp->csrk, persistent); } if (smp->responder_csrk) { - smp->responder_csrk->link_type = hcon->type; smp->responder_csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->responder_csrk->bdaddr, &hcon->dst); mgmt_new_csrk(hdev, smp->responder_csrk, persistent); } if (smp->ltk) { - smp->ltk->link_type = hcon->type; smp->ltk->bdaddr_type = hcon->dst_type; bacpy(&smp->ltk->bdaddr, &hcon->dst); mgmt_new_ltk(hdev, smp->ltk, persistent); } if (smp->responder_ltk) { - smp->responder_ltk->link_type = hcon->type; smp->responder_ltk->bdaddr_type = hcon->dst_type; bacpy(&smp->responder_ltk->bdaddr, &hcon->dst); mgmt_new_ltk(hdev, smp->responder_ltk, persistent); @@ -1121,8 +1116,6 @@ static void smp_notify_keys(struct l2cap_conn *conn) key = hci_add_link_key(hdev, smp->conn->hcon, &hcon->dst, smp->link_key, type, 0, &persistent); if (key) { - key->link_type = hcon->type; - key->bdaddr_type = hcon->dst_type; mgmt_new_link_key(hdev, key, persistent); /* Don't keep debug keys around if the relevant -- cgit From 1e9683c9b6ca88cc9340cdca85edd6134c8cffe3 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 27 Aug 2024 15:01:34 -0400 Subject: Bluetooth: MGMT: Ignore keys being loaded with invalid type Due to 59b047bc98084f8af2c41483e4d68a5adf2fa7f7 there could be keys stored with the wrong address type so this attempt to detect it and ignore them instead of just failing to load all keys. Cc: stable@vger.kernel.org Link: https://github.com/bluez/bluez/issues/875 Fixes: 59b047bc9808 ("Bluetooth: MGMT/SMP: Fix address type when using SMP over BREDR/LE") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 240dd8cf7c7d..279902e8bd8a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2830,15 +2830,6 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "debug_keys %u key_count %u", cp->debug_keys, key_count); - for (i = 0; i < key_count; i++) { - struct mgmt_link_key_info *key = &cp->keys[i]; - - if (key->addr.type != BDADDR_BREDR || key->type > 0x08) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_LOAD_LINK_KEYS, - MGMT_STATUS_INVALID_PARAMS); - } - hci_dev_lock(hdev); hci_link_keys_clear(hdev); @@ -2863,6 +2854,19 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, continue; } + if (key->addr.type != BDADDR_BREDR) { + bt_dev_warn(hdev, + "Invalid link address type %u for %pMR", + key->addr.type, &key->addr.bdaddr); + continue; + } + + if (key->type > 0x08) { + bt_dev_warn(hdev, "Invalid link key type %u for %pMR", + key->type, &key->addr.bdaddr); + continue; + } + /* Always ignore debug keys and require a new pairing if * the user wants to use them. */ @@ -7163,15 +7167,6 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, bt_dev_dbg(hdev, "key_count %u", key_count); - for (i = 0; i < key_count; i++) { - struct mgmt_ltk_info *key = &cp->keys[i]; - - if (!ltk_is_valid(key)) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_LOAD_LONG_TERM_KEYS, - MGMT_STATUS_INVALID_PARAMS); - } - hci_dev_lock(hdev); hci_smp_ltks_clear(hdev); @@ -7188,6 +7183,12 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, continue; } + if (!ltk_is_valid(key)) { + bt_dev_warn(hdev, "Invalid LTK for %pMR", + &key->addr.bdaddr); + continue; + } + switch (key->type) { case MGMT_LTK_UNAUTHENTICATED: authenticated = 0x00; -- cgit From d8b762070c3fde224f8b9ea3cf59bc41a5a3eb57 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 23 Aug 2024 13:55:00 +0200 Subject: power: sequencing: qcom-wcn: set the wlan-enable GPIO to output Commit a9aaf1ff88a8 ("power: sequencing: request the WLAN enable GPIO as-is") broke WLAN on boards on which the wlan-enable GPIO enabling the wifi module isn't in output mode by default. We need to set direction to output while retaining the value that was already set to keep the ath module on if it's already started. Fixes: a9aaf1ff88a8 ("power: sequencing: request the WLAN enable GPIO as-is") Link: https://lore.kernel.org/r/20240823115500.37280-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/power/sequencing/pwrseq-qcom-wcn.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/power/sequencing/pwrseq-qcom-wcn.c b/drivers/power/sequencing/pwrseq-qcom-wcn.c index d786cbf1b2cd..700879474abf 100644 --- a/drivers/power/sequencing/pwrseq-qcom-wcn.c +++ b/drivers/power/sequencing/pwrseq-qcom-wcn.c @@ -288,6 +288,13 @@ static int pwrseq_qcom_wcn_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(ctx->wlan_gpio), "Failed to get the WLAN enable GPIO\n"); + /* + * Set direction to output but keep the current value in order to not + * disable the WLAN module accidentally if it's already powered on. + */ + gpiod_direction_output(ctx->wlan_gpio, + gpiod_get_value_cansleep(ctx->wlan_gpio)); + ctx->clk = devm_clk_get_optional(dev, NULL); if (IS_ERR(ctx->clk)) return dev_err_probe(dev, PTR_ERR(ctx->clk), -- cgit From e3e6940940910c2287fe962bdf72015efd4fee81 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 31 Aug 2024 17:44:51 -0400 Subject: bcachefs: Revert lockless buffered IO path We had a report of data corruption on nixos when building installer images. https://github.com/NixOS/nixpkgs/pull/321055#issuecomment-2184131334 It seems that writes are being dropped, but only when issued by QEMU, and possibly only in snapshot mode. It's undetermined if it's write calls are being dropped or dirty folios. Further testing, via minimizing the original patch to just the change that skips the inode lock on non appends/truncates, reveals that it really is just not taking the inode lock that causes the corruption: it has nothing to do with the other logic changes for preserving write atomicity in corner cases. It's also kernel config dependent: it doesn't reproduce with the minimal kernel config that ktest uses, but it does reproduce with nixos's distro config. Bisection the kernel config initially pointer the finger at page migration or compaction, but it appears that was erroneous; we haven't yet determined what kernel config option actually triggers it. Sadly it appears this will have to be reverted since we're getting too close to release and my plate is full, but we'd _really_ like to fully debug it. My suspicion is that this patch is exposing a preexisting bug - the inode lock actually covers very little in IO paths, and we have a different lock (the pagecache add lock) that guards against races with truncate here. Fixes: 7e64c86cdc6c ("bcachefs: Buffered write path now can avoid the inode lock") Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 1 - fs/bcachefs/fs-io-buffered.c | 149 ++++++++++++------------------------------- 2 files changed, 40 insertions(+), 110 deletions(-) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index ab5a7adece10..742dcdd3e5d7 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -257,7 +257,6 @@ x(BCH_ERR_nopromote, nopromote_in_flight) \ x(BCH_ERR_nopromote, nopromote_no_writes) \ x(BCH_ERR_nopromote, nopromote_enomem) \ - x(0, need_inode_lock) \ x(0, invalid_snapshot_node) \ x(0, option_needs_open_fs) diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 184d03851676..ec8c427bf588 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -802,8 +802,7 @@ static noinline void folios_trunc(folios *fs, struct folio **fi) static int __bch2_buffered_write(struct bch_inode_info *inode, struct address_space *mapping, struct iov_iter *iter, - loff_t pos, unsigned len, - bool inode_locked) + loff_t pos, unsigned len) { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch2_folio_reservation res; @@ -827,15 +826,6 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, BUG_ON(!fs.nr); - /* - * If we're not using the inode lock, we need to lock all the folios for - * atomiticity of writes vs. other writes: - */ - if (!inode_locked && folio_end_pos(darray_last(fs)) < end) { - ret = -BCH_ERR_need_inode_lock; - goto out; - } - f = darray_first(fs); if (pos != folio_pos(f) && !folio_test_uptodate(f)) { ret = bch2_read_single_folio(f, mapping); @@ -932,10 +922,8 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, end = pos + copied; spin_lock(&inode->v.i_lock); - if (end > inode->v.i_size) { - BUG_ON(!inode_locked); + if (end > inode->v.i_size) i_size_write(&inode->v, end); - } spin_unlock(&inode->v.i_lock); f_pos = pos; @@ -979,68 +967,12 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter) struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct bch_inode_info *inode = file_bch_inode(file); - loff_t pos; - bool inode_locked = false; - ssize_t written = 0, written2 = 0, ret = 0; - - /* - * We don't take the inode lock unless i_size will be changing. Folio - * locks provide exclusion with other writes, and the pagecache add lock - * provides exclusion with truncate and hole punching. - * - * There is one nasty corner case where atomicity would be broken - * without great care: when copying data from userspace to the page - * cache, we do that with faults disable - a page fault would recurse - * back into the filesystem, taking filesystem locks again, and - * deadlock; so it's done with faults disabled, and we fault in the user - * buffer when we aren't holding locks. - * - * If we do part of the write, but we then race and in the userspace - * buffer have been evicted and are no longer resident, then we have to - * drop our folio locks to re-fault them in, breaking write atomicity. - * - * To fix this, we restart the write from the start, if we weren't - * holding the inode lock. - * - * There is another wrinkle after that; if we restart the write from the - * start, and then get an unrecoverable error, we _cannot_ claim to - * userspace that we did not write data we actually did - so we must - * track (written2) the most we ever wrote. - */ - - if ((iocb->ki_flags & IOCB_APPEND) || - (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v))) { - inode_lock(&inode->v); - inode_locked = true; - } - - ret = generic_write_checks(iocb, iter); - if (ret <= 0) - goto unlock; - - ret = file_remove_privs_flags(file, !inode_locked ? IOCB_NOWAIT : 0); - if (ret) { - if (!inode_locked) { - inode_lock(&inode->v); - inode_locked = true; - ret = file_remove_privs_flags(file, 0); - } - if (ret) - goto unlock; - } - - ret = file_update_time(file); - if (ret) - goto unlock; - - pos = iocb->ki_pos; + loff_t pos = iocb->ki_pos; + ssize_t written = 0; + int ret = 0; bch2_pagecache_add_get(inode); - if (!inode_locked && - (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v))) - goto get_inode_lock; - do { unsigned offset = pos & (PAGE_SIZE - 1); unsigned bytes = iov_iter_count(iter); @@ -1065,17 +997,12 @@ again: } } - if (unlikely(bytes != iov_iter_count(iter) && !inode_locked)) - goto get_inode_lock; - if (unlikely(fatal_signal_pending(current))) { ret = -EINTR; break; } - ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes, inode_locked); - if (ret == -BCH_ERR_need_inode_lock) - goto get_inode_lock; + ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes); if (unlikely(ret < 0)) break; @@ -1096,46 +1023,50 @@ again: } pos += ret; written += ret; - written2 = max(written, written2); - - if (ret != bytes && !inode_locked) - goto get_inode_lock; ret = 0; balance_dirty_pages_ratelimited(mapping); - - if (0) { -get_inode_lock: - bch2_pagecache_add_put(inode); - inode_lock(&inode->v); - inode_locked = true; - bch2_pagecache_add_get(inode); - - iov_iter_revert(iter, written); - pos -= written; - written = 0; - ret = 0; - } } while (iov_iter_count(iter)); - bch2_pagecache_add_put(inode); -unlock: - if (inode_locked) - inode_unlock(&inode->v); - iocb->ki_pos += written; + bch2_pagecache_add_put(inode); - ret = max(written, written2) ?: ret; - if (ret > 0) - ret = generic_write_sync(iocb, ret); - return ret; + return written ? written : ret; } -ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *iter) +ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) { - ssize_t ret = iocb->ki_flags & IOCB_DIRECT - ? bch2_direct_write(iocb, iter) - : bch2_buffered_write(iocb, iter); + struct file *file = iocb->ki_filp; + struct bch_inode_info *inode = file_bch_inode(file); + ssize_t ret; + + if (iocb->ki_flags & IOCB_DIRECT) { + ret = bch2_direct_write(iocb, from); + goto out; + } + + inode_lock(&inode->v); + + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto unlock; + + ret = file_remove_privs(file); + if (ret) + goto unlock; + + ret = file_update_time(file); + if (ret) + goto unlock; + + ret = bch2_buffered_write(iocb, from); + if (likely(ret > 0)) + iocb->ki_pos += ret; +unlock: + inode_unlock(&inode->v); + if (ret > 0) + ret = generic_write_sync(iocb, ret); +out: return bch2_err_class(ret); } -- cgit From 3d3020c461936009dc58702e267ff67b0076cbf2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 22 Aug 2024 11:47:32 -0400 Subject: bcachefs: Mark more errors as autofix errors that are known to always be safe to fix should be autofix: this should be most errors even at this point, but that will need some thorough review. note that errors are still logged in the superblock, so we'll still know that they happened. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index d3a498617303..f0c14702f9e6 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -23,7 +23,7 @@ enum bch_fsck_flags { x(jset_past_bucket_end, 9, 0) \ x(jset_seq_blacklisted, 10, 0) \ x(journal_entries_missing, 11, 0) \ - x(journal_entry_replicas_not_marked, 12, 0) \ + x(journal_entry_replicas_not_marked, 12, FSCK_AUTOFIX) \ x(journal_entry_past_jset_end, 13, 0) \ x(journal_entry_replicas_data_mismatch, 14, 0) \ x(journal_entry_bkey_u64s_0, 15, 0) \ @@ -288,10 +288,10 @@ enum bch_fsck_flags { x(invalid_btree_id, 274, 0) \ x(alloc_key_io_time_bad, 275, 0) \ x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \ - x(accounting_key_junk_at_end, 277, 0) \ - x(accounting_key_replicas_nr_devs_0, 278, 0) \ - x(accounting_key_replicas_nr_required_bad, 279, 0) \ - x(accounting_key_replicas_devs_unsorted, 280, 0) \ + x(accounting_key_junk_at_end, 277, FSCK_AUTOFIX) \ + x(accounting_key_replicas_nr_devs_0, 278, FSCK_AUTOFIX) \ + x(accounting_key_replicas_nr_required_bad, 279, FSCK_AUTOFIX) \ + x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, -- cgit From 431c1646e1f86b949fa3685efc50b660a364c2b6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Sep 2024 19:46:02 +1200 Subject: Linux 6.11-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7b60eb103c5d..d57cfc6896b8 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION* -- cgit From 72a6e22c604c95ddb3b10b5d3bb85b6ff4dbc34f Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Mon, 26 Aug 2024 19:20:56 +0800 Subject: fscache: delete fscache_cookie_lru_timer when fscache exits to avoid UAF The fscache_cookie_lru_timer is initialized when the fscache module is inserted, but is not deleted when the fscache module is removed. If timer_reduce() is called before removing the fscache module, the fscache_cookie_lru_timer will be added to the timer list of the current cpu. Afterwards, a use-after-free will be triggered in the softIRQ after removing the fscache module, as follows: ================================================================== BUG: unable to handle page fault for address: fffffbfff803c9e9 PF: supervisor read access in kernel mode PF: error_code(0x0000) - not-present page PGD 21ffea067 P4D 21ffea067 PUD 21ffe6067 PMD 110a7c067 PTE 0 Oops: Oops: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Tainted: G W 6.11.0-rc3 #855 Tainted: [W]=WARN RIP: 0010:__run_timer_base.part.0+0x254/0x8a0 Call Trace: tmigr_handle_remote_up+0x627/0x810 __walk_groups.isra.0+0x47/0x140 tmigr_handle_remote+0x1fa/0x2f0 handle_softirqs+0x180/0x590 irq_exit_rcu+0x84/0xb0 sysvec_apic_timer_interrupt+0x6e/0x90 asm_sysvec_apic_timer_interrupt+0x1a/0x20 RIP: 0010:default_idle+0xf/0x20 default_idle_call+0x38/0x60 do_idle+0x2b5/0x300 cpu_startup_entry+0x54/0x60 start_secondary+0x20d/0x280 common_startup_64+0x13e/0x148 Modules linked in: [last unloaded: netfs] ================================================================== Therefore delete fscache_cookie_lru_timer when removing the fscahe module. Fixes: 12bb21a29c19 ("fscache: Implement cookie user counting and resource pinning") Cc: stable@kernel.org Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240826112056.2458299-1-libaokun@huaweicloud.com Acked-by: David Howells Signed-off-by: Christian Brauner --- fs/netfs/fscache_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/netfs/fscache_main.c b/fs/netfs/fscache_main.c index 42e98bb523e3..49849005eb7c 100644 --- a/fs/netfs/fscache_main.c +++ b/fs/netfs/fscache_main.c @@ -103,6 +103,7 @@ void __exit fscache_exit(void) kmem_cache_destroy(fscache_cookie_jar); fscache_proc_cleanup(); + timer_shutdown_sync(&fscache_cookie_lru_timer); destroy_workqueue(fscache_wq); pr_notice("FS-Cache unloaded\n"); } -- cgit From 6a422a96bc84cf9b9f0ff741f293a1f9059e0883 Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Fri, 30 Aug 2024 13:13:50 +0200 Subject: hwmon: ltc2991: fix register bits defines In the LTC2991, V5 and V6 channels use the low nibble of the "V5, V6, V7, and V8 Control Register" for configuration, but currently, the high nibble is defined. This patch changes the defines to use the low nibble. Fixes: 2b9ea4262ae9 ("hwmon: Add driver for ltc2991") Signed-off-by: Pawel Dembicki Message-ID: <20240830111349.30531-1-paweldembicki@gmail.com> Signed-off-by: Guenter Roeck --- drivers/hwmon/ltc2991.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/ltc2991.c b/drivers/hwmon/ltc2991.c index 573cd8f5721b..7ca139e4b6af 100644 --- a/drivers/hwmon/ltc2991.c +++ b/drivers/hwmon/ltc2991.c @@ -42,9 +42,9 @@ #define LTC2991_V7_V8_FILT_EN BIT(7) #define LTC2991_V7_V8_TEMP_EN BIT(5) #define LTC2991_V7_V8_DIFF_EN BIT(4) -#define LTC2991_V5_V6_FILT_EN BIT(7) -#define LTC2991_V5_V6_TEMP_EN BIT(5) -#define LTC2991_V5_V6_DIFF_EN BIT(4) +#define LTC2991_V5_V6_FILT_EN BIT(3) +#define LTC2991_V5_V6_TEMP_EN BIT(1) +#define LTC2991_V5_V6_DIFF_EN BIT(0) #define LTC2991_REPEAT_ACQ_EN BIT(4) #define LTC2991_T_INT_FILT_EN BIT(3) -- cgit From 4fa9c5181cfe083d0beefb5157a643560e7bd152 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 29 Aug 2024 15:43:45 +0800 Subject: net: mctp-serial: Add kunit test for next_chunk_len() Test various edge cases of inputs that contain characters that need escaping. This adds a new kunit suite for mctp-serial. Signed-off-by: Matt Johnston Signed-off-by: David S. Miller --- drivers/net/mctp/Kconfig | 5 ++ drivers/net/mctp/mctp-serial.c | 109 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/drivers/net/mctp/Kconfig b/drivers/net/mctp/Kconfig index ce9d2d2ccf3b..15860d6ac39f 100644 --- a/drivers/net/mctp/Kconfig +++ b/drivers/net/mctp/Kconfig @@ -21,6 +21,11 @@ config MCTP_SERIAL Say y here if you need to connect to MCTP endpoints over serial. To compile as a module, use m; the module will be called mctp-serial. +config MCTP_SERIAL_TEST + bool "MCTP serial tests" if !KUNIT_ALL_TESTS + depends on MCTP_SERIAL=y && KUNIT=y + default KUNIT_ALL_TESTS + config MCTP_TRANSPORT_I2C tristate "MCTP SMBus/I2C transport" # i2c-mux is optional, but we must build as a module if i2c-mux is a module diff --git a/drivers/net/mctp/mctp-serial.c b/drivers/net/mctp/mctp-serial.c index 5bf6fdff701c..7a40d07ff77b 100644 --- a/drivers/net/mctp/mctp-serial.c +++ b/drivers/net/mctp/mctp-serial.c @@ -521,3 +521,112 @@ module_exit(mctp_serial_exit); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Jeremy Kerr "); MODULE_DESCRIPTION("MCTP Serial transport"); + +#if IS_ENABLED(CONFIG_MCTP_SERIAL_TEST) +#include + +#define MAX_CHUNKS 6 +struct test_chunk_tx { + u8 input_len; + u8 input[MCTP_SERIAL_MTU]; + u8 chunks[MAX_CHUNKS]; +}; + +static void test_next_chunk_len(struct kunit *test) +{ + struct mctp_serial devx; + struct mctp_serial *dev = &devx; + int next; + + const struct test_chunk_tx *params = test->param_value; + + memset(dev, 0x0, sizeof(*dev)); + memcpy(dev->txbuf, params->input, params->input_len); + dev->txlen = params->input_len; + + for (size_t i = 0; i < MAX_CHUNKS; i++) { + next = next_chunk_len(dev); + dev->txpos += next; + KUNIT_EXPECT_EQ(test, next, params->chunks[i]); + + if (next == 0) { + KUNIT_EXPECT_EQ(test, dev->txpos, dev->txlen); + return; + } + } + + KUNIT_FAIL_AND_ABORT(test, "Ran out of chunks"); +} + +static struct test_chunk_tx chunk_tx_tests[] = { + { + .input_len = 5, + .input = { 0x00, 0x11, 0x22, 0x7e, 0x80 }, + .chunks = { 3, 1, 1, 0}, + }, + { + .input_len = 5, + .input = { 0x00, 0x11, 0x22, 0x7e, 0x7d }, + .chunks = { 3, 1, 1, 0}, + }, + { + .input_len = 3, + .input = { 0x7e, 0x11, 0x22, }, + .chunks = { 1, 2, 0}, + }, + { + .input_len = 3, + .input = { 0x7e, 0x7e, 0x7d, }, + .chunks = { 1, 1, 1, 0}, + }, + { + .input_len = 4, + .input = { 0x7e, 0x7e, 0x00, 0x7d, }, + .chunks = { 1, 1, 1, 1, 0}, + }, + { + .input_len = 6, + .input = { 0x7e, 0x7e, 0x00, 0x7d, 0x10, 0x10}, + .chunks = { 1, 1, 1, 1, 2, 0}, + }, + { + .input_len = 1, + .input = { 0x7e }, + .chunks = { 1, 0 }, + }, + { + .input_len = 1, + .input = { 0x80 }, + .chunks = { 1, 0 }, + }, + { + .input_len = 3, + .input = { 0x80, 0x80, 0x00 }, + .chunks = { 3, 0 }, + }, + { + .input_len = 7, + .input = { 0x01, 0x00, 0x08, 0xc8, 0x00, 0x80, 0x02 }, + .chunks = { 7, 0 }, + }, + { + .input_len = 7, + .input = { 0x01, 0x00, 0x08, 0xc8, 0x7e, 0x80, 0x02 }, + .chunks = { 4, 1, 2, 0 }, + }, +}; + +KUNIT_ARRAY_PARAM(chunk_tx, chunk_tx_tests, NULL); + +static struct kunit_case mctp_serial_test_cases[] = { + KUNIT_CASE_PARAM(test_next_chunk_len, chunk_tx_gen_params), +}; + +static struct kunit_suite mctp_serial_test_suite = { + .name = "mctp_serial", + .test_cases = mctp_serial_test_cases, +}; + +kunit_test_suite(mctp_serial_test_suite); + +#endif /* CONFIG_MCTP_SERIAL_TEST */ -- cgit From f962e8361adfa84e8252d3fc3e5e6bb879f029b1 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 29 Aug 2024 15:43:46 +0800 Subject: net: mctp-serial: Fix missing escapes on transmit 0x7d and 0x7e bytes are meant to be escaped in the data portion of frames, but this didn't occur since next_chunk_len() had an off-by-one error. That also resulted in the final byte of a payload being written as a separate tty write op. The chunk prior to an escaped byte would be one byte short, and the next call would never test the txpos+1 case, which is where the escaped byte was located. That meant it never hit the escaping case in mctp_serial_tx_work(). Example Input: 01 00 08 c8 7e 80 02 Previous incorrect chunks from next_chunk_len(): 01 00 08 c8 7e 80 02 With this fix: 01 00 08 c8 7e 80 02 Cc: stable@vger.kernel.org Fixes: a0c2ccd9b5ad ("mctp: Add MCTP-over-serial transport binding") Signed-off-by: Matt Johnston Signed-off-by: David S. Miller --- drivers/net/mctp/mctp-serial.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/mctp/mctp-serial.c b/drivers/net/mctp/mctp-serial.c index 7a40d07ff77b..f39bbe255497 100644 --- a/drivers/net/mctp/mctp-serial.c +++ b/drivers/net/mctp/mctp-serial.c @@ -91,8 +91,8 @@ static int next_chunk_len(struct mctp_serial *dev) * will be those non-escaped bytes, and does not include the escaped * byte. */ - for (i = 1; i + dev->txpos + 1 < dev->txlen; i++) { - if (needs_escape(dev->txbuf[dev->txpos + i + 1])) + for (i = 1; i + dev->txpos < dev->txlen; i++) { + if (needs_escape(dev->txbuf[dev->txpos + i])) break; } -- cgit From 7f12a963b65872fda1219f065c1cc1b1b9a806e8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 1 Sep 2024 15:53:03 -0400 Subject: bcachefs: fix rebalance accounting Fixes: 49aa7830396b ("bcachefs: Fix rebalance_work accounting") Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index a2274429e7f4..20219c1e6ddf 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -876,7 +876,7 @@ int bch2_trigger_extent(struct btree_trans *trans, need_rebalance_delta -= s != 0; need_rebalance_sectors_delta -= s; - s = bch2_bkey_sectors_need_rebalance(c, old); + s = bch2_bkey_sectors_need_rebalance(c, new.s_c); need_rebalance_delta += s != 0; need_rebalance_sectors_delta += s; -- cgit From b808f629215685c1941b1cd567c7b7ccb3c90278 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Fri, 9 Aug 2024 13:25:11 +0500 Subject: selftests: mm: fix build errors on armhf The __NR_mmap isn't found on armhf. The mmap() is commonly available system call and its wrapper is present on all architectures. So it should be used directly. It solves problem for armhf and doesn't create problem for other architectures. Remove sys_mmap() functions as they aren't doing anything else other than calling mmap(). There is no need to set errno = 0 manually as glibc always resets it. For reference errors are as following: CC seal_elf seal_elf.c: In function 'sys_mmap': seal_elf.c:39:33: error: '__NR_mmap' undeclared (first use in this function) 39 | sret = (void *) syscall(__NR_mmap, addr, len, prot, | ^~~~~~~~~ mseal_test.c: In function 'sys_mmap': mseal_test.c:90:33: error: '__NR_mmap' undeclared (first use in this function) 90 | sret = (void *) syscall(__NR_mmap, addr, len, prot, | ^~~~~~~~~ Link: https://lkml.kernel.org/r/20240809082511.497266-1-usama.anjum@collabora.com Fixes: 4926c7a52de7 ("selftest mm/mseal memory sealing") Signed-off-by: Muhammad Usama Anjum Cc: Jeff Xu Cc: Kees Cook Cc: Liam R. Howlett Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mseal_test.c | 37 ++++++++++++--------------------- tools/testing/selftests/mm/seal_elf.c | 13 +----------- 2 files changed, 14 insertions(+), 36 deletions(-) diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c index a818f010de47..bfcea5cf9a48 100644 --- a/tools/testing/selftests/mm/mseal_test.c +++ b/tools/testing/selftests/mm/mseal_test.c @@ -81,17 +81,6 @@ static int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, return sret; } -static void *sys_mmap(void *addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long fd, unsigned long offset) -{ - void *sret; - - errno = 0; - sret = (void *) syscall(__NR_mmap, addr, len, prot, - flags, fd, offset); - return sret; -} - static int sys_munmap(void *ptr, size_t size) { int sret; @@ -172,7 +161,7 @@ static void setup_single_address(int size, void **ptrOut) { void *ptr; - ptr = sys_mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ptr = mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); *ptrOut = ptr; } @@ -181,7 +170,7 @@ static void setup_single_address_rw(int size, void **ptrOut) void *ptr; unsigned long mapflags = MAP_ANONYMOUS | MAP_PRIVATE; - ptr = sys_mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0); + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0); *ptrOut = ptr; } @@ -205,7 +194,7 @@ bool seal_support(void) void *ptr; unsigned long page_size = getpagesize(); - ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (ptr == (void *) -1) return false; @@ -481,8 +470,8 @@ static void test_seal_zero_address(void) int prot; /* use mmap to change protection. */ - ptr = sys_mmap(0, size, PROT_NONE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + ptr = mmap(0, size, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); FAIL_TEST_IF_FALSE(ptr == 0); size = get_vma_size(ptr, &prot); @@ -1209,8 +1198,8 @@ static void test_seal_mmap_overwrite_prot(bool seal) } /* use mmap to change protection. */ - ret2 = sys_mmap(ptr, size, PROT_NONE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + ret2 = mmap(ptr, size, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); @@ -1240,8 +1229,8 @@ static void test_seal_mmap_expand(bool seal) } /* use mmap to expand. */ - ret2 = sys_mmap(ptr, size, PROT_READ, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + ret2 = mmap(ptr, size, PROT_READ, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); @@ -1268,8 +1257,8 @@ static void test_seal_mmap_shrink(bool seal) } /* use mmap to shrink. */ - ret2 = sys_mmap(ptr, 8 * page_size, PROT_READ, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + ret2 = mmap(ptr, 8 * page_size, PROT_READ, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); @@ -1650,7 +1639,7 @@ static void test_seal_discard_ro_anon_on_filebacked(bool seal) ret = fallocate(fd, 0, 0, size); FAIL_TEST_IF_FALSE(!ret); - ptr = sys_mmap(NULL, size, PROT_READ, mapflags, fd, 0); + ptr = mmap(NULL, size, PROT_READ, mapflags, fd, 0); FAIL_TEST_IF_FALSE(ptr != MAP_FAILED); if (seal) { @@ -1680,7 +1669,7 @@ static void test_seal_discard_ro_anon_on_shared(bool seal) int ret; unsigned long mapflags = MAP_ANONYMOUS | MAP_SHARED; - ptr = sys_mmap(NULL, size, PROT_READ, mapflags, -1, 0); + ptr = mmap(NULL, size, PROT_READ, mapflags, -1, 0); FAIL_TEST_IF_FALSE(ptr != (void *)-1); if (seal) { diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c index 7aa1366063e4..d9f8ba8d5050 100644 --- a/tools/testing/selftests/mm/seal_elf.c +++ b/tools/testing/selftests/mm/seal_elf.c @@ -30,17 +30,6 @@ static int sys_mseal(void *start, size_t len) return sret; } -static void *sys_mmap(void *addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long fd, unsigned long offset) -{ - void *sret; - - errno = 0; - sret = (void *) syscall(__NR_mmap, addr, len, prot, - flags, fd, offset); - return sret; -} - static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot) { int sret; @@ -56,7 +45,7 @@ static bool seal_support(void) void *ptr; unsigned long page_size = getpagesize(); - ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (ptr == (void *) -1) return false; -- cgit From 3e3de7947c751509027d26b679ecd243bc9db255 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 12 Aug 2024 18:16:06 +0100 Subject: mm: vmalloc: ensure vmap_block is initialised before adding to queue Commit 8c61291fd850 ("mm: fix incorrect vbq reference in purge_fragmented_block") extended the 'vmap_block' structure to contain a 'cpu' field which is set at allocation time to the id of the initialising CPU. When a new 'vmap_block' is being instantiated by new_vmap_block(), the partially initialised structure is added to the local 'vmap_block_queue' xarray before the 'cpu' field has been initialised. If another CPU is concurrently walking the xarray (e.g. via vm_unmap_aliases()), then it may perform an out-of-bounds access to the remote queue thanks to an uninitialised index. This has been observed as UBSAN errors in Android: | Internal error: UBSAN: array index out of bounds: 00000000f2005512 [#1] PREEMPT SMP | | Call trace: | purge_fragmented_block+0x204/0x21c | _vm_unmap_aliases+0x170/0x378 | vm_unmap_aliases+0x1c/0x28 | change_memory_common+0x1dc/0x26c | set_memory_ro+0x18/0x24 | module_enable_ro+0x98/0x238 | do_init_module+0x1b0/0x310 Move the initialisation of 'vb->cpu' in new_vmap_block() ahead of the addition to the xarray. Link: https://lkml.kernel.org/r/20240812171606.17486-1-will@kernel.org Fixes: 8c61291fd850 ("mm: fix incorrect vbq reference in purge_fragmented_block") Signed-off-by: Will Deacon Reviewed-by: Baoquan He Reviewed-by: Uladzislau Rezki (Sony) Cc: Zhaoyang Huang Cc: Hailong.Liu Cc: Christoph Hellwig Cc: Lorenzo Stoakes Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton --- mm/vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index af2de36549d6..ac53d46ac8a5 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2626,6 +2626,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) vb->dirty_max = 0; bitmap_set(vb->used_map, 0, (1UL << order)); INIT_LIST_HEAD(&vb->free_list); + vb->cpu = raw_smp_processor_id(); xa = addr_to_vb_xa(va->va_start); vb_idx = addr_to_vb_idx(va->va_start); @@ -2642,7 +2643,6 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) * integrity together with list_for_each_rcu from read * side. */ - vb->cpu = raw_smp_processor_id(); vbq = per_cpu_ptr(&vmap_block_queue, vb->cpu); spin_lock(&vbq->lock); list_add_tail_rcu(&vb->free_list, &vbq->free); -- cgit From 71c186efc1b2cf1aeabfeff3b9bd5ac4c5ac14d8 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 13 Aug 2024 22:25:21 +0200 Subject: userfaultfd: fix checks for huge PMDs Patch series "userfaultfd: fix races around pmd_trans_huge() check", v2. The pmd_trans_huge() code in mfill_atomic() is wrong in three different ways depending on kernel version: 1. The pmd_trans_huge() check is racy and can lead to a BUG_ON() (if you hit the right two race windows) - I've tested this in a kernel build with some extra mdelay() calls. See the commit message for a description of the race scenario. On older kernels (before 6.5), I think the same bug can even theoretically lead to accessing transhuge page contents as a page table if you hit the right 5 narrow race windows (I haven't tested this case). 2. As pointed out by Qi Zheng, pmd_trans_huge() is not sufficient for detecting PMDs that don't point to page tables. On older kernels (before 6.5), you'd just have to win a single fairly wide race to hit this. I've tested this on 6.1 stable by racing migration (with a mdelay() patched into try_to_migrate()) against UFFDIO_ZEROPAGE - on my x86 VM, that causes a kernel oops in ptlock_ptr(). 3. On newer kernels (>=6.5), for shmem mappings, khugepaged is allowed to yank page tables out from under us (though I haven't tested that), so I think the BUG_ON() checks in mfill_atomic() are just wrong. I decided to write two separate fixes for these (one fix for bugs 1+2, one fix for bug 3), so that the first fix can be backported to kernels affected by bugs 1+2. This patch (of 2): This fixes two issues. I discovered that the following race can occur: mfill_atomic other thread ============ ============ pmdp_get_lockless() [reads none pmd] __pte_alloc [no-op] BUG_ON(pmd_none(*dst_pmd)) I have experimentally verified this in a kernel with extra mdelay() calls; the BUG_ON(pmd_none(*dst_pmd)) triggers. On kernels newer than commit 0d940a9b270b ("mm/pgtable: allow pte_offset_map[_lock]() to fail"), this can't lead to anything worse than a BUG_ON(), since the page table access helpers are actually designed to deal with page tables concurrently disappearing; but on older kernels (<=6.4), I think we could probably theoretically race past the two BUG_ON() checks and end up treating a hugepage as a page table. The second issue is that, as Qi Zheng pointed out, there are other types of huge PMDs that pmd_trans_huge() can't catch: devmap PMDs and swap PMDs (in particular, migration PMDs). On <=6.4, this is worse than the first issue: If mfill_atomic() runs on a PMD that contains a migration entry (which just requires winning a single, fairly wide race), it will pass the PMD to pte_offset_map_lock(), which assumes that the PMD points to a page table. Breakage follows: First, the kernel tries to take the PTE lock (which will crash or maybe worse if there is no "struct page" for the address bits in the migration entry PMD - I think at least on X86 there usually is no corresponding "struct page" thanks to the PTE inversion mitigation, amd64 looks different). If that didn't crash, the kernel would next try to write a PTE into what it wrongly thinks is a page table. As part of fixing these issues, get rid of the check for pmd_trans_huge() before __pte_alloc() - that's redundant, we're going to have to check for that after the __pte_alloc() anyway. Backport note: pmdp_get_lockless() is pmd_read_atomic() in older kernels. Link: https://lkml.kernel.org/r/20240813-uffd-thp-flip-fix-v2-0-5efa61078a41@google.com Link: https://lkml.kernel.org/r/20240813-uffd-thp-flip-fix-v2-1-5efa61078a41@google.com Fixes: c1a4de99fada ("userfaultfd: mcopy_atomic|mfill_zeropage: UFFDIO_COPY|UFFDIO_ZEROPAGE preparation") Signed-off-by: Jann Horn Acked-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Jann Horn Cc: Pavel Emelyanov Cc: Qi Zheng Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index e54e5c8907fa..290b2a0d84ac 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -787,21 +787,23 @@ retry: } dst_pmdval = pmdp_get_lockless(dst_pmd); - /* - * If the dst_pmd is mapped as THP don't - * override it and just be strict. - */ - if (unlikely(pmd_trans_huge(dst_pmdval))) { - err = -EEXIST; - break; - } if (unlikely(pmd_none(dst_pmdval)) && unlikely(__pte_alloc(dst_mm, dst_pmd))) { err = -ENOMEM; break; } - /* If an huge pmd materialized from under us fail */ - if (unlikely(pmd_trans_huge(*dst_pmd))) { + dst_pmdval = pmdp_get_lockless(dst_pmd); + /* + * If the dst_pmd is THP don't override it and just be strict. + * (This includes the case where the PMD used to be THP and + * changed back to none after __pte_alloc().) + */ + if (unlikely(!pmd_present(dst_pmdval) || pmd_trans_huge(dst_pmdval) || + pmd_devmap(dst_pmdval))) { + err = -EEXIST; + break; + } + if (unlikely(pmd_bad(dst_pmdval))) { err = -EFAULT; break; } -- cgit From 4828d207dc5161dc7ddf9a4f6dcfd80c7dd7d20a Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 13 Aug 2024 22:25:22 +0200 Subject: userfaultfd: don't BUG_ON() if khugepaged yanks our page table Since khugepaged was changed to allow retracting page tables in file mappings without holding the mmap lock, these BUG_ON()s are wrong - get rid of them. We could also remove the preceding "if (unlikely(...))" block, but then we could reach pte_offset_map_lock() with transhuge pages not just for file mappings but also for anonymous mappings - which would probably be fine but I think is not necessarily expected. Link: https://lkml.kernel.org/r/20240813-uffd-thp-flip-fix-v2-2-5efa61078a41@google.com Fixes: 1d65b771bc08 ("mm/khugepaged: retract_page_tables() without mmap or vma lock") Signed-off-by: Jann Horn Reviewed-by: Qi Zheng Acked-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Pavel Emelyanov Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 290b2a0d84ac..acc56c75ba99 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -807,9 +807,10 @@ retry: err = -EFAULT; break; } - - BUG_ON(pmd_none(*dst_pmd)); - BUG_ON(pmd_trans_huge(*dst_pmd)); + /* + * For shmem mappings, khugepaged is allowed to remove page + * tables under us; pte_offset_map_lock() will deal with that. + */ err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr, src_addr, flags, &folio); -- cgit From 683408258917541bdb294cd717c210a04381931e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 11 Aug 2024 19:03:20 +0900 Subject: nilfs2: protect references to superblock parameters exposed in sysfs The superblock buffers of nilfs2 can not only be overwritten at runtime for modifications/repairs, but they are also regularly swapped, replaced during resizing, and even abandoned when degrading to one side due to backing device issues. So, accessing them requires mutual exclusion using the reader/writer semaphore "nilfs->ns_sem". Some sysfs attribute show methods read this superblock buffer without the necessary mutual exclusion, which can cause problems with pointer dereferencing and memory access, so fix it. Link: https://lkml.kernel.org/r/20240811100320.9913-1-konishi.ryusuke@gmail.com Fixes: da7141fb78db ("nilfs2: add /sys/fs/nilfs2/ group") Signed-off-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/sysfs.c | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index a5569b7f47a3..14868a3dd592 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -836,9 +836,15 @@ ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; - u32 major = le32_to_cpu(sbp[0]->s_rev_level); - u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level); + struct nilfs_super_block *raw_sb; + u32 major; + u16 minor; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + major = le32_to_cpu(raw_sb->s_rev_level); + minor = le16_to_cpu(raw_sb->s_minor_rev_level); + up_read(&nilfs->ns_sem); return sysfs_emit(buf, "%d.%d\n", major, minor); } @@ -856,8 +862,13 @@ ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; - u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size); + struct nilfs_super_block *raw_sb; + u64 dev_size; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + dev_size = le64_to_cpu(raw_sb->s_dev_size); + up_read(&nilfs->ns_sem); return sysfs_emit(buf, "%llu\n", dev_size); } @@ -879,9 +890,15 @@ ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; + struct nilfs_super_block *raw_sb; + ssize_t len; - return sysfs_emit(buf, "%pUb\n", sbp[0]->s_uuid); + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + len = sysfs_emit(buf, "%pUb\n", raw_sb->s_uuid); + up_read(&nilfs->ns_sem); + + return len; } static @@ -889,10 +906,16 @@ ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; + struct nilfs_super_block *raw_sb; + ssize_t len; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + len = scnprintf(buf, sizeof(raw_sb->s_volume_name), "%s\n", + raw_sb->s_volume_name); + up_read(&nilfs->ns_sem); - return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n", - sbp[0]->s_volume_name); + return len; } static const char dev_readme_str[] = -- cgit From 5787fcaab9eb5930f5378d6a1dd03d916d146622 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 10 Aug 2024 15:52:42 +0900 Subject: nilfs2: fix missing cleanup on rollforward recovery error In an error injection test of a routine for mount-time recovery, KASAN found a use-after-free bug. It turned out that if data recovery was performed using partial logs created by dsync writes, but an error occurred before starting the log writer to create a recovered checkpoint, the inodes whose data had been recovered were left in the ns_dirty_files list of the nilfs object and were not freed. Fix this issue by cleaning up inodes that have read the recovery data if the recovery routine fails midway before the log writer starts. Link: https://lkml.kernel.org/r/20240810065242.3701-1-konishi.ryusuke@gmail.com Fixes: 0f3e1c7f23f8 ("nilfs2: recovery functions") Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/recovery.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index b638dc06df2f..61e25a980f73 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -715,6 +715,33 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, brelse(bh); } +/** + * nilfs_abort_roll_forward - cleaning up after a failed rollforward recovery + * @nilfs: nilfs object + */ +static void nilfs_abort_roll_forward(struct the_nilfs *nilfs) +{ + struct nilfs_inode_info *ii, *n; + LIST_HEAD(head); + + /* Abandon inodes that have read recovery data */ + spin_lock(&nilfs->ns_inode_lock); + list_splice_init(&nilfs->ns_dirty_files, &head); + spin_unlock(&nilfs->ns_inode_lock); + if (list_empty(&head)) + return; + + set_nilfs_purging(nilfs); + list_for_each_entry_safe(ii, n, &head, i_dirty) { + spin_lock(&nilfs->ns_inode_lock); + list_del_init(&ii->i_dirty); + spin_unlock(&nilfs->ns_inode_lock); + + iput(&ii->vfs_inode); + } + clear_nilfs_purging(nilfs); +} + /** * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint * @nilfs: nilfs object @@ -773,15 +800,19 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, if (unlikely(err)) { nilfs_err(sb, "error %d writing segment for recovery", err); - goto failed; + goto put_root; } nilfs_finish_roll_forward(nilfs, ri); } - failed: +put_root: nilfs_put_root(root); return err; + +failed: + nilfs_abort_roll_forward(nilfs); + goto put_root; } /** -- cgit From 6576dd6695f2afca3f4954029ac4a64f82ba60ab Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 14 Aug 2024 19:11:19 +0900 Subject: nilfs2: fix state management in error path of log writing function After commit a694291a6211 ("nilfs2: separate wait function from nilfs_segctor_write") was applied, the log writing function nilfs_segctor_do_construct() was able to issue I/O requests continuously even if user data blocks were split into multiple logs across segments, but two potential flaws were introduced in its error handling. First, if nilfs_segctor_begin_construction() fails while creating the second or subsequent logs, the log writing function returns without calling nilfs_segctor_abort_construction(), so the writeback flag set on pages/folios will remain uncleared. This causes page cache operations to hang waiting for the writeback flag. For example, truncate_inode_pages_final(), which is called via nilfs_evict_inode() when an inode is evicted from memory, will hang. Second, the NILFS_I_COLLECTED flag set on normal inodes remain uncleared. As a result, if the next log write involves checkpoint creation, that's fine, but if a partial log write is performed that does not, inodes with NILFS_I_COLLECTED set are erroneously removed from the "sc_dirty_files" list, and their data and b-tree blocks may not be written to the device, corrupting the block mapping. Fix these issues by uniformly calling nilfs_segctor_abort_construction() on failure of each step in the loop in nilfs_segctor_do_construct(), having it clean up logs and segment usages according to progress, and correcting the conditions for calling nilfs_redirty_inodes() to ensure that the NILFS_I_COLLECTED flag is cleared. Link: https://lkml.kernel.org/r/20240814101119.4070-1-konishi.ryusuke@gmail.com Fixes: a694291a6211 ("nilfs2: separate wait function from nilfs_segctor_write") Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 0ca3110d6386..871ec35ea8e8 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1812,6 +1812,9 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, nilfs_abort_logs(&logs, ret ? : err); list_splice_tail_init(&sci->sc_segbufs, &logs); + if (list_empty(&logs)) + return; /* if the first segment buffer preparation failed */ + nilfs_cancel_segusage(&logs, nilfs->ns_sufile); nilfs_free_incomplete_logs(&logs, nilfs); @@ -2056,7 +2059,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) err = nilfs_segctor_begin_construction(sci, nilfs); if (unlikely(err)) - goto out; + goto failed; /* Update time stamp */ sci->sc_seg_ctime = ktime_get_real_seconds(); @@ -2120,10 +2123,9 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) return err; failed_to_write: - if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) - nilfs_redirty_inodes(&sci->sc_dirty_files); - failed: + if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_IFILE) + nilfs_redirty_inodes(&sci->sc_dirty_files); if (nilfs_doing_gc()) nilfs_redirty_inodes(&sci->sc_gc_inodes); nilfs_segctor_abort_construction(sci, nilfs, err); -- cgit From ab7ca09520e9c41c219a4427fe0dae24024bfe7f Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Fri, 16 Aug 2024 09:33:36 +0800 Subject: mm/slub: add check for s->flags in the alloc_tagging_slab_free_hook When enable CONFIG_MEMCG & CONFIG_KFENCE & CONFIG_KMEMLEAK, the following warning always occurs,This is because the following call stack occurred: mem_pool_alloc kmem_cache_alloc_noprof slab_alloc_node kfence_alloc Once the kfence allocation is successful,slab->obj_exts will not be empty, because it has already been assigned a value in kfence_init_pool. Since in the prepare_slab_obj_exts_hook function,we perform a check for s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE),the alloc_tag_add function will not be called as a result.Therefore,ref->ct remains NULL. However,when we call mem_pool_free,since obj_ext is not empty, it eventually leads to the alloc_tag_sub scenario being invoked. This is where the warning occurs. So we should add corresponding checks in the alloc_tagging_slab_free_hook. For __GFP_NO_OBJ_EXT case,I didn't see the specific case where it's using kfence,so I won't add the corresponding check in alloc_tagging_slab_free_hook for now. [ 3.734349] ------------[ cut here ]------------ [ 3.734807] alloc_tag was not set [ 3.735129] WARNING: CPU: 4 PID: 40 at ./include/linux/alloc_tag.h:130 kmem_cache_free+0x444/0x574 [ 3.735866] Modules linked in: autofs4 [ 3.736211] CPU: 4 UID: 0 PID: 40 Comm: ksoftirqd/4 Tainted: G W 6.11.0-rc3-dirty #1 [ 3.736969] Tainted: [W]=WARN [ 3.737258] Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022 [ 3.737875] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 3.738501] pc : kmem_cache_free+0x444/0x574 [ 3.738951] lr : kmem_cache_free+0x444/0x574 [ 3.739361] sp : ffff80008357bb60 [ 3.739693] x29: ffff80008357bb70 x28: 0000000000000000 x27: 0000000000000000 [ 3.740338] x26: ffff80008207f000 x25: ffff000b2eb2fd60 x24: ffff0000c0005700 [ 3.740982] x23: ffff8000804229e4 x22: ffff800082080000 x21: ffff800081756000 [ 3.741630] x20: fffffd7ff8253360 x19: 00000000000000a8 x18: ffffffffffffffff [ 3.742274] x17: ffff800ab327f000 x16: ffff800083398000 x15: ffff800081756df0 [ 3.742919] x14: 0000000000000000 x13: 205d344320202020 x12: 5b5d373038343337 [ 3.743560] x11: ffff80008357b650 x10: 000000000000005d x9 : 00000000ffffffd0 [ 3.744231] x8 : 7f7f7f7f7f7f7f7f x7 : ffff80008237bad0 x6 : c0000000ffff7fff [ 3.744907] x5 : ffff80008237ba78 x4 : ffff8000820bbad0 x3 : 0000000000000001 [ 3.745580] x2 : 68d66547c09f7800 x1 : 68d66547c09f7800 x0 : 0000000000000000 [ 3.746255] Call trace: [ 3.746530] kmem_cache_free+0x444/0x574 [ 3.746931] mem_pool_free+0x44/0xf4 [ 3.747306] free_object_rcu+0xc8/0xdc [ 3.747693] rcu_do_batch+0x234/0x8a4 [ 3.748075] rcu_core+0x230/0x3e4 [ 3.748424] rcu_core_si+0x14/0x1c [ 3.748780] handle_softirqs+0x134/0x378 [ 3.749189] run_ksoftirqd+0x70/0x9c [ 3.749560] smpboot_thread_fn+0x148/0x22c [ 3.749978] kthread+0x10c/0x118 [ 3.750323] ret_from_fork+0x10/0x20 [ 3.750696] ---[ end trace 0000000000000000 ]--- Link: https://lkml.kernel.org/r/20240816013336.17505-1-hao.ge@linux.dev Fixes: 4b8736964640 ("mm/slab: add allocation accounting into slab allocation and free paths") Signed-off-by: Hao Ge Cc: Christoph Lameter Cc: David Rientjes Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Kees Cook Cc: Kent Overstreet Cc: Pekka Enberg Cc: Roman Gushchin Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/slub.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index c9d8a2497fd6..a77f354f8325 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2116,6 +2116,10 @@ alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, if (!mem_alloc_profiling_enabled()) return; + /* slab->obj_exts might not be NULL if it was created for MEMCG accounting. */ + if (s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE)) + return; + obj_exts = slab_obj_exts(slab); if (!obj_exts) return; -- cgit From 6dacd79d28842ff01f18b4900d897741aac5999e Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Mon, 5 Aug 2024 17:07:50 +0200 Subject: kexec_file: fix elfcorehdr digest exclusion when CONFIG_CRASH_HOTPLUG=y Fix the condition to exclude the elfcorehdr segment from the SHA digest calculation. The j iterator is an index into the output sha_regions[] array, not into the input image->segment[] array. Once it reaches image->elfcorehdr_index, all subsequent segments are excluded. Besides, if the purgatory segment precedes the elfcorehdr segment, the elfcorehdr may be wrongly included in the calculation. Link: https://lkml.kernel.org/r/20240805150750.170739-1-petr.tesarik@suse.com Fixes: f7cc804a9fd4 ("kexec: exclude elfcorehdr from the segment digest") Signed-off-by: Petr Tesarik Acked-by: Baoquan He Cc: Eric Biederman Cc: Hari Bathini Cc: Sourabh Jain Cc: Eric DeVolder Cc: Signed-off-by: Andrew Morton --- kernel/kexec_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 3d64290d24c9..3eedb8c226ad 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -752,7 +752,7 @@ static int kexec_calculate_store_digests(struct kimage *image) #ifdef CONFIG_CRASH_HOTPLUG /* Exclude elfcorehdr segment to allow future changes via hotplug */ - if (j == image->elfcorehdr_index) + if (i == image->elfcorehdr_index) continue; #endif -- cgit From f806de88d8f7f8191afd0fd9b94db4cd058e7d4f Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Tue, 20 Aug 2024 13:54:17 -0400 Subject: maple_tree: remove rcu_read_lock() from mt_validate() The write lock should be held when validating the tree to avoid updates racing with checks. Holding the rcu read lock during a large tree validation may also cause a prolonged rcu read window and "rcu_preempt detected stalls" warnings. Link: https://lore.kernel.org/all/0000000000001d12d4062005aea1@google.com/ Link: https://lkml.kernel.org/r/20240820175417.2782532-1-Liam.Howlett@oracle.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam R. Howlett Reported-by: syzbot+036af2f0c7338a33b0cd@syzkaller.appspotmail.com Cc: Hillf Danton Cc: Matthew Wilcox Cc: "Paul E. McKenney" Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index aa3a5df15b8e..6df3a8b95808 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -7566,14 +7566,14 @@ static void mt_validate_nulls(struct maple_tree *mt) * 2. The gap is correctly set in the parents */ void mt_validate(struct maple_tree *mt) + __must_hold(mas->tree->ma_lock) { unsigned char end; MA_STATE(mas, mt, 0, 0); - rcu_read_lock(); mas_start(&mas); if (!mas_is_active(&mas)) - goto done; + return; while (!mte_is_leaf(mas.node)) mas_descend(&mas); @@ -7594,9 +7594,6 @@ void mt_validate(struct maple_tree *mt) mas_dfs_postorder(&mas, ULONG_MAX); } mt_validate_nulls(mt); -done: - rcu_read_unlock(); - } EXPORT_SYMBOL_GPL(mt_validate); -- cgit From bfe0857c20c663fcc1592fa4e3a61ca12b07dac9 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Wed, 21 Aug 2024 20:26:07 +0100 Subject: Revert "mm: skip CMA pages when they are not available" This reverts commit 5da226dbfce3 ("mm: skip CMA pages when they are not available") and b7108d66318a ("Multi-gen LRU: skip CMA pages when they are not eligible"). lruvec->lru_lock is highly contended and is held when calling isolate_lru_folios. If the lru has a large number of CMA folios consecutively, while the allocation type requested is not MIGRATE_MOVABLE, isolate_lru_folios can hold the lock for a very long time while it skips those. For FIO workload, ~150million order=0 folios were skipped to isolate a few ZONE_DMA folios [1]. This can cause lockups [1] and high memory pressure for extended periods of time [2]. Remove skipping CMA for MGLRU as well, as it was introduced in sort_folio for the same resaon as 5da226dbfce3a2f44978c2c7cf88166e69a6788b. [1] https://lore.kernel.org/all/CAOUHufbkhMZYz20aM_3rHZ3OcK4m2puji2FGpUpn_-DevGk3Kg@mail.gmail.com/ [2] https://lore.kernel.org/all/ZrssOrcJIDy8hacI@gmail.com/ [usamaarif642@gmail.com: also revert b7108d66318a, per Johannes] Link: https://lkml.kernel.org/r/9060a32d-b2d7-48c0-8626-1db535653c54@gmail.com Link: https://lkml.kernel.org/r/357ac325-4c61-497a-92a3-bdbd230d5ec9@gmail.com Link: https://lkml.kernel.org/r/9060a32d-b2d7-48c0-8626-1db535653c54@gmail.com Fixes: 5da226dbfce3 ("mm: skip CMA pages when they are not available") Signed-off-by: Usama Arif Acked-by: Johannes Weiner Cc: Bharata B Rao Cc: Breno Leitao Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Rik van Riel Cc: Vlastimil Babka Cc: Yu Zhao Cc: Zhaoyang Huang Cc: Zhaoyang Huang Cc: Signed-off-by: Andrew Morton --- mm/vmscan.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index cfa839284b92..bd489c1af228 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1604,25 +1604,6 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec, } -#ifdef CONFIG_CMA -/* - * It is waste of effort to scan and reclaim CMA pages if it is not available - * for current allocation context. Kswapd can not be enrolled as it can not - * distinguish this scenario by using sc->gfp_mask = GFP_KERNEL - */ -static bool skip_cma(struct folio *folio, struct scan_control *sc) -{ - return !current_is_kswapd() && - gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE && - folio_migratetype(folio) == MIGRATE_CMA; -} -#else -static bool skip_cma(struct folio *folio, struct scan_control *sc) -{ - return false; -} -#endif - /* * Isolating page from the lruvec to fill in @dst list by nr_to_scan times. * @@ -1669,8 +1650,7 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan, nr_pages = folio_nr_pages(folio); total_scan += nr_pages; - if (folio_zonenum(folio) > sc->reclaim_idx || - skip_cma(folio, sc)) { + if (folio_zonenum(folio) > sc->reclaim_idx) { nr_skipped[folio_zonenum(folio)] += nr_pages; move_to = &folios_skipped; goto move; @@ -4320,7 +4300,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c } /* ineligible */ - if (zone > sc->reclaim_idx || skip_cma(folio, sc)) { + if (zone > sc->reclaim_idx) { gen = folio_inc_gen(lruvec, folio, false); list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]); return true; -- cgit From a3f6a89c834a4cba0f881da21307b26de3796133 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 23 Aug 2024 17:38:50 +0100 Subject: scripts: fix gfp-translate after ___GFP_*_BITS conversion to an enum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Richard reports that since 772dd0342727c ("mm: enumerate all gfp flags"), gfp-translate is broken, as the bit numbers are implicit, leaving the shell script unable to extract them. Even more, some bits are now at a variable location, making it double extra hard to parse using a simple shell script. Use a brute-force approach to the problem by generating a small C stub that will use the enum to dump the interesting bits. As an added bonus, we are now able to identify invalid bits for a given configuration. As an added drawback, we cannot parse include files that predate this change anymore. Tough luck. Link: https://lkml.kernel.org/r/20240823163850.3791201-1-maz@kernel.org Fixes: 772dd0342727 ("mm: enumerate all gfp flags") Signed-off-by: Marc Zyngier Reported-by: Richard Weinberger Cc: Petr Tesařík Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- scripts/gfp-translate | 66 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/scripts/gfp-translate b/scripts/gfp-translate index 6c9aed17cf56..8385ae0d5af9 100755 --- a/scripts/gfp-translate +++ b/scripts/gfp-translate @@ -62,25 +62,57 @@ if [ "$GFPMASK" = "none" ]; then fi # Extract GFP flags from the kernel source -TMPFILE=`mktemp -t gfptranslate-XXXXXX` || exit 1 -grep -q ___GFP $SOURCE/include/linux/gfp_types.h -if [ $? -eq 0 ]; then - grep "^#define ___GFP" $SOURCE/include/linux/gfp_types.h | sed -e 's/u$//' | grep -v GFP_BITS > $TMPFILE -else - grep "^#define __GFP" $SOURCE/include/linux/gfp_types.h | sed -e 's/(__force gfp_t)//' | sed -e 's/u)/)/' | grep -v GFP_BITS | sed -e 's/)\//) \//' > $TMPFILE -fi +TMPFILE=`mktemp -t gfptranslate-XXXXXX.c` || exit 1 -# Parse the flags -IFS=" -" echo Source: $SOURCE echo Parsing: $GFPMASK -for LINE in `cat $TMPFILE`; do - MASK=`echo $LINE | awk '{print $3}'` - if [ $(($GFPMASK&$MASK)) -ne 0 ]; then - echo $LINE - fi -done -rm -f $TMPFILE +( + cat < +#include + +// Try to fool compiler.h into not including extra stuff +#define __ASSEMBLY__ 1 + +#include +#include + +static const char *masks[] = { +EOF + + sed -nEe 's/^[[:space:]]+(___GFP_.*)_BIT,.*$/\1/p' $SOURCE/include/linux/gfp_types.h | + while read b; do + cat < 0) + [${b}_BIT] = "$b", +#endif +EOF + done + + cat < $TMPFILE + +${CC:-gcc} -Wall -o ${TMPFILE}.bin -I $SOURCE/include $TMPFILE && ${TMPFILE}.bin + +rm -f $TMPFILE ${TMPFILE}.bin + exit 0 -- cgit From e399257349098bf7c84343f99efb2bc9c22eb9fd Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Fri, 23 Aug 2024 16:27:06 +0000 Subject: mm/memcontrol: respect zswap.writeback setting from parent cg too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the behavior of zswap.writeback wrt. the cgroup hierarchy seems a bit odd. Unlike zswap.max, it doesn't honor the value from parent cgroups. This surfaced when people tried to globally disable zswap writeback, i.e. reserve physical swap space only for hibernation [1] - disabling zswap.writeback only for the root cgroup results in subcgroups with zswap.writeback=1 still performing writeback. The inconsistency became more noticeable after I introduced the MemoryZSwapWriteback= systemd unit setting [2] for controlling the knob. The patch assumed that the kernel would enforce the value of parent cgroups. It could probably be workarounded from systemd's side, by going up the slice unit tree and inheriting the value. Yet I think it's more sensible to make it behave consistently with zswap.max and friends. [1] https://wiki.archlinux.org/title/Power_management/Suspend_and_hibernate#Disable_zswap_writeback_to_use_the_swap_space_only_for_hibernation [2] https://github.com/systemd/systemd/pull/31734 Link: https://lkml.kernel.org/r/20240823162506.12117-1-me@yhndnzj.com Fixes: 501a06fe8e4c ("zswap: memcontrol: implement zswap writeback disabling") Signed-off-by: Mike Yuan Reviewed-by: Nhat Pham Acked-by: Yosry Ahmed Cc: Johannes Weiner Cc: Michal Hocko Cc: Michal Koutný Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Cc: Signed-off-by: Andrew Morton --- Documentation/admin-guide/cgroup-v2.rst | 7 ++++--- mm/memcontrol.c | 12 +++++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 86311c2907cd..95c18bc17083 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1717,9 +1717,10 @@ The following nested keys are defined. entries fault back in or are written out to disk. memory.zswap.writeback - A read-write single value file. The default value is "1". The - initial value of the root cgroup is 1, and when a new cgroup is - created, it inherits the current value of its parent. + A read-write single value file. The default value is "1". + Note that this setting is hierarchical, i.e. the writeback would be + implicitly disabled for child cgroups if the upper hierarchy + does so. When this is set to 0, all swapping attempts to swapping devices are disabled. This included both zswap writebacks, and swapping due diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f29157288b7d..d563fb515766 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3613,8 +3613,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) memcg1_soft_limit_reset(memcg); #ifdef CONFIG_ZSWAP memcg->zswap_max = PAGE_COUNTER_MAX; - WRITE_ONCE(memcg->zswap_writeback, - !parent || READ_ONCE(parent->zswap_writeback)); + WRITE_ONCE(memcg->zswap_writeback, true); #endif page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX); if (parent) { @@ -5320,7 +5319,14 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size) bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg) { /* if zswap is disabled, do not block pages going to the swapping device */ - return !zswap_is_enabled() || !memcg || READ_ONCE(memcg->zswap_writeback); + if (!zswap_is_enabled()) + return true; + + for (; memcg; memcg = parent_mem_cgroup(memcg)) + if (!READ_ONCE(memcg->zswap_writeback)) + return false; + + return true; } static u64 zswap_current_read(struct cgroup_subsys_state *css, -- cgit From 5e9784e997620af7c1399029282f5d6964b41942 Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Mon, 26 Aug 2024 00:36:49 +0800 Subject: codetag: debug: mark codetags for poisoned page as empty When PG_hwpoison pages are freed they are treated differently in free_pages_prepare() and instead of being released they are isolated. Page allocation tag counters are decremented at this point since the page is considered not in use. Later on when such pages are released by unpoison_memory(), the allocation tag counters will be decremented again and the following warning gets reported: [ 113.930443][ T3282] ------------[ cut here ]------------ [ 113.931105][ T3282] alloc_tag was not set [ 113.931576][ T3282] WARNING: CPU: 2 PID: 3282 at ./include/linux/alloc_tag.h:130 pgalloc_tag_sub.part.66+0x154/0x164 [ 113.932866][ T3282] Modules linked in: hwpoison_inject fuse ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 ipt_REJECT nf_reject_ipv4 xt_conntrack ebtable_nat ebtable_broute ip6table_nat ip6table_man4 [ 113.941638][ T3282] CPU: 2 UID: 0 PID: 3282 Comm: madvise11 Kdump: loaded Tainted: G W 6.11.0-rc4-dirty #18 [ 113.943003][ T3282] Tainted: [W]=WARN [ 113.943453][ T3282] Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022 [ 113.944378][ T3282] pstate: 40400005 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 113.945319][ T3282] pc : pgalloc_tag_sub.part.66+0x154/0x164 [ 113.946016][ T3282] lr : pgalloc_tag_sub.part.66+0x154/0x164 [ 113.946706][ T3282] sp : ffff800087093a10 [ 113.947197][ T3282] x29: ffff800087093a10 x28: ffff0000d7a9d400 x27: ffff80008249f0a0 [ 113.948165][ T3282] x26: 0000000000000000 x25: ffff80008249f2b0 x24: 0000000000000000 [ 113.949134][ T3282] x23: 0000000000000001 x22: 0000000000000001 x21: 0000000000000000 [ 113.950597][ T3282] x20: ffff0000c08fcad8 x19: ffff80008251e000 x18: ffffffffffffffff [ 113.952207][ T3282] x17: 0000000000000000 x16: 0000000000000000 x15: ffff800081746210 [ 113.953161][ T3282] x14: 0000000000000000 x13: 205d323832335420 x12: 5b5d353031313339 [ 113.954120][ T3282] x11: ffff800087093500 x10: 000000000000005d x9 : 00000000ffffffd0 [ 113.955078][ T3282] x8 : 7f7f7f7f7f7f7f7f x7 : ffff80008236ba90 x6 : c0000000ffff7fff [ 113.956036][ T3282] x5 : ffff000b34bf4dc8 x4 : ffff8000820aba90 x3 : 0000000000000001 [ 113.956994][ T3282] x2 : ffff800ab320f000 x1 : 841d1e35ac932e00 x0 : 0000000000000000 [ 113.957962][ T3282] Call trace: [ 113.958350][ T3282] pgalloc_tag_sub.part.66+0x154/0x164 [ 113.959000][ T3282] pgalloc_tag_sub+0x14/0x1c [ 113.959539][ T3282] free_unref_page+0xf4/0x4b8 [ 113.960096][ T3282] __folio_put+0xd4/0x120 [ 113.960614][ T3282] folio_put+0x24/0x50 [ 113.961103][ T3282] unpoison_memory+0x4f0/0x5b0 [ 113.961678][ T3282] hwpoison_unpoison+0x30/0x48 [hwpoison_inject] [ 113.962436][ T3282] simple_attr_write_xsigned.isra.34+0xec/0x1cc [ 113.963183][ T3282] simple_attr_write+0x38/0x48 [ 113.963750][ T3282] debugfs_attr_write+0x54/0x80 [ 113.964330][ T3282] full_proxy_write+0x68/0x98 [ 113.964880][ T3282] vfs_write+0xdc/0x4d0 [ 113.965372][ T3282] ksys_write+0x78/0x100 [ 113.965875][ T3282] __arm64_sys_write+0x24/0x30 [ 113.966440][ T3282] invoke_syscall+0x7c/0x104 [ 113.966984][ T3282] el0_svc_common.constprop.1+0x88/0x104 [ 113.967652][ T3282] do_el0_svc+0x2c/0x38 [ 113.968893][ T3282] el0_svc+0x3c/0x1b8 [ 113.969379][ T3282] el0t_64_sync_handler+0x98/0xbc [ 113.969980][ T3282] el0t_64_sync+0x19c/0x1a0 [ 113.970511][ T3282] ---[ end trace 0000000000000000 ]--- To fix this, clear the page tag reference after the page got isolated and accounted for. Link: https://lkml.kernel.org/r/20240825163649.33294-1-hao.ge@linux.dev Fixes: d224eb0287fb ("codetag: debug: mark codetags for reserved pages as empty") Signed-off-by: Hao Ge Reviewed-by: Miaohe Lin Acked-by: Suren Baghdasaryan Cc: David Hildenbrand Cc: Hao Ge Cc: Kent Overstreet Cc: Naoya Horiguchi Cc: Pasha Tatashin Cc: [6.10+] Signed-off-by: Andrew Morton --- mm/page_alloc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c565de8f48e9..91ace8ca97e2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1054,6 +1054,13 @@ __always_inline bool free_pages_prepare(struct page *page, reset_page_owner(page, order); page_table_check_free(page, order); pgalloc_tag_sub(page, 1 << order); + + /* + * The page is isolated and accounted for. + * Mark the codetag as empty to avoid accounting error + * when the page is freed by unpoison_memory(). + */ + clear_page_tag_ref(page); return false; } -- cgit From 4f295229b279145bdc667c58f62e89f5968e12fb Mon Sep 17 00:00:00 2001 From: Jan Kuliga Date: Fri, 30 Aug 2024 11:56:58 +0200 Subject: mailmap: update entry for Jan Kuliga Soon I won't be able to use my current email address. Link: https://lkml.kernel.org/r/20240830095658.1203198-1-jankul@alatek.krakow.pl Signed-off-by: Jan Kuliga Cc: David S. Miller Cc: Matthieu Baerts (NGI0) Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index caf46a652f15..a7969e2d590a 100644 --- a/.mailmap +++ b/.mailmap @@ -269,6 +269,7 @@ James Ketrenos Jan Glauber Jan Glauber Jan Glauber +Jan Kuliga Jarkko Sakkinen Jarkko Sakkinen Jarkko Sakkinen -- cgit From 409faf8c97d5abb0597ea43e99c8b3dd8dbe99e3 Mon Sep 17 00:00:00 2001 From: Adrian Huang Date: Thu, 29 Aug 2024 21:06:33 +0800 Subject: mm: vmalloc: optimize vmap_lazy_nr arithmetic when purging each vmap_area When running the vmalloc stress on a 448-core system, observe the average latency of purge_vmap_node() is about 2 seconds by using the eBPF/bcc 'funclatency.py' tool [1]. # /your-git-repo/bcc/tools/funclatency.py -u purge_vmap_node & pid1=$! && sleep 8 && modprobe test_vmalloc nr_threads=$(nproc) run_test_mask=0x7; kill -SIGINT $pid1 usecs : count distribution 0 -> 1 : 0 | | 2 -> 3 : 29 | | 4 -> 7 : 19 | | 8 -> 15 : 56 | | 16 -> 31 : 483 |**** | 32 -> 63 : 1548 |************ | 64 -> 127 : 2634 |********************* | 128 -> 255 : 2535 |********************* | 256 -> 511 : 1776 |************** | 512 -> 1023 : 1015 |******** | 1024 -> 2047 : 573 |**** | 2048 -> 4095 : 488 |**** | 4096 -> 8191 : 1091 |********* | 8192 -> 16383 : 3078 |************************* | 16384 -> 32767 : 4821 |****************************************| 32768 -> 65535 : 3318 |*************************** | 65536 -> 131071 : 1718 |************** | 131072 -> 262143 : 2220 |****************** | 262144 -> 524287 : 1147 |********* | 524288 -> 1048575 : 1179 |********* | 1048576 -> 2097151 : 822 |****** | 2097152 -> 4194303 : 906 |******* | 4194304 -> 8388607 : 2148 |***************** | 8388608 -> 16777215 : 4497 |************************************* | 16777216 -> 33554431 : 289 |** | avg = 2041714 usecs, total: 78381401772 usecs, count: 38390 The worst case is over 16-33 seconds, so soft lockup is triggered [2]. [Root Cause] 1) Each purge_list has the long list. The following shows the number of vmap_area is purged. crash> p vmap_nodes vmap_nodes = $27 = (struct vmap_node *) 0xff2de5a900100000 crash> vmap_node 0xff2de5a900100000 128 | grep nr_purged nr_purged = 663070 ... nr_purged = 821670 nr_purged = 692214 nr_purged = 726808 ... 2) atomic_long_sub() employs the 'lock' prefix to ensure the atomic operation when purging each vmap_area. However, the iteration is over 600000 vmap_area (See 'nr_purged' above). Here is objdump output: $ objdump -D vmlinux ffffffff813e8c80 : ... ffffffff813e8d70: f0 48 29 2d 68 0c bb lock sub %rbp,0x2bb0c68(%rip) ... Quote from "Instruction tables" pdf file [3]: Instructions with a LOCK prefix have a long latency that depends on cache organization and possibly RAM speed. If there are multiple processors or cores or direct memory access (DMA) devices, then all locked instructions will lock a cache line for exclusive access, which may involve RAM access. A LOCK prefix typically costs more than a hundred clock cycles, even on single-processor systems. That's why the latency of purge_vmap_node() dramatically increases on a many-core system: One core is busy on purging each vmap_area of the *long* purge_list and executing atomic_long_sub() for each vmap_area, while other cores free vmalloc allocations and execute atomic_long_add_return() in free_vmap_area_noflush(). [Solution] Employ a local variable to record the total purged pages, and execute atomic_long_sub() after the traversal of the purge_list is done. The experiment result shows the latency improvement is 99%. [Experiment Result] 1) System Configuration: Three servers (with HT-enabled) are tested. * 72-core server: 3rd Gen Intel Xeon Scalable Processor*1 * 192-core server: 5th Gen Intel Xeon Scalable Processor*2 * 448-core server: AMD Zen 4 Processor*2 2) Kernel Config * CONFIG_KASAN is disabled 3) The data in column "w/o patch" and "w/ patch" * Unit: micro seconds (us) * Each data is the average of 3-time measurements System w/o patch (us) w/ patch (us) Improvement (%) --------------- -------------- ------------- ------------- 72-core server 2194 14 99.36% 192-core server 143799 1139 99.21% 448-core server 1992122 6883 99.65% [1] https://github.com/iovisor/bcc/blob/master/tools/funclatency.py [2] https://gist.github.com/AdrianHuang/37c15f67b45407b83c2d32f918656c12 [3] https://www.agner.org/optimize/instruction_tables.pdf Link: https://lkml.kernel.org/r/20240829130633.2184-1-ahuang12@lenovo.com Signed-off-by: Adrian Huang Reviewed-by: Uladzislau Rezki (Sony) Cc: Christoph Hellwig Cc: Signed-off-by: Andrew Morton --- mm/vmalloc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ac53d46ac8a5..a0df1e2e155a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2191,6 +2191,7 @@ static void purge_vmap_node(struct work_struct *work) { struct vmap_node *vn = container_of(work, struct vmap_node, purge_work); + unsigned long nr_purged_pages = 0; struct vmap_area *va, *n_va; LIST_HEAD(local_list); @@ -2208,7 +2209,7 @@ static void purge_vmap_node(struct work_struct *work) kasan_release_vmalloc(orig_start, orig_end, va->va_start, va->va_end); - atomic_long_sub(nr, &vmap_lazy_nr); + nr_purged_pages += nr; vn->nr_purged++; if (is_vn_id_valid(vn_id) && !vn->skip_populate) @@ -2219,6 +2220,8 @@ static void purge_vmap_node(struct work_struct *work) list_add(&va->list, &local_list); } + atomic_long_sub(nr_purged_pages, &vmap_lazy_nr); + reclaim_list_global(&local_list); } -- cgit From 052a45c1cb1b32f05dd63a295d65496d8b403283 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 28 Aug 2024 16:15:36 -0700 Subject: alloc_tag: fix allocation tag reporting when CONFIG_MODULES=n codetag_module_init() is used to initialize sections containing allocation tags. This function is used to initialize module sections as well as core kernel sections, in which case the module parameter is set to NULL. This function has to be called even when CONFIG_MODULES=n to initialize core kernel allocation tag sections. When CONFIG_MODULES=n, this function is a NOP, which is wrong. This leads to /proc/allocinfo reported as empty. Fix this by making it independent of CONFIG_MODULES. Link: https://lkml.kernel.org/r/20240828231536.1770519-1-surenb@google.com Fixes: 916cc5167cc6 ("lib: code tagging framework") Signed-off-by: Suren Baghdasaryan Cc: David Hildenbrand Cc: Kees Cook Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Sourav Panda Cc: Vlastimil Babka Cc: [6.10+] Signed-off-by: Andrew Morton --- lib/codetag.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/lib/codetag.c b/lib/codetag.c index 5ace625f2328..afa8a2d4f317 100644 --- a/lib/codetag.c +++ b/lib/codetag.c @@ -125,7 +125,6 @@ static inline size_t range_size(const struct codetag_type *cttype, cttype->desc.tag_size; } -#ifdef CONFIG_MODULES static void *get_symbol(struct module *mod, const char *prefix, const char *name) { DECLARE_SEQ_BUF(sb, KSYM_NAME_LEN); @@ -155,6 +154,15 @@ static struct codetag_range get_section_range(struct module *mod, }; } +static const char *get_mod_name(__maybe_unused struct module *mod) +{ +#ifdef CONFIG_MODULES + if (mod) + return mod->name; +#endif + return "(built-in)"; +} + static int codetag_module_init(struct codetag_type *cttype, struct module *mod) { struct codetag_range range; @@ -164,8 +172,7 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod) range = get_section_range(mod, cttype->desc.section); if (!range.start || !range.stop) { pr_warn("Failed to load code tags of type %s from the module %s\n", - cttype->desc.section, - mod ? mod->name : "(built-in)"); + cttype->desc.section, get_mod_name(mod)); return -EINVAL; } @@ -199,6 +206,7 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod) return 0; } +#ifdef CONFIG_MODULES void codetag_load_module(struct module *mod) { struct codetag_type *cttype; @@ -248,9 +256,6 @@ bool codetag_unload_module(struct module *mod) return unload_ok; } - -#else /* CONFIG_MODULES */ -static int codetag_module_init(struct codetag_type *cttype, struct module *mod) { return 0; } #endif /* CONFIG_MODULES */ struct codetag_type * -- cgit From a5a3c952e82c1ada12bf8c55b73af26f1a454bd2 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 28 Aug 2024 11:01:29 -0700 Subject: rust: macros: provide correct provenance when constructing THIS_MODULE Currently while defining `THIS_MODULE` symbol in `module!()`, the pointer used to construct `ThisModule` is derived from an immutable reference of `__this_module`, which means the pointer doesn't have the provenance for writing, and that means any write to that pointer is UB regardless of data races or not. However, the usage of `THIS_MODULE` includes passing this pointer to functions that may write to it (probably in unsafe code), and this will create soundness issues. One way to fix this is using `addr_of_mut!()` but that requires the unstable feature "const_mut_refs". So instead of `addr_of_mut()!`, an extern static `Opaque` is used here: since `Opaque` is transparent to `T`, an extern static `Opaque` will just wrap the C symbol (defined in a C compile unit) in an `Opaque`, which provides a pointer with writable provenance via `Opaque::get()`. This fix the potential UBs because of pointer provenance unmatched. Reported-by: Alice Ryhl Signed-off-by: Boqun Feng Reviewed-by: Alice Ryhl Reviewed-by: Trevor Gross Reviewed-by: Benno Lossin Reviewed-by: Gary Guo Closes: https://rust-for-linux.zulipchat.com/#narrow/stream/x/topic/x/near/465412664 Fixes: 1fbde52bde73 ("rust: add `macros` crate") Cc: stable@vger.kernel.org # 6.6.x: be2ca1e03965: ("rust: types: Make Opaque::get const") Link: https://lore.kernel.org/r/20240828180129.4046355-1-boqun.feng@gmail.com [ Fixed two typos, reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/macros/module.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rust/macros/module.rs b/rust/macros/module.rs index 411dc103d82e..7a5b899e47b7 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -217,7 +217,11 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { // freed until the module is unloaded. #[cfg(MODULE)] static THIS_MODULE: kernel::ThisModule = unsafe {{ - kernel::ThisModule::from_ptr(&kernel::bindings::__this_module as *const _ as *mut _) + extern \"C\" {{ + static __this_module: kernel::types::Opaque; + }} + + kernel::ThisModule::from_ptr(__this_module.get()) }}; #[cfg(not(MODULE))] static THIS_MODULE: kernel::ThisModule = unsafe {{ -- cgit From 376174f5a49ac8701df15a68e9d3269c5b62abed Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Wed, 28 Aug 2024 18:11:17 -0300 Subject: MAINTAINERS: Remove Wedson as Rust maintainer I am retiring from the project, so removing myself from MAINTAINERS as I won't have time to dedicate to it. Signed-off-by: Wedson Almeida Filho Link: https://lore.kernel.org/r/20240828211117.9422-2-wedsonaf@gmail.com Signed-off-by: Miguel Ojeda --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f328373463b0..682708bdb940 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19907,7 +19907,6 @@ F: tools/verification/ RUST M: Miguel Ojeda M: Alex Gaynor -M: Wedson Almeida Filho R: Boqun Feng R: Gary Guo R: Björn Roy Baron -- cgit From a3c1e45156ad39f225cd7ddae0f81230a3b1e657 Mon Sep 17 00:00:00 2001 From: Jens Emil Schulz Østergaard Date: Thu, 29 Aug 2024 11:52:54 +0200 Subject: net: microchip: vcap: Fix use-after-free error in kunit test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a clear use-after-free error. We remove it, and rely on checking the return code of vcap_del_rule. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/kernel-janitors/7bffefc6-219a-4f71-baa0-ad4526e5c198@kili.mountain/ Fixes: c956b9b318d9 ("net: microchip: sparx5: Adding KUNIT tests of key/action values in VCAP API") Signed-off-by: Jens Emil Schulz Østergaard Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c index 51d9423b08a6..f2a5a36fdacd 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c @@ -1442,18 +1442,8 @@ static void vcap_api_encode_rule_test(struct kunit *test) vcap_enable_lookups(&test_vctrl, &test_netdev, 0, 0, rule->cookie, false); - vcap_free_rule(rule); - - /* Check that the rule has been freed: tricky to access since this - * memory should not be accessible anymore - */ - KUNIT_EXPECT_PTR_NE(test, NULL, rule); - ret = list_empty(&rule->keyfields); - KUNIT_EXPECT_EQ(test, true, ret); - ret = list_empty(&rule->actionfields); - KUNIT_EXPECT_EQ(test, true, ret); - - vcap_del_rule(&test_vctrl, &test_netdev, id); + ret = vcap_del_rule(&test_vctrl, &test_netdev, id); + KUNIT_EXPECT_EQ(test, 0, ret); } static void vcap_api_set_rule_counter_test(struct kunit *test) -- cgit From 709df70a20e990d262c473ad9899314039e8ec82 Mon Sep 17 00:00:00 2001 From: Liao Chen Date: Sat, 31 Aug 2024 09:42:31 +0000 Subject: spi: bcm63xx: Enable module autoloading Add MODULE_DEVICE_TABLE(), so modules could be properly autoloaded based on the alias from of_device_id table. Signed-off-by: Liao Chen Link: https://patch.msgid.link/20240831094231.795024-1-liaochen4@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-bcm63xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c index aac41bd05f98..2fb8d4e55c77 100644 --- a/drivers/spi/spi-bcm63xx.c +++ b/drivers/spi/spi-bcm63xx.c @@ -472,6 +472,7 @@ static const struct of_device_id bcm63xx_spi_of_match[] = { { .compatible = "brcm,bcm6358-spi", .data = &bcm6358_spi_reg_offsets }, { }, }; +MODULE_DEVICE_TABLE(of, bcm63xx_spi_of_match); static int bcm63xx_spi_probe(struct platform_device *pdev) { -- cgit From ef4a99a0164e3972abb421cbb1b09ea6c61414df Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 29 Aug 2024 22:22:45 +0300 Subject: igc: Unlock on error in igc_io_resume() Call rtnl_unlock() on this error path, before returning. Fixes: bc23aa949aeb ("igc: Add pcie error handler support") Signed-off-by: Dan Carpenter Reviewed-by: Gerhard Engleder Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igc/igc_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index dfd6c00b4205..0a095cdea4fb 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7413,6 +7413,7 @@ static void igc_io_resume(struct pci_dev *pdev) rtnl_lock(); if (netif_running(netdev)) { if (igc_open(netdev)) { + rtnl_unlock(); netdev_err(netdev, "igc_open failed after reset\n"); return; } -- cgit From a54da9df75cd1b4b5028f6c60f9a211532680585 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 1 Sep 2024 05:10:51 +0200 Subject: hwmon: (hp-wmi-sensors) Check if WMI event data exists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BIOS can choose to return no event data in response to a WMI event, so the ACPI object passed to the WMI notify handler can be NULL. Check for such a situation and ignore the event in such a case. Fixes: 23902f98f8d4 ("hwmon: add HP WMI Sensors driver") Signed-off-by: Armin Wolf Reviewed-by: Ilpo Järvinen Message-ID: <20240901031055.3030-2-W_Armin@gmx.de> Signed-off-by: Guenter Roeck --- drivers/hwmon/hp-wmi-sensors.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/hp-wmi-sensors.c b/drivers/hwmon/hp-wmi-sensors.c index b5325d0e72b9..dfa1d6926dea 100644 --- a/drivers/hwmon/hp-wmi-sensors.c +++ b/drivers/hwmon/hp-wmi-sensors.c @@ -1637,6 +1637,8 @@ static void hp_wmi_notify(u32 value, void *context) goto out_unlock; wobj = out.pointer; + if (!wobj) + goto out_unlock; err = populate_event_from_wobj(dev, &event, wobj); if (err) { -- cgit From d3e154d7776ba57ab679fb816fb87b627fba21c9 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Fri, 30 Aug 2024 15:34:19 +0800 Subject: Revert "wifi: ath11k: restore country code during resume" This reverts commit 7f0343b7b8710436c1e6355c71782d32ada47e0c. We are going to revert commit 166a490f59ac ("wifi: ath11k: support hibernation"), on which this commit depends. With that commit reverted, this one is not needed any more, so revert this commit first. Signed-off-by: Baochen Qiang Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240830073420.5790-2-quic_bqiang@quicinc.com --- drivers/net/wireless/ath/ath11k/core.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 03187df26000..325b930aaf06 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -1009,16 +1009,6 @@ int ath11k_core_resume(struct ath11k_base *ab) return -ETIMEDOUT; } - if (ab->hw_params.current_cc_support && - ar->alpha2[0] != 0 && ar->alpha2[1] != 0) { - ret = ath11k_reg_set_cc(ar); - if (ret) { - ath11k_warn(ab, "failed to set country code during resume: %d\n", - ret); - return ret; - } - } - ret = ath11k_dp_rx_pktlog_start(ab); if (ret) ath11k_warn(ab, "failed to start rx pktlog during resume: %d\n", -- cgit From 2f833e8948d6c88a3a257d4e426c9897b4907d5a Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Fri, 30 Aug 2024 15:34:20 +0800 Subject: Revert "wifi: ath11k: support hibernation" This reverts commit 166a490f59ac10340ee5330e51c15188ce2a7f8f. There are several reports that this commit breaks system suspend on some specific Lenovo platforms. Since there is no fix available, for now revert this commit to make suspend work again on those platforms. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219196 Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2301921 Cc: # 6.10.x: d3e154d7776b: Revert "wifi: ath11k: restore country code during resume" Cc: # 6.10.x Signed-off-by: Baochen Qiang Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240830073420.5790-3-quic_bqiang@quicinc.com --- drivers/net/wireless/ath/ath11k/ahb.c | 4 +- drivers/net/wireless/ath/ath11k/core.c | 107 ++++++++++----------------------- drivers/net/wireless/ath/ath11k/core.h | 4 -- drivers/net/wireless/ath/ath11k/hif.h | 12 +--- drivers/net/wireless/ath/ath11k/mhi.c | 12 +--- drivers/net/wireless/ath/ath11k/mhi.h | 3 +- drivers/net/wireless/ath/ath11k/pci.c | 44 +++----------- drivers/net/wireless/ath/ath11k/qmi.c | 2 +- 8 files changed, 49 insertions(+), 139 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index 634d385fd9ad..97b12f51ef28 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -413,7 +413,7 @@ static int ath11k_ahb_power_up(struct ath11k_base *ab) return ret; } -static void ath11k_ahb_power_down(struct ath11k_base *ab, bool is_suspend) +static void ath11k_ahb_power_down(struct ath11k_base *ab) { struct ath11k_ahb *ab_ahb = ath11k_ahb_priv(ab); @@ -1280,7 +1280,7 @@ static void ath11k_ahb_remove(struct platform_device *pdev) struct ath11k_base *ab = platform_get_drvdata(pdev); if (test_bit(ATH11K_FLAG_QMI_FAIL, &ab->dev_flags)) { - ath11k_ahb_power_down(ab, false); + ath11k_ahb_power_down(ab); ath11k_debugfs_soc_destroy(ab); ath11k_qmi_deinit_service(ab); goto qmi_fail; diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 325b930aaf06..ccf4ad35fdc3 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -906,6 +906,12 @@ int ath11k_core_suspend(struct ath11k_base *ab) return ret; } + ret = ath11k_wow_enable(ab); + if (ret) { + ath11k_warn(ab, "failed to enable wow during suspend: %d\n", ret); + return ret; + } + ret = ath11k_dp_rx_pktlog_stop(ab, false); if (ret) { ath11k_warn(ab, "failed to stop dp rx pktlog during suspend: %d\n", @@ -916,85 +922,29 @@ int ath11k_core_suspend(struct ath11k_base *ab) ath11k_ce_stop_shadow_timers(ab); ath11k_dp_stop_shadow_timers(ab); - /* PM framework skips suspend_late/resume_early callbacks - * if other devices report errors in their suspend callbacks. - * However ath11k_core_resume() would still be called because - * here we return success thus kernel put us on dpm_suspended_list. - * Since we won't go through a power down/up cycle, there is - * no chance to call complete(&ab->restart_completed) in - * ath11k_core_restart(), making ath11k_core_resume() timeout. - * So call it here to avoid this issue. This also works in case - * no error happens thus suspend_late/resume_early get called, - * because it will be reinitialized in ath11k_core_resume_early(). - */ - complete(&ab->restart_completed); - - return 0; -} -EXPORT_SYMBOL(ath11k_core_suspend); - -int ath11k_core_suspend_late(struct ath11k_base *ab) -{ - struct ath11k_pdev *pdev; - struct ath11k *ar; - - if (!ab->hw_params.supports_suspend) - return -EOPNOTSUPP; - - /* so far single_pdev_only chips have supports_suspend as true - * and only the first pdev is valid. - */ - pdev = ath11k_core_get_single_pdev(ab); - ar = pdev->ar; - if (!ar || ar->state != ATH11K_STATE_OFF) - return 0; - ath11k_hif_irq_disable(ab); ath11k_hif_ce_irq_disable(ab); - ath11k_hif_power_down(ab, true); + ret = ath11k_hif_suspend(ab); + if (ret) { + ath11k_warn(ab, "failed to suspend hif: %d\n", ret); + return ret; + } return 0; } -EXPORT_SYMBOL(ath11k_core_suspend_late); - -int ath11k_core_resume_early(struct ath11k_base *ab) -{ - int ret; - struct ath11k_pdev *pdev; - struct ath11k *ar; - - if (!ab->hw_params.supports_suspend) - return -EOPNOTSUPP; - - /* so far single_pdev_only chips have supports_suspend as true - * and only the first pdev is valid. - */ - pdev = ath11k_core_get_single_pdev(ab); - ar = pdev->ar; - if (!ar || ar->state != ATH11K_STATE_OFF) - return 0; - - reinit_completion(&ab->restart_completed); - ret = ath11k_hif_power_up(ab); - if (ret) - ath11k_warn(ab, "failed to power up hif during resume: %d\n", ret); - - return ret; -} -EXPORT_SYMBOL(ath11k_core_resume_early); +EXPORT_SYMBOL(ath11k_core_suspend); int ath11k_core_resume(struct ath11k_base *ab) { int ret; struct ath11k_pdev *pdev; struct ath11k *ar; - long time_left; if (!ab->hw_params.supports_suspend) return -EOPNOTSUPP; - /* so far single_pdev_only chips have supports_suspend as true + /* so far signle_pdev_only chips have supports_suspend as true * and only the first pdev is valid. */ pdev = ath11k_core_get_single_pdev(ab); @@ -1002,19 +952,29 @@ int ath11k_core_resume(struct ath11k_base *ab) if (!ar || ar->state != ATH11K_STATE_OFF) return 0; - time_left = wait_for_completion_timeout(&ab->restart_completed, - ATH11K_RESET_TIMEOUT_HZ); - if (time_left == 0) { - ath11k_warn(ab, "timeout while waiting for restart complete"); - return -ETIMEDOUT; + ret = ath11k_hif_resume(ab); + if (ret) { + ath11k_warn(ab, "failed to resume hif during resume: %d\n", ret); + return ret; } + ath11k_hif_ce_irq_enable(ab); + ath11k_hif_irq_enable(ab); + ret = ath11k_dp_rx_pktlog_start(ab); - if (ret) + if (ret) { ath11k_warn(ab, "failed to start rx pktlog during resume: %d\n", ret); + return ret; + } - return ret; + ret = ath11k_wow_wakeup(ab); + if (ret) { + ath11k_warn(ab, "failed to wakeup wow during resume: %d\n", ret); + return ret; + } + + return 0; } EXPORT_SYMBOL(ath11k_core_resume); @@ -2109,8 +2069,6 @@ static void ath11k_core_restart(struct work_struct *work) if (!ab->is_reset) ath11k_core_post_reconfigure_recovery(ab); - - complete(&ab->restart_completed); } static void ath11k_core_reset(struct work_struct *work) @@ -2180,7 +2138,7 @@ static void ath11k_core_reset(struct work_struct *work) ath11k_hif_irq_disable(ab); ath11k_hif_ce_irq_disable(ab); - ath11k_hif_power_down(ab, false); + ath11k_hif_power_down(ab); ath11k_hif_power_up(ab); ath11k_dbg(ab, ATH11K_DBG_BOOT, "reset started\n"); @@ -2253,7 +2211,7 @@ void ath11k_core_deinit(struct ath11k_base *ab) mutex_unlock(&ab->core_lock); - ath11k_hif_power_down(ab, false); + ath11k_hif_power_down(ab); ath11k_mac_destroy(ab); ath11k_core_soc_destroy(ab); ath11k_fw_destroy(ab); @@ -2306,7 +2264,6 @@ struct ath11k_base *ath11k_core_alloc(struct device *dev, size_t priv_size, timer_setup(&ab->rx_replenish_retry, ath11k_ce_rx_replenish_retry, 0); init_completion(&ab->htc_suspend); init_completion(&ab->wow.wakeup_completed); - init_completion(&ab->restart_completed); ab->dev = dev; ab->hif.bus = bus; diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index df24f0e409af..b655967a465b 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -1036,8 +1036,6 @@ struct ath11k_base { DECLARE_BITMAP(fw_features, ATH11K_FW_FEATURE_COUNT); } fw; - struct completion restart_completed; - #ifdef CONFIG_NL80211_TESTMODE struct { u32 data_pos; @@ -1237,10 +1235,8 @@ void ath11k_core_free_bdf(struct ath11k_base *ab, struct ath11k_board_data *bd); int ath11k_core_check_dt(struct ath11k_base *ath11k); int ath11k_core_check_smbios(struct ath11k_base *ab); void ath11k_core_halt(struct ath11k *ar); -int ath11k_core_resume_early(struct ath11k_base *ab); int ath11k_core_resume(struct ath11k_base *ab); int ath11k_core_suspend(struct ath11k_base *ab); -int ath11k_core_suspend_late(struct ath11k_base *ab); void ath11k_core_pre_reconfigure_recovery(struct ath11k_base *ab); bool ath11k_core_coldboot_cal_support(struct ath11k_base *ab); diff --git a/drivers/net/wireless/ath/ath11k/hif.h b/drivers/net/wireless/ath/ath11k/hif.h index c4c6cc09c7c1..674ff772b181 100644 --- a/drivers/net/wireless/ath/ath11k/hif.h +++ b/drivers/net/wireless/ath/ath11k/hif.h @@ -18,7 +18,7 @@ struct ath11k_hif_ops { int (*start)(struct ath11k_base *ab); void (*stop)(struct ath11k_base *ab); int (*power_up)(struct ath11k_base *ab); - void (*power_down)(struct ath11k_base *ab, bool is_suspend); + void (*power_down)(struct ath11k_base *ab); int (*suspend)(struct ath11k_base *ab); int (*resume)(struct ath11k_base *ab); int (*map_service_to_pipe)(struct ath11k_base *ab, u16 service_id, @@ -67,18 +67,12 @@ static inline void ath11k_hif_irq_disable(struct ath11k_base *ab) static inline int ath11k_hif_power_up(struct ath11k_base *ab) { - if (!ab->hif.ops->power_up) - return -EOPNOTSUPP; - return ab->hif.ops->power_up(ab); } -static inline void ath11k_hif_power_down(struct ath11k_base *ab, bool is_suspend) +static inline void ath11k_hif_power_down(struct ath11k_base *ab) { - if (!ab->hif.ops->power_down) - return; - - ab->hif.ops->power_down(ab, is_suspend); + ab->hif.ops->power_down(ab); } static inline int ath11k_hif_suspend(struct ath11k_base *ab) diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c index ab182690aed3..6974a551883f 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.c +++ b/drivers/net/wireless/ath/ath11k/mhi.c @@ -453,17 +453,9 @@ int ath11k_mhi_start(struct ath11k_pci *ab_pci) return 0; } -void ath11k_mhi_stop(struct ath11k_pci *ab_pci, bool is_suspend) +void ath11k_mhi_stop(struct ath11k_pci *ab_pci) { - /* During suspend we need to use mhi_power_down_keep_dev() - * workaround, otherwise ath11k_core_resume() will timeout - * during resume. - */ - if (is_suspend) - mhi_power_down_keep_dev(ab_pci->mhi_ctrl, true); - else - mhi_power_down(ab_pci->mhi_ctrl, true); - + mhi_power_down(ab_pci->mhi_ctrl, true); mhi_unprepare_after_power_down(ab_pci->mhi_ctrl); } diff --git a/drivers/net/wireless/ath/ath11k/mhi.h b/drivers/net/wireless/ath/ath11k/mhi.h index 2d567705e732..a682aad52fc5 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.h +++ b/drivers/net/wireless/ath/ath11k/mhi.h @@ -18,7 +18,7 @@ #define MHICTRL_RESET_MASK 0x2 int ath11k_mhi_start(struct ath11k_pci *ar_pci); -void ath11k_mhi_stop(struct ath11k_pci *ar_pci, bool is_suspend); +void ath11k_mhi_stop(struct ath11k_pci *ar_pci); int ath11k_mhi_register(struct ath11k_pci *ar_pci); void ath11k_mhi_unregister(struct ath11k_pci *ar_pci); void ath11k_mhi_set_mhictrl_reset(struct ath11k_base *ab); @@ -26,4 +26,5 @@ void ath11k_mhi_clear_vector(struct ath11k_base *ab); int ath11k_mhi_suspend(struct ath11k_pci *ar_pci); int ath11k_mhi_resume(struct ath11k_pci *ar_pci); + #endif diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c index 8d63b84d1261..be9d2c69cc41 100644 --- a/drivers/net/wireless/ath/ath11k/pci.c +++ b/drivers/net/wireless/ath/ath11k/pci.c @@ -638,7 +638,7 @@ static int ath11k_pci_power_up(struct ath11k_base *ab) return 0; } -static void ath11k_pci_power_down(struct ath11k_base *ab, bool is_suspend) +static void ath11k_pci_power_down(struct ath11k_base *ab) { struct ath11k_pci *ab_pci = ath11k_pci_priv(ab); @@ -649,7 +649,7 @@ static void ath11k_pci_power_down(struct ath11k_base *ab, bool is_suspend) ath11k_pci_msi_disable(ab_pci); - ath11k_mhi_stop(ab_pci, is_suspend); + ath11k_mhi_stop(ab_pci); clear_bit(ATH11K_FLAG_DEVICE_INIT_DONE, &ab->dev_flags); ath11k_pci_sw_reset(ab_pci->ab, false); } @@ -970,7 +970,7 @@ static void ath11k_pci_remove(struct pci_dev *pdev) ath11k_pci_set_irq_affinity_hint(ab_pci, NULL); if (test_bit(ATH11K_FLAG_QMI_FAIL, &ab->dev_flags)) { - ath11k_pci_power_down(ab, false); + ath11k_pci_power_down(ab); ath11k_debugfs_soc_destroy(ab); ath11k_qmi_deinit_service(ab); goto qmi_fail; @@ -998,7 +998,7 @@ static void ath11k_pci_shutdown(struct pci_dev *pdev) struct ath11k_pci *ab_pci = ath11k_pci_priv(ab); ath11k_pci_set_irq_affinity_hint(ab_pci, NULL); - ath11k_pci_power_down(ab, false); + ath11k_pci_power_down(ab); } static __maybe_unused int ath11k_pci_pm_suspend(struct device *dev) @@ -1035,39 +1035,9 @@ static __maybe_unused int ath11k_pci_pm_resume(struct device *dev) return ret; } -static __maybe_unused int ath11k_pci_pm_suspend_late(struct device *dev) -{ - struct ath11k_base *ab = dev_get_drvdata(dev); - int ret; - - ret = ath11k_core_suspend_late(ab); - if (ret) - ath11k_warn(ab, "failed to late suspend core: %d\n", ret); - - /* Similar to ath11k_pci_pm_suspend(), we return success here - * even error happens, to allow system suspend/hibernation survive. - */ - return 0; -} - -static __maybe_unused int ath11k_pci_pm_resume_early(struct device *dev) -{ - struct ath11k_base *ab = dev_get_drvdata(dev); - int ret; - - ret = ath11k_core_resume_early(ab); - if (ret) - ath11k_warn(ab, "failed to early resume core: %d\n", ret); - - return ret; -} - -static const struct dev_pm_ops __maybe_unused ath11k_pci_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(ath11k_pci_pm_suspend, - ath11k_pci_pm_resume) - SET_LATE_SYSTEM_SLEEP_PM_OPS(ath11k_pci_pm_suspend_late, - ath11k_pci_pm_resume_early) -}; +static SIMPLE_DEV_PM_OPS(ath11k_pci_pm_ops, + ath11k_pci_pm_suspend, + ath11k_pci_pm_resume); static struct pci_driver ath11k_pci_driver = { .name = "ath11k_pci", diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c index 1bc648920ab6..f477afd325de 100644 --- a/drivers/net/wireless/ath/ath11k/qmi.c +++ b/drivers/net/wireless/ath/ath11k/qmi.c @@ -2877,7 +2877,7 @@ int ath11k_qmi_fwreset_from_cold_boot(struct ath11k_base *ab) } /* reset the firmware */ - ath11k_hif_power_down(ab, false); + ath11k_hif_power_down(ab); ath11k_hif_power_up(ab); ath11k_dbg(ab, ATH11K_DBG_QMI, "exit wait for cold boot done\n"); return 0; -- cgit From c346c629765ab982967017e2ae859156d0e235cf Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 28 Aug 2024 19:14:11 +0300 Subject: btrfs: qgroup: don't use extent changeset when not needed The local extent changeset is passed to clear_record_extent_bits() where it may have some additional memory dynamically allocated for ulist. When qgroup is disabled, the memory is leaked because in this case the changeset is not released upon __btrfs_qgroup_release_data() return. Since the recorded contents of the changeset are not used thereafter, just don't pass it. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Reported-by: syzbot+81670362c283f3dd889c@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/000000000000aa8c0c060ade165e@google.com Fixes: af0e2aab3b70 ("btrfs: qgroup: flush reservations during quota disable") CC: stable@vger.kernel.org # 6.10+ Reviewed-by: Boris Burkov Reviewed-by: Qu Wenruo Signed-off-by: Fedor Pchelkin Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 7d6f5d9420ec..feb8f9f2f358 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4346,10 +4346,9 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, int ret; if (btrfs_qgroup_mode(inode->root->fs_info) == BTRFS_QGROUP_MODE_DISABLED) { - extent_changeset_init(&changeset); return clear_record_extent_bits(&inode->io_tree, start, start + len - 1, - EXTENT_QGROUP_RESERVED, &changeset); + EXTENT_QGROUP_RESERVED, NULL); } /* In release case, we shouldn't have @reserved */ -- cgit From 1c7fb536e899a2f66f9b1719a0234570dda2e634 Mon Sep 17 00:00:00 2001 From: Veronika Molnarova Date: Thu, 8 Aug 2024 12:37:49 +0200 Subject: perf test pmu: Set uninitialized PMU alias to null Commit 3e0bf9fde2984469 ("perf pmu: Restore full PMU name wildcard support") adds a test case "PMU cmdline match" that covers PMU name wildcard support provided by function perf_pmu__match(). The test works with a wide range of supported combinations of PMU name matching but omits the case that if the perf_pmu__match() cannot match the PMU name to the wildcard, it tries to match its alias. However, this variable is not set up, causing the test case to fail when run with subprocesses or to segfault if run as a single process. ./perf test -vv 9 9: Sysfs PMU tests : 9.1: Parsing with PMU format directory : Ok 9.2: Parsing with PMU event : Ok 9.3: PMU event names : Ok 9.4: PMU name combining : Ok 9.5: PMU name comparison : Ok 9.6: PMU cmdline match : FAILED! ./perf test -F 9 9.1: Parsing with PMU format directory : Ok 9.2: Parsing with PMU event : Ok 9.3: PMU event names : Ok 9.4: PMU name combining : Ok 9.5: PMU name comparison : Ok Segmentation fault (core dumped) Initialize the PMU alias to null for all tests of perf_pmu__match() as this functionality is not being tested and the alias matching works exactly the same as the matching of the PMU name. ./perf test -F 9 9.1: Parsing with PMU format directory : Ok 9.2: Parsing with PMU event : Ok 9.3: PMU event names : Ok 9.4: PMU name combining : Ok 9.5: PMU name comparison : Ok 9.6: PMU cmdline match : Ok Fixes: 3e0bf9fde2984469 ("perf pmu: Restore full PMU name wildcard support") Signed-off-by: Veronika Molnarova Cc: james.clark@arm.com Cc: mpetlan@redhat.com Cc: rstoyano@redhat.com Link: https://lore.kernel.org/r/20240808103749.9356-1-vmolnaro@redhat.com Signed-off-by: Namhyung Kim --- tools/perf/tests/pmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 40132655ccd1..c76f53a90a7b 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -456,11 +456,13 @@ static int test__name_cmp(struct test_suite *test __maybe_unused, int subtest __ /** * Test perf_pmu__match() that's used to search for a PMU given a name passed * on the command line. The name that's passed may also be a filename type glob - * match. + * match. If the name does not match, perf_pmu__match() attempts to match the + * alias of the PMU, if provided. */ static int test__pmu_match(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { struct perf_pmu test_pmu; + test_pmu.alias_name = NULL; test_pmu.name = "pmuname"; TEST_ASSERT_EQUAL("Exact match", perf_pmu__match(&test_pmu, "pmuname"), true); -- cgit From 287bd5cf06e0f2c02293ce942777ad1f18059ed3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 27 Aug 2024 22:29:53 -0700 Subject: perf lock contention: Fix spinlock and rwlock accounting The spinlock and rwlock use a single-element per-cpu array to track current locks due to performance reason. But this means the key is always available and it cannot simply account lock stats in the array because some of them are invalid. In fact, the contention_end() program in the BPF invalidates the entry by setting the 'lock' value to 0 instead of deleting the entry for the hashmap. So it should skip entries with the lock value of 0 in the account_end_timestamp(). Otherwise, it'd have spurious high contention on an idle machine: $ sudo perf lock con -ab -Y spinlock sleep 3 contended total wait max wait avg wait type caller 8 4.72 s 1.84 s 590.46 ms spinlock rcu_core+0xc7 8 1.87 s 1.87 s 233.48 ms spinlock process_one_work+0x1b5 2 1.87 s 1.87 s 933.92 ms spinlock worker_thread+0x1a2 3 1.81 s 1.81 s 603.93 ms spinlock tmigr_update_events+0x13c 2 1.72 s 1.72 s 861.98 ms spinlock tick_do_update_jiffies64+0x25 6 42.48 us 13.02 us 7.08 us spinlock futex_q_lock+0x2a 1 13.03 us 13.03 us 13.03 us spinlock futex_wake+0xce 1 11.61 us 11.61 us 11.61 us spinlock rcu_core+0xc7 I don't believe it has contention on a spinlock longer than 1 second. After this change, it only reports some small contentions. $ sudo perf lock con -ab -Y spinlock sleep 3 contended total wait max wait avg wait type caller 4 133.51 us 43.29 us 33.38 us spinlock tick_do_update_jiffies64+0x25 4 69.06 us 31.82 us 17.27 us spinlock process_one_work+0x1b5 2 50.66 us 25.77 us 25.33 us spinlock rcu_core+0xc7 1 28.45 us 28.45 us 28.45 us spinlock rcu_core+0xc7 1 24.77 us 24.77 us 24.77 us spinlock tmigr_update_events+0x13c 1 23.34 us 23.34 us 23.34 us spinlock raw_spin_rq_lock_nested+0x15 Fixes: b5711042a1c8 ("perf lock contention: Use per-cpu array map for spinlocks") Reported-by: Xi Wang Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20240828052953.1445862-1-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/util/bpf_lock_contention.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index b4cb3fe5cc25..bc4e92c0c08b 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -286,6 +286,9 @@ static void account_end_timestamp(struct lock_contention *con) goto next; for (int i = 0; i < total_cpus; i++) { + if (cpu_data[i].lock == 0) + continue; + update_lock_stat(stat_fd, -1, end_ts, aggr_mode, &cpu_data[i]); } -- cgit From aee1d55922977bf9282398283a72d38fc5514540 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 19 Aug 2024 10:34:03 +0800 Subject: perf python: include "util/sample.h" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 32-bit arm build system will complain: tools/perf/util/python.c:75:28: error: field ‘sample’ has incomplete type 75 | struct perf_sample sample; However, arm64 build system doesn't complain this. The root cause is arm64 define "HAVE_KVM_STAT_SUPPORT := 1" in tools/perf/arch/arm64/Makefile, but arm arch doesn't define this. This will lead to kvm-stat.h include other header files on arm64 build system, especially "util/sample.h" for util/python.c. This will try to directly include "util/sample.h" for "util/python.c" to avoid such build issue on arm platform. Signed-off-by: Xu Yang Cc: imx@lists.linux.dev Link: https://lore.kernel.org/r/20240819023403.201324-1-xu.yang_2@nxp.com Signed-off-by: Namhyung Kim --- tools/perf/util/python.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 3be882b2e845..31a223eaf8e6 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -20,6 +20,7 @@ #include "util/env.h" #include "util/kvm-stat.h" #include "util/kwork.h" +#include "util/sample.h" #include "util/lock-contention.h" #include #include "../builtin.h" -- cgit From e162cb25c410afc42051a582c46a47dde597f51c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Aug 2024 21:43:01 -0300 Subject: perf daemon: Fix the build on more 32-bit architectures FYI: I'm carrying this on perf-tools-next. The previous attempt fixed the build on debian:experimental-x-mipsel, but when building on a larger set of containers I noticed it broke the build on some other 32-bit architectures such as: 42 7.87 ubuntu:18.04-x-arm : FAIL gcc version 7.5.0 (Ubuntu/Linaro 7.5.0-3ubuntu1~18.04) builtin-daemon.c: In function 'cmd_session_list': builtin-daemon.c:692:16: error: format '%llu' expects argument of type 'long long unsigned int', but argument 4 has type 'long int' [-Werror=format=] fprintf(out, "%c%" PRIu64, ^~~~~ builtin-daemon.c:694:13: csv_sep, (curr - daemon->start) / 60); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from builtin-daemon.c:3:0: /usr/arm-linux-gnueabihf/include/inttypes.h:105:34: note: format string is defined here # define PRIu64 __PRI64_PREFIX "u" So lets cast that time_t (32-bit/64-bit) to uint64_t to make sure it builds everywhere. Fixes: 4bbe6002931954bb ("perf daemon: Fix the build on 32-bit architectures") Signed-off-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/ZsPmldtJ0D9Cua9_@x1 Signed-off-by: Namhyung Kim --- tools/perf/builtin-daemon.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 5c9335fff2d3..9a95871afc95 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -691,7 +691,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) fprintf(out, "%c%" PRIu64, /* session up time */ - csv_sep, (curr - daemon->start) / 60); + csv_sep, (uint64_t)((curr - daemon->start) / 60)); fprintf(out, "\n"); } else { @@ -702,7 +702,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) fprintf(out, " lock: %s/lock\n", daemon->base); fprintf(out, " up: %" PRIu64 " minutes\n", - (curr - daemon->start) / 60); + (uint64_t)((curr - daemon->start) / 60)); } } @@ -730,7 +730,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) fprintf(out, "%c%" PRIu64, /* session up time */ - csv_sep, (curr - session->start) / 60); + csv_sep, (uint64_t)((curr - session->start) / 60)); fprintf(out, "\n"); } else { @@ -747,7 +747,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) fprintf(out, " ack: %s/%s\n", session->base, SESSION_ACK); fprintf(out, " up: %" PRIu64 " minutes\n", - (curr - session->start) / 60); + (uint64_t)((curr - session->start) / 60)); } } -- cgit From b1934cd6069538db2255dc94ba573771ecf3b560 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Sat, 31 Aug 2024 01:32:49 +0900 Subject: btrfs: zoned: handle broken write pointer on zones Btrfs rejects to mount a FS if it finds a block group with a broken write pointer (e.g, unequal write pointers on two zones of RAID1 block group). Since such case can happen easily with a power-loss or crash of a system, we need to handle the case more gently. Handle such block group by making it unallocatable, so that there will be no writes into it. That can be done by setting the allocation pointer at the end of allocating region (= block_group->zone_capacity). Then, existing code handle zone_unusable properly. Having proper zone_capacity is necessary for the change. So, set it as fast as possible. We cannot handle RAID0 and RAID10 case like this. But, they are anyway unable to read because of a missing stripe. Fixes: 265f7237dd25 ("btrfs: zoned: allow DUP on meta-data block groups") Fixes: 568220fa9657 ("btrfs: zoned: support RAID0/1/10 on top of raid stripe tree") CC: stable@vger.kernel.org # 6.1+ Reported-by: HAN Yuwei Cc: Xuefer Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 66f63e82af79..047e3337852e 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1406,6 +1406,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, return -EINVAL; } + bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity); + if (zone_info[0].alloc_offset == WP_MISSING_DEV) { btrfs_err(bg->fs_info, "zoned: cannot recover write pointer for zone %llu", @@ -1432,7 +1434,6 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, } bg->alloc_offset = zone_info[0].alloc_offset; - bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity); return 0; } @@ -1450,6 +1451,9 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg, return -EINVAL; } + /* In case a device is missing we have a cap of 0, so don't use it. */ + bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity); + for (i = 0; i < map->num_stripes; i++) { if (zone_info[i].alloc_offset == WP_MISSING_DEV || zone_info[i].alloc_offset == WP_CONVENTIONAL) @@ -1471,9 +1475,6 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg, if (test_bit(0, active)) set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); } - /* In case a device is missing we have a cap of 0, so don't use it. */ - bg->zone_capacity = min_not_zero(zone_info[0].capacity, - zone_info[1].capacity); } if (zone_info[0].alloc_offset != WP_MISSING_DEV) @@ -1563,6 +1564,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) unsigned long *active = NULL; u64 last_alloc = 0; u32 num_sequential = 0, num_conventional = 0; + u64 profile; if (!btrfs_is_zoned(fs_info)) return 0; @@ -1623,7 +1625,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) } } - switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK; + switch (profile) { case 0: /* single */ ret = btrfs_load_block_group_single(cache, &zone_info[0], active); break; @@ -1650,6 +1653,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) goto out; } + if (ret == -EIO && profile != 0 && profile != BTRFS_BLOCK_GROUP_RAID0 && + profile != BTRFS_BLOCK_GROUP_RAID10) { + /* + * Detected broken write pointer. Make this block group + * unallocatable by setting the allocation pointer at the end of + * allocatable region. Relocating this block group will fix the + * mismatch. + * + * Currently, we cannot handle RAID0 or RAID10 case like this + * because we don't have a proper zone_capacity value. But, + * reading from this block group won't work anyway by a missing + * stripe. + */ + cache->alloc_offset = cache->zone_capacity; + ret = 0; + } + out: /* Reject non SINGLE data profiles without RST */ if ((map->type & BTRFS_BLOCK_GROUP_DATA) && -- cgit From 5e24db550bd6f484d2c7687ee488708260e1f84a Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 29 Aug 2024 15:03:19 +0300 Subject: net: ethernet: ti: am65-cpsw: fix XDP_DROP, XDP_TX and XDP_REDIRECT The following XDP_DROP test from [1] stalls the interface after 250 packets. ~# xdb-bench drop -m native eth0 This is because new RX requests are never queued. Fix that. The below XDP_TX test from [1] fails with a warning [ 499.947381] XDP_WARN: xdp_update_frame_from_buff(line:277): Driver BUG: missing reserved tailroom ~# xdb-bench tx -m native eth0 Fix that by using PAGE_SIZE during xdp_init_buf(). In XDP_REDIRECT case only 1 packet was processed in rx_poll. Fix it to process up to budget packets. Fix all XDP error cases to call trace_xdp_exception() and drop the packet in am65_cpsw_run_xdp(). [1] xdp-tools suite https://github.com/xdp-project/xdp-tools Fixes: 8acacc40f733 ("net: ethernet: ti: am65-cpsw: Add minimal XDP support") Signed-off-by: Roger Quadros Reviewed-by: Jacob Keller Acked-by: Julien Panis Reviewed-by: MD Danish Anwar Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 62 +++++++++++++++++--------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 81d9f21086ec..9fd2ba26716c 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -156,12 +156,13 @@ #define AM65_CPSW_CPPI_TX_PKT_TYPE 0x7 /* XDP */ -#define AM65_CPSW_XDP_CONSUMED 2 -#define AM65_CPSW_XDP_REDIRECT 1 +#define AM65_CPSW_XDP_CONSUMED BIT(1) +#define AM65_CPSW_XDP_REDIRECT BIT(0) #define AM65_CPSW_XDP_PASS 0 /* Include headroom compatible with both skb and xdpf */ -#define AM65_CPSW_HEADROOM (max(NET_SKB_PAD, XDP_PACKET_HEADROOM) + NET_IP_ALIGN) +#define AM65_CPSW_HEADROOM_NA (max(NET_SKB_PAD, XDP_PACKET_HEADROOM) + NET_IP_ALIGN) +#define AM65_CPSW_HEADROOM ALIGN(AM65_CPSW_HEADROOM_NA, sizeof(long)) static void am65_cpsw_port_set_sl_mac(struct am65_cpsw_port *slave, const u8 *dev_addr) @@ -933,7 +934,7 @@ static int am65_cpsw_xdp_tx_frame(struct net_device *ndev, host_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); if (unlikely(!host_desc)) { ndev->stats.tx_dropped++; - return -ENOMEM; + return AM65_CPSW_XDP_CONSUMED; /* drop */ } am65_cpsw_nuss_set_buf_type(tx_chn, host_desc, buf_type); @@ -942,7 +943,7 @@ static int am65_cpsw_xdp_tx_frame(struct net_device *ndev, pkt_len, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(tx_chn->dma_dev, dma_buf))) { ndev->stats.tx_dropped++; - ret = -ENOMEM; + ret = AM65_CPSW_XDP_CONSUMED; /* drop */ goto pool_free; } @@ -977,6 +978,7 @@ static int am65_cpsw_xdp_tx_frame(struct net_device *ndev, /* Inform BQL */ netdev_tx_completed_queue(netif_txq, 1, pkt_len); ndev->stats.tx_errors++; + ret = AM65_CPSW_XDP_CONSUMED; /* drop */ goto dma_unmap; } @@ -1004,6 +1006,7 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_common *common, struct bpf_prog *prog; struct page *page; u32 act; + int err; prog = READ_ONCE(port->xdp_prog); if (!prog) @@ -1023,14 +1026,14 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_common *common, xdpf = xdp_convert_buff_to_frame(xdp); if (unlikely(!xdpf)) - break; + goto drop; __netif_tx_lock(netif_txq, cpu); - ret = am65_cpsw_xdp_tx_frame(ndev, tx_chn, xdpf, + err = am65_cpsw_xdp_tx_frame(ndev, tx_chn, xdpf, AM65_CPSW_TX_BUF_TYPE_XDP_TX); __netif_tx_unlock(netif_txq); - if (ret) - break; + if (err) + goto drop; ndev->stats.rx_bytes += *len; ndev->stats.rx_packets++; @@ -1038,7 +1041,7 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_common *common, goto out; case XDP_REDIRECT: if (unlikely(xdp_do_redirect(ndev, xdp, prog))) - break; + goto drop; ndev->stats.rx_bytes += *len; ndev->stats.rx_packets++; @@ -1048,6 +1051,7 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_common *common, bpf_warn_invalid_xdp_action(ndev, prog, act); fallthrough; case XDP_ABORTED: +drop: trace_xdp_exception(ndev, prog, act); fallthrough; case XDP_DROP: @@ -1056,7 +1060,6 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_common *common, page = virt_to_head_page(xdp->data); am65_cpsw_put_page(rx_chn, page, true, desc_idx); - out: return ret; } @@ -1095,7 +1098,7 @@ static void am65_cpsw_nuss_rx_csum(struct sk_buff *skb, u32 csum_info) } static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, - u32 flow_idx, int cpu) + u32 flow_idx, int cpu, int *xdp_state) { struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; u32 buf_dma_len, pkt_len, port_id = 0, csum_info; @@ -1114,6 +1117,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, void **swdata; u32 *psdata; + *xdp_state = AM65_CPSW_XDP_PASS; ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_idx, &desc_dma); if (ret) { if (ret != -ENODATA) @@ -1161,15 +1165,13 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, } if (port->xdp_prog) { - xdp_init_buff(&xdp, AM65_CPSW_MAX_PACKET_SIZE, &port->xdp_rxq); - - xdp_prepare_buff(&xdp, page_addr, skb_headroom(skb), + xdp_init_buff(&xdp, PAGE_SIZE, &port->xdp_rxq); + xdp_prepare_buff(&xdp, page_addr, AM65_CPSW_HEADROOM, pkt_len, false); - - ret = am65_cpsw_run_xdp(common, port, &xdp, desc_idx, - cpu, &pkt_len); - if (ret != AM65_CPSW_XDP_PASS) - return ret; + *xdp_state = am65_cpsw_run_xdp(common, port, &xdp, desc_idx, + cpu, &pkt_len); + if (*xdp_state != AM65_CPSW_XDP_PASS) + goto allocate; /* Compute additional headroom to be reserved */ headroom = (xdp.data - xdp.data_hard_start) - skb_headroom(skb); @@ -1193,9 +1195,13 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, stats->rx_bytes += pkt_len; u64_stats_update_end(&stats->syncp); +allocate: new_page = page_pool_dev_alloc_pages(rx_chn->page_pool); - if (unlikely(!new_page)) + if (unlikely(!new_page)) { + dev_err(dev, "page alloc failed\n"); return -ENOMEM; + } + rx_chn->pages[desc_idx] = new_page; if (netif_dormant(ndev)) { @@ -1229,8 +1235,9 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) struct am65_cpsw_common *common = am65_cpsw_napi_to_common(napi_rx); int flow = AM65_CPSW_MAX_RX_FLOWS; int cpu = smp_processor_id(); - bool xdp_redirect = false; + int xdp_state_or = 0; int cur_budget, ret; + int xdp_state; int num_rx = 0; /* process every flow */ @@ -1238,12 +1245,11 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) cur_budget = budget - num_rx; while (cur_budget--) { - ret = am65_cpsw_nuss_rx_packets(common, flow, cpu); - if (ret) { - if (ret == AM65_CPSW_XDP_REDIRECT) - xdp_redirect = true; + ret = am65_cpsw_nuss_rx_packets(common, flow, cpu, + &xdp_state); + xdp_state_or |= xdp_state; + if (ret) break; - } num_rx++; } @@ -1251,7 +1257,7 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) break; } - if (xdp_redirect) + if (xdp_state_or & AM65_CPSW_XDP_REDIRECT) xdp_do_flush(); dev_dbg(common->dev, "%s num_rx:%d %d\n", __func__, num_rx, budget); -- cgit From 0a50c35277f96481a5a6ed5faf347f282040c57d Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 29 Aug 2024 15:03:20 +0300 Subject: net: ethernet: ti: am65-cpsw: Fix NULL dereference on XDP_TX If number of TX queues are set to 1 we get a NULL pointer dereference during XDP_TX. ~# ethtool -L eth0 tx 1 ~# ./xdp-trafficgen udp -A -a eth0 -t 2 Transmitting on eth0 (ifindex 2) [ 241.135257] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000030 Fix this by using actual TX queues instead of max TX queues when picking the TX channel in am65_cpsw_ndo_xdp_xmit(). Fixes: 8acacc40f733 ("net: ethernet: ti: am65-cpsw: Add minimal XDP support") Signed-off-by: Roger Quadros Reviewed-by: Jacob Keller Acked-by: Julien Panis Reviewed-by: MD Danish Anwar Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 9fd2ba26716c..03577a008df2 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1924,12 +1924,13 @@ static int am65_cpsw_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf) static int am65_cpsw_ndo_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames, u32 flags) { + struct am65_cpsw_common *common = am65_ndev_to_common(ndev); struct am65_cpsw_tx_chn *tx_chn; struct netdev_queue *netif_txq; int cpu = smp_processor_id(); int i, nxmit = 0; - tx_chn = &am65_ndev_to_common(ndev)->tx_chns[cpu % AM65_CPSW_MAX_TX_QUEUES]; + tx_chn = &common->tx_chns[cpu % common->tx_ch_num]; netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); __netif_tx_lock(netif_txq, cpu); -- cgit From 624d3291484f9cada10660f820db926c0bce7741 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 29 Aug 2024 15:03:21 +0300 Subject: net: ethernet: ti: am65-cpsw: Fix RX statistics for XDP_TX and XDP_REDIRECT We are not using ndev->stats for rx_packets and rx_bytes anymore. Instead, we use per CPU stats which are collated in am65_cpsw_nuss_ndo_get_stats(). Fix RX statistics for XDP_TX and XDP_REDIRECT cases. Fixes: 8acacc40f733 ("net: ethernet: ti: am65-cpsw: Add minimal XDP support") Signed-off-by: Roger Quadros Reviewed-by: Jacob Keller Acked-by: Julien Panis Reviewed-by: MD Danish Anwar Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 03577a008df2..b06b8872b4eb 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -998,7 +998,9 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_common *common, int desc_idx, int cpu, int *len) { struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; + struct am65_cpsw_ndev_priv *ndev_priv; struct net_device *ndev = port->ndev; + struct am65_cpsw_ndev_stats *stats; int ret = AM65_CPSW_XDP_CONSUMED; struct am65_cpsw_tx_chn *tx_chn; struct netdev_queue *netif_txq; @@ -1016,6 +1018,9 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_common *common, /* XDP prog might have changed packet data and boundaries */ *len = xdp->data_end - xdp->data; + ndev_priv = netdev_priv(ndev); + stats = this_cpu_ptr(ndev_priv->stats); + switch (act) { case XDP_PASS: ret = AM65_CPSW_XDP_PASS; @@ -1035,16 +1040,20 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_common *common, if (err) goto drop; - ndev->stats.rx_bytes += *len; - ndev->stats.rx_packets++; + u64_stats_update_begin(&stats->syncp); + stats->rx_bytes += *len; + stats->rx_packets++; + u64_stats_update_end(&stats->syncp); ret = AM65_CPSW_XDP_CONSUMED; goto out; case XDP_REDIRECT: if (unlikely(xdp_do_redirect(ndev, xdp, prog))) goto drop; - ndev->stats.rx_bytes += *len; - ndev->stats.rx_packets++; + u64_stats_update_begin(&stats->syncp); + stats->rx_bytes += *len; + stats->rx_packets++; + u64_stats_update_end(&stats->syncp); ret = AM65_CPSW_XDP_REDIRECT; goto out; default: -- cgit From 2560db6ede1aaf162a73b2df43e0b6c5ed8819f7 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Fri, 30 Aug 2024 10:20:25 +0800 Subject: net: phy: Fix missing of_node_put() for leds The call of of_get_child_by_name() will cause refcount incremented for leds, if it succeeds, it should call of_node_put() to decrease it, fix it. Fixes: 01e5b728e9e4 ("net: phy: Add a binding for PHY LEDs") Reviewed-by: Jonathan Cameron Signed-off-by: Jinjie Ruan Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20240830022025.610844-1-ruanjinjie@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/phy/phy_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 7752e9386b40..6bb2793de0a9 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3347,11 +3347,13 @@ static int of_phy_leds(struct phy_device *phydev) err = of_phy_led(phydev, led); if (err) { of_node_put(led); + of_node_put(leds); phy_leds_unregister(phydev); return err; } } + of_node_put(leds); return 0; } -- cgit From 213aa670153ed675a007c1f35c5db544b0fefc94 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 31 Aug 2024 14:02:06 +0200 Subject: parisc: Delay write-protection until mark_rodata_ro() call Do not write-protect the kernel read-only and __ro_after_init sections earlier than before mark_rodata_ro() is called. This fixes a boot issue on parisc which is triggered by commit 91a1d97ef482 ("jump_label,module: Don't alloc static_key_mod for __ro_after_init keys"). That commit may modify static key contents in the __ro_after_init section at bootup, so this section needs to be writable at least until mark_rodata_ro() is called. Signed-off-by: Helge Deller Reported-by: matoro Reported-by: Christoph Biedl Tested-by: Christoph Biedl Link: https://lore.kernel.org/linux-parisc/096cad5aada514255cd7b0b9dbafc768@matoro.tk/#r Fixes: 91a1d97ef482 ("jump_label,module: Don't alloc static_key_mod for __ro_after_init keys") Cc: stable@vger.kernel.org # v6.10+ --- arch/parisc/mm/init.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 34d91cb8b259..96970fa75e4a 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -459,7 +459,6 @@ void free_initmem(void) unsigned long kernel_end = (unsigned long)&_end; /* Remap kernel text and data, but do not touch init section yet. */ - kernel_set_to_readonly = true; map_pages(init_end, __pa(init_end), kernel_end - init_end, PAGE_KERNEL, 0); @@ -493,11 +492,18 @@ void free_initmem(void) #ifdef CONFIG_STRICT_KERNEL_RWX void mark_rodata_ro(void) { - /* rodata memory was already mapped with KERNEL_RO access rights by - pagetable_init() and map_pages(). No need to do additional stuff here */ - unsigned long roai_size = __end_ro_after_init - __start_ro_after_init; + unsigned long start = (unsigned long) &__start_rodata; + unsigned long end = (unsigned long) &__end_rodata; + + pr_info("Write protecting the kernel read-only data: %luk\n", + (end - start) >> 10); + + kernel_set_to_readonly = true; + map_pages(start, __pa(start), end - start, PAGE_KERNEL, 0); - pr_info("Write protected read-only-after-init data: %luk\n", roai_size >> 10); + /* force the kernel to see the new page table entries */ + flush_cache_all(); + flush_tlb_all(); } #endif -- cgit From 5478a4f7b94414def7b56d2f18bc2ed9b0f3f1f2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Sep 2024 14:32:27 +0200 Subject: spi: spidev: Add missing spi_device_id for jg10309-01 When the of_device_id entry for "elgin,jg10309-01" was added, the corresponding spi_device_id was forgotten, causing a warning message during boot-up: SPI driver spidev has no spi_device_id for elgin,jg10309-01 Fix module autoloading and shut up the warning by adding the missing entry. Fixes: 5f3eee1eef5d0edd ("spi: spidev: Add an entry for elgin,jg10309-01") Signed-off-by: Geert Uytterhoeven Link: https://patch.msgid.link/54bbb9d8a8db7e52d13e266f2d4a9bcd8b42a98a.1725366625.git.geert+renesas@glider.be Signed-off-by: Mark Brown --- drivers/spi/spidev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 14bf0fa65bef..face93a9cf20 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -702,6 +702,7 @@ static const struct class spidev_class = { static const struct spi_device_id spidev_spi_ids[] = { { .name = "bh2228fv" }, { .name = "dh2228fv" }, + { .name = "jg10309-01" }, { .name = "ltc2488" }, { .name = "sx1301" }, { .name = "bk4" }, -- cgit From d7875b4b078f7e2d862e88aed99c3ea0381aa189 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 29 Aug 2024 11:36:01 -0700 Subject: ptp: ocp: convert serial ports to array Simplify serial port management code by using array of ports and helpers to get the name of the port. This change is needed to make the next patch simplier. Signed-off-by: Vadim Fedorenko Reviewed-by: Greg Kroah-Hartman Signed-off-by: Paolo Abeni --- drivers/ptp/ptp_ocp.c | 120 ++++++++++++++++++++++++-------------------------- 1 file changed, 57 insertions(+), 63 deletions(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index ee2ced88ab34..46369de8e30b 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -316,6 +316,15 @@ struct ptp_ocp_serial_port { #define OCP_SERIAL_LEN 6 #define OCP_SMA_NUM 4 +enum { + PORT_GNSS, + PORT_GNSS2, + PORT_MAC, /* miniature atomic clock */ + PORT_NMEA, + + __PORT_COUNT, +}; + struct ptp_ocp { struct pci_dev *pdev; struct device dev; @@ -357,10 +366,7 @@ struct ptp_ocp { struct delayed_work sync_work; int id; int n_irqs; - struct ptp_ocp_serial_port gnss_port; - struct ptp_ocp_serial_port gnss2_port; - struct ptp_ocp_serial_port mac_port; /* miniature atomic clock */ - struct ptp_ocp_serial_port nmea_port; + struct ptp_ocp_serial_port port[__PORT_COUNT]; bool fw_loader; u8 fw_tag; u16 fw_version; @@ -655,28 +661,28 @@ static struct ocp_resource ocp_fb_resource[] = { }, }, { - OCP_SERIAL_RESOURCE(gnss_port), + OCP_SERIAL_RESOURCE(port[PORT_GNSS]), .offset = 0x00160000 + 0x1000, .irq_vec = 3, .extra = &(struct ptp_ocp_serial_port) { .baud = 115200, }, }, { - OCP_SERIAL_RESOURCE(gnss2_port), + OCP_SERIAL_RESOURCE(port[PORT_GNSS2]), .offset = 0x00170000 + 0x1000, .irq_vec = 4, .extra = &(struct ptp_ocp_serial_port) { .baud = 115200, }, }, { - OCP_SERIAL_RESOURCE(mac_port), + OCP_SERIAL_RESOURCE(port[PORT_MAC]), .offset = 0x00180000 + 0x1000, .irq_vec = 5, .extra = &(struct ptp_ocp_serial_port) { .baud = 57600, }, }, { - OCP_SERIAL_RESOURCE(nmea_port), + OCP_SERIAL_RESOURCE(port[PORT_NMEA]), .offset = 0x00190000 + 0x1000, .irq_vec = 10, }, { @@ -740,7 +746,7 @@ static struct ocp_resource ocp_art_resource[] = { .offset = 0x01000000, .size = 0x10000, }, { - OCP_SERIAL_RESOURCE(gnss_port), + OCP_SERIAL_RESOURCE(port[PORT_GNSS]), .offset = 0x00160000 + 0x1000, .irq_vec = 3, .extra = &(struct ptp_ocp_serial_port) { .baud = 115200, @@ -839,7 +845,7 @@ static struct ocp_resource ocp_art_resource[] = { }, }, { - OCP_SERIAL_RESOURCE(mac_port), + OCP_SERIAL_RESOURCE(port[PORT_MAC]), .offset = 0x00190000, .irq_vec = 7, .extra = &(struct ptp_ocp_serial_port) { .baud = 9600, @@ -950,14 +956,14 @@ static struct ocp_resource ocp_adva_resource[] = { .offset = 0x00220000, .size = 0x1000, }, { - OCP_SERIAL_RESOURCE(gnss_port), + OCP_SERIAL_RESOURCE(port[PORT_GNSS]), .offset = 0x00160000 + 0x1000, .irq_vec = 3, .extra = &(struct ptp_ocp_serial_port) { .baud = 9600, }, }, { - OCP_SERIAL_RESOURCE(mac_port), + OCP_SERIAL_RESOURCE(port[PORT_MAC]), .offset = 0x00180000 + 0x1000, .irq_vec = 5, .extra = &(struct ptp_ocp_serial_port) { .baud = 115200, @@ -1649,6 +1655,15 @@ ptp_ocp_tod_gnss_name(int idx) return gnss_name[idx]; } +static const char * +ptp_ocp_tty_port_name(int idx) +{ + static const char * const tty_name[] = { + "GNSS", "GNSS2", "MAC", "NMEA" + }; + return tty_name[idx]; +} + struct ptp_ocp_nvmem_match_info { struct ptp_ocp *bp; const void * const tag; @@ -3960,16 +3975,11 @@ ptp_ocp_summary_show(struct seq_file *s, void *data) bp = dev_get_drvdata(dev); seq_printf(s, "%7s: /dev/ptp%d\n", "PTP", ptp_clock_index(bp->ptp)); - if (bp->gnss_port.line != -1) - seq_printf(s, "%7s: /dev/ttyS%d\n", "GNSS1", - bp->gnss_port.line); - if (bp->gnss2_port.line != -1) - seq_printf(s, "%7s: /dev/ttyS%d\n", "GNSS2", - bp->gnss2_port.line); - if (bp->mac_port.line != -1) - seq_printf(s, "%7s: /dev/ttyS%d\n", "MAC", bp->mac_port.line); - if (bp->nmea_port.line != -1) - seq_printf(s, "%7s: /dev/ttyS%d\n", "NMEA", bp->nmea_port.line); + for (i = 0; i < __PORT_COUNT; i++) { + if (bp->port[i].line != -1) + seq_printf(s, "%7s: /dev/ttyS%d\n", ptp_ocp_tty_port_name(i), + bp->port[i].line); + } memset(sma_val, 0xff, sizeof(sma_val)); if (bp->sma_map1) { @@ -4279,7 +4289,7 @@ ptp_ocp_dev_release(struct device *dev) static int ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev) { - int err; + int i, err; mutex_lock(&ptp_ocp_lock); err = idr_alloc(&ptp_ocp_idr, bp, 0, 0, GFP_KERNEL); @@ -4292,10 +4302,10 @@ ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev) bp->ptp_info = ptp_ocp_clock_info; spin_lock_init(&bp->lock); - bp->gnss_port.line = -1; - bp->gnss2_port.line = -1; - bp->mac_port.line = -1; - bp->nmea_port.line = -1; + + for (i = 0; i < __PORT_COUNT; i++) + bp->port[i].line = -1; + bp->pdev = pdev; device_initialize(&bp->dev); @@ -4351,23 +4361,15 @@ ptp_ocp_complete(struct ptp_ocp *bp) { struct pps_device *pps; char buf[32]; + int i; - if (bp->gnss_port.line != -1) { - sprintf(buf, "ttyS%d", bp->gnss_port.line); - ptp_ocp_link_child(bp, buf, "ttyGNSS"); - } - if (bp->gnss2_port.line != -1) { - sprintf(buf, "ttyS%d", bp->gnss2_port.line); - ptp_ocp_link_child(bp, buf, "ttyGNSS2"); - } - if (bp->mac_port.line != -1) { - sprintf(buf, "ttyS%d", bp->mac_port.line); - ptp_ocp_link_child(bp, buf, "ttyMAC"); - } - if (bp->nmea_port.line != -1) { - sprintf(buf, "ttyS%d", bp->nmea_port.line); - ptp_ocp_link_child(bp, buf, "ttyNMEA"); + for (i = 0; i < __PORT_COUNT; i++) { + if (bp->port[i].line != -1) { + sprintf(buf, "ttyS%d", bp->port[i].line); + ptp_ocp_link_child(bp, buf, ptp_ocp_tty_port_name(i)); + } } + sprintf(buf, "ptp%d", ptp_clock_index(bp->ptp)); ptp_ocp_link_child(bp, buf, "ptp"); @@ -4416,23 +4418,20 @@ ptp_ocp_info(struct ptp_ocp *bp) }; struct device *dev = &bp->pdev->dev; u32 reg; + int i; ptp_ocp_phc_info(bp); - ptp_ocp_serial_info(dev, "GNSS", bp->gnss_port.line, - bp->gnss_port.baud); - ptp_ocp_serial_info(dev, "GNSS2", bp->gnss2_port.line, - bp->gnss2_port.baud); - ptp_ocp_serial_info(dev, "MAC", bp->mac_port.line, bp->mac_port.baud); - if (bp->nmea_out && bp->nmea_port.line != -1) { - bp->nmea_port.baud = -1; + for (i = 0; i < __PORT_COUNT; i++) { + if (i == PORT_NMEA && bp->nmea_out && bp->port[PORT_NMEA].line != -1) { + bp->port[PORT_NMEA].baud = -1; - reg = ioread32(&bp->nmea_out->uart_baud); - if (reg < ARRAY_SIZE(nmea_baud)) - bp->nmea_port.baud = nmea_baud[reg]; - - ptp_ocp_serial_info(dev, "NMEA", bp->nmea_port.line, - bp->nmea_port.baud); + reg = ioread32(&bp->nmea_out->uart_baud); + if (reg < ARRAY_SIZE(nmea_baud)) + bp->port[PORT_NMEA].baud = nmea_baud[reg]; + } + ptp_ocp_serial_info(dev, ptp_ocp_tty_port_name(i), bp->port[i].line, + bp->port[i].baud); } } @@ -4473,14 +4472,9 @@ ptp_ocp_detach(struct ptp_ocp *bp) for (i = 0; i < 4; i++) if (bp->signal_out[i]) ptp_ocp_unregister_ext(bp->signal_out[i]); - if (bp->gnss_port.line != -1) - serial8250_unregister_port(bp->gnss_port.line); - if (bp->gnss2_port.line != -1) - serial8250_unregister_port(bp->gnss2_port.line); - if (bp->mac_port.line != -1) - serial8250_unregister_port(bp->mac_port.line); - if (bp->nmea_port.line != -1) - serial8250_unregister_port(bp->nmea_port.line); + for (i = 0; i < __PORT_COUNT; i++) + if (bp->port[i].line != -1) + serial8250_unregister_port(bp->port[i].line); platform_device_unregister(bp->spi_flash); platform_device_unregister(bp->i2c_ctrl); if (bp->i2c_clk) -- cgit From 82ace0c8fe9b025eaa273365e27057402cdaeb02 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 29 Aug 2024 11:36:02 -0700 Subject: ptp: ocp: adjust sysfs entries to expose tty information Implement additional attribute group to expose serial port information. Fixes tag points to the commit which introduced the change in serial port subsystem and made it impossible to use symlinks. Fixes: b286f4e87e32 ("serial: core: Move tty and serdev to be children of serial core port device") Signed-off-by: Vadim Fedorenko Reviewed-by: Greg Kroah-Hartman Signed-off-by: Paolo Abeni --- drivers/ptp/ptp_ocp.c | 62 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 11 deletions(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 46369de8e30b..e7479b9b90cb 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -3361,6 +3361,54 @@ static EXT_ATTR_RO(freq, frequency, 1); static EXT_ATTR_RO(freq, frequency, 2); static EXT_ATTR_RO(freq, frequency, 3); +static ssize_t +ptp_ocp_tty_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *ea = to_ext_attr(attr); + struct ptp_ocp *bp = dev_get_drvdata(dev); + + return sysfs_emit(buf, "ttyS%d", bp->port[(uintptr_t)ea->var].line); +} + +static umode_t +ptp_ocp_timecard_tty_is_visible(struct kobject *kobj, struct attribute *attr, int n) +{ + struct ptp_ocp *bp = dev_get_drvdata(kobj_to_dev(kobj)); + struct ptp_ocp_serial_port *port; + struct device_attribute *dattr; + struct dev_ext_attribute *ea; + + if (strncmp(attr->name, "tty", 3)) + return attr->mode; + + dattr = container_of(attr, struct device_attribute, attr); + ea = container_of(dattr, struct dev_ext_attribute, attr); + port = &bp->port[(uintptr_t)ea->var]; + return port->line == -1 ? 0 : 0444; +} + +#define EXT_TTY_ATTR_RO(_name, _val) \ + struct dev_ext_attribute dev_attr_tty##_name = \ + { __ATTR(tty##_name, 0444, ptp_ocp_tty_show, NULL), (void *)_val } + +static EXT_TTY_ATTR_RO(GNSS, PORT_GNSS); +static EXT_TTY_ATTR_RO(GNSS2, PORT_GNSS2); +static EXT_TTY_ATTR_RO(MAC, PORT_MAC); +static EXT_TTY_ATTR_RO(NMEA, PORT_NMEA); +static struct attribute *ptp_ocp_timecard_tty_attrs[] = { + &dev_attr_ttyGNSS.attr.attr, + &dev_attr_ttyGNSS2.attr.attr, + &dev_attr_ttyMAC.attr.attr, + &dev_attr_ttyNMEA.attr.attr, + NULL, +}; + +static const struct attribute_group ptp_ocp_timecard_tty_group = { + .name = "tty", + .attrs = ptp_ocp_timecard_tty_attrs, + .is_visible = ptp_ocp_timecard_tty_is_visible, +}; + static ssize_t serialnum_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -3790,6 +3838,7 @@ static const struct attribute_group fb_timecard_group = { static const struct ocp_attr_group fb_timecard_groups[] = { { .cap = OCP_CAP_BASIC, .group = &fb_timecard_group }, + { .cap = OCP_CAP_BASIC, .group = &ptp_ocp_timecard_tty_group }, { .cap = OCP_CAP_SIGNAL, .group = &fb_timecard_signal0_group }, { .cap = OCP_CAP_SIGNAL, .group = &fb_timecard_signal1_group }, { .cap = OCP_CAP_SIGNAL, .group = &fb_timecard_signal2_group }, @@ -3829,6 +3878,7 @@ static const struct attribute_group art_timecard_group = { static const struct ocp_attr_group art_timecard_groups[] = { { .cap = OCP_CAP_BASIC, .group = &art_timecard_group }, + { .cap = OCP_CAP_BASIC, .group = &ptp_ocp_timecard_tty_group }, { }, }; @@ -3856,6 +3906,7 @@ static const struct attribute_group adva_timecard_group = { static const struct ocp_attr_group adva_timecard_groups[] = { { .cap = OCP_CAP_BASIC, .group = &adva_timecard_group }, + { .cap = OCP_CAP_BASIC, .group = &ptp_ocp_timecard_tty_group }, { .cap = OCP_CAP_SIGNAL, .group = &fb_timecard_signal0_group }, { .cap = OCP_CAP_SIGNAL, .group = &fb_timecard_signal1_group }, { .cap = OCP_CAP_FREQ, .group = &fb_timecard_freq0_group }, @@ -4361,14 +4412,6 @@ ptp_ocp_complete(struct ptp_ocp *bp) { struct pps_device *pps; char buf[32]; - int i; - - for (i = 0; i < __PORT_COUNT; i++) { - if (bp->port[i].line != -1) { - sprintf(buf, "ttyS%d", bp->port[i].line); - ptp_ocp_link_child(bp, buf, ptp_ocp_tty_port_name(i)); - } - } sprintf(buf, "ptp%d", ptp_clock_index(bp->ptp)); ptp_ocp_link_child(bp, buf, "ptp"); @@ -4440,9 +4483,6 @@ ptp_ocp_detach_sysfs(struct ptp_ocp *bp) { struct device *dev = &bp->dev; - sysfs_remove_link(&dev->kobj, "ttyGNSS"); - sysfs_remove_link(&dev->kobj, "ttyGNSS2"); - sysfs_remove_link(&dev->kobj, "ttyMAC"); sysfs_remove_link(&dev->kobj, "ptp"); sysfs_remove_link(&dev->kobj, "pps"); } -- cgit From 40bec579d4c718dabc3e3baf7d84c93a89e6bcce Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 29 Aug 2024 11:36:03 -0700 Subject: docs: ABI: update OCP TimeCard sysfs entries Update documentation according to the changes in the driver. New attributes group tty is exposed and ttyGNSS, ttyGNSS2, ttyMAC and ttyNMEA are moved to this group. Also, these attributes are no more links to the devices but rather simple text files containing names of tty devices. Signed-off-by: Vadim Fedorenko Reviewed-by: Greg Kroah-Hartman Signed-off-by: Paolo Abeni --- Documentation/ABI/testing/sysfs-timecard | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-timecard b/Documentation/ABI/testing/sysfs-timecard index 220478156297..3ae41b7634ac 100644 --- a/Documentation/ABI/testing/sysfs-timecard +++ b/Documentation/ABI/testing/sysfs-timecard @@ -258,24 +258,29 @@ Description: (RW) When retrieving the PHC with the PTP SYS_OFFSET_EXTENDED the estimated point where the FPGA latches the PHC time. This value may be changed by writing an unsigned integer. -What: /sys/class/timecard/ocpN/ttyGNSS -What: /sys/class/timecard/ocpN/ttyGNSS2 -Date: September 2021 +What: /sys/class/timecard/ocpN/tty +Date: August 2024 +Contact: Vadim Fedorenko +Description: (RO) Directory containing the sysfs nodes for TTY attributes + +What: /sys/class/timecard/ocpN/tty/ttyGNSS +What: /sys/class/timecard/ocpN/tty/ttyGNSS2 +Date: August 2024 Contact: Jonathan Lemon -Description: These optional attributes link to the TTY serial ports - associated with the GNSS devices. +Description: (RO) These optional attributes contain names of the TTY serial + ports associated with the GNSS devices. -What: /sys/class/timecard/ocpN/ttyMAC -Date: September 2021 +What: /sys/class/timecard/ocpN/tty/ttyMAC +Date: August 2024 Contact: Jonathan Lemon -Description: This optional attribute links to the TTY serial port - associated with the Miniature Atomic Clock. +Description: (RO) This optional attribute contains name of the TTY serial + port associated with the Miniature Atomic Clock. -What: /sys/class/timecard/ocpN/ttyNMEA -Date: September 2021 +What: /sys/class/timecard/ocpN/tty/ttyNMEA +Date: August 2024 Contact: Jonathan Lemon -Description: This optional attribute links to the TTY serial port - which outputs the PHC time in NMEA ZDA format. +Description: (RO) This optional attribute contains name of the TTY serial + port which outputs the PHC time in NMEA ZDA format. What: /sys/class/timecard/ocpN/utc_tai_offset Date: September 2021 -- cgit From d34af755a533271f39cc7d86e49c0e74fde63a37 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Sat, 31 Aug 2024 12:39:05 +1200 Subject: platform/x86/amd: pmf: Make ASUS GA403 quirk generic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original quirk should match to GA403U so that the full range of GA403U models can benefit. Signed-off-by: Luke D. Jones Link: https://lore.kernel.org/r/20240831003905.1060977-1-luke@ljones.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/pmf-quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/pmf/pmf-quirks.c b/drivers/platform/x86/amd/pmf/pmf-quirks.c index 460444cda1b2..48870ca52b41 100644 --- a/drivers/platform/x86/amd/pmf/pmf-quirks.c +++ b/drivers/platform/x86/amd/pmf/pmf-quirks.c @@ -25,7 +25,7 @@ static const struct dmi_system_id fwbug_list[] = { .ident = "ROG Zephyrus G14", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "GA403UV"), + DMI_MATCH(DMI_PRODUCT_NAME, "GA403U"), }, .driver_data = &quirk_no_sps_bug, }, -- cgit From 2a5dc090b92cfa5270e20056074241c6db5c9cdd Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 23 Aug 2024 11:59:26 +0200 Subject: ice: move netif_queue_set_napi to rtnl-protected sections Currently, netif_queue_set_napi() is called from ice_vsi_rebuild() that is not rtnl-locked when called from the reset. This creates the need to take the rtnl_lock just for a single function and complicates the synchronization with .ndo_bpf. At the same time, there no actual need to fill napi-to-queue information at this exact point. Fill napi-to-queue information when opening the VSI and clear it when the VSI is being closed. Those routines are already rtnl-locked. Also, rewrite napi-to-queue assignment in a way that prevents inclusion of XDP queues, as this leads to out-of-bounds writes, such as one below. [ +0.000004] BUG: KASAN: slab-out-of-bounds in netif_queue_set_napi+0x1c2/0x1e0 [ +0.000012] Write of size 8 at addr ffff889881727c80 by task bash/7047 [ +0.000006] CPU: 24 PID: 7047 Comm: bash Not tainted 6.10.0-rc2+ #2 [ +0.000004] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0014.082620210524 08/26/2021 [ +0.000003] Call Trace: [ +0.000003] [ +0.000002] dump_stack_lvl+0x60/0x80 [ +0.000007] print_report+0xce/0x630 [ +0.000007] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ +0.000007] ? __virt_addr_valid+0x1c9/0x2c0 [ +0.000005] ? netif_queue_set_napi+0x1c2/0x1e0 [ +0.000003] kasan_report+0xe9/0x120 [ +0.000004] ? netif_queue_set_napi+0x1c2/0x1e0 [ +0.000004] netif_queue_set_napi+0x1c2/0x1e0 [ +0.000005] ice_vsi_close+0x161/0x670 [ice] [ +0.000114] ice_dis_vsi+0x22f/0x270 [ice] [ +0.000095] ice_pf_dis_all_vsi.constprop.0+0xae/0x1c0 [ice] [ +0.000086] ice_prepare_for_reset+0x299/0x750 [ice] [ +0.000087] pci_dev_save_and_disable+0x82/0xd0 [ +0.000006] pci_reset_function+0x12d/0x230 [ +0.000004] reset_store+0xa0/0x100 [ +0.000006] ? __pfx_reset_store+0x10/0x10 [ +0.000002] ? __pfx_mutex_lock+0x10/0x10 [ +0.000004] ? __check_object_size+0x4c1/0x640 [ +0.000007] kernfs_fop_write_iter+0x30b/0x4a0 [ +0.000006] vfs_write+0x5d6/0xdf0 [ +0.000005] ? fd_install+0x180/0x350 [ +0.000005] ? __pfx_vfs_write+0x10/0xA10 [ +0.000004] ? do_fcntl+0x52c/0xcd0 [ +0.000004] ? kasan_save_track+0x13/0x60 [ +0.000003] ? kasan_save_free_info+0x37/0x60 [ +0.000006] ksys_write+0xfa/0x1d0 [ +0.000003] ? __pfx_ksys_write+0x10/0x10 [ +0.000002] ? __x64_sys_fcntl+0x121/0x180 [ +0.000004] ? _raw_spin_lock+0x87/0xe0 [ +0.000005] do_syscall_64+0x80/0x170 [ +0.000007] ? _raw_spin_lock+0x87/0xe0 [ +0.000004] ? __pfx__raw_spin_lock+0x10/0x10 [ +0.000003] ? file_close_fd_locked+0x167/0x230 [ +0.000005] ? syscall_exit_to_user_mode+0x7d/0x220 [ +0.000005] ? do_syscall_64+0x8c/0x170 [ +0.000004] ? do_syscall_64+0x8c/0x170 [ +0.000003] ? do_syscall_64+0x8c/0x170 [ +0.000003] ? fput+0x1a/0x2c0 [ +0.000004] ? filp_close+0x19/0x30 [ +0.000004] ? do_dup2+0x25a/0x4c0 [ +0.000004] ? __x64_sys_dup2+0x6e/0x2e0 [ +0.000002] ? syscall_exit_to_user_mode+0x7d/0x220 [ +0.000004] ? do_syscall_64+0x8c/0x170 [ +0.000003] ? __count_memcg_events+0x113/0x380 [ +0.000005] ? handle_mm_fault+0x136/0x820 [ +0.000005] ? do_user_addr_fault+0x444/0xa80 [ +0.000004] ? clear_bhb_loop+0x25/0x80 [ +0.000004] ? clear_bhb_loop+0x25/0x80 [ +0.000002] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ +0.000005] RIP: 0033:0x7f2033593154 Fixes: 080b0c8d6d26 ("ice: Fix ASSERT_RTNL() warning during certain scenarios") Fixes: 91fdbce7e8d6 ("ice: Add support in the driver for associating queue with napi") Reviewed-by: Wojciech Drewek Reviewed-by: Jacob Keller Reviewed-by: Amritha Nambiar Signed-off-by: Larysa Zaremba Reviewed-by: Maciej Fijalkowski Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_base.c | 11 +-- drivers/net/ethernet/intel/ice/ice_lib.c | 129 +++++++----------------------- drivers/net/ethernet/intel/ice/ice_lib.h | 10 +-- drivers/net/ethernet/intel/ice/ice_main.c | 17 +++- 4 files changed, 49 insertions(+), 118 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index f448d3a84564..c158749a80e0 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -190,16 +190,11 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) } q_vector = vsi->q_vectors[v_idx]; - ice_for_each_tx_ring(tx_ring, q_vector->tx) { - ice_queue_set_napi(vsi, tx_ring->q_index, NETDEV_QUEUE_TYPE_TX, - NULL); + ice_for_each_tx_ring(tx_ring, vsi->q_vectors[v_idx]->tx) tx_ring->q_vector = NULL; - } - ice_for_each_rx_ring(rx_ring, q_vector->rx) { - ice_queue_set_napi(vsi, rx_ring->q_index, NETDEV_QUEUE_TYPE_RX, - NULL); + + ice_for_each_rx_ring(rx_ring, vsi->q_vectors[v_idx]->rx) rx_ring->q_vector = NULL; - } /* only VSI with an associated netdev is set up with NAPI */ if (vsi->netdev) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index f559e60992fa..6676596df88b 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2286,9 +2286,6 @@ static int ice_vsi_cfg_def(struct ice_vsi *vsi) ice_vsi_map_rings_to_vectors(vsi); - /* Associate q_vector rings to napi */ - ice_vsi_set_napi_queues(vsi); - vsi->stat_offsets_loaded = false; /* ICE_VSI_CTRL does not need RSS so skip RSS processing */ @@ -2628,6 +2625,7 @@ void ice_vsi_close(struct ice_vsi *vsi) if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) ice_down(vsi); + ice_vsi_clear_napi_queues(vsi); ice_vsi_free_irq(vsi); ice_vsi_free_tx_rings(vsi); ice_vsi_free_rx_rings(vsi); @@ -2694,120 +2692,55 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) } /** - * __ice_queue_set_napi - Set the napi instance for the queue - * @dev: device to which NAPI and queue belong - * @queue_index: Index of queue - * @type: queue type as RX or TX - * @napi: NAPI context - * @locked: is the rtnl_lock already held - * - * Set the napi instance for the queue. Caller indicates the lock status. - */ -static void -__ice_queue_set_napi(struct net_device *dev, unsigned int queue_index, - enum netdev_queue_type type, struct napi_struct *napi, - bool locked) -{ - if (!locked) - rtnl_lock(); - netif_queue_set_napi(dev, queue_index, type, napi); - if (!locked) - rtnl_unlock(); -} - -/** - * ice_queue_set_napi - Set the napi instance for the queue - * @vsi: VSI being configured - * @queue_index: Index of queue - * @type: queue type as RX or TX - * @napi: NAPI context + * ice_vsi_set_napi_queues - associate netdev queues with napi + * @vsi: VSI pointer * - * Set the napi instance for the queue. The rtnl lock state is derived from the - * execution path. + * Associate queue[s] with napi for all vectors. + * The caller must hold rtnl_lock. */ -void -ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index, - enum netdev_queue_type type, struct napi_struct *napi) +void ice_vsi_set_napi_queues(struct ice_vsi *vsi) { - struct ice_pf *pf = vsi->back; + struct net_device *netdev = vsi->netdev; + int q_idx, v_idx; - if (!vsi->netdev) + if (!netdev) return; - if (current_work() == &pf->serv_task || - test_bit(ICE_PREPARED_FOR_RESET, pf->state) || - test_bit(ICE_DOWN, pf->state) || - test_bit(ICE_SUSPENDED, pf->state)) - __ice_queue_set_napi(vsi->netdev, queue_index, type, napi, - false); - else - __ice_queue_set_napi(vsi->netdev, queue_index, type, napi, - true); -} + ice_for_each_rxq(vsi, q_idx) + netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_RX, + &vsi->rx_rings[q_idx]->q_vector->napi); -/** - * __ice_q_vector_set_napi_queues - Map queue[s] associated with the napi - * @q_vector: q_vector pointer - * @locked: is the rtnl_lock already held - * - * Associate the q_vector napi with all the queue[s] on the vector. - * Caller indicates the lock status. - */ -void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked) -{ - struct ice_rx_ring *rx_ring; - struct ice_tx_ring *tx_ring; - - ice_for_each_rx_ring(rx_ring, q_vector->rx) - __ice_queue_set_napi(q_vector->vsi->netdev, rx_ring->q_index, - NETDEV_QUEUE_TYPE_RX, &q_vector->napi, - locked); - - ice_for_each_tx_ring(tx_ring, q_vector->tx) - __ice_queue_set_napi(q_vector->vsi->netdev, tx_ring->q_index, - NETDEV_QUEUE_TYPE_TX, &q_vector->napi, - locked); + ice_for_each_txq(vsi, q_idx) + netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_TX, + &vsi->tx_rings[q_idx]->q_vector->napi); /* Also set the interrupt number for the NAPI */ - netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq); -} + ice_for_each_q_vector(vsi, v_idx) { + struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; -/** - * ice_q_vector_set_napi_queues - Map queue[s] associated with the napi - * @q_vector: q_vector pointer - * - * Associate the q_vector napi with all the queue[s] on the vector - */ -void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector) -{ - struct ice_rx_ring *rx_ring; - struct ice_tx_ring *tx_ring; - - ice_for_each_rx_ring(rx_ring, q_vector->rx) - ice_queue_set_napi(q_vector->vsi, rx_ring->q_index, - NETDEV_QUEUE_TYPE_RX, &q_vector->napi); - - ice_for_each_tx_ring(tx_ring, q_vector->tx) - ice_queue_set_napi(q_vector->vsi, tx_ring->q_index, - NETDEV_QUEUE_TYPE_TX, &q_vector->napi); - /* Also set the interrupt number for the NAPI */ - netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq); + netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq); + } } /** - * ice_vsi_set_napi_queues + * ice_vsi_clear_napi_queues - dissociate netdev queues from napi * @vsi: VSI pointer * - * Associate queue[s] with napi for all vectors + * Clear the association between all VSI queues queue[s] and napi. + * The caller must hold rtnl_lock. */ -void ice_vsi_set_napi_queues(struct ice_vsi *vsi) +void ice_vsi_clear_napi_queues(struct ice_vsi *vsi) { - int i; + struct net_device *netdev = vsi->netdev; + int q_idx; - if (!vsi->netdev) + if (!netdev) return; - ice_for_each_q_vector(vsi, i) - ice_q_vector_set_napi_queues(vsi->q_vectors[i]); + ice_for_each_txq(vsi, q_idx) + netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_TX, NULL); + + ice_for_each_rxq(vsi, q_idx) + netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_RX, NULL); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 94ce8964dda6..36d86535695d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -44,16 +44,10 @@ void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params); -void -ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index, - enum netdev_queue_type type, struct napi_struct *napi); - -void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked); - -void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector); - void ice_vsi_set_napi_queues(struct ice_vsi *vsi); +void ice_vsi_clear_napi_queues(struct ice_vsi *vsi); + int ice_vsi_release(struct ice_vsi *vsi); void ice_vsi_close(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 46d3c5a34d6a..263833346d3a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3558,11 +3558,9 @@ static void ice_napi_add(struct ice_vsi *vsi) if (!vsi->netdev) return; - ice_for_each_q_vector(vsi, v_idx) { + ice_for_each_q_vector(vsi, v_idx) netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi, ice_napi_poll); - __ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false); - } } /** @@ -5540,7 +5538,9 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf) if (ret) goto err_reinit; ice_vsi_map_rings_to_vectors(pf->vsi[v]); + rtnl_lock(); ice_vsi_set_napi_queues(pf->vsi[v]); + rtnl_unlock(); } ret = ice_req_irq_msix_misc(pf); @@ -5554,8 +5554,12 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf) err_reinit: while (v--) - if (pf->vsi[v]) + if (pf->vsi[v]) { + rtnl_lock(); + ice_vsi_clear_napi_queues(pf->vsi[v]); + rtnl_unlock(); ice_vsi_free_q_vectors(pf->vsi[v]); + } return ret; } @@ -5620,6 +5624,9 @@ static int ice_suspend(struct device *dev) ice_for_each_vsi(pf, v) { if (!pf->vsi[v]) continue; + rtnl_lock(); + ice_vsi_clear_napi_queues(pf->vsi[v]); + rtnl_unlock(); ice_vsi_free_q_vectors(pf->vsi[v]); } ice_clear_interrupt_scheme(pf); @@ -7455,6 +7462,8 @@ int ice_vsi_open(struct ice_vsi *vsi) err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq); if (err) goto err_set_qs; + + ice_vsi_set_napi_queues(vsi); } err = ice_up_complete(vsi); -- cgit From 2504b8405768a57a71e660dbfd5abd59f679a03f Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 23 Aug 2024 11:59:27 +0200 Subject: ice: protect XDP configuration with a mutex The main threat to data consistency in ice_xdp() is a possible asynchronous PF reset. It can be triggered by a user or by TX timeout handler. XDP setup and PF reset code access the same resources in the following sections: * ice_vsi_close() in ice_prepare_for_reset() - already rtnl-locked * ice_vsi_rebuild() for the PF VSI - not protected * ice_vsi_open() - already rtnl-locked With an unfortunate timing, such accesses can result in a crash such as the one below: [ +1.999878] ice 0000:b1:00.0: Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring 14 [ +2.002992] ice 0000:b1:00.0: Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring 18 [Mar15 18:17] ice 0000:b1:00.0 ens801f0np0: NETDEV WATCHDOG: CPU: 38: transmit queue 14 timed out 80692736 ms [ +0.000093] ice 0000:b1:00.0 ens801f0np0: tx_timeout: VSI_num: 6, Q 14, NTC: 0x0, HW_HEAD: 0x0, NTU: 0x0, INT: 0x4000001 [ +0.000012] ice 0000:b1:00.0 ens801f0np0: tx_timeout recovery level 1, txqueue 14 [ +0.394718] ice 0000:b1:00.0: PTP reset successful [ +0.006184] BUG: kernel NULL pointer dereference, address: 0000000000000098 [ +0.000045] #PF: supervisor read access in kernel mode [ +0.000023] #PF: error_code(0x0000) - not-present page [ +0.000023] PGD 0 P4D 0 [ +0.000018] Oops: 0000 [#1] PREEMPT SMP NOPTI [ +0.000023] CPU: 38 PID: 7540 Comm: kworker/38:1 Not tainted 6.8.0-rc7 #1 [ +0.000031] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0014.082620210524 08/26/2021 [ +0.000036] Workqueue: ice ice_service_task [ice] [ +0.000183] RIP: 0010:ice_clean_tx_ring+0xa/0xd0 [ice] [...] [ +0.000013] Call Trace: [ +0.000016] [ +0.000014] ? __die+0x1f/0x70 [ +0.000029] ? page_fault_oops+0x171/0x4f0 [ +0.000029] ? schedule+0x3b/0xd0 [ +0.000027] ? exc_page_fault+0x7b/0x180 [ +0.000022] ? asm_exc_page_fault+0x22/0x30 [ +0.000031] ? ice_clean_tx_ring+0xa/0xd0 [ice] [ +0.000194] ice_free_tx_ring+0xe/0x60 [ice] [ +0.000186] ice_destroy_xdp_rings+0x157/0x310 [ice] [ +0.000151] ice_vsi_decfg+0x53/0xe0 [ice] [ +0.000180] ice_vsi_rebuild+0x239/0x540 [ice] [ +0.000186] ice_vsi_rebuild_by_type+0x76/0x180 [ice] [ +0.000145] ice_rebuild+0x18c/0x840 [ice] [ +0.000145] ? delay_tsc+0x4a/0xc0 [ +0.000022] ? delay_tsc+0x92/0xc0 [ +0.000020] ice_do_reset+0x140/0x180 [ice] [ +0.000886] ice_service_task+0x404/0x1030 [ice] [ +0.000824] process_one_work+0x171/0x340 [ +0.000685] worker_thread+0x277/0x3a0 [ +0.000675] ? preempt_count_add+0x6a/0xa0 [ +0.000677] ? _raw_spin_lock_irqsave+0x23/0x50 [ +0.000679] ? __pfx_worker_thread+0x10/0x10 [ +0.000653] kthread+0xf0/0x120 [ +0.000635] ? __pfx_kthread+0x10/0x10 [ +0.000616] ret_from_fork+0x2d/0x50 [ +0.000612] ? __pfx_kthread+0x10/0x10 [ +0.000604] ret_from_fork_asm+0x1b/0x30 [ +0.000604] The previous way of handling this through returning -EBUSY is not viable, particularly when destroying AF_XDP socket, because the kernel proceeds with removal anyway. There is plenty of code between those calls and there is no need to create a large critical section that covers all of them, same as there is no need to protect ice_vsi_rebuild() with rtnl_lock(). Add xdp_state_lock mutex to protect ice_vsi_rebuild() and ice_xdp(). Leaving unprotected sections in between would result in two states that have to be considered: 1. when the VSI is closed, but not yet rebuild 2. when VSI is already rebuild, but not yet open The latter case is actually already handled through !netif_running() case, we just need to adjust flag checking a little. The former one is not as trivial, because between ice_vsi_close() and ice_vsi_rebuild(), a lot of hardware interaction happens, this can make adding/deleting rings exit with an error. Luckily, VSI rebuild is pending and can apply new configuration for us in a managed fashion. Therefore, add an additional VSI state flag ICE_VSI_REBUILD_PENDING to indicate that ice_xdp() can just hot-swap the program. Also, as ice_vsi_rebuild() flow is touched in this patch, make it more consistent by deconfiguring VSI when coalesce allocation fails. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Fixes: efc2214b6047 ("ice: Add support for XDP") Reviewed-by: Wojciech Drewek Reviewed-by: Jacob Keller Tested-by: Chandan Kumar Rout Signed-off-by: Larysa Zaremba Reviewed-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 2 ++ drivers/net/ethernet/intel/ice/ice_lib.c | 34 +++++++++++++++++++------------ drivers/net/ethernet/intel/ice/ice_main.c | 19 ++++++++++++----- drivers/net/ethernet/intel/ice/ice_xsk.c | 3 ++- 4 files changed, 39 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index caaa10157909..ce8b5505b16d 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -318,6 +318,7 @@ enum ice_vsi_state { ICE_VSI_UMAC_FLTR_CHANGED, ICE_VSI_MMAC_FLTR_CHANGED, ICE_VSI_PROMISC_CHANGED, + ICE_VSI_REBUILD_PENDING, ICE_VSI_STATE_NBITS /* must be last */ }; @@ -411,6 +412,7 @@ struct ice_vsi { struct ice_tx_ring **xdp_rings; /* XDP ring array */ u16 num_xdp_txq; /* Used XDP queues */ u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ + struct mutex xdp_state_lock; struct net_device **target_netdevs; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 6676596df88b..c1c1b63d9701 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -447,6 +447,7 @@ static void ice_vsi_free(struct ice_vsi *vsi) ice_vsi_free_stats(vsi); ice_vsi_free_arrays(vsi); + mutex_destroy(&vsi->xdp_state_lock); mutex_unlock(&pf->sw_mutex); devm_kfree(dev, vsi); } @@ -626,6 +627,8 @@ static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf) pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi, pf->next_vsi); + mutex_init(&vsi->xdp_state_lock); + unlock_pf: mutex_unlock(&pf->sw_mutex); return vsi; @@ -2972,19 +2975,23 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf)) return -EINVAL; + mutex_lock(&vsi->xdp_state_lock); + ret = ice_vsi_realloc_stat_arrays(vsi); if (ret) - goto err_vsi_cfg; + goto unlock; ice_vsi_decfg(vsi); ret = ice_vsi_cfg_def(vsi); if (ret) - goto err_vsi_cfg; + goto unlock; coalesce = kcalloc(vsi->num_q_vectors, sizeof(struct ice_coalesce_stored), GFP_KERNEL); - if (!coalesce) - return -ENOMEM; + if (!coalesce) { + ret = -ENOMEM; + goto decfg; + } prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); @@ -2992,22 +2999,23 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) if (ret) { if (vsi_flags & ICE_VSI_FLAG_INIT) { ret = -EIO; - goto err_vsi_cfg_tc_lan; + goto free_coalesce; } - kfree(coalesce); - return ice_schedule_reset(pf, ICE_RESET_PFR); + ret = ice_schedule_reset(pf, ICE_RESET_PFR); + goto free_coalesce; } ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors); - kfree(coalesce); + clear_bit(ICE_VSI_REBUILD_PENDING, vsi->state); - return 0; - -err_vsi_cfg_tc_lan: - ice_vsi_decfg(vsi); +free_coalesce: kfree(coalesce); -err_vsi_cfg: +decfg: + if (ret) + ice_vsi_decfg(vsi); +unlock: + mutex_unlock(&vsi->xdp_state_lock); return ret; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 263833346d3a..4edaddcba3b4 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -616,6 +616,7 @@ skip: /* clear SW filtering DB */ ice_clear_hw_tbls(hw); /* disable the VSIs and their queues that are not already DOWN */ + set_bit(ICE_VSI_REBUILD_PENDING, ice_get_main_vsi(pf)->state); ice_pf_dis_all_vsi(pf, false); if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) @@ -3016,7 +3017,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } /* hot swap progs and avoid toggling link */ - if (ice_is_xdp_ena_vsi(vsi) == !!prog) { + if (ice_is_xdp_ena_vsi(vsi) == !!prog || + test_bit(ICE_VSI_REBUILD_PENDING, vsi->state)) { ice_vsi_assign_bpf_prog(vsi, prog); return 0; } @@ -3088,21 +3090,28 @@ static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp) { struct ice_netdev_priv *np = netdev_priv(dev); struct ice_vsi *vsi = np->vsi; + int ret; if (vsi->type != ICE_VSI_PF) { NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF VSI"); return -EINVAL; } + mutex_lock(&vsi->xdp_state_lock); + switch (xdp->command) { case XDP_SETUP_PROG: - return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack); + ret = ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack); + break; case XDP_SETUP_XSK_POOL: - return ice_xsk_pool_setup(vsi, xdp->xsk.pool, - xdp->xsk.queue_id); + ret = ice_xsk_pool_setup(vsi, xdp->xsk.pool, xdp->xsk.queue_id); + break; default: - return -EINVAL; + ret = -EINVAL; } + + mutex_unlock(&vsi->xdp_state_lock); + return ret; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 240a7bec242b..a659951fa987 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -390,7 +390,8 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) goto failure; } - if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi); + if_running = !test_bit(ICE_VSI_DOWN, vsi->state) && + ice_is_xdp_ena_vsi(vsi); if (if_running) { struct ice_rx_ring *rx_ring = vsi->rx_rings[qid]; -- cgit From f50c68763436bc8f805712a7c5ceaf58cfcf5f07 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 23 Aug 2024 11:59:28 +0200 Subject: ice: check for XDP rings instead of bpf program when unconfiguring If VSI rebuild is pending, .ndo_bpf() can attach/detach the XDP program on VSI without applying new ring configuration. When unconfiguring the VSI, we can encounter the state in which there is an XDP program but no XDP rings to destroy or there will be XDP rings that need to be destroyed, but no XDP program to indicate their presence. When unconfiguring, rely on the presence of XDP rings rather then XDP program, as they better represent the current state that has to be destroyed. Reviewed-by: Wojciech Drewek Reviewed-by: Jacob Keller Tested-by: Chandan Kumar Rout Acked-by: Maciej Fijalkowski Signed-off-by: Larysa Zaremba Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_main.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_xsk.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index c1c1b63d9701..3dccfaba024c 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2426,7 +2426,7 @@ void ice_vsi_decfg(struct ice_vsi *vsi) dev_err(ice_pf_to_dev(pf), "Failed to remove RDMA scheduler config for VSI %u, err %d\n", vsi->vsi_num, err); - if (ice_is_xdp_ena_vsi(vsi)) + if (vsi->xdp_rings) /* return value check can be skipped here, it always returns * 0 if reset is in progress */ @@ -2528,7 +2528,7 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) for (q = 0; q < q_vector->num_ring_tx; q++) { ice_write_itr(&q_vector->tx, 0); wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0); - if (ice_is_xdp_ena_vsi(vsi)) { + if (vsi->xdp_rings) { u32 xdp_txq = txq + vsi->num_xdp_txq; wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), 0); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 4edaddcba3b4..22b8ef5faf8d 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7249,7 +7249,7 @@ int ice_down(struct ice_vsi *vsi) if (tx_err) netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n", vsi->vsi_num, tx_err); - if (!tx_err && ice_is_xdp_ena_vsi(vsi)) { + if (!tx_err && vsi->xdp_rings) { tx_err = ice_vsi_stop_xdp_tx_rings(vsi); if (tx_err) netdev_err(vsi->netdev, "Failed stop XDP rings, VSI %d error %d\n", @@ -7266,7 +7266,7 @@ int ice_down(struct ice_vsi *vsi) ice_for_each_txq(vsi, i) ice_clean_tx_ring(vsi->tx_rings[i]); - if (ice_is_xdp_ena_vsi(vsi)) + if (vsi->xdp_rings) ice_for_each_xdp_txq(vsi, i) ice_clean_tx_ring(vsi->xdp_rings[i]); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index a659951fa987..8693509efbe7 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -39,7 +39,7 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) sizeof(vsi_stat->rx_ring_stats[q_idx]->rx_stats)); memset(&vsi_stat->tx_ring_stats[q_idx]->stats, 0, sizeof(vsi_stat->tx_ring_stats[q_idx]->stats)); - if (ice_is_xdp_ena_vsi(vsi)) + if (vsi->xdp_rings) memset(&vsi->xdp_rings[q_idx]->ring_stats->stats, 0, sizeof(vsi->xdp_rings[q_idx]->ring_stats->stats)); } @@ -52,7 +52,7 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) { ice_clean_tx_ring(vsi->tx_rings[q_idx]); - if (ice_is_xdp_ena_vsi(vsi)) + if (vsi->xdp_rings) ice_clean_tx_ring(vsi->xdp_rings[q_idx]); ice_clean_rx_ring(vsi->rx_rings[q_idx]); } @@ -194,7 +194,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta); if (!fail) fail = err; - if (ice_is_xdp_ena_vsi(vsi)) { + if (vsi->xdp_rings) { struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; memset(&txq_meta, 0, sizeof(txq_meta)); -- cgit From d8c40b9d3a6cef61eb5a0c58c34a3090ea938d89 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 23 Aug 2024 11:59:29 +0200 Subject: ice: check ICE_VSI_DOWN under rtnl_lock when preparing for reset Consider the following scenario: .ndo_bpf() | ice_prepare_for_reset() | ________________________|_______________________________________| rtnl_lock() | | ice_down() | | | test_bit(ICE_VSI_DOWN) - true | | ice_dis_vsi() returns | ice_up() | | | proceeds to rebuild a running VSI | .ndo_bpf() is not the only rtnl-locked callback that toggles the interface to apply new configuration. Another example is .set_channels(). To avoid the race condition above, act only after reading ICE_VSI_DOWN under rtnl_lock. Fixes: 0f9d5027a749 ("ice: Refactor VSI allocation, deletion and rebuild flow") Reviewed-by: Wojciech Drewek Reviewed-by: Jacob Keller Tested-by: Chandan Kumar Rout Signed-off-by: Larysa Zaremba Reviewed-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 3dccfaba024c..737c00b02dd0 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2672,8 +2672,7 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked) */ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) { - if (test_bit(ICE_VSI_DOWN, vsi->state)) - return; + bool already_down = test_bit(ICE_VSI_DOWN, vsi->state); set_bit(ICE_VSI_NEEDS_RESTART, vsi->state); @@ -2681,15 +2680,16 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) if (netif_running(vsi->netdev)) { if (!locked) rtnl_lock(); - - ice_vsi_close(vsi); + already_down = test_bit(ICE_VSI_DOWN, vsi->state); + if (!already_down) + ice_vsi_close(vsi); if (!locked) rtnl_unlock(); - } else { + } else if (!already_down) { ice_vsi_close(vsi); } - } else if (vsi->type == ICE_VSI_CTRL) { + } else if (vsi->type == ICE_VSI_CTRL && !already_down) { ice_vsi_close(vsi); } } -- cgit From 7e3b407ccbea3259b8583ccc34807622025e390f Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 23 Aug 2024 11:59:30 +0200 Subject: ice: remove ICE_CFG_BUSY locking from AF_XDP code Locking used in ice_qp_ena() and ice_qp_dis() does pretty much nothing, because ICE_CFG_BUSY is a state flag that is supposed to be set in a PF state, not VSI one. Therefore it does not protect the queue pair from e.g. reset. Remove ICE_CFG_BUSY locking from ice_qp_dis() and ice_qp_ena(). Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Reviewed-by: Wojciech Drewek Reviewed-by: Jacob Keller Tested-by: Chandan Kumar Rout Reviewed-by: Maciej Fijalkowski Signed-off-by: Larysa Zaremba Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 8693509efbe7..5dee829bfc47 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -165,7 +165,6 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) struct ice_q_vector *q_vector; struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; - int timeout = 50; int fail = 0; int err; @@ -176,13 +175,6 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) rx_ring = vsi->rx_rings[q_idx]; q_vector = rx_ring->q_vector; - while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) { - timeout--; - if (!timeout) - return -EBUSY; - usleep_range(1000, 2000); - } - synchronize_net(); netif_carrier_off(vsi->netdev); netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); @@ -261,7 +253,6 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); netif_carrier_on(vsi->netdev); } - clear_bit(ICE_CFG_BUSY, vsi->state); return fail; } -- cgit From 04c7e14e5b0b6227e7b00d7a96ca2f2426ab9171 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 23 Aug 2024 11:59:31 +0200 Subject: ice: do not bring the VSI up, if it was down before the XDP setup After XDP configuration is completed, we bring the interface up unconditionally, regardless of its state before the call to .ndo_bpf(). Preserve the information whether the interface had to be brought down and later bring it up only in such case. Fixes: efc2214b6047 ("ice: Add support for XDP") Reviewed-by: Wojciech Drewek Reviewed-by: Jacob Keller Tested-by: Chandan Kumar Rout Acked-by: Maciej Fijalkowski Signed-off-by: Larysa Zaremba Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 22b8ef5faf8d..c7db88b517da 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3005,8 +3005,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, struct netlink_ext_ack *extack) { unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD; - bool if_running = netif_running(vsi->netdev); int ret = 0, xdp_ring_err = 0; + bool if_running; if (prog && !prog->aux->xdp_has_frags) { if (frame_size > ice_max_xdp_frame_size(vsi)) { @@ -3023,8 +3023,11 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, return 0; } + if_running = netif_running(vsi->netdev) && + !test_and_set_bit(ICE_VSI_DOWN, vsi->state); + /* need to stop netdev while setting up the program for Rx rings */ - if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { + if (if_running) { ret = ice_down(vsi); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed"); -- cgit From cd9253c23aedd61eb5ff11f37a36247cd46faf86 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 29 Aug 2024 18:25:49 +0100 Subject: btrfs: fix race between direct IO write and fsync when using same fd If we have 2 threads that are using the same file descriptor and one of them is doing direct IO writes while the other is doing fsync, we have a race where we can end up either: 1) Attempt a fsync without holding the inode's lock, triggering an assertion failures when assertions are enabled; 2) Do an invalid memory access from the fsync task because the file private points to memory allocated on stack by the direct IO task and it may be used by the fsync task after the stack was destroyed. The race happens like this: 1) A user space program opens a file descriptor with O_DIRECT; 2) The program spawns 2 threads using libpthread for example; 3) One of the threads uses the file descriptor to do direct IO writes, while the other calls fsync using the same file descriptor. 4) Call task A the thread doing direct IO writes and task B the thread doing fsyncs; 5) Task A does a direct IO write, and at btrfs_direct_write() sets the file's private to an on stack allocated private with the member 'fsync_skip_inode_lock' set to true; 6) Task B enters btrfs_sync_file() and sees that there's a private structure associated to the file which has 'fsync_skip_inode_lock' set to true, so it skips locking the inode's VFS lock; 7) Task A completes the direct IO write, and resets the file's private to NULL since it had no prior private and our private was stack allocated. Then it unlocks the inode's VFS lock; 8) Task B enters btrfs_get_ordered_extents_for_logging(), then the assertion that checks the inode's VFS lock is held fails, since task B never locked it and task A has already unlocked it. The stack trace produced is the following: assertion failed: inode_is_locked(&inode->vfs_inode), in fs/btrfs/ordered-data.c:983 ------------[ cut here ]------------ kernel BUG at fs/btrfs/ordered-data.c:983! Oops: invalid opcode: 0000 [#1] PREEMPT SMP PTI CPU: 9 PID: 5072 Comm: worker Tainted: G U OE 6.10.5-1-default #1 openSUSE Tumbleweed 69f48d427608e1c09e60ea24c6c55e2ca1b049e8 Hardware name: Acer Predator PH315-52/Covini_CFS, BIOS V1.12 07/28/2020 RIP: 0010:btrfs_get_ordered_extents_for_logging.cold+0x1f/0x42 [btrfs] Code: 50 d6 86 c0 e8 (...) RSP: 0018:ffff9e4a03dcfc78 EFLAGS: 00010246 RAX: 0000000000000054 RBX: ffff9078a9868e98 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff907dce4a7800 RDI: ffff907dce4a7800 RBP: ffff907805518800 R08: 0000000000000000 R09: ffff9e4a03dcfb38 R10: ffff9e4a03dcfb30 R11: 0000000000000003 R12: ffff907684ae7800 R13: 0000000000000001 R14: ffff90774646b600 R15: 0000000000000000 FS: 00007f04b96006c0(0000) GS:ffff907dce480000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f32acbfc000 CR3: 00000001fd4fa005 CR4: 00000000003726f0 Call Trace: ? __die_body.cold+0x14/0x24 ? die+0x2e/0x50 ? do_trap+0xca/0x110 ? do_error_trap+0x6a/0x90 ? btrfs_get_ordered_extents_for_logging.cold+0x1f/0x42 [btrfs bb26272d49b4cdc847cf3f7faadd459b62caee9a] ? exc_invalid_op+0x50/0x70 ? btrfs_get_ordered_extents_for_logging.cold+0x1f/0x42 [btrfs bb26272d49b4cdc847cf3f7faadd459b62caee9a] ? asm_exc_invalid_op+0x1a/0x20 ? btrfs_get_ordered_extents_for_logging.cold+0x1f/0x42 [btrfs bb26272d49b4cdc847cf3f7faadd459b62caee9a] ? btrfs_get_ordered_extents_for_logging.cold+0x1f/0x42 [btrfs bb26272d49b4cdc847cf3f7faadd459b62caee9a] btrfs_sync_file+0x21a/0x4d0 [btrfs bb26272d49b4cdc847cf3f7faadd459b62caee9a] ? __seccomp_filter+0x31d/0x4f0 __x64_sys_fdatasync+0x4f/0x90 do_syscall_64+0x82/0x160 ? do_futex+0xcb/0x190 ? __x64_sys_futex+0x10e/0x1d0 ? switch_fpu_return+0x4f/0xd0 ? syscall_exit_to_user_mode+0x72/0x220 ? do_syscall_64+0x8e/0x160 ? syscall_exit_to_user_mode+0x72/0x220 ? do_syscall_64+0x8e/0x160 ? syscall_exit_to_user_mode+0x72/0x220 ? do_syscall_64+0x8e/0x160 ? syscall_exit_to_user_mode+0x72/0x220 ? do_syscall_64+0x8e/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e Another problem here is if task B grabs the private pointer and then uses it after task A has finished, since the private was allocated in the stack of task A, it results in some invalid memory access with a hard to predict result. This issue, triggering the assertion, was observed with QEMU workloads by two users in the Link tags below. Fix this by not relying on a file's private to pass information to fsync that it should skip locking the inode and instead pass this information through a special value stored in current->journal_info. This is safe because in the relevant section of the direct IO write path we are not holding a transaction handle, so current->journal_info is NULL. The following C program triggers the issue: $ cat repro.c /* Get the O_DIRECT definition. */ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include static int fd; static ssize_t do_write(int fd, const void *buf, size_t count, off_t offset) { while (count > 0) { ssize_t ret; ret = pwrite(fd, buf, count, offset); if (ret < 0) { if (errno == EINTR) continue; return ret; } count -= ret; buf += ret; } return 0; } static void *fsync_loop(void *arg) { while (1) { int ret; ret = fsync(fd); if (ret != 0) { perror("Fsync failed"); exit(6); } } } int main(int argc, char *argv[]) { long pagesize; void *write_buf; pthread_t fsyncer; int ret; if (argc != 2) { fprintf(stderr, "Use: %s \n", argv[0]); return 1; } fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT, 0666); if (fd == -1) { perror("Failed to open/create file"); return 1; } pagesize = sysconf(_SC_PAGE_SIZE); if (pagesize == -1) { perror("Failed to get page size"); return 2; } ret = posix_memalign(&write_buf, pagesize, pagesize); if (ret) { perror("Failed to allocate buffer"); return 3; } ret = pthread_create(&fsyncer, NULL, fsync_loop, NULL); if (ret != 0) { fprintf(stderr, "Failed to create writer thread: %d\n", ret); return 4; } while (1) { ret = do_write(fd, write_buf, pagesize, 0); if (ret != 0) { perror("Write failed"); exit(5); } } return 0; } $ mkfs.btrfs -f /dev/sdi $ mount /dev/sdi /mnt/sdi $ timeout 10 ./repro /mnt/sdi/foo Usually the race is triggered within less than 1 second. A test case for fstests will follow soon. Reported-by: Paulo Dias Link: https://bugzilla.kernel.org/show_bug.cgi?id=219187 Reported-by: Andreas Jahn Link: https://bugzilla.kernel.org/show_bug.cgi?id=219199 Reported-by: syzbot+4704b3cc972bd76024f1@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/00000000000044ff540620d7dee2@google.com/ Fixes: 939b656bc8ab ("btrfs: fix corruption after buffer fault in during direct IO append write") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 1 - fs/btrfs/direct-io.c | 16 +++------------- fs/btrfs/file.c | 9 +++++++-- fs/btrfs/transaction.h | 6 ++++++ 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 75fa563e4cac..c8568b1a61c4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -459,7 +459,6 @@ struct btrfs_file_private { void *filldir_buf; u64 last_index; struct extent_state *llseek_cached_state; - bool fsync_skip_inode_lock; }; static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info) diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c index 67adbe9d294a..364bce34f034 100644 --- a/fs/btrfs/direct-io.c +++ b/fs/btrfs/direct-io.c @@ -864,13 +864,6 @@ again: if (IS_ERR_OR_NULL(dio)) { ret = PTR_ERR_OR_ZERO(dio); } else { - struct btrfs_file_private stack_private = { 0 }; - struct btrfs_file_private *private; - const bool have_private = (file->private_data != NULL); - - if (!have_private) - file->private_data = &stack_private; - /* * If we have a synchronous write, we must make sure the fsync * triggered by the iomap_dio_complete() call below doesn't @@ -879,13 +872,10 @@ again: * partial writes due to the input buffer (or parts of it) not * being already faulted in. */ - private = file->private_data; - private->fsync_skip_inode_lock = true; + ASSERT(current->journal_info == NULL); + current->journal_info = BTRFS_TRANS_DIO_WRITE_STUB; ret = iomap_dio_complete(dio); - private->fsync_skip_inode_lock = false; - - if (!have_private) - file->private_data = NULL; + current->journal_info = NULL; } /* No increment (+=) because iomap returns a cumulative value. */ diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9914419f3b7d..2aeb8116549c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1603,7 +1603,6 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) */ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { - struct btrfs_file_private *private = file->private_data; struct dentry *dentry = file_dentry(file); struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); struct btrfs_root *root = inode->root; @@ -1613,7 +1612,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ret = 0, err; u64 len; bool full_sync; - const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false); + bool skip_ilock = false; + + if (current->journal_info == BTRFS_TRANS_DIO_WRITE_STUB) { + skip_ilock = true; + current->journal_info = NULL; + lockdep_assert_held(&inode->vfs_inode.i_rwsem); + } trace_btrfs_sync_file(file, datasync); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 98c03ddc760b..dd9ce9b9f69e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -27,6 +27,12 @@ struct btrfs_root_item; struct btrfs_root; struct btrfs_path; +/* + * Signal that a direct IO write is in progress, to avoid deadlock for sync + * direct IO writes when fsync is called during the direct IO write path. + */ +#define BTRFS_TRANS_DIO_WRITE_STUB ((void *) 1) + /* Radix-tree tag for roots that are part of the trasaction. */ #define BTRFS_ROOT_TRANS_TAG 0 -- cgit From 5872b47ce18efad5862b74ad334cbdfffa7f8a0c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 2 Sep 2024 10:09:37 +0100 Subject: MAINTAINERS: wifi: cw1200: add net-cw1200.h This is part of an effort [1] to assign a section in MAINTAINERS to header files that relate to Networking. In this case the files with "net" in their name. [1] https://lore.kernel.org/netdev/20240821-net-mnt-v2-0-59a5af38e69d@kernel.org/ It seems that net-cw1200.h is part of the CW1200 WLAN driver and this it is appropriate to add it to the section for that driver. Signed-off-by: Simon Horman Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240902-wifi-mnt-v2-1-f5ad1f36e993@kernel.org --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8766f3e5e87e..d705e22f1d28 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5951,6 +5951,7 @@ F: Documentation/process/cve.rst CW1200 WLAN driver S: Orphan F: drivers/net/wireless/st/cw1200/ +F: include/linux/platform_data/net-cw1200.h CX18 VIDEO4LINUX DRIVER M: Andy Walls -- cgit From bab8eb0dd4cb995caa4a0529d5655531c2ec5e8e Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 29 Aug 2024 19:50:55 +0200 Subject: usbnet: modern method to get random MAC The driver generates a random MAC once on load and uses it over and over, including on two devices needing a random MAC at the same time. Jakub suggested revamping the driver to the modern API for setting a random MAC rather than fixing the old stuff. The bug is as old as the driver. Signed-off-by: Oliver Neukum Reviewed-by: Simon Horman Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://patch.msgid.link/20240829175201.670718-1-oneukum@suse.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 9fd516e8bb10..18eb5ba436df 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -61,9 +61,6 @@ /*-------------------------------------------------------------------------*/ -// randomly generated ethernet address -static u8 node_id [ETH_ALEN]; - /* use ethtool to change the level for any given device */ static int msg_level = -1; module_param (msg_level, int, 0); @@ -1725,7 +1722,6 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) dev->net = net; strscpy(net->name, "usb%d", sizeof(net->name)); - eth_hw_addr_set(net, node_id); /* rx and tx sides can use different message sizes; * bind() should set rx_urb_size in that case. @@ -1801,9 +1797,9 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) goto out4; } - /* let userspace know we have a random address */ - if (ether_addr_equal(net->dev_addr, node_id)) - net->addr_assign_type = NET_ADDR_RANDOM; + /* this flags the device for user space */ + if (!is_valid_ether_addr(net->dev_addr)) + eth_hw_addr_random(net); if ((dev->driver_info->flags & FLAG_WLAN) != 0) SET_NETDEV_DEVTYPE(net, &wlan_type); @@ -2211,7 +2207,6 @@ static int __init usbnet_init(void) BUILD_BUG_ON( sizeof_field(struct sk_buff, cb) < sizeof(struct skb_data)); - eth_random_addr(node_id); return 0; } module_init(usbnet_init); -- cgit From 3b3a2a9c6349e25a025d2330f479bc33a6ccb54a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 1 Sep 2024 11:16:07 -0700 Subject: sch/netem: fix use after free in netem_dequeue If netem_dequeue() enqueues packet to inner qdisc and that qdisc returns __NET_XMIT_STOLEN. The packet is dropped but qdisc_tree_reduce_backlog() is not called to update the parent's q.qlen, leading to the similar use-after-free as Commit e04991a48dbaf382 ("netem: fix return value if duplicate enqueue fails") Commands to trigger KASAN UaF: ip link add type dummy ip link set lo up ip link set dummy0 up tc qdisc add dev lo parent root handle 1: drr tc filter add dev lo parent 1: basic classid 1:1 tc class add dev lo classid 1:1 drr tc qdisc add dev lo parent 1:1 handle 2: netem tc qdisc add dev lo parent 2: handle 3: drr tc filter add dev lo parent 3: basic classid 3:1 action mirred egress redirect dev dummy0 tc class add dev lo classid 3:1 drr ping -c1 -W0.01 localhost # Trigger bug tc class del dev lo classid 1:1 tc class add dev lo classid 1:1 drr ping -c1 -W0.01 localhost # UaF Fixes: 50612537e9ab ("netem: fix classful handling") Reported-by: Budimir Markovic Signed-off-by: Stephen Hemminger Link: https://patch.msgid.link/20240901182438.4992-1-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 0f8d581438c3..39382ee1e331 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -742,11 +742,10 @@ deliver: err = qdisc_enqueue(skb, q->qdisc, &to_free); kfree_skb_list(to_free); - if (err != NET_XMIT_SUCCESS && - net_xmit_drop_count(err)) { - qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, - pkt_len); + if (err != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(err)) + qdisc_qstats_drop(sch); + qdisc_tree_reduce_backlog(sch, 1, pkt_len); } goto tfifo_dequeue; } -- cgit From 77461c10819103eaee7b33c744174b32a8c78b40 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 2 Sep 2024 03:17:30 -0700 Subject: net: dqs: Do not use extern for unused dql_group When CONFIG_DQL is not enabled, dql_group should be treated as a dead declaration. However, its current extern declaration assumes the linker will ignore it, which is generally true across most compiler and architecture combinations. But in certain cases, the linker still attempts to resolve the extern struct, even when the associated code is dead, resulting in a linking error. For instance the following error in loongarch64: >> loongarch64-linux-ld: net-sysfs.c:(.text+0x589c): undefined reference to `dql_group' Modify the declaration of the dead object to be an empty declaration instead of an extern. This change will prevent the linker from attempting to resolve an undefined reference. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202409012047.eCaOdfQJ-lkp@intel.com/ Fixes: 74293ea1c4db ("net: sysfs: Do not create sysfs for non BQL device") Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Tested-by: Simon Horman # build-tested Link: https://patch.msgid.link/20240902101734.3260455-1-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/core/net-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 444f23e74f8e..291fdf4a328b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1524,7 +1524,7 @@ static const struct attribute_group dql_group = { }; #else /* Fake declaration, all the code using it should be dead */ -extern const struct attribute_group dql_group; +static const struct attribute_group dql_group = {}; #endif /* CONFIG_BQL */ #ifdef CONFIG_XPS -- cgit From 33f339a1ba54e56bba57ee9a77c71e385ab4825c Mon Sep 17 00:00:00 2001 From: Tze-nan Wu Date: Fri, 30 Aug 2024 16:25:17 +0800 Subject: bpf, net: Fix a potential race in do_sock_getsockopt() There's a potential race when `cgroup_bpf_enabled(CGROUP_GETSOCKOPT)` is false during the execution of `BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN`, but becomes true when `BPF_CGROUP_RUN_PROG_GETSOCKOPT` is called. This inconsistency can lead to `BPF_CGROUP_RUN_PROG_GETSOCKOPT` receiving an "-EFAULT" from `__cgroup_bpf_run_filter_getsockopt(max_optlen=0)`. Scenario shown as below: `process A` `process B` ----------- ------------ BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN enable CGROUP_GETSOCKOPT BPF_CGROUP_RUN_PROG_GETSOCKOPT (-EFAULT) To resolve this, remove the `BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN` macro and directly uses `copy_from_sockptr` to ensure that `max_optlen` is always set before `BPF_CGROUP_RUN_PROG_GETSOCKOPT` is invoked. Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Co-developed-by: Yanghui Li Signed-off-by: Yanghui Li Co-developed-by: Cheng-Jui Wang Signed-off-by: Cheng-Jui Wang Signed-off-by: Tze-nan Wu Acked-by: Stanislav Fomichev Acked-by: Alexei Starovoitov Link: https://patch.msgid.link/20240830082518.23243-1-Tze-nan.Wu@mediatek.com Signed-off-by: Jakub Kicinski --- include/linux/bpf-cgroup.h | 9 --------- net/socket.c | 4 ++-- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index fb3c3e7181e6..ce91d9b2acb9 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -390,14 +390,6 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, __ret; \ }) -#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \ -({ \ - int __ret = 0; \ - if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \ - copy_from_sockptr(&__ret, optlen, sizeof(int)); \ - __ret; \ -}) - #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \ max_optlen, retval) \ ({ \ @@ -518,7 +510,6 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) -#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ optlen, max_optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ diff --git a/net/socket.c b/net/socket.c index fcbdd5bc47ac..0a2bd22ec105 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2362,7 +2362,7 @@ INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, int do_sock_getsockopt(struct socket *sock, bool compat, int level, int optname, sockptr_t optval, sockptr_t optlen) { - int max_optlen __maybe_unused; + int max_optlen __maybe_unused = 0; const struct proto_ops *ops; int err; @@ -2371,7 +2371,7 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level, return err; if (!compat) - max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen); + copy_from_sockptr(&max_optlen, optlen, sizeof(int)); ops = READ_ONCE(sock->ops); if (level == SOL_SOCKET) { -- cgit From cff56ff737e2da095ca9f228e2c5afc903bfca69 Mon Sep 17 00:00:00 2001 From: Andreas Hindborg Date: Tue, 3 Sep 2024 22:09:48 +0200 Subject: MAINTAINERS: update Andreas Hindborg's email address Move away from corporate infrastructure for upstream work. Also update mailmap. Signed-off-by: Andreas Hindborg Link: https://lore.kernel.org/r/20240903200956.68231-1-a.hindborg@kernel.org [ Reworded title slightly. - Miguel ] Signed-off-by: Miguel Ojeda --- .mailmap | 1 + MAINTAINERS | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index 8ee01d9d7046..8615526994f7 100644 --- a/.mailmap +++ b/.mailmap @@ -60,6 +60,7 @@ Amit Nischal Andi Kleen Andi Shyti Andreas Herrmann +Andreas Hindborg Andrej Shadura Andrej Shadura Andrew Morton diff --git a/MAINTAINERS b/MAINTAINERS index 682708bdb940..05ca3f940874 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3863,7 +3863,7 @@ F: kernel/trace/blktrace.c F: lib/sbitmap.c BLOCK LAYER DEVICE DRIVER API [RUST] -M: Andreas Hindborg +M: Andreas Hindborg R: Boqun Feng L: linux-block@vger.kernel.org L: rust-for-linux@vger.kernel.org @@ -19911,7 +19911,7 @@ R: Boqun Feng R: Gary Guo R: Björn Roy Baron R: Benno Lossin -R: Andreas Hindborg +R: Andreas Hindborg R: Alice Ryhl L: rust-for-linux@vger.kernel.org S: Supported -- cgit From 53f6619554fb1edf8d7599b560d44dbea085c730 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 1 Sep 2024 18:09:18 -0400 Subject: bcachefs: BCH_SB_MEMBER_INVALID Create a sentinal value for "invalid device". This is needed for removing devices that have stripes on them (force removing, without evacuating); we need a sentinal value for the stripe pointers to the device being removed. Signed-off-by: Kent Overstreet --- fs/bcachefs/replicas.c | 3 ++- fs/bcachefs/sb-members.c | 3 ++- fs/bcachefs/sb-members_format.h | 5 +++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 12b1d28b7eb4..12d4de65ae17 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -82,7 +82,8 @@ int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r, } for (unsigned i = 0; i < r->nr_devs; i++) - if (!bch2_member_exists(sb, r->devs[i])) { + if (r->devs[i] != BCH_SB_MEMBER_INVALID && + !bch2_member_exists(sb, r->devs[i])) { prt_printf(err, "invalid device %u in entry ", r->devs[i]); goto bad; } diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 39196f2a4197..4b765422dd77 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -11,7 +11,8 @@ void bch2_dev_missing(struct bch_fs *c, unsigned dev) { - bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); + if (dev != BCH_SB_MEMBER_INVALID) + bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); } void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket) diff --git a/fs/bcachefs/sb-members_format.h b/fs/bcachefs/sb-members_format.h index e2630548c0f6..d727d2dfda08 100644 --- a/fs/bcachefs/sb-members_format.h +++ b/fs/bcachefs/sb-members_format.h @@ -8,6 +8,11 @@ */ #define BCH_SB_MEMBERS_MAX 64 +/* + * Sentinal value - indicates a device that does not exist + */ +#define BCH_SB_MEMBER_INVALID 255 + #define BCH_MIN_NR_NBUCKETS (1 << 6) #define BCH_IOPS_MEASUREMENTS() \ -- cgit From b6ecc662037694488bfff7c9fd21c405df8411f2 Mon Sep 17 00:00:00 2001 From: Souradeep Chakrabarti Date: Mon, 2 Sep 2024 05:43:47 -0700 Subject: net: mana: Fix error handling in mana_create_txq/rxq's NAPI cleanup Currently napi_disable() gets called during rxq and txq cleanup, even before napi is enabled and hrtimer is initialized. It causes kernel panic. ? page_fault_oops+0x136/0x2b0 ? page_counter_cancel+0x2e/0x80 ? do_user_addr_fault+0x2f2/0x640 ? refill_obj_stock+0xc4/0x110 ? exc_page_fault+0x71/0x160 ? asm_exc_page_fault+0x27/0x30 ? __mmdrop+0x10/0x180 ? __mmdrop+0xec/0x180 ? hrtimer_active+0xd/0x50 hrtimer_try_to_cancel+0x2c/0xf0 hrtimer_cancel+0x15/0x30 napi_disable+0x65/0x90 mana_destroy_rxq+0x4c/0x2f0 mana_create_rxq.isra.0+0x56c/0x6d0 ? mana_uncfg_vport+0x50/0x50 mana_alloc_queues+0x21b/0x320 ? skb_dequeue+0x5f/0x80 Cc: stable@vger.kernel.org Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ") Signed-off-by: Souradeep Chakrabarti Reviewed-by: Haiyang Zhang Reviewed-by: Shradha Gupta Signed-off-by: David S. Miller --- drivers/net/ethernet/microsoft/mana/mana_en.c | 22 +++++++++++++--------- include/net/mana/mana.h | 2 ++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 39f56973746d..3d151700f658 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1872,10 +1872,12 @@ static void mana_destroy_txq(struct mana_port_context *apc) for (i = 0; i < apc->num_queues; i++) { napi = &apc->tx_qp[i].tx_cq.napi; - napi_synchronize(napi); - napi_disable(napi); - netif_napi_del(napi); - + if (apc->tx_qp[i].txq.napi_initialized) { + napi_synchronize(napi); + napi_disable(napi); + netif_napi_del(napi); + apc->tx_qp[i].txq.napi_initialized = false; + } mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq); @@ -1931,6 +1933,7 @@ static int mana_create_txq(struct mana_port_context *apc, txq->ndev = net; txq->net_txq = netdev_get_tx_queue(net, i); txq->vp_offset = apc->tx_vp_offset; + txq->napi_initialized = false; skb_queue_head_init(&txq->pending_skbs); memset(&spec, 0, sizeof(spec)); @@ -1997,6 +2000,7 @@ static int mana_create_txq(struct mana_port_context *apc, netif_napi_add_tx(net, &cq->napi, mana_poll); napi_enable(&cq->napi); + txq->napi_initialized = true; mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); } @@ -2008,7 +2012,7 @@ out: } static void mana_destroy_rxq(struct mana_port_context *apc, - struct mana_rxq *rxq, bool validate_state) + struct mana_rxq *rxq, bool napi_initialized) { struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; @@ -2023,15 +2027,15 @@ static void mana_destroy_rxq(struct mana_port_context *apc, napi = &rxq->rx_cq.napi; - if (validate_state) + if (napi_initialized) { napi_synchronize(napi); - napi_disable(napi); + napi_disable(napi); + netif_napi_del(napi); + } xdp_rxq_info_unreg(&rxq->xdp_rxq); - netif_napi_del(napi); - mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); mana_deinit_cq(apc, &rxq->rx_cq); diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 7caa334f4888..b8a6c7504ee1 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -98,6 +98,8 @@ struct mana_txq { atomic_t pending_sends; + bool napi_initialized; + struct mana_stats_tx stats; }; -- cgit From 0d437918fb6473d25fb83188c2d6040f47acfbcd Mon Sep 17 00:00:00 2001 From: Yuntao Liu Date: Wed, 21 Aug 2024 07:34:41 +0100 Subject: ARM: 9414/1: Fix build issue with LD_DEAD_CODE_DATA_ELIMINATION There is a build issue with LD segmentation fault, while CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is not enabled, as bellow. scripts/link-vmlinux.sh: line 49: 3796 Segmentation fault (core dumped) ${ld} ${ldflags} -o ${output} ${wl}--whole-archive ${objs} ${wl}--no-whole-archive ${wl}--start-group ${libs} ${wl}--end-group ${kallsymso} ${btf_vmlinux_bin_o} ${ldlibs} The error occurs in older versions of the GNU ld with version earlier than 2.36. It makes most sense to have a minimum LD version as a dependency for HAVE_LD_DEAD_CODE_DATA_ELIMINATION and eliminate the impact of ".reloc .text, R_ARM_NONE, ." when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is not enabled. Fixes: ed0f94102251 ("ARM: 9404/1: arm32: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION") Reported-by: Harith George Tested-by: Harith George Suggested-by: Arnd Bergmann Acked-by: Arnd Bergmann Signed-off-by: Yuntao Liu Link: https://lore.kernel.org/all/14e9aefb-88d1-4eee-8288-ef15d4a9b059@gmail.com/ Signed-off-by: Russell King (Oracle) --- arch/arm/Kconfig | 2 +- arch/arm/kernel/entry-armv.S | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 54b2bb817a7f..173159e93c99 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -117,7 +117,7 @@ config ARM select HAVE_KERNEL_XZ select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M select HAVE_KRETPROBES if HAVE_KPROBES - select HAVE_LD_DEAD_CODE_DATA_ELIMINATION + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if (LD_VERSION >= 23600 || LD_IS_LLD) select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_OPTPROBES if !THUMB2_KERNEL diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index f01d23a220e6..1dfae1af8e31 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -29,6 +29,12 @@ #include "entry-header.S" #include +#ifdef CONFIG_HAVE_LD_DEAD_CODE_DATA_ELIMINATION +#define RELOC_TEXT_NONE .reloc .text, R_ARM_NONE, . +#else +#define RELOC_TEXT_NONE +#endif + /* * Interrupt handling. */ @@ -1065,7 +1071,7 @@ vector_addrexcptn: .globl vector_fiq .section .vectors, "ax", %progbits - .reloc .text, R_ARM_NONE, . + RELOC_TEXT_NONE W(b) vector_rst W(b) vector_und ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_swi ) @@ -1079,7 +1085,7 @@ THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_swi ) #ifdef CONFIG_HARDEN_BRANCH_HISTORY .section .vectors.bhb.loop8, "ax", %progbits - .reloc .text, R_ARM_NONE, . + RELOC_TEXT_NONE W(b) vector_rst W(b) vector_bhb_loop8_und ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_loop8_swi ) @@ -1092,7 +1098,7 @@ THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_loop8_swi ) W(b) vector_bhb_loop8_fiq .section .vectors.bhb.bpiall, "ax", %progbits - .reloc .text, R_ARM_NONE, . + RELOC_TEXT_NONE W(b) vector_rst W(b) vector_bhb_bpiall_und ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_bpiall_swi ) -- cgit From bb6705c3f93bed2af03d43691743d4c43e3c8e6f Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Sat, 31 Aug 2024 14:47:02 +0900 Subject: bpf: add check for invalid name in btf_name_valid_section() If the length of the name string is 1 and the value of name[0] is NULL byte, an OOB vulnerability occurs in btf_name_valid_section() and the return value is true, so the invalid name passes the check. To solve this, you need to check if the first position is NULL byte and if the first character is printable. Suggested-by: Eduard Zingerman Fixes: bd70a8fb7ca4 ("bpf: Allow all printable characters in BTF DATASEC names") Signed-off-by: Jeongjun Park Link: https://lore.kernel.org/r/20240831054702.364455-1-aha310510@gmail.com Signed-off-by: Alexei Starovoitov Acked-by: Eduard Zingerman --- kernel/bpf/btf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index e3377dd61f7e..a4e4f8d43ecf 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -823,9 +823,11 @@ static bool btf_name_valid_section(const struct btf *btf, u32 offset) const char *src = btf_str_by_offset(btf, offset); const char *src_limit; + if (!*src) + return false; + /* set a limit on identifier length */ src_limit = src + KSYM_NAME_LEN; - src++; while (*src && src < src_limit) { if (!isprint(*src)) return false; -- cgit From 743070894724bf5ee0b2c77a28f838f6244d19bd Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Sat, 31 Aug 2024 14:47:42 +0900 Subject: selftests/bpf: Add a selftest to check for incorrect names Add selftest for cases where btf_name_valid_section() does not properly check for certain types of names. Suggested-by: Eduard Zingerman Signed-off-by: Jeongjun Park Link: https://lore.kernel.org/r/20240831054742.364585-1-aha310510@gmail.com Signed-off-by: Alexei Starovoitov Acked-by: Eduard Zingerman --- tools/testing/selftests/bpf/prog_tests/btf.c | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 00965a6e83bb..61de88cf4ad0 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3550,6 +3550,40 @@ static struct btf_raw_test raw_tests[] = { }, BTF_STR_SEC("\0x\0?.foo bar:buz"), }, +{ + .descr = "datasec: name with non-printable first char not is ok", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC ?.data */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0x\0\7foo"), + .err_str = "Invalid name", + .btf_load_err = true, +}, +{ + .descr = "datasec: name '\\0' is not ok", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC \0 */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0x\0"), + .err_str = "Invalid name", + .btf_load_err = true, +}, { .descr = "type name '?foo' is not ok", .raw_types = { -- cgit From 4963d2343af81f493519f9c3ea9f2169eaa7353a Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 30 Aug 2024 17:31:07 +0200 Subject: bareudp: Fix device stats updates. Bareudp devices update their stats concurrently. Therefore they need proper atomic increments. Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.") Signed-off-by: Guillaume Nault Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/04b7b9d0b480158eb3ab4366ec80aa2ab7e41fcb.1725031794.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index d5c56ca91b77..7aca0544fb29 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -83,7 +83,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (skb_copy_bits(skb, BAREUDP_BASE_HLEN, &ipversion, sizeof(ipversion))) { - bareudp->dev->stats.rx_dropped++; + DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } ipversion >>= 4; @@ -93,7 +93,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) } else if (ipversion == 6 && bareudp->multi_proto_mode) { proto = htons(ETH_P_IPV6); } else { - bareudp->dev->stats.rx_dropped++; + DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } } else if (bareudp->ethertype == htons(ETH_P_MPLS_UC)) { @@ -107,7 +107,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) ipv4_is_multicast(tunnel_hdr->daddr)) { proto = htons(ETH_P_MPLS_MC); } else { - bareudp->dev->stats.rx_dropped++; + DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } } else { @@ -123,7 +123,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) (addr_type & IPV6_ADDR_MULTICAST)) { proto = htons(ETH_P_MPLS_MC); } else { - bareudp->dev->stats.rx_dropped++; + DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } } @@ -135,7 +135,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) proto, !net_eq(bareudp->net, dev_net(bareudp->dev)))) { - bareudp->dev->stats.rx_dropped++; + DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } @@ -143,7 +143,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) tun_dst = udp_tun_rx_dst(skb, family, key, 0, 0); if (!tun_dst) { - bareudp->dev->stats.rx_dropped++; + DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } skb_dst_set(skb, &tun_dst->dst); @@ -169,8 +169,8 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) &((struct ipv6hdr *)oiph)->saddr); } if (err > 1) { - ++bareudp->dev->stats.rx_frame_errors; - ++bareudp->dev->stats.rx_errors; + DEV_STATS_INC(bareudp->dev, rx_frame_errors); + DEV_STATS_INC(bareudp->dev, rx_errors); goto drop; } } @@ -467,11 +467,11 @@ tx_error: dev_kfree_skb(skb); if (err == -ELOOP) - dev->stats.collisions++; + DEV_STATS_INC(dev, collisions); else if (err == -ENETUNREACH) - dev->stats.tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); - dev->stats.tx_errors++; + DEV_STATS_INC(dev, tx_errors); return NETDEV_TX_OK; } -- cgit From 7e4196935069947d8b70b09c1660b67b067e75cb Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 2 Sep 2024 10:39:27 -0700 Subject: fou: Fix null-ptr-deref in GRO. We observed a null-ptr-deref in fou_gro_receive() while shutting down a host. [0] The NULL pointer is sk->sk_user_data, and the offset 8 is of protocol in struct fou. When fou_release() is called due to netns dismantle or explicit tunnel teardown, udp_tunnel_sock_release() sets NULL to sk->sk_user_data. Then, the tunnel socket is destroyed after a single RCU grace period. So, in-flight udp4_gro_receive() could find the socket and execute the FOU GRO handler, where sk->sk_user_data could be NULL. Let's use rcu_dereference_sk_user_data() in fou_from_sock() and add NULL checks in FOU GRO handlers. [0]: BUG: kernel NULL pointer dereference, address: 0000000000000008 PF: supervisor read access in kernel mode PF: error_code(0x0000) - not-present page PGD 80000001032f4067 P4D 80000001032f4067 PUD 103240067 PMD 0 SMP PTI CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.10.216-204.855.amzn2.x86_64 #1 Hardware name: Amazon EC2 c5.large/, BIOS 1.0 10/16/2017 RIP: 0010:fou_gro_receive (net/ipv4/fou.c:233) [fou] Code: 41 5f c3 cc cc cc cc e8 e7 2e 69 f4 0f 1f 80 00 00 00 00 0f 1f 44 00 00 49 89 f8 41 54 48 89 f7 48 89 d6 49 8b 80 88 02 00 00 <0f> b6 48 08 0f b7 42 4a 66 25 fd fd 80 cc 02 66 89 42 4a 0f b6 42 RSP: 0018:ffffa330c0003d08 EFLAGS: 00010297 RAX: 0000000000000000 RBX: ffff93d9e3a6b900 RCX: 0000000000000010 RDX: ffff93d9e3a6b900 RSI: ffff93d9e3a6b900 RDI: ffff93dac2e24d08 RBP: ffff93d9e3a6b900 R08: ffff93dacbce6400 R09: 0000000000000002 R10: 0000000000000000 R11: ffffffffb5f369b0 R12: ffff93dacbce6400 R13: ffff93dac2e24d08 R14: 0000000000000000 R15: ffffffffb4edd1c0 FS: 0000000000000000(0000) GS:ffff93daee800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000102140001 CR4: 00000000007706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? show_trace_log_lvl (arch/x86/kernel/dumpstack.c:259) ? __die_body.cold (arch/x86/kernel/dumpstack.c:478 arch/x86/kernel/dumpstack.c:420) ? no_context (arch/x86/mm/fault.c:752) ? exc_page_fault (arch/x86/include/asm/irqflags.h:49 arch/x86/include/asm/irqflags.h:89 arch/x86/mm/fault.c:1435 arch/x86/mm/fault.c:1483) ? asm_exc_page_fault (arch/x86/include/asm/idtentry.h:571) ? fou_gro_receive (net/ipv4/fou.c:233) [fou] udp_gro_receive (include/linux/netdevice.h:2552 net/ipv4/udp_offload.c:559) udp4_gro_receive (net/ipv4/udp_offload.c:604) inet_gro_receive (net/ipv4/af_inet.c:1549 (discriminator 7)) dev_gro_receive (net/core/dev.c:6035 (discriminator 4)) napi_gro_receive (net/core/dev.c:6170) ena_clean_rx_irq (drivers/amazon/net/ena/ena_netdev.c:1558) [ena] ena_io_poll (drivers/amazon/net/ena/ena_netdev.c:1742) [ena] napi_poll (net/core/dev.c:6847) net_rx_action (net/core/dev.c:6917) __do_softirq (arch/x86/include/asm/jump_label.h:25 include/linux/jump_label.h:200 include/trace/events/irq.h:142 kernel/softirq.c:299) asm_call_irq_on_stack (arch/x86/entry/entry_64.S:809) do_softirq_own_stack (arch/x86/include/asm/irq_stack.h:27 arch/x86/include/asm/irq_stack.h:77 arch/x86/kernel/irq_64.c:77) irq_exit_rcu (kernel/softirq.c:393 kernel/softirq.c:423 kernel/softirq.c:435) common_interrupt (arch/x86/kernel/irq.c:239) asm_common_interrupt (arch/x86/include/asm/idtentry.h:626) RIP: 0010:acpi_idle_do_entry (arch/x86/include/asm/irqflags.h:49 arch/x86/include/asm/irqflags.h:89 drivers/acpi/processor_idle.c:114 drivers/acpi/processor_idle.c:575) Code: 8b 15 d1 3c c4 02 ed c3 cc cc cc cc 65 48 8b 04 25 40 ef 01 00 48 8b 00 a8 08 75 eb 0f 1f 44 00 00 0f 00 2d d5 09 55 00 fb f4 c3 cc cc cc cc e9 be fc ff ff 66 66 2e 0f 1f 84 00 00 00 00 00 RSP: 0018:ffffffffb5603e58 EFLAGS: 00000246 RAX: 0000000000004000 RBX: ffff93dac0929c00 RCX: ffff93daee833900 RDX: ffff93daee800000 RSI: ffff93daee87dc00 RDI: ffff93daee87dc64 RBP: 0000000000000001 R08: ffffffffb5e7b6c0 R09: 0000000000000044 R10: ffff93daee831b04 R11: 00000000000001cd R12: 0000000000000001 R13: ffffffffb5e7b740 R14: 0000000000000001 R15: 0000000000000000 ? sched_clock_cpu (kernel/sched/clock.c:371) acpi_idle_enter (drivers/acpi/processor_idle.c:712 (discriminator 3)) cpuidle_enter_state (drivers/cpuidle/cpuidle.c:237) cpuidle_enter (drivers/cpuidle/cpuidle.c:353) cpuidle_idle_call (kernel/sched/idle.c:158 kernel/sched/idle.c:239) do_idle (kernel/sched/idle.c:302) cpu_startup_entry (kernel/sched/idle.c:395 (discriminator 1)) start_kernel (init/main.c:1048) secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:310) Modules linked in: udp_diag tcp_diag inet_diag nft_nat ipip tunnel4 dummy fou ip_tunnel nft_masq nft_chain_nat nf_nat wireguard nft_ct curve25519_x86_64 libcurve25519_generic nf_conntrack libchacha20poly1305 nf_defrag_ipv6 nf_defrag_ipv4 nft_objref chacha_x86_64 nft_counter nf_tables nfnetlink poly1305_x86_64 ip6_udp_tunnel udp_tunnel libchacha crc32_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper mousedev psmouse button ena ptp pps_core crc32c_intel CR2: 0000000000000008 Fixes: d92283e338f6 ("fou: change to use UDP socket GRO") Reported-by: Alphonse Kurian Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20240902173927.62706-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv4/fou_core.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c index 0abbc413e0fe..78b869b31492 100644 --- a/net/ipv4/fou_core.c +++ b/net/ipv4/fou_core.c @@ -50,7 +50,7 @@ struct fou_net { static inline struct fou *fou_from_sock(struct sock *sk) { - return sk->sk_user_data; + return rcu_dereference_sk_user_data(sk); } static int fou_recv_pull(struct sk_buff *skb, struct fou *fou, size_t len) @@ -233,9 +233,15 @@ static struct sk_buff *fou_gro_receive(struct sock *sk, struct sk_buff *skb) { const struct net_offload __rcu **offloads; - u8 proto = fou_from_sock(sk)->protocol; + struct fou *fou = fou_from_sock(sk); const struct net_offload *ops; struct sk_buff *pp = NULL; + u8 proto; + + if (!fou) + goto out; + + proto = fou->protocol; /* We can clear the encap_mark for FOU as we are essentially doing * one of two possible things. We are either adding an L4 tunnel @@ -263,14 +269,24 @@ static int fou_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) { const struct net_offload __rcu **offloads; - u8 proto = fou_from_sock(sk)->protocol; + struct fou *fou = fou_from_sock(sk); const struct net_offload *ops; - int err = -ENOSYS; + u8 proto; + int err; + + if (!fou) { + err = -ENOENT; + goto out; + } + + proto = fou->protocol; offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); - if (WARN_ON(!ops || !ops->callbacks.gro_complete)) + if (WARN_ON(!ops || !ops->callbacks.gro_complete)) { + err = -ENOSYS; goto out; + } err = ops->callbacks.gro_complete(skb, nhoff); @@ -320,6 +336,9 @@ static struct sk_buff *gue_gro_receive(struct sock *sk, struct gro_remcsum grc; u8 proto; + if (!fou) + goto out; + skb_gro_remcsum_init(&grc); off = skb_gro_offset(skb); -- cgit From 8487b4af59d4d7feda4b119dc2d92c67ca25c27e Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Tue, 3 Sep 2024 14:33:33 +0800 Subject: r8152: fix the firmware doesn't work generic_ocp_write() asks the parameter "size" must be 4 bytes align. Therefore, write the bp would fail, if the mac->bp_num is odd. Align the size to 4 for fixing it. The way may write an extra bp, but the rtl8152_is_fw_mac_ok() makes sure the value must be 0 for the bp whose index is more than mac->bp_num. That is, there is no influence for the firmware. Besides, I check the return value of generic_ocp_write() to make sure everything is correct. Fixes: e5c266a61186 ("r8152: set bp in bulk") Signed-off-by: Hayes Wang Link: https://patch.msgid.link/20240903063333.4502-1-hayeswang@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 15e12f46d0ea..a5612c799f5e 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5178,14 +5178,23 @@ static void rtl8152_fw_mac_apply(struct r8152 *tp, struct fw_mac *mac) data = (u8 *)mac; data += __le16_to_cpu(mac->fw_offset); - generic_ocp_write(tp, __le16_to_cpu(mac->fw_reg), 0xff, length, data, - type); + if (generic_ocp_write(tp, __le16_to_cpu(mac->fw_reg), 0xff, length, + data, type) < 0) { + dev_err(&tp->intf->dev, "Write %s fw fail\n", + type ? "PLA" : "USB"); + return; + } ocp_write_word(tp, type, __le16_to_cpu(mac->bp_ba_addr), __le16_to_cpu(mac->bp_ba_value)); - generic_ocp_write(tp, __le16_to_cpu(mac->bp_start), BYTE_EN_DWORD, - __le16_to_cpu(mac->bp_num) << 1, mac->bp, type); + if (generic_ocp_write(tp, __le16_to_cpu(mac->bp_start), BYTE_EN_DWORD, + ALIGN(__le16_to_cpu(mac->bp_num) << 1, 4), + mac->bp, type) < 0) { + dev_err(&tp->intf->dev, "Write %s bp fail\n", + type ? "PLA" : "USB"); + return; + } bp_en_addr = __le16_to_cpu(mac->bp_en_addr); if (bp_en_addr) -- cgit From bee2ef946d3184e99077be526567d791c473036f Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 3 Sep 2024 10:19:57 +0200 Subject: net: bridge: br_fdb_external_learn_add(): always set EXT_LEARN When userspace wants to take over a fdb entry by setting it as EXTERN_LEARNED, we set both flags BR_FDB_ADDED_BY_EXT_LEARN and BR_FDB_ADDED_BY_USER in br_fdb_external_learn_add(). If the bridge updates the entry later because its port changed, we clear the BR_FDB_ADDED_BY_EXT_LEARN flag, but leave the BR_FDB_ADDED_BY_USER flag set. If userspace then wants to take over the entry again, br_fdb_external_learn_add() sees that BR_FDB_ADDED_BY_USER and skips setting the BR_FDB_ADDED_BY_EXT_LEARN flags, thus silently ignores the update. Fix this by always allowing to set BR_FDB_ADDED_BY_EXT_LEARN regardless if this was a user fdb entry or not. Fixes: 710ae7287737 ("net: bridge: Mark FDB entries that were added by user as such") Signed-off-by: Jonas Gorski Acked-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20240903081958.29951-1-jonas.gorski@bisdn.de Signed-off-by: Jakub Kicinski --- net/bridge/br_fdb.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index c77591e63841..ad7a42b505ef 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -1469,12 +1469,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, modified = true; } - if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) { + if (test_and_set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) { /* Refresh entry */ fdb->used = jiffies; - } else if (!test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags)) { - /* Take over SW learned entry */ - set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags); + } else { modified = true; } -- cgit From 858430db28a5f5a11f8faa3a6fa805438e6f0851 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Tue, 3 Sep 2024 13:51:41 -0400 Subject: net: xilinx: axienet: Fix race in axienet_stop axienet_dma_err_handler can race with axienet_stop in the following manner: CPU 1 CPU 2 ====================== ================== axienet_stop() napi_disable() axienet_dma_stop() axienet_dma_err_handler() napi_disable() axienet_dma_stop() axienet_dma_start() napi_enable() cancel_work_sync() free_irq() Fix this by setting a flag in axienet_stop telling axienet_dma_err_handler not to bother doing anything. I chose not to use disable_work_sync to allow for easier backporting. Signed-off-by: Sean Anderson Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Link: https://patch.msgid.link/20240903175141.4132898-1-sean.anderson@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xilinx/xilinx_axienet.h | 3 +++ drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 09c9f9787180..1223fcc1a8da 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -436,6 +436,8 @@ struct skbuf_dma_descriptor { * @tx_bytes: TX byte count for statistics * @tx_stat_sync: Synchronization object for TX stats * @dma_err_task: Work structure to process Axi DMA errors + * @stopping: Set when @dma_err_task shouldn't do anything because we are + * about to stop the device. * @tx_irq: Axidma TX IRQ number * @rx_irq: Axidma RX IRQ number * @eth_irq: Ethernet core IRQ number @@ -507,6 +509,7 @@ struct axienet_local { struct u64_stats_sync tx_stat_sync; struct work_struct dma_err_task; + bool stopping; int tx_irq; int rx_irq; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 9aeb7b9f3ae4..9eb300fc3590 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1460,6 +1460,7 @@ static int axienet_init_legacy_dma(struct net_device *ndev) struct axienet_local *lp = netdev_priv(ndev); /* Enable worker thread for Axi DMA error handling */ + lp->stopping = false; INIT_WORK(&lp->dma_err_task, axienet_dma_err_handler); napi_enable(&lp->napi_rx); @@ -1580,6 +1581,9 @@ static int axienet_stop(struct net_device *ndev) dev_dbg(&ndev->dev, "axienet_close()\n"); if (!lp->use_dmaengine) { + WRITE_ONCE(lp->stopping, true); + flush_work(&lp->dma_err_task); + napi_disable(&lp->napi_tx); napi_disable(&lp->napi_rx); } @@ -2154,6 +2158,10 @@ static void axienet_dma_err_handler(struct work_struct *work) dma_err_task); struct net_device *ndev = lp->ndev; + /* Don't bother if we are going to stop anyway */ + if (READ_ONCE(lp->stopping)) + return; + napi_disable(&lp->napi_tx); napi_disable(&lp->napi_rx); -- cgit From c82299fbbccecf5866bdc3fa9cc46d5c6f5005ad Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 30 Aug 2024 10:14:42 -0700 Subject: docs: netdev: document guidance on cleanup.h Document what was discussed multiple times on list and various virtual / in-person conversations. guard() being okay in functions <= 20 LoC is a bit of my own invention. If the function is trivial it should be fine, but feel free to disagree :) We'll obviously revisit this guidance as time passes and we and other subsystems get more experience. Reviewed-by: Eric Dumazet Reviewed-by: Nikolay Aleksandrov Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20240830171443.3532077-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- Documentation/process/maintainer-netdev.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 30d24eecdaaa..c9edf9e7362d 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -375,6 +375,22 @@ When working in existing code which uses nonstandard formatting make your code follow the most recent guidelines, so that eventually all code in the domain of netdev is in the preferred format. +Using device-managed and cleanup.h constructs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Netdev remains skeptical about promises of all "auto-cleanup" APIs, +including even ``devm_`` helpers, historically. They are not the preferred +style of implementation, merely an acceptable one. + +Use of ``guard()`` is discouraged within any function longer than 20 lines, +``scoped_guard()`` is considered more readable. Using normal lock/unlock is +still (weakly) preferred. + +Low level cleanup constructs (such as ``__free()``) can be used when building +APIs and helpers, especially scoped iterators. However, direct use of +``__free()`` within networking core and drivers is discouraged. +Similar guidance applies to declaring variables mid-function. + Resending after review ~~~~~~~~~~~~~~~~~~~~~~ -- cgit From 546ea84d07e3e324644025e2aae2d12ea4c5896e Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 3 Sep 2024 18:08:45 +0200 Subject: sched: sch_cake: fix bulk flow accounting logic for host fairness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In sch_cake, we keep track of the count of active bulk flows per host, when running in dst/src host fairness mode, which is used as the round-robin weight when iterating through flows. The count of active bulk flows is updated whenever a flow changes state. This has a peculiar interaction with the hash collision handling: when a hash collision occurs (after the set-associative hashing), the state of the hash bucket is simply updated to match the new packet that collided, and if host fairness is enabled, that also means assigning new per-host state to the flow. For this reason, the bulk flow counters of the host(s) assigned to the flow are decremented, before new state is assigned (and the counters, which may not belong to the same host anymore, are incremented again). Back when this code was introduced, the host fairness mode was always enabled, so the decrement was unconditional. When the configuration flags were introduced the *increment* was made conditional, but the *decrement* was not. Which of course can lead to a spurious decrement (and associated wrap-around to U16_MAX). AFAICT, when host fairness is disabled, the decrement and wrap-around happens as soon as a hash collision occurs (which is not that common in itself, due to the set-associative hashing). However, in most cases this is harmless, as the value is only used when host fairness mode is enabled. So in order to trigger an array overflow, sch_cake has to first be configured with host fairness disabled, and while running in this mode, a hash collision has to occur to cause the overflow. Then, the qdisc has to be reconfigured to enable host fairness, which leads to the array out-of-bounds because the wrapped-around value is retained and used as an array index. It seems that syzbot managed to trigger this, which is quite impressive in its own right. This patch fixes the issue by introducing the same conditional check on decrement as is used on increment. The original bug predates the upstreaming of cake, but the commit listed in the Fixes tag touched that code, meaning that this patch won't apply before that. Fixes: 712639929912 ("sch_cake: Make the dual modes fairer") Reported-by: syzbot+7fe7b81d602cc1e6b94d@syzkaller.appspotmail.com Signed-off-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20240903160846.20909-1-toke@redhat.com Signed-off-by: Paolo Abeni --- net/sched/sch_cake.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 9602dafe32e6..d2f49db70523 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -786,12 +786,15 @@ skip_hash: * queue, accept the collision, update the host tags. */ q->way_collisions++; - if (q->flows[outer_hash + k].set == CAKE_SET_BULK) { - q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--; - q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--; - } allocate_src = cake_dsrc(flow_mode); allocate_dst = cake_ddst(flow_mode); + + if (q->flows[outer_hash + k].set == CAKE_SET_BULK) { + if (allocate_src) + q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--; + if (allocate_dst) + q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--; + } found: /* reserve queue for future packets in same flow */ reduced_hash = outer_hash + k; -- cgit From 8e69c96df771ab469cec278edb47009351de4da6 Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Tue, 3 Sep 2024 22:33:41 +0200 Subject: net: dsa: vsc73xx: fix possible subblocks range of CAPT block CAPT block (CPU Capture Buffer) have 7 sublocks: 0-3, 4, 6, 7. Function 'vsc73xx_is_addr_valid' allows to use only block 0 at this moment. This patch fix it. Fixes: 05bd97fc559d ("net: dsa: Add Vitesse VSC73xx DSA router driver") Signed-off-by: Pawel Dembicki Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20240903203340.1518789-1-paweldembicki@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/dsa/vitesse-vsc73xx-core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index e3f95d2cc2c1..212421e9d42e 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -36,7 +36,7 @@ #define VSC73XX_BLOCK_ANALYZER 0x2 /* Only subblock 0 */ #define VSC73XX_BLOCK_MII 0x3 /* Subblocks 0 and 1 */ #define VSC73XX_BLOCK_MEMINIT 0x3 /* Only subblock 2 */ -#define VSC73XX_BLOCK_CAPTURE 0x4 /* Only subblock 2 */ +#define VSC73XX_BLOCK_CAPTURE 0x4 /* Subblocks 0-4, 6, 7 */ #define VSC73XX_BLOCK_ARBITER 0x5 /* Only subblock 0 */ #define VSC73XX_BLOCK_SYSTEM 0x7 /* Only subblock 0 */ @@ -410,13 +410,19 @@ int vsc73xx_is_addr_valid(u8 block, u8 subblock) break; case VSC73XX_BLOCK_MII: - case VSC73XX_BLOCK_CAPTURE: case VSC73XX_BLOCK_ARBITER: switch (subblock) { case 0 ... 1: return 1; } break; + case VSC73XX_BLOCK_CAPTURE: + switch (subblock) { + case 0 ... 4: + case 6 ... 7: + return 1; + } + break; } return 0; -- cgit From ff949d981c775332be94be70397ee1df20bc68e5 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 5 Sep 2024 13:15:37 +0200 Subject: spi: spi-fsl-lpspi: Fix off-by-one in prescale max The commit 783bf5d09f86 ("spi: spi-fsl-lpspi: limit PRESCALE bit in TCR register") doesn't implement the prescaler maximum as intended. The maximum allowed value for i.MX93 should be 1 and for i.MX7ULP it should be 7. So this needs also a adjustment of the comparison in the scldiv calculation. Fixes: 783bf5d09f86 ("spi: spi-fsl-lpspi: limit PRESCALE bit in TCR register") Signed-off-by: Stefan Wahren Link: https://patch.msgid.link/20240905111537.90389-1-wahrenst@gmx.net Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 350c5d91d869..8ecb426be45c 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -136,7 +136,7 @@ static struct fsl_lpspi_devtype_data imx93_lpspi_devtype_data = { }; static struct fsl_lpspi_devtype_data imx7ulp_lpspi_devtype_data = { - .prescale_max = 8, + .prescale_max = 7, }; static const struct of_device_id fsl_lpspi_dt_ids[] = { @@ -336,7 +336,7 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) div = DIV_ROUND_UP(perclk_rate, config.speed_hz); - for (prescale = 0; prescale < prescale_max; prescale++) { + for (prescale = 0; prescale <= prescale_max; prescale++) { scldiv = div / (1 << prescale) - 2; if (scldiv < 256) { fsl_lpspi->config.prescale = prescale; -- cgit From 49aa8a1f4d6800721c7971ed383078257f12e8f9 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Tue, 27 Aug 2024 20:46:54 +0800 Subject: tracing: Avoid possible softlockup in tracing_iter_reset() In __tracing_open(), when max latency tracers took place on the cpu, the time start of its buffer would be updated, then event entries with timestamps being earlier than start of the buffer would be skipped (see tracing_iter_reset()). Softlockup will occur if the kernel is non-preemptible and too many entries were skipped in the loop that reset every cpu buffer, so add cond_resched() to avoid it. Cc: stable@vger.kernel.org Fixes: 2f26ebd549b9a ("tracing: use timestamp to determine start of latency traces") Link: https://lore.kernel.org/20240827124654.3817443-1-zhengyejian@huaweicloud.com Suggested-by: Steven Rostedt Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ebe7ce2f5f4a..edf6bc817aa1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3958,6 +3958,8 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) break; entries++; ring_buffer_iter_advance(buf_iter); + /* This could be a big loop */ + cond_resched(); } per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries; -- cgit From d2603279c7d645bf0d11fa253b23f1ab48fc8d3c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Sep 2024 13:16:05 -0400 Subject: eventfs: Use list_del_rcu() for SRCU protected list variable Chi Zhiling reported: We found a null pointer accessing in tracefs[1], the reason is that the variable 'ei_child' is set to LIST_POISON1, that means the list was removed in eventfs_remove_rec. so when access the ei_child->is_freed, the panic triggered. by the way, the following script can reproduce this panic loop1 (){ while true do echo "p:kp submit_bio" > /sys/kernel/debug/tracing/kprobe_events echo "" > /sys/kernel/debug/tracing/kprobe_events done } loop2 (){ while true do tree /sys/kernel/debug/tracing/events/kprobes/ done } loop1 & loop2 [1]: [ 1147.959632][T17331] Unable to handle kernel paging request at virtual address dead000000000150 [ 1147.968239][T17331] Mem abort info: [ 1147.971739][T17331] ESR = 0x0000000096000004 [ 1147.976172][T17331] EC = 0x25: DABT (current EL), IL = 32 bits [ 1147.982171][T17331] SET = 0, FnV = 0 [ 1147.985906][T17331] EA = 0, S1PTW = 0 [ 1147.989734][T17331] FSC = 0x04: level 0 translation fault [ 1147.995292][T17331] Data abort info: [ 1147.998858][T17331] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 [ 1148.005023][T17331] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 1148.010759][T17331] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 1148.016752][T17331] [dead000000000150] address between user and kernel address ranges [ 1148.024571][T17331] Internal error: Oops: 0000000096000004 [#1] SMP [ 1148.030825][T17331] Modules linked in: team_mode_loadbalance team nlmon act_gact cls_flower sch_ingress bonding tls macvlan dummy ib_core bridge stp llc veth amdgpu amdxcp mfd_core gpu_sched drm_exec drm_buddy radeon crct10dif_ce video drm_suballoc_helper ghash_ce drm_ttm_helper sha2_ce ttm sha256_arm64 i2c_algo_bit sha1_ce sbsa_gwdt cp210x drm_display_helper cec sr_mod cdrom drm_kms_helper binfmt_misc sg loop fuse drm dm_mod nfnetlink ip_tables autofs4 [last unloaded: tls] [ 1148.072808][T17331] CPU: 3 PID: 17331 Comm: ls Tainted: G W ------- ---- 6.6.43 #2 [ 1148.081751][T17331] Source Version: 21b3b386e948bedd29369af66f3e98ab01b1c650 [ 1148.088783][T17331] Hardware name: Greatwall GW-001M1A-FTF/GW-001M1A-FTF, BIOS KunLun BIOS V4.0 07/16/2020 [ 1148.098419][T17331] pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 1148.106060][T17331] pc : eventfs_iterate+0x2c0/0x398 [ 1148.111017][T17331] lr : eventfs_iterate+0x2fc/0x398 [ 1148.115969][T17331] sp : ffff80008d56bbd0 [ 1148.119964][T17331] x29: ffff80008d56bbf0 x28: ffff001ff5be2600 x27: 0000000000000000 [ 1148.127781][T17331] x26: ffff001ff52ca4e0 x25: 0000000000009977 x24: dead000000000100 [ 1148.135598][T17331] x23: 0000000000000000 x22: 000000000000000b x21: ffff800082645f10 [ 1148.143415][T17331] x20: ffff001fddf87c70 x19: ffff80008d56bc90 x18: 0000000000000000 [ 1148.151231][T17331] x17: 0000000000000000 x16: 0000000000000000 x15: ffff001ff52ca4e0 [ 1148.159048][T17331] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 [ 1148.166864][T17331] x11: 0000000000000000 x10: 0000000000000000 x9 : ffff8000804391d0 [ 1148.174680][T17331] x8 : 0000000180000000 x7 : 0000000000000018 x6 : 0000aaab04b92862 [ 1148.182498][T17331] x5 : 0000aaab04b92862 x4 : 0000000080000000 x3 : 0000000000000068 [ 1148.190314][T17331] x2 : 000000000000000f x1 : 0000000000007ea8 x0 : 0000000000000001 [ 1148.198131][T17331] Call trace: [ 1148.201259][T17331] eventfs_iterate+0x2c0/0x398 [ 1148.205864][T17331] iterate_dir+0x98/0x188 [ 1148.210036][T17331] __arm64_sys_getdents64+0x78/0x160 [ 1148.215161][T17331] invoke_syscall+0x78/0x108 [ 1148.219593][T17331] el0_svc_common.constprop.0+0x48/0xf0 [ 1148.224977][T17331] do_el0_svc+0x24/0x38 [ 1148.228974][T17331] el0_svc+0x40/0x168 [ 1148.232798][T17331] el0t_64_sync_handler+0x120/0x130 [ 1148.237836][T17331] el0t_64_sync+0x1a4/0x1a8 [ 1148.242182][T17331] Code: 54ffff6c f9400676 910006d6 f9000676 (b9405300) [ 1148.248955][T17331] ---[ end trace 0000000000000000 ]--- The issue is that list_del() is used on an SRCU protected list variable before the synchronization occurs. This can poison the list pointers while there is a reader iterating the list. This is simply fixed by using list_del_rcu() that is specifically made for this purpose. Link: https://lore.kernel.org/linux-trace-kernel/20240829085025.3600021-1-chizhiling@163.com/ Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20240904131605.640d42b1@gandalf.local.home Fixes: 43aa6f97c2d03 ("eventfs: Get rid of dentry pointers without refcounts") Reported-by: Chi Zhiling Tested-by: Chi Zhiling Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 01e99e98457d..8705c77a9e75 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -862,7 +862,7 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level) list_for_each_entry(ei_child, &ei->children, list) eventfs_remove_rec(ei_child, level + 1); - list_del(&ei->list); + list_del_rcu(&ei->list); free_ei(ei); } -- cgit From 177e1cc2f41235c145041eed03ef5bab18f32328 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Sep 2024 10:34:28 -0400 Subject: tracing/osnoise: Use a cpumask to know what threads are kthreads The start_kthread() and stop_thread() code was not always called with the interface_lock held. This means that the kthread variable could be unexpectedly changed causing the kthread_stop() to be called on it when it should not have been, leading to: while true; do rtla timerlat top -u -q & PID=$!; sleep 5; kill -INT $PID; sleep 0.001; kill -TERM $PID; wait $PID; done Causing the following OOPS: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 5 UID: 0 PID: 885 Comm: timerlatu/5 Not tainted 6.11.0-rc4-test-00002-gbc754cc76d1b-dirty #125 a533010b71dab205ad2f507188ce8c82203b0254 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:hrtimer_active+0x58/0x300 Code: 48 c1 ee 03 41 54 48 01 d1 48 01 d6 55 53 48 83 ec 20 80 39 00 0f 85 30 02 00 00 49 8b 6f 30 4c 8d 75 10 4c 89 f0 48 c1 e8 03 <0f> b6 3c 10 4c 89 f0 83 e0 07 83 c0 03 40 38 f8 7c 09 40 84 ff 0f RSP: 0018:ffff88811d97f940 EFLAGS: 00010202 RAX: 0000000000000002 RBX: ffff88823c6b5b28 RCX: ffffed10478d6b6b RDX: dffffc0000000000 RSI: ffffed10478d6b6c RDI: ffff88823c6b5b28 RBP: 0000000000000000 R08: ffff88823c6b5b58 R09: ffff88823c6b5b60 R10: ffff88811d97f957 R11: 0000000000000010 R12: 00000000000a801d R13: ffff88810d8b35d8 R14: 0000000000000010 R15: ffff88823c6b5b28 FS: 0000000000000000(0000) GS:ffff88823c680000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000561858ad7258 CR3: 000000007729e001 CR4: 0000000000170ef0 Call Trace: ? die_addr+0x40/0xa0 ? exc_general_protection+0x154/0x230 ? asm_exc_general_protection+0x26/0x30 ? hrtimer_active+0x58/0x300 ? __pfx_mutex_lock+0x10/0x10 ? __pfx_locks_remove_file+0x10/0x10 hrtimer_cancel+0x15/0x40 timerlat_fd_release+0x8e/0x1f0 ? security_file_release+0x43/0x80 __fput+0x372/0xb10 task_work_run+0x11e/0x1f0 ? _raw_spin_lock+0x85/0xe0 ? __pfx_task_work_run+0x10/0x10 ? poison_slab_object+0x109/0x170 ? do_exit+0x7a0/0x24b0 do_exit+0x7bd/0x24b0 ? __pfx_migrate_enable+0x10/0x10 ? __pfx_do_exit+0x10/0x10 ? __pfx_read_tsc+0x10/0x10 ? ktime_get+0x64/0x140 ? _raw_spin_lock_irq+0x86/0xe0 do_group_exit+0xb0/0x220 get_signal+0x17ba/0x1b50 ? vfs_read+0x179/0xa40 ? timerlat_fd_read+0x30b/0x9d0 ? __pfx_get_signal+0x10/0x10 ? __pfx_timerlat_fd_read+0x10/0x10 arch_do_signal_or_restart+0x8c/0x570 ? __pfx_arch_do_signal_or_restart+0x10/0x10 ? vfs_read+0x179/0xa40 ? ksys_read+0xfe/0x1d0 ? __pfx_ksys_read+0x10/0x10 syscall_exit_to_user_mode+0xbc/0x130 do_syscall_64+0x74/0x110 ? __pfx___rseq_handle_notify_resume+0x10/0x10 ? __pfx_ksys_read+0x10/0x10 ? fpregs_restore_userregs+0xdb/0x1e0 ? fpregs_restore_userregs+0xdb/0x1e0 ? syscall_exit_to_user_mode+0x116/0x130 ? do_syscall_64+0x74/0x110 ? do_syscall_64+0x74/0x110 ? do_syscall_64+0x74/0x110 entry_SYSCALL_64_after_hwframe+0x71/0x79 RIP: 0033:0x7ff0070eca9c Code: Unable to access opcode bytes at 0x7ff0070eca72. RSP: 002b:00007ff006dff8c0 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: 0000000000000000 RBX: 0000000000000005 RCX: 00007ff0070eca9c RDX: 0000000000000400 RSI: 00007ff006dff9a0 RDI: 0000000000000003 RBP: 00007ff006dffde0 R08: 0000000000000000 R09: 00007ff000000ba0 R10: 00007ff007004b08 R11: 0000000000000246 R12: 0000000000000003 R13: 00007ff006dff9a0 R14: 0000000000000007 R15: 0000000000000008 Modules linked in: snd_hda_intel snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec snd_hwdep snd_hda_core ---[ end trace 0000000000000000 ]--- This is because it would mistakenly call kthread_stop() on a user space thread making it "exit" before it actually exits. Since kthreads are created based on global behavior, use a cpumask to know when kthreads are running and that they need to be shutdown before proceeding to do new work. Link: https://lore.kernel.org/all/20240820130001.124768-1-tglozar@redhat.com/ This was debugged by using the persistent ring buffer: Link: https://lore.kernel.org/all/20240823013902.135036960@goodmis.org/ Note, locking was originally used to fix this, but that proved to cause too many deadlocks to work around: https://lore.kernel.org/linux-trace-kernel/20240823102816.5e55753b@gandalf.local.home/ Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: "Luis Claudio R. Goncalves" Link: https://lore.kernel.org/20240904103428.08efdf4c@gandalf.local.home Fixes: e88ed227f639e ("tracing/timerlat: Add user-space interface") Reported-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 66a871553d4a..d770927efcd9 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1612,6 +1612,7 @@ out: static struct cpumask osnoise_cpumask; static struct cpumask save_cpumask; +static struct cpumask kthread_cpumask; /* * osnoise_sleep - sleep until the next period @@ -1675,6 +1676,7 @@ static inline int osnoise_migration_pending(void) */ mutex_lock(&interface_lock); this_cpu_osn_var()->kthread = NULL; + cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask); mutex_unlock(&interface_lock); return 1; @@ -1947,9 +1949,10 @@ static void stop_kthread(unsigned int cpu) kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; if (kthread) { - if (test_bit(OSN_WORKLOAD, &osnoise_options)) { + if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) && + !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) { kthread_stop(kthread); - } else { + } else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) { /* * This is a user thread waiting on the timerlat_fd. We need * to close all users, and the best way to guarantee this is @@ -2021,6 +2024,7 @@ static int start_kthread(unsigned int cpu) } per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; + cpumask_set_cpu(cpu, &kthread_cpumask); return 0; } @@ -2048,8 +2052,16 @@ static int start_per_cpu_kthreads(void) */ cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask); - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { + if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) { + struct task_struct *kthread; + + kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; + if (!WARN_ON(!kthread)) + kthread_stop(kthread); + } per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; + } for_each_cpu(cpu, current_mask) { retval = start_kthread(cpu); -- cgit From e6a53481da292d970d1edf0d8831121d1c5e2f0d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 Sep 2024 08:53:30 -0400 Subject: tracing/timerlat: Only clear timer if a kthread exists The timerlat tracer can use user space threads to check for osnoise and timer latency. If the program using this is killed via a SIGTERM, the threads are shutdown one at a time and another tracing instance can start up resetting the threads before they are fully closed. That causes the hrtimer assigned to the kthread to be shutdown and freed twice when the dying thread finally closes the file descriptors, causing a use-after-free bug. Only cancel the hrtimer if the associated thread is still around. Also add the interface_lock around the resetting of the tlat_var->kthread. Note, this is just a quick fix that can be backported to stable. A real fix is to have a better synchronization between the shutdown of old threads and the starting of new ones. Link: https://lore.kernel.org/all/20240820130001.124768-1-tglozar@redhat.com/ Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: "Luis Claudio R. Goncalves" Link: https://lore.kernel.org/20240905085330.45985730@gandalf.local.home Fixes: e88ed227f639e ("tracing/timerlat: Add user-space interface") Reported-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index d770927efcd9..48e5014dd4ab 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -252,6 +252,11 @@ static inline struct timerlat_variables *this_cpu_tmr_var(void) return this_cpu_ptr(&per_cpu_timerlat_var); } +/* + * Protect the interface. + */ +static struct mutex interface_lock; + /* * tlat_var_reset - Reset the values of the given timerlat_variables */ @@ -259,14 +264,20 @@ static inline void tlat_var_reset(void) { struct timerlat_variables *tlat_var; int cpu; + + /* Synchronize with the timerlat interfaces */ + mutex_lock(&interface_lock); /* * So far, all the values are initialized as 0, so * zeroing the structure is perfect. */ for_each_cpu(cpu, cpu_online_mask) { tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); + if (tlat_var->kthread) + hrtimer_cancel(&tlat_var->timer); memset(tlat_var, 0, sizeof(*tlat_var)); } + mutex_unlock(&interface_lock); } #else /* CONFIG_TIMERLAT_TRACER */ #define tlat_var_reset() do {} while (0) @@ -331,11 +342,6 @@ struct timerlat_sample { }; #endif -/* - * Protect the interface. - */ -static struct mutex interface_lock; - /* * Tracer data. */ @@ -2591,7 +2597,8 @@ static int timerlat_fd_release(struct inode *inode, struct file *file) osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); - hrtimer_cancel(&tlat_var->timer); + if (tlat_var->kthread) + hrtimer_cancel(&tlat_var->timer); memset(tlat_var, 0, sizeof(*tlat_var)); osn_var->sampling = 0; -- cgit From 5bfbcd1ee57b607fd29e4645c7f350dd385dd9ad Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 Sep 2024 11:33:59 -0400 Subject: tracing/timerlat: Add interface_lock around clearing of kthread in stop_kthread() The timerlat interface will get and put the task that is part of the "kthread" field of the osn_var to keep it around until all references are released. But here's a race in the "stop_kthread()" code that will call put_task_struct() on the kthread if it is not a kernel thread. This can race with the releasing of the references to that task struct and the put_task_struct() can be called twice when it should have been called just once. Take the interface_lock() in stop_kthread() to synchronize this change. But to do so, the function stop_per_cpu_kthreads() needs to change the loop from for_each_online_cpu() to for_each_possible_cpu() and remove the cpu_read_lock(), as the interface_lock can not be taken while the cpu locks are held. The only side effect of this change is that it may do some extra work, as the per_cpu variables of the offline CPUs would not be set anyway, and would simply be skipped in the loop. Remove unneeded "return;" in stop_kthread(). Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Tomas Glozar Cc: John Kacur Cc: "Luis Claudio R. Goncalves" Link: https://lore.kernel.org/20240905113359.2b934242@gandalf.local.home Fixes: e88ed227f639e ("tracing/timerlat: Add user-space interface") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 48e5014dd4ab..bbe47781617e 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1953,8 +1953,12 @@ static void stop_kthread(unsigned int cpu) { struct task_struct *kthread; + mutex_lock(&interface_lock); kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; if (kthread) { + per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; + mutex_unlock(&interface_lock); + if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) && !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) { kthread_stop(kthread); @@ -1967,8 +1971,8 @@ static void stop_kthread(unsigned int cpu) kill_pid(kthread->thread_pid, SIGKILL, 1); put_task_struct(kthread); } - per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; } else { + mutex_unlock(&interface_lock); /* if no workload, just return */ if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { /* @@ -1976,7 +1980,6 @@ static void stop_kthread(unsigned int cpu) */ per_cpu(per_cpu_osnoise_var, cpu).sampling = false; barrier(); - return; } } } @@ -1991,12 +1994,8 @@ static void stop_per_cpu_kthreads(void) { int cpu; - cpus_read_lock(); - - for_each_online_cpu(cpu) + for_each_possible_cpu(cpu) stop_kthread(cpu); - - cpus_read_unlock(); } /* -- cgit From fb9820c550cfc58684b612471cc50e4b5c305404 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 5 Sep 2024 11:41:24 -0400 Subject: MAINTAINERS: SPI: Add freescale lpspi maintainer information Add imx@lists.linux.dev and NXP maintainer information for lpspi driver (drivers/spi/spi-fsl-lpspi.c). Signed-off-by: Frank Li Reviewed-by: Stefan Wahren Link: https://patch.msgid.link/20240905154124.1901311-1-Frank.Li@nxp.com Signed-off-by: Mark Brown --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 878dcd23b331..3e49840cff42 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8945,6 +8945,14 @@ S: Maintained F: Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml F: drivers/i2c/busses/i2c-imx-lpi2c.c +FREESCALE IMX LPSPI DRIVER +M: Frank Li +L: linux-spi@vger.kernel.org +L: imx@lists.linux.dev +S: Maintained +F: Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml +F: drivers/spi/spi-fsl-lpspi.c + FREESCALE MPC I2C DRIVER M: Chris Packham L: linux-i2c@vger.kernel.org -- cgit From c9ca76e8239810ccb08825a7b847c39d367410a6 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 5 Sep 2024 11:52:30 -0400 Subject: MAINTAINERS: SPI: Add mailing list imx@lists.linux.dev for nxp spi drivers Add mailing list imx@lists.linux.dev for nxp spi drivers(qspi, fspi and dspi). Signed-off-by: Frank Li Reviewed-by: Stefan Wahren Link: https://patch.msgid.link/20240905155230.1901787-1-Frank.Li@nxp.com Signed-off-by: Mark Brown --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3e49840cff42..931838892e85 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8861,6 +8861,7 @@ F: drivers/dma/fsldma.* FREESCALE DSPI DRIVER M: Vladimir Oltean L: linux-spi@vger.kernel.org +L: imx@lists.linux.dev S: Maintained F: Documentation/devicetree/bindings/spi/fsl,dspi*.yaml F: drivers/spi/spi-fsl-dspi.c @@ -8989,6 +8990,7 @@ F: include/linux/fsl/ptp_qoriq.h FREESCALE QUAD SPI DRIVER M: Han Xu L: linux-spi@vger.kernel.org +L: imx@lists.linux.dev S: Maintained F: Documentation/devicetree/bindings/spi/fsl,spi-fsl-qspi.yaml F: drivers/spi/spi-fsl-qspi.c @@ -16413,6 +16415,7 @@ M: Han Xu M: Haibo Chen R: Yogesh Gaur L: linux-spi@vger.kernel.org +L: imx@lists.linux.dev S: Maintained F: Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml F: drivers/spi/spi-nxp-fspi.c -- cgit From e4af74a53b7aa865e7fcc104630ebb7a9129b71f Mon Sep 17 00:00:00 2001 From: Jamie Bainbridge Date: Wed, 4 Sep 2024 16:12:26 +1000 Subject: selftests: net: enable bind tests bind_wildcard is compiled but not run, bind_timewait is not compiled. These two tests complete in a very short time, use the test harness properly, and seem reasonable to enable. The author of the tests confirmed via email that these were intended to be run. Enable these two tests. Fixes: 13715acf8ab5 ("selftest: Add test for bind() conflicts.") Fixes: 2c042e8e54ef ("tcp: Add selftest for bind() and TIME_WAIT.") Signed-off-by: Jamie Bainbridge Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/5a009b26cf5fb1ad1512d89c61b37e2fac702323.1725430322.git.jamie.bainbridge@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 8eaffd7a641c..9d5aa817411b 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -85,7 +85,8 @@ TEST_GEN_PROGS += so_incoming_cpu TEST_PROGS += sctp_vrf.sh TEST_GEN_FILES += sctp_hello TEST_GEN_FILES += ip_local_port_range -TEST_GEN_FILES += bind_wildcard +TEST_GEN_PROGS += bind_wildcard +TEST_GEN_PROGS += bind_timewait TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh -- cgit From 20d664ebd212a85ad9c134e557619402bee6987f Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Wed, 4 Sep 2024 13:18:55 +0000 Subject: MAINTAINERS: fix ptp ocp driver maintainers address While checking the latest series for ptp_ocp driver I realised that MAINTAINERS file has wrong item about email on linux.dev domain. Fixes: 795fd9342c62 ("ptp_ocp: adjust MAINTAINERS and mailmap") Signed-off-by: Vadim Fedorenko Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240904131855.559078-1-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 4502a16844e4..e6534d938c53 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17119,7 +17119,7 @@ F: include/dt-bindings/ OPENCOMPUTE PTP CLOCK DRIVER M: Jonathan Lemon -M: Vadim Fedorenko +M: Vadim Fedorenko L: netdev@vger.kernel.org S: Maintained F: drivers/ptp/ptp_ocp.c -- cgit From 6fda63c45fe8a0870226c13dcce1cc21b7c4d508 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Wed, 4 Sep 2024 15:50:34 +0200 Subject: tools/net/ynl: fix cli.py --subscribe feature Execution of command: ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/dpll.yaml / --subscribe "monitor" --sleep 10 fails with: File "/repo/./tools/net/ynl/cli.py", line 109, in main ynl.check_ntf() File "/repo/tools/net/ynl/lib/ynl.py", line 924, in check_ntf op = self.rsp_by_value[nl_msg.cmd()] KeyError: 19 Parsing Generic Netlink notification messages performs lookup for op in the message. The message was not yet decoded, and is not yet considered GenlMsg, thus msg.cmd() returns Generic Netlink family id (19) instead of proper notification command id (i.e.: DPLL_CMD_PIN_CHANGE_NTF=13). Allow the op to be obtained within NetlinkProtocol.decode(..) itself if the op was not passed to the decode function, thus allow parsing of Generic Netlink notifications without causing the failure. Suggested-by: Donald Hunter Link: https://lore.kernel.org/netdev/m2le0n5xpn.fsf@gmail.com/ Fixes: 0a966d606c68 ("tools/net/ynl: Fix extack decoding for directional ops") Signed-off-by: Arkadiusz Kubalewski Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20240904135034.316033-1-arkadiusz.kubalewski@intel.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index d42c1d605969..c22c22bf2cb7 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -388,6 +388,8 @@ class NetlinkProtocol: def decode(self, ynl, nl_msg, op): msg = self._decode(nl_msg) + if op is None: + op = ynl.rsp_by_value[msg.cmd()] fixed_header_size = ynl._struct_size(op.fixed_header) msg.raw_attrs = NlAttrs(msg.raw, fixed_header_size) return msg @@ -921,8 +923,7 @@ class YnlFamily(SpecFamily): print("Netlink done while checking for ntf!?") continue - op = self.rsp_by_value[nl_msg.cmd()] - decoded = self.nlproto.decode(self, nl_msg, op) + decoded = self.nlproto.decode(self, nl_msg, None) if decoded.cmd() not in self.async_msg_ids: print("Unexpected msg id done while checking for ntf", decoded) continue @@ -980,7 +981,7 @@ class YnlFamily(SpecFamily): if nl_msg.extack: self._decode_extack(req_msg, op, nl_msg.extack) else: - op = self.rsp_by_value[nl_msg.cmd()] + op = None req_flags = [] if nl_msg.error: -- cgit From 031ae72825cef43e4650140b800ad58bf7a6a466 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Sep 2024 14:44:18 +0000 Subject: ila: call nf_unregister_net_hooks() sooner syzbot found an use-after-free Read in ila_nf_input [1] Issue here is that ila_xlat_exit_net() frees the rhashtable, then call nf_unregister_net_hooks(). It should be done in the reverse way, with a synchronize_rcu(). This is a good match for a pre_exit() method. [1] BUG: KASAN: use-after-free in rht_key_hashfn include/linux/rhashtable.h:159 [inline] BUG: KASAN: use-after-free in __rhashtable_lookup include/linux/rhashtable.h:604 [inline] BUG: KASAN: use-after-free in rhashtable_lookup include/linux/rhashtable.h:646 [inline] BUG: KASAN: use-after-free in rhashtable_lookup_fast+0x77a/0x9b0 include/linux/rhashtable.h:672 Read of size 4 at addr ffff888064620008 by task ksoftirqd/0/16 CPU: 0 UID: 0 PID: 16 Comm: ksoftirqd/0 Not tainted 6.11.0-rc4-syzkaller-00238-g2ad6d23f465a #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 Call Trace: __dump_stack lib/dump_stack.c:93 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:119 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 kasan_report+0x143/0x180 mm/kasan/report.c:601 rht_key_hashfn include/linux/rhashtable.h:159 [inline] __rhashtable_lookup include/linux/rhashtable.h:604 [inline] rhashtable_lookup include/linux/rhashtable.h:646 [inline] rhashtable_lookup_fast+0x77a/0x9b0 include/linux/rhashtable.h:672 ila_lookup_wildcards net/ipv6/ila/ila_xlat.c:132 [inline] ila_xlat_addr net/ipv6/ila/ila_xlat.c:652 [inline] ila_nf_input+0x1fe/0x3c0 net/ipv6/ila/ila_xlat.c:190 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xc3/0x220 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] NF_HOOK+0x29e/0x450 include/linux/netfilter.h:312 __netif_receive_skb_one_core net/core/dev.c:5661 [inline] __netif_receive_skb+0x1ea/0x650 net/core/dev.c:5775 process_backlog+0x662/0x15b0 net/core/dev.c:6108 __napi_poll+0xcb/0x490 net/core/dev.c:6772 napi_poll net/core/dev.c:6841 [inline] net_rx_action+0x89b/0x1240 net/core/dev.c:6963 handle_softirqs+0x2c4/0x970 kernel/softirq.c:554 run_ksoftirqd+0xca/0x130 kernel/softirq.c:928 smpboot_thread_fn+0x544/0xa30 kernel/smpboot.c:164 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 The buggy address belongs to the physical page: page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x64620 flags: 0xfff00000000000(node=0|zone=1|lastcpupid=0x7ff) page_type: 0xbfffffff(buddy) raw: 00fff00000000000 ffffea0000959608 ffffea00019d9408 0000000000000000 raw: 0000000000000000 0000000000000003 00000000bfffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as freed page last allocated via order 3, migratetype Unmovable, gfp_mask 0x52dc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_ZERO), pid 5242, tgid 5242 (syz-executor), ts 73611328570, free_ts 618981657187 set_page_owner include/linux/page_owner.h:32 [inline] post_alloc_hook+0x1f3/0x230 mm/page_alloc.c:1493 prep_new_page mm/page_alloc.c:1501 [inline] get_page_from_freelist+0x2e4c/0x2f10 mm/page_alloc.c:3439 __alloc_pages_noprof+0x256/0x6c0 mm/page_alloc.c:4695 __alloc_pages_node_noprof include/linux/gfp.h:269 [inline] alloc_pages_node_noprof include/linux/gfp.h:296 [inline] ___kmalloc_large_node+0x8b/0x1d0 mm/slub.c:4103 __kmalloc_large_node_noprof+0x1a/0x80 mm/slub.c:4130 __do_kmalloc_node mm/slub.c:4146 [inline] __kmalloc_node_noprof+0x2d2/0x440 mm/slub.c:4164 __kvmalloc_node_noprof+0x72/0x190 mm/util.c:650 bucket_table_alloc lib/rhashtable.c:186 [inline] rhashtable_init_noprof+0x534/0xa60 lib/rhashtable.c:1071 ila_xlat_init_net+0xa0/0x110 net/ipv6/ila/ila_xlat.c:613 ops_init+0x359/0x610 net/core/net_namespace.c:139 setup_net+0x515/0xca0 net/core/net_namespace.c:343 copy_net_ns+0x4e2/0x7b0 net/core/net_namespace.c:508 create_new_namespaces+0x425/0x7b0 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0x124/0x180 kernel/nsproxy.c:228 ksys_unshare+0x619/0xc10 kernel/fork.c:3328 __do_sys_unshare kernel/fork.c:3399 [inline] __se_sys_unshare kernel/fork.c:3397 [inline] __x64_sys_unshare+0x38/0x40 kernel/fork.c:3397 page last free pid 11846 tgid 11846 stack trace: reset_page_owner include/linux/page_owner.h:25 [inline] free_pages_prepare mm/page_alloc.c:1094 [inline] free_unref_page+0xd22/0xea0 mm/page_alloc.c:2612 __folio_put+0x2c8/0x440 mm/swap.c:128 folio_put include/linux/mm.h:1486 [inline] free_large_kmalloc+0x105/0x1c0 mm/slub.c:4565 kfree+0x1c4/0x360 mm/slub.c:4588 rhashtable_free_and_destroy+0x7c6/0x920 lib/rhashtable.c:1169 ila_xlat_exit_net+0x55/0x110 net/ipv6/ila/ila_xlat.c:626 ops_exit_list net/core/net_namespace.c:173 [inline] cleanup_net+0x802/0xcc0 net/core/net_namespace.c:640 process_one_work kernel/workqueue.c:3231 [inline] process_scheduled_works+0xa2c/0x1830 kernel/workqueue.c:3312 worker_thread+0x86d/0xd40 kernel/workqueue.c:3390 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Memory state around the buggy address: ffff88806461ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88806461ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff888064620000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff888064620080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888064620100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Tom Herbert Reviewed-by: Florian Westphal Link: https://patch.msgid.link/20240904144418.1162839-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ila/ila.h | 1 + net/ipv6/ila/ila_main.c | 6 ++++++ net/ipv6/ila/ila_xlat.c | 13 +++++++++---- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h index ad5f6f6ba333..85b92917849b 100644 --- a/net/ipv6/ila/ila.h +++ b/net/ipv6/ila/ila.h @@ -108,6 +108,7 @@ int ila_lwt_init(void); void ila_lwt_fini(void); int ila_xlat_init_net(struct net *net); +void ila_xlat_pre_exit_net(struct net *net); void ila_xlat_exit_net(struct net *net); int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c index 69caed07315f..976c78efbae1 100644 --- a/net/ipv6/ila/ila_main.c +++ b/net/ipv6/ila/ila_main.c @@ -71,6 +71,11 @@ ila_xlat_init_fail: return err; } +static __net_exit void ila_pre_exit_net(struct net *net) +{ + ila_xlat_pre_exit_net(net); +} + static __net_exit void ila_exit_net(struct net *net) { ila_xlat_exit_net(net); @@ -78,6 +83,7 @@ static __net_exit void ila_exit_net(struct net *net) static struct pernet_operations ila_net_ops = { .init = ila_init_net, + .pre_exit = ila_pre_exit_net, .exit = ila_exit_net, .id = &ila_net_id, .size = sizeof(struct ila_net), diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 67e8c9440977..534a4498e280 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -619,6 +619,15 @@ int ila_xlat_init_net(struct net *net) return 0; } +void ila_xlat_pre_exit_net(struct net *net) +{ + struct ila_net *ilan = net_generic(net, ila_net_id); + + if (ilan->xlat.hooks_registered) + nf_unregister_net_hooks(net, ila_nf_hook_ops, + ARRAY_SIZE(ila_nf_hook_ops)); +} + void ila_xlat_exit_net(struct net *net) { struct ila_net *ilan = net_generic(net, ila_net_id); @@ -626,10 +635,6 @@ void ila_xlat_exit_net(struct net *net) rhashtable_free_and_destroy(&ilan->xlat.rhash_table, ila_free_cb, NULL); free_bucket_spinlocks(ilan->xlat.locks); - - if (ilan->xlat.hooks_registered) - nf_unregister_net_hooks(net, ila_nf_hook_ops, - ARRAY_SIZE(ila_nf_hook_ops)); } static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila) -- cgit