From 7269cba53d906cf257c139d3b3a53ad272176bca Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Thu, 2 Nov 2023 13:00:55 +0530 Subject: tee: optee: Fix supplicant based device enumeration Currently supplicant dependent optee device enumeration only registers devices whenever tee-supplicant is invoked for the first time. But it forgets to remove devices when tee-supplicant daemon stops running and closes its context gracefully. This leads to following error for fTPM driver during reboot/shutdown: [ 73.466791] tpm tpm0: ftpm_tee_tpm_op_send: SUBMIT_COMMAND invoke error: 0xffff3024 Fix this by adding an attribute for supplicant dependent devices so that the user-space service can detect and detach supplicant devices before closing the supplicant: $ for dev in /sys/bus/tee/devices/*; do if [[ -f "$dev/need_supplicant" && -f "$dev/driver/unbind" ]]; \ then echo $(basename "$dev") > $dev/driver/unbind; fi done Reported-by: Jan Kiszka Closes: https://github.com/OP-TEE/optee_os/issues/6094 Fixes: 5f178bb71e3a ("optee: enable support for multi-stage bus enumeration") Signed-off-by: Sumit Garg Reviewed-by: Ilias Apalodimas Acked-by: Jerome Forissier [jw: fixed up Date documentation] Signed-off-by: Jens Wiklander --- Documentation/ABI/testing/sysfs-bus-optee-devices | 9 +++++++++ drivers/tee/optee/device.c | 17 +++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-optee-devices b/Documentation/ABI/testing/sysfs-bus-optee-devices index 0f58701367b6..af31e5a22d89 100644 --- a/Documentation/ABI/testing/sysfs-bus-optee-devices +++ b/Documentation/ABI/testing/sysfs-bus-optee-devices @@ -6,3 +6,12 @@ Description: OP-TEE bus provides reference to registered drivers under this directory. The matches Trusted Application (TA) driver and corresponding TA in secure OS. Drivers are free to create needed API under optee-ta- directory. + +What: /sys/bus/tee/devices/optee-ta-/need_supplicant +Date: November 2023 +KernelVersion: 6.7 +Contact: op-tee@lists.trustedfirmware.org +Description: + Allows to distinguish whether an OP-TEE based TA/device requires user-space + tee-supplicant to function properly or not. This attribute will be present for + devices which depend on tee-supplicant to be running. diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index 64f0e047c23d..4b1092127694 100644 --- a/drivers/tee/optee/device.c +++ b/drivers/tee/optee/device.c @@ -60,7 +60,16 @@ static void optee_release_device(struct device *dev) kfree(optee_device); } -static int optee_register_device(const uuid_t *device_uuid) +static ssize_t need_supplicant_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return 0; +} + +static DEVICE_ATTR_RO(need_supplicant); + +static int optee_register_device(const uuid_t *device_uuid, u32 func) { struct tee_client_device *optee_device = NULL; int rc; @@ -83,6 +92,10 @@ static int optee_register_device(const uuid_t *device_uuid) put_device(&optee_device->dev); } + if (func == PTA_CMD_GET_DEVICES_SUPP) + device_create_file(&optee_device->dev, + &dev_attr_need_supplicant); + return rc; } @@ -142,7 +155,7 @@ static int __optee_enumerate_devices(u32 func) num_devices = shm_size / sizeof(uuid_t); for (idx = 0; idx < num_devices; idx++) { - rc = optee_register_device(&device_uuid[idx]); + rc = optee_register_device(&device_uuid[idx], func); if (rc) goto out_shm; } -- cgit From 8f51593cdcab82fb23ef2e1a0010b2e6f99aae02 Mon Sep 17 00:00:00 2001 From: "Nícolas F. R. A. Prado" Date: Tue, 7 Nov 2023 17:55:28 -0500 Subject: dt: dt-extract-compatibles: Don't follow symlinks when walking tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The iglob function, which we use to find C source files in the kernel tree, always follows symbolic links. This can cause unintentional recursions whenever a symbolic link points to a parent directory. A common scenario is building the kernel with the output set to a directory inside the kernel tree, which will contain such a symlink. Instead of using the iglob function, use os.walk to traverse the directory tree, which by default doesn't follow symbolic links. fnmatch is then used to match the glob on the filename, as well as ignore hidden files (which were ignored by default with iglob). This approach runs just as fast as using iglob. Fixes: b6acf8073517 ("dt: Add a check for undocumented compatible strings in kernel") Reported-by: Aishwarya TCV Closes: https://lore.kernel.org/all/e90cb52f-d55b-d3ba-3933-6cc7b43fcfbc@arm.com Signed-off-by: "Nícolas F. R. A. Prado" Link: https://lore.kernel.org/r/20231107225624.9811-1-nfraprado@collabora.com Signed-off-by: Rob Herring --- scripts/dtc/dt-extract-compatibles | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/scripts/dtc/dt-extract-compatibles b/scripts/dtc/dt-extract-compatibles index bd07477dd144..5ffb2364409b 100755 --- a/scripts/dtc/dt-extract-compatibles +++ b/scripts/dtc/dt-extract-compatibles @@ -1,8 +1,8 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0-only +import fnmatch import os -import glob import re import argparse @@ -81,10 +81,20 @@ def print_compat(filename, compatibles): else: print(*compatibles, sep='\n') +def glob_without_symlinks(root, glob): + for path, dirs, files in os.walk(root): + # Ignore hidden directories + for d in dirs: + if fnmatch.fnmatch(d, ".*"): + dirs.remove(d) + for f in files: + if fnmatch.fnmatch(f, glob): + yield os.path.join(path, f) + def files_to_parse(path_args): for f in path_args: if os.path.isdir(f): - for filename in glob.iglob(f + "/**/*.c", recursive=True): + for filename in glob_without_symlinks(f, "*.c"): yield filename else: yield f -- cgit From 0550d4604e2ca4e653dc13f0c009fc42106b6bfc Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Wed, 8 Nov 2023 23:31:13 +0900 Subject: RDMA/core: Fix uninit-value access in ib_get_eth_speed() KMSAN reported the following uninit-value access issue: lo speed is unknown, defaulting to 1000 ===================================================== BUG: KMSAN: uninit-value in ib_get_width_and_speed drivers/infiniband/core/verbs.c:1889 [inline] BUG: KMSAN: uninit-value in ib_get_eth_speed+0x546/0xaf0 drivers/infiniband/core/verbs.c:1998 ib_get_width_and_speed drivers/infiniband/core/verbs.c:1889 [inline] ib_get_eth_speed+0x546/0xaf0 drivers/infiniband/core/verbs.c:1998 siw_query_port drivers/infiniband/sw/siw/siw_verbs.c:173 [inline] siw_get_port_immutable+0x6f/0x120 drivers/infiniband/sw/siw/siw_verbs.c:203 setup_port_data drivers/infiniband/core/device.c:848 [inline] setup_device drivers/infiniband/core/device.c:1244 [inline] ib_register_device+0x1589/0x1df0 drivers/infiniband/core/device.c:1383 siw_device_register drivers/infiniband/sw/siw/siw_main.c:72 [inline] siw_newlink+0x129e/0x13d0 drivers/infiniband/sw/siw/siw_main.c:490 nldev_newlink+0x8fd/0xa60 drivers/infiniband/core/nldev.c:1763 rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline] rdma_nl_rcv+0xe8a/0x1120 drivers/infiniband/core/netlink.c:259 netlink_unicast_kernel net/netlink/af_netlink.c:1342 [inline] netlink_unicast+0xf4b/0x1230 net/netlink/af_netlink.c:1368 netlink_sendmsg+0x1242/0x1420 net/netlink/af_netlink.c:1910 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x997/0xd60 net/socket.c:2588 ___sys_sendmsg+0x271/0x3b0 net/socket.c:2642 __sys_sendmsg net/socket.c:2671 [inline] __do_sys_sendmsg net/socket.c:2680 [inline] __se_sys_sendmsg net/socket.c:2678 [inline] __x64_sys_sendmsg+0x2fa/0x4a0 net/socket.c:2678 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Local variable lksettings created at: ib_get_eth_speed+0x4b/0xaf0 drivers/infiniband/core/verbs.c:1974 siw_query_port drivers/infiniband/sw/siw/siw_verbs.c:173 [inline] siw_get_port_immutable+0x6f/0x120 drivers/infiniband/sw/siw/siw_verbs.c:203 CPU: 0 PID: 11257 Comm: syz-executor.1 Not tainted 6.6.0-14500-g1c41041124bd #10 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014 ===================================================== If __ethtool_get_link_ksettings() fails, `netdev_speed` is set to the default value, SPEED_1000. In this case, if `lanes` field of struct ethtool_link_ksettings is not initialized, an uninitialized value is passed to ib_get_width_and_speed(). This causes the above issue. This patch resolves the issue by initializing `lanes` to 0. Fixes: cb06b6b3f6cb ("RDMA/core: Get IB width and speed from netdev") Signed-off-by: Shigeru Yoshida Link: https://lore.kernel.org/r/20231108143113.1360567-1-syoshida@redhat.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 8a6da87f464b..94a7f3b0c71c 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1971,7 +1971,7 @@ int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width) int rc; u32 netdev_speed; struct net_device *netdev; - struct ethtool_link_ksettings lksettings; + struct ethtool_link_ksettings lksettings = {}; if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET) return -EINVAL; -- cgit From efb9cbf66440482ceaa90493d648226ab7ec2ebf Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Sat, 28 Oct 2023 17:32:42 +0800 Subject: RDMA/hns: Fix unnecessary err return when using invalid congest control algorithm Add a default congest control algorithm so that driver won't return an error when the configured algorithm is invalid. Fixes: f91696f2f053 ("RDMA/hns: Support congestion control type selection according to the FW") Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20231028093242.670325-1-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0cd2612a4987..2bca9560f32d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4760,10 +4760,15 @@ static int check_cong_type(struct ib_qp *ibqp, cong_alg->wnd_mode_sel = WND_LIMIT; break; default: - ibdev_err(&hr_dev->ib_dev, - "error type(%u) for congestion selection.\n", - hr_dev->caps.cong_type); - return -EINVAL; + ibdev_warn(&hr_dev->ib_dev, + "invalid type(%u) for congestion selection.\n", + hr_dev->caps.cong_type); + hr_dev->caps.cong_type = CONG_TYPE_DCQCN; + cong_alg->alg_sel = CONG_DCQCN; + cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL; + cong_alg->dip_vld = DIP_INVALID; + cong_alg->wnd_mode_sel = WND_LIMIT; + break; } return 0; -- cgit From 348ddab81f7b0983d9fb158df910254f08d3f887 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 29 Sep 2023 10:16:37 +0200 Subject: coresight: etm4x: Remove bogous __exit annotation for some functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit etm4_platform_driver (which lives in ".data" contains a reference to etm4_remove_platform_dev(). So the latter must not be marked with __exit which results in the function being discarded for a build with CONFIG_CORESIGHT_SOURCE_ETM4X=y which in turn makes the remove pointer contain invalid data. etm4x_amba_driver referencing etm4_remove_amba() has the same issue. Drop the __exit annotations for the two affected functions and a third one that is called by the other two. For reasons I don't understand this isn't catched by building with CONFIG_DEBUG_SECTION_MISMATCH=y. Fixes: c23bc382ef0e ("coresight: etm4x: Refactor probing routine") Fixes: 5214b563588e ("coresight: etm4x: Add support for sysreg only devices") Signed-off-by: Uwe Kleine-König Reviewed-by: James Clark Link: https://lore.kernel.org/all/20230929081540.yija47lsj35xtj4v@pengutronix.de/ Link: https://lore.kernel.org/r/20230929081637.2377335-1-u.kleine-koenig@pengutronix.de Signed-off-by: Suzuki K Poulose --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 77b0271ce6eb..34aee59dd147 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -2224,7 +2224,7 @@ static void clear_etmdrvdata(void *info) per_cpu(delayed_probe, cpu) = NULL; } -static void __exit etm4_remove_dev(struct etmv4_drvdata *drvdata) +static void etm4_remove_dev(struct etmv4_drvdata *drvdata) { bool had_delayed_probe; /* @@ -2253,7 +2253,7 @@ static void __exit etm4_remove_dev(struct etmv4_drvdata *drvdata) } } -static void __exit etm4_remove_amba(struct amba_device *adev) +static void etm4_remove_amba(struct amba_device *adev) { struct etmv4_drvdata *drvdata = dev_get_drvdata(&adev->dev); @@ -2261,7 +2261,7 @@ static void __exit etm4_remove_amba(struct amba_device *adev) etm4_remove_dev(drvdata); } -static int __exit etm4_remove_platform_dev(struct platform_device *pdev) +static int etm4_remove_platform_dev(struct platform_device *pdev) { struct etmv4_drvdata *drvdata = dev_get_drvdata(&pdev->dev); -- cgit From 287e82cf69aa264a52bc37591bd0eb407e20f85c Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 6 Oct 2023 14:14:52 +0100 Subject: coresight: Fix crash when Perf and sysfs modes are used concurrently Partially revert the change in commit 6148652807ba ("coresight: Enable and disable helper devices adjacent to the path") which changed the bare call from source_ops(csdev)->enable() to coresight_enable_source() for Perf sessions. It was missed that coresight_enable_source() is specifically for the sysfs interface, rather than being a generic call. This interferes with the sysfs reference counting to cause the following crash: $ perf record -e cs_etm/@tmc_etr0/ -C 0 & $ echo 1 > /sys/bus/coresight/devices/tmc_etr0/enable_sink $ echo 1 > /sys/bus/coresight/devices/etm0/enable_source $ echo 0 > /sys/bus/coresight/devices/etm0/enable_source Unable to handle kernel NULL pointer dereference at virtual address 00000000000001d0 Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP ... Call trace: etm4_disable+0x54/0x150 [coresight_etm4x] coresight_disable_source+0x6c/0x98 [coresight] coresight_disable+0x74/0x1c0 [coresight] enable_source_store+0x88/0xa0 [coresight] dev_attr_store+0x20/0x40 sysfs_kf_write+0x4c/0x68 kernfs_fop_write_iter+0x120/0x1b8 vfs_write+0x2dc/0x3b0 ksys_write+0x70/0x108 __arm64_sys_write+0x24/0x38 invoke_syscall+0x50/0x128 el0_svc_common.constprop.0+0x104/0x130 do_el0_svc+0x40/0xb8 el0_svc+0x2c/0xb8 el0t_64_sync_handler+0xc0/0xc8 el0t_64_sync+0x1a4/0x1a8 Code: d53cd042 91002000 b9402a81 b8626800 (f940ead5) ---[ end trace 0000000000000000 ]--- This commit linked below also fixes the issue, but has unlocked updates to the mode which could potentially race. So until we come up with a more complete solution that takes all locking and interaction between both modes into account, just revert back to the old behavior for Perf. Reported-by: Junhao He Closes: https://lore.kernel.org/linux-arm-kernel/20230921132904.60996-1-hejunhao3@huawei.com/ Fixes: 6148652807ba ("coresight: Enable and disable helper devices adjacent to the path") Tested-by: Junhao He Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20231006131452.646721-1-james.clark@arm.com --- drivers/hwtracing/coresight/coresight-etm-perf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 5ca6278baff4..89e8ed214ea4 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -493,7 +493,7 @@ static void etm_event_start(struct perf_event *event, int flags) goto fail_end_stop; /* Finally enable the tracer */ - if (coresight_enable_source(csdev, CS_MODE_PERF, event)) + if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF)) goto fail_disable_path; /* @@ -587,7 +587,7 @@ static void etm_event_stop(struct perf_event *event, int mode) return; /* stop tracer */ - coresight_disable_source(csdev, event); + source_ops(csdev)->disable(csdev, event); /* tell the core */ event->hw.state = PERF_HES_STOPPED; -- cgit From 55e0a2fb0cb5ab7c9c99c1ad4d3e6954de8b73a0 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 10 Oct 2023 16:47:31 +0800 Subject: hwtracing: hisi_ptt: Add dummy callback pmu::read() When start trace with perf option "-C $cpu" and immediately stop it with SIGTERM or others, the perf core will invoke pmu::read() while the driver doesn't implement it. Add a dummy pmu::read() to avoid any issues. Fixes: ff0de066b463 ("hwtracing: hisi_ptt: Add trace function support for HiSilicon PCIe Tune and Trace device") Signed-off-by: Junhao He Signed-off-by: Yicong Yang Acked-by: Jonathan Cameron Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20231010084731.30450-6-yangyicong@huawei.com --- drivers/hwtracing/ptt/hisi_ptt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/ptt/hisi_ptt.c b/drivers/hwtracing/ptt/hisi_ptt.c index 49ea1b0f7489..3045d1894b81 100644 --- a/drivers/hwtracing/ptt/hisi_ptt.c +++ b/drivers/hwtracing/ptt/hisi_ptt.c @@ -1178,6 +1178,10 @@ static void hisi_ptt_pmu_del(struct perf_event *event, int flags) hisi_ptt_pmu_stop(event, PERF_EF_UPDATE); } +static void hisi_ptt_pmu_read(struct perf_event *event) +{ +} + static void hisi_ptt_remove_cpuhp_instance(void *hotplug_node) { cpuhp_state_remove_instance_nocalls(hisi_ptt_pmu_online, hotplug_node); @@ -1221,6 +1225,7 @@ static int hisi_ptt_register_pmu(struct hisi_ptt *hisi_ptt) .stop = hisi_ptt_pmu_stop, .add = hisi_ptt_pmu_add, .del = hisi_ptt_pmu_del, + .read = hisi_ptt_pmu_read, }; reg = readl(hisi_ptt->iobase + HISI_PTT_LOCATION); -- cgit From e0dd27ad8af00f147ac3c9de88e0687986afc3ea Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 10 Oct 2023 16:47:28 +0800 Subject: hwtracing: hisi_ptt: Handle the interrupt in hardirq context Handle the trace interrupt in the hardirq context, make sure the irq core won't threaded it by declaring IRQF_NO_THREAD and userspace won't balance it by declaring IRQF_NOBALANCING. Otherwise we may violate the synchronization requirements of the perf core, referenced to the change of arm-ccn PMU commit 0811ef7e2f54 ("bus: arm-ccn: fix PMU interrupt flags"). In the interrupt handler we mainly doing 2 things: - Copy the data from the local DMA buffer to the AUX buffer - Commit the data in the AUX buffer Signed-off-by: Yicong Yang Acked-by: Jonathan Cameron [ Fixed commit description to suppress checkpatch warning ] Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20231010084731.30450-3-yangyicong@huawei.com --- drivers/hwtracing/ptt/hisi_ptt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/ptt/hisi_ptt.c b/drivers/hwtracing/ptt/hisi_ptt.c index 3045d1894b81..dbed4fd4e296 100644 --- a/drivers/hwtracing/ptt/hisi_ptt.c +++ b/drivers/hwtracing/ptt/hisi_ptt.c @@ -342,9 +342,9 @@ static int hisi_ptt_register_irq(struct hisi_ptt *hisi_ptt) return ret; hisi_ptt->trace_irq = pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ); - ret = devm_request_threaded_irq(&pdev->dev, hisi_ptt->trace_irq, - NULL, hisi_ptt_isr, 0, - DRV_NAME, hisi_ptt); + ret = devm_request_irq(&pdev->dev, hisi_ptt->trace_irq, hisi_ptt_isr, + IRQF_NOBALANCING | IRQF_NO_THREAD, DRV_NAME, + hisi_ptt); if (ret) { pci_err(pdev, "failed to request irq %d, ret = %d\n", hisi_ptt->trace_irq, ret); -- cgit From aff787f64ad7cbb54614b51b82c682fe06411ef3 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 10 Oct 2023 16:47:30 +0800 Subject: hwtracing: hisi_ptt: Don't try to attach a task PTT is an uncore PMU and shouldn't be attached to any task. Block the usage in pmu::event_init(). Signed-off-by: Yicong Yang Acked-by: Jonathan Cameron Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20231010084731.30450-5-yangyicong@huawei.com --- drivers/hwtracing/ptt/hisi_ptt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwtracing/ptt/hisi_ptt.c b/drivers/hwtracing/ptt/hisi_ptt.c index dbed4fd4e296..a991ecb7515a 100644 --- a/drivers/hwtracing/ptt/hisi_ptt.c +++ b/drivers/hwtracing/ptt/hisi_ptt.c @@ -1000,6 +1000,9 @@ static int hisi_ptt_pmu_event_init(struct perf_event *event) return -EOPNOTSUPP; } + if (event->attach_state & PERF_ATTACH_TASK) + return -EOPNOTSUPP; + if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type) return -ENOENT; -- cgit From 3fad96e9b21bed214c1593d7d7fb3e40d1fbf6f4 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 24 Oct 2023 11:57:15 +0100 Subject: firmware: arm_ffa: Declare ffa_bus_type structure in the header smatch reports: drivers/firmware/arm_ffa/bus.c:108:17: warning: symbol 'ffa_bus_type' was not declared. Should it be static? ffa_bus_type is exported to be useful in the FF-A driver. So this warning is not correct. However, declaring the ffa_bus_type structure in the header like many other bus_types do already removes this warning. So let us just do the same and get rid of the warning. Link: https://lore.kernel.org/r/20231024105715.2369638-1-sudeep.holla@arm.com Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 1abedb5b2e48..3d0fde57ba90 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -209,6 +209,8 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; } #define module_ffa_driver(__ffa_driver) \ module_driver(__ffa_driver, ffa_register, ffa_unregister) +extern struct bus_type ffa_bus_type; + /* FFA transport related */ struct ffa_partition_info { u16 id; -- cgit From 95520fc07743d3f58e8872acd72e928b09fbc143 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 24 Oct 2023 11:56:17 +0100 Subject: firmware: arm_ffa: Allow FF-A initialisation even when notification fails FF-A notifications are optional feature in the specification. Currently we allow to continue if the firmware reports no support for the notifications. However, we fail to continue and complete the FF-A driver initialisation if the notification setup fails for any reason. Let us allow the FF-A driver to complete the initialisation even if the notification fails to setup. We will just flag the error and continue to provide other features in the driver. Link: https://lore.kernel.org/r/20231024-ffa-notification-fixes-v1-1-d552c0ec260d@arm.com Tested-by: Jens Wiklander Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 07b72c679247..585632a444b4 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1390,20 +1390,20 @@ static void ffa_notifications_cleanup(void) } } -static int ffa_notifications_setup(void) +static void ffa_notifications_setup(void) { int ret, irq; ret = ffa_features(FFA_NOTIFICATION_BITMAP_CREATE, 0, NULL, NULL); if (ret) { - pr_err("Notifications not supported, continuing with it ..\n"); - return 0; + pr_info("Notifications not supported, continuing with it ..\n"); + return; } ret = ffa_notification_bitmap_create(); if (ret) { - pr_err("notification_bitmap_create error %d\n", ret); - return ret; + pr_info("Notification bitmap create error %d\n", ret); + return; } drv_info->bitmap_created = true; @@ -1426,10 +1426,10 @@ static int ffa_notifications_setup(void) ret = ffa_sched_recv_cb_update(drv_info->vm_id, ffa_self_notif_handle, drv_info, true); if (!ret) - return ret; + return; cleanup: + pr_info("Notification setup failed %d, not enabled\n", ret); ffa_notifications_cleanup(); - return ret; } static int __init ffa_init(void) @@ -1487,13 +1487,9 @@ static int __init ffa_init(void) ffa_set_up_mem_ops_native_flag(); - ret = ffa_notifications_setup(); - if (ret) - goto partitions_cleanup; + ffa_notifications_setup(); return 0; -partitions_cleanup: - ffa_partitions_cleanup(); free_pages: if (drv_info->tx_buffer) free_pages_exact(drv_info->tx_buffer, RXTX_BUFFER_SIZE); -- cgit From 6f47023f7a52f3482937e9271ba41570b8752067 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 24 Oct 2023 11:56:18 +0100 Subject: firmware: arm_ffa: Setup the partitions after the notification initialisation Currently the notifications are setup of the partitions are probed and FF-A devices are added on the FF-A bus. The FF-A driver probe can be called even before the FF-A notification setup happens which is wrong and may result in failure or misbehaviour in the FF-A partition device probe. In order to ensure the FF-A notifications are setup before the FF-A devices are probed, let us move the FF-A partition setup after the completion of FF-A notification setup. Link: https://lore.kernel.org/r/20231024-ffa-notification-fixes-v1-2-d552c0ec260d@arm.com Tested-by: Jens Wiklander Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 585632a444b4..69ad3add3175 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1422,11 +1422,7 @@ static void ffa_notifications_setup(void) hash_init(drv_info->notifier_hash); mutex_init(&drv_info->notify_lock); - /* Register internal scheduling callback */ - ret = ffa_sched_recv_cb_update(drv_info->vm_id, ffa_self_notif_handle, - drv_info, true); - if (!ret) - return; + return; cleanup: pr_info("Notification setup failed %d, not enabled\n", ret); ffa_notifications_cleanup(); @@ -1483,12 +1479,17 @@ static int __init ffa_init(void) mutex_init(&drv_info->rx_lock); mutex_init(&drv_info->tx_lock); - ffa_setup_partitions(); - ffa_set_up_mem_ops_native_flag(); ffa_notifications_setup(); + ffa_setup_partitions(); + + ret = ffa_sched_recv_cb_update(drv_info->vm_id, ffa_self_notif_handle, + drv_info, true); + if (ret) + pr_info("Failed to register driver sched callback %d\n", ret); + return 0; free_pages: if (drv_info->tx_buffer) -- cgit From f4bfcaee34bc9527274710884f8d14039f3ee506 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 24 Oct 2023 11:56:19 +0100 Subject: firmware: arm_ffa: Add checks for the notification enabled state We need to check if the FF-A notifications are enabled or not in all the notification operations that are accessible for the FF-A device from the FF-A driver. This helps to avoid making calls to the FF-A firmware even if the notification setup has failed. Link: https://lore.kernel.org/r/20231024-ffa-notification-fixes-v1-3-d552c0ec260d@arm.com Tested-by: Jens Wiklander Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 69ad3add3175..1724a0cbb2cf 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -99,6 +99,7 @@ struct ffa_drv_info { void *tx_buffer; bool mem_ops_native; bool bitmap_created; + bool notif_enabled; unsigned int sched_recv_irq; unsigned int cpuhp_state; struct ffa_pcpu_irq __percpu *irq_pcpu; @@ -889,6 +890,8 @@ static int ffa_memory_lend(struct ffa_mem_ops_args *args) #define FFA_SECURE_PARTITION_ID_FLAG BIT(15) +#define ffa_notifications_disabled() (!drv_info->notif_enabled) + enum notify_type { NON_SECURE_VM, SECURE_PARTITION, @@ -908,6 +911,9 @@ static int ffa_sched_recv_cb_update(u16 part_id, ffa_sched_recv_cb callback, struct ffa_dev_part_info *partition; bool cb_valid; + if (ffa_notifications_disabled()) + return -EOPNOTSUPP; + partition = xa_load(&drv_info->partition_info, part_id); write_lock(&partition->rw_lock); @@ -1001,6 +1007,9 @@ static int ffa_notify_relinquish(struct ffa_device *dev, int notify_id) int rc; enum notify_type type = ffa_notify_type_get(dev->vm_id); + if (ffa_notifications_disabled()) + return -EOPNOTSUPP; + if (notify_id >= FFA_MAX_NOTIFICATIONS) return -EINVAL; @@ -1027,6 +1036,9 @@ static int ffa_notify_request(struct ffa_device *dev, bool is_per_vcpu, u32 flags = 0; enum notify_type type = ffa_notify_type_get(dev->vm_id); + if (ffa_notifications_disabled()) + return -EOPNOTSUPP; + if (notify_id >= FFA_MAX_NOTIFICATIONS) return -EINVAL; @@ -1057,6 +1069,9 @@ static int ffa_notify_send(struct ffa_device *dev, int notify_id, { u32 flags = 0; + if (ffa_notifications_disabled()) + return -EOPNOTSUPP; + if (is_per_vcpu) flags |= (PER_VCPU_NOTIFICATION_FLAG | vcpu << 16); @@ -1388,6 +1403,7 @@ static void ffa_notifications_cleanup(void) ffa_notification_bitmap_destroy(); drv_info->bitmap_created = false; } + drv_info->notif_enabled = false; } static void ffa_notifications_setup(void) @@ -1422,6 +1438,7 @@ static void ffa_notifications_setup(void) hash_init(drv_info->notifier_hash); mutex_init(&drv_info->notify_lock); + drv_info->notif_enabled = true; return; cleanup: pr_info("Notification setup failed %d, not enabled\n", ret); -- cgit From 6d67cbe67a86a87307df0c6fafa74394a6820ad6 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 24 Oct 2023 11:56:20 +0100 Subject: firmware: arm_ffa: Fix FFA notifications cleanup path We allow the FF-A to be initialised successfully even when notification fails. When the notification fails, ffa_notifications_cleanup() gets called on the failure path. However, the driver information about the notifications like the irq, workqueues and cpu hotplug state for enabling and disabling percpu IRQ are not cleared. This may result in unexpected behaviour during CPU hotplug because of percpu IRQ being enabled and disabled or during the driver removal when ffa_notifications_cleanup() gets executed again. Fix the FFA notifications cleanup path by clearing all the notification related driver information. Link: https://lore.kernel.org/r/20231024-ffa-notification-fixes-v1-4-d552c0ec260d@arm.com Tested-by: Jens Wiklander Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 1724a0cbb2cf..49ebdc2775dd 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1326,8 +1326,10 @@ static int ffa_sched_recv_irq_map(void) static void ffa_sched_recv_irq_unmap(void) { - if (drv_info->sched_recv_irq) + if (drv_info->sched_recv_irq) { irq_dispose_mapping(drv_info->sched_recv_irq); + drv_info->sched_recv_irq = 0; + } } static int ffa_cpuhp_pcpu_irq_enable(unsigned int cpu) @@ -1344,17 +1346,23 @@ static int ffa_cpuhp_pcpu_irq_disable(unsigned int cpu) static void ffa_uninit_pcpu_irq(void) { - if (drv_info->cpuhp_state) + if (drv_info->cpuhp_state) { cpuhp_remove_state(drv_info->cpuhp_state); + drv_info->cpuhp_state = 0; + } - if (drv_info->notif_pcpu_wq) + if (drv_info->notif_pcpu_wq) { destroy_workqueue(drv_info->notif_pcpu_wq); + drv_info->notif_pcpu_wq = NULL; + } if (drv_info->sched_recv_irq) free_percpu_irq(drv_info->sched_recv_irq, drv_info->irq_pcpu); - if (drv_info->irq_pcpu) + if (drv_info->irq_pcpu) { free_percpu(drv_info->irq_pcpu); + drv_info->irq_pcpu = NULL; + } } static int ffa_init_pcpu_irq(unsigned int irq) -- cgit From 05857a1eb723190923b091a857184ced17a83770 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 31 Oct 2023 14:13:35 +0000 Subject: firmware: arm_ffa: Fix the size of the allocation in ffa_partitions_cleanup() Arry of pointer to struct ffa_dev_part_info needs to be allocated to fetch the pointers stored in XArray. However, currently we allocate the entire structure instead of just pointers. Fix the allocation size. This will also eliminate the below Smatch istatic checker warning: | drivers/firmware/arm_ffa/driver.c:1251 ffa_partitions_cleanup() | warn: double check that we're allocating correct size: 8 vs 88 Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/0e8ddbca-d9da-4a3b-aae3-328993b62ba2@moroto.mountain Link: https://lore.kernel.org/r/20231031141335.3077026-1-sudeep.holla@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 49ebdc2775dd..034bce8a2ca1 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1248,7 +1248,7 @@ static void ffa_partitions_cleanup(void) if (!count) return; - info = kcalloc(count, sizeof(**info), GFP_KERNEL); + info = kcalloc(count, sizeof(*info), GFP_KERNEL); if (!info) return; -- cgit From f1ed48ef97e2d12dee21e42db4a6ebb895ed3a79 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 8 Nov 2023 12:15:49 +0100 Subject: firmware: arm_ffa: Fix ffa_notification_info_get() IDs handling To parse the retrieved ID lists appropriately in ffa_notification_info_get() the ids_processed variable should not be pre-incremented - we are dropping an identifier at the beginning of the list. Fix it by using the post-increment operator to increment the number of processed IDs. Fixes: 3522be48d82b ("firmware: arm_ffa: Implement the NOTIFICATION_INFO_GET interface") Signed-off-by: Lorenzo Pieralisi Cc: Sudeep Holla Link: https://lore.kernel.org/r/20231108111549.155974-1-lpieralisi@kernel.org Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 034bce8a2ca1..6146b2927d5c 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -783,7 +783,7 @@ static void ffa_notification_info_get(void) if (ids_processed >= max_ids - 1) break; - part_id = packed_id_list[++ids_processed]; + part_id = packed_id_list[ids_processed++]; if (!ids_count[list]) { /* Global Notification */ __do_sched_recv_cb(part_id, 0, false); @@ -795,7 +795,7 @@ static void ffa_notification_info_get(void) if (ids_processed >= max_ids - 1) break; - vcpu_id = packed_id_list[++ids_processed]; + vcpu_id = packed_id_list[ids_processed++]; __do_sched_recv_cb(part_id, vcpu_id, true); } -- cgit From 0c6498a59fbbcbf3d0a58c282dd6f0bca0eed92a Mon Sep 17 00:00:00 2001 From: Matus Malych Date: Sun, 12 Nov 2023 17:54:04 +0100 Subject: ASoC: amd: yc: Add HP 255 G10 into quirk table HP 255 G10's internal microphone array can be made to work by adding it to the quirk table. Signed-off-by: Matus Malych Link: https://lore.kernel.org/r/20231112165403.3221-1-matus@malych.org Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 15a864dcd7bd..e2a510443bf1 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -367,6 +367,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "8A3E"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8B2F"), + } + }, { .driver_data = &acp6x_card, .matches = { -- cgit From 37e6fd0cebf0b9f71afb38fd95b10408799d1f0b Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 13 Nov 2023 15:59:16 +0000 Subject: ASoC: wm8974: Correct boost mixer inputs Bit 6 of INPPGA (INPPGAMUTE) does not control the Aux path, it controls the input PGA path, as can been seen from Figure 8 Input Boost Stage in the datasheet. Update the naming of things in the driver to match this and update the routing to also reflect this. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20231113155916.1741027-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm8974.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wm8974.c b/sound/soc/codecs/wm8974.c index 044b6f604c09..260bac695b20 100644 --- a/sound/soc/codecs/wm8974.c +++ b/sound/soc/codecs/wm8974.c @@ -186,7 +186,7 @@ SOC_DAPM_SINGLE("PCM Playback Switch", WM8974_MONOMIX, 0, 1, 0), /* Boost mixer */ static const struct snd_kcontrol_new wm8974_boost_mixer[] = { -SOC_DAPM_SINGLE("Aux Switch", WM8974_INPPGA, 6, 1, 1), +SOC_DAPM_SINGLE("PGA Switch", WM8974_INPPGA, 6, 1, 1), }; /* Input PGA */ @@ -246,8 +246,8 @@ static const struct snd_soc_dapm_route wm8974_dapm_routes[] = { /* Boost Mixer */ {"ADC", NULL, "Boost Mixer"}, - {"Boost Mixer", "Aux Switch", "Aux Input"}, - {"Boost Mixer", NULL, "Input PGA"}, + {"Boost Mixer", NULL, "Aux Input"}, + {"Boost Mixer", "PGA Switch", "Input PGA"}, {"Boost Mixer", NULL, "MICP"}, /* Input PGA */ -- cgit From d9995cd96928fa72963293fa5ca87350fed16930 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 31 Oct 2023 08:42:40 +0000 Subject: hwmon: ltc2991: Fix spelling mistake "contiuous" -> "continuous" There is a spelling mistake in a dev_err_probe messages. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20231031084240.2148339-1-colin.i.king@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ltc2991.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ltc2991.c b/drivers/hwmon/ltc2991.c index bd63c61129a9..fc53fdcb2b6c 100644 --- a/drivers/hwmon/ltc2991.c +++ b/drivers/hwmon/ltc2991.c @@ -373,7 +373,7 @@ static int ltc2991_init(struct ltc2991_state *st) LTC2991_REPEAT_ACQ_EN); if (ret) return dev_err_probe(st->dev, ret, - "Error: Failed to set contiuous mode.\n"); + "Error: Failed to set continuous mode.\n"); /* Enable all channels and trigger conversions */ return regmap_write(st->regmap, LTC2991_CH_EN_TRIGGER, -- cgit From 58ebe7fb6eb2deed0fd05cf57911fea3f36124eb Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Tue, 31 Oct 2023 11:13:24 +0200 Subject: hwmon: max31827: include regulator header Include `linux/regulator/consumer.h` since the driver is using `devm_regulator_get_enable` function. Signed-off-by: Antoniu Miclaus Link: https://lore.kernel.org/r/20231031091324.23991-1-antoniu.miclaus@analog.com Signed-off-by: Guenter Roeck --- drivers/hwmon/max31827.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/max31827.c b/drivers/hwmon/max31827.c index fd1fed1a797c..a1ce65145669 100644 --- a/drivers/hwmon/max31827.c +++ b/drivers/hwmon/max31827.c @@ -12,6 +12,7 @@ #include #include #include +#include #define MAX31827_T_REG 0x0 #define MAX31827_CONFIGURATION_REG 0x2 -- cgit From 80aea01c48971a1fffc0252d036995572d84950d Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 2 Nov 2023 16:35:49 +0100 Subject: KVM: s390: vsie: fix wrong VIR 37 when MSO is used When the host invalidates a guest page, it will also check if the page was used to map the prefix of any guest CPUs, in which case they are stopped and marked as needing a prefix refresh. Upon starting the affected CPUs again, their prefix pages are explicitly faulted in and revalidated if they had been invalidated. A bit in the PGSTEs indicates whether or not a page might contain a prefix. The bit is allowed to overindicate. Pages above 2G are skipped, because they cannot be prefixes, since KVM runs all guests with MSO = 0. The same applies for nested guests (VSIE). When the host invalidates a guest page that maps the prefix of the nested guest, it has to stop the affected nested guest CPUs and mark them as needing a prefix refresh. The same PGSTE bit used for the guest prefix is also used for the nested guest. Pages above 2G are skipped like for normal guests, which is the source of the bug. The nested guest runs is the guest primary address space. The guest could be running the nested guest using MSO != 0. If the MSO + prefix for the nested guest is above 2G, the check for nested prefix will skip it. This will cause the invalidation notifier to not stop the CPUs of the nested guest and not mark them as needing refresh. When the nested guest is run again, its prefix will not be refreshed, since it has not been marked for refresh. This will cause a fatal validity intercept with VIR code 37. Fix this by removing the check for 2G for nested guests. Now all invalidations of pages with the notify bit set will always scan the existing VSIE shadow state descriptors. This allows to catch invalidations of nested guest prefix mappings even when the prefix is above 2G in the guest virtual address space. Fixes: a3508fbe9dc6 ("KVM: s390: vsie: initial support for nested virtualization") Tested-by: Nico Boehr Reviewed-by: Nico Boehr Reviewed-by: David Hildenbrand Message-ID: <20231102153549.53984-1-imbrenda@linux.ibm.com> Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/vsie.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 02dcbe82a8e5..8207a892bbe2 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -587,10 +587,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, if (!gmap_is_shadow(gmap)) return; - if (start >= 1UL << 31) - /* We are only interested in prefix pages */ - return; - /* * Only new shadow blocks are added to the list during runtime, * therefore we can safely reference them all the time. -- cgit From 27072b8e18a73ffeffb1c140939023915a35134b Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 9 Nov 2023 13:36:24 +0100 Subject: KVM: s390/mm: Properly reset no-dat When the CMMA state needs to be reset, the no-dat bit also needs to be reset. Failure to do so could cause issues in the guest, since the guest expects the bit to be cleared after a reset. Cc: Reviewed-by: Nico Boehr Message-ID: <20231109123624.37314-1-imbrenda@linux.ibm.com> Signed-off-by: Claudio Imbrenda --- arch/s390/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 3bd2ab2a9a34..5cb92941540b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -756,7 +756,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_clear(mm, addr, ptep); } if (reset) - pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; + pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); pgste_set_unlock(ptep, pgste); preempt_enable(); } -- cgit From ba12ab66aa83a2340a51ad6e74b284269745138c Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 14 Nov 2023 11:02:45 -0600 Subject: RDMA/irdma: Do not modify to SQD on error Remove the modify to SQD before going to ERROR state. It is not needed. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20231114170246.238-2-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 2138f0a2ff85..36e69e6ca9f8 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1424,13 +1424,6 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, case IB_QPS_SQE: case IB_QPS_ERR: case IB_QPS_RESET: - if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) { - spin_unlock_irqrestore(&iwqp->lock, flags); - info.next_iwarp_state = IRDMA_QP_STATE_SQD; - irdma_hw_modify_qp(iwdev, iwqp, &info, true); - spin_lock_irqsave(&iwqp->lock, flags); - } - if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { spin_unlock_irqrestore(&iwqp->lock, flags); if (udata && udata->inlen) { -- cgit From bd6da690c27d75cae432c09162d054b34fa2156f Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 14 Nov 2023 11:02:46 -0600 Subject: RDMA/irdma: Add wait for suspend on SQD Currently, there is no wait for the QP suspend to complete on a modify to SQD state. Add a wait, after the modify to SQD state, for the Suspend Complete AE. While we are at it, update the suspend timeout value in irdma_prep_tc_change to use IRDMA_EVENT_TIMEOUT_MS too. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20231114170246.238-3-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/hw.c | 6 +++++- drivers/infiniband/hw/irdma/main.c | 2 +- drivers/infiniband/hw/irdma/main.h | 2 +- drivers/infiniband/hw/irdma/verbs.c | 21 +++++++++++++++++++++ drivers/infiniband/hw/irdma/verbs.h | 1 + 5 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 8fa7e4a18e73..74f6bc7b7ad1 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -321,7 +321,11 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) break; case IRDMA_AE_QP_SUSPEND_COMPLETE: if (iwqp->iwdev->vsi.tc_change_pending) { - atomic_dec(&iwqp->sc_qp.vsi->qp_suspend_reqs); + if (!atomic_dec_return(&qp->vsi->qp_suspend_reqs)) + wake_up(&iwqp->iwdev->suspend_wq); + } + if (iwqp->suspend_pending) { + iwqp->suspend_pending = false; wake_up(&iwqp->iwdev->suspend_wq); } break; diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 9ac48b4dab41..3f13200ff71b 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -48,7 +48,7 @@ static void irdma_prep_tc_change(struct irdma_device *iwdev) /* Wait for all qp's to suspend */ wait_event_timeout(iwdev->suspend_wq, !atomic_read(&iwdev->vsi.qp_suspend_reqs), - IRDMA_EVENT_TIMEOUT); + msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS)); irdma_ws_reset(&iwdev->vsi); } diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index d66d87bb8bc4..b65bc2ea542f 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -78,7 +78,7 @@ extern struct auxiliary_driver i40iw_auxiliary_drv; #define MAX_DPC_ITERATIONS 128 -#define IRDMA_EVENT_TIMEOUT 50000 +#define IRDMA_EVENT_TIMEOUT_MS 5000 #define IRDMA_VCHNL_EVENT_TIMEOUT 100000 #define IRDMA_RST_TIMEOUT_HZ 4 diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 36e69e6ca9f8..5fa88e6cca4e 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1157,6 +1157,21 @@ exit: return prio; } +static int irdma_wait_for_suspend(struct irdma_qp *iwqp) +{ + if (!wait_event_timeout(iwqp->iwdev->suspend_wq, + !iwqp->suspend_pending, + msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS))) { + iwqp->suspend_pending = false; + ibdev_warn(&iwqp->iwdev->ibdev, + "modify_qp timed out waiting for suspend. qp_id = %d, last_ae = 0x%x\n", + iwqp->ibqp.qp_num, iwqp->last_aeq); + return -EBUSY; + } + + return 0; +} + /** * irdma_modify_qp_roce - modify qp request * @ibqp: qp's pointer for modify @@ -1420,6 +1435,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, info.next_iwarp_state = IRDMA_QP_STATE_SQD; issue_modify_qp = 1; + iwqp->suspend_pending = true; break; case IB_QPS_SQE: case IB_QPS_ERR: @@ -1460,6 +1476,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, ctx_info->rem_endpoint_idx = udp_info->arp_idx; if (irdma_hw_modify_qp(iwdev, iwqp, &info, true)) return -EINVAL; + if (info.next_iwarp_state == IRDMA_QP_STATE_SQD) { + ret = irdma_wait_for_suspend(iwqp); + if (ret) + return ret; + } spin_lock_irqsave(&iwqp->lock, flags); if (iwqp->iwarp_state == info.curr_iwarp_state) { iwqp->iwarp_state = info.next_iwarp_state; diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index c42ac22de00e..cfa140b36395 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -198,6 +198,7 @@ struct irdma_qp { u8 flush_issued : 1; u8 sig_all : 1; u8 pau_mode : 1; + u8 suspend_pending : 1; u8 rsvd : 1; u8 iwarp_state; u16 term_sq_flush_code; -- cgit From e49c0b1401d0d0efc8aeeb9db5a9d54e980d9f69 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sun, 22 Oct 2023 20:58:06 +0200 Subject: Documentation: coresight: fix `make refcheckdocs` warning This reference uses a glob pattern to match multiple files, but the asterisk was escaped as \* in order to not be interpreted by sphinx as reStructuredText markup. refcheckdocs/documentation-file-ref-check doesn't know about rST syntax and tries to interpret the \* literally (instead of as a glob). We can work around the warning by putting the Documentation reference inside double backticks (``..``), which allows us to not escape the asterisk. Fixes: c06475910b52 ("Documentation: coresight: Escape coresight bindings file wildcard") Cc: Mathieu Poirier Cc: Suzuki K Poulose Cc: Mike Leach Cc: Leo Yan Cc: Jonathan Corbet Cc: Rob Herring Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-next@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Stephen Rothwell Cc: Bagas Sanjaya Signed-off-by: Vegard Nossum Reviewed-by: Bagas Sanjaya Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20231022185806.919434-1-vegard.nossum@oracle.com --- Documentation/trace/coresight/coresight.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/trace/coresight/coresight.rst b/Documentation/trace/coresight/coresight.rst index 4a71ea6cb390..826e59a698da 100644 --- a/Documentation/trace/coresight/coresight.rst +++ b/Documentation/trace/coresight/coresight.rst @@ -130,7 +130,7 @@ Misc: Device Tree Bindings -------------------- -See Documentation/devicetree/bindings/arm/arm,coresight-\*.yaml for details. +See ``Documentation/devicetree/bindings/arm/arm,coresight-*.yaml`` for details. As of this writing drivers for ITM, STMs and CTIs are not provided but are expected to be added as the solution matures. -- cgit From b8411287aef4a994eff0c68f5597910c4194dfe3 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 14 Nov 2023 21:33:43 +0800 Subject: coresight: ultrasoc-smb: Fix sleep while close preempt in enable_smb When we to enable the SMB by perf, the perf sched will call perf_ctx_lock() to close system preempt in event_function_call(). But SMB::enable_smb() use mutex to lock the critical section, which may sleep. BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 153023, name: perf preempt_count: 2, expected: 0 RCU nest depth: 0, expected: 0 INFO: lockdep is turned off. irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0xae8/0x2b48 softirqs last enabled at (0): [] copy_process+0xae8/0x2b48 softirqs last disabled at (0): [<0000000000000000>] 0x0 CPU: 2 PID: 153023 Comm: perf Kdump: loaded Tainted: G W O 6.5.0-rc4+ #1 Call trace: ... __mutex_lock+0xbc/0xa70 mutex_lock_nested+0x34/0x48 smb_update_buffer+0x58/0x360 [ultrasoc_smb] etm_event_stop+0x204/0x2d8 [coresight] etm_event_del+0x1c/0x30 [coresight] event_sched_out+0x17c/0x3b8 group_sched_out.part.0+0x5c/0x208 __perf_event_disable+0x15c/0x210 event_function+0xe0/0x230 remote_function+0xb4/0xe8 generic_exec_single+0x160/0x268 smp_call_function_single+0x20c/0x2a0 event_function_call+0x20c/0x220 _perf_event_disable+0x5c/0x90 perf_event_for_each_child+0x58/0xc0 _perf_ioctl+0x34c/0x1250 perf_ioctl+0x64/0x98 ... Use spinlock to replace mutex to control driver data access to one at a time. The function copy_to_user() may sleep, it cannot be in a spinlock context, so we can't simply replace it in smb_read(). But we can ensure that only one user gets the SMB device fd by smb_open(), so remove the locks from smb_read() and buffer synchronization is guaranteed by the user. Fixes: 06f5c2926aaa ("drivers/coresight: Add UltraSoc System Memory Buffer driver") Signed-off-by: Junhao He Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20231114133346.30489-2-hejunhao3@huawei.com --- drivers/hwtracing/coresight/ultrasoc-smb.c | 35 ++++++++++++------------------ drivers/hwtracing/coresight/ultrasoc-smb.h | 6 ++--- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.c b/drivers/hwtracing/coresight/ultrasoc-smb.c index e9a32a97fbee..0a0fe9fcc57f 100644 --- a/drivers/hwtracing/coresight/ultrasoc-smb.c +++ b/drivers/hwtracing/coresight/ultrasoc-smb.c @@ -99,7 +99,7 @@ static int smb_open(struct inode *inode, struct file *file) struct smb_drv_data, miscdev); int ret = 0; - mutex_lock(&drvdata->mutex); + spin_lock(&drvdata->spinlock); if (drvdata->reading) { ret = -EBUSY; @@ -115,7 +115,7 @@ static int smb_open(struct inode *inode, struct file *file) drvdata->reading = true; out: - mutex_unlock(&drvdata->mutex); + spin_unlock(&drvdata->spinlock); return ret; } @@ -132,10 +132,8 @@ static ssize_t smb_read(struct file *file, char __user *data, size_t len, if (!len) return 0; - mutex_lock(&drvdata->mutex); - if (!sdb->data_size) - goto out; + return 0; to_copy = min(sdb->data_size, len); @@ -145,20 +143,15 @@ static ssize_t smb_read(struct file *file, char __user *data, size_t len, if (copy_to_user(data, sdb->buf_base + sdb->buf_rdptr, to_copy)) { dev_dbg(dev, "Failed to copy data to user\n"); - to_copy = -EFAULT; - goto out; + return -EFAULT; } *ppos += to_copy; - smb_update_read_ptr(drvdata, to_copy); - - dev_dbg(dev, "%zu bytes copied\n", to_copy); -out: if (!sdb->data_size) smb_reset_buffer(drvdata); - mutex_unlock(&drvdata->mutex); + dev_dbg(dev, "%zu bytes copied\n", to_copy); return to_copy; } @@ -167,9 +160,9 @@ static int smb_release(struct inode *inode, struct file *file) struct smb_drv_data *drvdata = container_of(file->private_data, struct smb_drv_data, miscdev); - mutex_lock(&drvdata->mutex); + spin_lock(&drvdata->spinlock); drvdata->reading = false; - mutex_unlock(&drvdata->mutex); + spin_unlock(&drvdata->spinlock); return 0; } @@ -262,7 +255,7 @@ static int smb_enable(struct coresight_device *csdev, enum cs_mode mode, struct smb_drv_data *drvdata = dev_get_drvdata(csdev->dev.parent); int ret = 0; - mutex_lock(&drvdata->mutex); + spin_lock(&drvdata->spinlock); /* Do nothing, the trace data is reading by other interface now */ if (drvdata->reading) { @@ -294,7 +287,7 @@ static int smb_enable(struct coresight_device *csdev, enum cs_mode mode, dev_dbg(&csdev->dev, "Ultrasoc SMB enabled\n"); out: - mutex_unlock(&drvdata->mutex); + spin_unlock(&drvdata->spinlock); return ret; } @@ -304,7 +297,7 @@ static int smb_disable(struct coresight_device *csdev) struct smb_drv_data *drvdata = dev_get_drvdata(csdev->dev.parent); int ret = 0; - mutex_lock(&drvdata->mutex); + spin_lock(&drvdata->spinlock); if (drvdata->reading) { ret = -EBUSY; @@ -327,7 +320,7 @@ static int smb_disable(struct coresight_device *csdev) dev_dbg(&csdev->dev, "Ultrasoc SMB disabled\n"); out: - mutex_unlock(&drvdata->mutex); + spin_unlock(&drvdata->spinlock); return ret; } @@ -408,7 +401,7 @@ static unsigned long smb_update_buffer(struct coresight_device *csdev, if (!buf) return 0; - mutex_lock(&drvdata->mutex); + spin_lock(&drvdata->spinlock); /* Don't do anything if another tracer is using this sink. */ if (atomic_read(&csdev->refcnt) != 1) @@ -432,7 +425,7 @@ static unsigned long smb_update_buffer(struct coresight_device *csdev, if (!buf->snapshot && lost) perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); out: - mutex_unlock(&drvdata->mutex); + spin_unlock(&drvdata->spinlock); return data_size; } @@ -590,7 +583,7 @@ static int smb_probe(struct platform_device *pdev) return ret; } - mutex_init(&drvdata->mutex); + spin_lock_init(&drvdata->spinlock); drvdata->pid = -1; ret = smb_register_sink(pdev, drvdata); diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.h b/drivers/hwtracing/coresight/ultrasoc-smb.h index d2e14e8d2c8a..82a44c14a882 100644 --- a/drivers/hwtracing/coresight/ultrasoc-smb.h +++ b/drivers/hwtracing/coresight/ultrasoc-smb.h @@ -8,7 +8,7 @@ #define _ULTRASOC_SMB_H #include -#include +#include /* Offset of SMB global registers */ #define SMB_GLB_CFG_REG 0x00 @@ -105,7 +105,7 @@ struct smb_data_buffer { * @csdev: Component vitals needed by the framework. * @sdb: Data buffer for SMB. * @miscdev: Specifics to handle "/dev/xyz.smb" entry. - * @mutex: Control data access to one at a time. + * @spinlock: Control data access to one at a time. * @reading: Synchronise user space access to SMB buffer. * @pid: Process ID of the process being monitored by the * session that is using this component. @@ -116,7 +116,7 @@ struct smb_drv_data { struct coresight_device *csdev; struct smb_data_buffer sdb; struct miscdevice miscdev; - struct mutex mutex; + spinlock_t spinlock; bool reading; pid_t pid; enum cs_mode mode; -- cgit From 830a7f54db102c889a3fe1c0a225f369ac05f07f Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 14 Nov 2023 21:33:44 +0800 Subject: coresight: ultrasoc-smb: Config SMB buffer before register sink The SMB dirver register the enable/disable sysfs interface in function smb_register_sink(), however the buffer depends on the following configuration to work well. So it'll be possible for user to access an unreset one. Move the config buffer operation to before register_sink(). Ignore the return value, if smb_config_inport() fails. That will cause the hardwares disable trace path to fail, should not affect SMB driver remove. So we make smb_remove() return success, Fixes: 06f5c2926aaa ("drivers/coresight: Add UltraSoc System Memory Buffer driver") Signed-off-by: Junhao He Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20231114133346.30489-3-hejunhao3@huawei.com --- drivers/hwtracing/coresight/ultrasoc-smb.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.c b/drivers/hwtracing/coresight/ultrasoc-smb.c index 0a0fe9fcc57f..2f2aba90a514 100644 --- a/drivers/hwtracing/coresight/ultrasoc-smb.c +++ b/drivers/hwtracing/coresight/ultrasoc-smb.c @@ -583,37 +583,32 @@ static int smb_probe(struct platform_device *pdev) return ret; } + ret = smb_config_inport(dev, true); + if (ret) + return ret; + + platform_set_drvdata(pdev, drvdata); spin_lock_init(&drvdata->spinlock); drvdata->pid = -1; ret = smb_register_sink(pdev, drvdata); if (ret) { + smb_config_inport(&pdev->dev, false); dev_err(dev, "Failed to register SMB sink\n"); return ret; } - ret = smb_config_inport(dev, true); - if (ret) { - smb_unregister_sink(drvdata); - return ret; - } - - platform_set_drvdata(pdev, drvdata); - return 0; } static int smb_remove(struct platform_device *pdev) { struct smb_drv_data *drvdata = platform_get_drvdata(pdev); - int ret; - - ret = smb_config_inport(&pdev->dev, false); - if (ret) - return ret; smb_unregister_sink(drvdata); + smb_config_inport(&pdev->dev, false); + return 0; } -- cgit From 862c135bde8bc185e8aae2110374175e6a1b6ed5 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 14 Nov 2023 21:33:45 +0800 Subject: coresight: ultrasoc-smb: Fix uninitialized before use buf_hw_base In smb_reset_buffer, the sdb->buf_hw_base variable is uninitialized before use, which initializes it in smb_init_data_buffer. And the SMB regiester are set in smb_config_inport. So move the call after smb_config_inport. Fixes: 06f5c2926aaa ("drivers/coresight: Add UltraSoc System Memory Buffer driver") Signed-off-by: Junhao He Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20231114133346.30489-4-hejunhao3@huawei.com --- drivers/hwtracing/coresight/ultrasoc-smb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.c b/drivers/hwtracing/coresight/ultrasoc-smb.c index 2f2aba90a514..6e32d31a95fe 100644 --- a/drivers/hwtracing/coresight/ultrasoc-smb.c +++ b/drivers/hwtracing/coresight/ultrasoc-smb.c @@ -477,7 +477,6 @@ static int smb_init_data_buffer(struct platform_device *pdev, static void smb_init_hw(struct smb_drv_data *drvdata) { smb_disable_hw(drvdata); - smb_reset_buffer(drvdata); writel(SMB_LB_CFG_LO_DEFAULT, drvdata->base + SMB_LB_CFG_LO_REG); writel(SMB_LB_CFG_HI_DEFAULT, drvdata->base + SMB_LB_CFG_HI_REG); @@ -587,6 +586,7 @@ static int smb_probe(struct platform_device *pdev) if (ret) return ret; + smb_reset_buffer(drvdata); platform_set_drvdata(pdev, drvdata); spin_lock_init(&drvdata->spinlock); drvdata->pid = -1; -- cgit From 0f40d5099cd6d828fd7de6227d3eabe86016724c Mon Sep 17 00:00:00 2001 From: Andrew Davis Date: Wed, 25 Oct 2023 09:33:02 -0500 Subject: phy: ti: gmii-sel: Fix register offset when parent is not a syscon node When the node for this phy selector is a child node of a syscon node then the property 'reg' is used as an offset into the parent regmap. When the node is standalone and gets its own regmap this offset is pre-applied. So we need to track which method was used to get the regmap and not apply the offset in the standalone case. Fixes: 1fdfa7cccd35 ("phy: ti: gmii-sel: Allow parent to not be syscon node") Signed-off-by: Andrew Davis Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20231025143302.1265633-1-afd@ti.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-gmii-sel.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c index 555b323f45da..bc847d3879f7 100644 --- a/drivers/phy/ti/phy-gmii-sel.c +++ b/drivers/phy/ti/phy-gmii-sel.c @@ -64,6 +64,7 @@ struct phy_gmii_sel_priv { u32 num_ports; u32 reg_offset; u32 qsgmii_main_ports; + bool no_offset; }; static int phy_gmii_sel_mode(struct phy *phy, enum phy_mode mode, int submode) @@ -402,7 +403,8 @@ static int phy_gmii_sel_init_ports(struct phy_gmii_sel_priv *priv) priv->num_ports = size / sizeof(u32); if (!priv->num_ports) return -EINVAL; - priv->reg_offset = __be32_to_cpu(*offset); + if (!priv->no_offset) + priv->reg_offset = __be32_to_cpu(*offset); } if_phys = devm_kcalloc(dev, priv->num_ports, @@ -471,6 +473,7 @@ static int phy_gmii_sel_probe(struct platform_device *pdev) dev_err(dev, "Failed to get syscon %d\n", ret); return ret; } + priv->no_offset = true; } ret = phy_gmii_sel_init_ports(priv); -- cgit From 0b6240d697a96eaa45a2a5503a274ebb4f162fa3 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Sun, 5 Nov 2023 23:36:15 +0000 Subject: arm64: dts: rockchip: Expand reg size of vdec node for RK3328 Expand the reg size for the vdec node to include cache/performance registers the rkvdec driver writes to. Fixes: 17408c9b119d ("arm64: dts: rockchip: Add vdec support for RK3328") Signed-off-by: Jonas Karlman Link: https://lore.kernel.org/r/20231105233630.3927502-9-jonas@kwiboo.se Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index e729e7a22b23..cc8209795c3e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -668,7 +668,7 @@ vdec: video-codec@ff360000 { compatible = "rockchip,rk3328-vdec", "rockchip,rk3399-vdec"; - reg = <0x0 0xff360000 0x0 0x400>; + reg = <0x0 0xff360000 0x0 0x480>; interrupts = ; clocks = <&cru ACLK_RKVDEC>, <&cru HCLK_RKVDEC>, <&cru SCLK_VDEC_CABAC>, <&cru SCLK_VDEC_CORE>; -- cgit From 35938c18291b5da7422b2fac6dac0af11aa8d0d7 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Sun, 5 Nov 2023 23:36:16 +0000 Subject: arm64: dts: rockchip: Expand reg size of vdec node for RK3399 Expand the reg size for the vdec node to include cache/performance registers the rkvdec driver writes to. Also add missing clocks to the related power-domain. Fixes: cbd7214402ec ("arm64: dts: rockchip: Define the rockchip Video Decoder node on rk3399") Signed-off-by: Alex Bee Signed-off-by: Jonas Karlman Link: https://lore.kernel.org/r/20231105233630.3927502-10-jonas@kwiboo.se Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index faf02e59d6c7..da0dfb237f85 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1109,7 +1109,9 @@ power-domain@RK3399_PD_VDU { reg = ; clocks = <&cru ACLK_VDU>, - <&cru HCLK_VDU>; + <&cru HCLK_VDU>, + <&cru SCLK_VDU_CA>, + <&cru SCLK_VDU_CORE>; pm_qos = <&qos_video_m1_r>, <&qos_video_m1_w>; #power-domain-cells = <0>; @@ -1384,7 +1386,7 @@ vdec: video-codec@ff660000 { compatible = "rockchip,rk3399-vdec"; - reg = <0x0 0xff660000 0x0 0x400>; + reg = <0x0 0xff660000 0x0 0x480>; interrupts = ; clocks = <&cru ACLK_VDU>, <&cru HCLK_VDU>, <&cru SCLK_VDU_CA>, <&cru SCLK_VDU_CORE>; -- cgit From 3cee9c635f27d1003d46f624d816f3455698b625 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 14 Nov 2023 16:38:34 +0100 Subject: arm64: dts: rockchip: fix rk356x pcie msg interrupt name The expected name by the binding at this position is "msg" and the SoC's manual also calls the interrupt in question "msg", so fix the rk356x dtsi to use the correct name. Reviewed-by: Sebastian Reichel Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20231114153834.934978-1-heiko@sntech.de --- arch/arm64/boot/dts/rockchip/rk356x.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi index 0964761e3ce9..c19c0f1b3778 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi @@ -977,7 +977,7 @@ , , ; - interrupt-names = "sys", "pmc", "msi", "legacy", "err"; + interrupt-names = "sys", "pmc", "msg", "legacy", "err"; bus-range = <0x0 0xf>; clocks = <&cru ACLK_PCIE20_MST>, <&cru ACLK_PCIE20_SLV>, <&cru ACLK_PCIE20_DBI>, <&cru PCLK_PCIE20>, -- cgit From 1af27671f62ce919f1fb76082ed81f71cb090989 Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Wed, 18 Oct 2023 10:33:55 -0500 Subject: clk: rockchip: rk3568: Add PLL rate for 292.5MHz Add support for a PLL rate of 292.5MHz so that the Powkiddy RGB30 panel can run at a requested 60hz (59.96, close enough). I have confirmed this rate fits with all the constraints listed in the TRM for the VPLL (as an integer PLL) in Part 1 "Chapter 2 Clock & Reset Unit (CRU)." Signed-off-by: Chris Morgan Link: https://lore.kernel.org/r/20231018153357.343142-2-macroalpha82@gmail.com Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3568.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/rockchip/clk-rk3568.c b/drivers/clk/rockchip/clk-rk3568.c index 16dabe2b9c47..db713e1526cd 100644 --- a/drivers/clk/rockchip/clk-rk3568.c +++ b/drivers/clk/rockchip/clk-rk3568.c @@ -72,6 +72,7 @@ static struct rockchip_pll_rate_table rk3568_pll_rates[] = { RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), RK3036_PLL_RATE(297000000, 2, 99, 4, 1, 1, 0), + RK3036_PLL_RATE(292500000, 1, 195, 4, 4, 1, 0), RK3036_PLL_RATE(241500000, 2, 161, 4, 2, 1, 0), RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), RK3036_PLL_RATE(200000000, 1, 100, 3, 4, 1, 0), -- cgit From c6c5a5580dcb6631aa6369dabe12ef3ce784d1d2 Mon Sep 17 00:00:00 2001 From: Weihao Li Date: Tue, 31 Oct 2023 19:18:16 +0800 Subject: clk: rockchip: rk3128: Fix HCLK_OTG gate register The HCLK_OTG gate control is in CRU_CLKGATE5_CON, not CRU_CLKGATE3_CON. Signed-off-by: Weihao Li Link: https://lore.kernel.org/r/20231031111816.8777-1-cn.liweihao@gmail.com Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3128.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/clk-rk3128.c b/drivers/clk/rockchip/clk-rk3128.c index aa53797dbfc1..7782785a86e6 100644 --- a/drivers/clk/rockchip/clk-rk3128.c +++ b/drivers/clk/rockchip/clk-rk3128.c @@ -490,7 +490,7 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { GATE(HCLK_I2S_2CH, "hclk_i2s_2ch", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 2, GFLAGS), GATE(0, "hclk_usb_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 13, GFLAGS), GATE(HCLK_HOST2, "hclk_host2", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 3, GFLAGS), - GATE(HCLK_OTG, "hclk_otg", "hclk_peri", 0, RK2928_CLKGATE_CON(3), 13, GFLAGS), + GATE(HCLK_OTG, "hclk_otg", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 13, GFLAGS), GATE(0, "hclk_peri_ahb", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 14, GFLAGS), GATE(HCLK_SPDIF, "hclk_spdif", "hclk_peri", 0, RK2928_CLKGATE_CON(10), 9, GFLAGS), GATE(HCLK_TSP, "hclk_tsp", "hclk_peri", 0, RK2928_CLKGATE_CON(10), 12, GFLAGS), -- cgit From e80ed63affc9a9b4aacb44180ecd7ed601839599 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Tue, 24 Oct 2023 09:20:35 +0100 Subject: riscv: dts: sophgo: remove address-cells from intc node A recent submission [1] from Rob has added additionalProperties: false to the interrupt-controller child node of RISC-V cpus, highlighting that the new cv1800b DT has been incorrectly using #address-cells. It has no child nodes, so #address-cells is not needed. Remove it. Link: https://patchwork.kernel.org/project/linux-riscv/patch/20230915201946.4184468-1-robh@kernel.org/ [1] Fixes: c3dffa879cca ("riscv: dts: sophgo: add initial CV1800B SoC device tree") Reviewed-by: Jisheng Zhang Acked-by: Chen Wang Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/sophgo/cv1800b.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi index df40e87ee063..aec6401a467b 100644 --- a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi +++ b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi @@ -34,7 +34,6 @@ cpu0_intc: interrupt-controller { compatible = "riscv,cpu-intc"; interrupt-controller; - #address-cells = <0>; #interrupt-cells = <1>; }; }; -- cgit From 2a842c4e2f76736f4ed2da9f02ca3ae3330d6b11 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 18 Oct 2023 08:17:11 +0200 Subject: dt-bindings: soc: rockchip: grf: add rockchip,rk3588-pmugrf Add rockchip,rk3588-pmugrf compatible string. Acked-by: Conor Dooley Signed-off-by: Sascha Hauer Link: https://lore.kernel.org/r/20231018061714.3553817-24-s.hauer@pengutronix.de Signed-off-by: Heiko Stuebner --- Documentation/devicetree/bindings/soc/rockchip/grf.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml index e4fa6a07b4fa..1309bf5ae0cd 100644 --- a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml +++ b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml @@ -233,6 +233,7 @@ allOf: - rockchip,rk3399-grf - rockchip,rk3399-pmugrf - rockchip,rk3568-pmugrf + - rockchip,rk3588-pmugrf - rockchip,rv1108-grf - rockchip,rv1108-pmugrf -- cgit From d5c65be34df73fa01ed05611aafb73b440d89e29 Mon Sep 17 00:00:00 2001 From: Kamil Duljas Date: Thu, 16 Nov 2023 13:51:50 +0100 Subject: ASoC: Intel: Skylake: Fix mem leak in few functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The resources should be freed when function return error. Signed-off-by: Kamil Duljas Reviewed-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20231116125150.1436-1-kamil.duljas@gmail.com Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-pcm.c | 4 +++- sound/soc/intel/skylake/skl-sst-ipc.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index d0c02e8a6785..18866bc415a5 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -240,8 +240,10 @@ static int skl_pcm_open(struct snd_pcm_substream *substream, snd_pcm_set_sync(substream); mconfig = skl_tplg_fe_get_cpr_module(dai, substream->stream); - if (!mconfig) + if (!mconfig) { + kfree(dma_params); return -EINVAL; + } skl_tplg_d0i3_get(skl, mconfig->d0i3_caps); diff --git a/sound/soc/intel/skylake/skl-sst-ipc.c b/sound/soc/intel/skylake/skl-sst-ipc.c index 7a425271b08b..fd9624ad5f72 100644 --- a/sound/soc/intel/skylake/skl-sst-ipc.c +++ b/sound/soc/intel/skylake/skl-sst-ipc.c @@ -1003,8 +1003,10 @@ int skl_ipc_get_large_config(struct sst_generic_ipc *ipc, reply.size = (reply.header >> 32) & IPC_DATA_OFFSET_SZ_MASK; buf = krealloc(reply.data, reply.size, GFP_KERNEL); - if (!buf) + if (!buf) { + kfree(reply.data); return -ENOMEM; + } *payload = buf; *bytes = reply.size; -- cgit From c1501f2597dd08601acd42256a4b0a0fc36bf302 Mon Sep 17 00:00:00 2001 From: David Lin Date: Fri, 17 Nov 2023 12:30:12 +0800 Subject: ASoC: nau8822: Fix incorrect type in assignment and cast to restricted __be16 This issue is reproduced when W=1 build in compiler gcc-12. The following are sparse warnings: sound/soc/codecs/nau8822.c:199:25: sparse: sparse: incorrect type in assignment sound/soc/codecs/nau8822.c:199:25: sparse: expected unsigned short sound/soc/codecs/nau8822.c:199:25: sparse: got restricted __be16 sound/soc/codecs/nau8822.c:235:25: sparse: sparse: cast to restricted __be16 sound/soc/codecs/nau8822.c:235:25: sparse: sparse: cast to restricted __be16 sound/soc/codecs/nau8822.c:235:25: sparse: sparse: cast to restricted __be16 sound/soc/codecs/nau8822.c:235:25: sparse: sparse: cast to restricted __be16 Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311122320.T1opZVkP-lkp@intel.com/ Signed-off-by: David Lin Link: https://lore.kernel.org/r/20231117043011.1747594-1-CTLIN0@nuvoton.com Signed-off-by: Mark Brown --- sound/soc/codecs/nau8822.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/nau8822.c b/sound/soc/codecs/nau8822.c index ff3024899f45..7199d734c79f 100644 --- a/sound/soc/codecs/nau8822.c +++ b/sound/soc/codecs/nau8822.c @@ -184,6 +184,7 @@ static int nau8822_eq_get(struct snd_kcontrol *kcontrol, struct soc_bytes_ext *params = (void *)kcontrol->private_value; int i, reg; u16 reg_val, *val; + __be16 tmp; val = (u16 *)ucontrol->value.bytes.data; reg = NAU8822_REG_EQ1; @@ -192,8 +193,8 @@ static int nau8822_eq_get(struct snd_kcontrol *kcontrol, /* conversion of 16-bit integers between native CPU format * and big endian format */ - reg_val = cpu_to_be16(reg_val); - memcpy(val + i, ®_val, sizeof(reg_val)); + tmp = cpu_to_be16(reg_val); + memcpy(val + i, &tmp, sizeof(tmp)); } return 0; @@ -216,6 +217,7 @@ static int nau8822_eq_put(struct snd_kcontrol *kcontrol, void *data; u16 *val, value; int i, reg, ret; + __be16 *tmp; data = kmemdup(ucontrol->value.bytes.data, params->max, GFP_KERNEL | GFP_DMA); @@ -228,7 +230,8 @@ static int nau8822_eq_put(struct snd_kcontrol *kcontrol, /* conversion of 16-bit integers between native CPU format * and big endian format */ - value = be16_to_cpu(*(val + i)); + tmp = (__be16 *)(val + i); + value = be16_to_cpup(tmp); ret = snd_soc_component_write(component, reg + i, value); if (ret) { dev_err(component->dev, -- cgit From 31e721fbd194d5723722eaa21df1d14cee7e12b5 Mon Sep 17 00:00:00 2001 From: Kamil Duljas Date: Thu, 16 Nov 2023 22:39:17 +0100 Subject: ASoC: SOF: topology: Fix mem leak in sof_dai_load() The function has multiple return points at which it is not released previously allocated memory. Signed-off-by: Kamil Duljas Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20231116213926.2034-2-kamil.duljas@gmail.com Signed-off-by: Mark Brown --- sound/soc/sof/topology.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index a3a3af252259..37ec671a2d76 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c @@ -1736,8 +1736,10 @@ static int sof_dai_load(struct snd_soc_component *scomp, int index, /* perform pcm set op */ if (ipc_pcm_ops && ipc_pcm_ops->pcm_setup) { ret = ipc_pcm_ops->pcm_setup(sdev, spcm); - if (ret < 0) + if (ret < 0) { + kfree(spcm); return ret; + } } dai_drv->dobj.private = spcm; -- cgit From f8ba14b780273fd290ddf7ee0d7d7decb44cc365 Mon Sep 17 00:00:00 2001 From: Kamil Duljas Date: Thu, 16 Nov 2023 23:41:13 +0100 Subject: ASoC: Intel: Skylake: mem leak in skl register function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit skl_platform_register() uses krealloc. When krealloc is fail, then previous memory is not freed. The leak is also when soc component registration failed. Signed-off-by: Kamil Duljas Reviewed-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20231116224112.2209-2-kamil.duljas@gmail.com Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-pcm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index 18866bc415a5..174aae6e0398 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -1464,6 +1464,7 @@ int skl_platform_register(struct device *dev) dais = krealloc(skl->dais, sizeof(skl_fe_dai) + sizeof(skl_platform_dai), GFP_KERNEL); if (!dais) { + kfree(skl->dais); ret = -ENOMEM; goto err; } @@ -1476,8 +1477,10 @@ int skl_platform_register(struct device *dev) ret = devm_snd_soc_register_component(dev, &skl_component, skl->dais, num_dais); - if (ret) + if (ret) { + kfree(skl->dais); dev_err(dev, "soc component registration failed %d\n", ret); + } err: return ret; } -- cgit From e7f289a59e76a5890a57bc27b198f69f175f75d9 Mon Sep 17 00:00:00 2001 From: Maciej Strozek Date: Fri, 17 Nov 2023 14:13:38 +0000 Subject: ASoC: cs43130: Fix the position of const qualifier Signed-off-by: Maciej Strozek Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20231117141344.64320-2-mstrozek@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs43130.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/cs43130.c b/sound/soc/codecs/cs43130.c index 0b40fdfb1825..20f06679b8f7 100644 --- a/sound/soc/codecs/cs43130.c +++ b/sound/soc/codecs/cs43130.c @@ -1682,7 +1682,7 @@ static ssize_t hpload_dc_r_show(struct device *dev, return cs43130_show_dc(dev, buf, HP_RIGHT); } -static u16 const cs43130_ac_freq[CS43130_AC_FREQ] = { +static const u16 cs43130_ac_freq[CS43130_AC_FREQ] = { 24, 43, 93, @@ -2362,7 +2362,7 @@ static const struct regmap_config cs43130_regmap = { .use_single_write = true, }; -static u16 const cs43130_dc_threshold[CS43130_DC_THRESHOLD] = { +static const u16 cs43130_dc_threshold[CS43130_DC_THRESHOLD] = { 50, 120, }; -- cgit From aa7e8e5e4011571022dc06e4d7a2f108feb53d1a Mon Sep 17 00:00:00 2001 From: Maciej Strozek Date: Fri, 17 Nov 2023 14:13:39 +0000 Subject: ASoC: cs43130: Fix incorrect frame delay configuration Signed-off-by: Maciej Strozek Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20231117141344.64320-3-mstrozek@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs43130.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs43130.c b/sound/soc/codecs/cs43130.c index 20f06679b8f7..d8ec325b9cc9 100644 --- a/sound/soc/codecs/cs43130.c +++ b/sound/soc/codecs/cs43130.c @@ -578,7 +578,7 @@ static int cs43130_set_sp_fmt(int dai_id, unsigned int bitwidth_sclk, break; case SND_SOC_DAIFMT_LEFT_J: hi_size = bitwidth_sclk; - frm_delay = 2; + frm_delay = 0; frm_phase = 1; break; case SND_SOC_DAIFMT_DSP_A: -- cgit From b6f09b16558f31d93cdcda3cab90a2d309a7c823 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Fri, 17 Nov 2023 16:06:57 +0800 Subject: MAINTAINERS: Add Chengchang Tang as Hisilicon RoCE maintainer Add Chengchang Tang as Hisilicon RoCE maintainer. Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20231117080657.1844316-1-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 97f51d5ec1cf..5e93f355c786 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9628,6 +9628,7 @@ F: drivers/crypto/hisilicon/sgl.c F: include/linux/hisi_acc_qm.h HISILICON ROCE DRIVER +M: Chengchang Tang M: Junxian Huang L: linux-rdma@vger.kernel.org S: Maintained -- cgit From 14e8442e0789598514f3c9de014950de9feda7a4 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Mon, 20 Nov 2023 18:05:35 +0800 Subject: ASoC: fsl_sai: Fix no frame sync clock issue on i.MX8MP On i.MX8MP, when the TERE and FSD_MSTR enabled before configuring the word width, there will be no frame sync clock issue, because old word width impact the generation of frame sync. TERE enabled earlier only for i.MX8MP case for the hardware limitation, So need to disable FSD_MSTR before configuring word width, then enable FSD_MSTR bit for this specific case. Fixes: 3e4a82612998 ("ASoC: fsl_sai: MCLK bind with TX/RX enable bit") Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1700474735-3863-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_sai.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 79e7c6b98a75..32bbe5056a63 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -673,6 +673,20 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, FSL_SAI_CR3_TRCE_MASK, FSL_SAI_CR3_TRCE((dl_cfg[dl_cfg_idx].mask[tx] & trce_mask))); + /* + * When the TERE and FSD_MSTR enabled before configuring the word width + * There will be no frame sync clock issue, because word width impact + * the generation of frame sync clock. + * + * TERE enabled earlier only for i.MX8MP case for the hardware limitation, + * We need to disable FSD_MSTR before configuring word width, then enable + * FSD_MSTR bit for this specific case. + */ + if (sai->soc_data->mclk_with_tere && sai->mclk_direction_output && + !sai->is_consumer_mode) + regmap_update_bits(sai->regmap, FSL_SAI_xCR4(tx, ofs), + FSL_SAI_CR4_FSD_MSTR, 0); + regmap_update_bits(sai->regmap, FSL_SAI_xCR4(tx, ofs), FSL_SAI_CR4_SYWD_MASK | FSL_SAI_CR4_FRSZ_MASK | FSL_SAI_CR4_CHMOD_MASK, @@ -680,6 +694,13 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, regmap_update_bits(sai->regmap, FSL_SAI_xCR5(tx, ofs), FSL_SAI_CR5_WNW_MASK | FSL_SAI_CR5_W0W_MASK | FSL_SAI_CR5_FBT_MASK, val_cr5); + + /* Enable FSD_MSTR after configuring word width */ + if (sai->soc_data->mclk_with_tere && sai->mclk_direction_output && + !sai->is_consumer_mode) + regmap_update_bits(sai->regmap, FSL_SAI_xCR4(tx, ofs), + FSL_SAI_CR4_FSD_MSTR, FSL_SAI_CR4_FSD_MSTR); + regmap_write(sai->regmap, FSL_SAI_xMR(tx), ~0UL - ((1 << min(channels, slots)) - 1)); -- cgit From 8e4ece6889a5b1836b6a135827ac831a5350602a Mon Sep 17 00:00:00 2001 From: Kunkun Jiang Date: Mon, 20 Nov 2023 21:12:10 +0800 Subject: KVM: arm64: GICv4: Do not perform a map to a mapped vLPI Before performing a map, let's check whether the vLPI has been mapped. Fixes: 196b136498b3 ("KVM: arm/arm64: GICv4: Wire mapping/unmapping of VLPIs in VFIO irq bypass") Signed-off-by: Kunkun Jiang Acked-by: Marc Zyngier Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20231120131210.2039-1-jiangkunkun@huawei.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/vgic/vgic-v4.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index 339a55194b2c..74a67ad87f29 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -436,6 +436,10 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, if (ret) goto out; + /* Silently exit if the vLPI is already mapped */ + if (irq->hw) + goto out; + /* * Emit the mapping request. If it fails, the ITS probably * isn't v4 compatible, so let's silently bail out. Holding -- cgit From 98594181944daa201481ad63242806beb7c89ff4 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 16 Nov 2023 16:52:15 +0000 Subject: iommufd/selftest: Fix _test_mock_dirty_bitmaps() The ASSERT_EQ() macro sneakily expands to two statements, so the loop here needs braces to ensure it captures both and actually terminates the test upon failure. Where these tests are currently failing on my arm64 machine, this reduces the number of logged lines from a rather unreasonable ~197,000 down to 10. While we're at it, we can also clean up the tautologous "count" calculations whose assertions can never fail unless mathematics and/or the C language become fundamentally broken. Fixes: a9af47e382a4 ("iommufd/selftest: Test IOMMU_HWPT_GET_DIRTY_BITMAP") Link: https://lore.kernel.org/r/90e083045243ef407dd592bb1deec89cd1f4ddf2.1700153535.git.robin.murphy@arm.com Signed-off-by: Robin Murphy Reviewed-by: Kevin Tian Reviewed-by: Joao Martins Tested-by: Joao Martins Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd_utils.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 050e9751321c..ad9202335656 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -293,15 +293,13 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, __u64 bitmap_size, __u32 flags, struct __test_metadata *_metadata) { - unsigned long i, count, nbits = bitmap_size * BITS_PER_BYTE; + unsigned long i, nbits = bitmap_size * BITS_PER_BYTE; unsigned long nr = nbits / 2; __u64 out_dirty = 0; /* Mark all even bits as dirty in the mock domain */ - for (count = 0, i = 0; i < nbits; count += !(i % 2), i++) - if (!(i % 2)) - set_bit(i, (unsigned long *)bitmap); - ASSERT_EQ(nr, count); + for (i = 0; i < nbits; i += 2) + set_bit(i, (unsigned long *)bitmap); test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size, bitmap, &out_dirty); @@ -311,9 +309,10 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, memset(bitmap, 0, bitmap_size); test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap, flags); - for (count = 0, i = 0; i < nbits; count += !(i % 2), i++) + /* Beware ASSERT_EQ() is two statements -- braces are not redundant! */ + for (i = 0; i < nbits; i++) { ASSERT_EQ(!(i % 2), test_bit(i, (unsigned long *)bitmap)); - ASSERT_EQ(count, out_dirty); + } memset(bitmap, 0, bitmap_size); test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap, -- cgit From 4e7a8dbd2bc0aec4605a5069df7a779bd9e64db1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 13 Nov 2023 08:46:32 -0800 Subject: pwm: bcm2835: Fix NPD in suspend/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When 119a508c4dc9 ("pwm: bcm2835: Add support for suspend/resume") was sent out on October 11th,, there was still a call to platform_set_drvdata() which would ensure that the driver private data structure could be used in bcm2835_pwm_{suspend,resume}. A cleanup now merged as commit commit 2ce7b7f6704c ("pwm: bcm2835: Simplify using devm functions") removed that call which would now cause a NPD in bcm2835_pwm_{suspend,resume} as a consequence. Fixes: 119a508c4dc9 ("pwm: bcm2835: Add support for suspend/resume") Signed-off-by: Florian Fainelli Reviewed-by: Uwe Kleine-König Link: https://lore.kernel.org/linux-pwm/20231113164632.2439400-1-florian.fainelli@broadcom.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/pwm-bcm2835.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c index 9777babd5b95..ab30667f4f95 100644 --- a/drivers/pwm/pwm-bcm2835.c +++ b/drivers/pwm/pwm-bcm2835.c @@ -155,6 +155,8 @@ static int bcm2835_pwm_probe(struct platform_device *pdev) pc->chip.ops = &bcm2835_pwm_ops; pc->chip.npwm = 2; + platform_set_drvdata(pdev, pc); + ret = devm_pwmchip_add(&pdev->dev, &pc->chip); if (ret < 0) return dev_err_probe(&pdev->dev, ret, -- cgit From 3ee7ecd712048ade6482bea4b2f3dcaf039c0348 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 20 Nov 2023 16:41:38 +0100 Subject: RDMA/rtrs-srv: Do not unconditionally enable irq When IO is completed, rtrs can be called in softirq context, unconditionally enabling irq could cause panic. To be on safe side, use spin_lock_irqsave and spin_unlock_irqrestore instread. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Signed-off-by: Jack Wang Signed-off-by: Florian-Ewald Mueller Signed-off-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-2-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 75e56604e462..ab4200041fd3 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -65,8 +65,9 @@ static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path, { enum rtrs_srv_state old_state; bool changed = false; + unsigned long flags; - spin_lock_irq(&srv_path->state_lock); + spin_lock_irqsave(&srv_path->state_lock, flags); old_state = srv_path->state; switch (new_state) { case RTRS_SRV_CONNECTED: @@ -87,7 +88,7 @@ static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path, } if (changed) srv_path->state = new_state; - spin_unlock_irq(&srv_path->state_lock); + spin_unlock_irqrestore(&srv_path->state_lock, flags); return changed; } -- cgit From 3e44a61b5db873612e20e7b7922468d7d1ac2d22 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 20 Nov 2023 16:41:39 +0100 Subject: RDMA/rtrs-clt: Start hb after path_up If we start hb too early, it will confuse server side to close the session. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Signed-off-by: Jack Wang Reviewed-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-3-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 07261523c554..9bf5f7fb7714 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2350,8 +2350,6 @@ static int init_conns(struct rtrs_clt_path *clt_path) if (err) goto destroy; - rtrs_start_hb(&clt_path->s); - return 0; destroy: @@ -2625,6 +2623,7 @@ static int init_path(struct rtrs_clt_path *clt_path) goto out; } rtrs_clt_path_up(clt_path); + rtrs_start_hb(&clt_path->s); out: mutex_unlock(&clt_path->init_mutex); -- cgit From ed1e52aefa16f15dc2f04054a3baf11726a7460e Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Mon, 20 Nov 2023 16:41:40 +0100 Subject: RDMA/rtrs-srv: Check return values while processing info request While processing info request, it could so happen that the srv_path goes to CLOSING state, cause of any of the error events from RDMA. That state change should be picked up while trying to change the state in process_info_req, by checking the return value. In case the state change call in process_info_req fails, we fail the processing. We should also check the return value for rtrs_srv_path_up, since it sends a link event to the client above, and the client can fail for any reason. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Signed-off-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-4-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index ab4200041fd3..4be0e5b132d4 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -710,20 +710,23 @@ static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) WARN_ON(wc->opcode != IB_WC_SEND); } -static void rtrs_srv_path_up(struct rtrs_srv_path *srv_path) +static int rtrs_srv_path_up(struct rtrs_srv_path *srv_path) { struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_srv_ctx *ctx = srv->ctx; - int up; + int up, ret = 0; mutex_lock(&srv->paths_ev_mutex); up = ++srv->paths_up; if (up == 1) - ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL); + ret = ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL); mutex_unlock(&srv->paths_ev_mutex); /* Mark session as established */ - srv_path->established = true; + if (!ret) + srv_path->established = true; + + return ret; } static void rtrs_srv_path_down(struct rtrs_srv_path *srv_path) @@ -852,7 +855,12 @@ static int process_info_req(struct rtrs_srv_con *con, goto iu_free; kobject_get(&srv_path->kobj); get_device(&srv_path->srv->dev); - rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED); + err = rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED); + if (!err) { + rtrs_err(s, "rtrs_srv_change_state(), err: %d\n", err); + goto iu_free; + } + rtrs_srv_start_hb(srv_path); /* @@ -861,7 +869,11 @@ static int process_info_req(struct rtrs_srv_con *con, * all connections are successfully established. Thus, simply notify * listener with a proper event if we are the first path. */ - rtrs_srv_path_up(srv_path); + err = rtrs_srv_path_up(srv_path); + if (err) { + rtrs_err(s, "rtrs_srv_path_up(), err: %d\n", err); + goto iu_free; + } ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev, tx_iu->dma_addr, -- cgit From 3a71cd6ca0ce33d1af019ecf1d7167406fa54400 Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Mon, 20 Nov 2023 16:41:41 +0100 Subject: RDMA/rtrs-srv: Free srv_mr iu only when always_invalidate is true Since srv_mr->iu is allocated and used only when always_invalidate is true, free it only when always_invalidate is true. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Signed-off-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-5-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 4be0e5b132d4..925b71481c62 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -551,7 +551,10 @@ static void unmap_cont_bufs(struct rtrs_srv_path *srv_path) struct rtrs_srv_mr *srv_mr; srv_mr = &srv_path->mrs[i]; - rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1); + + if (always_invalidate) + rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1); + ib_dereg_mr(srv_mr->mr); ib_dma_unmap_sg(srv_path->s.dev->ib_dev, srv_mr->sgt.sgl, srv_mr->sgt.nents, DMA_BIDIRECTIONAL); -- cgit From c4d32e77fc1006f99eeb78417efc3d81a384072a Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Mon, 20 Nov 2023 16:41:42 +0100 Subject: RDMA/rtrs-srv: Destroy path files after making sure no IOs in-flight Destroying path files may lead to the freeing of rdma_stats. This creates the following race. An IO is in-flight, or has just passed the session state check in process_read/process_write. The close_work gets triggered and the function rtrs_srv_close_work() starts and does destroy path which frees the rdma_stats. After this the function process_read/process_write resumes and tries to update the stats through the function rtrs_srv_update_rdma_stats This commit solves the problem by moving the destroy path function to a later point. This point makes sure any inflights are completed. This is done by qp drain, and waiting for all in-flights through ops_id. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Signed-off-by: Md Haris Iqbal Signed-off-by: Santosh Kumar Pradhan Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-6-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 925b71481c62..1d33efb8fb03 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1532,7 +1532,6 @@ static void rtrs_srv_close_work(struct work_struct *work) srv_path = container_of(work, typeof(*srv_path), close_work); - rtrs_srv_destroy_path_files(srv_path); rtrs_srv_stop_hb(srv_path); for (i = 0; i < srv_path->s.con_num; i++) { @@ -1552,6 +1551,8 @@ static void rtrs_srv_close_work(struct work_struct *work) /* Wait for all completion */ wait_for_completion(&srv_path->complete_done); + rtrs_srv_destroy_path_files(srv_path); + /* Notify upper layer if we are the last path */ rtrs_srv_path_down(srv_path); -- cgit From 6d09f6f7d7584e099633282ea915988914f86529 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 20 Nov 2023 16:41:43 +0100 Subject: RDMA/rtrs-clt: Fix the max_send_wr setting For each write request, we need Request, Response Memory Registration, Local Invalidate. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Signed-off-by: Jack Wang Reviewed-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-7-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 9bf5f7fb7714..16535030b442 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1699,7 +1699,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) clt_path->s.dev_ref++; max_send_wr = min_t(int, wr_limit, /* QD * (REQ + RSP + FR REGS or INVS) + drain */ - clt_path->queue_depth * 3 + 1); + clt_path->queue_depth * 4 + 1); max_recv_wr = min_t(int, wr_limit, clt_path->queue_depth * 3 + 1); max_send_sge = 2; -- cgit From 0c8bb6eb70ca41031f663b4481aac9ac78b53bc6 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 20 Nov 2023 16:41:44 +0100 Subject: RDMA/rtrs-clt: Remove the warnings for req in_use check As we chain the WR during write request: memory registration, rdma write, local invalidate, if only the last WR fail to send due to send queue overrun, the server can send back the reply, while client mark the req->in_use to false in case of error in rtrs_clt_req when error out from rtrs_post_rdma_write_sg. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Signed-off-by: Jack Wang Reviewed-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-8-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 16535030b442..7f3167ce2972 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -384,7 +384,7 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, struct rtrs_clt_path *clt_path; int err; - if (WARN_ON(!req->in_use)) + if (!req->in_use) return; if (WARN_ON(!req->con)) return; -- cgit From 422b19f7f006e813ee0865aadce6a62b3c263c42 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 21 Nov 2023 00:29:47 -0800 Subject: RDMA/bnxt_re: Correct module description string The word "Driver" is repeated twice in the "modinfo bnxt_re" output description. Fix it. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1700555387-6277-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index f79369c8360a..a99c68247af0 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -71,7 +71,7 @@ static char version[] = BNXT_RE_DESC "\n"; MODULE_AUTHOR("Eddie Wai "); -MODULE_DESCRIPTION(BNXT_RE_DESC " Driver"); +MODULE_DESCRIPTION(BNXT_RE_DESC); MODULE_LICENSE("Dual BSD/GPL"); /* globals */ -- cgit From 2b78832f50c4d711e161b166d7d8790968051546 Mon Sep 17 00:00:00 2001 From: Shifeng Li Date: Tue, 21 Nov 2023 02:12:36 -0800 Subject: RDMA/irdma: Fix UAF in irdma_sc_ccq_get_cqe_info() When removing the irdma driver or unplugging its aux device, the ccq queue is released before destorying the cqp_cmpl_wq queue. But in the window, there may still be completion events for wqes. That will cause a UAF in irdma_sc_ccq_get_cqe_info(). [34693.333191] BUG: KASAN: use-after-free in irdma_sc_ccq_get_cqe_info+0x82f/0x8c0 [irdma] [34693.333194] Read of size 8 at addr ffff889097f80818 by task kworker/u67:1/26327 [34693.333194] [34693.333199] CPU: 9 PID: 26327 Comm: kworker/u67:1 Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1 [34693.333200] Hardware name: SANGFOR Inspur/NULL, BIOS 4.1.13 08/01/2016 [34693.333211] Workqueue: cqp_cmpl_wq cqp_compl_worker [irdma] [34693.333213] Call Trace: [34693.333220] dump_stack+0x71/0xab [34693.333226] print_address_description+0x6b/0x290 [34693.333238] ? irdma_sc_ccq_get_cqe_info+0x82f/0x8c0 [irdma] [34693.333240] kasan_report+0x14a/0x2b0 [34693.333251] irdma_sc_ccq_get_cqe_info+0x82f/0x8c0 [irdma] [34693.333264] ? irdma_free_cqp_request+0x151/0x1e0 [irdma] [34693.333274] irdma_cqp_ce_handler+0x1fb/0x3b0 [irdma] [34693.333285] ? irdma_ctrl_init_hw+0x2c20/0x2c20 [irdma] [34693.333290] ? __schedule+0x836/0x1570 [34693.333293] ? strscpy+0x83/0x180 [34693.333296] process_one_work+0x56a/0x11f0 [34693.333298] worker_thread+0x8f/0xf40 [34693.333301] ? __kthread_parkme+0x78/0xf0 [34693.333303] ? rescuer_thread+0xc50/0xc50 [34693.333305] kthread+0x2a0/0x390 [34693.333308] ? kthread_destroy_worker+0x90/0x90 [34693.333310] ret_from_fork+0x1f/0x40 Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Signed-off-by: Shifeng Li Link: https://lore.kernel.org/r/20231121101236.581694-1-lishifeng1992@126.com Acked-by: Shiraz Saleem Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/hw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 74f6bc7b7ad1..64a382070e5c 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -585,9 +585,6 @@ static void irdma_destroy_cqp(struct irdma_pci_f *rf) struct irdma_cqp *cqp = &rf->cqp; int status = 0; - if (rf->cqp_cmpl_wq) - destroy_workqueue(rf->cqp_cmpl_wq); - status = irdma_sc_cqp_destroy(dev->cqp); if (status) ibdev_dbg(to_ibdev(dev), "ERR: Destroy CQP failed %d\n", status); @@ -752,6 +749,9 @@ static void irdma_destroy_ccq(struct irdma_pci_f *rf) struct irdma_ccq *ccq = &rf->ccq; int status = 0; + if (rf->cqp_cmpl_wq) + destroy_workqueue(rf->cqp_cmpl_wq); + if (!rf->reset) status = irdma_sc_ccq_destroy(dev->ccq, 0, true); if (status) -- cgit From c33fd110424dfcb544cf55a1b312f43fe1918235 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Wed, 22 Nov 2023 09:42:53 +0800 Subject: ASoC: fsl_xcvr: Enable 2 * TX bit clock for spdif only case The bit 10 in TX_DPTH_CTRL register controls the TX clock rate. If this bit is set, TX datapath clock should be = 2* TX bit rate. If this bit is not set, TX datapath clock should be 10* TX bit rate. As the spdif only case, we always use 2 * TX bit clock, so this bit need to be set. Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1700617373-6472-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_xcvr.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c index fa0a15263c66..77f8e2394bf9 100644 --- a/sound/soc/fsl/fsl_xcvr.c +++ b/sound/soc/fsl/fsl_xcvr.c @@ -414,6 +414,16 @@ static int fsl_xcvr_prepare(struct snd_pcm_substream *substream, switch (xcvr->mode) { case FSL_XCVR_MODE_SPDIF: + if (xcvr->soc_data->spdif_only && tx) { + ret = regmap_update_bits(xcvr->regmap, FSL_XCVR_TX_DPTH_CTRL_SET, + FSL_XCVR_TX_DPTH_CTRL_BYPASS_FEM, + FSL_XCVR_TX_DPTH_CTRL_BYPASS_FEM); + if (ret < 0) { + dev_err(dai->dev, "Failed to set bypass fem: %d\n", ret); + return ret; + } + } + fallthrough; case FSL_XCVR_MODE_ARC: if (tx) { ret = fsl_xcvr_en_aud_pll(xcvr, fout); -- cgit From 4a6c5607d4502ccd1b15b57d57f17d12b6f257a7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 21 Nov 2023 11:39:36 -1000 Subject: workqueue: Make sure that wq_unbound_cpumask is never empty During boot, depending on how the housekeeping and workqueue.unbound_cpus masks are set, wq_unbound_cpumask can end up empty. Since 8639ecebc9b1 ("workqueue: Implement non-strict affinity scope for unbound workqueues"), this may end up feeding -1 as a CPU number into scheduler leading to oopses. BUG: unable to handle page fault for address: ffffffff8305e9c0 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page ... Call Trace: select_idle_sibling+0x79/0xaf0 select_task_rq_fair+0x1cb/0x7b0 try_to_wake_up+0x29c/0x5c0 wake_up_process+0x19/0x20 kick_pool+0x5e/0xb0 __queue_work+0x119/0x430 queue_work_on+0x29/0x30 ... An empty wq_unbound_cpumask is a clear misconfiguration and already disallowed once system is booted up. Let's warn on and ignore unbound_cpumask restrictions which lead to no unbound cpus. While at it, also remove now unncessary empty check on wq_unbound_cpumask in wq_select_unbound_cpu(). Signed-off-by: Tejun Heo Reported-and-Tested-by: Yong He Link: http://lkml.kernel.org/r/20231120121623.119780-1-alexyonghe@tencent.com Fixes: 8639ecebc9b1 ("workqueue: Implement non-strict affinity scope for unbound workqueues") Cc: stable@vger.kernel.org # v6.6+ Reviewed-by: Waiman Long --- kernel/workqueue.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6e578f576a6f..2989b57e154a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1684,9 +1684,6 @@ static int wq_select_unbound_cpu(int cpu) pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n"); } - if (cpumask_empty(wq_unbound_cpumask)) - return cpu; - new_cpu = __this_cpu_read(wq_rr_cpu_last); new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask); if (unlikely(new_cpu >= nr_cpu_ids)) { @@ -6515,6 +6512,17 @@ static inline void wq_watchdog_init(void) { } #endif /* CONFIG_WQ_WATCHDOG */ +static void __init restrict_unbound_cpumask(const char *name, const struct cpumask *mask) +{ + if (!cpumask_intersects(wq_unbound_cpumask, mask)) { + pr_warn("workqueue: Restricting unbound_cpumask (%*pb) with %s (%*pb) leaves no CPU, ignoring\n", + cpumask_pr_args(wq_unbound_cpumask), name, cpumask_pr_args(mask)); + return; + } + + cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, mask); +} + /** * workqueue_init_early - early init for workqueue subsystem * @@ -6534,11 +6542,11 @@ void __init workqueue_init_early(void) BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL)); - cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_WQ)); - cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN)); - + cpumask_copy(wq_unbound_cpumask, cpu_possible_mask); + restrict_unbound_cpumask("HK_TYPE_WQ", housekeeping_cpumask(HK_TYPE_WQ)); + restrict_unbound_cpumask("HK_TYPE_DOMAIN", housekeeping_cpumask(HK_TYPE_DOMAIN)); if (!cpumask_empty(&wq_cmdline_cpumask)) - cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, &wq_cmdline_cpumask); + restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask); pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); -- cgit From cdba4301adda7c60a2064bf808e48fccd352aaa9 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Wed, 22 Nov 2023 18:01:23 +0800 Subject: ASoC: rt5650: add mutex to avoid the jack detection failure This patch adds the jd_mutex to protect the jack detection control flow. And only the headset type could check the button status. Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20231122100123.2831753-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 7938b52d741d..a0d01d71d8b5 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -448,6 +448,7 @@ struct rt5645_priv { struct regulator_bulk_data supplies[ARRAY_SIZE(rt5645_supply_names)]; struct rt5645_eq_param_s *eq_param; struct timer_list btn_check_timer; + struct mutex jd_mutex; int codec_type; int sysclk; @@ -3193,6 +3194,8 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse rt5645_enable_push_button_irq(component, true); } } else { + if (rt5645->en_button_func) + rt5645_enable_push_button_irq(component, false); snd_soc_dapm_disable_pin(dapm, "Mic Det Power"); snd_soc_dapm_sync(dapm); rt5645->jack_type = SND_JACK_HEADPHONE; @@ -3295,6 +3298,8 @@ static void rt5645_jack_detect_work(struct work_struct *work) if (!rt5645->component) return; + mutex_lock(&rt5645->jd_mutex); + switch (rt5645->pdata.jd_mode) { case 0: /* Not using rt5645 JD */ if (rt5645->gpiod_hp_det) { @@ -3321,7 +3326,7 @@ static void rt5645_jack_detect_work(struct work_struct *work) if (!val && (rt5645->jack_type == 0)) { /* jack in */ report = rt5645_jack_detect(rt5645->component, 1); - } else if (!val && rt5645->jack_type != 0) { + } else if (!val && rt5645->jack_type == SND_JACK_HEADSET) { /* for push button and jack out */ btn_type = 0; if (snd_soc_component_read(rt5645->component, RT5645_INT_IRQ_ST) & 0x4) { @@ -3377,6 +3382,8 @@ static void rt5645_jack_detect_work(struct work_struct *work) rt5645_jack_detect(rt5645->component, 0); } + mutex_unlock(&rt5645->jd_mutex); + snd_soc_jack_report(rt5645->hp_jack, report, SND_JACK_HEADPHONE); snd_soc_jack_report(rt5645->mic_jack, report, SND_JACK_MICROPHONE); if (rt5645->en_button_func) @@ -4150,6 +4157,7 @@ static int rt5645_i2c_probe(struct i2c_client *i2c) } timer_setup(&rt5645->btn_check_timer, rt5645_btn_check_callback, 0); + mutex_init(&rt5645->jd_mutex); INIT_DELAYED_WORK(&rt5645->jack_detect_work, rt5645_jack_detect_work); INIT_DELAYED_WORK(&rt5645->rcclock_work, rt5645_rcclock_work); -- cgit From 36a1c2ee50f573972aea3c3019555f47ee0094c0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 17 Nov 2023 13:18:48 -0700 Subject: cxl/hdm: Fix a benign lockdep splat The new helper "cxl_num_decoders_committed()" added a lockdep assertion to validate that port->commit_end is protected against modification. That assertion fires in init_hdm_decoder() where it is initializing port->commit_end. Given that it is both accessing and writing that property it obstensibly needs the lock. In practice, CXL decoder commit rules (must commit in order) and the in-order discovery of device decoders makes the manipulation of ->commit_end in init_hdm_decoder() safe. However, rather than rely on the subtle rules of CXL hardware, just make the implementation obviously correct from a software perspective. The Fixes: tag is only for cleaning up a lockdep splat, there is no functional issue addressed by this fix. Fixes: 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170025232811.2147250.16376901801315194121.stgit@djiang5-mobl3 Acked-by: Davidlohr Bueso Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 1cc9be85ba4c..529baa8a1759 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -839,6 +839,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, cxld->target_type = CXL_DECODER_HOSTONLYMEM; else cxld->target_type = CXL_DECODER_DEVMEM; + + guard(rwsem_write)(&cxl_region_rwsem); if (cxld->id != cxl_num_decoders_committed(port)) { dev_warn(&port->dev, "decoder%d.%d: Committed out of order\n", -- cgit From 744f5e7b69710701dc225020769138f8ca2894df Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Mon, 30 Oct 2023 20:01:13 +0100 Subject: dmaengine: ti: k3-psil-am62: Fix SPI PDMA data AM62x has 3 SPI channels where each channel has 4 TX and 4 RX threads. This also fixes the thread numbers. Signed-off-by: Ronald Wahl Fixes: 5ac6bfb58777 ("dmaengine: ti: k3-psil: Add AM62x PSIL and PDMA data") Reviewed-by: Jai Luthra Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20231030190113.16782-1-rwahl@gmx.de Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-psil-am62.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/dma/ti/k3-psil-am62.c b/drivers/dma/ti/k3-psil-am62.c index 2b6fd6e37c61..1272b1541f61 100644 --- a/drivers/dma/ti/k3-psil-am62.c +++ b/drivers/dma/ti/k3-psil-am62.c @@ -74,7 +74,9 @@ static struct psil_ep am62_src_ep_map[] = { PSIL_SAUL(0x7505, 21, 35, 8, 36, 0), PSIL_SAUL(0x7506, 22, 43, 8, 43, 0), PSIL_SAUL(0x7507, 23, 43, 8, 44, 0), - /* PDMA_MAIN0 - SPI0-3 */ + /* PDMA_MAIN0 - SPI0-2 */ + PSIL_PDMA_XY_PKT(0x4300), + PSIL_PDMA_XY_PKT(0x4301), PSIL_PDMA_XY_PKT(0x4302), PSIL_PDMA_XY_PKT(0x4303), PSIL_PDMA_XY_PKT(0x4304), @@ -85,8 +87,6 @@ static struct psil_ep am62_src_ep_map[] = { PSIL_PDMA_XY_PKT(0x4309), PSIL_PDMA_XY_PKT(0x430a), PSIL_PDMA_XY_PKT(0x430b), - PSIL_PDMA_XY_PKT(0x430c), - PSIL_PDMA_XY_PKT(0x430d), /* PDMA_MAIN1 - UART0-6 */ PSIL_PDMA_XY_PKT(0x4400), PSIL_PDMA_XY_PKT(0x4401), @@ -141,7 +141,9 @@ static struct psil_ep am62_dst_ep_map[] = { /* SAUL */ PSIL_SAUL(0xf500, 27, 83, 8, 83, 1), PSIL_SAUL(0xf501, 28, 91, 8, 91, 1), - /* PDMA_MAIN0 - SPI0-3 */ + /* PDMA_MAIN0 - SPI0-2 */ + PSIL_PDMA_XY_PKT(0xc300), + PSIL_PDMA_XY_PKT(0xc301), PSIL_PDMA_XY_PKT(0xc302), PSIL_PDMA_XY_PKT(0xc303), PSIL_PDMA_XY_PKT(0xc304), @@ -152,8 +154,6 @@ static struct psil_ep am62_dst_ep_map[] = { PSIL_PDMA_XY_PKT(0xc309), PSIL_PDMA_XY_PKT(0xc30a), PSIL_PDMA_XY_PKT(0xc30b), - PSIL_PDMA_XY_PKT(0xc30c), - PSIL_PDMA_XY_PKT(0xc30d), /* PDMA_MAIN1 - UART0-6 */ PSIL_PDMA_XY_PKT(0xc400), PSIL_PDMA_XY_PKT(0xc401), -- cgit From b52cbca22cbf6c9d2700c1e576d0ddcc670e49d5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Nov 2023 16:42:33 +0100 Subject: platform/x86: asus-wmi: Move i8042 filter install to shared asus-wmi code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit asus-nb-wmi calls i8042_install_filter() in some cases, but it never calls i8042_remove_filter(). This means that a dangling pointer to the filter function is left after rmmod leading to crashes. Fix this by moving the i8042-filter installation to the shared asus-wmi code and also remove it from the shared code on driver unbind. Fixes: b5643539b825 ("platform/x86: asus-wmi: Filter buggy scan codes on ASUS Q500A") Cc: Oleksij Rempel Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231120154235.610808-2-hdegoede@redhat.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/Kconfig | 2 +- drivers/platform/x86/asus-nb-wmi.c | 11 ----------- drivers/platform/x86/asus-wmi.c | 8 ++++++++ 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 7e69fdaccdd5..c94f31a5c6a3 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -263,6 +263,7 @@ config ASUS_WMI depends on RFKILL || RFKILL = n depends on HOTPLUG_PCI depends on ACPI_VIDEO || ACPI_VIDEO = n + depends on SERIO_I8042 || SERIO_I8042 = n select INPUT_SPARSEKMAP select LEDS_CLASS select NEW_LEDS @@ -279,7 +280,6 @@ config ASUS_WMI config ASUS_NB_WMI tristate "Asus Notebook WMI Driver" depends on ASUS_WMI - depends on SERIO_I8042 || SERIO_I8042 = n help This is a driver for newer Asus notebooks. It adds extra features like wireless radio and bluetooth control, leds, hotkeys, backlight... diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 9aa1226e74e6..ff794387581d 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -503,8 +503,6 @@ static const struct dmi_system_id asus_quirks[] = { static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver) { - int ret; - quirks = &quirk_asus_unknown; dmi_check_system(asus_quirks); @@ -519,15 +517,6 @@ static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver) if (tablet_mode_sw != -1) quirks->tablet_switch_mode = tablet_mode_sw; - - if (quirks->i8042_filter) { - ret = i8042_install_filter(quirks->i8042_filter); - if (ret) { - pr_warn("Unable to install key filter\n"); - return; - } - pr_info("Using i8042 filter function for receiving events\n"); - } } static const struct key_entry asus_nb_wmi_keymap[] = { diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 6a79f16233ab..53e25cb467d7 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -4567,6 +4567,12 @@ static int asus_wmi_add(struct platform_device *pdev) goto fail_wmi_handler; } + if (asus->driver->quirks->i8042_filter) { + err = i8042_install_filter(asus->driver->quirks->i8042_filter); + if (err) + pr_warn("Unable to install key filter - %d\n", err); + } + asus_wmi_battery_init(asus); asus_wmi_debugfs_init(asus); @@ -4603,6 +4609,8 @@ static int asus_wmi_remove(struct platform_device *device) struct asus_wmi *asus; asus = platform_get_drvdata(device); + if (asus->driver->quirks->i8042_filter) + i8042_remove_filter(asus->driver->quirks->i8042_filter); wmi_remove_notify_handler(asus->driver->event_guid); asus_wmi_backlight_exit(asus); asus_screenpad_exit(asus); -- cgit From 6db829fa2f1295aaad51a3c00f6fc57a27c444bb Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Nov 2023 16:42:34 +0100 Subject: platform/x86: asus-wmi: Change q500a_i8042_filter() into a generic i8042-filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change asus_q500a_i8042_filter() into a generic i8042-filter, using a new filter_i8042_e1_extended_codes flag in the quirks struct to decide if e1 extended codes should be filtered out or not. This is a preparation patch for adding support for filtering volume key events being reported twice through both the PS/2 keyboard and asus-wmi. Note while modifying the code also drop the unnecessary unlikely() annotations, this is not in a hot path so those are not necessary. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231120154235.610808-3-hdegoede@redhat.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-nb-wmi.c | 27 +++++++++++++++------------ drivers/platform/x86/asus-wmi.c | 8 ++++---- drivers/platform/x86/asus-wmi.h | 7 ++++--- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index ff794387581d..16241556f6fb 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -49,24 +49,26 @@ MODULE_PARM_DESC(tablet_mode_sw, "Tablet mode detect: -1:auto 0:disable 1:kbd-do static struct quirk_entry *quirks; -static bool asus_q500a_i8042_filter(unsigned char data, unsigned char str, - struct serio *port) +static bool asus_i8042_filter(unsigned char data, unsigned char str, struct serio *port) { - static bool extended; - bool ret = false; + static bool extended_e1; if (str & I8042_STR_AUXDATA) return false; - if (unlikely(data == 0xe1)) { - extended = true; - ret = true; - } else if (unlikely(extended)) { - extended = false; - ret = true; + if (quirks->filter_i8042_e1_extended_codes) { + if (data == 0xe1) { + extended_e1 = true; + return true; + } + + if (extended_e1) { + extended_e1 = false; + return true; + } } - return ret; + return false; } static struct quirk_entry quirk_asus_unknown = { @@ -75,7 +77,7 @@ static struct quirk_entry quirk_asus_unknown = { }; static struct quirk_entry quirk_asus_q500a = { - .i8042_filter = asus_q500a_i8042_filter, + .filter_i8042_e1_extended_codes = true, .wmi_backlight_set_devstate = true, }; @@ -619,6 +621,7 @@ static struct asus_wmi_driver asus_nb_wmi_driver = { .input_phys = ASUS_NB_WMI_FILE "/input0", .detect_quirks = asus_nb_wmi_quirks, .key_filter = asus_nb_wmi_key_filter, + .i8042_filter = asus_i8042_filter, }; diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 53e25cb467d7..ca668cf04020 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -4567,8 +4567,8 @@ static int asus_wmi_add(struct platform_device *pdev) goto fail_wmi_handler; } - if (asus->driver->quirks->i8042_filter) { - err = i8042_install_filter(asus->driver->quirks->i8042_filter); + if (asus->driver->i8042_filter) { + err = i8042_install_filter(asus->driver->i8042_filter); if (err) pr_warn("Unable to install key filter - %d\n", err); } @@ -4609,8 +4609,8 @@ static int asus_wmi_remove(struct platform_device *device) struct asus_wmi *asus; asus = platform_get_drvdata(device); - if (asus->driver->quirks->i8042_filter) - i8042_remove_filter(asus->driver->quirks->i8042_filter); + if (asus->driver->i8042_filter) + i8042_remove_filter(asus->driver->i8042_filter); wmi_remove_notify_handler(asus->driver->event_guid); asus_wmi_backlight_exit(asus); asus_screenpad_exit(asus); diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index adb67c925724..cc30f1853847 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -39,6 +39,7 @@ struct quirk_entry { bool wmi_backlight_set_devstate; bool wmi_force_als_set; bool wmi_ignore_fan; + bool filter_i8042_e1_extended_codes; enum asus_wmi_tablet_switch_mode tablet_switch_mode; int wapf; /* @@ -49,9 +50,6 @@ struct quirk_entry { */ int no_display_toggle; u32 xusb2pr; - - bool (*i8042_filter)(unsigned char data, unsigned char str, - struct serio *serio); }; struct asus_wmi_driver { @@ -73,6 +71,9 @@ struct asus_wmi_driver { * Return ASUS_WMI_KEY_IGNORE in code if event should be ignored. */ void (*key_filter) (struct asus_wmi_driver *driver, int *code, unsigned int *value, bool *autorelease); + /* Optional standard i8042 filter */ + bool (*i8042_filter)(unsigned char data, unsigned char str, + struct serio *serio); int (*probe) (struct platform_device *device); void (*detect_quirks) (struct asus_wmi_driver *driver); -- cgit From fb103b90e944ecd664577c0fd37a069282dcd294 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Nov 2023 16:42:35 +0100 Subject: platform/x86: asus-wmi: Filter Volume key presses if also reported via atkbd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the i8042-filter to check if Volume key presses are also reported via atkbd and if yes then filter out the WMI events to avoid reporting each key-press twice. Note depending on in which order the PS/2 data vs the WMI event are handled the first volume key press may still be reported twice. This is a compromise versus DMI quirks (unmaintainable) or other more complex solutions. Closes: https://bbs.archlinux.org/viewtopic.php?pid=2128536#p2128536 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231120154235.610808-4-hdegoede@redhat.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-nb-wmi.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 16241556f6fb..fceffe2082ec 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -48,9 +48,11 @@ module_param(tablet_mode_sw, uint, 0444); MODULE_PARM_DESC(tablet_mode_sw, "Tablet mode detect: -1:auto 0:disable 1:kbd-dock 2:lid-flip 3:lid-flip-rog"); static struct quirk_entry *quirks; +static bool atkbd_reports_vol_keys; static bool asus_i8042_filter(unsigned char data, unsigned char str, struct serio *port) { + static bool extended_e0; static bool extended_e1; if (str & I8042_STR_AUXDATA) @@ -68,6 +70,20 @@ static bool asus_i8042_filter(unsigned char data, unsigned char str, struct seri } } + if (data == 0xe0) { + extended_e0 = true; + } else if (extended_e0) { + extended_e0 = false; + + switch (data & 0x7f) { + case 0x20: /* e0 20 / e0 a0, Volume Mute press / release */ + case 0x2e: /* e0 2e / e0 ae, Volume Down press / release */ + case 0x30: /* e0 30 / e0 b0, Volume Up press / release */ + atkbd_reports_vol_keys = true; + break; + } + } + return false; } @@ -608,6 +624,13 @@ static void asus_nb_wmi_key_filter(struct asus_wmi_driver *asus_wmi, int *code, if (acpi_video_handles_brightness_key_presses()) *code = ASUS_WMI_KEY_IGNORE; + break; + case 0x30: /* Volume Up */ + case 0x31: /* Volume Down */ + case 0x32: /* Volume Mute */ + if (atkbd_reports_vol_keys) + *code = ASUS_WMI_KEY_IGNORE; + break; } } -- cgit From 3841d8a563a7473ceb7415ecfe577e20b2a66d37 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 23 Nov 2023 10:18:15 +0100 Subject: ASoC: soc-pcm: fix up bad merge A recent change to address pops and clicks with codecs like WSA883X touched the same code paths as a fix for clearing DAI parameters and resulted in a bad merge. Specifically, commit f0220575e65a ("ASoC: soc-dai: add flag to mute and unmute stream during trigger") made mute at stream close conditional, while commit 3efcb471f871 ("ASoC: soc-pcm.c: Make sure DAI parameters cleared if the DAI becomes inactive") moved that same mute call back to soc_pcm_hw_clean(). Fix up the bad merge by dropping the second mute call from soc_pcm_clean() and making sure that the call in soc_pcm_hw_clean() is conditional as intended. Fixes: bdb7e1922052 ("ASoC: Merge up workaround for CODECs that play noise on stopped stream") Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231123091815.21933-1-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 323e4d7b6adf..f6d1b2e11795 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -704,11 +704,6 @@ static int soc_pcm_clean(struct snd_soc_pcm_runtime *rtd, if (snd_soc_dai_active(dai) == 0 && (dai->rate || dai->channels || dai->sample_bits)) soc_pcm_set_dai_params(dai, NULL); - - if (snd_soc_dai_stream_active(dai, substream->stream) == 0) { - if (dai->driver->ops && !dai->driver->ops->mute_unmute_on_trigger) - snd_soc_dai_digital_mute(dai, 1, substream->stream); - } } } @@ -947,8 +942,10 @@ static int soc_pcm_hw_clean(struct snd_soc_pcm_runtime *rtd, if (snd_soc_dai_active(dai) == 1) soc_pcm_set_dai_params(dai, NULL); - if (snd_soc_dai_stream_active(dai, substream->stream) == 1) - snd_soc_dai_digital_mute(dai, 1, substream->stream); + if (snd_soc_dai_stream_active(dai, substream->stream) == 1) { + if (dai->driver->ops && !dai->driver->ops->mute_unmute_on_trigger) + snd_soc_dai_digital_mute(dai, 1, substream->stream); + } } /* run the stream event */ -- cgit From 505c83212da5bfca95109421b8f5d9f8c6cdfef2 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 23 Nov 2023 09:44:54 +0100 Subject: ASoC: SOF: mediatek: mt8186: Add Google Steelix topology compatible Add the machine compatible and topology filename for the Google Steelix MT8186 Chromebook to load the correct SOF topology file. Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231123084454.20471-1-angelogioacchino.delregno@collabora.com Signed-off-by: Mark Brown --- sound/soc/sof/mediatek/mt8186/mt8186.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/sof/mediatek/mt8186/mt8186.c b/sound/soc/sof/mediatek/mt8186/mt8186.c index b69fa788b16f..e0d88e7aa8ca 100644 --- a/sound/soc/sof/mediatek/mt8186/mt8186.c +++ b/sound/soc/sof/mediatek/mt8186/mt8186.c @@ -597,6 +597,9 @@ static struct snd_sof_dsp_ops sof_mt8186_ops = { static struct snd_sof_of_mach sof_mt8186_machs[] = { { + .compatible = "google,steelix", + .sof_tplg_filename = "sof-mt8186-google-steelix.tplg" + }, { .compatible = "mediatek,mt8186", .sof_tplg_filename = "sof-mt8186.tplg", }, -- cgit From 347ecf29a68cc8958fbcbd26ef410d07fe9d82f4 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 23 Nov 2023 09:14:53 +0800 Subject: ASoC: fsl_xcvr: refine the requested phy clock frequency As the input phy clock frequency will divided by 2 by default on i.MX8MP with the implementation of clk-imx8mp-audiomix driver, So the requested frequency need to be updated. The relation of phy clock is: sai_pll_ref_sel sai_pll sai_pll_bypass sai_pll_out sai_pll_out_div2 earc_phy_cg Signed-off-by: Shengjiu Wang Reviewed-by: Iuliana Prodan Link: https://lore.kernel.org/r/1700702093-8008-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_xcvr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c index 77f8e2394bf9..f0fb33d719c2 100644 --- a/sound/soc/fsl/fsl_xcvr.c +++ b/sound/soc/fsl/fsl_xcvr.c @@ -358,7 +358,7 @@ static int fsl_xcvr_en_aud_pll(struct fsl_xcvr *xcvr, u32 freq) struct device *dev = &xcvr->pdev->dev; int ret; - freq = xcvr->soc_data->spdif_only ? freq / 10 : freq; + freq = xcvr->soc_data->spdif_only ? freq / 5 : freq; clk_disable_unprepare(xcvr->phy_clk); ret = clk_set_rate(xcvr->phy_clk, freq); if (ret < 0) { @@ -409,7 +409,7 @@ static int fsl_xcvr_prepare(struct snd_pcm_substream *substream, bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; u32 m_ctl = 0, v_ctl = 0; u32 r = substream->runtime->rate, ch = substream->runtime->channels; - u32 fout = 32 * r * ch * 10 * 2; + u32 fout = 32 * r * ch * 10; int ret = 0; switch (xcvr->mode) { -- cgit From 9aa6a662c309e6f8770972840948af963bd6ff34 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 25 Oct 2023 12:49:40 +0200 Subject: drm/mediatek: mtk_disp_gamma: Fix breakage due to merge issue While the commit that was sent to the mailing lists was fine, something happened during merge and the mtk_gamma_set() function got broken as a writel() was turned into a readl(). Fix that by changing that back to the expected writel(). Fixes: a6b39cd248f3 ("drm/mediatek: De-commonize disp_aal/disp_gamma gamma_set functions") Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20231025104940.140605-1-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_disp_gamma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_gamma.c b/drivers/gpu/drm/mediatek/mtk_disp_gamma.c index f81dc34c9c3e..c1bc8b00d938 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_gamma.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_gamma.c @@ -203,7 +203,7 @@ void mtk_gamma_set(struct device *dev, struct drm_crtc_state *state) /* Disable RELAY mode to pass the processed image */ cfg_val &= ~GAMMA_RELAY_MODE; - cfg_val = readl(gamma->regs + DISP_GAMMA_CFG); + writel(cfg_val, gamma->regs + DISP_GAMMA_CFG); } void mtk_gamma_config(struct device *dev, unsigned int w, -- cgit From 4662817aed5a9d6c695658d0105d8ff4b84ac6cb Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 5 Sep 2023 10:49:21 +0200 Subject: drm/mediatek: fix kernel oops if no crtc is found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_crtc_from_index(0) might return NULL if there are no CRTCs registered at all which will lead to a kernel oops in mtk_drm_crtc_dma_dev_get(). Add the missing return value check. Fixes: 0d9eee9118b7 ("drm/mediatek: Add drm ovl_adaptor sub driver for MT8195") Signed-off-by: Michael Walle Reviewed-by: Nícolas F. R. A. Prado Tested-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Tested-by: Eugen Hristev Reviewed-by: Eugen Hristev Link: https://patchwork.kernel.org/project/dri-devel/patch/20230905084922.3908121-1-mwalle@kernel.org/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 2dfaa613276a..2b0c35cacbc6 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -443,6 +443,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) struct mtk_drm_private *private = drm->dev_private; struct mtk_drm_private *priv_n; struct device *dma_dev = NULL; + struct drm_crtc *crtc; int ret, i, j; if (drm_firmware_drivers_only()) @@ -519,7 +520,9 @@ static int mtk_drm_kms_init(struct drm_device *drm) } /* Use OVL device for all DMA memory allocations */ - dma_dev = mtk_drm_crtc_dma_dev_get(drm_crtc_from_index(drm, 0)); + crtc = drm_crtc_from_index(drm, 0); + if (crtc) + dma_dev = mtk_drm_crtc_dma_dev_get(crtc); if (!dma_dev) { ret = -ENODEV; dev_err(drm->dev, "Need at least one OVL device\n"); -- cgit From c6bb057418876cdfdd29a6f7b8cef54539ee8811 Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Wed, 1 Nov 2023 18:14:31 +0100 Subject: serial: 8250: 8250_omap: Do not start RX DMA on THRI interrupt Starting RX DMA on THRI interrupt is too early because TX may not have finished yet. This change is inspired by commit 90b8596ac460 ("serial: 8250: Prevent starting up DMA Rx on THRI interrupt") and fixes DMA issues I had with an AM62 SoC that is using the 8250 OMAP variant. Cc: stable@vger.kernel.org Fixes: c26389f998a8 ("serial: 8250: 8250_omap: Add DMA support for UARTs on K3 SoCs") Signed-off-by: Ronald Wahl Reviewed-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/20231101171431.16495-1-rwahl@gmx.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 2d42f485c987..7d99bc3be0b8 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -1298,10 +1298,12 @@ static int omap_8250_dma_handle_irq(struct uart_port *port) status = serial_port_in(port, UART_LSR); - if (priv->habit & UART_HAS_EFR2) - am654_8250_handle_rx_dma(up, iir, status); - else - status = omap_8250_handle_rx_dma(up, iir, status); + if ((iir & 0x3f) != UART_IIR_THRI) { + if (priv->habit & UART_HAS_EFR2) + am654_8250_handle_rx_dma(up, iir, status); + else + status = omap_8250_handle_rx_dma(up, iir, status); + } serial8250_modem_status(up); if (status & UART_LSR_THRE && up->dma->tx_err) { -- cgit From 8e42c301ce64e0dcca547626eb486877d502d336 Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Tue, 31 Oct 2023 14:12:42 +0100 Subject: serial: 8250_omap: Add earlycon support for the AM654 UART controller Currently there is no support for earlycon on the AM654 UART controller. This commit adds it. Signed-off-by: Ronald Wahl Reviewed-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/20231031131242.15516-1-rwahl@gmx.de Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_early.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c index 9837a27739fd..e3f482fd3de4 100644 --- a/drivers/tty/serial/8250/8250_early.c +++ b/drivers/tty/serial/8250/8250_early.c @@ -189,5 +189,6 @@ static int __init early_omap8250_setup(struct earlycon_device *device, OF_EARLYCON_DECLARE(omap8250, "ti,omap2-uart", early_omap8250_setup); OF_EARLYCON_DECLARE(omap8250, "ti,omap3-uart", early_omap8250_setup); OF_EARLYCON_DECLARE(omap8250, "ti,omap4-uart", early_omap8250_setup); +OF_EARLYCON_DECLARE(omap8250, "ti,am654-uart", early_omap8250_setup); #endif -- cgit From 8973ab7a2441b286218f4a5c4c33680e2f139996 Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Tue, 31 Oct 2023 12:09:09 +0100 Subject: serial: 8250: 8250_omap: Clear UART_HAS_RHR_IT_DIS bit This fixes commit 439c7183e5b9 ("serial: 8250: 8250_omap: Disable RX interrupt after DMA enable") which unfortunately set the UART_HAS_RHR_IT_DIS bit in the UART_OMAP_IER2 register and never cleared it. Cc: stable@vger.kernel.org Fixes: 439c7183e5b9 ("serial: 8250: 8250_omap: Disable RX interrupt after DMA enable") Signed-off-by: Ronald Wahl Reviewed-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/20231031110909.11695-1-rwahl@gmx.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 7d99bc3be0b8..578f35895b27 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -933,7 +933,7 @@ static void __dma_rx_do_complete(struct uart_8250_port *p) if (priv->habit & UART_HAS_RHR_IT_DIS) { reg = serial_in(p, UART_OMAP_IER2); reg &= ~UART_OMAP_IER2_RHR_IT_DIS; - serial_out(p, UART_OMAP_IER2, UART_OMAP_IER2_RHR_IT_DIS); + serial_out(p, UART_OMAP_IER2, reg); } dmaengine_tx_status(rxchan, cookie, &state); @@ -1079,7 +1079,7 @@ static int omap_8250_rx_dma(struct uart_8250_port *p) if (priv->habit & UART_HAS_RHR_IT_DIS) { reg = serial_in(p, UART_OMAP_IER2); reg |= UART_OMAP_IER2_RHR_IT_DIS; - serial_out(p, UART_OMAP_IER2, UART_OMAP_IER2_RHR_IT_DIS); + serial_out(p, UART_OMAP_IER2, reg); } dma_async_issue_pending(dma->rxchan); -- cgit From 08ce9a1b72e38cf44c300a44ac5858533eb3c860 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 23 Nov 2023 08:28:18 +0100 Subject: serial: sc16is7xx: address RX timeout interrupt errata This device has a silicon bug that makes it report a timeout interrupt but no data in the FIFO. The datasheet states the following in the errata section 18.1.4: "If the host reads the receive FIFO at the same time as a time-out interrupt condition happens, the host might read 0xCC (time-out) in the Interrupt Indication Register (IIR), but bit 0 of the Line Status Register (LSR) is not set (means there is no data in the receive FIFO)." The errata description seems to indicate it concerns only polled mode of operation when reading bit 0 of the LSR register. However, tests have shown and NXP has confirmed that the RXLVL register also yields 0 when the bug is triggered, and hence the IRQ driven implementation in this driver is equally affected. This bug has hit us on production units and when it does, sc16is7xx_irq() would spin forever because sc16is7xx_port_irq() keeps seeing an interrupt in the IIR register that is not cleared because the driver does not call into sc16is7xx_handle_rx() unless the RXLVL register reports at least one byte in the FIFO. Fix this by always reading one byte from the FIFO when this condition is detected in order to clear the interrupt. This approach was confirmed to be correct by NXP through their support channels. Tested by: Hugo Villeneuve Signed-off-by: Daniel Mack Co-Developed-by: Maxim Popov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231123072818.1394539-1-daniel@zonque.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index db2bb1c0d36c..cf0c6120d30e 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -766,6 +766,18 @@ static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) case SC16IS7XX_IIR_RTOI_SRC: case SC16IS7XX_IIR_XOFFI_SRC: rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG); + + /* + * There is a silicon bug that makes the chip report a + * time-out interrupt but no data in the FIFO. This is + * described in errata section 18.1.4. + * + * When this happens, read one byte from the FIFO to + * clear the interrupt. + */ + if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen) + rxlen = 1; + if (rxlen) sc16is7xx_handle_rx(port, rxlen, iir); break; -- cgit From 58ac1b3799799069d53f5bf95c093f2fe8dd3cc5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Nov 2023 18:15:03 +0100 Subject: ARM: PL011: Fix DMA support Since there is no guarantee that the memory returned by dma_alloc_coherent() is associated with a 'struct page', using the architecture specific phys_to_page() is wrong, but using virt_to_page() would be as well. Stop using sg lists altogether and just use the *_single() functions instead. This also simplifies the code a bit since the scatterlists in this driver always have only one entry anyway. https://lore.kernel.org/lkml/86db0fe5-930d-4cbb-bd7d-03367da38951@app.fastmail.com/ Use consistent names for dma buffers gc: Add a commit log from the initial thread: https://lore.kernel.org/lkml/86db0fe5-930d-4cbb-bd7d-03367da38951@app.fastmail.com/ Use consistent names for dma buffers Fixes: cb06ff102e2d7 ("ARM: PL011: Add support for Rx DMA buffer polling.") Signed-off-by: Arnd Bergmann Tested-by: Gregory CLEMENT Signed-off-by: Gregory CLEMENT Cc: stable Link: https://lore.kernel.org/r/20231122171503.235649-1-gregory.clement@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 112 +++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 58 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 61cc24cd90e4..b7635363373e 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -218,17 +218,18 @@ static struct vendor_data vendor_st = { /* Deals with DMA transactions */ -struct pl011_sgbuf { - struct scatterlist sg; - char *buf; +struct pl011_dmabuf { + dma_addr_t dma; + size_t len; + char *buf; }; struct pl011_dmarx_data { struct dma_chan *chan; struct completion complete; bool use_buf_b; - struct pl011_sgbuf sgbuf_a; - struct pl011_sgbuf sgbuf_b; + struct pl011_dmabuf dbuf_a; + struct pl011_dmabuf dbuf_b; dma_cookie_t cookie; bool running; struct timer_list timer; @@ -241,7 +242,8 @@ struct pl011_dmarx_data { struct pl011_dmatx_data { struct dma_chan *chan; - struct scatterlist sg; + dma_addr_t dma; + size_t len; char *buf; bool queued; }; @@ -366,32 +368,24 @@ static int pl011_fifo_to_tty(struct uart_amba_port *uap) #define PL011_DMA_BUFFER_SIZE PAGE_SIZE -static int pl011_sgbuf_init(struct dma_chan *chan, struct pl011_sgbuf *sg, +static int pl011_dmabuf_init(struct dma_chan *chan, struct pl011_dmabuf *db, enum dma_data_direction dir) { - dma_addr_t dma_addr; - - sg->buf = dma_alloc_coherent(chan->device->dev, - PL011_DMA_BUFFER_SIZE, &dma_addr, GFP_KERNEL); - if (!sg->buf) + db->buf = dma_alloc_coherent(chan->device->dev, PL011_DMA_BUFFER_SIZE, + &db->dma, GFP_KERNEL); + if (!db->buf) return -ENOMEM; - - sg_init_table(&sg->sg, 1); - sg_set_page(&sg->sg, phys_to_page(dma_addr), - PL011_DMA_BUFFER_SIZE, offset_in_page(dma_addr)); - sg_dma_address(&sg->sg) = dma_addr; - sg_dma_len(&sg->sg) = PL011_DMA_BUFFER_SIZE; + db->len = PL011_DMA_BUFFER_SIZE; return 0; } -static void pl011_sgbuf_free(struct dma_chan *chan, struct pl011_sgbuf *sg, +static void pl011_dmabuf_free(struct dma_chan *chan, struct pl011_dmabuf *db, enum dma_data_direction dir) { - if (sg->buf) { + if (db->buf) { dma_free_coherent(chan->device->dev, - PL011_DMA_BUFFER_SIZE, sg->buf, - sg_dma_address(&sg->sg)); + PL011_DMA_BUFFER_SIZE, db->buf, db->dma); } } @@ -552,8 +546,8 @@ static void pl011_dma_tx_callback(void *data) uart_port_lock_irqsave(&uap->port, &flags); if (uap->dmatx.queued) - dma_unmap_sg(dmatx->chan->device->dev, &dmatx->sg, 1, - DMA_TO_DEVICE); + dma_unmap_single(dmatx->chan->device->dev, dmatx->dma, + dmatx->len, DMA_TO_DEVICE); dmacr = uap->dmacr; uap->dmacr = dmacr & ~UART011_TXDMAE; @@ -639,18 +633,19 @@ static int pl011_dma_tx_refill(struct uart_amba_port *uap) memcpy(&dmatx->buf[first], &xmit->buf[0], second); } - dmatx->sg.length = count; - - if (dma_map_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE) != 1) { + dmatx->len = count; + dmatx->dma = dma_map_single(dma_dev->dev, dmatx->buf, count, + DMA_TO_DEVICE); + if (dmatx->dma == DMA_MAPPING_ERROR) { uap->dmatx.queued = false; dev_dbg(uap->port.dev, "unable to map TX DMA\n"); return -EBUSY; } - desc = dmaengine_prep_slave_sg(chan, &dmatx->sg, 1, DMA_MEM_TO_DEV, + desc = dmaengine_prep_slave_single(chan, dmatx->dma, dmatx->len, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!desc) { - dma_unmap_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE); + dma_unmap_single(dma_dev->dev, dmatx->dma, dmatx->len, DMA_TO_DEVICE); uap->dmatx.queued = false; /* * If DMA cannot be used right now, we complete this @@ -813,8 +808,8 @@ __acquires(&uap->port.lock) dmaengine_terminate_async(uap->dmatx.chan); if (uap->dmatx.queued) { - dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1, - DMA_TO_DEVICE); + dma_unmap_single(uap->dmatx.chan->device->dev, uap->dmatx.dma, + uap->dmatx.len, DMA_TO_DEVICE); uap->dmatx.queued = false; uap->dmacr &= ~UART011_TXDMAE; pl011_write(uap->dmacr, uap, REG_DMACR); @@ -828,15 +823,15 @@ static int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap) struct dma_chan *rxchan = uap->dmarx.chan; struct pl011_dmarx_data *dmarx = &uap->dmarx; struct dma_async_tx_descriptor *desc; - struct pl011_sgbuf *sgbuf; + struct pl011_dmabuf *dbuf; if (!rxchan) return -EIO; /* Start the RX DMA job */ - sgbuf = uap->dmarx.use_buf_b ? - &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; - desc = dmaengine_prep_slave_sg(rxchan, &sgbuf->sg, 1, + dbuf = uap->dmarx.use_buf_b ? + &uap->dmarx.dbuf_b : &uap->dmarx.dbuf_a; + desc = dmaengine_prep_slave_single(rxchan, dbuf->dma, dbuf->len, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); /* @@ -876,8 +871,8 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap, bool readfifo) { struct tty_port *port = &uap->port.state->port; - struct pl011_sgbuf *sgbuf = use_buf_b ? - &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; + struct pl011_dmabuf *dbuf = use_buf_b ? + &uap->dmarx.dbuf_b : &uap->dmarx.dbuf_a; int dma_count = 0; u32 fifotaken = 0; /* only used for vdbg() */ @@ -886,7 +881,7 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap, if (uap->dmarx.poll_rate) { /* The data can be taken by polling */ - dmataken = sgbuf->sg.length - dmarx->last_residue; + dmataken = dbuf->len - dmarx->last_residue; /* Recalculate the pending size */ if (pending >= dmataken) pending -= dmataken; @@ -900,7 +895,7 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap, * Note that tty_insert_flip_buf() tries to take as many chars * as it can. */ - dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken, + dma_count = tty_insert_flip_string(port, dbuf->buf + dmataken, pending); uap->port.icount.rx += dma_count; @@ -911,7 +906,7 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap, /* Reset the last_residue for Rx DMA poll */ if (uap->dmarx.poll_rate) - dmarx->last_residue = sgbuf->sg.length; + dmarx->last_residue = dbuf->len; /* * Only continue with trying to read the FIFO if all DMA chars have @@ -946,8 +941,8 @@ static void pl011_dma_rx_irq(struct uart_amba_port *uap) { struct pl011_dmarx_data *dmarx = &uap->dmarx; struct dma_chan *rxchan = dmarx->chan; - struct pl011_sgbuf *sgbuf = dmarx->use_buf_b ? - &dmarx->sgbuf_b : &dmarx->sgbuf_a; + struct pl011_dmabuf *dbuf = dmarx->use_buf_b ? + &dmarx->dbuf_b : &dmarx->dbuf_a; size_t pending; struct dma_tx_state state; enum dma_status dmastat; @@ -969,7 +964,7 @@ static void pl011_dma_rx_irq(struct uart_amba_port *uap) pl011_write(uap->dmacr, uap, REG_DMACR); uap->dmarx.running = false; - pending = sgbuf->sg.length - state.residue; + pending = dbuf->len - state.residue; BUG_ON(pending > PL011_DMA_BUFFER_SIZE); /* Then we terminate the transfer - we now know our residue */ dmaengine_terminate_all(rxchan); @@ -996,8 +991,8 @@ static void pl011_dma_rx_callback(void *data) struct pl011_dmarx_data *dmarx = &uap->dmarx; struct dma_chan *rxchan = dmarx->chan; bool lastbuf = dmarx->use_buf_b; - struct pl011_sgbuf *sgbuf = dmarx->use_buf_b ? - &dmarx->sgbuf_b : &dmarx->sgbuf_a; + struct pl011_dmabuf *dbuf = dmarx->use_buf_b ? + &dmarx->dbuf_b : &dmarx->dbuf_a; size_t pending; struct dma_tx_state state; int ret; @@ -1015,7 +1010,7 @@ static void pl011_dma_rx_callback(void *data) * the DMA irq handler. So we check the residue here. */ rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state); - pending = sgbuf->sg.length - state.residue; + pending = dbuf->len - state.residue; BUG_ON(pending > PL011_DMA_BUFFER_SIZE); /* Then we terminate the transfer - we now know our residue */ dmaengine_terminate_all(rxchan); @@ -1067,16 +1062,16 @@ static void pl011_dma_rx_poll(struct timer_list *t) unsigned long flags; unsigned int dmataken = 0; unsigned int size = 0; - struct pl011_sgbuf *sgbuf; + struct pl011_dmabuf *dbuf; int dma_count; struct dma_tx_state state; - sgbuf = dmarx->use_buf_b ? &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; + dbuf = dmarx->use_buf_b ? &uap->dmarx.dbuf_b : &uap->dmarx.dbuf_a; rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state); if (likely(state.residue < dmarx->last_residue)) { - dmataken = sgbuf->sg.length - dmarx->last_residue; + dmataken = dbuf->len - dmarx->last_residue; size = dmarx->last_residue - state.residue; - dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken, + dma_count = tty_insert_flip_string(port, dbuf->buf + dmataken, size); if (dma_count == size) dmarx->last_residue = state.residue; @@ -1123,7 +1118,7 @@ static void pl011_dma_startup(struct uart_amba_port *uap) return; } - sg_init_one(&uap->dmatx.sg, uap->dmatx.buf, PL011_DMA_BUFFER_SIZE); + uap->dmatx.len = PL011_DMA_BUFFER_SIZE; /* The DMA buffer is now the FIFO the TTY subsystem can use */ uap->port.fifosize = PL011_DMA_BUFFER_SIZE; @@ -1133,7 +1128,7 @@ static void pl011_dma_startup(struct uart_amba_port *uap) goto skip_rx; /* Allocate and map DMA RX buffers */ - ret = pl011_sgbuf_init(uap->dmarx.chan, &uap->dmarx.sgbuf_a, + ret = pl011_dmabuf_init(uap->dmarx.chan, &uap->dmarx.dbuf_a, DMA_FROM_DEVICE); if (ret) { dev_err(uap->port.dev, "failed to init DMA %s: %d\n", @@ -1141,12 +1136,12 @@ static void pl011_dma_startup(struct uart_amba_port *uap) goto skip_rx; } - ret = pl011_sgbuf_init(uap->dmarx.chan, &uap->dmarx.sgbuf_b, + ret = pl011_dmabuf_init(uap->dmarx.chan, &uap->dmarx.dbuf_b, DMA_FROM_DEVICE); if (ret) { dev_err(uap->port.dev, "failed to init DMA %s: %d\n", "RX buffer B", ret); - pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a, + pl011_dmabuf_free(uap->dmarx.chan, &uap->dmarx.dbuf_a, DMA_FROM_DEVICE); goto skip_rx; } @@ -1200,8 +1195,9 @@ static void pl011_dma_shutdown(struct uart_amba_port *uap) /* In theory, this should already be done by pl011_dma_flush_buffer */ dmaengine_terminate_all(uap->dmatx.chan); if (uap->dmatx.queued) { - dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1, - DMA_TO_DEVICE); + dma_unmap_single(uap->dmatx.chan->device->dev, + uap->dmatx.dma, uap->dmatx.len, + DMA_TO_DEVICE); uap->dmatx.queued = false; } @@ -1212,8 +1208,8 @@ static void pl011_dma_shutdown(struct uart_amba_port *uap) if (uap->using_rx_dma) { dmaengine_terminate_all(uap->dmarx.chan); /* Clean up the RX DMA */ - pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a, DMA_FROM_DEVICE); - pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_b, DMA_FROM_DEVICE); + pl011_dmabuf_free(uap->dmarx.chan, &uap->dmarx.dbuf_a, DMA_FROM_DEVICE); + pl011_dmabuf_free(uap->dmarx.chan, &uap->dmarx.dbuf_b, DMA_FROM_DEVICE); if (uap->dmarx.poll_rate) del_timer_sync(&uap->dmarx.timer); uap->using_rx_dma = false; -- cgit From be37542afbfcd27b3bb99a135abf9b4736b96f75 Mon Sep 17 00:00:00 2001 From: Jai Luthra Date: Thu, 23 Nov 2023 14:57:31 +0530 Subject: dmaengine: ti: k3-psil-am62a: Fix SPI PDMA data AM62Ax has 3 SPI channels where each channel has 4x TX and 4x RX threads. Also fix the thread numbers to match what the firmware expects according to the PSI-L device description. Link: http://downloads.ti.com/tisci/esd/latest/5_soc_doc/am62ax/psil_cfg.html [1] Fixes: aac6db7e243a ("dmaengine: ti: k3-psil-am62a: Add AM62Ax PSIL and PDMA data") Signed-off-by: Jai Luthra Link: https://lore.kernel.org/r/20231123-psil_fix-v1-1-6604d80819be@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-psil-am62a.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/dma/ti/k3-psil-am62a.c b/drivers/dma/ti/k3-psil-am62a.c index ca9d71f91422..4cf9123b0e93 100644 --- a/drivers/dma/ti/k3-psil-am62a.c +++ b/drivers/dma/ti/k3-psil-am62a.c @@ -84,7 +84,9 @@ static struct psil_ep am62a_src_ep_map[] = { PSIL_SAUL(0x7505, 21, 35, 8, 36, 0), PSIL_SAUL(0x7506, 22, 43, 8, 43, 0), PSIL_SAUL(0x7507, 23, 43, 8, 44, 0), - /* PDMA_MAIN0 - SPI0-3 */ + /* PDMA_MAIN0 - SPI0-2 */ + PSIL_PDMA_XY_PKT(0x4300), + PSIL_PDMA_XY_PKT(0x4301), PSIL_PDMA_XY_PKT(0x4302), PSIL_PDMA_XY_PKT(0x4303), PSIL_PDMA_XY_PKT(0x4304), @@ -95,8 +97,6 @@ static struct psil_ep am62a_src_ep_map[] = { PSIL_PDMA_XY_PKT(0x4309), PSIL_PDMA_XY_PKT(0x430a), PSIL_PDMA_XY_PKT(0x430b), - PSIL_PDMA_XY_PKT(0x430c), - PSIL_PDMA_XY_PKT(0x430d), /* PDMA_MAIN1 - UART0-6 */ PSIL_PDMA_XY_PKT(0x4400), PSIL_PDMA_XY_PKT(0x4401), @@ -151,7 +151,9 @@ static struct psil_ep am62a_dst_ep_map[] = { /* SAUL */ PSIL_SAUL(0xf500, 27, 83, 8, 83, 1), PSIL_SAUL(0xf501, 28, 91, 8, 91, 1), - /* PDMA_MAIN0 - SPI0-3 */ + /* PDMA_MAIN0 - SPI0-2 */ + PSIL_PDMA_XY_PKT(0xc300), + PSIL_PDMA_XY_PKT(0xc301), PSIL_PDMA_XY_PKT(0xc302), PSIL_PDMA_XY_PKT(0xc303), PSIL_PDMA_XY_PKT(0xc304), @@ -162,8 +164,6 @@ static struct psil_ep am62a_dst_ep_map[] = { PSIL_PDMA_XY_PKT(0xc309), PSIL_PDMA_XY_PKT(0xc30a), PSIL_PDMA_XY_PKT(0xc30b), - PSIL_PDMA_XY_PKT(0xc30c), - PSIL_PDMA_XY_PKT(0xc30d), /* PDMA_MAIN1 - UART0-6 */ PSIL_PDMA_XY_PKT(0xc400), PSIL_PDMA_XY_PKT(0xc401), -- cgit From 2838a897654c4810153cc51646414ffa54fd23b0 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Tue, 14 Nov 2023 06:57:12 +0800 Subject: dmaengine: fsl-edma: Do not suspend and resume the masked dma channel when the system is sleeping Some channels may be masked. When the system is suspended, if these masked channels are not filtered out, this will lead to null pointer operations and system crash: Unable to handle kernel NULL pointer dereference at virtual address Mem abort info: ESR = 0x0000000096000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=0000000894300000 [00000000000002a0] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP Modules linked in: CPU: 1 PID: 989 Comm: sh Tainted: G B 6.6.0-16203-g557fb7a3ec4c-dirty #70 Hardware name: Freescale i.MX8QM MEK (DT) pstate: 400000c5 (nZcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc: fsl_edma_disable_request+0x3c/0x78 lr: fsl_edma_disable_request+0x3c/0x78 sp:ffff800089ae7690 x29: ffff800089ae7690 x28: ffff000807ab5440 x27: ffff000807ab5830 x26: 0000000000000008 x25: 0000000000000278 x24: 0000000000000001 23: ffff000807ab4328 x22: 0000000000000000 x21: 0000000000000009 x20: ffff800082616940 x19: 0000000000000000 x18: 0000000000000000 x17: 3d3d3d3d3d3d3d3d x16: 3d3d3d3d3d3d3d3d x15: 3d3d3d3d3d3d3d3d x14: 3d3d3d3d3d3d3d3d x13: 3d3d3d3d3d3d3d3d x12: 1ffff00010d45724 x11: ffff700010d45724 x10: dfff800000000000 x9: dfff800000000000 x8: 00008fffef2ba8dc x7: 0000000000000001 x6: ffff800086a2b927 x5: ffff800086a2b920 x4: ffff700010d45725 x3: ffff8000800d5bbc x2 : 0000000000000000 x1 : ffff000800c1d880 x0 : 0000000000000001 Call trace: fsl_edma_disable_request+0x3c/0x78 fsl_edma_suspend_late+0x128/0x12c dpm_run_callback+0xd4/0x304 __device_suspend_late+0xd0/0x240 dpm_suspend_late+0x174/0x59c suspend_devices_and_enter+0x194/0xd00 pm_suspend+0x3c4/0x910 Fixes: 72f5801a4e2b ("dmaengine: fsl-edma: integrate v3 support") Signed-off-by: Xiaolei Wang Link: https://lore.kernel.org/r/20231113225713.1892643-2-xiaolei.wang@windriver.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 4635e16d7705..52577fffc62b 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -631,6 +631,8 @@ static int fsl_edma_suspend_late(struct device *dev) for (i = 0; i < fsl_edma->n_chans; i++) { fsl_chan = &fsl_edma->chans[i]; + if (fsl_edma->chan_masked & BIT(i)) + continue; spin_lock_irqsave(&fsl_chan->vchan.lock, flags); /* Make sure chan is idle or will force disable. */ if (unlikely(!fsl_chan->idle)) { @@ -655,6 +657,8 @@ static int fsl_edma_resume_early(struct device *dev) for (i = 0; i < fsl_edma->n_chans; i++) { fsl_chan = &fsl_edma->chans[i]; + if (fsl_edma->chan_masked & BIT(i)) + continue; fsl_chan->pm_state = RUNNING; edma_write_tcdreg(fsl_chan, 0, csr); if (fsl_chan->slave_id != 0) -- cgit From 3448397a47c08c291c3fccb7ac5f0f429fd547e0 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Tue, 14 Nov 2023 06:57:13 +0800 Subject: dmaengine: fsl-edma: Add judgment on enabling round robin arbitration Add judgment on enabling round robin arbitration to avoid exceptions if this function is not supported. Call trace: fsl_edma_resume_early+0x1d4/0x208 dpm_run_callback+0xd4/0x304 device_resume_early+0xb0/0x208 dpm_resume_early+0x224/0x528 suspend_devices_and_enter+0x3e4/0xd00 pm_suspend+0x3c4/0x910 state_store+0x90/0x124 kobj_attr_store+0x48/0x64 sysfs_kf_write+0x84/0xb4 kernfs_fop_write_iter+0x19c/0x264 vfs_write+0x664/0x858 ksys_write+0xc8/0x180 __arm64_sys_write+0x44/0x58 invoke_syscall+0x5c/0x178 el0_svc_common.constprop.0+0x11c/0x14c do_el0_svc+0x30/0x40 el0_svc+0x58/0xa8 el0t_64_sync_handler+0xc0/0xc4 el0t_64_sync+0x190/0x194 Fixes: 72f5801a4e2b ("dmaengine: fsl-edma: integrate v3 support") Signed-off-by: Xiaolei Wang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20231113225713.1892643-3-xiaolei.wang@windriver.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 52577fffc62b..aea7a703dda7 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -665,7 +665,8 @@ static int fsl_edma_resume_early(struct device *dev) fsl_edma_chan_mux(fsl_chan, fsl_chan->slave_id, true); } - edma_writel(fsl_edma, EDMA_CR_ERGA | EDMA_CR_ERCA, regs->cr); + if (!(fsl_edma->drvdata->flags & FSL_EDMA_DRV_SPLIT_REG)) + edma_writel(fsl_edma, EDMA_CR_ERGA | EDMA_CR_ERCA, regs->cr); return 0; } -- cgit From 3d1dc8b1030df8ca0fdfd4905c88ee10db943bf8 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 24 Nov 2023 14:40:15 +0200 Subject: ASoC: Intel: skl_hda_dsp_generic: Drop HDMI routes when HDMI is not available When the HDMI is not present due to disabled display support we will use dummy codec and the HDMI routes will refer to non existent DAPM widgets. Trim the route list from the HDMI routes to be able to probe the card even if the HDMI dais are not registered. Signed-off-by: Peter Ujfalusi Reviewed-by: Bard Liao Reviewed-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20231124124015.15878-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/skl_hda_dsp_generic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/intel/boards/skl_hda_dsp_generic.c b/sound/soc/intel/boards/skl_hda_dsp_generic.c index 6c6ef63cd5d9..6e172719c979 100644 --- a/sound/soc/intel/boards/skl_hda_dsp_generic.c +++ b/sound/soc/intel/boards/skl_hda_dsp_generic.c @@ -154,6 +154,8 @@ static int skl_hda_fill_card_info(struct snd_soc_acpi_mach_params *mach_params) card->dapm_widgets = skl_hda_widgets; card->num_dapm_widgets = ARRAY_SIZE(skl_hda_widgets); if (!ctx->idisp_codec) { + card->dapm_routes = &skl_hda_map[IDISP_ROUTE_COUNT]; + num_route -= IDISP_ROUTE_COUNT; for (i = 0; i < IDISP_DAI_COUNT; i++) { skl_hda_be_dai_links[i].codecs = &snd_soc_dummy_dlc; skl_hda_be_dai_links[i].num_codecs = 1; -- cgit From fba293488ccb1902e715da328e71aa868dd561f6 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 24 Nov 2023 14:40:32 +0200 Subject: ASoC: Intel: sof_sdw: Always register the HDMI dai links The topology files for SDW devices require HDMI dai links to be present and this is granted under normal conditions but in case of special use cases the display (i915) driver might not be enabled due to deny-listing, booting with nomodeset or just not compiled at all. This should not block the non HDMI audio to be usable so register the dai links unconditionally. The code has been prepared for this and in case of no HDMI audio the link is created with dummy codec. Closes: https://github.com/thesofproject/linux/issues/4594 Closes: https://github.com/thesofproject/linux/issues/4648 Signed-off-by: Peter Ujfalusi Reviewed-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20231124124032.15946-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 3312ad8a563b..4e4284729773 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -1546,7 +1546,7 @@ static int sof_card_dai_links_create(struct snd_soc_card *card) { struct device *dev = card->dev; struct snd_soc_acpi_mach *mach = dev_get_platdata(card->dev); - int sdw_be_num = 0, ssp_num = 0, dmic_num = 0, hdmi_num = 0, bt_num = 0; + int sdw_be_num = 0, ssp_num = 0, dmic_num = 0, bt_num = 0; struct mc_private *ctx = snd_soc_card_get_drvdata(card); struct snd_soc_acpi_mach_params *mach_params = &mach->mach_params; const struct snd_soc_acpi_link_adr *adr_link = mach_params->links; @@ -1564,6 +1564,7 @@ static int sof_card_dai_links_create(struct snd_soc_card *card) char *codec_name, *codec_dai_name; int i, j, be_id = 0; int codec_index; + int hdmi_num; int ret; ret = get_dailink_info(dev, adr_link, &sdw_be_num, &codec_conf_num); @@ -1584,14 +1585,13 @@ static int sof_card_dai_links_create(struct snd_soc_card *card) ssp_num = hweight_long(ssp_mask); } - if (mach_params->codec_mask & IDISP_CODEC_MASK) { + if (mach_params->codec_mask & IDISP_CODEC_MASK) ctx->hdmi.idisp_codec = true; - if (sof_sdw_quirk & SOF_SDW_TGL_HDMI) - hdmi_num = SOF_TGL_HDMI_COUNT; - else - hdmi_num = SOF_PRE_TGL_HDMI_COUNT; - } + if (sof_sdw_quirk & SOF_SDW_TGL_HDMI) + hdmi_num = SOF_TGL_HDMI_COUNT; + else + hdmi_num = SOF_PRE_TGL_HDMI_COUNT; /* enable dmic01 & dmic16k */ if (sof_sdw_quirk & SOF_SDW_PCH_DMIC || mach_params->dmic_num) @@ -1601,7 +1601,8 @@ static int sof_card_dai_links_create(struct snd_soc_card *card) bt_num = 1; dev_dbg(dev, "sdw %d, ssp %d, dmic %d, hdmi %d, bt: %d\n", - sdw_be_num, ssp_num, dmic_num, hdmi_num, bt_num); + sdw_be_num, ssp_num, dmic_num, + ctx->hdmi.idisp_codec ? hdmi_num : 0, bt_num); /* allocate BE dailinks */ num_links = sdw_be_num + ssp_num + dmic_num + hdmi_num + bt_num; -- cgit From 54bed6bafa0f38daf9697af50e3aff5ff1354fe1 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 6 Nov 2023 14:48:32 +0100 Subject: dmaengine: stm32-dma: avoid bitfield overflow assertion stm32_dma_get_burst() returns a negative error for invalid input, which gets turned into a large u32 value in stm32_dma_prep_dma_memcpy() that in turn triggers an assertion because it does not fit into a two-bit field: drivers/dma/stm32-dma.c: In function 'stm32_dma_prep_dma_memcpy': include/linux/compiler_types.h:354:38: error: call to '__compiletime_assert_282' declared with attribute error: FIELD_PREP: value too large for the field _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ include/linux/compiler_types.h:335:4: note: in definition of macro '__compiletime_assert' prefix ## suffix(); \ ^~~~~~ include/linux/compiler_types.h:354:2: note: in expansion of macro '_compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^~~~~~~~~~~~~~~~~~~ include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert' #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) ^~~~~~~~~~~~~~~~~~ include/linux/bitfield.h:68:3: note: in expansion of macro 'BUILD_BUG_ON_MSG' BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ ^~~~~~~~~~~~~~~~ include/linux/bitfield.h:114:3: note: in expansion of macro '__BF_FIELD_CHECK' __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: "); \ ^~~~~~~~~~~~~~~~ drivers/dma/stm32-dma.c:1237:4: note: in expansion of macro 'FIELD_PREP' FIELD_PREP(STM32_DMA_SCR_PBURST_MASK, dma_burst) | ^~~~~~~~~~ As an easy workaround, assume the error can happen, so try to handle this by failing stm32_dma_prep_dma_memcpy() before the assertion. It replicates what is done in stm32_dma_set_xfer_param() where stm32_dma_get_burst() is also used. Fixes: 1c32d6c37cc2 ("dmaengine: stm32-dma: use bitfield helpers") Fixes: a2b6103b7a8a ("dmaengine: stm32-dma: Improve memory burst management") Signed-off-by: Arnd Bergmann Signed-off-by: Amelie Delaunay Cc: stable@vger.kernel.org Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311060135.Q9eMnpCL-lkp@intel.com/ Link: https://lore.kernel.org/r/20231106134832.1470305-1-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-dma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index 72d83cd9ed6b..90857d08a1a7 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -1246,8 +1246,8 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy( enum dma_slave_buswidth max_width; struct stm32_dma_desc *desc; size_t xfer_count, offset; - u32 num_sgs, best_burst, dma_burst, threshold; - int i; + u32 num_sgs, best_burst, threshold; + int dma_burst, i; num_sgs = DIV_ROUND_UP(len, STM32_DMA_ALIGNED_MAX_DATA_ITEMS); desc = kzalloc(struct_size(desc, sg_req, num_sgs), GFP_NOWAIT); @@ -1266,6 +1266,10 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy( best_burst = stm32_dma_get_best_burst(len, STM32_DMA_MAX_BURST, threshold, max_width); dma_burst = stm32_dma_get_burst(chan, best_burst); + if (dma_burst < 0) { + kfree(desc); + return NULL; + } stm32_dma_clear_reg(&desc->sg_req[i].chan_reg); desc->sg_req[i].chan_reg.dma_scr = -- cgit From 0376b995bb7a65fb0c056f3adc5e9695ad0c1805 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Fri, 24 Nov 2023 15:57:42 +0200 Subject: ASoC: SOF: ipc4-topology: Add core_mask in struct snd_sof_pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With IPC4, a pipeline may contain multiple modules in the data processing domain and they can be scheduled to run on different cores. Add a new field in struct snd_sof_pipeline to keep track of all the cores that are associated with the modules in the pipeline. Set the pipeline core mask for IPC3 when initializing the pipeline widget IPC structure. For IPC4, set the core mark when initializing the pipeline widget and initializing processing modules in the data processing domain. Signed-off-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20231124135743.24674-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc3-topology.c | 2 ++ sound/soc/sof/ipc4-topology.c | 9 +++++++++ sound/soc/sof/sof-audio.h | 2 ++ 3 files changed, 13 insertions(+) diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c index ba4ef290b634..2c7a5e7a364c 100644 --- a/sound/soc/sof/ipc3-topology.c +++ b/sound/soc/sof/ipc3-topology.c @@ -493,6 +493,7 @@ static int sof_ipc3_widget_setup_comp_mixer(struct snd_sof_widget *swidget) static int sof_ipc3_widget_setup_comp_pipeline(struct snd_sof_widget *swidget) { struct snd_soc_component *scomp = swidget->scomp; + struct snd_sof_pipeline *spipe = swidget->spipe; struct sof_ipc_pipe_new *pipeline; struct snd_sof_widget *comp_swidget; int ret; @@ -545,6 +546,7 @@ static int sof_ipc3_widget_setup_comp_pipeline(struct snd_sof_widget *swidget) swidget->dynamic_pipeline_widget); swidget->core = pipeline->core; + spipe->core_mask |= BIT(pipeline->core); return 0; diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index b24a64377f68..19f36db30979 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -656,6 +656,7 @@ static int sof_ipc4_widget_setup_comp_pipeline(struct snd_sof_widget *swidget) { struct snd_soc_component *scomp = swidget->scomp; struct sof_ipc4_pipeline *pipeline; + struct snd_sof_pipeline *spipe = swidget->spipe; int ret; pipeline = kzalloc(sizeof(*pipeline), GFP_KERNEL); @@ -670,6 +671,7 @@ static int sof_ipc4_widget_setup_comp_pipeline(struct snd_sof_widget *swidget) } swidget->core = pipeline->core_id; + spipe->core_mask |= BIT(pipeline->core_id); if (pipeline->use_chain_dma) { dev_dbg(scomp->dev, "Set up chain DMA for %s\n", swidget->widget->name); @@ -797,6 +799,7 @@ err: static int sof_ipc4_widget_setup_comp_src(struct snd_sof_widget *swidget) { struct snd_soc_component *scomp = swidget->scomp; + struct snd_sof_pipeline *spipe = swidget->spipe; struct sof_ipc4_src *src; int ret; @@ -819,6 +822,8 @@ static int sof_ipc4_widget_setup_comp_src(struct snd_sof_widget *swidget) goto err; } + spipe->core_mask |= BIT(swidget->core); + dev_dbg(scomp->dev, "SRC sink rate %d\n", src->sink_rate); ret = sof_ipc4_widget_setup_msg(swidget, &src->msg); @@ -864,6 +869,7 @@ static int sof_ipc4_widget_setup_comp_process(struct snd_sof_widget *swidget) { struct snd_soc_component *scomp = swidget->scomp; struct sof_ipc4_fw_module *fw_module; + struct snd_sof_pipeline *spipe = swidget->spipe; struct sof_ipc4_process *process; void *cfg; int ret; @@ -920,6 +926,9 @@ static int sof_ipc4_widget_setup_comp_process(struct snd_sof_widget *swidget) sof_ipc4_widget_update_kcontrol_module_id(swidget); + /* set pipeline core mask to keep track of the core the module is scheduled to run on */ + spipe->core_mask |= BIT(swidget->core); + return 0; free_base_cfg_ext: kfree(process->base_config_ext); diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h index 5d5eeb1a1a6f..a6d6bcd00cee 100644 --- a/sound/soc/sof/sof-audio.h +++ b/sound/soc/sof/sof-audio.h @@ -480,6 +480,7 @@ struct snd_sof_widget { * @paused_count: Count of number of PCM's that have started and have currently paused this pipeline * @complete: flag used to indicate that pipeline set up is complete. + * @core_mask: Mask containing target cores for all modules in the pipeline * @list: List item in sdev pipeline_list */ struct snd_sof_pipeline { @@ -487,6 +488,7 @@ struct snd_sof_pipeline { int started_count; int paused_count; int complete; + unsigned long core_mask; struct list_head list; }; -- cgit From 31ed8da1c8e5e504710bb36863700e3389f8fc81 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Fri, 24 Nov 2023 15:57:43 +0200 Subject: ASoC: SOF: sof-audio: Modify logic for enabling/disabling topology cores MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the current code, we enable a widget core when it is set up and disable it when it is freed. This is problematic with IPC4 because widget free is essentially a NOP and all widgets are freed in the firmware when the pipeline is deleted. This results in a crash during pipeline deletion when one of it's widgets is scheduled to run on a secondary core and is powered off when widget is freed. So, change the logic to enable all cores needed by all the modules in a pipeline when the pipeline widget is set up and disable them after the pipeline widget is freed. Signed-off-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20231124135743.24674-3-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/sof-audio.c | 65 ++++++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c index 563fe6f7789f..77cc64ac7113 100644 --- a/sound/soc/sof/sof-audio.c +++ b/sound/soc/sof/sof-audio.c @@ -46,6 +46,7 @@ static int sof_widget_free_unlocked(struct snd_sof_dev *sdev, struct snd_sof_widget *swidget) { const struct sof_ipc_tplg_ops *tplg_ops = sof_ipc_get_ops(sdev, tplg); + struct snd_sof_pipeline *spipe = swidget->spipe; struct snd_sof_widget *pipe_widget; int err = 0; int ret; @@ -87,15 +88,22 @@ static int sof_widget_free_unlocked(struct snd_sof_dev *sdev, } /* - * disable widget core. continue to route setup status and complete flag - * even if this fails and return the appropriate error + * decrement ref count for cores associated with all modules in the pipeline and clear + * the complete flag */ - ret = snd_sof_dsp_core_put(sdev, swidget->core); - if (ret < 0) { - dev_err(sdev->dev, "error: failed to disable target core: %d for widget %s\n", - swidget->core, swidget->widget->name); - if (!err) - err = ret; + if (swidget->id == snd_soc_dapm_scheduler) { + int i; + + for_each_set_bit(i, &spipe->core_mask, sdev->num_cores) { + ret = snd_sof_dsp_core_put(sdev, i); + if (ret < 0) { + dev_err(sdev->dev, "failed to disable target core: %d for pipeline %s\n", + i, swidget->widget->name); + if (!err) + err = ret; + } + } + swidget->spipe->complete = 0; } /* @@ -108,10 +116,6 @@ static int sof_widget_free_unlocked(struct snd_sof_dev *sdev, err = ret; } - /* clear pipeline complete */ - if (swidget->id == snd_soc_dapm_scheduler) - swidget->spipe->complete = 0; - if (!err) dev_dbg(sdev->dev, "widget %s freed\n", swidget->widget->name); @@ -134,8 +138,10 @@ static int sof_widget_setup_unlocked(struct snd_sof_dev *sdev, struct snd_sof_widget *swidget) { const struct sof_ipc_tplg_ops *tplg_ops = sof_ipc_get_ops(sdev, tplg); + struct snd_sof_pipeline *spipe = swidget->spipe; bool use_count_decremented = false; int ret; + int i; /* skip if there is no private data */ if (!swidget->private) @@ -166,19 +172,23 @@ static int sof_widget_setup_unlocked(struct snd_sof_dev *sdev, goto use_count_dec; } - /* enable widget core */ - ret = snd_sof_dsp_core_get(sdev, swidget->core); - if (ret < 0) { - dev_err(sdev->dev, "error: failed to enable target core for widget %s\n", - swidget->widget->name); - goto pipe_widget_free; + /* update ref count for cores associated with all modules in the pipeline */ + if (swidget->id == snd_soc_dapm_scheduler) { + for_each_set_bit(i, &spipe->core_mask, sdev->num_cores) { + ret = snd_sof_dsp_core_get(sdev, i); + if (ret < 0) { + dev_err(sdev->dev, "failed to enable target core %d for pipeline %s\n", + i, swidget->widget->name); + goto pipe_widget_free; + } + } } /* setup widget in the DSP */ if (tplg_ops && tplg_ops->widget_setup) { ret = tplg_ops->widget_setup(sdev, swidget); if (ret < 0) - goto core_put; + goto pipe_widget_free; } /* send config for DAI components */ @@ -208,15 +218,22 @@ static int sof_widget_setup_unlocked(struct snd_sof_dev *sdev, return 0; widget_free: - /* widget use_count and core ref_count will both be decremented by sof_widget_free() */ + /* widget use_count will be decremented by sof_widget_free() */ sof_widget_free_unlocked(sdev, swidget); use_count_decremented = true; -core_put: - if (!use_count_decremented) - snd_sof_dsp_core_put(sdev, swidget->core); pipe_widget_free: - if (swidget->id != snd_soc_dapm_scheduler) + if (swidget->id != snd_soc_dapm_scheduler) { sof_widget_free_unlocked(sdev, swidget->spipe->pipe_widget); + } else { + int j; + + /* decrement ref count for all cores that were updated previously */ + for_each_set_bit(j, &spipe->core_mask, sdev->num_cores) { + if (j >= i) + break; + snd_sof_dsp_core_put(sdev, j); + } + } use_count_dec: if (!use_count_decremented) swidget->use_count--; -- cgit From f83d38def6b1b00c9bb17173837045b41df7e7d7 Mon Sep 17 00:00:00 2001 From: Chancel Liu Date: Sat, 25 Nov 2023 14:53:00 +0800 Subject: ASoC: imx-rpmsg: SND_SOC_IMX_RPMSG should depend on OF and I2C SND_SOC_IMX_RPMSG should depend on OF and I2C. It fixes the following error reported by kernel test robot: ld: sound/soc/fsl/imx-rpmsg.o: in function `imx_rpmsg_late_probe': imx-rpmsg.c:(.text+0x4f): undefined reference to `i2c_find_device_by_fwnode' Fixes: 5d9f746ca64c ("ASoC: imx-rpmsg: Force codec power on in low power audio mode") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311230506.DPF9vvYY-lkp@intel.com/ Signed-off-by: Chancel Liu Link: https://lore.kernel.org/r/20231125065300.6385-1-chancel.liu@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index 725c530a3636..be342ee03fb9 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -360,6 +360,7 @@ config SND_SOC_IMX_HDMI config SND_SOC_IMX_RPMSG tristate "SoC Audio support for i.MX boards with rpmsg" depends on RPMSG + depends on OF && I2C select SND_SOC_IMX_PCM_RPMSG select SND_SOC_IMX_AUDIO_RPMSG help -- cgit From fe4c5f662097978b6c91c23a13c24ed92339a180 Mon Sep 17 00:00:00 2001 From: "Jason-JH.Lin" Date: Wed, 20 Sep 2023 17:06:58 +0800 Subject: drm/mediatek: Add spinlock for setting vblank event in atomic_begin Add spinlock protection to avoid race condition on vblank event between mtk_drm_crtc_atomic_begin() and mtk_drm_finish_page_flip(). Fixes: 119f5173628a ("drm/mediatek: Add DRM Driver for Mediatek SoC MT8173.") Signed-off-by: Jason-JH.Lin Suggested-by: AngeloGioacchino Del Regno Reviewed-by: Alexandre Mergnat Reviewed-by: Fei Shao Tested-by: Fei Shao Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20230920090658.31181-1-jason-jh.lin@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index c277b9fae950..0a7d60c191b8 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -788,6 +788,7 @@ static void mtk_drm_crtc_atomic_begin(struct drm_crtc *crtc, crtc); struct mtk_crtc_state *mtk_crtc_state = to_mtk_crtc_state(crtc_state); struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); + unsigned long flags; if (mtk_crtc->event && mtk_crtc_state->base.event) DRM_ERROR("new event while there is still a pending event\n"); @@ -795,7 +796,11 @@ static void mtk_drm_crtc_atomic_begin(struct drm_crtc *crtc, if (mtk_crtc_state->base.event) { mtk_crtc_state->base.event->pipe = drm_crtc_index(crtc); WARN_ON(drm_crtc_vblank_get(crtc) != 0); + + spin_lock_irqsave(&crtc->dev->event_lock, flags); mtk_crtc->event = mtk_crtc_state->base.event; + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + mtk_crtc_state->base.event = NULL; } } -- cgit From 79997eda0d31bc68203c95ecb978773ee6ce7a1f Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sun, 26 Nov 2023 11:40:54 +0000 Subject: riscv: dts: microchip: move timebase-frequency to mpfs.dtsi The timebase-frequency on PolarFire SoC is not set by an oscillator on the board, but rather by an internal divider, so move the property to mpfs.dtsi. This looks to be copy-pasta from the SiFive Unleashed as the comments in both places were almost identical. In the Unleashed's case this looks to actually be valid, as the clock is provided by a crystal on the PCB. Signed-off-by: Conor Dooley --- CC: Conor Dooley CC: Daire McNamara CC: Rob Herring CC: Krzysztof Kozlowski CC: Paul Walmsley CC: Palmer Dabbelt CC: linux-riscv@lists.infradead.org CC: devicetree@vger.kernel.org --- arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts | 7 ------- arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts | 7 ------- arch/riscv/boot/dts/microchip/mpfs-polarberry.dts | 7 ------- arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts | 7 ------- arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts | 7 ------- arch/riscv/boot/dts/microchip/mpfs.dtsi | 1 + 6 files changed, 1 insertion(+), 35 deletions(-) diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts index 90b261114763..dce96f27cc89 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts @@ -8,9 +8,6 @@ #include #include -/* Clock frequency (in Hz) of the rtcclk */ -#define RTCCLK_FREQ 1000000 - / { model = "Microchip PolarFire-SoC Icicle Kit"; compatible = "microchip,mpfs-icicle-reference-rtlv2210", "microchip,mpfs-icicle-kit", @@ -29,10 +26,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = ; - }; - leds { compatible = "gpio-leds"; diff --git a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts index 184cb36a175e..a8d623ee9fa4 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts @@ -10,9 +10,6 @@ #include "mpfs.dtsi" #include "mpfs-m100pfs-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { model = "Aries Embedded M100PFEVPS"; compatible = "aries,m100pfsevp", "microchip,mpfs"; @@ -33,10 +30,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = ; - }; - ddrc_cache_lo: memory@80000000 { device_type = "memory"; reg = <0x0 0x80000000 0x0 0x40000000>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts index c87cc2d8fe29..ea0808ab1042 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts @@ -6,9 +6,6 @@ #include "mpfs.dtsi" #include "mpfs-polarberry-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { model = "Sundance PolarBerry"; compatible = "sundance,polarberry", "microchip,mpfs"; @@ -22,10 +19,6 @@ stdout-path = "serial0:115200n8"; }; - cpus { - timebase-frequency = ; - }; - ddrc_cache_lo: memory@80000000 { device_type = "memory"; reg = <0x0 0x80000000 0x0 0x2e000000>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts index 013cb666c72d..f9a890579438 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts @@ -6,9 +6,6 @@ #include "mpfs.dtsi" #include "mpfs-sev-kit-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { #address-cells = <2>; #size-cells = <2>; @@ -28,10 +25,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = ; - }; - reserved-memory { #address-cells = <2>; #size-cells = <2>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts b/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts index e0797c7e1b35..d1120f5f2c01 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts @@ -11,9 +11,6 @@ #include "mpfs.dtsi" #include "mpfs-tysom-m-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { model = "Aldec TySOM-M-MPFS250T-REV2"; compatible = "aldec,tysom-m-mpfs250t-rev2", "microchip,mpfs"; @@ -34,10 +31,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = ; - }; - ddrc_cache_lo: memory@80000000 { device_type = "memory"; reg = <0x0 0x80000000 0x0 0x30000000>; diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index a6faf24f1dba..266489d43912 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -13,6 +13,7 @@ cpus { #address-cells = <1>; #size-cells = <0>; + timebase-frequency = <1000000>; cpu0: cpu@0 { compatible = "sifive,e51", "sifive,rocket0", "riscv"; -- cgit From 2bfba37b3d90d6d2d499d5b0dfe99c05c38b1b54 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 19 Oct 2023 08:32:17 +0200 Subject: arm64: dt: imx93: tqma9352-mba93xxla: Fix LPUART2 pad config LPUART2_RTS# has an external pull-down, so do not enable the internal pull-up at the same time, use a pull-down instead. Fixes: c982ecfa7992a ("arm64: dts: freescale: add initial device tree for MBa93xxLA SBC board") Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts index f06139bdff97..3c5c67ebee5d 100644 --- a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts +++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts @@ -577,7 +577,7 @@ fsl,pins = < MX93_PAD_UART2_TXD__LPUART2_TX 0x31e MX93_PAD_UART2_RXD__LPUART2_RX 0x31e - MX93_PAD_SAI1_TXD0__LPUART2_RTS_B 0x31e + MX93_PAD_SAI1_TXD0__LPUART2_RTS_B 0x51e >; }; -- cgit From 6552218f4dc47ba3c6c5b58cc1e9eb208a2b438b Mon Sep 17 00:00:00 2001 From: Stefan Kerkmann Date: Wed, 1 Nov 2023 12:03:37 +0100 Subject: ARM: dts: imx6q: skov: fix ethernet clock regression A regression was introduced in the Skov specific i.MX6 flavor reve-mi1010ait-1cp1 device tree causing the external ethernet controller to not being selected as the clock source for the i.MX6 ethernet MAC, resulting in a none functional ethernet interface. The root cause is that the ethernet clock selection is now part of the clocks node, which is overwritten in the specific device tree and wasn't updated to contain these ethernet clocks. Fixes: c89614079e44 ("ARM: dts: imx6qdl-skov-cpu: configure ethernet reference clock parent") Signed-off-by: Stefan Kerkmann Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts index a3f247c722b4..0342a79ccd5d 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts +++ b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts @@ -37,9 +37,9 @@ &clks { assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, - <&clks IMX6QDL_CLK_LDB_DI1_SEL>; + <&clks IMX6QDL_CLK_LDB_DI1_SEL>, <&clks IMX6QDL_CLK_ENET_REF_SEL>; assigned-clock-parents = <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, - <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>; + <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, <&clk50m_phy>; }; &hdmi { -- cgit From 487635756198cad563feb47539c6a37ea57f1dae Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 27 Nov 2023 10:39:26 +0100 Subject: parisc: Fix asm operand number out of range build error in bug table Build is broken if CONFIG_DEBUG_BUGVERBOSE=n. Fix it be using the correct asm operand number. Signed-off-by: Helge Deller Reported-by: Linux Kernel Functional Testing Fixes: fe76a1349f23 ("parisc: Use natural CPU alignment for bug_table") Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/bug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index 1641ff9a8b83..833555f74ffa 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -71,7 +71,7 @@ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ - "\t.align %2\n" \ + "\t.align 4\n" \ "2:\t" __BUG_REL(1b) "\n" \ "\t.short %0\n" \ "\t.blockz %1-4-2\n" \ -- cgit From 06f76e464ac81c6915430b7155769ea4ef16efe4 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 23 Nov 2023 12:02:02 +0100 Subject: phy: mediatek: mipi: mt8183: fix minimal supported frequency The lowest supported clock frequency of the PHY is 125MHz (see also mtk_mipi_tx_pll_enable()), but the clamping in .round_rate() has the wrong minimal value, which will make the .enable() op return -EINVAL on low frequencies. Fix the minimal clamping value. Fixes: efda51a58b4a ("drm/mediatek: add mipi_tx driver for mt8183") Signed-off-by: Michael Walle Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231123110202.2025585-1-mwalle@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/mediatek/phy-mtk-mipi-dsi-mt8183.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/mediatek/phy-mtk-mipi-dsi-mt8183.c b/drivers/phy/mediatek/phy-mtk-mipi-dsi-mt8183.c index f021ec5a70e5..553725e1269c 100644 --- a/drivers/phy/mediatek/phy-mtk-mipi-dsi-mt8183.c +++ b/drivers/phy/mediatek/phy-mtk-mipi-dsi-mt8183.c @@ -100,7 +100,7 @@ static void mtk_mipi_tx_pll_disable(struct clk_hw *hw) static long mtk_mipi_tx_pll_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *prate) { - return clamp_val(rate, 50000000, 1600000000); + return clamp_val(rate, 125000000, 1600000000); } static const struct clk_ops mtk_mipi_tx_pll_ops = { -- cgit From 2a9c713825b3127ece11984abf973672c9779518 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Mon, 20 Nov 2023 17:10:47 +0800 Subject: phy: sunplus: return negative error code in sp_usb_phy_probe devm_phy_create() return negative error code, 'ret' should be 'PTR_ERR(phy)' rather than '-PTR_ERR(phy)'. Fixes: 99d9ccd97385 ("phy: usb: Add USB2.0 phy driver for Sunplus SP7021") Signed-off-by: Su Hui Link: https://lore.kernel.org/r/20231120091046.163781-1-suhui@nfschina.com Signed-off-by: Vinod Koul --- drivers/phy/sunplus/phy-sunplus-usb2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/sunplus/phy-sunplus-usb2.c b/drivers/phy/sunplus/phy-sunplus-usb2.c index 0efe74ac9c6a..637a5fbae6d9 100644 --- a/drivers/phy/sunplus/phy-sunplus-usb2.c +++ b/drivers/phy/sunplus/phy-sunplus-usb2.c @@ -275,7 +275,7 @@ static int sp_usb_phy_probe(struct platform_device *pdev) phy = devm_phy_create(&pdev->dev, NULL, &sp_uphy_ops); if (IS_ERR(phy)) { - ret = -PTR_ERR(phy); + ret = PTR_ERR(phy); return ret; } -- cgit From be6f9a39969a4ad01f0051a9b94af6e7f4d2d7ac Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 26 Nov 2023 11:24:49 +0200 Subject: mei: pxp: fix mei_pxp_send_message return value mei_pxp_send_message() should return zero on success and cannot propagate number of bytes as returned by internally called mei_cldev_send(). Fixes: ee5cb39348e6 ("mei: pxp: recover from recv fail under memory pressure") Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20231126092449.88310-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/pxp/mei_pxp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/mei/pxp/mei_pxp.c b/drivers/misc/mei/pxp/mei_pxp.c index f77d78fa5054..787c6a27a4be 100644 --- a/drivers/misc/mei/pxp/mei_pxp.c +++ b/drivers/misc/mei/pxp/mei_pxp.c @@ -84,9 +84,10 @@ mei_pxp_send_message(struct device *dev, const void *message, size_t size, unsig byte = ret; break; } + return byte; } - return byte; + return 0; } /** -- cgit From 8f06aee8089cf42fd99a20184501bd1347ce61b9 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Mon, 20 Nov 2023 17:55:23 +0800 Subject: misc: mei: client.c: return negative error code in mei_cl_write mei_msg_hdr_init() return negative error code, rets should be 'PTR_ERR(mei_hdr)' rather than '-PTR_ERR(mei_hdr)'. Fixes: 0cd7c01a60f8 ("mei: add support for mei extended header.") Signed-off-by: Su Hui Link: https://lore.kernel.org/r/20231120095523.178385-1-suhui@nfschina.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 9c8fc87938a7..7ea80779a0e2 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -2011,7 +2011,7 @@ ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, unsigned long time mei_hdr = mei_msg_hdr_init(cb); if (IS_ERR(mei_hdr)) { - rets = -PTR_ERR(mei_hdr); + rets = PTR_ERR(mei_hdr); mei_hdr = NULL; goto err; } -- cgit From ee6236027218f8531916f1c5caa5dc330379f287 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Mon, 20 Nov 2023 17:55:26 +0800 Subject: misc: mei: client.c: fix problem of return '-EOVERFLOW' in mei_cl_write Clang static analyzer complains that value stored to 'rets' is never read.Let 'buf_len = -EOVERFLOW' to make sure we can return '-EOVERFLOW'. Fixes: 8c8d964ce90f ("mei: move hbuf_depth from the mei device to the hw modules") Signed-off-by: Su Hui Link: https://lore.kernel.org/r/20231120095523.178385-2-suhui@nfschina.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 7ea80779a0e2..9d090fa07516 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -2032,7 +2032,7 @@ ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, unsigned long time hbuf_slots = mei_hbuf_empty_slots(dev); if (hbuf_slots < 0) { - rets = -EOVERFLOW; + buf_len = -EOVERFLOW; goto out; } -- cgit From 076a948f5ad0da8a4438fdb3ec1b4a473084b40a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 24 Nov 2023 10:50:30 +0100 Subject: ARM: dts: rockchip: minor whitespace cleanup around '=' The DTS code coding style expects exactly one space before and after '=' sign. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231124095031.58555-1-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rockchip/rk322x.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/rockchip/rk322x.dtsi b/arch/arm/boot/dts/rockchip/rk322x.dtsi index ffc16d6b97e1..a721744cbfd1 100644 --- a/arch/arm/boot/dts/rockchip/rk322x.dtsi +++ b/arch/arm/boot/dts/rockchip/rk322x.dtsi @@ -215,9 +215,9 @@ power-domain@RK3228_PD_VOP { reg = ; - clocks =<&cru ACLK_VOP>, - <&cru DCLK_VOP>, - <&cru HCLK_VOP>; + clocks = <&cru ACLK_VOP>, + <&cru DCLK_VOP>, + <&cru HCLK_VOP>; pm_qos = <&qos_vop>; #power-domain-cells = <0>; }; -- cgit From 93dc6cd15f207be502739072ad122fa5ac812908 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 24 Nov 2023 10:50:31 +0100 Subject: arm64: dts: rockchip: minor whitespace cleanup around '=' The DTS code coding style expects exactly one space before and after '=' sign. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231124095031.58555-2-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts | 2 +- arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts index de0a1f2af983..7d4c5324c61b 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts @@ -86,7 +86,7 @@ sgtl5000_clk: sgtl5000-oscillator { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <24576000>; + clock-frequency = <24576000>; }; dc_12v: dc-12v-regulator { diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts index 8f399c4317bd..e3a839a12dc6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts @@ -38,7 +38,7 @@ leds { compatible = "gpio-leds"; pinctrl-names = "default"; - pinctrl-0 =<&leds_gpio>; + pinctrl-0 = <&leds_gpio>; led-1 { gpios = <&gpio1 RK_PA2 GPIO_ACTIVE_HIGH>; -- cgit From ded965834b156c93dd914b0c6952e8f77929cf84 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 23 Nov 2023 14:49:27 +0100 Subject: dt-bindings: display: mediatek: dsi: remove Xinlei's mail Xinlei Lee's mail is bouncing: : host mailgw02.mediatek.com[216.200.240.185] said: 550 Relaying mail to xinlei.lee@mediatek.com is not allowed (in reply to RCPT TO command) Remove it. Signed-off-by: Michael Walle Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20231123134927.2034024-1-mwalle@kernel.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml index 12441b937684..43bfea32e5b6 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml @@ -10,7 +10,6 @@ maintainers: - Chun-Kuang Hu - Philipp Zabel - Jitao Shi - - Xinlei Lee description: | The MediaTek DSI function block is a sink of the display subsystem and can -- cgit From 76cab6f4fd63129e852eb15e14d6d359b57e797f Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 27 Nov 2023 12:00:26 +0800 Subject: ndtest: fix typo class_regster -> class_register Fixes: dd6cad2dcb58 ("testing: nvdimm: make struct class structures constant") Signed-off-by: Yi Zhang Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20231127040026.362729-1-yi.zhang@redhat.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/nvdimm/test/ndtest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index fd26189d53be..b8419f460368 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -924,7 +924,7 @@ static __init int ndtest_init(void) nfit_test_setup(ndtest_resource_lookup, NULL); - rc = class_regster(&ndtest_dimm_class); + rc = class_register(&ndtest_dimm_class); if (rc) goto err_register; -- cgit From d79972789d17499b6091ded2fc0c6763c501a5ba Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Thu, 23 Nov 2023 15:47:18 +0100 Subject: of: dynamic: Fix of_reconfig_get_state_change() return value documentation The documented numeric return values do not match the actual returned values. Fix them by using the enum names instead of raw numbers. Fixes: b53a2340d0d3 ("of/reconfig: Add of_reconfig_get_state_change() of notifier helper.") Signed-off-by: Luca Ceresoli Link: https://lore.kernel.org/r/20231123-fix-of_reconfig_get_state_change-docs-v1-1-f51892050ff9@bootlin.com Signed-off-by: Rob Herring --- drivers/of/dynamic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 0a3483e247a8..a5afe14eef28 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -98,8 +98,9 @@ int of_reconfig_notify(unsigned long action, struct of_reconfig_data *p) * * Returns the new state of a device based on the notifier used. * - * Return: 0 on device going from enabled to disabled, 1 on device - * going from disabled to enabled and -1 on no change. + * Return: OF_RECONFIG_CHANGE_REMOVE on device going from enabled to + * disabled, OF_RECONFIG_CHANGE_ADD on device going from disabled to + * enabled and OF_RECONFIG_NO_CHANGE on no change. */ int of_reconfig_get_state_change(unsigned long action, struct of_reconfig_data *pr) { -- cgit From e8d66d02defd3256a31c0ec09af63382b8682c0e Mon Sep 17 00:00:00 2001 From: Jagadeesh Kona Date: Tue, 7 Nov 2023 12:15:45 +0530 Subject: clk: qcom: Fix SM_CAMCC_8550 dependencies SM_GCC_8550 depends on ARM64 but it is selected by SM_CAMCC_8550, which should have the same dependencies as SM_GCC_8550 to avoid the below Kconfig warning reported by kernel test robot. WARNING: unmet direct dependencies detected for SM_GCC_8550 Depends on [n]: COMMON_CLK [=y] && COMMON_CLK_QCOM [=y] && (ARM64 || COMPILE_TEST [=n]) Selected by [y]: - SM_CAMCC_8550 [=y] && COMMON_CLK [=y] && COMMON_CLK_QCOM [=y] Fixes: ccc4e6a061a2 ("clk: qcom: camcc-sm8550: Add camera clock controller driver for SM8550") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311062309.XugQH7AH-lkp@intel.com/ Signed-off-by: Jagadeesh Kona Link: https://lore.kernel.org/r/20231107064545.13120-1-quic_jkona@quicinc.com Reviewed-by: Dmitry Baryshkov Tested-by: Randy Dunlap Reviewed-by: Randy Dunlap Signed-off-by: Stephen Boyd --- drivers/clk/qcom/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig index ad1acd9b7426..dbc3950c5960 100644 --- a/drivers/clk/qcom/Kconfig +++ b/drivers/clk/qcom/Kconfig @@ -767,6 +767,7 @@ config SM_CAMCC_8450 config SM_CAMCC_8550 tristate "SM8550 Camera Clock Controller" + depends on ARM64 || COMPILE_TEST select SM_GCC_8550 help Support for the camera clock controller on SM8550 devices. -- cgit From e199bf52ffda8f98f129728d57244a9cd9ad5623 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 24 Nov 2023 19:01:36 +0100 Subject: soundwire: stream: fix NULL pointer dereference for multi_link If bus is marked as multi_link, but number of masters in the stream is not higher than bus->hw_sync_min_links (bus->multi_link && m_rt_count >= bus->hw_sync_min_links), bank switching should not happen. The first part of do_bank_switch() code properly takes these conditions into account, but second part (sdw_ml_sync_bank_switch()) relies purely on bus->multi_link property. This is not balanced and leads to NULL pointer dereference: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ... Call trace: wait_for_completion_timeout+0x124/0x1f0 do_bank_switch+0x370/0x6f8 sdw_prepare_stream+0x2d0/0x438 qcom_snd_sdw_prepare+0xa0/0x118 sm8450_snd_prepare+0x128/0x148 snd_soc_link_prepare+0x5c/0xe8 __soc_pcm_prepare+0x28/0x1ec dpcm_be_dai_prepare+0x1e0/0x2c0 dpcm_fe_dai_prepare+0x108/0x28c snd_pcm_do_prepare+0x44/0x68 snd_pcm_action_single+0x54/0xc0 snd_pcm_action_nonatomic+0xe4/0xec snd_pcm_prepare+0xc4/0x114 snd_pcm_common_ioctl+0x1154/0x1cc0 snd_pcm_ioctl+0x54/0x74 Fixes: ce6e74d008ff ("soundwire: Add support for multi link bank switch") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20231124180136.390621-1-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- drivers/soundwire/stream.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index 69719b335bcb..f048b3d55b2e 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -742,14 +742,15 @@ error_1: * sdw_ml_sync_bank_switch: Multilink register bank switch * * @bus: SDW bus instance + * @multi_link: whether this is a multi-link stream with hardware-based sync * * Caller function should free the buffers on error */ -static int sdw_ml_sync_bank_switch(struct sdw_bus *bus) +static int sdw_ml_sync_bank_switch(struct sdw_bus *bus, bool multi_link) { unsigned long time_left; - if (!bus->multi_link) + if (!multi_link) return 0; /* Wait for completion of transfer */ @@ -847,7 +848,7 @@ static int do_bank_switch(struct sdw_stream_runtime *stream) bus->bank_switch_timeout = DEFAULT_BANK_SWITCH_TIMEOUT; /* Check if bank switch was successful */ - ret = sdw_ml_sync_bank_switch(bus); + ret = sdw_ml_sync_bank_switch(bus, multi_link); if (ret < 0) { dev_err(bus->dev, "multi link bank switch failed: %d\n", ret); -- cgit From 98dcc6be3859fb15257750b8e1d4e0eefd2c5e1e Mon Sep 17 00:00:00 2001 From: Finley Xiao Date: Mon, 27 Nov 2023 19:14:16 +0100 Subject: clk: rockchip: rk3128: Fix aclk_peri_src's parent According to the TRM there are no specific gpll_peri, cpll_peri, gpll_div2_peri or gpll_div3_peri gates, but a single clk_peri_src gate. Instead mux_clk_peri_src directly connects to the plls respectively the pll divider clocks. Fix this by creating a single gated composite. Also rename all occurrences of aclk_peri_src to clk_peri_src, since it is the parent for peri aclks, pclks and hclks. That name also matches the one used in the TRM. Fixes: f6022e88faca ("clk: rockchip: add clock controller for rk3128") Signed-off-by: Finley Xiao [renamed aclk_peri_src -> clk_peri_src and added commit message] Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20231127181415.11735-4-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3128.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/clk/rockchip/clk-rk3128.c b/drivers/clk/rockchip/clk-rk3128.c index 7782785a86e6..17bacf6dd6e7 100644 --- a/drivers/clk/rockchip/clk-rk3128.c +++ b/drivers/clk/rockchip/clk-rk3128.c @@ -138,7 +138,7 @@ PNAME(mux_pll_src_5plls_p) = { "cpll", "gpll", "gpll_div2", "gpll_div3", "usb480 PNAME(mux_pll_src_4plls_p) = { "cpll", "gpll", "gpll_div2", "usb480m" }; PNAME(mux_pll_src_3plls_p) = { "cpll", "gpll", "gpll_div2" }; -PNAME(mux_aclk_peri_src_p) = { "gpll_peri", "cpll_peri", "gpll_div2_peri", "gpll_div3_peri" }; +PNAME(mux_clk_peri_src_p) = { "gpll", "cpll", "gpll_div2", "gpll_div3" }; PNAME(mux_mmc_src_p) = { "cpll", "gpll", "gpll_div2", "xin24m" }; PNAME(mux_clk_cif_out_src_p) = { "clk_cif_src", "xin24m" }; PNAME(mux_sclk_vop_src_p) = { "cpll", "gpll", "gpll_div2", "gpll_div3" }; @@ -275,23 +275,17 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { RK2928_CLKGATE_CON(0), 11, GFLAGS), /* PD_PERI */ - GATE(0, "gpll_peri", "gpll", CLK_IGNORE_UNUSED, + COMPOSITE(0, "clk_peri_src", mux_clk_peri_src_p, 0, + RK2928_CLKSEL_CON(10), 14, 2, MFLAGS, 0, 5, DFLAGS, RK2928_CLKGATE_CON(2), 0, GFLAGS), - GATE(0, "cpll_peri", "cpll", CLK_IGNORE_UNUSED, - RK2928_CLKGATE_CON(2), 0, GFLAGS), - GATE(0, "gpll_div2_peri", "gpll_div2", CLK_IGNORE_UNUSED, - RK2928_CLKGATE_CON(2), 0, GFLAGS), - GATE(0, "gpll_div3_peri", "gpll_div3", CLK_IGNORE_UNUSED, - RK2928_CLKGATE_CON(2), 0, GFLAGS), - COMPOSITE_NOGATE(0, "aclk_peri_src", mux_aclk_peri_src_p, 0, - RK2928_CLKSEL_CON(10), 14, 2, MFLAGS, 0, 5, DFLAGS), - COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_src", 0, + + COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "clk_peri_src", 0, RK2928_CLKSEL_CON(10), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, RK2928_CLKGATE_CON(2), 3, GFLAGS), - COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_src", 0, + COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "clk_peri_src", 0, RK2928_CLKSEL_CON(10), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, RK2928_CLKGATE_CON(2), 2, GFLAGS), - GATE(ACLK_PERI, "aclk_peri", "aclk_peri_src", 0, + GATE(ACLK_PERI, "aclk_peri", "clk_peri_src", 0, RK2928_CLKGATE_CON(2), 1, GFLAGS), GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, -- cgit From 99fe9ee56bd2f7358f1bc72551c2f3a6bbddf80a Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Mon, 27 Nov 2023 19:14:18 +0100 Subject: clk: rockchip: rk3128: Fix SCLK_SDMMC's clock name SCLK_SDMMC is the parent for SCLK_SDMMC_DRV and SCLK_SDMMC_SAMPLE, but used with the (more) correct name sclk_sdmmc. SD card tuning does currently fail as the parent can't be found under that name. There is no need to suffix the name with '0' since RK312x SoCs do have a single sdmmc controller - so rename it to the name which is already used by it's children. Fixes: f6022e88faca ("clk: rockchip: add clock controller for rk3128") Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20231127181415.11735-6-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3128.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/clk-rk3128.c b/drivers/clk/rockchip/clk-rk3128.c index 17bacf6dd6e7..75071e0cd321 100644 --- a/drivers/clk/rockchip/clk-rk3128.c +++ b/drivers/clk/rockchip/clk-rk3128.c @@ -310,7 +310,7 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { GATE(SCLK_MIPI_24M, "clk_mipi_24m", "xin24m", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(2), 15, GFLAGS), - COMPOSITE(SCLK_SDMMC, "sclk_sdmmc0", mux_mmc_src_p, 0, + COMPOSITE(SCLK_SDMMC, "sclk_sdmmc", mux_mmc_src_p, 0, RK2928_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 6, DFLAGS, RK2928_CLKGATE_CON(2), 11, GFLAGS), -- cgit From 393cae5f32d640b9798903702018a48c7a45e59f Mon Sep 17 00:00:00 2001 From: Chao Song Date: Mon, 27 Nov 2023 20:47:35 +0800 Subject: soundwire: intel_ace2x: fix AC timing setting for ACE2.x Start from ACE1.x, DOAISE is added to AC timing control register bit 5, it combines with DOAIS to get effective timing, and has the default value 1. The current code fills DOAIS, DACTQE and DODS bits to a variable initialized to zero, and updates the variable to AC timing control register. With this operation, We change DOAISE to 0, and force a much more aggressive timing. The timing is even unable to form a working waveform on SDA pin. This patch uses read-modify-write operation for the AC timing control register access, thus makes sure those bits not supposed and intended to change are not touched. Signed-off-by: Chao Song Reviewed-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20231127124735.2080562-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel_ace2x.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soundwire/intel_ace2x.c b/drivers/soundwire/intel_ace2x.c index 82672fcbc2aa..8280baa3254b 100644 --- a/drivers/soundwire/intel_ace2x.c +++ b/drivers/soundwire/intel_ace2x.c @@ -23,8 +23,9 @@ static void intel_shim_vs_init(struct sdw_intel *sdw) { void __iomem *shim_vs = sdw->link_res->shim_vs; - u16 act = 0; + u16 act; + act = intel_readw(shim_vs, SDW_SHIM2_INTEL_VS_ACTMCTL); u16p_replace_bits(&act, 0x1, SDW_SHIM2_INTEL_VS_ACTMCTL_DOAIS); act |= SDW_SHIM2_INTEL_VS_ACTMCTL_DACTQE; act |= SDW_SHIM2_INTEL_VS_ACTMCTL_DODS; -- cgit From 0c349b5001f8bdcead844484c15a0c4dfb341157 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Mon, 27 Nov 2023 19:46:44 +0100 Subject: ARM: dts: rockchip: Fix sdmmc_pwren's pinmux setting for RK3128 RK3128's reference design uses sdmmc_pwren pincontrol as GPIO - see [0]. Let's change it in the SoC DT as well. [0] https://github.com/rockchip-linux/kernel/commit/8c62deaf6025 Fixes: a0201bff6259 ("ARM: dts: rockchip: add rk3128 soc dtsi") Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20231127184643.13314-2-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rockchip/rk3128.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rockchip/rk3128.dtsi b/arch/arm/boot/dts/rockchip/rk3128.dtsi index 7bf557c99561..01edf244ddee 100644 --- a/arch/arm/boot/dts/rockchip/rk3128.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3128.dtsi @@ -848,7 +848,7 @@ }; sdmmc_pwren: sdmmc-pwren { - rockchip,pins = <1 RK_PB6 1 &pcfg_pull_default>; + rockchip,pins = <1 RK_PB6 RK_FUNC_GPIO &pcfg_pull_default>; }; sdmmc_bus4: sdmmc-bus4 { -- cgit From e0894ff038d86f30614ec16ec26dacb88c8d2bd4 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Mon, 27 Nov 2023 12:05:21 +1300 Subject: platform/x86: asus-wmi: disable USB0 hub on ROG Ally before suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ASUS have worked around an issue in XInput where it doesn't support USB selective suspend, which causes suspend issues in Windows. They worked around this by adjusting the MCU firmware to disable the USB0 hub when the screen is switched off during the Microsoft DSM suspend path in ACPI. The issue we have with this however is one of timing - the call the tells the MCU to this isn't able to complete before suspend is done so we call this in a prepare() and add a small msleep() to ensure it is done. This must be done before the screen is switched off to prevent a variety of possible races. Further to this the MCU powersave option must also be disabled as it can cause a number of issues such as: - unreliable resume connection of N-Key - complete loss of N-Key if the power is plugged in while suspended Disabling the powersave option prevents this. Without this the MCU is unable to initialise itself correctly on resume. Signed-off-by: "Luke D. Jones" Tested-by: Philip Mueller Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20231126230521.125708-2-luke@ljones.dev Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-wmi.c | 50 ++++++++++++++++++++++++++++++ include/linux/platform_data/x86/asus-wmi.h | 3 ++ 2 files changed, 53 insertions(+) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index ca668cf04020..9f7e23c5c6b4 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -132,6 +133,11 @@ module_param(fnlock_default, bool, 0444); #define ASUS_SCREENPAD_BRIGHT_MAX 255 #define ASUS_SCREENPAD_BRIGHT_DEFAULT 60 +/* Controls the power state of the USB0 hub on ROG Ally which input is on */ +#define ASUS_USB0_PWR_EC0_CSEE "\\_SB.PCI0.SBRG.EC0.CSEE" +/* 300ms so far seems to produce a reliable result on AC and battery */ +#define ASUS_USB0_PWR_EC0_CSEE_WAIT 300 + static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; static int throttle_thermal_policy_write(struct asus_wmi *); @@ -300,6 +306,9 @@ struct asus_wmi { bool fnlock_locked; + /* The ROG Ally device requires the MCU USB device be disconnected before suspend */ + bool ally_mcu_usb_switch; + struct asus_wmi_debug debug; struct asus_wmi_driver *driver; @@ -4488,6 +4497,8 @@ static int asus_wmi_add(struct platform_device *pdev) asus->nv_temp_tgt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_THERM_TARGET); asus->panel_overdrive_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PANEL_OD); asus->mini_led_mode_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE); + asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) + && dmi_match(DMI_BOARD_NAME, "RC71L"); err = fan_boost_mode_check_present(asus); if (err) @@ -4662,6 +4673,43 @@ static int asus_hotk_resume(struct device *device) asus_wmi_fnlock_update(asus); asus_wmi_tablet_mode_get_state(asus); + + return 0; +} + +static int asus_hotk_resume_early(struct device *device) +{ + struct asus_wmi *asus = dev_get_drvdata(device); + + if (asus->ally_mcu_usb_switch) { + if (ACPI_FAILURE(acpi_execute_simple_method(NULL, ASUS_USB0_PWR_EC0_CSEE, 0xB8))) + dev_err(device, "ROG Ally MCU failed to connect USB dev\n"); + else + msleep(ASUS_USB0_PWR_EC0_CSEE_WAIT); + } + return 0; +} + +static int asus_hotk_prepare(struct device *device) +{ + struct asus_wmi *asus = dev_get_drvdata(device); + int result, err; + + if (asus->ally_mcu_usb_switch) { + /* When powersave is enabled it causes many issues with resume of USB hub */ + result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_MCU_POWERSAVE); + if (result == 1) { + dev_warn(device, "MCU powersave enabled, disabling to prevent resume issues"); + err = asus_wmi_set_devstate(ASUS_WMI_DEVID_MCU_POWERSAVE, 0, &result); + if (err || result != 1) + dev_err(device, "Failed to set MCU powersave mode: %d\n", err); + } + /* sleep required to ensure USB0 is disabled before sleep continues */ + if (ACPI_FAILURE(acpi_execute_simple_method(NULL, ASUS_USB0_PWR_EC0_CSEE, 0xB7))) + dev_err(device, "ROG Ally MCU failed to disconnect USB dev\n"); + else + msleep(ASUS_USB0_PWR_EC0_CSEE_WAIT); + } return 0; } @@ -4709,6 +4757,8 @@ static const struct dev_pm_ops asus_pm_ops = { .thaw = asus_hotk_thaw, .restore = asus_hotk_restore, .resume = asus_hotk_resume, + .resume_early = asus_hotk_resume_early, + .prepare = asus_hotk_prepare, }; /* Registration ***************************************************************/ diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 63e630276499..ab1c7deff118 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -114,6 +114,9 @@ /* Charging mode - 1=Barrel, 2=USB */ #define ASUS_WMI_DEVID_CHARGE_MODE 0x0012006C +/* MCU powersave mode */ +#define ASUS_WMI_DEVID_MCU_POWERSAVE 0x001200E2 + /* epu is connected? 1 == true */ #define ASUS_WMI_DEVID_EGPU_CONNECTED 0x00090018 /* egpu on/off */ -- cgit From cff5f49d433fcd0063c8be7dd08fa5bf190c6c37 Mon Sep 17 00:00:00 2001 From: Tim Van Patten Date: Wed, 15 Nov 2023 09:20:43 -0700 Subject: cgroup_freezer: cgroup_freezing: Check if not frozen __thaw_task() was recently updated to warn if the task being thawed was part of a freezer cgroup that is still currently freezing: void __thaw_task(struct task_struct *p) { ... if (WARN_ON_ONCE(freezing(p))) goto unlock; This has exposed a bug in cgroup1 freezing where when CGROUP_FROZEN is asserted, the CGROUP_FREEZING bits are not also cleared at the same time. Meaning, when a cgroup is marked FROZEN it continues to be marked FREEZING as well. This causes the WARNING to trigger, because cgroup_freezing() thinks the cgroup is still freezing. There are two ways to fix this: 1. Whenever FROZEN is set, clear FREEZING for the cgroup and all children cgroups. 2. Update cgroup_freezing() to also verify that FROZEN is not set. This patch implements option (2), since it's smaller and more straightforward. Signed-off-by: Tim Van Patten Tested-by: Mark Hasemeyer Fixes: f5d39b020809 ("freezer,sched: Rewrite core freezer logic") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Tejun Heo --- kernel/cgroup/legacy_freezer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c index 122dacb3a443..66d1708042a7 100644 --- a/kernel/cgroup/legacy_freezer.c +++ b/kernel/cgroup/legacy_freezer.c @@ -66,9 +66,15 @@ static struct freezer *parent_freezer(struct freezer *freezer) bool cgroup_freezing(struct task_struct *task) { bool ret; + unsigned int state; rcu_read_lock(); - ret = task_freezer(task)->state & CGROUP_FREEZING; + /* Check if the cgroup is still FREEZING, but not FROZEN. The extra + * !FROZEN check is required, because the FREEZING bit is not cleared + * when the state FROZEN is reached. + */ + state = task_freezer(task)->state; + ret = (state & CGROUP_FREEZING) && !(state & CGROUP_FROZEN); rcu_read_unlock(); return ret; -- cgit From af54d778a03853801d681c98c0c2a6c316ef9ca7 Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Fri, 17 Nov 2023 20:19:32 +0530 Subject: devcoredump: Send uevent once devcd is ready dev_coredumpm() creates a devcoredump device and adds it to the core kernel framework which eventually end up sending uevent to the user space and later creates a symbolic link to the failed device. An application running in userspace may be interested in this symbolic link to get the name of the failed device. In a issue scenario, once uevent sent to the user space it start reading '/sys/class/devcoredump/devcdX/failing_device' to get the actual name of the device which might not been created and it is in its path of creation. To fix this, suppress sending uevent till the failing device symbolic link gets created and send uevent once symbolic link is created successfully. Fixes: 833c95456a70 ("device coredump: add new device coredump class") Signed-off-by: Mukesh Ojha Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1700232572-25823-1-git-send-email-quic_mojha@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/devcoredump.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index 91536ee05f14..7e2d1f0d903a 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -362,6 +362,7 @@ void dev_coredumpm(struct device *dev, struct module *owner, devcd->devcd_dev.class = &devcd_class; mutex_lock(&devcd->mutex); + dev_set_uevent_suppress(&devcd->devcd_dev, true); if (device_add(&devcd->devcd_dev)) goto put_device; @@ -376,6 +377,8 @@ void dev_coredumpm(struct device *dev, struct module *owner, "devcoredump")) dev_warn(dev, "devcoredump create_link failed\n"); + dev_set_uevent_suppress(&devcd->devcd_dev, false); + kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); mutex_unlock(&devcd->mutex); -- cgit From dc761f11af2e39119d3a7942e3d10615f3d900e7 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 18 Nov 2023 13:42:52 +0100 Subject: ARM: dts: bcm2711-rpi-400: Fix delete-node of led_act The LED ACT which is included from bcm2711-rpi-4-b doesn't exists on the Raspberry Pi 400. So the bcm2711-rpi-400.dts tries to use the delete-node directive in order to remove the complete node. Unfortunately the usage get broken in commit 1156e3a78bcc ("ARM: dts: bcm283x: Move ACT LED into separate dtsi") and now ACT and PWR LED using the same GPIO and this prevent probing of led-gpios on Raspberry Pi 400: leds-gpio: probe of leds failed with error -16 So fix the delete-node directive. Fixes: 1156e3a78bcc ("ARM: dts: bcm283x: Move ACT LED into separate dtsi") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20231118124252.14838-3-wahrenst@gmx.net Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts index 1ab8184302db..5a2869a18bd5 100644 --- a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts +++ b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts @@ -36,9 +36,7 @@ gpios = <&gpio 42 GPIO_ACTIVE_HIGH>; }; -&leds { - /delete-node/ led_act; -}; +/delete-node/ &led_act; &pm { /delete-property/ system-power-controller; -- cgit From c8820c92caf0770bec976b01fa9e82bb993c5865 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Tue, 28 Nov 2023 20:49:35 +0100 Subject: platform/surface: aggregator: fix recv_buf() return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Serdev recv_buf() callback is supposed to return the amount of bytes consumed, therefore an int in between 0 and count. Do not return negative number in case of issue, when ssam_controller_receive_buf() returns ESHUTDOWN just returns 0, e.g. no bytes consumed, this keep the exact same behavior as it was before. This fixes a potential WARN in serdev-ttyport.c:ttyport_receive_buf(). Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") Cc: stable@vger.kernel.org Signed-off-by: Francesco Dolcini Reviewed-by: Maximilian Luz Link: https://lore.kernel.org/r/20231128194935.11350-1-francesco@dolcini.it Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/surface/aggregator/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/platform/surface/aggregator/core.c b/drivers/platform/surface/aggregator/core.c index 1a6373dea109..6152be38398c 100644 --- a/drivers/platform/surface/aggregator/core.c +++ b/drivers/platform/surface/aggregator/core.c @@ -231,9 +231,12 @@ static int ssam_receive_buf(struct serdev_device *dev, const unsigned char *buf, size_t n) { struct ssam_controller *ctrl; + int ret; ctrl = serdev_device_get_drvdata(dev); - return ssam_controller_receive_buf(ctrl, buf, n); + ret = ssam_controller_receive_buf(ctrl, buf, n); + + return ret < 0 ? 0 : ret; } static void ssam_write_wakeup(struct serdev_device *dev) -- cgit From 61b94d54421a1f3670ddd5396ec70afe833e9405 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 6 Jul 2023 11:58:41 +0200 Subject: arm64: dts: mediatek: mt8195: Fix PM suspend/resume with venc clocks Before suspending the LARBs we're making sure that any operation is done: this never happens because we are unexpectedly unclocking the LARB20 before executing the suspend handler for the MediaTek Smart Multimedia Interface (SMI) and the cause of this is incorrect clocks on this LARB. Fix this issue by changing the Local Arbiter 20 (used by the video encoder secondary core) apb clock to CLK_VENC_CORE1_VENC; furthermore, in order to make sure that both the PM resume and video encoder operation is stable, add the CLK_VENC(_CORE1)_LARB clock to the VENC (main core) and VENC_CORE1 power domains, as this IP cannot communicate with the rest of the system (the AP) without local arbiter clocks being operational. Cc: stable@vger.kernel.org Fixes: 3b5838d1d82e ("arm64: dts: mt8195: Add iommu and smi nodes") Fixes: 2b515194bf0c ("arm64: dts: mt8195: Add power domains controller") Reviewed-by: Alexandre Mergnat Link: https://lore.kernel.org/r/20230706095841.109315-1-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 54c674c45b49..e0ac2e9f5b72 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -627,6 +627,8 @@ power-domain@MT8195_POWER_DOMAIN_VENC_CORE1 { reg = ; + clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>; + clock-names = "venc1-larb"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -689,6 +691,8 @@ power-domain@MT8195_POWER_DOMAIN_VENC { reg = ; + clocks = <&vencsys CLK_VENC_LARB>; + clock-names = "venc0-larb"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -2665,7 +2669,7 @@ reg = <0 0x1b010000 0 0x1000>; mediatek,larb-id = <20>; mediatek,smi = <&smi_common_vpp>; - clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>, + clocks = <&vencsys_core1 CLK_VENC_CORE1_VENC>, <&vencsys_core1 CLK_VENC_CORE1_GALS>, <&vppsys0 CLK_VPP0_GALS_VDO0_VDO1_VENCSYS_CORE1>; clock-names = "apb", "smi", "gals"; -- cgit From 19cba9a6c071db57888dc6b2ec1d9bf8996ea681 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 25 Oct 2023 11:38:13 +0200 Subject: arm64: dts: mediatek: mt8183: Fix unit address for scp reserved memory The reserved memory for scp had node name "scp_mem_region" and also without unit-address: change the name to "memory@(address)". This fixes a unit_address_vs_reg warning. Cc: stable@vger.kernel.org Fixes: 1652dbf7363a ("arm64: dts: mt8183: add scp node") Link: https://lore.kernel.org/r/20231025093816.44327-6-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8183-evb.dts | 2 +- arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts index ce336a48c897..7adf473f15dc 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts @@ -31,7 +31,7 @@ #address-cells = <2>; #size-cells = <2>; ranges; - scp_mem_reserved: scp_mem_region { + scp_mem_reserved: memory@50000000 { compatible = "shared-dma-pool"; reg = <0 0x50000000 0 0x2900000>; no-map; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index bf7de35ffcbc..6e421e941e3a 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -116,7 +116,7 @@ #size-cells = <2>; ranges; - scp_mem_reserved: scp_mem_region { + scp_mem_reserved: memory@50000000 { compatible = "shared-dma-pool"; reg = <0 0x50000000 0 0x2900000>; no-map; -- cgit From 9dea1c724fc36643e83216c1f5a26613412150db Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 25 Oct 2023 11:38:14 +0200 Subject: arm64: dts: mediatek: mt8183-evb: Fix unit_address_vs_reg warning on ntc The NTC is defined as ntc@0 but it doesn't need any address at all. Fix the unit_address_vs_reg warning by dropping the unit address: since the node name has to be generic also fully rename it from ntc@0 to thermal-sensor. Cc: stable@vger.kernel.org Fixes: ff9ea5c62279 ("arm64: dts: mediatek: mt8183-evb: Add node for thermistor") Link: https://lore.kernel.org/r/20231025093816.44327-7-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8183-evb.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts index 7adf473f15dc..77f9ab94c00b 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts @@ -38,7 +38,7 @@ }; }; - ntc@0 { + thermal-sensor { compatible = "murata,ncp03wf104"; pullup-uv = <1800000>; pullup-ohm = <390000>; -- cgit From 24165c5dad7ba7c7624d05575a5e0cc851396c71 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 25 Oct 2023 11:38:15 +0200 Subject: arm64: dts: mediatek: mt8173-evb: Fix regulator-fixed node names Fix a unit_address_vs_reg warning for the USB VBUS fixed regulators by renaming the regulator nodes from regulator@{0,1} to regulator-usb-p0 and regulator-usb-p1. Cc: stable@vger.kernel.org Fixes: c0891284a74a ("arm64: dts: mediatek: add USB3 DRD driver") Link: https://lore.kernel.org/r/20231025093816.44327-8-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8173-evb.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts index 5122963d8743..d258c80213b2 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts @@ -44,7 +44,7 @@ id-gpio = <&pio 16 GPIO_ACTIVE_HIGH>; }; - usb_p1_vbus: regulator@0 { + usb_p1_vbus: regulator-usb-p1 { compatible = "regulator-fixed"; regulator-name = "usb_vbus"; regulator-min-microvolt = <5000000>; @@ -53,7 +53,7 @@ enable-active-high; }; - usb_p0_vbus: regulator@1 { + usb_p0_vbus: regulator-usb-p0 { compatible = "regulator-fixed"; regulator-name = "vbus"; regulator-min-microvolt = <5000000>; -- cgit From 5a60d63439694590cd5ab1f998fc917ff7ba1c1d Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 25 Oct 2023 11:38:16 +0200 Subject: arm64: dts: mediatek: mt8183: Move thermal-zones to the root node The thermal zones are not a soc bus device: move it to the root node to solve simple_bus_reg warnings. Cc: stable@vger.kernel.org Fixes: b325ce39785b ("arm64: dts: mt8183: add thermal zone node") Link: https://lore.kernel.org/r/20231025093816.44327-9-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8183.dtsi | 242 +++++++++++++++---------------- 1 file changed, 121 insertions(+), 121 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index 5169779d01df..976dc968b3ca 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -1210,127 +1210,6 @@ nvmem-cell-names = "calibration-data"; }; - thermal_zones: thermal-zones { - cpu_thermal: cpu-thermal { - polling-delay-passive = <100>; - polling-delay = <500>; - thermal-sensors = <&thermal 0>; - sustainable-power = <5000>; - - trips { - threshold: trip-point0 { - temperature = <68000>; - hysteresis = <2000>; - type = "passive"; - }; - - target: trip-point1 { - temperature = <80000>; - hysteresis = <2000>; - type = "passive"; - }; - - cpu_crit: cpu-crit { - temperature = <115000>; - hysteresis = <2000>; - type = "critical"; - }; - }; - - cooling-maps { - map0 { - trip = <&target>; - cooling-device = <&cpu0 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu1 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu2 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu3 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>; - contribution = <3072>; - }; - map1 { - trip = <&target>; - cooling-device = <&cpu4 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu5 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu6 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu7 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>; - contribution = <1024>; - }; - }; - }; - - /* The tzts1 ~ tzts6 don't need to polling */ - /* The tzts1 ~ tzts6 don't need to thermal throttle */ - - tzts1: tzts1 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 1>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts2: tzts2 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 2>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts3: tzts3 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 3>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts4: tzts4 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 4>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts5: tzts5 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 5>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tztsABB: tztsABB { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 6>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - }; - pwm0: pwm@1100e000 { compatible = "mediatek,mt8183-disp-pwm"; reg = <0 0x1100e000 0 0x1000>; @@ -2105,4 +1984,125 @@ power-domains = <&spm MT8183_POWER_DOMAIN_CAM>; }; }; + + thermal_zones: thermal-zones { + cpu_thermal: cpu-thermal { + polling-delay-passive = <100>; + polling-delay = <500>; + thermal-sensors = <&thermal 0>; + sustainable-power = <5000>; + + trips { + threshold: trip-point0 { + temperature = <68000>; + hysteresis = <2000>; + type = "passive"; + }; + + target: trip-point1 { + temperature = <80000>; + hysteresis = <2000>; + type = "passive"; + }; + + cpu_crit: cpu-crit { + temperature = <115000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&target>; + cooling-device = <&cpu0 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu1 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu2 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu3 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + contribution = <3072>; + }; + map1 { + trip = <&target>; + cooling-device = <&cpu4 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu5 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu6 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu7 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + contribution = <1024>; + }; + }; + }; + + /* The tzts1 ~ tzts6 don't need to polling */ + /* The tzts1 ~ tzts6 don't need to thermal throttle */ + + tzts1: tzts1 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 1>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts2: tzts2 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 2>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts3: tzts3 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 3>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts4: tzts4 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 4>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts5: tzts5 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 5>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tztsABB: tztsABB { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 6>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + }; }; -- cgit From 8980c30141d3986beab815d85762b9c67196ed72 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Thu, 26 Oct 2023 12:09:10 -0700 Subject: arm64: dts: mt8183: kukui: Fix underscores in node names Replace underscores with hyphens in pinctrl node names both for consistency and to adhere to the bindings. Cc: stable@vger.kernel.org Fixes: cd894e274b74 ("arm64: dts: mt8183: Add krane-sku176 board") Fixes: 1652dbf7363a ("arm64: dts: mt8183: add scp node") Fixes: 27eaf34df364 ("arm64: dts: mt8183: config dsi node") Signed-off-by: Hsin-Yi Wang Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231026191343.3345279-2-hsinyi@chromium.org Signed-off-by: AngeloGioacchino Del Regno --- .../boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi | 6 +- arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 94 +++++++++++----------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi index bf97b60ae4d1..06fde1a9aab7 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi @@ -441,20 +441,20 @@ }; touchscreen_pins: touchscreen-pins { - touch_int_odl { + touch-int-odl { pinmux = ; input-enable; bias-pull-up; }; - touch_rst_l { + touch-rst-l { pinmux = ; output-high; }; }; trackpad_pins: trackpad-pins { - trackpad_int { + trackpad-int { pinmux = ; input-enable; bias-disable; /* pulled externally */ diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index 6e421e941e3a..7881a27be029 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -460,7 +460,7 @@ &pio { aud_pins_default: audiopins { - pins_bus { + pins-bus { pinmux = , , , @@ -482,7 +482,7 @@ }; aud_pins_tdm_out_on: audiotdmouton { - pins_bus { + pins-bus { pinmux = , , , @@ -494,7 +494,7 @@ }; aud_pins_tdm_out_off: audiotdmoutoff { - pins_bus { + pins-bus { pinmux = , , , @@ -508,13 +508,13 @@ }; bt_pins: bt-pins { - pins_bt_en { + pins-bt-en { pinmux = ; output-low; }; }; - ec_ap_int_odl: ec_ap_int_odl { + ec_ap_int_odl: ec-ap-int-odl { pins1 { pinmux = ; input-enable; @@ -522,7 +522,7 @@ }; }; - h1_int_od_l: h1_int_od_l { + h1_int_od_l: h1-int-od-l { pins1 { pinmux = ; input-enable; @@ -530,7 +530,7 @@ }; i2c0_pins: i2c0 { - pins_bus { + pins-bus { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -539,7 +539,7 @@ }; i2c1_pins: i2c1 { - pins_bus { + pins-bus { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -548,7 +548,7 @@ }; i2c2_pins: i2c2 { - pins_bus { + pins-bus { pinmux = , ; bias-disable; @@ -557,7 +557,7 @@ }; i2c3_pins: i2c3 { - pins_bus { + pins-bus { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -566,7 +566,7 @@ }; i2c4_pins: i2c4 { - pins_bus { + pins-bus { pinmux = , ; bias-disable; @@ -575,7 +575,7 @@ }; i2c5_pins: i2c5 { - pins_bus { + pins-bus { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -584,7 +584,7 @@ }; i2c6_pins: i2c6 { - pins_bus { + pins-bus { pinmux = , ; bias-disable; @@ -592,7 +592,7 @@ }; mmc0_pins_default: mmc0-pins-default { - pins_cmd_dat { + pins-cmd-dat { pinmux = , , , @@ -607,13 +607,13 @@ mediatek,pull-up-adv = <01>; }; - pins_clk { + pins-clk { pinmux = ; drive-strength = ; mediatek,pull-down-adv = <10>; }; - pins_rst { + pins-rst { pinmux = ; drive-strength = ; mediatek,pull-down-adv = <01>; @@ -621,7 +621,7 @@ }; mmc0_pins_uhs: mmc0-pins-uhs { - pins_cmd_dat { + pins-cmd-dat { pinmux = , , , @@ -636,19 +636,19 @@ mediatek,pull-up-adv = <01>; }; - pins_clk { + pins-clk { pinmux = ; drive-strength = ; mediatek,pull-down-adv = <10>; }; - pins_ds { + pins-ds { pinmux = ; drive-strength = ; mediatek,pull-down-adv = <10>; }; - pins_rst { + pins-rst { pinmux = ; drive-strength = ; mediatek,pull-up-adv = <01>; @@ -656,7 +656,7 @@ }; mmc1_pins_default: mmc1-pins-default { - pins_cmd_dat { + pins-cmd-dat { pinmux = , , , @@ -666,7 +666,7 @@ mediatek,pull-up-adv = <10>; }; - pins_clk { + pins-clk { pinmux = ; input-enable; mediatek,pull-down-adv = <10>; @@ -674,7 +674,7 @@ }; mmc1_pins_uhs: mmc1-pins-uhs { - pins_cmd_dat { + pins-cmd-dat { pinmux = , , , @@ -685,7 +685,7 @@ mediatek,pull-up-adv = <10>; }; - pins_clk { + pins-clk { pinmux = ; drive-strength = ; mediatek,pull-down-adv = <10>; @@ -693,15 +693,15 @@ }; }; - panel_pins_default: panel_pins_default { - panel_reset { + panel_pins_default: panel-pins-default { + panel-reset { pinmux = ; output-low; bias-pull-up; }; }; - pwm0_pin_default: pwm0_pin_default { + pwm0_pin_default: pwm0-pin-default { pins1 { pinmux = ; output-high; @@ -713,14 +713,14 @@ }; scp_pins: scp { - pins_scp_uart { + pins-scp-uart { pinmux = , ; }; }; spi0_pins: spi0 { - pins_spi { + pins-spi { pinmux = , , , @@ -730,7 +730,7 @@ }; spi1_pins: spi1 { - pins_spi { + pins-spi { pinmux = , , , @@ -740,20 +740,20 @@ }; spi2_pins: spi2 { - pins_spi { + pins-spi { pinmux = , , ; bias-disable; }; - pins_spi_mi { + pins-spi-mi { pinmux = ; mediatek,pull-down-adv = <00>; }; }; spi3_pins: spi3 { - pins_spi { + pins-spi { pinmux = , , , @@ -763,7 +763,7 @@ }; spi4_pins: spi4 { - pins_spi { + pins-spi { pinmux = , , , @@ -773,7 +773,7 @@ }; spi5_pins: spi5 { - pins_spi { + pins-spi { pinmux = , , , @@ -783,63 +783,63 @@ }; uart0_pins_default: uart0-pins-default { - pins_rx { + pins-rx { pinmux = ; input-enable; bias-pull-up; }; - pins_tx { + pins-tx { pinmux = ; }; }; uart1_pins_default: uart1-pins-default { - pins_rx { + pins-rx { pinmux = ; input-enable; bias-pull-up; }; - pins_tx { + pins-tx { pinmux = ; }; - pins_rts { + pins-rts { pinmux = ; output-enable; }; - pins_cts { + pins-cts { pinmux = ; input-enable; }; }; uart1_pins_sleep: uart1-pins-sleep { - pins_rx { + pins-rx { pinmux = ; input-enable; bias-pull-up; }; - pins_tx { + pins-tx { pinmux = ; }; - pins_rts { + pins-rts { pinmux = ; output-enable; }; - pins_cts { + pins-cts { pinmux = ; input-enable; }; }; wifi_pins_pwrseq: wifi-pins-pwrseq { - pins_wifi_enable { + pins-wifi-enable { pinmux = ; output-low; }; }; wifi_pins_wakeup: wifi-pins-wakeup { - pins_wifi_wakeup { + pins-wifi-wakeup { pinmux = ; input-enable; }; -- cgit From 8dfe51c3f6ef31502fca3e2da8cd250ebbe4b8f2 Mon Sep 17 00:00:00 2001 From: Eric Woudstra Date: Wed, 25 Oct 2023 19:08:28 +0200 Subject: arm64: dts: mt7986: fix emmc hs400 mode without uboot initialization Eric reports errors on emmc with hs400 mode when booting linux on bpi-r3 without uboot [1]. Booting with uboot does not show this because clocks seem to be initialized by uboot. Fix this by adding assigned-clocks and assigned-clock-parents like it's done in uboot [2]. [1] https://forum.banana-pi.org/t/bpi-r3-kernel-fails-setting-emmc-clock-to-416m-depends-on-u-boot/15170 [2] https://github.com/u-boot/u-boot/blob/master/arch/arm/dts/mt7986.dtsi#L287 Cc: stable@vger.kernel.org Fixes: 513b49d19b34 ("arm64: dts: mt7986: add mmc related device nodes") Signed-off-by: Eric Woudstra Signed-off-by: Frank Wunderlich Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231025170832.78727-2-linux@fw-web.de Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index 24eda00e320d..77ddd9e44ed2 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -374,6 +374,10 @@ reg = <0 0x11230000 0 0x1000>, <0 0x11c20000 0 0x1000>; interrupts = ; + assigned-clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>, + <&topckgen CLK_TOP_EMMC_250M_SEL>; + assigned-clock-parents = <&apmixedsys CLK_APMIXED_MPLL>, + <&topckgen CLK_TOP_NET1PLL_D5_D2>; clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>, <&infracfg CLK_INFRA_MSDC_HCK_CK>, <&infracfg CLK_INFRA_MSDC_CK>, -- cgit From 6413cbc17f89b3a160f3a6f3fad1232b1678fe40 Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Wed, 25 Oct 2023 19:08:29 +0200 Subject: arm64: dts: mt7986: define 3W max power to both SFP on BPI-R3 All SFP power supplies are connected to the system VDD33 which is 3v3/8A. Set 3A per SFP slot to allow SFPs work which need more power than the default 1W. Cc: stable@vger.kernel.org Fixes: 8e01fb15b815 ("arm64: dts: mt7986: add Bananapi R3") Signed-off-by: Frank Wunderlich Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231025170832.78727-3-linux@fw-web.de Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts index af4a4309bda4..f9702284607a 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts +++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts @@ -126,6 +126,7 @@ compatible = "sff,sfp"; i2c-bus = <&i2c_sfp1>; los-gpios = <&pio 46 GPIO_ACTIVE_HIGH>; + maximum-power-milliwatt = <3000>; mod-def0-gpios = <&pio 49 GPIO_ACTIVE_LOW>; tx-disable-gpios = <&pio 20 GPIO_ACTIVE_HIGH>; tx-fault-gpios = <&pio 7 GPIO_ACTIVE_HIGH>; @@ -137,6 +138,7 @@ i2c-bus = <&i2c_sfp2>; los-gpios = <&pio 31 GPIO_ACTIVE_HIGH>; mod-def0-gpios = <&pio 47 GPIO_ACTIVE_LOW>; + maximum-power-milliwatt = <3000>; tx-disable-gpios = <&pio 15 GPIO_ACTIVE_HIGH>; tx-fault-gpios = <&pio 48 GPIO_ACTIVE_HIGH>; }; -- cgit From 1fcda8ceb014aafd56f10b33e0077c93b5dd45d1 Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Wed, 25 Oct 2023 19:08:30 +0200 Subject: arm64: dts: mt7986: change cooling trips Add Critical and hot trips for emergency system shutdown and limiting system load. Change passive trip to active to make sure fan is activated on the lowest trip. Cc: stable@vger.kernel.org Fixes: 1f5be05132f3 ("arm64: dts: mt7986: add thermal-zones") Fixes: c26f779a2295 ("arm64: dts: mt7986: add pwm-fan and cooling-maps to BPI-R3 dts") Suggested-by: Daniel Golle Signed-off-by: Frank Wunderlich Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231025170832.78727-4-linux@fw-web.de Signed-off-by: AngeloGioacchino Del Regno --- .../boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts | 10 +++++----- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 20 ++++++++++++++++---- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts index f9702284607a..b876e501216b 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts +++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts @@ -152,16 +152,16 @@ trip = <&cpu_trip_active_high>; }; - cpu-active-low { + cpu-active-med { /* active: set fan to cooling level 1 */ cooling-device = <&fan 1 1>; - trip = <&cpu_trip_active_low>; + trip = <&cpu_trip_active_med>; }; - cpu-passive { - /* passive: set fan to cooling level 0 */ + cpu-active-low { + /* active: set fan to cooling level 0 */ cooling-device = <&fan 0 0>; - trip = <&cpu_trip_passive>; + trip = <&cpu_trip_active_low>; }; }; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index 77ddd9e44ed2..fc751e049953 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -614,22 +614,34 @@ thermal-sensors = <&thermal 0>; trips { + cpu_trip_crit: crit { + temperature = <125000>; + hysteresis = <2000>; + type = "critical"; + }; + + cpu_trip_hot: hot { + temperature = <120000>; + hysteresis = <2000>; + type = "hot"; + }; + cpu_trip_active_high: active-high { temperature = <115000>; hysteresis = <2000>; type = "active"; }; - cpu_trip_active_low: active-low { + cpu_trip_active_med: active-med { temperature = <85000>; hysteresis = <2000>; type = "active"; }; - cpu_trip_passive: passive { - temperature = <40000>; + cpu_trip_active_low: active-low { + temperature = <60000>; hysteresis = <2000>; - type = "passive"; + type = "active"; }; }; }; -- cgit From 59fa1e51ba54e1f513985a8177969b62973f7fd5 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 5 Oct 2023 17:11:50 +0200 Subject: arm64: dts: mediatek: mt8186: Change gpu speedbin nvmem cell name MT8186's GPU speedbin value must be interpreted, or the value will not be meaningful. Use the correct "gpu-speedbin" nvmem cell name for the GPU speedbin to allow triggering the cell info fixup handler, hence feeding the right speedbin number to the users. Cc: stable@vger.kernel.org Fixes: 263d2fd02afc ("arm64: dts: mediatek: mt8186: Add GPU speed bin NVMEM cells") Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20231005151150.355536-1-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8186.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8186.dtsi b/arch/arm64/boot/dts/mediatek/mt8186.dtsi index f04ae70c470a..f4c4f61c779d 100644 --- a/arch/arm64/boot/dts/mediatek/mt8186.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8186.dtsi @@ -1656,7 +1656,7 @@ #address-cells = <1>; #size-cells = <1>; - gpu_speedbin: gpu-speed-bin@59c { + gpu_speedbin: gpu-speedbin@59c { reg = <0x59c 0x4>; bits = <0 3>; }; -- cgit From 9adf7580f6d498a5839e02fa1d1535e934364602 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 5 Oct 2023 13:30:41 +0300 Subject: arm64: dts: mediatek: mt8186: fix clock names for power domains Clocks for each power domain are split into big categories: pd clocks and subsys clocks. According to the binding, all clocks which have a dash '-' in their name are treated as subsys clocks, and must be placed at the end of the list. The other clocks which are pd clocks must come first. Fixed the naming and the placing of all clocks in the power domains. For the avoidance of doubt, prefixed all subsys clocks with the 'subsys' prefix. The binding does not enforce strict clock names, the driver uses them in bulk, only making a difference for pd clocks vs subsys clocks. The above problem appears to be trivial, however, it leads to incorrect power up and power down sequence of the power domains, because some clocks will be mistakenly taken for subsys clocks and viceversa. One consequence is the fact that if the DIS power domain goes power down and power back up during the boot process, when it comes back up, there are still transactions left on the bus which makes the display inoperable. Some of the clocks for the DIS power domain were wrongly using '_' instead of '-', which again made these clocks being treated as pd clocks instead of subsys clocks. Cc: stable@vger.kernel.org Fixes: d9e43c1e7a38 ("arm64: dts: mt8186: Add power domains controller") Signed-off-by: Eugen Hristev Tested-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Tested-by: AngeloGioacchino Del Regno Reviewed-by: Alexandre Mergnat Link: https://lore.kernel.org/r/20231005103041.352478-1-eugen.hristev@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8186.dtsi | 42 ++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8186.dtsi b/arch/arm64/boot/dts/mediatek/mt8186.dtsi index f4c4f61c779d..df0c04f2ba1d 100644 --- a/arch/arm64/boot/dts/mediatek/mt8186.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8186.dtsi @@ -924,7 +924,8 @@ reg = ; clocks = <&topckgen CLK_TOP_SENINF>, <&topckgen CLK_TOP_SENINF1>; - clock-names = "csirx_top0", "csirx_top1"; + clock-names = "subsys-csirx-top0", + "subsys-csirx-top1"; #power-domain-cells = <0>; }; @@ -942,7 +943,8 @@ reg = ; clocks = <&topckgen CLK_TOP_AUDIODSP>, <&topckgen CLK_TOP_ADSP_BUS>; - clock-names = "audioadsp", "adsp_bus"; + clock-names = "audioadsp", + "subsys-adsp-bus"; #address-cells = <1>; #size-cells = <0>; #power-domain-cells = <1>; @@ -975,8 +977,11 @@ <&mmsys CLK_MM_SMI_COMMON>, <&mmsys CLK_MM_SMI_GALS>, <&mmsys CLK_MM_SMI_IOMMU>; - clock-names = "disp", "mdp", "smi_infra", "smi_common", - "smi_gals", "smi_iommu"; + clock-names = "disp", "mdp", + "subsys-smi-infra", + "subsys-smi-common", + "subsys-smi-gals", + "subsys-smi-iommu"; mediatek,infracfg = <&infracfg_ao>; #address-cells = <1>; #size-cells = <0>; @@ -993,15 +998,17 @@ power-domain@MT8186_POWER_DOMAIN_CAM { reg = ; - clocks = <&topckgen CLK_TOP_CAM>, - <&topckgen CLK_TOP_SENINF>, + clocks = <&topckgen CLK_TOP_SENINF>, <&topckgen CLK_TOP_SENINF1>, <&topckgen CLK_TOP_SENINF2>, <&topckgen CLK_TOP_SENINF3>, + <&camsys CLK_CAM2MM_GALS>, <&topckgen CLK_TOP_CAMTM>, - <&camsys CLK_CAM2MM_GALS>; - clock-names = "cam-top", "cam0", "cam1", "cam2", - "cam3", "cam-tm", "gals"; + <&topckgen CLK_TOP_CAM>; + clock-names = "cam0", "cam1", "cam2", + "cam3", "gals", + "subsys-cam-tm", + "subsys-cam-top"; mediatek,infracfg = <&infracfg_ao>; #address-cells = <1>; #size-cells = <0>; @@ -1020,9 +1027,9 @@ power-domain@MT8186_POWER_DOMAIN_IMG { reg = ; - clocks = <&topckgen CLK_TOP_IMG1>, - <&imgsys1 CLK_IMG1_GALS_IMG1>; - clock-names = "img-top", "gals"; + clocks = <&imgsys1 CLK_IMG1_GALS_IMG1>, + <&topckgen CLK_TOP_IMG1>; + clock-names = "gals", "subsys-img-top"; mediatek,infracfg = <&infracfg_ao>; #address-cells = <1>; #size-cells = <0>; @@ -1041,8 +1048,11 @@ <&ipesys CLK_IPE_LARB20>, <&ipesys CLK_IPE_SMI_SUBCOM>, <&ipesys CLK_IPE_GALS_IPE>; - clock-names = "ipe-top", "ipe-larb0", "ipe-larb1", - "ipe-smi", "ipe-gals"; + clock-names = "subsys-ipe-top", + "subsys-ipe-larb0", + "subsys-ipe-larb1", + "subsys-ipe-smi", + "subsys-ipe-gals"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -1061,7 +1071,9 @@ clocks = <&topckgen CLK_TOP_WPE>, <&wpesys CLK_WPE_SMI_LARB8_CK_EN>, <&wpesys CLK_WPE_SMI_LARB8_PCLK_EN>; - clock-names = "wpe0", "larb-ck", "larb-pclk"; + clock-names = "wpe0", + "subsys-larb-ck", + "subsys-larb-pclk"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; -- cgit From 8e6ecbfd44b5542a7598c1c5fc9c6dcb5d367f2a Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 14 Aug 2023 09:50:42 +0300 Subject: arm64: dts: mediatek: mt7622: fix memory node warning check dtbs_check throws a warning at the memory node: Warning (unit_address_vs_reg): /memory: node has a reg or ranges property, but no unit name fix by adding the address into the node name. Cc: stable@vger.kernel.org Fixes: 0b6286dd96c0 ("arm64: dts: mt7622: add bananapi BPI-R64 board") Signed-off-by: Eugen Hristev Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230814065042.4973-1-eugen.hristev@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts | 2 +- arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts index 3b7a176b7904..c46682150e50 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts @@ -73,7 +73,7 @@ }; }; - memory { + memory@40000000 { reg = <0 0x40000000 0 0x40000000>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts index a885a3fbe456..2dc1bdc74e21 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts @@ -55,7 +55,7 @@ }; }; - memory { + memory@40000000 { reg = <0 0x40000000 0 0x20000000>; }; -- cgit From 74543b303a9abfe4fa253d1fa215281baa05ff3a Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 14 Aug 2023 10:10:53 +0300 Subject: arm64: dts: mediatek: mt8183-kukui-jacuzzi: fix dsi unnecessary cells properties dtbs_check throws a warning at the dsi node: Warning (avoid_unnecessary_addr_size): /soc/dsi@14014000: unnecessary #address-cells/#size-cells without "ranges" or child "reg" property Other DTS have a panel child node with a reg, so the parent dtsi must have the address-cells and size-cells, however this specific DT has the panel removed, but not the cells, hence the warning above. If panel is deleted then the cells must also be deleted since they are tied together, as the child node in this DT does not have a reg. Cc: stable@vger.kernel.org Fixes: cabc71b08eb5 ("arm64: dts: mt8183: Add kukui-jacuzzi-damu board") Signed-off-by: Eugen Hristev Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230814071053.5459-1-eugen.hristev@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi index 06fde1a9aab7..820260348de9 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi @@ -91,6 +91,8 @@ &dsi0 { status = "okay"; + /delete-property/#size-cells; + /delete-property/#address-cells; /delete-node/panel@0; ports { port { -- cgit From 5943b8f7449df9881b273db07bdde1e7120dccf0 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 27 Nov 2023 14:20:26 +0100 Subject: arm64: dts: mediatek: cherry: Fix interrupt cells for MT6360 on I2C7 Change interrupt cells to 2 to suppress interrupts_property warning. Cc: stable@vger.kernel.org Fixes: 0de0fe950f1b ("arm64: dts: mediatek: cherry: Enable MT6360 sub-pmic on I2C7") Link: https://lore.kernel.org/r/20231127132026.165027-1-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi index dd5b89b73190..5a7cab489ff3 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi @@ -389,7 +389,7 @@ pinctrl-0 = <&i2c7_pins>; pmic@34 { - #interrupt-cells = <1>; + #interrupt-cells = <2>; compatible = "mediatek,mt6360"; reg = <0x34>; interrupt-controller; -- cgit From 23ab79e8e469e2605beec2e3ccb40d19c68dd2e0 Mon Sep 17 00:00:00 2001 From: Elliot Berman Date: Mon, 20 Nov 2023 09:36:31 -0800 Subject: freezer,sched: Do not restore saved_state of a thawed task It is possible for a task to be thawed multiple times when mixing the *legacy* cgroup freezer and system-wide freezer. To do this, freeze the cgroup, do system-wide freeze/thaw, then thaw the cgroup. When this happens, then a stale saved_state can be written to the task's state and cause task to hang indefinitely. Fix this by only trying to thaw tasks that are actually frozen. This change also has the marginal benefit avoiding unnecessary wake_up_state(p, TASK_FROZEN) if we know the task is already thawed. There is not possibility of time-of-compare/time-of-use race when we skip the wake_up_state because entering/exiting TASK_FROZEN is guarded by freezer_lock. Fixes: 8f0eed4a78a8 ("freezer,sched: Use saved_state to reduce some spurious wakeups") Signed-off-by: Elliot Berman Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Abhijeet Dharmapurikar Link: https://lore.kernel.org/r/20231120-freezer-state-multiple-thaws-v1-1-f2e1dd7ce5a2@quicinc.com --- kernel/freezer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/freezer.c b/kernel/freezer.c index c450fa8b8b5e..759006a9a910 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -201,7 +201,7 @@ void __thaw_task(struct task_struct *p) if (WARN_ON_ONCE(freezing(p))) goto unlock; - if (task_call_func(p, __restore_freezer_state, NULL)) + if (!frozen(p) || task_call_func(p, __restore_freezer_state, NULL)) goto unlock; wake_up_state(p, TASK_FROZEN); -- cgit From 382c27f4ed28f803b1f1473ac2d8db0afc795a1b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 29 Nov 2023 15:24:52 +0100 Subject: perf: Fix perf_event_validate_size() Budimir noted that perf_event_validate_size() only checks the size of the newly added event, even though the sizes of all existing events can also change due to not all events having the same read_format. When we attach the new event, perf_group_attach(), we do re-compute the size for all events. Fixes: a723968c0ed3 ("perf: Fix u16 overflows") Reported-by: Budimir Markovic Signed-off-by: Peter Zijlstra (Intel) --- kernel/events/core.c | 61 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index b704d83a28b2..c9d123e13b57 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1814,31 +1814,34 @@ static inline void perf_event__state_init(struct perf_event *event) PERF_EVENT_STATE_INACTIVE; } -static void __perf_event_read_size(struct perf_event *event, int nr_siblings) +static int __perf_event_read_size(u64 read_format, int nr_siblings) { int entry = sizeof(u64); /* value */ int size = 0; int nr = 1; - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) size += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) size += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_ID) + if (read_format & PERF_FORMAT_ID) entry += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_LOST) + if (read_format & PERF_FORMAT_LOST) entry += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_GROUP) { + if (read_format & PERF_FORMAT_GROUP) { nr += nr_siblings; size += sizeof(u64); } - size += entry * nr; - event->read_size = size; + /* + * Since perf_event_validate_size() limits this to 16k and inhibits + * adding more siblings, this will never overflow. + */ + return size + nr * entry; } static void __perf_event_header_size(struct perf_event *event, u64 sample_type) @@ -1888,8 +1891,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) */ static void perf_event__header_size(struct perf_event *event) { - __perf_event_read_size(event, - event->group_leader->nr_siblings); + event->read_size = + __perf_event_read_size(event->attr.read_format, + event->group_leader->nr_siblings); __perf_event_header_size(event, event->attr.sample_type); } @@ -1920,24 +1924,35 @@ static void perf_event__id_header_size(struct perf_event *event) event->id_header_size = size; } +/* + * Check that adding an event to the group does not result in anybody + * overflowing the 64k event limit imposed by the output buffer. + * + * Specifically, check that the read_size for the event does not exceed 16k, + * read_size being the one term that grows with groups size. Since read_size + * depends on per-event read_format, also (re)check the existing events. + * + * This leaves 48k for the constant size fields and things like callchains, + * branch stacks and register sets. + */ static bool perf_event_validate_size(struct perf_event *event) { - /* - * The values computed here will be over-written when we actually - * attach the event. - */ - __perf_event_read_size(event, event->group_leader->nr_siblings + 1); - __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ); - perf_event__id_header_size(event); + struct perf_event *sibling, *group_leader = event->group_leader; - /* - * Sum the lot; should not exceed the 64k limit we have on records. - * Conservative limit to allow for callchains and other variable fields. - */ - if (event->read_size + event->header_size + - event->id_header_size + sizeof(struct perf_event_header) >= 16*1024) + if (__perf_event_read_size(event->attr.read_format, + group_leader->nr_siblings + 1) > 16*1024) return false; + if (__perf_event_read_size(group_leader->attr.read_format, + group_leader->nr_siblings + 1) > 16*1024) + return false; + + for_each_sibling_event(sibling, group_leader) { + if (__perf_event_read_size(sibling->attr.read_format, + group_leader->nr_siblings + 1) > 16*1024) + return false; + } + return true; } -- cgit From 547c91929f437b42e6a9c90fec1fb5aec3e64aac Mon Sep 17 00:00:00 2001 From: Like Xu Date: Thu, 23 Nov 2023 15:58:18 +0800 Subject: KVM: x86: Get CPL directly when checking if loaded vCPU is in kernel mode When querying whether or not a vCPU "is" running in kernel mode, directly get the CPL if the vCPU is the currently loaded vCPU. In scenarios where a guest is profiled via perf-kvm, querying vcpu->arch.preempted_in_kernel from kvm_guest_state() is wrong if vCPU is actively running, i.e. isn't scheduled out due to being preempted and so preempted_in_kernel is stale. This affects perf/core's ability to accurately tag guest RIP with PERF_RECORD_MISC_GUEST_{KERNEL|USER} and record it in the sample. This causes perf/tool to fail to connect the vCPU RIPs to the guest kernel space symbols when parsing these samples due to incorrect PERF_RECORD_MISC flags: Before (perf-report of a cpu-cycles sample): 1.23% :58945 [unknown] [u] 0xffffffff818012e0 After: 1.35% :60703 [kernel.vmlinux] [g] asm_exc_page_fault Note, checking preempted_in_kernel in kvm_arch_vcpu_in_kernel() is awful as nothing in the API's suggests that it's safe to use if and only if the vCPU was preempted. That can be cleaned up in the future, for now just fix the glaring correctness bug. Note #2, checking vcpu->preempted is NOT safe, as getting the CPL on VMX requires VMREAD, i.e. is correct if and only if the vCPU is loaded. If the target vCPU *was* preempted, then it can be scheduled back in after the check on vcpu->preempted in kvm_vcpu_on_spin(), i.e. KVM could end up trying to do VMREAD on a VMCS that isn't loaded on the current pCPU. Signed-off-by: Like Xu Fixes: e1bfc24577cc ("KVM: Move x86's perf guest info callbacks to generic KVM") Link: https://lore.kernel.org/r/20231123075818.12521-1-likexu@tencent.com [sean: massage changelong, add Fixes] Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 41cce5031126..3b6a0109191b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12910,7 +12910,10 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) if (vcpu->arch.guest_state_protected) return true; - return vcpu->arch.preempted_in_kernel; + if (vcpu != kvm_get_running_vcpu()) + return vcpu->arch.preempted_in_kernel; + + return static_call(kvm_x86_get_cpl)(vcpu) == 0; } unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) -- cgit From 88a2b4d34a64bba914c4e245c6de3ca42bea93cf Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 22 Nov 2023 14:28:40 -0600 Subject: nouveau/gsp: document some aspects of GSP-RM Document a few aspects of communication with GSP-RM. These comments are derived from notes made during early development of GSP-RM support in Nouveau, but were not included in the initial patch set. Reviewed-by: Dave Airlie Signed-off-by: Timur Tabi Reviewed-by: Danilo Krummrich Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231122202840.2565153-1-ttabi@nvidia.com --- .../common/shared/msgq/inc/msgq/msgq_priv.h | 51 ++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 82 ++++++++++++++++++++++ 2 files changed, 133 insertions(+) diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h index 5a2f273d95c8..0e32e71e123f 100644 --- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h +++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h @@ -26,6 +26,49 @@ * DEALINGS IN THE SOFTWARE. */ +/** + * msgqTxHeader -- TX queue data structure + * @version: the version of this structure, must be 0 + * @size: the size of the entire queue, including this header + * @msgSize: the padded size of queue element, 16 is minimum + * @msgCount: the number of elements in this queue + * @writePtr: head index of this queue + * @flags: 1 = swap the RX pointers + * @rxHdrOff: offset of readPtr in this structure + * @entryOff: offset of beginning of queue (msgqRxHeader), relative to + * beginning of this structure + * + * The command queue is a queue of RPCs that are sent from the driver to the + * GSP. The status queue is a queue of messages/responses from GSP-RM to the + * driver. Although the driver allocates memory for both queues, the command + * queue is owned by the driver and the status queue is owned by GSP-RM. In + * addition, the headers of the two queues must not share the same 4K page. + * + * Each queue is prefixed with this data structure. The idea is that a queue + * and its header are written to only by their owner. That is, only the + * driver writes to the command queue and command queue header, and only the + * GSP writes to the status (receive) queue and its header. + * + * This is enforced by the concept of "swapping" the RX pointers. This is + * why the 'flags' field must be set to 1. 'rxHdrOff' is how the GSP knows + * where the where the tail pointer of its status queue. + * + * When the driver writes a new RPC to the command queue, it updates writePtr. + * When it reads a new message from the status queue, it updates readPtr. In + * this way, the GSP knows when a new command is in the queue (it polls + * writePtr) and it knows how much free space is in the status queue (it + * checks readPtr). The driver never cares about how much free space is in + * the status queue. + * + * As usual, producers write to the head pointer, and consumers read from the + * tail pointer. When head == tail, the queue is empty. + * + * So to summarize: + * command.writePtr = head of command queue + * command.readPtr = tail of status queue + * status.writePtr = head of status queue + * status.readPtr = tail of command queue + */ typedef struct { NvU32 version; // queue version @@ -38,6 +81,14 @@ typedef struct NvU32 entryOff; // Offset of entries from start of backing store. } msgqTxHeader; +/** + * msgqRxHeader - RX queue data structure + * @readPtr: tail index of the other queue + * + * Although this is a separate struct, it could easily be merged into + * msgqTxHeader. msgqTxHeader.rxHdrOff is simply the offset of readPtr + * from the beginning of msgqTxHeader. + */ typedef struct { NvU32 readPtr; // message id of last message read diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index f6725a5f5bfb..44fb86841c05 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1377,6 +1377,13 @@ r535_gsp_msg_post_event(void *priv, u32 fn, void *repv, u32 repc) return 0; } +/** + * r535_gsp_msg_run_cpu_sequencer() -- process I/O commands from the GSP + * + * The GSP sequencer is a list of I/O commands that the GSP can send to + * the driver to perform for various purposes. The most common usage is to + * perform a special mid-initialization reset. + */ static int r535_gsp_msg_run_cpu_sequencer(void *priv, u32 fn, void *repv, u32 repc) { @@ -1716,6 +1723,23 @@ r535_gsp_libos_id8(const char *name) return id; } +/** + * create_pte_array() - creates a PTE array of a physically contiguous buffer + * @ptes: pointer to the array + * @addr: base address of physically contiguous buffer (GSP_PAGE_SIZE aligned) + * @size: size of the buffer + * + * GSP-RM sometimes expects physically-contiguous buffers to have an array of + * "PTEs" for each page in that buffer. Although in theory that allows for + * the buffer to be physically discontiguous, GSP-RM does not currently + * support that. + * + * In this case, the PTEs are DMA addresses of each page of the buffer. Since + * the buffer is physically contiguous, calculating all the PTEs is simple + * math. + * + * See memdescGetPhysAddrsForGpu() + */ static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size) { unsigned int num_pages = DIV_ROUND_UP_ULL(size, GSP_PAGE_SIZE); @@ -1725,6 +1749,35 @@ static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size) ptes[i] = (u64)addr + (i << GSP_PAGE_SHIFT); } +/** + * r535_gsp_libos_init() -- create the libos arguments structure + * + * The logging buffers are byte queues that contain encoded printf-like + * messages from GSP-RM. They need to be decoded by a special application + * that can parse the buffers. + * + * The 'loginit' buffer contains logs from early GSP-RM init and + * exception dumps. The 'logrm' buffer contains the subsequent logs. Both are + * written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE. + * + * The physical address map for the log buffer is stored in the buffer + * itself, starting with offset 1. Offset 0 contains the "put" pointer. + * + * The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is + * configured for a larger page size (e.g. 64K pages), we need to give + * the GSP an array of 4K pages. Fortunately, since the buffer is + * physically contiguous, it's simple math to calculate the addresses. + * + * The buffers must be a multiple of GSP_PAGE_SIZE. GSP-RM also currently + * ignores the @kind field for LOGINIT, LOGINTR, and LOGRM, but expects the + * buffers to be physically contiguous anyway. + * + * The memory allocated for the arguments must remain until the GSP sends the + * init_done RPC. + * + * See _kgspInitLibosLoggingStructures (allocates memory for buffers) + * See kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array) + */ static int r535_gsp_libos_init(struct nvkm_gsp *gsp) { @@ -1835,6 +1888,35 @@ nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3) nvkm_gsp_mem_dtor(gsp, &rx3->mem[i]); } +/** + * nvkm_gsp_radix3_sg - build a radix3 table from a S/G list + * + * The GSP uses a three-level page table, called radix3, to map the firmware. + * Each 64-bit "pointer" in the table is either the bus address of an entry in + * the next table (for levels 0 and 1) or the bus address of the next page in + * the GSP firmware image itself. + * + * Level 0 contains a single entry in one page that points to the first page + * of level 1. + * + * Level 1, since it's also only one page in size, contains up to 512 entries, + * one for each page in Level 2. + * + * Level 2 can be up to 512 pages in size, and each of those entries points to + * the next page of the firmware image. Since there can be up to 512*512 + * pages, that limits the size of the firmware to 512*512*GSP_PAGE_SIZE = 1GB. + * + * Internally, the GSP has its window into system memory, but the base + * physical address of the aperture is not 0. In fact, it varies depending on + * the GPU architecture. Since the GPU is a PCI device, this window is + * accessed via DMA and is therefore bound by IOMMU translation. The end + * result is that GSP-RM must translate the bus addresses in the table to GSP + * physical addresses. All this should happen transparently. + * + * Returns 0 on success, or negative error code + * + * See kgspCreateRadix3_IMPL + */ static int nvkm_gsp_radix3_sg(struct nvkm_device *device, struct sg_table *sgt, u64 size, struct nvkm_gsp_radix3 *rx3) -- cgit From bd7a282650b8beb57bc9d19bfcb714b1ccae843a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 12 Nov 2023 14:50:13 -0400 Subject: iommufd: Add iommufd_ctx to iommufd_put_object() Will be used in the next patch. Link: https://lore.kernel.org/r/1-v2-ca9e00171c5b+123-iommufd_syz4_jgg@nvidia.com/ Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 14 +++++++------- drivers/iommu/iommufd/hw_pagetable.c | 8 ++++---- drivers/iommu/iommufd/ioas.c | 14 +++++++------- drivers/iommu/iommufd/iommufd_private.h | 3 ++- drivers/iommu/iommufd/selftest.c | 14 +++++++------- drivers/iommu/iommufd/vfio_compat.c | 18 +++++++++--------- 6 files changed, 36 insertions(+), 35 deletions(-) diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 59d3a07300d9..873630c111c1 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -571,7 +571,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, continue; destroy_hwpt = (*do_attach)(idev, hwpt); if (IS_ERR(destroy_hwpt)) { - iommufd_put_object(&hwpt->obj); + iommufd_put_object(idev->ictx, &hwpt->obj); /* * -EINVAL means the domain is incompatible with the * device. Other error codes should propagate to @@ -583,7 +583,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, goto out_unlock; } *pt_id = hwpt->obj.id; - iommufd_put_object(&hwpt->obj); + iommufd_put_object(idev->ictx, &hwpt->obj); goto out_unlock; } @@ -652,7 +652,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, destroy_hwpt = ERR_PTR(-EINVAL); goto out_put_pt_obj; } - iommufd_put_object(pt_obj); + iommufd_put_object(idev->ictx, pt_obj); /* This destruction has to be after we unlock everything */ if (destroy_hwpt) @@ -660,7 +660,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, return 0; out_put_pt_obj: - iommufd_put_object(pt_obj); + iommufd_put_object(idev->ictx, pt_obj); return PTR_ERR(destroy_hwpt); } @@ -792,7 +792,7 @@ static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id) if (IS_ERR(ioas)) return PTR_ERR(ioas); rc = iommufd_access_change_ioas(access, ioas); - iommufd_put_object(&ioas->obj); + iommufd_put_object(access->ictx, &ioas->obj); return rc; } @@ -941,7 +941,7 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, access->ops->unmap(access->data, iova, length); - iommufd_put_object(&access->obj); + iommufd_put_object(access->ictx, &access->obj); xa_lock(&ioas->iopt.access_list); } xa_unlock(&ioas->iopt.access_list); @@ -1243,6 +1243,6 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) out_free: kfree(data); out_put: - iommufd_put_object(&idev->obj); + iommufd_put_object(ucmd->ictx, &idev->obj); return rc; } diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 2abbeafdbd22..cbb5df0a6c32 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -318,9 +318,9 @@ out_unlock: if (ioas) mutex_unlock(&ioas->mutex); out_put_pt: - iommufd_put_object(pt_obj); + iommufd_put_object(ucmd->ictx, pt_obj); out_put_idev: - iommufd_put_object(&idev->obj); + iommufd_put_object(ucmd->ictx, &idev->obj); return rc; } @@ -345,7 +345,7 @@ int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd) rc = iopt_set_dirty_tracking(&ioas->iopt, hwpt_paging->common.domain, enable); - iommufd_put_object(&hwpt_paging->common.obj); + iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj); return rc; } @@ -368,6 +368,6 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd) rc = iopt_read_and_clear_dirty_data( &ioas->iopt, hwpt_paging->common.domain, cmd->flags, cmd); - iommufd_put_object(&hwpt_paging->common.obj); + iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj); return rc; } diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c index d5624577f79f..742248276548 100644 --- a/drivers/iommu/iommufd/ioas.c +++ b/drivers/iommu/iommufd/ioas.c @@ -105,7 +105,7 @@ int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd) rc = -EMSGSIZE; out_put: up_read(&ioas->iopt.iova_rwsem); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } @@ -175,7 +175,7 @@ out_free: interval_tree_remove(node, &allowed_iova); kfree(container_of(node, struct iopt_allowed, node)); } - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } @@ -228,7 +228,7 @@ int iommufd_ioas_map(struct iommufd_ucmd *ucmd) cmd->iova = iova; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_put: - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } @@ -258,7 +258,7 @@ int iommufd_ioas_copy(struct iommufd_ucmd *ucmd) return PTR_ERR(src_ioas); rc = iopt_get_pages(&src_ioas->iopt, cmd->src_iova, cmd->length, &pages_list); - iommufd_put_object(&src_ioas->obj); + iommufd_put_object(ucmd->ictx, &src_ioas->obj); if (rc) return rc; @@ -279,7 +279,7 @@ int iommufd_ioas_copy(struct iommufd_ucmd *ucmd) cmd->dst_iova = iova; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_put_dst: - iommufd_put_object(&dst_ioas->obj); + iommufd_put_object(ucmd->ictx, &dst_ioas->obj); out_pages: iopt_free_pages_list(&pages_list); return rc; @@ -315,7 +315,7 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd) rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_put: - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } @@ -393,6 +393,6 @@ int iommufd_ioas_option(struct iommufd_ucmd *ucmd) rc = -EOPNOTSUPP; } - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index a74cfefffbc6..f918a22f0d48 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -154,7 +154,8 @@ static inline bool iommufd_lock_obj(struct iommufd_object *obj) struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, enum iommufd_object_type type); -static inline void iommufd_put_object(struct iommufd_object *obj) +static inline void iommufd_put_object(struct iommufd_ctx *ictx, + struct iommufd_object *obj) { refcount_dec(&obj->users); up_read(&obj->destroy_rwsem); diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 5d93434003d8..022ef8f55088 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -86,7 +86,7 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, if (IS_ERR(ioas)) return; *iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); } struct mock_iommu_domain { @@ -500,7 +500,7 @@ get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id, return hwpt; if (hwpt->domain->type != IOMMU_DOMAIN_UNMANAGED || hwpt->domain->ops != mock_ops.default_domain_ops) { - iommufd_put_object(&hwpt->obj); + iommufd_put_object(ucmd->ictx, &hwpt->obj); return ERR_PTR(-EINVAL); } *mock = container_of(hwpt->domain, struct mock_iommu_domain, domain); @@ -518,7 +518,7 @@ get_md_pagetable_nested(struct iommufd_ucmd *ucmd, u32 mockpt_id, return hwpt; if (hwpt->domain->type != IOMMU_DOMAIN_NESTED || hwpt->domain->ops != &domain_nested_ops) { - iommufd_put_object(&hwpt->obj); + iommufd_put_object(ucmd->ictx, &hwpt->obj); return ERR_PTR(-EINVAL); } *mock_nested = container_of(hwpt->domain, @@ -681,7 +681,7 @@ static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd, rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_dev_obj: - iommufd_put_object(dev_obj); + iommufd_put_object(ucmd->ictx, dev_obj); return rc; } @@ -699,7 +699,7 @@ static int iommufd_test_add_reserved(struct iommufd_ucmd *ucmd, down_write(&ioas->iopt.iova_rwsem); rc = iopt_reserve_iova(&ioas->iopt, start, start + length - 1, NULL); up_write(&ioas->iopt.iova_rwsem); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } @@ -754,7 +754,7 @@ static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd, rc = 0; out_put: - iommufd_put_object(&hwpt->obj); + iommufd_put_object(ucmd->ictx, &hwpt->obj); return rc; } @@ -1233,7 +1233,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, out_free: kvfree(tmp); out_put: - iommufd_put_object(&hwpt->obj); + iommufd_put_object(ucmd->ictx, &hwpt->obj); return rc; } diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c index 538fbf76354d..a3ad5f0b6c59 100644 --- a/drivers/iommu/iommufd/vfio_compat.c +++ b/drivers/iommu/iommufd/vfio_compat.c @@ -41,7 +41,7 @@ int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) if (IS_ERR(ioas)) return PTR_ERR(ioas); *out_ioas_id = ioas->obj.id; - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return 0; } EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO); @@ -98,7 +98,7 @@ int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx) if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) { ret = 0; - iommufd_put_object(&ictx->vfio_ioas->obj); + iommufd_put_object(ictx, &ictx->vfio_ioas->obj); goto out_abort; } ictx->vfio_ioas = ioas; @@ -133,7 +133,7 @@ int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) if (IS_ERR(ioas)) return PTR_ERR(ioas); cmd->ioas_id = ioas->obj.id; - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return iommufd_ucmd_respond(ucmd, sizeof(*cmd)); case IOMMU_VFIO_IOAS_SET: @@ -143,7 +143,7 @@ int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) xa_lock(&ucmd->ictx->objects); ucmd->ictx->vfio_ioas = ioas; xa_unlock(&ucmd->ictx->objects); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return 0; case IOMMU_VFIO_IOAS_CLEAR: @@ -190,7 +190,7 @@ static int iommufd_vfio_map_dma(struct iommufd_ctx *ictx, unsigned int cmd, iova = map.iova; rc = iopt_map_user_pages(ictx, &ioas->iopt, &iova, u64_to_user_ptr(map.vaddr), map.size, iommu_prot, 0); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return rc; } @@ -249,7 +249,7 @@ static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd, rc = -EFAULT; err_put: - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return rc; } @@ -272,7 +272,7 @@ static int iommufd_vfio_cc_iommu(struct iommufd_ctx *ictx) } mutex_unlock(&ioas->mutex); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return rc; } @@ -349,7 +349,7 @@ static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type) */ if (type == VFIO_TYPE1_IOMMU) rc = iopt_disable_large_pages(&ioas->iopt); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return rc; } @@ -511,7 +511,7 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx, out_put: up_read(&ioas->iopt.iova_rwsem); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return rc; } -- cgit From 6f9c4d8c468c189d6dc470324bd52955f8aa0a10 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 12 Nov 2023 15:44:08 -0400 Subject: iommufd: Do not UAF during iommufd_put_object() The mixture of kernel and user space lifecycle objects continues to be complicated inside iommufd. The obj->destroy_rwsem is used to bring order to the kernel driver destruction sequence but it cannot be sequenced right with the other refcounts so we end up possibly UAF'ing: BUG: KASAN: slab-use-after-free in __up_read+0x627/0x750 kernel/locking/rwsem.c:1342 Read of size 8 at addr ffff888073cde868 by task syz-executor934/6535 CPU: 1 PID: 6535 Comm: syz-executor934 Not tainted 6.6.0-rc7-syzkaller-00195-g2af9b20dbb39 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:364 [inline] print_report+0xc4/0x620 mm/kasan/report.c:475 kasan_report+0xda/0x110 mm/kasan/report.c:588 __up_read+0x627/0x750 kernel/locking/rwsem.c:1342 iommufd_put_object drivers/iommu/iommufd/iommufd_private.h:149 [inline] iommufd_vfio_ioas+0x46c/0x580 drivers/iommu/iommufd/vfio_compat.c:146 iommufd_fops_ioctl+0x347/0x4d0 drivers/iommu/iommufd/main.c:398 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl fs/ioctl.c:857 [inline] __x64_sys_ioctl+0x18f/0x210 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd There are two races here, the more obvious one: CPU 0 CPU 1 iommufd_put_object() iommufd_destroy() refcount_dec(&obj->users) iommufd_object_remove() kfree() up_read(&obj->destroy_rwsem) // Boom And there is also perhaps some possibility that the rwsem could hit an issue: CPU 0 CPU 1 iommufd_put_object() iommufd_object_destroy_user() refcount_dec(&obj->users); down_write(&obj->destroy_rwsem) up_read(&obj->destroy_rwsem); atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count); tmp = atomic_long_add_return_release() rwsem_try_write_lock() iommufd_object_remove() up_write(&obj->destroy_rwsem) kfree() clear_nonspinnable() // Boom Fix this by reorganizing this again so that two refcounts are used to keep track of things with a rule that users == 0 && shortterm_users == 0 means no other threads have that memory. Put a wait_queue in the iommufd_ctx object that is triggered when any sub object reaches a 0 shortterm_users. This allows the same wait for userspace ioctls to finish behavior that the rwsem was providing. This is weaker still than the prior versions: - There is no bias on shortterm_users so if some thread is waiting to destroy other threads can continue to get new read sides - If destruction fails, eg because of an active in-kernel user, then shortterm_users will have cycled to zero momentarily blocking new users - If userspace races destroy with other userspace operations they continue to get an EBUSY since we still can't intermix looking up an ID and sleeping for its unref In all cases these are things that userspace brings on itself, correct programs will not hit them. Fixes: 99f98a7c0d69 ("iommufd: IOMMUFD_DESTROY should not increase the refcount") Link: https://lore.kernel.org/all/2-v2-ca9e00171c5b+123-iommufd_syz4_jgg@nvidia.com/ Reported-by: syzbot+d31adfb277377ef8fcba@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/00000000000055ef9a0609336580@google.com Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iommufd_private.h | 67 ++++++++++++--- drivers/iommu/iommufd/main.c | 146 +++++++++++++++++--------------- 2 files changed, 135 insertions(+), 78 deletions(-) diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index f918a22f0d48..abae041e256f 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -21,6 +21,7 @@ struct iommufd_ctx { struct file *file; struct xarray objects; struct xarray groups; + wait_queue_head_t destroy_wait; u8 account_mode; /* Compatibility with VFIO no iommu */ @@ -135,7 +136,7 @@ enum iommufd_object_type { /* Base struct for all objects with a userspace ID handle. */ struct iommufd_object { - struct rw_semaphore destroy_rwsem; + refcount_t shortterm_users; refcount_t users; enum iommufd_object_type type; unsigned int id; @@ -143,10 +144,15 @@ struct iommufd_object { static inline bool iommufd_lock_obj(struct iommufd_object *obj) { - if (!down_read_trylock(&obj->destroy_rwsem)) + if (!refcount_inc_not_zero(&obj->users)) return false; - if (!refcount_inc_not_zero(&obj->users)) { - up_read(&obj->destroy_rwsem); + if (!refcount_inc_not_zero(&obj->shortterm_users)) { + /* + * If the caller doesn't already have a ref on obj this must be + * called under the xa_lock. Otherwise the caller is holding a + * ref on users. Thus it cannot be one before this decrement. + */ + refcount_dec(&obj->users); return false; } return true; @@ -157,8 +163,13 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, static inline void iommufd_put_object(struct iommufd_ctx *ictx, struct iommufd_object *obj) { + /* + * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees + * a spurious !0 users with a 0 shortterm_users. + */ refcount_dec(&obj->users); - up_read(&obj->destroy_rwsem); + if (refcount_dec_and_test(&obj->shortterm_users)) + wake_up_interruptible_all(&ictx->destroy_wait); } void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj); @@ -166,17 +177,49 @@ void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, struct iommufd_object *obj); void iommufd_object_finalize(struct iommufd_ctx *ictx, struct iommufd_object *obj); -void __iommufd_object_destroy_user(struct iommufd_ctx *ictx, - struct iommufd_object *obj, bool allow_fail); + +enum { + REMOVE_WAIT_SHORTTERM = 1, +}; +int iommufd_object_remove(struct iommufd_ctx *ictx, + struct iommufd_object *to_destroy, u32 id, + unsigned int flags); + +/* + * The caller holds a users refcount and wants to destroy the object. At this + * point the caller has no shortterm_users reference and at least the xarray + * will be holding one. + */ static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx, struct iommufd_object *obj) { - __iommufd_object_destroy_user(ictx, obj, false); + int ret; + + ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM); + + /* + * If there is a bug and we couldn't destroy the object then we did put + * back the caller's users refcount and will eventually try to free it + * again during close. + */ + WARN_ON(ret); } -static inline void iommufd_object_deref_user(struct iommufd_ctx *ictx, - struct iommufd_object *obj) + +/* + * The HWPT allocated by autodomains is used in possibly many devices and + * is automatically destroyed when its refcount reaches zero. + * + * If userspace uses the HWPT manually, even for a short term, then it will + * disrupt this refcounting and the auto-free in the kernel will not work. + * Userspace that tries to use the automatically allocated HWPT must be careful + * to ensure that it is consistently destroyed, eg by not racing accesses + * and by not attaching an automatic HWPT to a device manually. + */ +static inline void +iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx, + struct iommufd_object *obj) { - __iommufd_object_destroy_user(ictx, obj, true); + iommufd_object_remove(ictx, obj, obj->id, 0); } struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, @@ -312,7 +355,7 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, lockdep_assert_not_held(&hwpt_paging->ioas->mutex); if (hwpt_paging->auto_domain) { - iommufd_object_deref_user(ictx, &hwpt->obj); + iommufd_object_put_and_try_destroy(ictx, &hwpt->obj); return; } } diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 45b9d40773b1..c9091e46d208 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -33,7 +33,6 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, enum iommufd_object_type type) { - static struct lock_class_key obj_keys[IOMMUFD_OBJ_MAX]; struct iommufd_object *obj; int rc; @@ -41,15 +40,8 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, if (!obj) return ERR_PTR(-ENOMEM); obj->type = type; - /* - * In most cases the destroy_rwsem is obtained with try so it doesn't - * interact with lockdep, however on destroy we have to sleep. This - * means if we have to destroy an object while holding a get on another - * object it triggers lockdep. Using one locking class per object type - * is a simple and reasonable way to avoid this. - */ - __init_rwsem(&obj->destroy_rwsem, "iommufd_object::destroy_rwsem", - &obj_keys[type]); + /* Starts out bias'd by 1 until it is removed from the xarray */ + refcount_set(&obj->shortterm_users, 1); refcount_set(&obj->users, 1); /* @@ -129,92 +121,113 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, return obj; } +static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx, + struct iommufd_object *to_destroy) +{ + if (refcount_dec_and_test(&to_destroy->shortterm_users)) + return 0; + + if (wait_event_timeout(ictx->destroy_wait, + refcount_read(&to_destroy->shortterm_users) == + 0, + msecs_to_jiffies(10000))) + return 0; + + pr_crit("Time out waiting for iommufd object to become free\n"); + refcount_inc(&to_destroy->shortterm_users); + return -EBUSY; +} + /* * Remove the given object id from the xarray if the only reference to the - * object is held by the xarray. The caller must call ops destroy(). + * object is held by the xarray. */ -static struct iommufd_object *iommufd_object_remove(struct iommufd_ctx *ictx, - u32 id, bool extra_put) +int iommufd_object_remove(struct iommufd_ctx *ictx, + struct iommufd_object *to_destroy, u32 id, + unsigned int flags) { struct iommufd_object *obj; XA_STATE(xas, &ictx->objects, id); - - xa_lock(&ictx->objects); - obj = xas_load(&xas); - if (xa_is_zero(obj) || !obj) { - obj = ERR_PTR(-ENOENT); - goto out_xa; - } + bool zerod_shortterm = false; + int ret; /* - * If the caller is holding a ref on obj we put it here under the - * spinlock. + * The purpose of the shortterm_users is to ensure deterministic + * destruction of objects used by external drivers and destroyed by this + * function. Any temporary increment of the refcount must increment + * shortterm_users, such as during ioctl execution. */ - if (extra_put) + if (flags & REMOVE_WAIT_SHORTTERM) { + ret = iommufd_object_dec_wait_shortterm(ictx, to_destroy); + if (ret) { + /* + * We have a bug. Put back the callers reference and + * defer cleaning this object until close. + */ + refcount_dec(&to_destroy->users); + return ret; + } + zerod_shortterm = true; + } + + xa_lock(&ictx->objects); + obj = xas_load(&xas); + if (to_destroy) { + /* + * If the caller is holding a ref on obj we put it here under + * the spinlock. + */ refcount_dec(&obj->users); + if (WARN_ON(obj != to_destroy)) { + ret = -ENOENT; + goto err_xa; + } + } else if (xa_is_zero(obj) || !obj) { + ret = -ENOENT; + goto err_xa; + } + if (!refcount_dec_if_one(&obj->users)) { - obj = ERR_PTR(-EBUSY); - goto out_xa; + ret = -EBUSY; + goto err_xa; } xas_store(&xas, NULL); if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj)) ictx->vfio_ioas = NULL; - -out_xa: xa_unlock(&ictx->objects); - /* The returned object reference count is zero */ - return obj; -} - -/* - * The caller holds a users refcount and wants to destroy the object. Returns - * true if the object was destroyed. In all cases the caller no longer has a - * reference on obj. - */ -void __iommufd_object_destroy_user(struct iommufd_ctx *ictx, - struct iommufd_object *obj, bool allow_fail) -{ - struct iommufd_object *ret; - /* - * The purpose of the destroy_rwsem is to ensure deterministic - * destruction of objects used by external drivers and destroyed by this - * function. Any temporary increment of the refcount must hold the read - * side of this, such as during ioctl execution. - */ - down_write(&obj->destroy_rwsem); - ret = iommufd_object_remove(ictx, obj->id, true); - up_write(&obj->destroy_rwsem); - - if (allow_fail && IS_ERR(ret)) - return; - - /* - * If there is a bug and we couldn't destroy the object then we did put - * back the caller's refcount and will eventually try to free it again - * during close. + * Since users is zero any positive users_shortterm must be racing + * iommufd_put_object(), or we have a bug. */ - if (WARN_ON(IS_ERR(ret))) - return; + if (!zerod_shortterm) { + ret = iommufd_object_dec_wait_shortterm(ictx, obj); + if (WARN_ON(ret)) + return ret; + } iommufd_object_ops[obj->type].destroy(obj); kfree(obj); + return 0; + +err_xa: + if (zerod_shortterm) { + /* Restore the xarray owned reference */ + refcount_set(&obj->shortterm_users, 1); + } + xa_unlock(&ictx->objects); + + /* The returned object reference count is zero */ + return ret; } static int iommufd_destroy(struct iommufd_ucmd *ucmd) { struct iommu_destroy *cmd = ucmd->cmd; - struct iommufd_object *obj; - obj = iommufd_object_remove(ucmd->ictx, cmd->id, false); - if (IS_ERR(obj)) - return PTR_ERR(obj); - iommufd_object_ops[obj->type].destroy(obj); - kfree(obj); - return 0; + return iommufd_object_remove(ucmd->ictx, NULL, cmd->id, 0); } static int iommufd_fops_open(struct inode *inode, struct file *filp) @@ -238,6 +251,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp) xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT); xa_init(&ictx->groups); ictx->file = filp; + init_waitqueue_head(&ictx->destroy_wait); filp->private_data = ictx; return 0; } -- cgit From 5558b92e8d39e18aa19619be2ee37274e9592528 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Sun, 26 Nov 2023 16:09:29 -0800 Subject: cxl/core: Always hold region_rwsem while reading poison lists A read of a device poison list is triggered via a sysfs attribute and the results are logged as kernel trace events of type cxl_poison. The work is managed by either: a) the region driver when one of more regions map the device, or by b) the memdev driver when no regions map the device. In the case of a) the region driver holds the region_rwsem while reading the poison by committed endpoint decoder mappings and for any unmapped resources. This makes sure that the cxl_poison trace event trace reports valid region info. (Region name, HPA, and UUID). In the case of b) the memdev driver holds the dpa_rwsem preventing new DPA resources from being attached to a region. However, it leaves a gap between region attach and decoder commit actions. If a DPA in the gap is in the poison list, the cxl_poison trace event will omit the region info. Close the gap by holding the region_rwsem and the dpa_rwsem when reading poison per memdev. Since both methods now hold both locks, down_read both from the caller. Doing so also addresses the lockdep assert that found this issue: Commit 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper") Fixes: f0832a586396 ("cxl/region: Provide region info to the cxl_poison trace event") Signed-off-by: Alison Schofield Reviewed-by: Davidlohr Bueso Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/08e8e7ec9a3413b91d51de39e385653494b1eed0.1701041440.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/memdev.c | 9 ++++++++- drivers/cxl/core/region.c | 5 ----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index fc5c2b414793..5ad1b13e780a 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -227,10 +227,16 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) if (!port || !is_cxl_endpoint(port)) return -EINVAL; - rc = down_read_interruptible(&cxl_dpa_rwsem); + rc = down_read_interruptible(&cxl_region_rwsem); if (rc) return rc; + rc = down_read_interruptible(&cxl_dpa_rwsem); + if (rc) { + up_read(&cxl_region_rwsem); + return rc; + } + if (cxl_num_decoders_committed(port) == 0) { /* No regions mapped to this memdev */ rc = cxl_get_poison_by_memdev(cxlmd); @@ -239,6 +245,7 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) rc = cxl_get_poison_by_endpoint(port); } up_read(&cxl_dpa_rwsem); + up_read(&cxl_region_rwsem); return rc; } diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 56e575c79bb4..3e817a6f94c6 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2467,10 +2467,6 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port) struct cxl_poison_context ctx; int rc = 0; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) - return rc; - ctx = (struct cxl_poison_context) { .port = port }; @@ -2480,7 +2476,6 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port) rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev), &ctx); - up_read(&cxl_region_rwsem); return rc; } -- cgit From 0e33ac9c3ffe5e4f55c68345f44cea7fec2fe750 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Sun, 26 Nov 2023 16:09:30 -0800 Subject: cxl/memdev: Hold region_rwsem during inject and clear poison ops Poison inject and clear are supported via debugfs where a privileged user can inject and clear poison to a device physical address. Commit 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper") added a lockdep assert that highlighted a gap in poison inject and clear functions where holding the dpa_rwsem does not assure that a a DPA is not added to a region. The impact for inject and clear is that if the DPA address being injected or cleared has been attached to a region, but not yet committed, the dev_dbg() message intended to alert the debug user that they are acting on a mapped address is not emitted. Also, the cxl_poison trace event that serves as a log of the inject and clear activity will not include region info. Close this gap by snapshotting an unchangeable region state during poison inject and clear operations. That means holding both the region_rwsem and the dpa_rwsem during the inject and clear ops. Fixes: d2fbc4865802 ("cxl/memdev: Add support for the Inject Poison mailbox command") Fixes: 9690b07748d1 ("cxl/memdev: Add support for the Clear Poison mailbox command") Signed-off-by: Alison Schofield Reviewed-by: Davidlohr Bueso Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/08721dc1df0a51e4e38fecd02425c3475912dfd5.1701041440.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/memdev.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 5ad1b13e780a..2f43d368ba07 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -331,10 +331,16 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - rc = down_read_interruptible(&cxl_dpa_rwsem); + rc = down_read_interruptible(&cxl_region_rwsem); if (rc) return rc; + rc = down_read_interruptible(&cxl_dpa_rwsem); + if (rc) { + up_read(&cxl_region_rwsem); + return rc; + } + rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) goto out; @@ -362,6 +368,7 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT); out: up_read(&cxl_dpa_rwsem); + up_read(&cxl_region_rwsem); return rc; } @@ -379,10 +386,16 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - rc = down_read_interruptible(&cxl_dpa_rwsem); + rc = down_read_interruptible(&cxl_region_rwsem); if (rc) return rc; + rc = down_read_interruptible(&cxl_dpa_rwsem); + if (rc) { + up_read(&cxl_region_rwsem); + return rc; + } + rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) goto out; @@ -419,6 +432,7 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR); out: up_read(&cxl_dpa_rwsem); + up_read(&cxl_region_rwsem); return rc; } -- cgit From cb9c919364653eeafb49e7ff5cd32f1ad64063ac Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 30 Nov 2023 11:08:52 +1000 Subject: nouveau/tu102: flush all pdbs on vmm flush This is a hack around a bug exposed with the GSP code, I'm not sure what is happening exactly, but it appears some of our flushes don't result in proper tlb invalidation for out BAR2 and we get a BAR2 fault from GSP and it all dies. Signed-off-by: Dave Airlie Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231130010852.4034774-1-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c index e34bc6076401..8379e72d77ab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c @@ -31,7 +31,7 @@ tu102_vmm_flush(struct nvkm_vmm *vmm, int depth) type |= 0x00000001; /* PAGE_ALL */ if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR])) - type |= 0x00000004; /* HUB_ONLY */ + type |= 0x00000006; /* HUB_ONLY | ALL PDB (hack) */ mutex_lock(&vmm->mmu->mutex); -- cgit From 27d25348d42161837be08fc63b04a2559d2e781c Mon Sep 17 00:00:00 2001 From: Ashwin Dayanand Kamat Date: Wed, 29 Nov 2023 16:10:29 +0530 Subject: x86/sev: Fix kernel crash due to late update to read-only ghcb_version A write-access violation page fault kernel crash was observed while running cpuhotplug LTP testcases on SEV-ES enabled systems. The crash was observed during hotplug, after the CPU was offlined and the process was migrated to different CPU. setup_ghcb() is called again which tries to update ghcb_version in sev_es_negotiate_protocol(). Ideally this is a read_only variable which is initialised during booting. Trying to write it results in a pagefault: BUG: unable to handle page fault for address: ffffffffba556e70 #PF: supervisor write access in kernel mode #PF: error_code(0x0003) - permissions violation [ ...] Call Trace: ? __die_body.cold+0x1a/0x1f ? __die+0x2a/0x35 ? page_fault_oops+0x10c/0x270 ? setup_ghcb+0x71/0x100 ? __x86_return_thunk+0x5/0x6 ? search_exception_tables+0x60/0x70 ? __x86_return_thunk+0x5/0x6 ? fixup_exception+0x27/0x320 ? kernelmode_fixup_or_oops+0xa2/0x120 ? __bad_area_nosemaphore+0x16a/0x1b0 ? kernel_exc_vmm_communication+0x60/0xb0 ? bad_area_nosemaphore+0x16/0x20 ? do_kern_addr_fault+0x7a/0x90 ? exc_page_fault+0xbd/0x160 ? asm_exc_page_fault+0x27/0x30 ? setup_ghcb+0x71/0x100 ? setup_ghcb+0xe/0x100 cpu_init_exception_handling+0x1b9/0x1f0 The fix is to call sev_es_negotiate_protocol() only in the BSP boot phase, and it only needs to be done once in any case. [ mingo: Refined the changelog. ] Fixes: 95d33bfaa3e1 ("x86/sev: Register GHCB memory when SEV-SNP is active") Suggested-by: Tom Lendacky Co-developed-by: Bo Gan Signed-off-by: Bo Gan Signed-off-by: Ashwin Dayanand Kamat Signed-off-by: Ingo Molnar Acked-by: Tom Lendacky Link: https://lore.kernel.org/r/1701254429-18250-1-git-send-email-kashwindayan@vmware.com --- arch/x86/kernel/sev.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 70472eebe719..c67285824e82 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1234,10 +1234,6 @@ void setup_ghcb(void) if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) return; - /* First make sure the hypervisor talks a supported protocol. */ - if (!sev_es_negotiate_protocol()) - sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); - /* * Check whether the runtime #VC exception handler is active. It uses * the per-CPU GHCB page which is set up by sev_es_init_vc_handling(). @@ -1254,6 +1250,13 @@ void setup_ghcb(void) return; } + /* + * Make sure the hypervisor talks a supported protocol. + * This gets called only in the BSP boot phase. + */ + if (!sev_es_negotiate_protocol()) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); + /* * Clear the boot_ghcb. The first exception comes in before the bss * section is cleared. -- cgit From 7701ce26c747322de1f1a87f8e32792582f33249 Mon Sep 17 00:00:00 2001 From: Adrián Larumbe Date: Sat, 25 Nov 2023 20:52:02 +0000 Subject: drm/panfrost: Consider dma-buf imported objects as resident MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A GEM object constructed from a dma-buf imported sgtable should be regarded as being memory resident, because the dma-buf API mandates backing storage to be allocated when attachment succeeds. Signed-off-by: Adrián Larumbe Fixes: 9ccdac7aa822 ("drm/panfrost: Add fdinfo support for memory stats") Reported-by: Boris Brezillon Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20231125205438.375407-2-adrian.larumbe@collabora.com --- drivers/gpu/drm/panfrost/panfrost_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 0cf64456e29a..d47b40b82b0b 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -200,7 +200,7 @@ static enum drm_gem_object_status panfrost_gem_status(struct drm_gem_object *obj struct panfrost_gem_object *bo = to_panfrost_bo(obj); enum drm_gem_object_status res = 0; - if (bo->base.pages) + if (bo->base.base.import_attach || bo->base.pages) res |= DRM_GEM_OBJECT_RESIDENT; if (bo->base.madv == PANFROST_MADV_DONTNEED) -- cgit From 64111a0e22a9d4e0de7a5d04e7d5c21d0af4b900 Mon Sep 17 00:00:00 2001 From: Adrián Larumbe Date: Sat, 25 Nov 2023 20:52:03 +0000 Subject: drm/panfrost: Fix incorrect updating of current device frequency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was noticed when setting the Panfrost's DVFS device to the performant governor, GPU frequency as reported by fdinfo had dropped to 0 permamently. There are two separate issues causing this behaviour: - Not initialising the device's current_frequency variable to its original value during device probe(). - Updating said variable in Panfrost devfreq's get_dev_status() rather than after the new OPP's frequency had been retrieved in target(), which meant the old frequency would be assigned instead. Signed-off-by: Adrián Larumbe Fixes: f11b0417eec2 ("drm/panfrost: Add fdinfo support GPU load metrics") Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20231125205438.375407-3-adrian.larumbe@collabora.com --- drivers/gpu/drm/panfrost/panfrost_devfreq.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c index f59c82ea8870..2d30da38c2c3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c +++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c @@ -29,14 +29,20 @@ static void panfrost_devfreq_update_utilization(struct panfrost_devfreq *pfdevfr static int panfrost_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) { + struct panfrost_device *ptdev = dev_get_drvdata(dev); struct dev_pm_opp *opp; + int err; opp = devfreq_recommended_opp(dev, freq, flags); if (IS_ERR(opp)) return PTR_ERR(opp); dev_pm_opp_put(opp); - return dev_pm_opp_set_rate(dev, *freq); + err = dev_pm_opp_set_rate(dev, *freq); + if (!err) + ptdev->pfdevfreq.current_frequency = *freq; + + return err; } static void panfrost_devfreq_reset(struct panfrost_devfreq *pfdevfreq) @@ -58,7 +64,6 @@ static int panfrost_devfreq_get_dev_status(struct device *dev, spin_lock_irqsave(&pfdevfreq->lock, irqflags); panfrost_devfreq_update_utilization(pfdevfreq); - pfdevfreq->current_frequency = status->current_frequency; status->total_time = ktime_to_ns(ktime_add(pfdevfreq->busy_time, pfdevfreq->idle_time)); @@ -164,6 +169,14 @@ int panfrost_devfreq_init(struct panfrost_device *pfdev) panfrost_devfreq_profile.initial_freq = cur_freq; + /* + * We could wait until panfrost_devfreq_target() to set this value, but + * since the simple_ondemand governor works asynchronously, there's a + * chance by the time someone opens the device's fdinfo file, current + * frequency hasn't been updated yet, so let's just do an early set. + */ + pfdevfreq->current_frequency = cur_freq; + /* * Set the recommend OPP this will enable and configure the regulator * if any and will avoid a switch off by regulator_late_cleanup() -- cgit From cbf54f37600e874d82886aa3b2f471778cae01ce Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 29 Nov 2023 19:16:54 +0100 Subject: platform/x86: wmi: Skip blocks with zero instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some machines like the HP Omen 17 ck2000nf contain WMI blocks with zero instances, so any WMI driver which tries to handle the associated WMI device will fail. Skip such WMI blocks to avoid confusing any WMI drivers. Reported-by: Alexis Belmonte Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218188 Fixes: bff431e49ff5 ("ACPI: WMI: Add ACPI-WMI mapping driver") Tested-by: Alexis Belmonte Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20231129181654.5800-1-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/wmi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 5c27b4aa9690..5dd22258cb3b 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -1340,6 +1340,11 @@ static int parse_wdg(struct device *wmi_bus_dev, struct platform_device *pdev) if (debug_dump_wdg) wmi_dump_wdg(&gblock[i]); + if (!gblock[i].instance_count) { + dev_info(wmi_bus_dev, FW_INFO "%pUL has zero instances\n", &gblock[i].guid); + continue; + } + if (guid_already_parsed_for_legacy(device, &gblock[i].guid)) continue; -- cgit From 2dcf5fde6dffb312a4bfb8ef940cea2d1f402e32 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Mon, 27 Nov 2023 14:33:13 +0800 Subject: ext4: prevent the normalized size from exceeding EXT_MAX_BLOCKS For files with logical blocks close to EXT_MAX_BLOCKS, the file size predicted in ext4_mb_normalize_request() may exceed EXT_MAX_BLOCKS. This can cause some blocks to be preallocated that will not be used. And after [Fixes], the following issue may be triggered: ========================================================= kernel BUG at fs/ext4/mballoc.c:4653! Internal error: Oops - BUG: 00000000f2000800 [#1] SMP CPU: 1 PID: 2357 Comm: xfs_io 6.7.0-rc2-00195-g0f5cc96c367f Hardware name: linux,dummy-virt (DT) pc : ext4_mb_use_inode_pa+0x148/0x208 lr : ext4_mb_use_inode_pa+0x98/0x208 Call trace: ext4_mb_use_inode_pa+0x148/0x208 ext4_mb_new_inode_pa+0x240/0x4a8 ext4_mb_use_best_found+0x1d4/0x208 ext4_mb_try_best_found+0xc8/0x110 ext4_mb_regular_allocator+0x11c/0xf48 ext4_mb_new_blocks+0x790/0xaa8 ext4_ext_map_blocks+0x7cc/0xd20 ext4_map_blocks+0x170/0x600 ext4_iomap_begin+0x1c0/0x348 ========================================================= Here is a calculation when adjusting ac_b_ex in ext4_mb_new_inode_pa(): ex.fe_logical = orig_goal_end - EXT4_C2B(sbi, ex.fe_len); if (ac->ac_o_ex.fe_logical >= ex.fe_logical) goto adjust_bex; The problem is that when orig_goal_end is subtracted from ac_b_ex.fe_len it is still greater than EXT_MAX_BLOCKS, which causes ex.fe_logical to overflow to a very small value, which ultimately triggers a BUG_ON in ext4_mb_new_inode_pa() because pa->pa_free < len. The last logical block of an actual write request does not exceed EXT_MAX_BLOCKS, so in ext4_mb_normalize_request() also avoids normalizing the last logical block to exceed EXT_MAX_BLOCKS to avoid the above issue. The test case in [Link] can reproduce the above issue with 64k block size. Link: https://patchwork.kernel.org/project/fstests/list/?series=804003 Cc: # 6.4 Fixes: 93cdf49f6eca ("ext4: Fix best extent lstart adjustment logic in ext4_mb_new_inode_pa()") Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20231127063313.3734294-1-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 454d5612641e..d72b5e3c92ec 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4478,6 +4478,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, start = max(start, rounddown(ac->ac_o_ex.fe_logical, (ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb))); + /* avoid unnecessary preallocation that may trigger assertions */ + if (start + size > EXT_MAX_BLOCKS) + size = EXT_MAX_BLOCKS - start; + /* don't cover already allocated blocks in selected range */ if (ar->pleft && start <= ar->lleft) { size -= ar->lleft + 1 - start; -- cgit From 1fefca6c57fb928d2131ff365270cbf863d89c88 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Fri, 24 Nov 2023 19:27:47 +0100 Subject: hwmon: (acpi_power_meter) Fix 4.29 MW bug The ACPI specification says: "If an error occurs while obtaining the meter reading or if the value is not available then an Integer with all bits set is returned" Since the "integer" is 32 bits in case of the ACPI power meter, userspace will get a power reading of 2^32 * 1000 miliwatts (~4.29 MW) in case of such an error. This was discovered due to a lm_sensors bugreport (https://github.com/lm-sensors/lm-sensors/issues/460). Fix this by returning -ENODATA instead. Tested-by: Fixes: de584afa5e18 ("hwmon driver for ACPI 4.0 power meters") Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20231124182747.13956-1-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- drivers/hwmon/acpi_power_meter.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index 8db740214ffd..703666b95bf4 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -31,6 +31,7 @@ #define POWER_METER_CAN_NOTIFY (1 << 3) #define POWER_METER_IS_BATTERY (1 << 8) #define UNKNOWN_HYSTERESIS 0xFFFFFFFF +#define UNKNOWN_POWER 0xFFFFFFFF #define METER_NOTIFY_CONFIG 0x80 #define METER_NOTIFY_TRIP 0x81 @@ -348,6 +349,9 @@ static ssize_t show_power(struct device *dev, update_meter(resource); mutex_unlock(&resource->lock); + if (resource->power == UNKNOWN_POWER) + return -ENODATA; + return sprintf(buf, "%llu\n", resource->power * 1000); } -- cgit From 85559227211020b270728104c3b89918f7af27ac Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 29 Nov 2023 19:47:39 +0800 Subject: jbd2: correct the printing of write_flags in jbd2_write_superblock() The write_flags print in the trace of jbd2_write_superblock() is not real, so move the modification before the trace. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20231129114740.2686201-1-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/jbd2/journal.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ed53188472f9..71b30f6a662d 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1798,9 +1798,11 @@ static int jbd2_write_superblock(journal_t *journal, blk_opf_t write_flags) return -EIO; } - trace_jbd2_write_superblock(journal, write_flags); if (!(journal->j_flags & JBD2_BARRIER)) write_flags &= ~(REQ_FUA | REQ_PREFLUSH); + + trace_jbd2_write_superblock(journal, write_flags); + if (buffer_write_io_error(bh)) { /* * Oh, dear. A previous attempt to write the journal -- cgit From 6a3afb6ac6dfab158ebdd4b87941178f58c8939f Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 29 Nov 2023 19:47:40 +0800 Subject: jbd2: increase the journal IO's priority Current jbd2 only add REQ_SYNC for descriptor block, metadata log buffer, commit buffer and superblock buffer, the submitted IO could be throttled by writeback throttle in block layer, that could lead to priority inversion in some cases. The log IO looks like a kind of high priority metadata IO, so it should not be throttled by WBT like QOS policies in block layer, let's add REQ_SYNC | REQ_IDLE to exempt from writeback throttle, and also add REQ_META together indicates it's a metadata IO. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20231129114740.2686201-2-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 9 +++++---- fs/jbd2/journal.c | 20 +++++++++++--------- include/linux/jbd2.h | 3 +++ 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 8d6f934c3d95..9bdb377a348f 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -119,7 +119,7 @@ static int journal_submit_commit_record(journal_t *journal, struct commit_header *tmp; struct buffer_head *bh; struct timespec64 now; - blk_opf_t write_flags = REQ_OP_WRITE | REQ_SYNC; + blk_opf_t write_flags = REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS; *cbh = NULL; @@ -395,8 +395,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ jbd2_journal_update_sb_log_tail(journal, journal->j_tail_sequence, - journal->j_tail, - REQ_SYNC); + journal->j_tail, 0); mutex_unlock(&journal->j_checkpoint_mutex); } else { jbd2_debug(3, "superblock not updated\n"); @@ -715,6 +714,7 @@ start_journal_io: for (i = 0; i < bufs; i++) { struct buffer_head *bh = wbuf[i]; + /* * Compute checksum. */ @@ -727,7 +727,8 @@ start_journal_io: clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(REQ_OP_WRITE | REQ_SYNC, bh); + submit_bh(REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS, + bh); } cond_resched(); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 71b30f6a662d..206cb53ef2b0 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1100,8 +1100,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) * space and if we lose sb update during power failure we'd replay * old transaction with possibly newly overwritten data. */ - ret = jbd2_journal_update_sb_log_tail(journal, tid, block, - REQ_SYNC | REQ_FUA); + ret = jbd2_journal_update_sb_log_tail(journal, tid, block, REQ_FUA); if (ret) goto out; @@ -1775,8 +1774,7 @@ static int journal_reset(journal_t *journal) */ jbd2_journal_update_sb_log_tail(journal, journal->j_tail_sequence, - journal->j_tail, - REQ_SYNC | REQ_FUA); + journal->j_tail, REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } return jbd2_journal_start_thread(journal); @@ -1798,6 +1796,11 @@ static int jbd2_write_superblock(journal_t *journal, blk_opf_t write_flags) return -EIO; } + /* + * Always set high priority flags to exempt from block layer's + * QOS policies, e.g. writeback throttle. + */ + write_flags |= JBD2_JOURNAL_REQ_FLAGS; if (!(journal->j_flags & JBD2_BARRIER)) write_flags &= ~(REQ_FUA | REQ_PREFLUSH); @@ -2052,7 +2055,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal) jbd2_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode); sb->s_errno = cpu_to_be32(errcode); - jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA); + jbd2_write_superblock(journal, REQ_FUA); } EXPORT_SYMBOL(jbd2_journal_update_sb_errno); @@ -2173,8 +2176,7 @@ int jbd2_journal_destroy(journal_t *journal) ++journal->j_transaction_sequence; write_unlock(&journal->j_state_lock); - jbd2_mark_journal_empty(journal, - REQ_SYNC | REQ_PREFLUSH | REQ_FUA); + jbd2_mark_journal_empty(journal, REQ_PREFLUSH | REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } else err = -EIO; @@ -2475,7 +2477,7 @@ int jbd2_journal_flush(journal_t *journal, unsigned int flags) * the magic code for a fully-recovered superblock. Any future * commits of data to the journal will restore the current * s_start value. */ - jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA); + jbd2_mark_journal_empty(journal, REQ_FUA); if (flags) err = __jbd2_journal_erase(journal, flags); @@ -2521,7 +2523,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) if (write) { /* Lock to make assertions happy... */ mutex_lock_io(&journal->j_checkpoint_mutex); - jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA); + jbd2_mark_journal_empty(journal, REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 6dcbb4eb80fb..beb30719ee16 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1374,6 +1374,9 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2) JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) +/* Journal high priority write IO operation flags */ +#define JBD2_JOURNAL_REQ_FLAGS (REQ_META | REQ_SYNC | REQ_IDLE) + /* * Journal flag definitions */ -- cgit From 619f75dae2cf117b1d07f27b046b9ffb071c4685 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 30 Nov 2023 10:56:53 +0100 Subject: ext4: fix warning in ext4_dio_write_end_io() The syzbot has reported that it can hit the warning in ext4_dio_write_end_io() because i_size < i_disksize. Indeed the reproducer creates a race between DIO IO completion and truncate expanding the file and thus ext4_dio_write_end_io() sees an inconsistent inode state where i_disksize is already updated but i_size is not updated yet. Since we are careful when setting up DIO write and consider it extending (and thus performing the IO synchronously with i_rwsem held exclusively) whenever it goes past either of i_size or i_disksize, we can use the same test during IO completion without risking entering ext4_handle_inode_extension() without i_rwsem held. This way we make it obvious both i_size and i_disksize are large enough when we report DIO completion without relying on unreliable WARN_ON. Reported-by: Fixes: 91562895f803 ("ext4: properly sync file size update after O_SYNC direct IO") Signed-off-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20231130095653.22679-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/file.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 0166bb9ca160..6aa15dafc677 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -349,9 +349,10 @@ static void ext4_inode_extension_cleanup(struct inode *inode, ssize_t count) return; } /* - * If i_disksize got extended due to writeback of delalloc blocks while - * the DIO was running we could fail to cleanup the orphan list in - * ext4_handle_inode_extension(). Do it now. + * If i_disksize got extended either due to writeback of delalloc + * blocks or extending truncate while the DIO was running we could fail + * to cleanup the orphan list in ext4_handle_inode_extension(). Do it + * now. */ if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) { handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); @@ -386,10 +387,11 @@ static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size, * blocks. But the code in ext4_iomap_alloc() is careful to use * zeroed/unwritten extents if this is possible; thus we won't leave * uninitialized blocks in a file even if we didn't succeed in writing - * as much as we intended. + * as much as we intended. Also we can race with truncate or write + * expanding the file so we have to be a bit careful here. */ - WARN_ON_ONCE(i_size_read(inode) < READ_ONCE(EXT4_I(inode)->i_disksize)); - if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize)) + if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize) && + pos + size <= i_size_read(inode)) return size; return ext4_handle_inode_extension(inode, pos, size); } -- cgit From 75475bb51e78a3f54ad2f69380f2a1c985e85f2d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Nov 2023 16:06:30 +0000 Subject: ipv6: fix potential NULL deref in fib6_add() If fib6_find_prefix() returns NULL, we should silently fallback using fib6_null_entry regardless of RT6_DEBUG value. syzbot reported: WARNING: CPU: 0 PID: 5477 at net/ipv6/ip6_fib.c:1516 fib6_add+0x310d/0x3fa0 net/ipv6/ip6_fib.c:1516 Modules linked in: CPU: 0 PID: 5477 Comm: syz-executor.0 Not tainted 6.7.0-rc2-syzkaller-00029-g9b6de136b5f0 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 RIP: 0010:fib6_add+0x310d/0x3fa0 net/ipv6/ip6_fib.c:1516 Code: 00 48 8b 54 24 68 e8 42 22 00 00 48 85 c0 74 14 49 89 c6 e8 d5 d3 c2 f7 eb 5d e8 ce d3 c2 f7 e9 ca 00 00 00 e8 c4 d3 c2 f7 90 <0f> 0b 90 48 b8 00 00 00 00 00 fc ff df 48 8b 4c 24 38 80 3c 01 00 RSP: 0018:ffffc90005067740 EFLAGS: 00010293 RAX: ffffffff89cba5bc RBX: ffffc90005067ab0 RCX: ffff88801a2e9dc0 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000 RBP: ffffc90005067980 R08: ffffffff89cbca85 R09: 1ffff110040d4b85 R10: dffffc0000000000 R11: ffffed10040d4b86 R12: 00000000ffffffff R13: 1ffff110051c3904 R14: ffff8880206a5c00 R15: ffff888028e1c820 FS: 00007f763783c6c0(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f763783bff8 CR3: 000000007f74d000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __ip6_ins_rt net/ipv6/route.c:1303 [inline] ip6_route_add+0x88/0x120 net/ipv6/route.c:3847 ipv6_route_ioctl+0x525/0x7b0 net/ipv6/route.c:4467 inet6_ioctl+0x21a/0x270 net/ipv6/af_inet6.c:575 sock_do_ioctl+0x152/0x460 net/socket.c:1220 sock_ioctl+0x615/0x8c0 net/socket.c:1339 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl+0xf8/0x170 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x45/0x110 arch/x86/entry/common.c:82 Fixes: 7bbfe00e0252 ("ipv6: fix general protection fault in fib6_add()") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Wei Wang Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20231129160630.3509216-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_fib.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 28b01a068412..7772f42ff2b9 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1511,13 +1511,9 @@ out: if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { pn_leaf = fib6_find_prefix(info->nl_net, table, pn); -#if RT6_DEBUG >= 2 - if (!pn_leaf) { - WARN_ON(!pn_leaf); + if (!pn_leaf) pn_leaf = info->nl_net->ipv6.fib6_null_entry; - } -#endif fib6_info_hold(pn_leaf); rcu_assign_pointer(pn->leaf, pn_leaf); } -- cgit From 96d7e79401364c6e9a63af5f74f76792b03cb832 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 21 Nov 2023 07:43:14 +0200 Subject: drm/i915: Check pipe active state in {planes,vrr}_{enabling,disabling}() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit {planes,vrr}_{enabling,disabling}() are supposed to indicate whether the specific hardware feature is supposed to be enabling or disabling. That can only makes sense if the pipe is active overall. So check for that before we go poking at the hardware. I think we're semi-safe currently on due to: - intel_pre_plane_update() doesn't get called when the pipe was not-active prior to the commit, but this is actually a bug. This saves vrr_disabling(), and vrr_enabling() is called from deeper down where we have already checked hw.active. - active_planes mirrors the crtc's hw.active Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231121054324.9988-2-ville.syrjala@linux.intel.com (cherry picked from commit bc53c4d56eb24dbe56cd2c66ef4e9fc9393b1533) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index a2a806262c9e..94330108145e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -906,12 +906,18 @@ static bool needs_async_flip_vtd_wa(const struct intel_crtc_state *crtc_state) static bool planes_enabling(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { + if (!new_crtc_state->hw.active) + return false; + return is_enabling(active_planes, old_crtc_state, new_crtc_state); } static bool planes_disabling(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { + if (!old_crtc_state->hw.active) + return false; + return is_disabling(active_planes, old_crtc_state, new_crtc_state); } @@ -928,6 +934,9 @@ static bool vrr_params_changed(const struct intel_crtc_state *old_crtc_state, static bool vrr_enabling(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { + if (!new_crtc_state->hw.active) + return false; + return is_enabling(vrr.enable, old_crtc_state, new_crtc_state) || (new_crtc_state->vrr.enable && (new_crtc_state->update_m_n || new_crtc_state->update_lrr || @@ -937,6 +946,9 @@ static bool vrr_enabling(const struct intel_crtc_state *old_crtc_state, static bool vrr_disabling(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { + if (!old_crtc_state->hw.active) + return false; + return is_disabling(vrr.enable, old_crtc_state, new_crtc_state) || (old_crtc_state->vrr.enable && (new_crtc_state->update_m_n || new_crtc_state->update_lrr || -- cgit From 95dd1e34ff5bbee93a28ff3947eceaf6de811b1a Mon Sep 17 00:00:00 2001 From: Boerge Struempfel Date: Wed, 29 Nov 2023 16:23:07 +0100 Subject: gpiolib: sysfs: Fix error handling on failed export If gpio_set_transitory() fails, we should free the GPIO again. Most notably, the flag FLAG_REQUESTED has previously been set in gpiod_request_commit(), and should be reset on failure. To my knowledge, this does not affect any current users, since the gpio_set_transitory() mainly returns 0 and -ENOTSUPP, which is converted to 0. However the gpio_set_transitory() function calles the .set_config() function of the corresponding GPIO chip and there are some GPIO drivers in which some (unlikely) branches return other values like -EPROBE_DEFER, and -EINVAL. In these cases, the above mentioned FLAG_REQUESTED would not be reset, which results in the pin being blocked until the next reboot. Fixes: e10f72bf4b3e ("gpio: gpiolib: Generalise state persistence beyond sleep") Signed-off-by: Boerge Struempfel Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-sysfs.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c index 6f309a3b2d9a..12d853845bb8 100644 --- a/drivers/gpio/gpiolib-sysfs.c +++ b/drivers/gpio/gpiolib-sysfs.c @@ -474,14 +474,17 @@ static ssize_t export_store(const struct class *class, goto done; status = gpiod_set_transitory(desc, false); - if (!status) { - status = gpiod_export(desc, true); - if (status < 0) - gpiod_free(desc); - else - set_bit(FLAG_SYSFS, &desc->flags); + if (status) { + gpiod_free(desc); + goto done; } + status = gpiod_export(desc, true); + if (status < 0) + gpiod_free(desc); + else + set_bit(FLAG_SYSFS, &desc->flags); + done: if (status) pr_debug("%s: status %d\n", __func__, status); -- cgit From cf50b5cae84741268c7bd3fed568c1b8beef9fa9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 28 Nov 2023 06:59:15 -0800 Subject: MAINTAINERS: exclude 9p from networking We don't have much to say about 9p, even tho it lives under net/. Avoid CCing netdev. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 012df8ccf34e..e6109201e8b4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15068,6 +15068,7 @@ F: lib/random32.c F: net/ F: tools/net/ F: tools/testing/selftests/net/ +X: net/9p/ X: net/bluetooth/ NETWORKING [IPSEC] -- cgit From 9572c949385aa2ef10368287c439bcb7935137c8 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 29 Nov 2023 10:53:42 +0530 Subject: octeontx2-pf: Add missing mutex lock in otx2_get_pauseparam All the mailbox messages sent to AF needs to be guarded by mutex lock. Add the missing lock in otx2_get_pauseparam function. Fixes: 75f36270990c ("octeontx2-pf: Support to enable/disable pause frames via ethtool") Signed-off-by: Subbaraya Sundeep Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 9efcec549834..53f6258a973c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -334,9 +334,12 @@ static void otx2_get_pauseparam(struct net_device *netdev, if (is_otx2_lbkvf(pfvf->pdev)) return; + mutex_lock(&pfvf->mbox.lock); req = otx2_mbox_alloc_msg_cgx_cfg_pause_frm(&pfvf->mbox); - if (!req) + if (!req) { + mutex_unlock(&pfvf->mbox.lock); return; + } if (!otx2_sync_mbox_msg(&pfvf->mbox)) { rsp = (struct cgx_pause_frm_cfg *) @@ -344,6 +347,7 @@ static void otx2_get_pauseparam(struct net_device *netdev, pause->rx_pause = rsp->rx_pause; pause->tx_pause = rsp->tx_pause; } + mutex_unlock(&pfvf->mbox.lock); } static int otx2_set_pauseparam(struct net_device *netdev, -- cgit From 830139e7b6911266a84a77e1f18abf758995cc89 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 29 Nov 2023 11:11:48 +0530 Subject: octeontx2-af: Check return value of nix_get_nixlf before using nixlf If a NIXLF is not attached to a PF/VF device then nix_get_nixlf function fails and returns proper error code. But npc_get_default_entry_action does not check it and uses garbage value in subsequent calls. Fix this by cheking the return value of nix_get_nixlf. Fixes: 967db3529eca ("octeontx2-af: add support for multicast/promisc packet replication feature") Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 16cfc802e348..f65805860c8d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -389,7 +389,13 @@ static u64 npc_get_default_entry_action(struct rvu *rvu, struct npc_mcam *mcam, int bank, nixlf, index; /* get ucast entry rule entry index */ - nix_get_nixlf(rvu, pf_func, &nixlf, NULL); + if (nix_get_nixlf(rvu, pf_func, &nixlf, NULL)) { + dev_err(rvu->dev, "%s: nixlf not attached to pcifunc:0x%x\n", + __func__, pf_func); + /* Action 0 is drop */ + return 0; + } + index = npc_get_nixlf_mcam_index(mcam, pf_func, nixlf, NIXLF_UCAST_ENTRY); bank = npc_get_bank(mcam, index); -- cgit From 19650c0f402f53abe48a55a1c49c8ed9576a088c Mon Sep 17 00:00:00 2001 From: Jeremy Soller Date: Mon, 27 Nov 2023 11:42:38 -0700 Subject: ASoC: amd: yc: Add DMI entry to support System76 Pangolin 13 Add pang13 quirk to enable the internal microphone. Signed-off-by: Jeremy Soller Signed-off-by: Tim Crawford Link: https://lore.kernel.org/r/20231127184237.32077-2-tcrawford@system76.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index e2a510443bf1..c425652b0fad 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -388,6 +388,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "pang12"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "System76"), + DMI_MATCH(DMI_PRODUCT_VERSION, "pang13"), + } + }, {} }; -- cgit From b24e3590c94ab0aba6e455996b502a83baa5c31c Mon Sep 17 00:00:00 2001 From: Malcolm Hart Date: Mon, 27 Nov 2023 20:36:00 +0000 Subject: ASoC: amd: yc: Fix non-functional mic on ASUS E1504FA This patch adds ASUSTeK COMPUTER INC "E1504FA" to the quirks file acp6x-mach.c to enable microphone array on ASUS Vivobook GO 15. I have this laptop and can confirm that the patch succeeds in enabling the microphone array. Signed-off-by: Malcolm Hart Cc: stable@vger.kernel.org Rule: add Link: https://lore.kernel.org/stable/875y1nt1bx.fsf%405harts.com Link: https://lore.kernel.org/r/871qcbszh0.fsf@5harts.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index c425652b0fad..d83cb6e4c62a 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -283,6 +283,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "M6500RC"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "E1504FA"), + } + }, { .driver_data = &acp6x_card, .matches = { -- cgit From a2f35ed1d237c459100adb0c39bb811d7f170977 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 16 Nov 2023 17:44:21 +0100 Subject: ASoC: codecs: lpass-tx-macro: set active_decimator correct default value The -1 value for active_decimator[dai_id] is considered as "not set", but at probe the table is initialized a 0, this prevents enabling the DEC0 Mixer since it will be considered as already set. Initialize the table entries as -1 to fix tx_macro_tx_mixer_put(). Fixes: 1c6a7f5250ce ("ASoC: codecs: tx-macro: fix active_decimator array") Fixes: c1057a08af43 ("ASoC: codecs: tx-macro: fix kcontrol put") Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20231116-topic-sm8x50-upstream-tx-macro-fix-active-decimator-set-v1-1-6edf402f4b6f@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-tx-macro.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c index 82f9873ffada..124c2e144f33 100644 --- a/sound/soc/codecs/lpass-tx-macro.c +++ b/sound/soc/codecs/lpass-tx-macro.c @@ -2021,6 +2021,11 @@ static int tx_macro_probe(struct platform_device *pdev) tx->dev = dev; + /* Set active_decimator default value */ + tx->active_decimator[TX_MACRO_AIF1_CAP] = -1; + tx->active_decimator[TX_MACRO_AIF2_CAP] = -1; + tx->active_decimator[TX_MACRO_AIF3_CAP] = -1; + /* set MCLK and NPL rates */ clk_set_rate(tx->mclk, MCLK_FREQ); clk_set_rate(tx->npl, MCLK_FREQ); -- cgit From a0575b4add21a243cc3257e75ad913cd5377d5f2 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 28 Nov 2023 14:39:14 +0200 Subject: ASoC: hdac_hda: Conditionally register dais for HDMI and Analog The current driver is registering the same dais for each hdev found in the system which results duplicated widgets to be registered and the kernel log contains similar prints: snd_hda_codec_realtek ehdaudio0D0: ASoC: sink widget AIF1TX overwritten snd_hda_codec_realtek ehdaudio0D0: ASoC: source widget AIF1RX overwritten skl_hda_dsp_generic skl_hda_dsp_generic: ASoC: sink widget hifi3 overwritten skl_hda_dsp_generic skl_hda_dsp_generic: ASoC: sink widget hifi2 overwritten skl_hda_dsp_generic skl_hda_dsp_generic: ASoC: sink widget hifi1 overwritten skl_hda_dsp_generic skl_hda_dsp_generic: ASoC: source widget Codec Output Pin1 overwritten skl_hda_dsp_generic skl_hda_dsp_generic: ASoC: sink widget Codec Input Pin1 overwritten skl_hda_dsp_generic skl_hda_dsp_generic: ASoC: sink widget Analog Codec Playback overwritten skl_hda_dsp_generic skl_hda_dsp_generic: ASoC: sink widget Digital Codec Playback overwritten skl_hda_dsp_generic skl_hda_dsp_generic: ASoC: sink widget Alt Analog Codec Playback overwritten skl_hda_dsp_generic skl_hda_dsp_generic: ASoC: source widget Analog Codec Capture overwritten skl_hda_dsp_generic skl_hda_dsp_generic: ASoC: source widget Digital Codec Capture overwritten skl_hda_dsp_generic skl_hda_dsp_generic: ASoC: source widget Alt Analog Codec Capture overwritten To avoid such issue, split the dai array into HDMI and non HDMI array and register them conditionally: for HDMI hdev only register the dais needed for HDMI for non HDMI hdev do not register the HDMI dais. Depends-on: 3d1dc8b1030d ("ASoC: Intel: skl_hda_dsp_generic: Drop HDMI routes when HDMI is not available") Link: https://github.com/thesofproject/linux/issues/4509 Signed-off-by: Peter Ujfalusi Reviewed-by: Kai Vehmanen Link: https://lore.kernel.org/r/20231128123914.3986-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hda.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/hdac_hda.c b/sound/soc/codecs/hdac_hda.c index 355f30779a34..b075689db2dc 100644 --- a/sound/soc/codecs/hdac_hda.c +++ b/sound/soc/codecs/hdac_hda.c @@ -132,6 +132,9 @@ static struct snd_soc_dai_driver hdac_hda_dais[] = { .sig_bits = 24, }, }, +}; + +static struct snd_soc_dai_driver hdac_hda_hdmi_dais[] = { { .id = HDAC_HDMI_0_DAI_ID, .name = "intel-hdmi-hifi1", @@ -607,8 +610,16 @@ static const struct snd_soc_component_driver hdac_hda_codec = { .endianness = 1, }; +static const struct snd_soc_component_driver hdac_hda_hdmi_codec = { + .probe = hdac_hda_codec_probe, + .remove = hdac_hda_codec_remove, + .idle_bias_on = false, + .endianness = 1, +}; + static int hdac_hda_dev_probe(struct hdac_device *hdev) { + struct hdac_hda_priv *hda_pvt = dev_get_drvdata(&hdev->dev); struct hdac_ext_link *hlink; int ret; @@ -621,9 +632,15 @@ static int hdac_hda_dev_probe(struct hdac_device *hdev) snd_hdac_ext_bus_link_get(hdev->bus, hlink); /* ASoC specific initialization */ - ret = devm_snd_soc_register_component(&hdev->dev, - &hdac_hda_codec, hdac_hda_dais, - ARRAY_SIZE(hdac_hda_dais)); + if (hda_pvt->need_display_power) + ret = devm_snd_soc_register_component(&hdev->dev, + &hdac_hda_hdmi_codec, hdac_hda_hdmi_dais, + ARRAY_SIZE(hdac_hda_hdmi_dais)); + else + ret = devm_snd_soc_register_component(&hdev->dev, + &hdac_hda_codec, hdac_hda_dais, + ARRAY_SIZE(hdac_hda_dais)); + if (ret < 0) { dev_err(&hdev->dev, "failed to register HDA codec %d\n", ret); return ret; -- cgit From c447636970e3409ac39f0bb8c2dcff6b726f36b0 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 29 Nov 2023 15:14:10 +0200 Subject: ASoC: SOF: ipc4-topology: Correct data structures for the SRC module Separate the IPC message part as struct sof_ipc4_src_data. This struct describes the message payload passed to the firmware via the mailbox. It is not wise to be 'clever' and try to use the first part of a struct as IPC message without marking the message section as packed and aligned. Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20231129131411.27516-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-topology.c | 21 +++++++++++---------- sound/soc/sof/ipc4-topology.h | 16 ++++++++++++---- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index 19f36db30979..fae415b9235d 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -141,7 +141,7 @@ static const struct sof_topology_token gain_tokens[] = { /* SRC */ static const struct sof_topology_token src_tokens[] = { {SOF_TKN_SRC_RATE_OUT, SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32, - offsetof(struct sof_ipc4_src, sink_rate)}, + offsetof(struct sof_ipc4_src_data, sink_rate)}, }; static const struct sof_token_info ipc4_token_list[SOF_TOKEN_COUNT] = { @@ -811,11 +811,12 @@ static int sof_ipc4_widget_setup_comp_src(struct snd_sof_widget *swidget) swidget->private = src; - ret = sof_ipc4_get_audio_fmt(scomp, swidget, &src->available_fmt, &src->base_config); + ret = sof_ipc4_get_audio_fmt(scomp, swidget, &src->available_fmt, + &src->data.base_config); if (ret) goto err; - ret = sof_update_ipc_object(scomp, src, SOF_SRC_TOKENS, swidget->tuples, + ret = sof_update_ipc_object(scomp, &src->data, SOF_SRC_TOKENS, swidget->tuples, swidget->num_tuples, sizeof(*src), 1); if (ret) { dev_err(scomp->dev, "Parsing SRC tokens failed\n"); @@ -824,7 +825,7 @@ static int sof_ipc4_widget_setup_comp_src(struct snd_sof_widget *swidget) spipe->core_mask |= BIT(swidget->core); - dev_dbg(scomp->dev, "SRC sink rate %d\n", src->sink_rate); + dev_dbg(scomp->dev, "SRC sink rate %d\n", src->data.sink_rate); ret = sof_ipc4_widget_setup_msg(swidget, &src->msg); if (ret) @@ -1900,7 +1901,7 @@ static int sof_ipc4_prepare_src_module(struct snd_sof_widget *swidget, u32 out_ref_rate, out_ref_channels, out_ref_valid_bits; int output_format_index, input_format_index; - input_format_index = sof_ipc4_init_input_audio_fmt(sdev, swidget, &src->base_config, + input_format_index = sof_ipc4_init_input_audio_fmt(sdev, swidget, &src->data.base_config, pipeline_params, available_fmt); if (input_format_index < 0) return input_format_index; @@ -1930,7 +1931,7 @@ static int sof_ipc4_prepare_src_module(struct snd_sof_widget *swidget, */ out_ref_rate = params_rate(fe_params); - output_format_index = sof_ipc4_init_output_audio_fmt(sdev, &src->base_config, + output_format_index = sof_ipc4_init_output_audio_fmt(sdev, &src->data.base_config, available_fmt, out_ref_rate, out_ref_channels, out_ref_valid_bits); if (output_format_index < 0) { @@ -1940,10 +1941,10 @@ static int sof_ipc4_prepare_src_module(struct snd_sof_widget *swidget, } /* update pipeline memory usage */ - sof_ipc4_update_resource_usage(sdev, swidget, &src->base_config); + sof_ipc4_update_resource_usage(sdev, swidget, &src->data.base_config); out_audio_fmt = &available_fmt->output_pin_fmts[output_format_index].audio_fmt; - src->sink_rate = out_audio_fmt->sampling_frequency; + src->data.sink_rate = out_audio_fmt->sampling_frequency; /* update pipeline_params for sink widgets */ return sof_ipc4_update_hw_params(sdev, pipeline_params, out_audio_fmt); @@ -2344,8 +2345,8 @@ static int sof_ipc4_widget_setup(struct snd_sof_dev *sdev, struct snd_sof_widget { struct sof_ipc4_src *src = swidget->private; - ipc_size = sizeof(struct sof_ipc4_base_module_cfg) + sizeof(src->sink_rate); - ipc_data = src; + ipc_size = sizeof(src->data); + ipc_data = &src->data; msg = &src->msg; break; diff --git a/sound/soc/sof/ipc4-topology.h b/sound/soc/sof/ipc4-topology.h index 0a57b8ab3e08..127caca5262a 100644 --- a/sound/soc/sof/ipc4-topology.h +++ b/sound/soc/sof/ipc4-topology.h @@ -404,16 +404,24 @@ struct sof_ipc4_mixer { struct sof_ipc4_msg msg; }; -/** - * struct sof_ipc4_src SRC config data +/* + * struct sof_ipc4_src_data - IPC data for SRC * @base_config: IPC base config data * @sink_rate: Output rate for sink module + */ +struct sof_ipc4_src_data { + struct sof_ipc4_base_module_cfg base_config; + uint32_t sink_rate; +} __packed __aligned(4); + +/** + * struct sof_ipc4_src - SRC config data + * @data: IPC base config data * @available_fmt: Available audio format * @msg: IPC4 message struct containing header and data info */ struct sof_ipc4_src { - struct sof_ipc4_base_module_cfg base_config; - uint32_t sink_rate; + struct sof_ipc4_src_data data; struct sof_ipc4_available_audio_format available_fmt; struct sof_ipc4_msg msg; }; -- cgit From e238b68e6dc89ddab52bd98216fe5623e94792b1 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 29 Nov 2023 15:14:11 +0200 Subject: ASoC: SOF: ipc4-topology: Correct data structures for the GAIN module Move the base_cfg to struct sof_ipc4_gain_data. This struct describes the message payload passed to the firmware via the mailbox. It is not wise to be 'clever' and try to use the first part of a struct as IPC message without marking the message section as packed and aligned. Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20231129131411.27516-3-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-control.c | 20 ++++++++++---------- sound/soc/sof/ipc4-topology.c | 31 +++++++++++++++---------------- sound/soc/sof/ipc4-topology.h | 18 +++++++++++++----- 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/sound/soc/sof/ipc4-control.c b/sound/soc/sof/ipc4-control.c index 938efaceb81c..b4cdcec33e12 100644 --- a/sound/soc/sof/ipc4-control.c +++ b/sound/soc/sof/ipc4-control.c @@ -89,7 +89,7 @@ sof_ipc4_set_volume_data(struct snd_sof_dev *sdev, struct snd_sof_widget *swidge struct sof_ipc4_control_data *cdata = scontrol->ipc_control_data; struct sof_ipc4_gain *gain = swidget->private; struct sof_ipc4_msg *msg = &cdata->msg; - struct sof_ipc4_gain_data data; + struct sof_ipc4_gain_params params; bool all_channels_equal = true; u32 value; int ret, i; @@ -109,20 +109,20 @@ sof_ipc4_set_volume_data(struct snd_sof_dev *sdev, struct snd_sof_widget *swidge */ for (i = 0; i < scontrol->num_channels; i++) { if (all_channels_equal) { - data.channels = SOF_IPC4_GAIN_ALL_CHANNELS_MASK; - data.init_val = cdata->chanv[0].value; + params.channels = SOF_IPC4_GAIN_ALL_CHANNELS_MASK; + params.init_val = cdata->chanv[0].value; } else { - data.channels = cdata->chanv[i].channel; - data.init_val = cdata->chanv[i].value; + params.channels = cdata->chanv[i].channel; + params.init_val = cdata->chanv[i].value; } /* set curve type and duration from topology */ - data.curve_duration_l = gain->data.curve_duration_l; - data.curve_duration_h = gain->data.curve_duration_h; - data.curve_type = gain->data.curve_type; + params.curve_duration_l = gain->data.params.curve_duration_l; + params.curve_duration_h = gain->data.params.curve_duration_h; + params.curve_type = gain->data.params.curve_type; - msg->data_ptr = &data; - msg->data_size = sizeof(data); + msg->data_ptr = ¶ms; + msg->data_size = sizeof(params); ret = sof_ipc4_set_get_kcontrol_data(scontrol, true, lock); msg->data_ptr = NULL; diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index fae415b9235d..e012b6e166ac 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -130,12 +130,12 @@ static const struct sof_topology_token comp_ext_tokens[] = { static const struct sof_topology_token gain_tokens[] = { {SOF_TKN_GAIN_RAMP_TYPE, SND_SOC_TPLG_TUPLE_TYPE_WORD, - get_token_u32, offsetof(struct sof_ipc4_gain_data, curve_type)}, + get_token_u32, offsetof(struct sof_ipc4_gain_params, curve_type)}, {SOF_TKN_GAIN_RAMP_DURATION, SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32, - offsetof(struct sof_ipc4_gain_data, curve_duration_l)}, + offsetof(struct sof_ipc4_gain_params, curve_duration_l)}, {SOF_TKN_GAIN_VAL, SND_SOC_TPLG_TUPLE_TYPE_WORD, - get_token_u32, offsetof(struct sof_ipc4_gain_data, init_val)}, + get_token_u32, offsetof(struct sof_ipc4_gain_params, init_val)}, }; /* SRC */ @@ -720,15 +720,15 @@ static int sof_ipc4_widget_setup_comp_pga(struct snd_sof_widget *swidget) swidget->private = gain; - gain->data.channels = SOF_IPC4_GAIN_ALL_CHANNELS_MASK; - gain->data.init_val = SOF_IPC4_VOL_ZERO_DB; + gain->data.params.channels = SOF_IPC4_GAIN_ALL_CHANNELS_MASK; + gain->data.params.init_val = SOF_IPC4_VOL_ZERO_DB; - ret = sof_ipc4_get_audio_fmt(scomp, swidget, &gain->available_fmt, &gain->base_config); + ret = sof_ipc4_get_audio_fmt(scomp, swidget, &gain->available_fmt, &gain->data.base_config); if (ret) goto err; - ret = sof_update_ipc_object(scomp, &gain->data, SOF_GAIN_TOKENS, swidget->tuples, - swidget->num_tuples, sizeof(gain->data), 1); + ret = sof_update_ipc_object(scomp, &gain->data.params, SOF_GAIN_TOKENS, + swidget->tuples, swidget->num_tuples, sizeof(gain->data), 1); if (ret) { dev_err(scomp->dev, "Parsing gain tokens failed\n"); goto err; @@ -736,8 +736,8 @@ static int sof_ipc4_widget_setup_comp_pga(struct snd_sof_widget *swidget) dev_dbg(scomp->dev, "pga widget %s: ramp type: %d, ramp duration %d, initial gain value: %#x\n", - swidget->widget->name, gain->data.curve_type, gain->data.curve_duration_l, - gain->data.init_val); + swidget->widget->name, gain->data.params.curve_type, + gain->data.params.curve_duration_l, gain->data.params.init_val); ret = sof_ipc4_widget_setup_msg(swidget, &gain->msg); if (ret) @@ -1826,7 +1826,7 @@ static int sof_ipc4_prepare_gain_module(struct snd_sof_widget *swidget, u32 out_ref_rate, out_ref_channels, out_ref_valid_bits; int ret; - ret = sof_ipc4_init_input_audio_fmt(sdev, swidget, &gain->base_config, + ret = sof_ipc4_init_input_audio_fmt(sdev, swidget, &gain->data.base_config, pipeline_params, available_fmt); if (ret < 0) return ret; @@ -1836,7 +1836,7 @@ static int sof_ipc4_prepare_gain_module(struct snd_sof_widget *swidget, out_ref_channels = SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(in_fmt->fmt_cfg); out_ref_valid_bits = SOF_IPC4_AUDIO_FORMAT_CFG_V_BIT_DEPTH(in_fmt->fmt_cfg); - ret = sof_ipc4_init_output_audio_fmt(sdev, &gain->base_config, available_fmt, + ret = sof_ipc4_init_output_audio_fmt(sdev, &gain->data.base_config, available_fmt, out_ref_rate, out_ref_channels, out_ref_valid_bits); if (ret < 0) { dev_err(sdev->dev, "Failed to initialize output format for %s", @@ -1845,7 +1845,7 @@ static int sof_ipc4_prepare_gain_module(struct snd_sof_widget *swidget, } /* update pipeline memory usage */ - sof_ipc4_update_resource_usage(sdev, swidget, &gain->base_config); + sof_ipc4_update_resource_usage(sdev, swidget, &gain->data.base_config); return 0; } @@ -2324,9 +2324,8 @@ static int sof_ipc4_widget_setup(struct snd_sof_dev *sdev, struct snd_sof_widget { struct sof_ipc4_gain *gain = swidget->private; - ipc_size = sizeof(struct sof_ipc4_base_module_cfg) + - sizeof(struct sof_ipc4_gain_data); - ipc_data = gain; + ipc_size = sizeof(gain->data); + ipc_data = &gain->data; msg = &gain->msg; break; diff --git a/sound/soc/sof/ipc4-topology.h b/sound/soc/sof/ipc4-topology.h index 127caca5262a..dce174a190dd 100644 --- a/sound/soc/sof/ipc4-topology.h +++ b/sound/soc/sof/ipc4-topology.h @@ -361,7 +361,7 @@ struct sof_ipc4_control_msg_payload { } __packed; /** - * struct sof_ipc4_gain_data - IPC gain blob + * struct sof_ipc4_gain_params - IPC gain parameters * @channels: Channels * @init_val: Initial value * @curve_type: Curve type @@ -369,24 +369,32 @@ struct sof_ipc4_control_msg_payload { * @curve_duration_l: Curve duration low part * @curve_duration_h: Curve duration high part */ -struct sof_ipc4_gain_data { +struct sof_ipc4_gain_params { uint32_t channels; uint32_t init_val; uint32_t curve_type; uint32_t reserved; uint32_t curve_duration_l; uint32_t curve_duration_h; -} __aligned(8); +} __packed __aligned(4); /** - * struct sof_ipc4_gain - gain config data + * struct sof_ipc4_gain_data - IPC gain init blob * @base_config: IPC base config data + * @params: Initial parameters for the gain module + */ +struct sof_ipc4_gain_data { + struct sof_ipc4_base_module_cfg base_config; + struct sof_ipc4_gain_params params; +} __packed __aligned(4); + +/** + * struct sof_ipc4_gain - gain config data * @data: IPC gain blob * @available_fmt: Available audio format * @msg: message structure for gain */ struct sof_ipc4_gain { - struct sof_ipc4_base_module_cfg base_config; struct sof_ipc4_gain_data data; struct sof_ipc4_available_audio_format available_fmt; struct sof_ipc4_msg msg; -- cgit From 480b3e73720f6b5d76bef2387b1f9d19ed67573b Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 3 Nov 2023 05:26:27 -0700 Subject: vdpa/mlx5: preserve CVQ vringh index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mlx5_vdpa does not preserve userland's view of vring base for the control queue in the following sequence: ioctl VHOST_SET_VRING_BASE ioctl VHOST_VDPA_SET_STATUS VIRTIO_CONFIG_S_DRIVER_OK mlx5_vdpa_set_status() setup_cvq_vring() vringh_init_iotlb() vringh_init_kern() vrh->last_avail_idx = 0; ioctl VHOST_GET_VRING_BASE To fix, restore the value of cvq->vring.last_avail_idx after calling vringh_init_iotlb. Fixes: 5262912ef3cf ("vdpa/mlx5: Add support for control VQ and MAC setting") Signed-off-by: Steve Sistare Acked-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <1699014387-194368-1-git-send-email-steven.sistare@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 12ac3397f39b..26ba7da6b410 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2815,13 +2815,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) struct mlx5_control_vq *cvq = &mvdev->cvq; int err = 0; - if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) + if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { + u16 idx = cvq->vring.last_avail_idx; + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, MLX5_CVQ_MAX_ENT, false, (struct vring_desc *)(uintptr_t)cvq->desc_addr, (struct vring_avail *)(uintptr_t)cvq->driver_addr, (struct vring_used *)(uintptr_t)cvq->device_addr); + if (!err) + cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx; + } return err; } -- cgit From 4f317d6529d7fc3ab7769ef89645d43fc7eec61b Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 10 Nov 2023 14:18:00 -0800 Subject: pds_vdpa: fix up format-truncation complaint Our friendly kernel test robot has recently been pointing out some format-truncation issues. Here's a fix for one of them. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311040109.RfgJoE7L-lkp@intel.com/ Signed-off-by: Shannon Nelson Message-Id: <20231110221802.46841-2-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/pds/debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/pds/debugfs.c b/drivers/vdpa/pds/debugfs.c index 9b04aad6ec35..c328e694f6e7 100644 --- a/drivers/vdpa/pds/debugfs.c +++ b/drivers/vdpa/pds/debugfs.c @@ -261,7 +261,7 @@ void pds_vdpa_debugfs_add_vdpadev(struct pds_vdpa_aux *vdpa_aux) debugfs_create_file("config", 0400, vdpa_aux->dentry, vdpa_aux->pdsv, &config_fops); for (i = 0; i < vdpa_aux->pdsv->num_vqs; i++) { - char name[8]; + char name[16]; snprintf(name, sizeof(name), "vq%02d", i); debugfs_create_file(name, 0400, vdpa_aux->dentry, -- cgit From dd3b8de16e90c5594eddd29aeeb99e97c6f863be Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 10 Nov 2023 14:18:01 -0800 Subject: pds_vdpa: clear config callback when status goes to 0 If the client driver is setting status to 0, something is getting shutdown and possibly removed. Make sure we clear the config_cb so that it doesn't end up crashing when trying to call a bogus callback. Signed-off-by: Shannon Nelson Message-Id: <20231110221802.46841-3-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/pds/vdpa_dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c index 52b2449182ad..9fc89c82d1f0 100644 --- a/drivers/vdpa/pds/vdpa_dev.c +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -461,8 +461,10 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) pds_vdpa_cmd_set_status(pdsv, status); - /* Note: still working with FW on the need for this reset cmd */ if (status == 0) { + struct vdpa_callback null_cb = { }; + + pds_vdpa_set_config_cb(vdpa_dev, &null_cb); pds_vdpa_cmd_reset(pdsv); for (i = 0; i < pdsv->num_vqs; i++) { -- cgit From cefc9ba6aed48a3aa085888e3262ac2aa975714b Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 10 Nov 2023 14:18:02 -0800 Subject: pds_vdpa: set features order Fix up the order that the device and negotiated features are checked to get a more reliable difference when things get changed. Signed-off-by: Shannon Nelson Message-Id: <20231110221802.46841-4-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/pds/vdpa_dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c index 9fc89c82d1f0..25c0fe5ec3d5 100644 --- a/drivers/vdpa/pds/vdpa_dev.c +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -318,9 +318,8 @@ static int pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 featur return -EOPNOTSUPP; } - pdsv->negotiated_features = nego_features; - driver_features = pds_vdpa_get_driver_features(vdpa_dev); + pdsv->negotiated_features = nego_features; dev_dbg(dev, "%s: %#llx => %#llx\n", __func__, driver_features, nego_features); -- cgit From 087e15206d6ac0d46734e2b0ab34370c0fdca481 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 18 Oct 2023 13:46:22 -0700 Subject: KVM: Set file_operations.owner appropriately for all such structures Set .owner for all KVM-owned filed types so that the KVM module is pinned until any files with callbacks back into KVM are completely freed. Using "struct kvm" as a proxy for the module, i.e. keeping KVM-the-module alive while there are active VMs, doesn't provide full protection. Userspace can invoke delete_module() the instant the last reference to KVM is put. If KVM itself puts the last reference, e.g. via kvm_destroy_vm(), then it's possible for KVM to be preempted and deleted/unloaded before KVM fully exits, e.g. when the task running kvm_destroy_vm() is scheduled back in, it will jump to a code page that is no longer mapped. Note, file types that can call into sub-module code, e.g. kvm-intel.ko or kvm-amd.ko on x86, must use the module pointer passed to kvm_init(), not THIS_MODULE (which points at kvm.ko). KVM assumes that if /dev/kvm is reachable, e.g. VMs are active, then the vendor module is loaded. To reduce the probability of forgetting to set .owner entirely, use THIS_MODULE for stats files where KVM does not call back into vendor code. This reverts commit 70375c2d8fa3fb9b0b59207a9c5df1e2e1205c10, and fixes several other file types that have been buggy since their introduction. Fixes: 70375c2d8fa3 ("Revert "KVM: set owner of cpu and vm file operations"") Fixes: 3bcd0662d66f ("KVM: X86: Introduce mmu_rmaps_stat per-vm debugfs file") Reported-by: Al Viro Link: https://lore.kernel.org/all/20231010003746.GN800259@ZenIV Link: https://lore.kernel.org/r/20231018204624.1905300-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/debugfs.c | 1 + virt/kvm/kvm_main.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index ee8c4c3496ed..eea6ea7f14af 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -182,6 +182,7 @@ static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file) } static const struct file_operations mmu_rmaps_stat_fops = { + .owner = THIS_MODULE, .open = kvm_mmu_rmaps_stat_open, .read = seq_read, .llseek = seq_lseek, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 486800a7024b..1e65a506985f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3887,7 +3887,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp) return 0; } -static const struct file_operations kvm_vcpu_fops = { +static struct file_operations kvm_vcpu_fops = { .release = kvm_vcpu_release, .unlocked_ioctl = kvm_vcpu_ioctl, .mmap = kvm_vcpu_mmap, @@ -4081,6 +4081,7 @@ static int kvm_vcpu_stats_release(struct inode *inode, struct file *file) } static const struct file_operations kvm_vcpu_stats_fops = { + .owner = THIS_MODULE, .read = kvm_vcpu_stats_read, .release = kvm_vcpu_stats_release, .llseek = noop_llseek, @@ -4431,7 +4432,7 @@ static int kvm_device_release(struct inode *inode, struct file *filp) return 0; } -static const struct file_operations kvm_device_fops = { +static struct file_operations kvm_device_fops = { .unlocked_ioctl = kvm_device_ioctl, .release = kvm_device_release, KVM_COMPAT(kvm_device_ioctl), @@ -4759,6 +4760,7 @@ static int kvm_vm_stats_release(struct inode *inode, struct file *file) } static const struct file_operations kvm_vm_stats_fops = { + .owner = THIS_MODULE, .read = kvm_vm_stats_read, .release = kvm_vm_stats_release, .llseek = noop_llseek, @@ -5060,7 +5062,7 @@ static long kvm_vm_compat_ioctl(struct file *filp, } #endif -static const struct file_operations kvm_vm_fops = { +static struct file_operations kvm_vm_fops = { .release = kvm_vm_release, .unlocked_ioctl = kvm_vm_ioctl, .llseek = noop_llseek, @@ -6095,6 +6097,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) goto err_async_pf; kvm_chardev_ops.owner = module; + kvm_vm_fops.owner = module; + kvm_vcpu_fops.owner = module; + kvm_device_fops.owner = module; kvm_preempt_ops.sched_in = kvm_sched_in; kvm_preempt_ops.sched_out = kvm_sched_out; -- cgit From ea61294befd361ab8260c65d53987b400e5599a7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 18 Oct 2023 13:46:24 -0700 Subject: Revert "KVM: Prevent module exit until all VMs are freed" Revert KVM's misguided attempt to "fix" a use-after-module-unload bug that was actually due to failure to flush a workqueue, not a lack of module refcounting. Pinning the KVM module until kvm_vm_destroy() doesn't prevent use-after-free due to the module being unloaded, as userspace can invoke delete_module() the instant the last reference to KVM is put, i.e. can cause all KVM code to be unmapped while KVM is actively executing said code. Generally speaking, the many instances of module_put(THIS_MODULE) notwithstanding, outside of a few special paths, a module can never safely put the last reference to itself without creating deadlock, i.e. something external to the module *must* put the last reference. In other words, having VMs grab a reference to the KVM module is futile, pointless, and as evidenced by the now-reverted commit 70375c2d8fa3 ("Revert "KVM: set owner of cpu and vm file operations""), actively dangerous. This reverts commit 405294f29faee5de8c10cb9d4a90e229c2835279 and commit 5f6de5cbebee925a612856fce6f9182bb3eee0db. Fixes: 405294f29fae ("KVM: Unconditionally get a ref to /dev/kvm module when creating a VM") Fixes: 5f6de5cbebee ("KVM: Prevent module exit until all VMs are freed") Link: https://lore.kernel.org/r/20231018204624.1905300-4-seanjc@google.com Signed-off-by: Sean Christopherson --- virt/kvm/kvm_main.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1e65a506985f..3b1b9e8dd70c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -115,8 +115,6 @@ EXPORT_SYMBOL_GPL(kvm_debugfs_dir); static const struct file_operations stat_fops_per_vm; -static struct file_operations kvm_chardev_ops; - static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); #ifdef CONFIG_KVM_COMPAT @@ -1157,9 +1155,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) if (!kvm) return ERR_PTR(-ENOMEM); - /* KVM is pinned via open("/dev/kvm"), the fd passed to this ioctl(). */ - __module_get(kvm_chardev_ops.owner); - KVM_MMU_LOCK_INIT(kvm); mmgrab(current->mm); kvm->mm = current->mm; @@ -1279,7 +1274,6 @@ out_err_no_irq_srcu: out_err_no_srcu: kvm_arch_free_vm(kvm); mmdrop(current->mm); - module_put(kvm_chardev_ops.owner); return ERR_PTR(r); } @@ -1348,7 +1342,6 @@ static void kvm_destroy_vm(struct kvm *kvm) preempt_notifier_dec(); hardware_disable_all(); mmdrop(mm); - module_put(kvm_chardev_ops.owner); } void kvm_get_kvm(struct kvm *kvm) -- cgit From ef8d89033c3f1f6a64757f066b2c17e76d1189f8 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Sat, 7 Oct 2023 14:40:19 +0800 Subject: KVM: x86: Remove 'return void' expression for 'void function' The requested info will be stored in 'guest_xsave->region' referenced by the incoming pointer "struct kvm_xsave *guest_xsave", thus there is no need to explicitly use return void expression for a void function "static void kvm_vcpu_ioctl_x86_get_xsave(...)". The issue is caught with [-Wpedantic]. Fixes: 2d287ec65e79 ("x86/fpu: Allow caller to constrain xfeatures when copying to uabi buffer") Signed-off-by: Like Xu Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20231007064019.17472-1-likexu@tencent.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3b6a0109191b..a8bebf5c0a05 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5411,8 +5411,8 @@ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { - return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, - sizeof(guest_xsave->region)); + kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, + sizeof(guest_xsave->region)); } static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, -- cgit From c467e97f079f0019870c314996fae952cc768e82 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Tue, 28 Nov 2023 13:11:39 -0500 Subject: md/raid6: use valid sector values to determine if an I/O should wait on the reshape During a reshape or a RAID6 array such as expanding by adding an additional disk, I/Os to the region of the array which have not yet been reshaped can stall indefinitely. This is from errors in the stripe_ahead_of_reshape function causing md to think the I/O is to a region in the actively undergoing the reshape. stripe_ahead_of_reshape fails to account for the q disk having a sector value of 0. By not excluding the q disk from the for loop, raid6 will always generate a min_sector value of 0, causing a return value which stalls. The function's max_sector calculation also uses min() when it should use max(), causing the max_sector value to always be 0. During a backwards rebuild this can cause the opposite problem where it allows I/O to advance when it should wait. Fixing these errors will allow safe I/O to advance in a timely manner and delay only I/O which is unsafe due to stripes in the middle of undergoing the reshape. Fixes: 486f60558607 ("md/raid5: Check all disks in a stripe_head for reshape progress") Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: David Jeffery Tested-by: Laurence Oberman Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231128181233.6187-1-djeffery@redhat.com --- drivers/md/raid5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index dc031d42f53b..26e1e8a5e941 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5892,11 +5892,11 @@ static bool stripe_ahead_of_reshape(struct mddev *mddev, struct r5conf *conf, int dd_idx; for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) { - if (dd_idx == sh->pd_idx) + if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx) continue; min_sector = min(min_sector, sh->dev[dd_idx].sector); - max_sector = min(max_sector, sh->dev[dd_idx].sector); + max_sector = max(max_sector, sh->dev[dd_idx].sector); } spin_lock_irq(&conf->device_lock); -- cgit From dfce9cb3140592b886838e06f3e0c25fea2a9cae Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 30 Nov 2023 18:46:40 -0800 Subject: bpf: Fix a verifier bug due to incorrect branch offset comparison with cpu=v4 Bpf cpu=v4 support is introduced in [1] and Commit 4cd58e9af8b9 ("bpf: Support new 32bit offset jmp instruction") added support for new 32bit offset jmp instruction. Unfortunately, in function bpf_adj_delta_to_off(), for new branch insn with 32bit offset, the offset (plus/minor a small delta) compares to 16-bit offset bound [S16_MIN, S16_MAX], which caused the following verification failure: $ ./test_progs-cpuv4 -t verif_scale_pyperf180 ... insn 10 cannot be patched due to 16-bit range ... libbpf: failed to load object 'pyperf180.bpf.o' scale_test:FAIL:expect_success unexpected error: -12 (errno 12) #405 verif_scale_pyperf180:FAIL Note that due to recent llvm18 development, the patch [2] (already applied in bpf-next) needs to be applied to bpf tree for testing purpose. The fix is rather simple. For 32bit offset branch insn, the adjusted offset compares to [S32_MIN, S32_MAX] and then verification succeeded. [1] https://lore.kernel.org/all/20230728011143.3710005-1-yonghong.song@linux.dev [2] https://lore.kernel.org/bpf/20231110193644.3130906-1-yonghong.song@linux.dev Fixes: 4cd58e9af8b9 ("bpf: Support new 32bit offset jmp instruction") Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231201024640.3417057-1-yonghong.song@linux.dev --- kernel/bpf/core.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index cd3afe57ece3..fe254ae035fe 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -371,14 +371,18 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old, static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old, s32 end_new, s32 curr, const bool probe_pass) { - const s32 off_min = S16_MIN, off_max = S16_MAX; + s64 off_min, off_max, off; s32 delta = end_new - end_old; - s32 off; - if (insn->code == (BPF_JMP32 | BPF_JA)) + if (insn->code == (BPF_JMP32 | BPF_JA)) { off = insn->imm; - else + off_min = S32_MIN; + off_max = S32_MAX; + } else { off = insn->off; + off_min = S16_MIN; + off_max = S16_MAX; + } if (curr < pos && curr + off + 1 >= end_old) off += delta; -- cgit From 16b55b1f2269962fb6b5154b8bf43f37c9a96637 Mon Sep 17 00:00:00 2001 From: Thinh Tran Date: Thu, 30 Nov 2023 18:19:11 -0600 Subject: net/tg3: fix race condition in tg3_reset_task() When an EEH error is encountered by a PCI adapter, the EEH driver modifies the PCI channel's state as shown below: enum { /* I/O channel is in normal state */ pci_channel_io_normal = (__force pci_channel_state_t) 1, /* I/O to channel is blocked */ pci_channel_io_frozen = (__force pci_channel_state_t) 2, /* PCI card is dead */ pci_channel_io_perm_failure = (__force pci_channel_state_t) 3, }; If the same EEH error then causes the tg3 driver's transmit timeout logic to execute, the tg3_tx_timeout() function schedules a reset task via tg3_reset_task_schedule(), which may cause a race condition between the tg3 and EEH driver as both attempt to recover the HW via a reset action. EEH driver gets error event --> eeh_set_channel_state() and set device to one of error state above scheduler: tg3_reset_task() get returned error from tg3_init_hw() --> dev_close() shuts down the interface tg3_io_slot_reset() and tg3_io_resume() fail to reset/resume the device To resolve this issue, we avoid the race condition by checking the PCI channel state in the tg3_reset_task() function and skip the tg3 driver initiated reset when the PCI channel is not in the normal state. (The driver has no access to tg3 device registers at this point and cannot even complete the reset task successfully without external assistance.) We'll leave the reset procedure to be managed by the EEH driver which calls the tg3_io_error_detected(), tg3_io_slot_reset() and tg3_io_resume() functions as appropriate. Adding the same checking in tg3_dump_state() to avoid dumping all device registers when the PCI channel is not in the normal state. Signed-off-by: Thinh Tran Tested-by: Venkata Sai Duggi Reviewed-by: David Christensen Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20231201001911.656-1-thinhtr@linux.vnet.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/tg3.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 48b6191efa56..f52830dfb26a 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6474,6 +6474,14 @@ static void tg3_dump_state(struct tg3 *tp) int i; u32 *regs; + /* If it is a PCI error, all registers will be 0xffff, + * we don't dump them out, just report the error and return + */ + if (tp->pdev->error_state != pci_channel_io_normal) { + netdev_err(tp->dev, "PCI channel ERROR!\n"); + return; + } + regs = kzalloc(TG3_REG_BLK_SIZE, GFP_ATOMIC); if (!regs) return; @@ -11259,7 +11267,8 @@ static void tg3_reset_task(struct work_struct *work) rtnl_lock(); tg3_full_lock(tp, 0); - if (tp->pcierr_recovery || !netif_running(tp->dev)) { + if (tp->pcierr_recovery || !netif_running(tp->dev) || + tp->pdev->error_state != pci_channel_io_normal) { tg3_flag_clear(tp, RESET_TASK_PENDING); tg3_full_unlock(tp); rtnl_unlock(); -- cgit From 6c89f49964375c904cea33c0247467873f4daf2c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 29 Nov 2023 21:58:53 -0800 Subject: hv_netvsc: rndis_filter needs to select NLS rndis_filter uses utf8s_to_utf16s() which is provided by setting NLS, so select NLS to fix the build error: ERROR: modpost: "utf8s_to_utf16s" [drivers/net/hyperv/hv_netvsc.ko] undefined! Fixes: 1ce09e899d28 ("hyperv: Add support for setting MAC from within guests") Signed-off-by: Randy Dunlap Cc: Haiyang Zhang Cc: K. Y. Srinivasan Cc: Wei Liu Cc: Dexuan Cui Reviewed-by: Simon Horman Tested-by: Simon Horman # build-tested Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20231130055853.19069-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/hyperv/Kconfig b/drivers/net/hyperv/Kconfig index ca7bf7f897d3..c8cbd85adcf9 100644 --- a/drivers/net/hyperv/Kconfig +++ b/drivers/net/hyperv/Kconfig @@ -3,5 +3,6 @@ config HYPERV_NET tristate "Microsoft Hyper-V virtual network driver" depends on HYPERV select UCS2_STRING + select NLS help Select this option to enable the Hyper-V virtual network driver. -- cgit From 9b8493dc43044376716d789d07699f17d538a7c4 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 1 Dec 2023 19:37:27 +0100 Subject: x86/CPU/AMD: Check vendor in the AMD microcode callback Commit in Fixes added an AMD-specific microcode callback. However, it didn't check the CPU vendor the kernel runs on explicitly. The only reason the Zenbleed check in it didn't run on other x86 vendors hardware was pure coincidental luck: if (!cpu_has_amd_erratum(c, amd_zenbleed)) return; gives true on other vendors because they don't have those families and models. However, with the removal of the cpu_has_amd_erratum() in 05f5f73936fa ("x86/CPU/AMD: Drop now unused CPU erratum checking function") that coincidental condition is gone, leading to the zenbleed check getting executed on other vendors too. Add the explicit vendor check for the whole callback as it should've been done in the first place. Fixes: 522b1d69219d ("x86/cpu/amd: Add a Zenbleed fix") Cc: Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20231201184226.16749-1-bp@alien8.de --- arch/x86/kernel/cpu/amd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a7eab05e5f29..f322ebd053a9 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1320,6 +1320,9 @@ static void zenbleed_check_cpu(void *unused) void amd_check_microcode(void) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return; + on_each_cpu(zenbleed_check_cpu, NULL, 1); } -- cgit From 9f6acd2b4dfef81dcc45486ac704cf602c88db02 Mon Sep 17 00:00:00 2001 From: Sam Edwards Date: Fri, 1 Dec 2023 23:12:12 -0800 Subject: arm64: dts: rockchip: Fix Turing RK1 interrupt pinctrls The pinctrls for the hym8563 interrupt line and fan-tach input were both mistakenly defined as `pcfg_pull_none`. As these are active-low signals (level-triggered, in the hym8563 case) which may not be driven at times, these should really be pull-up. The lack of any bias results in spurious interrupts. Fix this by modifying the `rockchip,pins` properties as necessary to enable the pull-up resistors. Fixes: 2806a69f3fef6 ("arm64: dts: rockchip: Add Turing RK1 SoM support") Signed-off-by: Sam Edwards Link: https://lore.kernel.org/r/20231202071212.1606800-1-CFSworks@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi index 9570b34aca2e..d88c0e852356 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi @@ -235,13 +235,13 @@ &pinctrl { fan { fan_int: fan-int { - rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO &pcfg_pull_none>; + rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO &pcfg_pull_up>; }; }; hym8563 { hym8563_int: hym8563-int { - rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>; + rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_up>; }; }; -- cgit From 4b0768b6556af56ee9b7cf4e68452a2b6289ae45 Mon Sep 17 00:00:00 2001 From: ChunHao Lin Date: Wed, 29 Nov 2023 23:53:50 +0800 Subject: r8169: fix rtl8125b PAUSE frames blasting when suspended When FIFO reaches near full state, device will issue pause frame. If pause slot is enabled(set to 1), in this time, device will issue pause frame only once. But if pause slot is disabled(set to 0), device will keep sending pause frames until FIFO reaches near empty state. When pause slot is disabled, if there is no one to handle receive packets, device FIFO will reach near full state and keep sending pause frames. That will impact entire local area network. This issue can be reproduced in Chromebox (not Chromebook) in developer mode running a test image (and v5.10 kernel): 1) ping -f $CHROMEBOX (from workstation on same local network) 2) run "powerd_dbus_suspend" from command line on the $CHROMEBOX 3) ping $ROUTER (wait until ping fails from workstation) Takes about ~20-30 seconds after step 2 for the local network to stop working. Fix this issue by enabling pause slot to only send pause frame once when FIFO reaches near full state. Fixes: f1bce4ad2f1c ("r8169: add support for RTL8125") Reported-by: Grant Grundler Tested-by: Grant Grundler Cc: stable@vger.kernel.org Signed-off-by: ChunHao Lin Reviewed-by: Jacob Keller Reviewed-by: Heiner Kallweit Link: https://lore.kernel.org/r/20231129155350.5843-1-hau@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 62cabeeb842a..bb787a52bc75 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -196,6 +196,7 @@ enum rtl_registers { /* No threshold before first PCI xfer */ #define RX_FIFO_THRESH (7 << RXCFG_FIFO_SHIFT) #define RX_EARLY_OFF (1 << 11) +#define RX_PAUSE_SLOT_ON (1 << 11) /* 8125b and later */ #define RXCFG_DMA_SHIFT 8 /* Unlimited maximum PCI burst. */ #define RX_DMA_BURST (7 << RXCFG_DMA_SHIFT) @@ -2306,9 +2307,13 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF); break; - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_63: + case RTL_GIGA_MAC_VER_61: RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST); break; + case RTL_GIGA_MAC_VER_63: + RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST | + RX_PAUSE_SLOT_ON); + break; default: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_DMA_BURST); break; -- cgit From c5c325bb5849868d76969d3fe014515f5e99eabc Mon Sep 17 00:00:00 2001 From: Pascal Noël Date: Fri, 1 Dec 2023 17:37:44 -0800 Subject: ALSA: hda/realtek: Apply quirk for ASUS UM3504DA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ASUS UM3504DA uses a Realtek HDA codec and two CS35L41 amplifiers via I2C. Apply existing quirk to model. Signed-off-by: Pascal Noël Cc: Link: https://lore.kernel.org/r/20231202013744.12369-1-pascal@pascalcompiles.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f9ddacfd920e..ddd74f5d3b30 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9963,6 +9963,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x17f3, "ROG Ally RC71L_RC71L", ALC294_FIXUP_ASUS_ALLY), SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS), SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x18d3, "ASUS UM3504DA", ALC294_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x18f1, "Asus FX505DT", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x194e, "ASUS UX563FD", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1970, "ASUS UX550VE", ALC289_FIXUP_ASUS_GA401), -- cgit From 35fe2ad259a3bfca15ab78c8ffb5278cb6149c89 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 3 Dec 2023 16:24:05 +0100 Subject: hwmon: (nzxt-kraken2) Fix error handling path in kraken2_probe() There is no point in calling hid_hw_stop() if hid_hw_start() has failed. There is no point in calling hid_hw_close() if hid_hw_open() has failed. Update the error handling path accordingly. Fixes: 82e3430dfa8c ("hwmon: add driver for NZXT Kraken X42/X52/X62/X72") Reported-by: Aleksa Savic Closes: https://lore.kernel.org/all/121470f0-6c1f-418a-844c-7ec2e8a54b8e@gmail.com/ Signed-off-by: Christophe JAILLET Reviewed-by: Jonas Malaco Link: https://lore.kernel.org/r/a768e69851a07a1f4e29f270f4e2559063f07343.1701617030.git.christophe.jaillet@wanadoo.fr Signed-off-by: Guenter Roeck --- drivers/hwmon/nzxt-kraken2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/nzxt-kraken2.c b/drivers/hwmon/nzxt-kraken2.c index 428c77b5fce5..7caf387eb144 100644 --- a/drivers/hwmon/nzxt-kraken2.c +++ b/drivers/hwmon/nzxt-kraken2.c @@ -161,13 +161,13 @@ static int kraken2_probe(struct hid_device *hdev, ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW); if (ret) { hid_err(hdev, "hid hw start failed with %d\n", ret); - goto fail_and_stop; + return ret; } ret = hid_hw_open(hdev); if (ret) { hid_err(hdev, "hid hw open failed with %d\n", ret); - goto fail_and_close; + goto fail_and_stop; } priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, "kraken2", -- cgit From 0117591e69d1edff46bc87061e533a1e25a8c500 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 30 Nov 2023 23:32:20 -0500 Subject: bcachefs: Don't drop journal pins in exit path There's no need to drop journal pins in our exit paths - the code was trying to have everything cleaned up on any shutdown, but better to just tweak the assertions a bit. This fixes a bug where calling into journal reclaim in the exit path would cass a null ptr deref. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 8 +++----- fs/bcachefs/btree_io.c | 4 ++-- fs/bcachefs/btree_io.h | 3 --- fs/bcachefs/btree_key_cache.c | 2 -- 4 files changed, 5 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 47e7770d0583..79495cd7a794 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -9,6 +9,7 @@ #include "debug.h" #include "errcode.h" #include "error.h" +#include "journal.h" #include "trace.h" #include @@ -424,14 +425,11 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c) BUG_ON(btree_node_read_in_flight(b) || btree_node_write_in_flight(b)); - if (btree_node_dirty(b)) - bch2_btree_complete_write(c, b, btree_current_write(b)); - clear_btree_node_dirty_acct(c, b); - btree_node_data_free(c, b); } - BUG_ON(atomic_read(&c->btree_cache.dirty)); + BUG_ON(!bch2_journal_error(&c->journal) && + atomic_read(&c->btree_cache.dirty)); list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 57c20390e10e..5a720f0cd5a6 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1704,8 +1704,8 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level)); } -void bch2_btree_complete_write(struct bch_fs *c, struct btree *b, - struct btree_write *w) +static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b, + struct btree_write *w) { unsigned long old, new, v = READ_ONCE(b->will_make_reachable); diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index 7e03dd76fb38..e0d7fa5b1dfb 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -134,9 +134,6 @@ void bch2_btree_node_read(struct bch_fs *, struct btree *, bool); int bch2_btree_root_read(struct bch_fs *, enum btree_id, const struct bkey_i *, unsigned); -void bch2_btree_complete_write(struct bch_fs *, struct btree *, - struct btree_write *); - bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *); enum btree_write_flags { diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 37fbf22de8fc..1b7a5668df7c 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -992,8 +992,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) list_for_each_entry_safe(ck, n, &items, list) { cond_resched(); - bch2_journal_pin_drop(&c->journal, &ck->journal); - list_del(&ck->list); kfree(ck->k); six_lock_exit(&ck->c.lock); -- cgit From f7b32e785042d2357c5abc23ca6db1b92c91a070 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 3 Dec 2023 15:37:53 +0000 Subject: io_uring: fix mutex_unlock with unreferenced ctx Callers of mutex_unlock() have to make sure that the mutex stays alive for the whole duration of the function call. For io_uring that means that the following pattern is not valid unless we ensure that the context outlives the mutex_unlock() call. mutex_lock(&ctx->uring_lock); req_put(req); // typically via io_req_task_submit() mutex_unlock(&ctx->uring_lock); Most contexts are fine: io-wq pins requests, syscalls hold the file, task works are taking ctx references and so on. However, the task work fallback path doesn't follow the rule. Cc: Fixes: 04fc6c802d ("io_uring: save ctx put/get for task_work submit") Reported-by: Jann Horn Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/io-uring/CAG48ez3xSoYb+45f1RLtktROJrpiDQ1otNvdR+YLQf7m+Krj5Q@mail.gmail.com/ Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index aba5657d287e..9626a363f121 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -271,6 +271,7 @@ static __cold void io_fallback_req_func(struct work_struct *work) struct io_kiocb *req, *tmp; struct io_tw_state ts = { .locked = true, }; + percpu_ref_get(&ctx->refs); mutex_lock(&ctx->uring_lock); llist_for_each_entry_safe(req, tmp, node, io_task_work.node) req->io_task_work.func(req, &ts); @@ -278,6 +279,7 @@ static __cold void io_fallback_req_func(struct work_struct *work) return; io_submit_flush_completions(ctx); mutex_unlock(&ctx->uring_lock); + percpu_ref_put(&ctx->refs); } static int io_alloc_hash_table(struct io_hash_table *table, unsigned bits) @@ -3146,12 +3148,7 @@ static __cold void io_ring_exit_work(struct work_struct *work) init_completion(&exit.completion); init_task_work(&exit.task_work, io_tctx_exit_cb); exit.ctx = ctx; - /* - * Some may use context even when all refs and requests have been put, - * and they are free to do so while still holding uring_lock or - * completion_lock, see io_req_task_submit(). Apart from other work, - * this lock/unlock section also waits them to finish. - */ + mutex_lock(&ctx->uring_lock); while (!list_empty(&ctx->tctx_list)) { WARN_ON_ONCE(time_after(jiffies, timeout)); -- cgit From 88ac06a9f938c158bbeafec16adb483b0c66142f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 2 Dec 2023 11:49:37 +0100 Subject: Revert "debugfs: annotate debugfs handlers vs. removal with lockdep" This reverts commit f4acfcd4deb1 ("debugfs: annotate debugfs handlers vs. removal with lockdep"), it appears to have false positives and really shouldn't have been in the -rc series with the fixes anyway. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20231202114936.fd55431ab160.I911aa53abeeca138126f690d383a89b13eb05667@changeid Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 10 ---------- fs/debugfs/inode.c | 7 ------- fs/debugfs/internal.h | 6 ------ 3 files changed, 23 deletions(-) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index a5ade8c16375..5063434be0fc 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -108,12 +108,6 @@ int debugfs_file_get(struct dentry *dentry) kfree(fsd); fsd = READ_ONCE(dentry->d_fsdata); } -#ifdef CONFIG_LOCKDEP - fsd->lock_name = kasprintf(GFP_KERNEL, "debugfs:%pd", dentry); - lockdep_register_key(&fsd->key); - lockdep_init_map(&fsd->lockdep_map, fsd->lock_name ?: "debugfs", - &fsd->key, 0); -#endif INIT_LIST_HEAD(&fsd->cancellations); mutex_init(&fsd->cancellations_mtx); } @@ -132,8 +126,6 @@ int debugfs_file_get(struct dentry *dentry) if (!refcount_inc_not_zero(&fsd->active_users)) return -EIO; - lock_map_acquire_read(&fsd->lockdep_map); - return 0; } EXPORT_SYMBOL_GPL(debugfs_file_get); @@ -151,8 +143,6 @@ void debugfs_file_put(struct dentry *dentry) { struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata); - lock_map_release(&fsd->lockdep_map); - if (refcount_dec_and_test(&fsd->active_users)) complete(&fsd->active_users_drained); } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index e4e7fe1bd9fb..034a617cb1a5 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -243,10 +243,6 @@ static void debugfs_release_dentry(struct dentry *dentry) /* check it wasn't a dir (no fsdata) or automount (no real_fops) */ if (fsd && fsd->real_fops) { -#ifdef CONFIG_LOCKDEP - lockdep_unregister_key(&fsd->key); - kfree(fsd->lock_name); -#endif WARN_ON(!list_empty(&fsd->cancellations)); mutex_destroy(&fsd->cancellations_mtx); } @@ -755,9 +751,6 @@ static void __debugfs_file_removed(struct dentry *dentry) if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) return; - lock_map_acquire(&fsd->lockdep_map); - lock_map_release(&fsd->lockdep_map); - /* if we hit zero, just wait for all to finish */ if (!refcount_dec_and_test(&fsd->active_users)) { wait_for_completion(&fsd->active_users_drained); diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h index 0c4c68cf161f..dae80c2a469e 100644 --- a/fs/debugfs/internal.h +++ b/fs/debugfs/internal.h @@ -7,7 +7,6 @@ #ifndef _DEBUGFS_INTERNAL_H_ #define _DEBUGFS_INTERNAL_H_ -#include #include struct file_operations; @@ -25,11 +24,6 @@ struct debugfs_fsdata { struct { refcount_t active_users; struct completion active_users_drained; -#ifdef CONFIG_LOCKDEP - struct lockdep_map lockdep_map; - struct lock_class_key key; - char *lock_name; -#endif /* protect cancellations */ struct mutex cancellations_mtx; -- cgit From 4181ef7dbb63e638739e929bca5b7bca3a37a311 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 30 Oct 2023 21:09:09 -0700 Subject: greybus: BeaglePlay driver needs CRC_CCITT The gb-beagleplay driver uses crc_ccitt(), so it should select CRC_CCITT to make sure that the function is available. Fixes these build errors: s390-linux-ld: drivers/greybus/gb-beagleplay.o: in function `hdlc_append_tx_u8': gb-beagleplay.c:(.text+0x2c0): undefined reference to `crc_ccitt' s390-linux-ld: drivers/greybus/gb-beagleplay.o: in function `hdlc_rx_frame': gb-beagleplay.c:(.text+0x6a0): undefined reference to `crc_ccitt' Fixes: ec558bbfea67 ("greybus: Add BeaglePlay Linux Driver") Signed-off-by: Randy Dunlap Cc: Ayush Singh Cc: Johan Hovold Cc: Alex Elder Cc: greybus-dev@lists.linaro.org Link: https://lore.kernel.org/r/20231031040909.21201-1-rdunlap@infradead.org Reviewed-by: Ayush Singh Signed-off-by: Greg Kroah-Hartman --- drivers/greybus/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/greybus/Kconfig b/drivers/greybus/Kconfig index 033d31dbf3b8..ab81ceceb337 100644 --- a/drivers/greybus/Kconfig +++ b/drivers/greybus/Kconfig @@ -20,6 +20,7 @@ if GREYBUS config GREYBUS_BEAGLEPLAY tristate "Greybus BeaglePlay driver" depends on SERIAL_DEV_BUS + select CRC_CCITT help Select this option if you have a BeaglePlay where CC1352 co-processor acts as Greybus SVC. -- cgit From 52eb67861ebeb2110318bd9fe33d85ddcf92aac7 Mon Sep 17 00:00:00 2001 From: Ayush Singh Date: Sun, 3 Dec 2023 13:23:10 +0530 Subject: greybus: gb-beagleplay: Ensure le for values in transport Ensure that the following values are little-endian: - header->pad (which is used for cport_id) - header->size Fixes: ec558bbfea67 ("greybus: Add BeaglePlay Linux Driver") Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202311072329.Xogj7hGW-lkp@intel.com/ Signed-off-by: Ayush Singh Link: https://lore.kernel.org/r/20231203075312.255233-1-ayushdevel1325@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/greybus/gb-beagleplay.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/greybus/gb-beagleplay.c b/drivers/greybus/gb-beagleplay.c index 43318c1993ba..1e70ff7e3da4 100644 --- a/drivers/greybus/gb-beagleplay.c +++ b/drivers/greybus/gb-beagleplay.c @@ -93,9 +93,9 @@ static void hdlc_rx_greybus_frame(struct gb_beagleplay *bg, u8 *buf, u16 len) memcpy(&cport_id, hdr->pad, sizeof(cport_id)); dev_dbg(&bg->sd->dev, "Greybus Operation %u type %X cport %u status %u received", - hdr->operation_id, hdr->type, cport_id, hdr->result); + hdr->operation_id, hdr->type, le16_to_cpu(cport_id), hdr->result); - greybus_data_rcvd(bg->gb_hd, cport_id, buf, len); + greybus_data_rcvd(bg->gb_hd, le16_to_cpu(cport_id), buf, len); } static void hdlc_rx_dbg_frame(const struct gb_beagleplay *bg, const char *buf, u16 len) @@ -340,14 +340,15 @@ static int gb_message_send(struct gb_host_device *hd, u16 cport, struct gb_messa { struct gb_beagleplay *bg = dev_get_drvdata(&hd->dev); struct hdlc_payload payloads[2]; + __le16 cport_id = le16_to_cpu(cport); dev_dbg(&hd->dev, "Sending greybus message with Operation %u, Type: %X on Cport %u", msg->header->operation_id, msg->header->type, cport); - if (msg->header->size > RX_HDLC_PAYLOAD) + if (le16_to_cpu(msg->header->size) > RX_HDLC_PAYLOAD) return dev_err_probe(&hd->dev, -E2BIG, "Greybus message too big"); - memcpy(msg->header->pad, &cport, sizeof(cport)); + memcpy(msg->header->pad, &cport_id, sizeof(cport_id)); payloads[0].buf = msg->header; payloads[0].len = sizeof(*msg->header); -- cgit From c55e0a55b165202f18cbc4a20650d2e1becd5507 Mon Sep 17 00:00:00 2001 From: Tyler Fanelli Date: Tue, 19 Sep 2023 22:40:00 -0400 Subject: fuse: Rename DIRECT_IO_RELAX to DIRECT_IO_ALLOW_MMAP Although DIRECT_IO_RELAX's initial usage is to allow shared mmap, its description indicates a purpose of reducing memory footprint. This may imply that it could be further used to relax other DIRECT_IO operations in the future. Replace it with a flag DIRECT_IO_ALLOW_MMAP which does only one thing, allow shared mmap of DIRECT_IO files while still bypassing the cache on regular reads and writes. [Miklos] Also Keep DIRECT_IO_RELAX definition for backward compatibility. Signed-off-by: Tyler Fanelli Fixes: e78662e818f9 ("fuse: add a new fuse init flag to relax restrictions in no cache mode") Cc: # v6.6 Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 6 +++--- fs/fuse/fuse_i.h | 4 ++-- fs/fuse/inode.c | 6 +++--- include/uapi/linux/fuse.h | 10 ++++++---- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1cdb6327511e..89e870d1a526 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1448,7 +1448,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, if (!ia) return -ENOMEM; - if (fopen_direct_io && fc->direct_io_relax) { + if (fopen_direct_io && fc->direct_io_allow_mmap) { res = filemap_write_and_wait_range(mapping, pos, pos + count - 1); if (res) { fuse_io_free(ia); @@ -2466,9 +2466,9 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) if (ff->open_flags & FOPEN_DIRECT_IO) { /* Can't provide the coherency needed for MAP_SHARED - * if FUSE_DIRECT_IO_RELAX isn't set. + * if FUSE_DIRECT_IO_ALLOW_MMAP isn't set. */ - if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_relax) + if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_allow_mmap) return -ENODEV; invalidate_inode_pages2(file->f_mapping); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 6e6e721f421b..69bcffaf4832 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -797,8 +797,8 @@ struct fuse_conn { /* Is tmpfile not implemented by fs? */ unsigned int no_tmpfile:1; - /* relax restrictions in FOPEN_DIRECT_IO mode */ - unsigned int direct_io_relax:1; + /* Relax restrictions to allow shared mmap in FOPEN_DIRECT_IO mode */ + unsigned int direct_io_allow_mmap:1; /* Is statx not implemented by fs? */ unsigned int no_statx:1; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 74d4f09d5827..88090c6026a7 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1230,8 +1230,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->init_security = 1; if (flags & FUSE_CREATE_SUPP_GROUP) fc->create_supp_group = 1; - if (flags & FUSE_DIRECT_IO_RELAX) - fc->direct_io_relax = 1; + if (flags & FUSE_DIRECT_IO_ALLOW_MMAP) + fc->direct_io_allow_mmap = 1; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; @@ -1278,7 +1278,7 @@ void fuse_send_init(struct fuse_mount *fm) FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | - FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_RELAX; + FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) flags |= FUSE_MAP_ALIGNMENT; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index db92a7202b34..e7418d15fe39 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -209,7 +209,7 @@ * - add FUSE_HAS_EXPIRE_ONLY * * 7.39 - * - add FUSE_DIRECT_IO_RELAX + * - add FUSE_DIRECT_IO_ALLOW_MMAP * - add FUSE_STATX and related structures */ @@ -409,8 +409,7 @@ struct fuse_file_lock { * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir, * symlink and mknod (single group that matches parent) * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation - * FUSE_DIRECT_IO_RELAX: relax restrictions in FOPEN_DIRECT_IO mode, for now - * allow shared mmap + * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode. */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -449,7 +448,10 @@ struct fuse_file_lock { #define FUSE_HAS_INODE_DAX (1ULL << 33) #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) -#define FUSE_DIRECT_IO_RELAX (1ULL << 36) +#define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) + +/* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ +#define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP /** * CUSE INIT request/reply flags -- cgit From 11ca77cdcca17cec909d2b97404ddacfec0acafd Mon Sep 17 00:00:00 2001 From: Tyler Fanelli Date: Tue, 19 Sep 2023 22:40:01 -0400 Subject: docs/fuse-io: Document the usage of DIRECT_IO_ALLOW_MMAP By default, shared mmap is disabled in FUSE DIRECT_IO mode. However, when the DIRECT_IO_ALLOW_MMAP flag is enabled in the FUSE_INIT reply, shared mmap is allowed. Signed-off-by: Tyler Fanelli Signed-off-by: Miklos Szeredi --- Documentation/filesystems/fuse-io.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/fuse-io.rst b/Documentation/filesystems/fuse-io.rst index 255a368fe534..6464de4266ad 100644 --- a/Documentation/filesystems/fuse-io.rst +++ b/Documentation/filesystems/fuse-io.rst @@ -15,7 +15,8 @@ The direct-io mode can be selected with the FOPEN_DIRECT_IO flag in the FUSE_OPEN reply. In direct-io mode the page cache is completely bypassed for reads and writes. -No read-ahead takes place. Shared mmap is disabled. +No read-ahead takes place. Shared mmap is disabled by default. To allow shared +mmap, the FUSE_DIRECT_IO_ALLOW_MMAP flag may be enabled in the FUSE_INIT reply. In cached mode reads may be satisfied from the page cache, and data may be read-ahead by the kernel to fill the cache. The cache is always kept consistent -- cgit From c4d361f66ac91db8fc65061a9671682f61f4ca9d Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Fri, 3 Nov 2023 10:39:47 -0700 Subject: fuse: share lookup state between submount and its parent Fuse submounts do not perform a lookup for the nodeid that they inherit from their parent. Instead, the code decrements the nlookup on the submount's fuse_inode when it is instantiated, and no forget is performed when a submount root is evicted. Trouble arises when the submount's parent is evicted despite the submount itself being in use. In this author's case, the submount was in a container and deatched from the initial mount namespace via a MNT_DEATCH operation. When memory pressure triggered the shrinker, the inode from the parent was evicted, which triggered enough forgets to render the submount's nodeid invalid. Since submounts should still function, even if their parent goes away, solve this problem by sharing refcounted state between the parent and its submount. When all of the references on this shared state reach zero, it's safe to forget the final lookup of the fuse nodeid. Signed-off-by: Krister Johansen Cc: stable@vger.kernel.org Fixes: 1866d779d5d2 ("fuse: Allow fuse_fill_super_common() for submounts") Signed-off-by: Miklos Szeredi --- fs/fuse/fuse_i.h | 15 ++++++++++++ fs/fuse/inode.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 87 insertions(+), 3 deletions(-) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 69bcffaf4832..1df83eebda92 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -63,6 +63,19 @@ struct fuse_forget_link { struct fuse_forget_link *next; }; +/* Submount lookup tracking */ +struct fuse_submount_lookup { + /** Refcount */ + refcount_t count; + + /** Unique ID, which identifies the inode between userspace + * and kernel */ + u64 nodeid; + + /** The request used for sending the FORGET message */ + struct fuse_forget_link *forget; +}; + /** FUSE inode */ struct fuse_inode { /** Inode data */ @@ -158,6 +171,8 @@ struct fuse_inode { */ struct fuse_inode_dax *dax; #endif + /** Submount specific lookup tracking */ + struct fuse_submount_lookup *submount_lookup; }; /** FUSE inode state bits */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 88090c6026a7..2a6d44f91729 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -68,6 +68,24 @@ struct fuse_forget_link *fuse_alloc_forget(void) return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT); } +static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void) +{ + struct fuse_submount_lookup *sl; + + sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT); + if (!sl) + return NULL; + sl->forget = fuse_alloc_forget(); + if (!sl->forget) + goto out_free; + + return sl; + +out_free: + kfree(sl); + return NULL; +} + static struct inode *fuse_alloc_inode(struct super_block *sb) { struct fuse_inode *fi; @@ -83,6 +101,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->attr_version = 0; fi->orig_ino = 0; fi->state = 0; + fi->submount_lookup = NULL; mutex_init(&fi->mutex); spin_lock_init(&fi->lock); fi->forget = fuse_alloc_forget(); @@ -113,6 +132,17 @@ static void fuse_free_inode(struct inode *inode) kmem_cache_free(fuse_inode_cachep, fi); } +static void fuse_cleanup_submount_lookup(struct fuse_conn *fc, + struct fuse_submount_lookup *sl) +{ + if (!refcount_dec_and_test(&sl->count)) + return; + + fuse_queue_forget(fc, sl->forget, sl->nodeid, 1); + sl->forget = NULL; + kfree(sl); +} + static void fuse_evict_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); @@ -132,6 +162,11 @@ static void fuse_evict_inode(struct inode *inode) fi->nlookup); fi->forget = NULL; } + + if (fi->submount_lookup) { + fuse_cleanup_submount_lookup(fc, fi->submount_lookup); + fi->submount_lookup = NULL; + } } if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { WARN_ON(!list_empty(&fi->write_files)); @@ -330,6 +365,13 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, fuse_dax_dontcache(inode, attr->flags); } +static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl, + u64 nodeid) +{ + sl->nodeid = nodeid; + refcount_set(&sl->count, 1); +} + static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr, struct fuse_conn *fc) { @@ -392,12 +434,22 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, */ if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) && S_ISDIR(attr->mode)) { + struct fuse_inode *fi; + inode = new_inode(sb); if (!inode) return NULL; fuse_init_inode(inode, attr, fc); - get_fuse_inode(inode)->nodeid = nodeid; + fi = get_fuse_inode(inode); + fi->nodeid = nodeid; + fi->submount_lookup = fuse_alloc_submount_lookup(); + if (!fi->submount_lookup) { + iput(inode); + return NULL; + } + /* Sets nlookup = 1 on fi->submount_lookup->nlookup */ + fuse_init_submount_lookup(fi->submount_lookup, nodeid); inode->i_flags |= S_AUTOMOUNT; goto done; } @@ -420,11 +472,11 @@ retry: iput(inode); goto retry; } -done: fi = get_fuse_inode(inode); spin_lock(&fi->lock); fi->nlookup++; spin_unlock(&fi->lock); +done: fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version); return inode; @@ -1465,6 +1517,8 @@ static int fuse_fill_super_submount(struct super_block *sb, struct super_block *parent_sb = parent_fi->inode.i_sb; struct fuse_attr root_attr; struct inode *root; + struct fuse_submount_lookup *sl; + struct fuse_inode *fi; fuse_sb_defaults(sb); fm->sb = sb; @@ -1487,12 +1541,27 @@ static int fuse_fill_super_submount(struct super_block *sb, * its nlookup should not be incremented. fuse_iget() does * that, though, so undo it here. */ - get_fuse_inode(root)->nlookup--; + fi = get_fuse_inode(root); + fi->nlookup--; + sb->s_d_op = &fuse_dentry_operations; sb->s_root = d_make_root(root); if (!sb->s_root) return -ENOMEM; + /* + * Grab the parent's submount_lookup pointer and take a + * reference on the shared nlookup from the parent. This is to + * prevent the last forget for this nodeid from getting + * triggered until all users have finished with it. + */ + sl = parent_fi->submount_lookup; + WARN_ON(!sl); + if (sl) { + refcount_inc(&sl->count); + fi->submount_lookup = sl; + } + return 0; } -- cgit From 7f8ed28d1401320bcb02dda81b3c23ab2dc5a6d8 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Thu, 16 Nov 2023 15:57:26 +0800 Subject: fuse: dax: set fc->dax to NULL in fuse_dax_conn_free() fuse_dax_conn_free() will be called when fuse_fill_super_common() fails after fuse_dax_conn_alloc(). Then deactivate_locked_super() in virtio_fs_get_tree() will call virtio_kill_sb() to release the discarded superblock. This will call fuse_dax_conn_free() again in fuse_conn_put(), resulting in a possible double free. Fixes: 1dd539577c42 ("virtiofs: add a mount option to enable dax") Signed-off-by: Hangyu Hua Acked-by: Vivek Goyal Reviewed-by: Jingbo Xu Cc: # v5.10 Signed-off-by: Miklos Szeredi --- fs/fuse/dax.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 23904a6a9a96..12ef91d170bb 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -1222,6 +1222,7 @@ void fuse_dax_conn_free(struct fuse_conn *fc) if (fc->dax) { fuse_free_dax_mem_ranges(&fc->dax->free_ranges); kfree(fc->dax); + fc->dax = NULL; } } -- cgit From 3f29f1c336c0e8a4bec52f1e5217f88835553e5b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 3 Dec 2023 09:42:33 +0200 Subject: fuse: disable FOPEN_PARALLEL_DIRECT_WRITES with FUSE_DIRECT_IO_ALLOW_MMAP The new fuse init flag FUSE_DIRECT_IO_ALLOW_MMAP breaks assumptions made by FOPEN_PARALLEL_DIRECT_WRITES and causes test generic/095 to hit BUG_ON(fi->writectr < 0) assertions in fuse_set_nowrite(): generic/095 5s ... kernel BUG at fs/fuse/dir.c:1756! ... ? fuse_set_nowrite+0x3d/0xdd ? do_raw_spin_unlock+0x88/0x8f ? _raw_spin_unlock+0x2d/0x43 ? fuse_range_is_writeback+0x71/0x84 fuse_sync_writes+0xf/0x19 fuse_direct_io+0x167/0x5bd fuse_direct_write_iter+0xf0/0x146 Auto disable FOPEN_PARALLEL_DIRECT_WRITES when server negotiated FUSE_DIRECT_IO_ALLOW_MMAP. Fixes: e78662e818f9 ("fuse: add a new fuse init flag to relax restrictions in no cache mode") Cc: # v6.6 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 89e870d1a526..a660f1f21540 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1574,6 +1574,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t res; bool exclusive_lock = !(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES) || + get_fuse_conn(inode)->direct_io_allow_mmap || iocb->ki_flags & IOCB_APPEND || fuse_direct_write_extending_i_size(iocb, from); @@ -1581,6 +1582,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) * Take exclusive lock if * - Parallel direct writes are disabled - a user space decision * - Parallel direct writes are enabled and i_size is being extended. + * - Shared mmap on direct_io file is supported (FUSE_DIRECT_IO_ALLOW_MMAP). * This might not be needed at all, but needs further investigation. */ if (exclusive_lock) -- cgit From 2d880bfa4a97002962d5be148725197be98b191a Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 4 Dec 2023 10:37:15 +0100 Subject: mailmap: add address mapping for Jiri Kosina Since I switched the MAINTAINERS entry to @kernel.org some time ago, let's canonicalize my addressess to it. Signed-off-by: Jiri Kosina Signed-off-by: Jiri Kosina --- .mailmap | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.mailmap b/.mailmap index 2643b7203a74..2a9db44a6924 100644 --- a/.mailmap +++ b/.mailmap @@ -263,6 +263,9 @@ Jens Osterkamp Jernej Skrabec Jessica Zhang Jilai Wang +Jiri Kosina +Jiri Kosina +Jiri Kosina Jiri Pirko Jiri Pirko Jiri Pirko -- cgit From e62adaeecdc6a1e8ae86e7f3f9f8223a3ede94f5 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 29 Nov 2023 13:25:20 -0800 Subject: r8152: Hold the rtnl_lock for all of reset As of commit d9962b0d4202 ("r8152: Block future register access if register access fails") there is a race condition that can happen between the USB device reset thread and napi_enable() (not) getting called during rtl8152_open(). Specifically: * While rtl8152_open() is running we get a register access error that's _not_ -ENODEV and queue up a USB reset. * rtl8152_open() exits before calling napi_enable() due to any reason (including usb_submit_urb() returning an error). In that case: * Since the USB reset is perform in a separate thread asynchronously, it can run at anytime USB device lock is not held - even before rtl8152_open() has exited with an error and caused __dev_open() to clear the __LINK_STATE_START bit. * The rtl8152_pre_reset() will notice that the netif_running() returns true (since __LINK_STATE_START wasn't cleared) so it won't exit early. * rtl8152_pre_reset() will then hang in napi_disable() because napi_enable() was never called. We can fix the race by making sure that the r8152 reset routines don't run at the same time as we're opening the device. Specifically we need the reset routines in their entirety rely on the return value of netif_running(). The only way to reliably depend on that is for them to hold the rntl_lock() mutex for the duration of reset. Grabbing the rntl_lock() mutex for the duration of reset seems like a long time, but reset is not expected to be common and the rtnl_lock() mutex is already held for long durations since the core grabs it around the open/close calls. Fixes: d9962b0d4202 ("r8152: Block future register access if register access fails") Reviewed-by: Grant Grundler Signed-off-by: Douglas Anderson Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 2c5c1e91ded6..d6edf0254599 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -8397,6 +8397,8 @@ static int rtl8152_pre_reset(struct usb_interface *intf) struct r8152 *tp = usb_get_intfdata(intf); struct net_device *netdev; + rtnl_lock(); + if (!tp || !test_bit(PROBED_WITH_NO_ERRORS, &tp->flags)) return 0; @@ -8428,20 +8430,17 @@ static int rtl8152_post_reset(struct usb_interface *intf) struct sockaddr sa; if (!tp || !test_bit(PROBED_WITH_NO_ERRORS, &tp->flags)) - return 0; + goto exit; rtl_set_accessible(tp); /* reset the MAC address in case of policy change */ - if (determine_ethernet_addr(tp, &sa) >= 0) { - rtnl_lock(); + if (determine_ethernet_addr(tp, &sa) >= 0) dev_set_mac_address (tp->netdev, &sa, NULL); - rtnl_unlock(); - } netdev = tp->netdev; if (!netif_running(netdev)) - return 0; + goto exit; set_bit(WORK_ENABLE, &tp->flags); if (netif_carrier_ok(netdev)) { @@ -8460,6 +8459,8 @@ static int rtl8152_post_reset(struct usb_interface *intf) if (!list_empty(&tp->rx_done)) napi_schedule(&tp->napi); +exit: + rtnl_unlock(); return 0; } -- cgit From 32a574c7e2685aa8138754d4d755f9246cc6bd48 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 29 Nov 2023 13:25:21 -0800 Subject: r8152: Add RTL8152_INACCESSIBLE checks to more loops Previous commits added checks for RTL8152_INACCESSIBLE in the loops in the driver. There are still a few more that keep tripping the driver up in error cases and make things take longer than they should. Add those in. All the loops that are part of this commit existed in some form or another since the r8152 driver was first introduced, though RTL8152_INACCESSIBLE was known as RTL8152_UNPLUG before commit 715f67f33af4 ("r8152: Rename RTL8152_UNPLUG to RTL8152_INACCESSIBLE") Fixes: ac718b69301c ("net/usb: new driver for RTL8152") Reviewed-by: Grant Grundler Signed-off-by: Douglas Anderson Acked-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d6edf0254599..e9955701f455 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3000,6 +3000,8 @@ static void rtl8152_nic_reset(struct r8152 *tp) ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, CR_RST); for (i = 0; i < 1000; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + break; if (!(ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CR) & CR_RST)) break; usleep_range(100, 400); @@ -3329,6 +3331,8 @@ static void rtl_disable(struct r8152 *tp) rxdy_gated_en(tp, true); for (i = 0; i < 1000; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + break; ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); if ((ocp_data & FIFO_EMPTY) == FIFO_EMPTY) break; @@ -3336,6 +3340,8 @@ static void rtl_disable(struct r8152 *tp) } for (i = 0; i < 1000; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + break; if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR0) & TCR0_TX_EMPTY) break; usleep_range(1000, 2000); @@ -5499,6 +5505,8 @@ static void wait_oob_link_list_ready(struct r8152 *tp) int i; for (i = 0; i < 1000; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + break; ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); if (ocp_data & LINK_LIST_READY) break; -- cgit From 8a67b47fced9f6a84101eb9ec5ce4c7d64204bc7 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 29 Nov 2023 13:25:22 -0800 Subject: r8152: Add RTL8152_INACCESSIBLE to r8156b_wait_loading_flash() Delay loops in r8152 should break out if RTL8152_INACCESSIBLE is set so that they don't delay too long if the device becomes inaccessible. Add the break to the loop in r8156b_wait_loading_flash(). Fixes: 195aae321c82 ("r8152: support new chips") Reviewed-by: Grant Grundler Signed-off-by: Douglas Anderson Acked-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index e9955701f455..c4dd81e2421f 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5521,6 +5521,8 @@ static void r8156b_wait_loading_flash(struct r8152 *tp) int i; for (i = 0; i < 100; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + break; if (ocp_read_word(tp, MCU_TYPE_USB, USB_GPHY_CTRL) & GPHY_PATCH_DONE) break; usleep_range(1000, 2000); -- cgit From 8c53a7bd706535a9cf4e2ec3a4e8d61d46353ca0 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 29 Nov 2023 13:25:23 -0800 Subject: r8152: Add RTL8152_INACCESSIBLE to r8153_pre_firmware_1() Delay loops in r8152 should break out if RTL8152_INACCESSIBLE is set so that they don't delay too long if the device becomes inaccessible. Add the break to the loop in r8153_pre_firmware_1(). Fixes: 9370f2d05a2a ("r8152: support request_firmware for RTL8153") Reviewed-by: Grant Grundler Signed-off-by: Douglas Anderson Acked-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c4dd81e2421f..3958eb622d47 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5645,6 +5645,8 @@ static int r8153_pre_firmware_1(struct r8152 *tp) for (i = 0; i < 104; i++) { u32 ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_WDT1_CTRL); + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + return -ENODEV; if (!(ocp_data & WTD1_EN)) break; usleep_range(1000, 2000); -- cgit From 79321a793945fdbff2f405f84712d0ab81bed287 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 29 Nov 2023 13:25:24 -0800 Subject: r8152: Add RTL8152_INACCESSIBLE to r8153_aldps_en() Delay loops in r8152 should break out if RTL8152_INACCESSIBLE is set so that they don't delay too long if the device becomes inaccessible. Add the break to the loop in r8153_aldps_en(). Fixes: 4214cc550bf9 ("r8152: check if disabling ALDPS is finished") Reviewed-by: Grant Grundler Signed-off-by: Douglas Anderson Acked-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 3958eb622d47..fcdc9ba0f826 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5803,6 +5803,8 @@ static void r8153_aldps_en(struct r8152 *tp, bool enable) data &= ~EN_ALDPS; ocp_reg_write(tp, OCP_POWER_CFG, data); for (i = 0; i < 20; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + return; usleep_range(1000, 2000); if (ocp_read_word(tp, MCU_TYPE_PLA, 0xe000) & 0x0100) break; -- cgit From 8e3c98d9187e09274fc000a7d1a77b070a42d259 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 30 Nov 2023 20:43:42 +0000 Subject: firmware: arm_scmi: Fix frequency truncation by promoting multiplier type Fix the possible frequency truncation for all values equal to or greater 4GHz on 64bit machines by updating the multiplier 'mult_factor' to 'unsigned long' type. It is also possible that the multiplier itself can be greater than or equal to 2^32. So we need to also fix the equation computing the value of the multiplier. Fixes: a9e3fbfaa0ff ("firmware: arm_scmi: add initial support for performance protocol") Reported-by: Sibi Sankar Closes: https://lore.kernel.org/all/20231129065748.19871-3-quic_sibis@quicinc.com/ Cc: Cristian Marussi Link: https://lore.kernel.org/r/20231130204343.503076-1-sudeep.holla@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/perf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index c2435be0ae1b..f98791274495 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -152,7 +152,7 @@ struct perf_dom_info { u32 opp_count; u32 sustained_freq_khz; u32 sustained_perf_level; - u32 mult_factor; + unsigned long mult_factor; struct scmi_perf_domain_info info; struct scmi_opp opp[MAX_OPPS]; struct scmi_fc_info *fc_info; @@ -273,8 +273,8 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph, dom_info->mult_factor = 1000; else dom_info->mult_factor = - (dom_info->sustained_freq_khz * 1000) / - dom_info->sustained_perf_level; + (dom_info->sustained_freq_khz * 1000UL) + / dom_info->sustained_perf_level; strscpy(dom_info->info.name, attr->name, SCMI_SHORT_NAME_MAX_SIZE); } -- cgit From 77f5032e94f244ba08db51e17ca8f37bd7ff9acb Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 30 Nov 2023 20:43:43 +0000 Subject: firmware: arm_scmi: Fix possible frequency truncation when using level indexing mode The multiplier is already promoted to unsigned long, however the frequency calculations done when using level indexing mode doesn't use the multiplier computed. It instead hardcodes the multiplier value of 1000 at all the usage sites. Clean that up by assigning the multiplier value of 1000 when using the perf level indexing mode and update the frequency calculations to use the multiplier instead. It should fix the possible frequency truncation for all the values greater than or equal to 4GHz on 64-bit machines. Fixes: 31c7c1397a33 ("firmware: arm_scmi: Add v3.2 perf level indexing mode support") Reported-by: Sibi Sankar Closes: https://lore.kernel.org/all/20231129065748.19871-3-quic_sibis@quicinc.com/ Cc: Cristian Marussi Link: https://lore.kernel.org/r/20231130204343.503076-2-sudeep.holla@arm.com Reviewed-by: Cristian Marussi Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/perf.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index f98791274495..e11555de99ab 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -268,7 +268,8 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph, dom_info->sustained_perf_level = le32_to_cpu(attr->sustained_perf_level); if (!dom_info->sustained_freq_khz || - !dom_info->sustained_perf_level) + !dom_info->sustained_perf_level || + dom_info->level_indexing_mode) /* CPUFreq converts to kHz, hence default 1000 */ dom_info->mult_factor = 1000; else @@ -798,7 +799,7 @@ static int scmi_dvfs_device_opps_add(const struct scmi_protocol_handle *ph, if (!dom->level_indexing_mode) freq = dom->opp[idx].perf * dom->mult_factor; else - freq = dom->opp[idx].indicative_freq * 1000; + freq = dom->opp[idx].indicative_freq * dom->mult_factor; data.level = dom->opp[idx].perf; data.freq = freq; @@ -845,7 +846,8 @@ static int scmi_dvfs_freq_set(const struct scmi_protocol_handle *ph, u32 domain, } else { struct scmi_opp *opp; - opp = LOOKUP_BY_FREQ(dom->opps_by_freq, freq / 1000); + opp = LOOKUP_BY_FREQ(dom->opps_by_freq, + freq / dom->mult_factor); if (!opp) return -EIO; @@ -879,7 +881,7 @@ static int scmi_dvfs_freq_get(const struct scmi_protocol_handle *ph, u32 domain, if (!opp) return -EIO; - *freq = opp->indicative_freq * 1000; + *freq = opp->indicative_freq * dom->mult_factor; } return ret; @@ -902,7 +904,7 @@ static int scmi_dvfs_est_power_get(const struct scmi_protocol_handle *ph, if (!dom->level_indexing_mode) opp_freq = opp->perf * dom->mult_factor; else - opp_freq = opp->indicative_freq * 1000; + opp_freq = opp->indicative_freq * dom->mult_factor; if (opp_freq < *freq) continue; -- cgit From fea88064445a59584460f7f67d102b6e5fc1ca1d Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Sun, 3 Dec 2023 23:22:16 +0100 Subject: regmap: fix bogus error on regcache_sync success Since commit 0ec7731655de ("regmap: Ensure range selector registers are updated after cache sync") opening pcm512x based soundcards fail with EINVAL and dmesg shows sync cache and pm_runtime_get errors: [ 228.794676] pcm512x 1-004c: Failed to sync cache: -22 [ 228.794740] pcm512x 1-004c: ASoC: error at snd_soc_pcm_component_pm_runtime_get on pcm512x.1-004c: -22 This is caused by the cache check result leaking out into the regcache_sync return value. Fix this by making the check local-only, as the comment above the regcache_read call states a non-zero return value means there's nothing to do so the return value should not be altered. Fixes: 0ec7731655de ("regmap: Ensure range selector registers are updated after cache sync") Cc: stable@vger.kernel.org Signed-off-by: Matthias Reichl Link: https://lore.kernel.org/r/20231203222216.96547-1-hias@horus.com Signed-off-by: Mark Brown --- drivers/base/regmap/regcache.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index 92592f944a3d..ac63a73ccdaa 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -410,8 +410,7 @@ out: rb_entry(node, struct regmap_range_node, node); /* If there's nothing in the cache there's nothing to sync */ - ret = regcache_read(map, this->selector_reg, &i); - if (ret != 0) + if (regcache_read(map, this->selector_reg, &i) != 0) continue; ret = _regmap_write(map, this->selector_reg, i); -- cgit From d4eef75279f5e9d594f5785502038c763ce42268 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Thu, 30 Nov 2023 13:35:15 -0500 Subject: mlxbf-bootctl: correctly identify secure boot with development keys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The secure boot state of the BlueField SoC is represented by two bits: 0 = production state 1 = secure boot enabled 2 = non-secure (secure boot disabled) 3 = RMA state There is also a single bit to indicate whether production keys or development keys are being used when secure boot is enabled. This single bit (specified by MLXBF_BOOTCTL_SB_DEV_MASK) only has meaning if secure boot state equals 1 (secure boot enabled). The secure boot states are as follows: - “GA secured” is when secure boot is enabled with official production keys. - “Secured (development)” is when secure boot is enabled with development keys. Without this fix “GA Secured” is displayed on development cards which is misleading. This patch updates the logic in "lifecycle_state_show()" to handle the case where the SoC is configured for secure boot and is using development keys. Fixes: 79e29cb8fbc5c ("platform/mellanox: Add bootctl driver for Mellanox BlueField Soc") Reviewed-by: Khalil Blaiech Signed-off-by: David Thompson Link: https://lore.kernel.org/r/20231130183515.17214-1-davthompson@nvidia.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/mellanox/mlxbf-bootctl.c | 39 ++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/drivers/platform/mellanox/mlxbf-bootctl.c b/drivers/platform/mellanox/mlxbf-bootctl.c index 1ac7dab22c63..c1aef3a8fb2d 100644 --- a/drivers/platform/mellanox/mlxbf-bootctl.c +++ b/drivers/platform/mellanox/mlxbf-bootctl.c @@ -20,6 +20,7 @@ #define MLXBF_BOOTCTL_SB_SECURE_MASK 0x03 #define MLXBF_BOOTCTL_SB_TEST_MASK 0x0c +#define MLXBF_BOOTCTL_SB_DEV_MASK BIT(4) #define MLXBF_SB_KEY_NUM 4 @@ -40,11 +41,18 @@ static struct mlxbf_bootctl_name boot_names[] = { { MLXBF_BOOTCTL_NONE, "none" }, }; +enum { + MLXBF_BOOTCTL_SB_LIFECYCLE_PRODUCTION = 0, + MLXBF_BOOTCTL_SB_LIFECYCLE_GA_SECURE = 1, + MLXBF_BOOTCTL_SB_LIFECYCLE_GA_NON_SECURE = 2, + MLXBF_BOOTCTL_SB_LIFECYCLE_RMA = 3 +}; + static const char * const mlxbf_bootctl_lifecycle_states[] = { - [0] = "Production", - [1] = "GA Secured", - [2] = "GA Non-Secured", - [3] = "RMA", + [MLXBF_BOOTCTL_SB_LIFECYCLE_PRODUCTION] = "Production", + [MLXBF_BOOTCTL_SB_LIFECYCLE_GA_SECURE] = "GA Secured", + [MLXBF_BOOTCTL_SB_LIFECYCLE_GA_NON_SECURE] = "GA Non-Secured", + [MLXBF_BOOTCTL_SB_LIFECYCLE_RMA] = "RMA", }; /* Log header format. */ @@ -247,25 +255,30 @@ static ssize_t second_reset_action_store(struct device *dev, static ssize_t lifecycle_state_show(struct device *dev, struct device_attribute *attr, char *buf) { + int status_bits; + int use_dev_key; + int test_state; int lc_state; - lc_state = mlxbf_bootctl_smc(MLXBF_BOOTCTL_GET_TBB_FUSE_STATUS, - MLXBF_BOOTCTL_FUSE_STATUS_LIFECYCLE); - if (lc_state < 0) - return lc_state; + status_bits = mlxbf_bootctl_smc(MLXBF_BOOTCTL_GET_TBB_FUSE_STATUS, + MLXBF_BOOTCTL_FUSE_STATUS_LIFECYCLE); + if (status_bits < 0) + return status_bits; - lc_state &= - MLXBF_BOOTCTL_SB_TEST_MASK | MLXBF_BOOTCTL_SB_SECURE_MASK; + use_dev_key = status_bits & MLXBF_BOOTCTL_SB_DEV_MASK; + test_state = status_bits & MLXBF_BOOTCTL_SB_TEST_MASK; + lc_state = status_bits & MLXBF_BOOTCTL_SB_SECURE_MASK; /* * If the test bits are set, we specify that the current state may be * due to using the test bits. */ - if (lc_state & MLXBF_BOOTCTL_SB_TEST_MASK) { - lc_state &= MLXBF_BOOTCTL_SB_SECURE_MASK; - + if (test_state) { return sprintf(buf, "%s(test)\n", mlxbf_bootctl_lifecycle_states[lc_state]); + } else if (use_dev_key && + (lc_state == MLXBF_BOOTCTL_SB_LIFECYCLE_GA_SECURE)) { + return sprintf(buf, "Secured (development)\n"); } return sprintf(buf, "%s\n", mlxbf_bootctl_lifecycle_states[lc_state]); -- cgit From b5338b1b901e41bd7cead66a0b3a796e9fa95684 Mon Sep 17 00:00:00 2001 From: Marian Postevca Date: Sun, 3 Dec 2023 00:29:51 +0200 Subject: ASoC: amd: acp: Add support for a new Huawei Matebook laptop This commit adds support for Huawei MateBook D16 2021 with Ryzen 4600H in driver acp3x-es83xx. Signed-off-by: Marian Postevca Link: https://lore.kernel.org/r/20231202223001.8025-1-posteuca@mutex.one Signed-off-by: Mark Brown --- sound/soc/amd/acp-config.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/soc/amd/acp-config.c b/sound/soc/amd/acp-config.c index 20cee7104c2b..3bc4b2e41650 100644 --- a/sound/soc/amd/acp-config.c +++ b/sound/soc/amd/acp-config.c @@ -103,6 +103,20 @@ static const struct config_entry config_table[] = { {} }, }, + { + .flags = FLAG_AMD_LEGACY, + .device = ACP_PCI_DEV_ID, + .dmi_table = (const struct dmi_system_id []) { + { + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "HUAWEI"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HVY-WXX9"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "M1010"), + }, + }, + {} + }, + }, { .flags = FLAG_AMD_LEGACY, .device = ACP_PCI_DEV_ID, -- cgit From 5f44de697383fcc9a9a1a78f99e09d1838704b90 Mon Sep 17 00:00:00 2001 From: David Rau Date: Fri, 1 Dec 2023 12:29:33 +0800 Subject: ASoC: da7219: Support low DC impedance headset Change the default MIC detection impedance threshold to 200ohm to support low mic DC impedance headset. Signed-off-by: David Rau Link: https://lore.kernel.org/r/20231201042933.26392-1-David.Rau.opensource@dm.renesas.com Signed-off-by: Mark Brown --- sound/soc/codecs/da7219-aad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/da7219-aad.c b/sound/soc/codecs/da7219-aad.c index 4c4405942779..6bc068cdcbe2 100644 --- a/sound/soc/codecs/da7219-aad.c +++ b/sound/soc/codecs/da7219-aad.c @@ -696,7 +696,7 @@ static struct da7219_aad_pdata *da7219_aad_fw_to_pdata(struct device *dev) aad_pdata->mic_det_thr = da7219_aad_fw_mic_det_thr(dev, fw_val32); else - aad_pdata->mic_det_thr = DA7219_AAD_MIC_DET_THR_500_OHMS; + aad_pdata->mic_det_thr = DA7219_AAD_MIC_DET_THR_200_OHMS; if (fwnode_property_read_u32(aad_np, "dlg,jack-ins-deb", &fw_val32) >= 0) aad_pdata->jack_ins_deb = -- cgit From 29046a78a3c0a1f8fa0427f164caa222f003cf5b Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 4 Dec 2023 15:41:56 +0800 Subject: ASoC: wm_adsp: fix memleak in wm_adsp_buffer_populate When wm_adsp_buffer_read() fails, we should free buf->regions. Otherwise, the callers of wm_adsp_buffer_populate() will directly free buf on failure, which makes buf->regions a leaked memory. Fixes: a792af69b08f ("ASoC: wm_adsp: Refactor compress stream initialisation") Signed-off-by: Dinghao Liu Reviewed-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20231204074158.12026-1-dinghao.liu@zju.edu.cn Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 236b12b69ae5..c01e31175015 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1451,12 +1451,12 @@ static int wm_adsp_buffer_populate(struct wm_adsp_compr_buf *buf) ret = wm_adsp_buffer_read(buf, caps->region_defs[i].base_offset, ®ion->base_addr); if (ret < 0) - return ret; + goto err; ret = wm_adsp_buffer_read(buf, caps->region_defs[i].size_offset, &offset); if (ret < 0) - return ret; + goto err; region->cumulative_size = offset; @@ -1467,6 +1467,10 @@ static int wm_adsp_buffer_populate(struct wm_adsp_compr_buf *buf) } return 0; + +err: + kfree(buf->regions); + return ret; } static void wm_adsp_buffer_clear(struct wm_adsp_compr_buf *buf) -- cgit From 2c7c857f5fed997be93047d2de853d7f10c8defe Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 1 Dec 2023 13:54:47 +0800 Subject: platform/mellanox: Add null pointer checks for devm_kasprintf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Compile-tested only. Fixes: 1a218d312e65 ("platform/mellanox: mlxbf-pmc: Add Mellanox BlueField PMC driver") Suggested-by: Ilpo Järvinen Suggested-by: Vadim Pasternak Signed-off-by: Kunwu Chan Reviewed-by: Vadim Pasternak Link: https://lore.kernel.org/r/20231201055447.2356001-1-chentao@kylinos.cn [ij: split the change into two] Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/mellanox/mlxbf-pmc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index 0b427fc24a96..d0d1cfc55c5f 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -1771,6 +1771,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num) attr->dev_attr.show = mlxbf_pmc_event_list_show; attr->nr = blk_num; attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, "event_list"); + if (!attr->dev_attr.attr.name) + return -ENOMEM; pmc->block[blk_num].block_attr[i] = &attr->dev_attr.attr; attr = NULL; @@ -1784,6 +1786,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num) attr->nr = blk_num; attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, "enable"); + if (!attr->dev_attr.attr.name) + return -ENOMEM; pmc->block[blk_num].block_attr[++i] = &attr->dev_attr.attr; attr = NULL; } @@ -1810,6 +1814,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num) attr->nr = blk_num; attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, "counter%d", j); + if (!attr->dev_attr.attr.name) + return -ENOMEM; pmc->block[blk_num].block_attr[++i] = &attr->dev_attr.attr; attr = NULL; @@ -1821,6 +1827,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num) attr->nr = blk_num; attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, "event%d", j); + if (!attr->dev_attr.attr.name) + return -ENOMEM; pmc->block[blk_num].block_attr[++i] = &attr->dev_attr.attr; attr = NULL; } @@ -1853,6 +1861,8 @@ static int mlxbf_pmc_init_perftype_reg(struct device *dev, int blk_num) attr->nr = blk_num; attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, events[j].evt_name); + if (!attr->dev_attr.attr.name) + return -ENOMEM; pmc->block[blk_num].block_attr[i] = &attr->dev_attr.attr; attr = NULL; i++; @@ -1882,6 +1892,8 @@ static int mlxbf_pmc_create_groups(struct device *dev, int blk_num) pmc->block[blk_num].block_attr_grp.attrs = pmc->block[blk_num].block_attr; pmc->block[blk_num].block_attr_grp.name = devm_kasprintf( dev, GFP_KERNEL, pmc->block_name[blk_num]); + if (!pmc->block[blk_num].block_attr_grp.name) + return -ENOMEM; pmc->groups[pmc->group_num] = &pmc->block[blk_num].block_attr_grp; pmc->group_num++; -- cgit From 3494a594315b56516988afb6854d75dee5b501db Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 1 Dec 2023 13:54:47 +0800 Subject: platform/mellanox: Check devm_hwmon_device_register_with_groups() return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit devm_hwmon_device_register_with_groups() returns an error pointer upon failure. Check its return value for errors. Compile-tested only. Fixes: 1a218d312e65 ("platform/mellanox: mlxbf-pmc: Add Mellanox BlueField PMC driver") Suggested-by: Ilpo Järvinen Suggested-by: Vadim Pasternak Signed-off-by: Kunwu Chan Reviewed-by: Vadim Pasternak Link: https://lore.kernel.org/r/20231201055447.2356001-1-chentao@kylinos.cn [ij: split the change into two] Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/mellanox/mlxbf-pmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index d0d1cfc55c5f..1dd84c7a79de 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -2075,6 +2075,8 @@ static int mlxbf_pmc_probe(struct platform_device *pdev) pmc->hwmon_dev = devm_hwmon_device_register_with_groups( dev, "bfperf", pmc, pmc->groups); + if (IS_ERR(pmc->hwmon_dev)) + return PTR_ERR(pmc->hwmon_dev); platform_set_drvdata(pdev, pmc); return 0; -- cgit From 73ea73affe8622bdf292de898da869d441da6a9d Mon Sep 17 00:00:00 2001 From: Roy Luo Date: Tue, 28 Nov 2023 22:17:56 +0000 Subject: USB: gadget: core: adjust uevent timing on gadget unbind The KOBJ_CHANGE uevent is sent before gadget unbind is actually executed, resulting in inaccurate uevent emitted at incorrect timing (the uevent would have USB_UDC_DRIVER variable set while it would soon be removed). Move the KOBJ_CHANGE uevent to the end of the unbind function so that uevent is sent only after the change has been made. Fixes: 2ccea03a8f7e ("usb: gadget: introduce UDC Class") Cc: stable@vger.kernel.org Signed-off-by: Roy Luo Link: https://lore.kernel.org/r/20231128221756.2591158-1-royluo@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index ded9531f141b..d59f94464b87 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1646,8 +1646,6 @@ static void gadget_unbind_driver(struct device *dev) dev_dbg(&udc->dev, "unbinding gadget driver [%s]\n", driver->function); - kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE); - udc->allow_connect = false; cancel_work_sync(&udc->vbus_work); mutex_lock(&udc->connect_lock); @@ -1667,6 +1665,8 @@ static void gadget_unbind_driver(struct device *dev) driver->is_bound = false; udc->driver = NULL; mutex_unlock(&udc_lock); + + kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE); } /* ------------------------------------------------------------------------- */ -- cgit From b17b7fe6dd5c6ff74b38b0758ca799cdbb79e26e Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Wed, 29 Nov 2023 19:23:50 +0000 Subject: usb: typec: class: fix typec_altmode_put_partner to put plugs When typec_altmode_put_partner is called by a plug altmode upon release, the port altmode the plug belongs to will not remove its reference to the plug. The check to see if the altmode being released evaluates against the released altmode's partner instead of the calling altmode itself, so change adev in typec_altmode_put_partner to properly refer to the altmode being released. typec_altmode_set_partner is not run for port altmodes, so also add a check in typec_altmode_release to prevent typec_altmode_put_partner() calls on port altmode release. Fixes: 8a37d87d72f0 ("usb: typec: Bus type for alternate modes") Cc: stable@vger.kernel.org Signed-off-by: RD Babiera Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231129192349.1773623-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/class.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 2e0451bd336e..16a670828dde 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -267,7 +267,7 @@ static void typec_altmode_put_partner(struct altmode *altmode) if (!partner) return; - adev = &partner->adev; + adev = &altmode->adev; if (is_typec_plug(adev->dev.parent)) { struct typec_plug *plug = to_typec_plug(adev->dev.parent); @@ -497,7 +497,8 @@ static void typec_altmode_release(struct device *dev) { struct altmode *alt = to_altmode(to_typec_altmode(dev)); - typec_altmode_put_partner(alt); + if (!is_typec_port(dev->parent)) + typec_altmode_put_partner(alt); altmode_id_remove(alt->adev.dev.parent, alt->id); kfree(alt); -- cgit From bbb8e71965c3737bdc691afd803a34bfd61cfbeb Mon Sep 17 00:00:00 2001 From: Sarah Grant Date: Fri, 1 Dec 2023 18:16:54 +0000 Subject: ALSA: usb-audio: Add Pioneer DJM-450 mixer controls These values mirror those of the Pioneer DJM-250MK2 as the channel layout appears identical based on my observations. This duplication could be removed in later contributions if desired. Signed-off-by: Sarah Grant Cc: Link: https://lore.kernel.org/r/20231201181654.5058-1-s@srd.tw Signed-off-by: Takashi Iwai --- sound/usb/mixer_quirks.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 898bc3baca7b..c8d48566e175 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -2978,6 +2978,7 @@ static int snd_bbfpro_controls_create(struct usb_mixer_interface *mixer) #define SND_DJM_850_IDX 0x2 #define SND_DJM_900NXS2_IDX 0x3 #define SND_DJM_750MK2_IDX 0x4 +#define SND_DJM_450_IDX 0x5 #define SND_DJM_CTL(_name, suffix, _default_value, _windex) { \ @@ -3108,6 +3109,31 @@ static const struct snd_djm_ctl snd_djm_ctls_250mk2[] = { }; +// DJM-450 +static const u16 snd_djm_opts_450_cap1[] = { + 0x0103, 0x0100, 0x0106, 0x0107, 0x0108, 0x0109, 0x010d, 0x010a }; + +static const u16 snd_djm_opts_450_cap2[] = { + 0x0203, 0x0200, 0x0206, 0x0207, 0x0208, 0x0209, 0x020d, 0x020a }; + +static const u16 snd_djm_opts_450_cap3[] = { + 0x030a, 0x0311, 0x0312, 0x0307, 0x0308, 0x0309, 0x030d }; + +static const u16 snd_djm_opts_450_pb1[] = { 0x0100, 0x0101, 0x0104 }; +static const u16 snd_djm_opts_450_pb2[] = { 0x0200, 0x0201, 0x0204 }; +static const u16 snd_djm_opts_450_pb3[] = { 0x0300, 0x0301, 0x0304 }; + +static const struct snd_djm_ctl snd_djm_ctls_450[] = { + SND_DJM_CTL("Capture Level", cap_level, 0, SND_DJM_WINDEX_CAPLVL), + SND_DJM_CTL("Ch1 Input", 450_cap1, 2, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch2 Input", 450_cap2, 2, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch3 Input", 450_cap3, 0, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch1 Output", 450_pb1, 0, SND_DJM_WINDEX_PB), + SND_DJM_CTL("Ch2 Output", 450_pb2, 1, SND_DJM_WINDEX_PB), + SND_DJM_CTL("Ch3 Output", 450_pb3, 2, SND_DJM_WINDEX_PB) +}; + + // DJM-750 static const u16 snd_djm_opts_750_cap1[] = { 0x0101, 0x0103, 0x0106, 0x0107, 0x0108, 0x0109, 0x010a, 0x010f }; @@ -3203,6 +3229,7 @@ static const struct snd_djm_device snd_djm_devices[] = { [SND_DJM_850_IDX] = SND_DJM_DEVICE(850), [SND_DJM_900NXS2_IDX] = SND_DJM_DEVICE(900nxs2), [SND_DJM_750MK2_IDX] = SND_DJM_DEVICE(750mk2), + [SND_DJM_450_IDX] = SND_DJM_DEVICE(450), }; @@ -3454,6 +3481,9 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) case USB_ID(0x2b73, 0x0017): /* Pioneer DJ DJM-250MK2 */ err = snd_djm_controls_create(mixer, SND_DJM_250MK2_IDX); break; + case USB_ID(0x2b73, 0x0013): /* Pioneer DJ DJM-450 */ + err = snd_djm_controls_create(mixer, SND_DJM_450_IDX); + break; case USB_ID(0x08e4, 0x017f): /* Pioneer DJ DJM-750 */ err = snd_djm_controls_create(mixer, SND_DJM_750_IDX); break; -- cgit From cd14dedf15be432066e63783c63d650f2800cd48 Mon Sep 17 00:00:00 2001 From: Aleksandrs Vinarskis Date: Mon, 4 Dec 2023 00:30:06 +0100 Subject: ALSA: hda/realtek: fix speakers on XPS 9530 (2023) XPS 9530 has 2 tweeters and 2 subwoofers powered by CS35L41 amplifier, SPI connected. For subwoofers to work, it requires both to enable amplifier support, and to enable output to subwoofers via 0x17 quirk (similalry to XPS 9510/9520). Signed-off-by: Aleksandrs Vinarskis Cc: Link: https://lore.kernel.org/r/20231203233006.100558-1-alex.vinarskis@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ddd74f5d3b30..1c85e6dcef6c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9705,6 +9705,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0b1a, "Dell Precision 5570", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0b37, "Dell Inspiron 16 Plus 7620 2-in-1", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), + SND_PCI_QUIRK(0x1028, 0x0beb, "Dell XPS 15 9530 (2023)", ALC289_FIXUP_DELL_CS35L41_SPI_2), SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), -- cgit From 6f7e4664e597440dfbdb8b2931c561b717030d07 Mon Sep 17 00:00:00 2001 From: Bin Li Date: Mon, 4 Dec 2023 18:04:50 +0800 Subject: ALSA: hda/realtek: Enable headset on Lenovo M90 Gen5 Lenovo M90 Gen5 is equipped with ALC897, and it needs ALC897_FIXUP_HEADSET_MIC_PIN quirk to make its headset mic work. Signed-off-by: Bin Li Cc: Link: https://lore.kernel.org/r/20231204100450.642783-1-bin.li@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1c85e6dcef6c..d799d0ad7623 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -12198,6 +12198,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x32f7, "Lenovo ThinkCentre M90", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x3321, "Lenovo ThinkCentre M70 Gen4", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x331b, "Lenovo ThinkCentre M90 Gen4", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x3364, "Lenovo ThinkCentre M90 Gen5", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x3742, "Lenovo TianYi510Pro-14IOB", ALC897_FIXUP_HEADSET_MIC_PIN2), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD), -- cgit From a9f68ffe1170ca4bc17ab29067d806a354a026e0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 2 Dec 2023 21:24:30 -0600 Subject: HID: i2c-hid: Add IDEA5002 to i2c_hid_acpi_blacklist[] Users have reported problems with recent Lenovo laptops that contain an IDEA5002 I2C HID device. Reports include fans turning on and running even at idle and spurious wakeups from suspend. Presumably in the Windows ecosystem there is an application that uses the HID device. Maybe that puts it into a lower power state so it doesn't cause spurious events. This device doesn't serve any functional purpose in Linux as nothing interacts with it so blacklist it from being probed. This will prevent the GPIO driver from setting up the GPIO and the spurious interrupts and wake events will not occur. Cc: stable@vger.kernel.org # 6.1 Reported-and-tested-by: Marcus Aram Reported-and-tested-by: Mark Herbert Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2812 Signed-off-by: Mario Limonciello Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid-acpi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-acpi.c b/drivers/hid/i2c-hid/i2c-hid-acpi.c index ac918a9ea8d3..1b49243adb16 100644 --- a/drivers/hid/i2c-hid/i2c-hid-acpi.c +++ b/drivers/hid/i2c-hid/i2c-hid-acpi.c @@ -40,6 +40,11 @@ static const struct acpi_device_id i2c_hid_acpi_blacklist[] = { * ICN8505 controller, has a _CID of PNP0C50 but is not HID compatible. */ { "CHPN0001" }, + /* + * The IDEA5002 ACPI device causes high interrupt usage and spurious + * wakeups from suspend. + */ + { "IDEA5002" }, { } }; -- cgit From 31e52523267faab5ed8569b9d5c22c9a2283872f Mon Sep 17 00:00:00 2001 From: Sebastian Parschauer Date: Mon, 27 Nov 2023 23:49:37 +0100 Subject: HID: Add quirk for Labtec/ODDOR/aikeec handbrake This device needs ALWAYS_POLL quirk, otherwise it keeps reconnecting indefinitely. It is a handbrake for sim racing detected as joystick. Reported and tested by GitHub user N0th1ngM4tt3rs. Link: https://github.com/sriemer/fix-linux-mouse issue 22 Signed-off-by: Sebastian Parschauer Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index c6e4e0d1f214..72046039d1be 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -740,6 +740,7 @@ #define USB_VENDOR_ID_LABTEC 0x1020 #define USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD 0x0006 +#define USB_DEVICE_ID_LABTEC_ODDOR_HANDBRAKE 0x8888 #define USB_VENDOR_ID_LAVIEW 0x22D4 #define USB_DEVICE_ID_GLORIOUS_MODEL_I 0x1503 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index ea472923fab0..e0bbf0c6345d 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -121,6 +121,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406XE), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_T609A), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_ODDOR_HANDBRAKE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019), HID_QUIRK_ALWAYS_POLL }, -- cgit From fb9ad24485087e0f00d84bee7a5914640b2b9024 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 4 Dec 2023 12:47:35 +0000 Subject: ASoC: ops: add correct range check for limiting volume Volume can have ranges that start with negative values, ex: -84dB to +40dB. Apply correct range check in snd_soc_limit_volume before setting the platform_max. Without this patch, for example setting a 0dB limit on a volume range of -84dB to +40dB would fail. Signed-off-by: Srinivas Kandagatla Tested-by: Johan Hovold Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20231204124736.132185-2-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/soc-ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 55b009d3c681..2d25748ca706 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -661,7 +661,7 @@ int snd_soc_limit_volume(struct snd_soc_card *card, kctl = snd_soc_card_get_kcontrol(card, name); if (kctl) { struct soc_mixer_control *mc = (struct soc_mixer_control *)kctl->private_value; - if (max <= mc->max) { + if (max <= mc->max - mc->min) { mc->platform_max = max; ret = 0; } -- cgit From 716d4e5373e9d1ae993485ab2e3b893bf7104fb1 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 4 Dec 2023 12:47:36 +0000 Subject: ASoC: qcom: sc8280xp: Limit speaker digital volumes Limit the speaker digital gains to 0dB so that the users will not damage them. Currently there is a limit in UCM, but this does not stop the user form changing the digital gains from command line. So limit this in driver which makes the speakers more safer without active speaker protection in place. Signed-off-by: Srinivas Kandagatla Reviewed-by: Johan Hovold Tested-by: Johan Hovold Link: https://lore.kernel.org/r/20231204124736.132185-3-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/sc8280xp.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sound/soc/qcom/sc8280xp.c b/sound/soc/qcom/sc8280xp.c index d93b18f07be5..39cb0b889aff 100644 --- a/sound/soc/qcom/sc8280xp.c +++ b/sound/soc/qcom/sc8280xp.c @@ -27,6 +27,23 @@ struct sc8280xp_snd_data { static int sc8280xp_snd_init(struct snd_soc_pcm_runtime *rtd) { struct sc8280xp_snd_data *data = snd_soc_card_get_drvdata(rtd->card); + struct snd_soc_dai *cpu_dai = snd_soc_rtd_to_cpu(rtd, 0); + struct snd_soc_card *card = rtd->card; + + switch (cpu_dai->id) { + case WSA_CODEC_DMA_RX_0: + case WSA_CODEC_DMA_RX_1: + /* + * set limit of 0dB on Digital Volume for Speakers, + * this can prevent damage of speakers to some extent without + * active speaker protection + */ + snd_soc_limit_volume(card, "WSA_RX0 Digital Volume", 84); + snd_soc_limit_volume(card, "WSA_RX1 Digital Volume", 84); + break; + default: + break; + } return qcom_snd_wcd_jack_setup(rtd, &data->jack, &data->jack_setup); } -- cgit From 5c687c287c46fadb14644091823298875a5216aa Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 30 Oct 2023 08:13:09 -0700 Subject: nvme: introduce helper function to get ctrl state The controller state is typically written by another CPU, so reading it should ensure no optimizations are taken. This is a repeated pattern in the driver, so start with adding a convenience function that returns the controller state with READ_ONCE(). Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 39a90b7cb125..578e6d311bc9 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -387,6 +387,11 @@ struct nvme_ctrl { enum nvme_dctype dctype; }; +static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) +{ + return READ_ONCE(ctrl->state); +} + enum nvme_iopolicy { NVME_IOPOLICY_NUMA, NVME_IOPOLICY_RR, -- cgit From e6e7f7ac03e40795346f1b2994a05f507ad8d345 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 27 Oct 2023 10:58:12 -0700 Subject: nvme: ensure reset state check ordering A different CPU may be setting the ctrl->state value, so ensure proper barriers to prevent optimizing to a stale state. Normally it isn't a problem to observe the wrong state as it is merely advisory to take a quicker path during initialization and error recovery, but seeing an old state can report unexpected ENETRESET errors when a reset request was in fact successful. Reported-by: Minh Hoang Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch Signed-off-by: Hannes Reinecke --- drivers/nvme/host/core.c | 42 ++++++++++++++++++++++-------------------- drivers/nvme/host/fc.c | 6 +++--- drivers/nvme/host/pci.c | 14 +++++++------- drivers/nvme/host/rdma.c | 23 ++++++++++++++--------- drivers/nvme/host/tcp.c | 27 +++++++++++++++++---------- 5 files changed, 63 insertions(+), 49 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1be1ce522896..d699f0c8b13e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -131,7 +131,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl) /* * Only new queue scan work when admin and IO queues are both alive */ - if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset) + if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE && ctrl->tagset) queue_work(nvme_wq, &ctrl->scan_work); } @@ -143,7 +143,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl) */ int nvme_try_sched_reset(struct nvme_ctrl *ctrl) { - if (ctrl->state != NVME_CTRL_RESETTING) + if (nvme_ctrl_state(ctrl) != NVME_CTRL_RESETTING) return -EBUSY; if (!queue_work(nvme_reset_wq, &ctrl->reset_work)) return -EBUSY; @@ -156,7 +156,7 @@ static void nvme_failfast_work(struct work_struct *work) struct nvme_ctrl *ctrl = container_of(to_delayed_work(work), struct nvme_ctrl, failfast_work); - if (ctrl->state != NVME_CTRL_CONNECTING) + if (nvme_ctrl_state(ctrl) != NVME_CTRL_CONNECTING) return; set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags); @@ -200,7 +200,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) ret = nvme_reset_ctrl(ctrl); if (!ret) { flush_work(&ctrl->reset_work); - if (ctrl->state != NVME_CTRL_LIVE) + if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) ret = -ENETRESET; } @@ -499,7 +499,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, spin_lock_irqsave(&ctrl->lock, flags); - old_state = ctrl->state; + old_state = nvme_ctrl_state(ctrl); switch (new_state) { case NVME_CTRL_LIVE: switch (old_state) { @@ -567,7 +567,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, } if (changed) { - ctrl->state = new_state; + WRITE_ONCE(ctrl->state, new_state); wake_up_all(&ctrl->state_wq); } @@ -575,11 +575,11 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, if (!changed) return false; - if (ctrl->state == NVME_CTRL_LIVE) { + if (new_state == NVME_CTRL_LIVE) { if (old_state == NVME_CTRL_CONNECTING) nvme_stop_failfast_work(ctrl); nvme_kick_requeue_lists(ctrl); - } else if (ctrl->state == NVME_CTRL_CONNECTING && + } else if (new_state == NVME_CTRL_CONNECTING && old_state == NVME_CTRL_RESETTING) { nvme_start_failfast_work(ctrl); } @@ -592,7 +592,7 @@ EXPORT_SYMBOL_GPL(nvme_change_ctrl_state); */ static bool nvme_state_terminal(struct nvme_ctrl *ctrl) { - switch (ctrl->state) { + switch (nvme_ctrl_state(ctrl)) { case NVME_CTRL_NEW: case NVME_CTRL_LIVE: case NVME_CTRL_RESETTING: @@ -617,7 +617,7 @@ bool nvme_wait_reset(struct nvme_ctrl *ctrl) wait_event(ctrl->state_wq, nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) || nvme_state_terminal(ctrl)); - return ctrl->state == NVME_CTRL_RESETTING; + return nvme_ctrl_state(ctrl) == NVME_CTRL_RESETTING; } EXPORT_SYMBOL_GPL(nvme_wait_reset); @@ -704,9 +704,11 @@ EXPORT_SYMBOL_GPL(nvme_init_request); blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, struct request *rq) { - if (ctrl->state != NVME_CTRL_DELETING_NOIO && - ctrl->state != NVME_CTRL_DELETING && - ctrl->state != NVME_CTRL_DEAD && + enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); + + if (state != NVME_CTRL_DELETING_NOIO && + state != NVME_CTRL_DELETING && + state != NVME_CTRL_DEAD && !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) && !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) return BLK_STS_RESOURCE; @@ -736,7 +738,7 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, * command, which is require to set the queue live in the * appropinquate states. */ - switch (ctrl->state) { + switch (nvme_ctrl_state(ctrl)) { case NVME_CTRL_CONNECTING: if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) && (req->cmd->fabrics.fctype == nvme_fabrics_type_connect || @@ -2550,7 +2552,7 @@ static void nvme_set_latency_tolerance(struct device *dev, s32 val) if (ctrl->ps_max_latency_us != latency) { ctrl->ps_max_latency_us = latency; - if (ctrl->state == NVME_CTRL_LIVE) + if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE) nvme_configure_apst(ctrl); } } @@ -3238,7 +3240,7 @@ static int nvme_dev_open(struct inode *inode, struct file *file) struct nvme_ctrl *ctrl = container_of(inode->i_cdev, struct nvme_ctrl, cdev); - switch (ctrl->state) { + switch (nvme_ctrl_state(ctrl)) { case NVME_CTRL_LIVE: break; default: @@ -3924,7 +3926,7 @@ static void nvme_scan_work(struct work_struct *work) int ret; /* No tagset on a live ctrl means IO queues could not created */ - if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset) + if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE || !ctrl->tagset) return; /* @@ -3994,7 +3996,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) * removing the namespaces' disks; fail all the queues now to avoid * potentially having to clean up the failed sync later. */ - if (ctrl->state == NVME_CTRL_DEAD) + if (nvme_ctrl_state(ctrl) == NVME_CTRL_DEAD) nvme_mark_namespaces_dead(ctrl); /* this is a no-op when called from the controller reset handler */ @@ -4076,7 +4078,7 @@ static void nvme_async_event_work(struct work_struct *work) * flushing ctrl async_event_work after changing the controller state * from LIVE and before freeing the admin queue. */ - if (ctrl->state == NVME_CTRL_LIVE) + if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE) ctrl->ops->submit_async_event(ctrl); } @@ -4471,7 +4473,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, { int ret; - ctrl->state = NVME_CTRL_NEW; + WRITE_ONCE(ctrl->state, NVME_CTRL_NEW); clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags); spin_lock_init(&ctrl->lock); mutex_init(&ctrl->scan_lock); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 9f9a3b35dc64..fb22976a36a8 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -557,7 +557,7 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport) static void nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl) { - switch (ctrl->ctrl.state) { + switch (nvme_ctrl_state(&ctrl->ctrl)) { case NVME_CTRL_NEW: case NVME_CTRL_CONNECTING: /* @@ -793,7 +793,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) "NVME-FC{%d}: controller connectivity lost. Awaiting " "Reconnect", ctrl->cnum); - switch (ctrl->ctrl.state) { + switch (nvme_ctrl_state(&ctrl->ctrl)) { case NVME_CTRL_NEW: case NVME_CTRL_LIVE: /* @@ -3319,7 +3319,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ; bool recon = true; - if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) + if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_CONNECTING) return; if (portptr->port_state == FC_OBJSTATE_ONLINE) { diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 507bc149046d..fad4cccce745 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1233,7 +1233,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO); /* If there is a reset/reinit ongoing, we shouldn't reset again. */ - switch (dev->ctrl.state) { + switch (nvme_ctrl_state(&dev->ctrl)) { case NVME_CTRL_RESETTING: case NVME_CTRL_CONNECTING: return false; @@ -1321,7 +1321,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) * cancellation error. All outstanding requests are completed on * shutdown, so we return BLK_EH_DONE. */ - switch (dev->ctrl.state) { + switch (nvme_ctrl_state(&dev->ctrl)) { case NVME_CTRL_CONNECTING: nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); fallthrough; @@ -1593,7 +1593,7 @@ static int nvme_setup_io_queues_trylock(struct nvme_dev *dev) /* * Controller is in wrong state, fail early. */ - if (dev->ctrl.state != NVME_CTRL_CONNECTING) { + if (nvme_ctrl_state(&dev->ctrl) != NVME_CTRL_CONNECTING) { mutex_unlock(&dev->shutdown_lock); return -ENODEV; } @@ -2573,13 +2573,13 @@ static bool nvme_pci_ctrl_is_dead(struct nvme_dev *dev) static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { + enum nvme_ctrl_state state = nvme_ctrl_state(&dev->ctrl); struct pci_dev *pdev = to_pci_dev(dev->dev); bool dead; mutex_lock(&dev->shutdown_lock); dead = nvme_pci_ctrl_is_dead(dev); - if (dev->ctrl.state == NVME_CTRL_LIVE || - dev->ctrl.state == NVME_CTRL_RESETTING) { + if (state == NVME_CTRL_LIVE || state == NVME_CTRL_RESETTING) { if (pci_is_enabled(pdev)) nvme_start_freeze(&dev->ctrl); /* @@ -2690,7 +2690,7 @@ static void nvme_reset_work(struct work_struct *work) bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); int result; - if (dev->ctrl.state != NVME_CTRL_RESETTING) { + if (nvme_ctrl_state(&dev->ctrl) != NVME_CTRL_RESETTING) { dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n", dev->ctrl.state); result = -ENODEV; @@ -3192,7 +3192,7 @@ static int nvme_suspend(struct device *dev) nvme_wait_freeze(ctrl); nvme_sync_queues(ctrl); - if (ctrl->state != NVME_CTRL_LIVE) + if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) goto unfreeze; /* diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 6d178d555920..81e2621169e5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -984,10 +984,11 @@ free_ctrl: static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) { + enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl); + /* If we are resetting/deleting then do nothing */ - if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) { - WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || - ctrl->ctrl.state == NVME_CTRL_LIVE); + if (state != NVME_CTRL_CONNECTING) { + WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE); return; } @@ -1059,8 +1060,10 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) * unless we're during creation of a new controller to * avoid races with teardown flow. */ - WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING && - ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO); + enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl); + + WARN_ON_ONCE(state != NVME_CTRL_DELETING && + state != NVME_CTRL_DELETING_NOIO); WARN_ON_ONCE(new); ret = -EINVAL; goto destroy_io; @@ -1129,8 +1132,10 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { /* state change failure is ok if we started ctrl delete */ - WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING && - ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO); + enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl); + + WARN_ON_ONCE(state != NVME_CTRL_DELETING && + state != NVME_CTRL_DELETING_NOIO); return; } @@ -1162,7 +1167,7 @@ static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc, struct nvme_rdma_queue *queue = wc->qp->qp_context; struct nvme_rdma_ctrl *ctrl = queue->ctrl; - if (ctrl->ctrl.state == NVME_CTRL_LIVE) + if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE) dev_info(ctrl->ctrl.device, "%s for CQE 0x%p failed with status %s (%d)\n", op, wc->wr_cqe, @@ -1945,7 +1950,7 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq) dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n", rq->tag, nvme_rdma_queue_idx(queue)); - if (ctrl->ctrl.state != NVME_CTRL_LIVE) { + if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) { /* * If we are resetting, connecting or deleting we should * complete immediately because we may block controller diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index d79811cfa0ce..08805f027810 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2152,10 +2152,11 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) { + enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); + /* If we are resetting/deleting then do nothing */ - if (ctrl->state != NVME_CTRL_CONNECTING) { - WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW || - ctrl->state == NVME_CTRL_LIVE); + if (state != NVME_CTRL_CONNECTING) { + WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE); return; } @@ -2215,8 +2216,10 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) * unless we're during creation of a new controller to * avoid races with teardown flow. */ - WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING && - ctrl->state != NVME_CTRL_DELETING_NOIO); + enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); + + WARN_ON_ONCE(state != NVME_CTRL_DELETING && + state != NVME_CTRL_DELETING_NOIO); WARN_ON_ONCE(new); ret = -EINVAL; goto destroy_io; @@ -2280,8 +2283,10 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) { /* state change failure is ok if we started ctrl delete */ - WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING && - ctrl->state != NVME_CTRL_DELETING_NOIO); + enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); + + WARN_ON_ONCE(state != NVME_CTRL_DELETING && + state != NVME_CTRL_DELETING_NOIO); return; } @@ -2311,8 +2316,10 @@ static void nvme_reset_ctrl_work(struct work_struct *work) if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) { /* state change failure is ok if we started ctrl delete */ - WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING && - ctrl->state != NVME_CTRL_DELETING_NOIO); + enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); + + WARN_ON_ONCE(state != NVME_CTRL_DELETING && + state != NVME_CTRL_DELETING_NOIO); return; } @@ -2430,7 +2437,7 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type, opc, nvme_opcode_str(qid, opc, fctype)); - if (ctrl->state != NVME_CTRL_LIVE) { + if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) { /* * If we are resetting, connecting or deleting we should * complete immediately because we may block controller -- cgit From 7be866b1cf0bf1dfa74480fe8097daeceda68622 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 2 May 2023 11:43:41 -0700 Subject: nvme-ioctl: move capable() admin check to the end This can be an expensive call on some kernel configs. Move it to the end after checking the cheaper ways to determine if the command is allowed. Reviewed-by: Jens Axboe Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/ioctl.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 529b9954d2b8..4939ed35638f 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -18,15 +18,12 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, { u32 effects; - if (capable(CAP_SYS_ADMIN)) - return true; - /* * Do not allow unprivileged passthrough on partitions, as that allows an * escape from the containment of the partition. */ if (flags & NVME_IOCTL_PARTITION) - return false; + goto admin; /* * Do not allow unprivileged processes to send vendor specific or fabrics @@ -34,7 +31,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, */ if (c->common.opcode >= nvme_cmd_vendor_start || c->common.opcode == nvme_fabrics_command) - return false; + goto admin; /* * Do not allow unprivileged passthrough of admin commands except @@ -53,7 +50,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, return true; } } - return false; + goto admin; } /* @@ -63,7 +60,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, */ effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); if (!(effects & NVME_CMD_EFFECTS_CSUPP)) - return false; + goto admin; /* * Don't allow passthrough for command that have intrusive (or unknown) @@ -72,16 +69,20 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_UUID_SEL | NVME_CMD_EFFECTS_SCOPE_MASK)) - return false; + goto admin; /* * Only allow I/O commands that transfer data to the controller or that * change the logical block contents if the file descriptor is open for * writing. */ - if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) - return open_for_write; + if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) && + !open_for_write) + goto admin; + return true; +admin: + return capable(CAP_SYS_ADMIN); } /* -- cgit From 29ac4b2f9263f52ba12fa35832cc6506ce3b4793 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Wed, 29 Nov 2023 13:49:51 +0900 Subject: nvme: improve NVME_HOST_AUTH and NVME_TARGET_AUTH config descriptions Currently two similar config options NVME_HOST_AUTH and NVME_TARGET_AUTH have almost same descriptions. It is confusing to choose them in menuconfig. Improve the descriptions to distinguish them. Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/Kconfig | 5 +++-- drivers/nvme/target/Kconfig | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 8fe2dd619e80..b309c8be720f 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -107,11 +107,12 @@ config NVME_TCP_TLS If unsure, say N. config NVME_HOST_AUTH - bool "NVM Express over Fabrics In-Band Authentication" + bool "NVMe over Fabrics In-Band Authentication in host side" depends on NVME_CORE select NVME_AUTH help - This provides support for NVMe over Fabrics In-Band Authentication. + This provides support for NVMe over Fabrics In-Band Authentication in + host side. If unsure, say N. diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index e1ebc73f3e5e..872dd1a0acd8 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -99,10 +99,11 @@ config NVME_TARGET_TCP_TLS If unsure, say N. config NVME_TARGET_AUTH - bool "NVMe over Fabrics In-band Authentication support" + bool "NVMe over Fabrics In-band Authentication in target side" depends on NVME_TARGET select NVME_AUTH help - This enables support for NVMe over Fabrics In-band Authentication + This enables support for NVMe over Fabrics In-band Authentication in + target side. If unsure, say N. -- cgit From 20dc66f2d76b4a410df14e4675e373b718babc34 Mon Sep 17 00:00:00 2001 From: Nitesh Shetty Date: Tue, 28 Nov 2023 17:59:57 +0530 Subject: nvme: prevent potential spectre v1 gadget This patch fixes the smatch warning, "nvmet_ns_ana_grpid_store() warn: potential spectre issue 'nvmet_ana_group_enabled' [w] (local cap)" Prevent the contents of kernel memory from being leaked to user space via speculative execution by using array_index_nospec. Signed-off-by: Nitesh Shetty Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/configfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index e307a044b1a1..d937fe05129e 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "nvmet.h" @@ -621,6 +622,7 @@ static ssize_t nvmet_ns_ana_grpid_store(struct config_item *item, down_write(&nvmet_ana_sem); oldgrpid = ns->anagrpid; + newgrpid = array_index_nospec(newgrpid, NVMET_MAX_ANAGRPS); nvmet_ana_group_enabled[newgrpid]++; ns->anagrpid = newgrpid; nvmet_ana_group_enabled[oldgrpid]--; @@ -1812,6 +1814,7 @@ static struct config_group *nvmet_ana_groups_make_group( grp->grpid = grpid; down_write(&nvmet_ana_sem); + grpid = array_index_nospec(grpid, NVMET_MAX_ANAGRPS); nvmet_ana_group_enabled[grpid]++; up_write(&nvmet_ana_sem); -- cgit From 839a40d1e730977d4448d141fa653517c2959a88 Mon Sep 17 00:00:00 2001 From: Bitao Hu Date: Thu, 30 Nov 2023 10:13:37 +0800 Subject: nvme: fix deadlock between reset and scan If controller reset occurs when allocating namespace, both nvme_reset_work and nvme_scan_work will hang, as shown below. Test Scripts: for ((t=1;t<=128;t++)) do nsid=`nvme create-ns /dev/nvme1 -s 14537724 -c 14537724 -f 0 -m 0 \ -d 0 | awk -F: '{print($NF);}'` nvme attach-ns /dev/nvme1 -n $nsid -c 0 done nvme reset /dev/nvme1 We will find that both nvme_reset_work and nvme_scan_work hung: INFO: task kworker/u249:4:17848 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/u249:4 state:D stack: 0 pid:17848 ppid: 2 flags:0x00000028 Workqueue: nvme-reset-wq nvme_reset_work [nvme] Call trace: __switch_to+0xb4/0xfc __schedule+0x22c/0x670 schedule+0x4c/0xd0 blk_mq_freeze_queue_wait+0x84/0xc0 nvme_wait_freeze+0x40/0x64 [nvme_core] nvme_reset_work+0x1c0/0x5cc [nvme] process_one_work+0x1d8/0x4b0 worker_thread+0x230/0x440 kthread+0x114/0x120 INFO: task kworker/u249:3:22404 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/u249:3 state:D stack: 0 pid:22404 ppid: 2 flags:0x00000028 Workqueue: nvme-wq nvme_scan_work [nvme_core] Call trace: __switch_to+0xb4/0xfc __schedule+0x22c/0x670 schedule+0x4c/0xd0 rwsem_down_write_slowpath+0x32c/0x98c down_write+0x70/0x80 nvme_alloc_ns+0x1ac/0x38c [nvme_core] nvme_validate_or_alloc_ns+0xbc/0x150 [nvme_core] nvme_scan_ns_list+0xe8/0x2e4 [nvme_core] nvme_scan_work+0x60/0x500 [nvme_core] process_one_work+0x1d8/0x4b0 worker_thread+0x260/0x440 kthread+0x114/0x120 INFO: task nvme:28428 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:nvme state:D stack: 0 pid:28428 ppid: 27119 flags:0x00000000 Call trace: __switch_to+0xb4/0xfc __schedule+0x22c/0x670 schedule+0x4c/0xd0 schedule_timeout+0x160/0x194 do_wait_for_common+0xac/0x1d0 __wait_for_common+0x78/0x100 wait_for_completion+0x24/0x30 __flush_work.isra.0+0x74/0x90 flush_work+0x14/0x20 nvme_reset_ctrl_sync+0x50/0x74 [nvme_core] nvme_dev_ioctl+0x1b0/0x250 [nvme_core] __arm64_sys_ioctl+0xa8/0xf0 el0_svc_common+0x88/0x234 do_el0_svc+0x7c/0x90 el0_svc+0x1c/0x30 el0_sync_handler+0xa8/0xb0 el0_sync+0x148/0x180 The reason for the hang is that nvme_reset_work occurs while nvme_scan_work is still running. nvme_scan_work may add new ns into ctrl->namespaces list after nvme_reset_work frozen all ns->q in ctrl->namespaces list. The newly added ns is not frozen, so nvme_wait_freeze will wait forever. Unfortunately, ctrl->namespaces_rwsem is held by nvme_reset_work, so nvme_scan_work will also wait forever. Now we are deadlocked! PROCESS1 PROCESS2 ============== ============== nvme_scan_work ... nvme_reset_work nvme_validate_or_alloc_ns nvme_dev_disable nvme_alloc_ns nvme_start_freeze down_write ... nvme_ns_add_to_ctrl_list ... up_write nvme_wait_freeze ... down_read nvme_alloc_ns blk_mq_freeze_queue_wait down_write Fix by marking the ctrl with say NVME_CTRL_FROZEN flag set in nvme_start_freeze and cleared in nvme_unfreeze. Then the scan can check it before adding the new namespace (under the namespaces_rwsem). Signed-off-by: Bitao Hu Reviewed-by: Guixin Liu Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 10 ++++++++++ drivers/nvme/host/nvme.h | 1 + 2 files changed, 11 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d699f0c8b13e..8ebdfd623e0f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3662,6 +3662,14 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) goto out_unlink_ns; down_write(&ctrl->namespaces_rwsem); + /* + * Ensure that no namespaces are added to the ctrl list after the queues + * are frozen, thereby avoiding a deadlock between scan and reset. + */ + if (test_bit(NVME_CTRL_FROZEN, &ctrl->flags)) { + up_write(&ctrl->namespaces_rwsem); + goto out_unlink_ns; + } nvme_ns_add_to_ctrl_list(ns); up_write(&ctrl->namespaces_rwsem); nvme_get_ctrl(ctrl); @@ -4583,6 +4591,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl) list_for_each_entry(ns, &ctrl->namespaces, list) blk_mq_unfreeze_queue(ns->queue); up_read(&ctrl->namespaces_rwsem); + clear_bit(NVME_CTRL_FROZEN, &ctrl->flags); } EXPORT_SYMBOL_GPL(nvme_unfreeze); @@ -4616,6 +4625,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; + set_bit(NVME_CTRL_FROZEN, &ctrl->flags); down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) blk_freeze_queue_start(ns->queue); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 578e6d311bc9..efb20a8e23a3 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -251,6 +251,7 @@ enum nvme_ctrl_flags { NVME_CTRL_STOPPED = 3, NVME_CTRL_SKIP_ID_CNS_CS = 4, NVME_CTRL_DIRTY_CAPABILITY = 5, + NVME_CTRL_FROZEN = 6, }; struct nvme_ctrl { -- cgit From d8792a5734b0f3e58b898c2e2f910bfac48e9ee3 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Mon, 27 Nov 2023 14:04:59 -0800 Subject: riscv: Safely remove entries from relocation list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the safe versions of list and hlist iteration to safely remove entries from the module relocation lists. To allow mutliple threads to load modules concurrently, move relocation list pointers onto the stack rather than using global variables. Fixes: 8fd6c5142395 ("riscv: Add remaining module relocations") Reported-by: Ron Economos Closes: https://lore.kernel.org/linux-riscv/444de86a-7e7c-4de7-5d1d-c1c40eefa4ba@w6rz.net Signed-off-by: Charlie Jenkins Tested-by: Björn Töpel Link: https://lore.kernel.org/r/20231127-module_linking_freeing-v4-1-a2ca1d7027d0@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 110 +++++++++++++++++++++++++++++++++------------ 1 file changed, 82 insertions(+), 28 deletions(-) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 56a8c78e9e21..53593fe58cd8 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -40,15 +40,6 @@ struct relocation_handlers { long buffer); }; -unsigned int initialize_relocation_hashtable(unsigned int num_relocations); -void process_accumulated_relocations(struct module *me); -int add_relocation_to_accumulate(struct module *me, int type, void *location, - unsigned int hashtable_bits, Elf_Addr v); - -struct hlist_head *relocation_hashtable; - -struct list_head used_buckets_list; - /* * The auipc+jalr instruction pair can reach any PC-relative offset * in the range [-2^31 - 2^11, 2^31 - 2^11) @@ -604,7 +595,10 @@ static const struct relocation_handlers reloc_handlers[] = { /* 192-255 nonstandard ABI extensions */ }; -void process_accumulated_relocations(struct module *me) +static void +process_accumulated_relocations(struct module *me, + struct hlist_head **relocation_hashtable, + struct list_head *used_buckets_list) { /* * Only ADD/SUB/SET/ULEB128 should end up here. @@ -624,18 +618,25 @@ void process_accumulated_relocations(struct module *me) * - Each relocation entry for a location address */ struct used_bucket *bucket_iter; + struct used_bucket *bucket_iter_tmp; struct relocation_head *rel_head_iter; + struct hlist_node *rel_head_iter_tmp; struct relocation_entry *rel_entry_iter; + struct relocation_entry *rel_entry_iter_tmp; int curr_type; void *location; long buffer; - list_for_each_entry(bucket_iter, &used_buckets_list, head) { - hlist_for_each_entry(rel_head_iter, bucket_iter->bucket, node) { + list_for_each_entry_safe(bucket_iter, bucket_iter_tmp, + used_buckets_list, head) { + hlist_for_each_entry_safe(rel_head_iter, rel_head_iter_tmp, + bucket_iter->bucket, node) { buffer = 0; location = rel_head_iter->location; - list_for_each_entry(rel_entry_iter, - rel_head_iter->rel_entry, head) { + list_for_each_entry_safe(rel_entry_iter, + rel_entry_iter_tmp, + rel_head_iter->rel_entry, + head) { curr_type = rel_entry_iter->type; reloc_handlers[curr_type].reloc_handler( me, &buffer, rel_entry_iter->value); @@ -648,11 +649,14 @@ void process_accumulated_relocations(struct module *me) kfree(bucket_iter); } - kfree(relocation_hashtable); + kfree(*relocation_hashtable); } -int add_relocation_to_accumulate(struct module *me, int type, void *location, - unsigned int hashtable_bits, Elf_Addr v) +static int add_relocation_to_accumulate(struct module *me, int type, + void *location, + unsigned int hashtable_bits, Elf_Addr v, + struct hlist_head *relocation_hashtable, + struct list_head *used_buckets_list) { struct relocation_entry *entry; struct relocation_head *rel_head; @@ -661,6 +665,10 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, unsigned long hash; entry = kmalloc(sizeof(*entry), GFP_KERNEL); + + if (!entry) + return -ENOMEM; + INIT_LIST_HEAD(&entry->head); entry->type = type; entry->value = v; @@ -669,7 +677,10 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, current_head = &relocation_hashtable[hash]; - /* Find matching location (if any) */ + /* + * Search for the relocation_head for the relocations that happen at the + * provided location + */ bool found = false; struct relocation_head *rel_head_iter; @@ -681,19 +692,45 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, } } + /* + * If there has not yet been any relocations at the provided location, + * create a relocation_head for that location and populate it with this + * relocation_entry. + */ if (!found) { rel_head = kmalloc(sizeof(*rel_head), GFP_KERNEL); + + if (!rel_head) { + kfree(entry); + return -ENOMEM; + } + rel_head->rel_entry = kmalloc(sizeof(struct list_head), GFP_KERNEL); + + if (!rel_head->rel_entry) { + kfree(entry); + kfree(rel_head); + return -ENOMEM; + } + INIT_LIST_HEAD(rel_head->rel_entry); rel_head->location = location; INIT_HLIST_NODE(&rel_head->node); if (!current_head->first) { bucket = kmalloc(sizeof(struct used_bucket), GFP_KERNEL); + + if (!bucket) { + kfree(entry); + kfree(rel_head); + kfree(rel_head->rel_entry); + return -ENOMEM; + } + INIT_LIST_HEAD(&bucket->head); bucket->bucket = current_head; - list_add(&bucket->head, &used_buckets_list); + list_add(&bucket->head, used_buckets_list); } hlist_add_head(&rel_head->node, current_head); } @@ -704,7 +741,9 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, return 0; } -unsigned int initialize_relocation_hashtable(unsigned int num_relocations) +static unsigned int +initialize_relocation_hashtable(unsigned int num_relocations, + struct hlist_head **relocation_hashtable) { /* Can safely assume that bits is not greater than sizeof(long) */ unsigned long hashtable_size = roundup_pow_of_two(num_relocations); @@ -720,12 +759,13 @@ unsigned int initialize_relocation_hashtable(unsigned int num_relocations) hashtable_size <<= should_double_size; - relocation_hashtable = kmalloc_array(hashtable_size, - sizeof(*relocation_hashtable), - GFP_KERNEL); - __hash_init(relocation_hashtable, hashtable_size); + *relocation_hashtable = kmalloc_array(hashtable_size, + sizeof(*relocation_hashtable), + GFP_KERNEL); + if (!*relocation_hashtable) + return -ENOMEM; - INIT_LIST_HEAD(&used_buckets_list); + __hash_init(*relocation_hashtable, hashtable_size); return hashtable_bits; } @@ -742,7 +782,17 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, Elf_Addr v; int res; unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel); - unsigned int hashtable_bits = initialize_relocation_hashtable(num_relocations); + struct hlist_head *relocation_hashtable; + struct list_head used_buckets_list; + unsigned int hashtable_bits; + + hashtable_bits = initialize_relocation_hashtable(num_relocations, + &relocation_hashtable); + + if (hashtable_bits < 0) + return hashtable_bits; + + INIT_LIST_HEAD(&used_buckets_list); pr_debug("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); @@ -823,14 +873,18 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, } if (reloc_handlers[type].accumulate_handler) - res = add_relocation_to_accumulate(me, type, location, hashtable_bits, v); + res = add_relocation_to_accumulate(me, type, location, + hashtable_bits, v, + relocation_hashtable, + &used_buckets_list); else res = handler(me, location, v); if (res) return res; } - process_accumulated_relocations(me); + process_accumulated_relocations(me, &relocation_hashtable, + &used_buckets_list); return 0; } -- cgit From 4a92a87950c4f86bc372ee3b1da4ba9d092252a9 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Mon, 27 Nov 2023 14:05:00 -0800 Subject: riscv: Correct type casting in module loading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use __le16 with le16_to_cpu. Fixes: 8fd6c5142395 ("riscv: Add remaining module relocations") Signed-off-by: Charlie Jenkins Reviewed-by: Samuel Holland Tested-by: Samuel Holland Tested-by: Björn Töpel Link: https://lore.kernel.org/r/20231127-module_linking_freeing-v4-2-a2ca1d7027d0@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 53593fe58cd8..aac019ed63b1 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -55,7 +55,7 @@ static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) static int riscv_insn_rmw(void *location, u32 keep, u32 set) { - u16 *parcel = location; + __le16 *parcel = location; u32 insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16; insn &= keep; @@ -68,7 +68,7 @@ static int riscv_insn_rmw(void *location, u32 keep, u32 set) static int riscv_insn_rvc_rmw(void *location, u16 keep, u16 set) { - u16 *parcel = location; + __le16 *parcel = location; u16 insn = le16_to_cpu(*parcel); insn &= keep; -- cgit From 815f986f33eeb06652d59d8a4d405d4fdb4e59a8 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 1 Dec 2023 14:48:59 +0100 Subject: arm64: dts: rockchip: drop interrupt-names property from rk3588s dfi The dfi binding does not specify interrupt names, with the interrupts just specifying channels 0-x. So drop the unspecified property. Fixes: 5a6976b1040a ("arm64: dts: rockchip: Add DFI to rk3588s") Reported-by: Jagan Teki Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20231201134859.322491-1-heiko@sntech.de --- arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi index 7064c0e9179f..8aa0499f9b03 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -1362,7 +1362,6 @@ , , ; - interrupt-names = "ch0", "ch1", "ch2", "ch3"; rockchip,pmu = <&pmu1grf>; }; -- cgit From 7b2404a886f8b91250c31855d287e632123e1746 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Dec 2023 00:22:00 +0000 Subject: cifs: Fix flushing, invalidation and file size with copy_file_range() Fix a number of issues in the cifs filesystem implementation of the copy_file_range() syscall in cifs_file_copychunk_range(). Firstly, the invalidation of the destination range is handled incorrectly: We shouldn't just invalidate the whole file as dirty data in the file may get lost and we can't just call truncate_inode_pages_range() to invalidate the destination range as that will erase parts of a partial folio at each end whilst invalidating and discarding all the folios in the middle. We need to force all the folios covering the range to be reloaded, but we mustn't lose dirty data in them that's not in the destination range. Further, we shouldn't simply round out the range to PAGE_SIZE at each end as cifs should move to support multipage folios. Secondly, there's an issue whereby a write may have extended the file locally, but not have been written back yet. This can leaves the local idea of the EOF at a later point than the server's EOF. If a copy request is issued, this will fail on the server with STATUS_INVALID_VIEW_SIZE (which gets translated to -EIO locally) if the copy source extends past the server's EOF. Fix this by: (0) Flush the source region (already done). The flush does nothing and the EOF isn't moved if the source region has no dirty data. (1) Move the EOF to the end of the source region if it isn't already at least at this point. If we can't do this, for instance if the server doesn't support it, just flush the entire source file. (2) Find the folio (if present) at each end of the range, flushing it and increasing the region-to-be-invalidated to cover those in their entirety. (3) Fully discard all the folios covering the range as we want them to be reloaded. (4) Then perform the copy. Thirdly, set i_size after doing the copychunk_range operation as this value may be used by various things internally. stat() hides the issue because setting ->time to 0 causes cifs_getatr() to revalidate the attributes. These were causing the generic/075 xfstest to fail. Fixes: 620d8745b35d ("Introduce cifs_copy_file_range()") Cc: stable@vger.kernel.org Signed-off-by: David Howells cc: Paulo Alcantara cc: Shyam Prasad N cc: Rohith Surabattula cc: Matthew Wilcox cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: David Howells Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 99 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index ea3a7a668b45..8097a9b3e98c 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1196,6 +1196,72 @@ const struct inode_operations cifs_symlink_inode_ops = { .listxattr = cifs_listxattr, }; +/* + * Advance the EOF marker to after the source range. + */ +static int cifs_precopy_set_eof(struct inode *src_inode, struct cifsInodeInfo *src_cifsi, + struct cifs_tcon *src_tcon, + unsigned int xid, loff_t src_end) +{ + struct cifsFileInfo *writeable_srcfile; + int rc = -EINVAL; + + writeable_srcfile = find_writable_file(src_cifsi, FIND_WR_FSUID_ONLY); + if (writeable_srcfile) { + if (src_tcon->ses->server->ops->set_file_size) + rc = src_tcon->ses->server->ops->set_file_size( + xid, src_tcon, writeable_srcfile, + src_inode->i_size, true /* no need to set sparse */); + else + rc = -ENOSYS; + cifsFileInfo_put(writeable_srcfile); + cifs_dbg(FYI, "SetFSize for copychunk rc = %d\n", rc); + } + + if (rc < 0) + goto set_failed; + + netfs_resize_file(&src_cifsi->netfs, src_end); + fscache_resize_cookie(cifs_inode_cookie(src_inode), src_end); + return 0; + +set_failed: + return filemap_write_and_wait(src_inode->i_mapping); +} + +/* + * Flush out either the folio that overlaps the beginning of a range in which + * pos resides or the folio that overlaps the end of a range unless that folio + * is entirely within the range we're going to invalidate. We extend the flush + * bounds to encompass the folio. + */ +static int cifs_flush_folio(struct inode *inode, loff_t pos, loff_t *_fstart, loff_t *_fend, + bool first) +{ + struct folio *folio; + unsigned long long fpos, fend; + pgoff_t index = pos / PAGE_SIZE; + size_t size; + int rc = 0; + + folio = filemap_get_folio(inode->i_mapping, index); + if (IS_ERR(folio)) + return 0; + + size = folio_size(folio); + fpos = folio_pos(folio); + fend = fpos + size - 1; + *_fstart = min_t(unsigned long long, *_fstart, fpos); + *_fend = max_t(unsigned long long, *_fend, fend); + if ((first && pos == fpos) || (!first && pos == fend)) + goto out; + + rc = filemap_write_and_wait_range(inode->i_mapping, fpos, fend); +out: + folio_put(folio); + return rc; +} + static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, struct file *dst_file, loff_t destoff, loff_t len, unsigned int remap_flags) @@ -1263,10 +1329,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, { struct inode *src_inode = file_inode(src_file); struct inode *target_inode = file_inode(dst_file); + struct cifsInodeInfo *src_cifsi = CIFS_I(src_inode); struct cifsFileInfo *smb_file_src; struct cifsFileInfo *smb_file_target; struct cifs_tcon *src_tcon; struct cifs_tcon *target_tcon; + unsigned long long destend, fstart, fend; ssize_t rc; cifs_dbg(FYI, "copychunk range\n"); @@ -1306,13 +1374,41 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, if (rc) goto unlock; - /* should we flush first and last page first */ - truncate_inode_pages(&target_inode->i_data, 0); + /* The server-side copy will fail if the source crosses the EOF marker. + * Advance the EOF marker after the flush above to the end of the range + * if it's short of that. + */ + if (src_cifsi->server_eof < off + len) { + rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len); + if (rc < 0) + goto unlock; + } + + destend = destoff + len - 1; + + /* Flush the folios at either end of the destination range to prevent + * accidental loss of dirty data outside of the range. + */ + fstart = destoff; + fend = destend; + + rc = cifs_flush_folio(target_inode, destoff, &fstart, &fend, true); + if (rc) + goto unlock; + rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false); + if (rc) + goto unlock; + + /* Discard all the folios that overlap the destination region. */ + truncate_inode_pages_range(&target_inode->i_data, fstart, fend); rc = file_modified(dst_file); - if (!rc) + if (!rc) { rc = target_tcon->ses->server->ops->copychunk_range(xid, smb_file_src, smb_file_target, off, len, destoff); + if (rc > 0 && destoff + rc > i_size_read(target_inode)) + truncate_setsize(target_inode, destoff + rc); + } file_accessed(src_file); -- cgit From c54fc3a4f375663f2361a9cbb2955fb4ef912879 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Dec 2023 00:22:01 +0000 Subject: cifs: Fix flushing, invalidation and file size with FICLONE Fix a number of issues in the cifs filesystem implementation of the FICLONE ioctl in cifs_remap_file_range(). This is analogous to the previously fixed bug in cifs_file_copychunk_range() and can share the helper functions. Firstly, the invalidation of the destination range is handled incorrectly: We shouldn't just invalidate the whole file as dirty data in the file may get lost and we can't just call truncate_inode_pages_range() to invalidate the destination range as that will erase parts of a partial folio at each end whilst invalidating and discarding all the folios in the middle. We need to force all the folios covering the range to be reloaded, but we mustn't lose dirty data in them that's not in the destination range. Further, we shouldn't simply round out the range to PAGE_SIZE at each end as cifs should move to support multipage folios. Secondly, there's an issue whereby a write may have extended the file locally, but not have been written back yet. This can leaves the local idea of the EOF at a later point than the server's EOF. If a clone request is issued, this will fail on the server with STATUS_INVALID_VIEW_SIZE (which gets translated to -EIO locally) if the clone source extends past the server's EOF. Fix this by: (0) Flush the source region (already done). The flush does nothing and the EOF isn't moved if the source region has no dirty data. (1) Move the EOF to the end of the source region if it isn't already at least at this point. If we can't do this, for instance if the server doesn't support it, just flush the entire source file. (2) Find the folio (if present) at each end of the range, flushing it and increasing the region-to-be-invalidated to cover those in their entirety. (3) Fully discard all the folios covering the range as we want them to be reloaded. (4) Then perform the extent duplication. Thirdly, set i_size after doing the duplicate_extents operation as this value may be used by various things internally. stat() hides the issue because setting ->time to 0 causes cifs_getatr() to revalidate the attributes. These were causing the cifs/001 xfstest to fail. Fixes: 04b38d601239 ("vfs: pull btrfs clone API to vfs layer") Signed-off-by: David Howells Cc: stable@vger.kernel.org cc: Christoph Hellwig cc: Paulo Alcantara cc: Shyam Prasad N cc: Rohith Surabattula cc: Matthew Wilcox cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: David Howells Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 68 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 8097a9b3e98c..c5fc0a35bb19 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1268,9 +1268,12 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, { struct inode *src_inode = file_inode(src_file); struct inode *target_inode = file_inode(dst_file); + struct cifsInodeInfo *src_cifsi = CIFS_I(src_inode); + struct cifsInodeInfo *target_cifsi = CIFS_I(target_inode); struct cifsFileInfo *smb_file_src = src_file->private_data; - struct cifsFileInfo *smb_file_target; - struct cifs_tcon *target_tcon; + struct cifsFileInfo *smb_file_target = dst_file->private_data; + struct cifs_tcon *target_tcon, *src_tcon; + unsigned long long destend, fstart, fend, new_size; unsigned int xid; int rc; @@ -1281,13 +1284,13 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, xid = get_xid(); - if (!src_file->private_data || !dst_file->private_data) { + if (!smb_file_src || !smb_file_target) { rc = -EBADF; cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); goto out; } - smb_file_target = dst_file->private_data; + src_tcon = tlink_tcon(smb_file_src->tlink); target_tcon = tlink_tcon(smb_file_target->tlink); /* @@ -1300,20 +1303,63 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, if (len == 0) len = src_inode->i_size - off; - cifs_dbg(FYI, "about to flush pages\n"); - /* should we flush first and last page first */ - truncate_inode_pages_range(&target_inode->i_data, destoff, - PAGE_ALIGN(destoff + len)-1); + cifs_dbg(FYI, "clone range\n"); + + /* Flush the source buffer */ + rc = filemap_write_and_wait_range(src_inode->i_mapping, off, + off + len - 1); + if (rc) + goto unlock; + + /* The server-side copy will fail if the source crosses the EOF marker. + * Advance the EOF marker after the flush above to the end of the range + * if it's short of that. + */ + if (src_cifsi->netfs.remote_i_size < off + len) { + rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len); + if (rc < 0) + goto unlock; + } + + new_size = destoff + len; + destend = destoff + len - 1; - if (target_tcon->ses->server->ops->duplicate_extents) + /* Flush the folios at either end of the destination range to prevent + * accidental loss of dirty data outside of the range. + */ + fstart = destoff; + fend = destend; + + rc = cifs_flush_folio(target_inode, destoff, &fstart, &fend, true); + if (rc) + goto unlock; + rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false); + if (rc) + goto unlock; + + /* Discard all the folios that overlap the destination region. */ + cifs_dbg(FYI, "about to discard pages %llx-%llx\n", fstart, fend); + truncate_inode_pages_range(&target_inode->i_data, fstart, fend); + + fscache_invalidate(cifs_inode_cookie(target_inode), NULL, + i_size_read(target_inode), 0); + + rc = -EOPNOTSUPP; + if (target_tcon->ses->server->ops->duplicate_extents) { rc = target_tcon->ses->server->ops->duplicate_extents(xid, smb_file_src, smb_file_target, off, len, destoff); - else - rc = -EOPNOTSUPP; + if (rc == 0 && new_size > i_size_read(target_inode)) { + truncate_setsize(target_inode, new_size); + netfs_resize_file(&target_cifsi->netfs, new_size); + fscache_resize_cookie(cifs_inode_cookie(target_inode), + new_size); + } + } /* force revalidate of size and timestamps of target file now that target is updated on the server */ CIFS_I(target_inode)->time = 0; +unlock: /* although unlocking in the reverse order from locking is not strictly necessary here it is a little cleaner to be consistent */ unlock_two_nondirectories(src_inode, target_inode); -- cgit From c13c823a78b77ea0e5f1f73112d910e259911101 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 30 Nov 2023 13:18:29 -0600 Subject: arm64: dts: rockchip: Fix PCI node addresses on rk3399-gru The rk3399-gru PCI node addresses are wrong. In rk3399-gru-scarlet, the bus number in the address should be 0. This is because bus number assignment is dynamic and not known up front. For FDT, the bus number is simply ignored. In rk3399-gru-chromebook, the addresses are simply invalid. The first "reg" entry must be the configuration space for the device. The entry should be all 0s except for device/slot and function numbers. The existing 64-bit memory space (0x83000000) entries are not valid because they must have the BAR address in the lower byte of the first cell. Warnings for these are enabled by adding the missing 'device_type = "pci"' for the root port node. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20231130191830.2424361-1-robh@kernel.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi | 3 +-- arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts | 4 ++-- arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi | 1 + 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi index 5c1929d41cc0..cacbad35cfc8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi @@ -509,8 +509,7 @@ ap_i2c_tp: &i2c5 { &pci_rootport { mvl_wifi: wifi@0,0 { compatible = "pci1b4b,2b42"; - reg = <0x83010000 0x0 0x00000000 0x0 0x00100000 - 0x83010000 0x0 0x00100000 0x0 0x00100000>; + reg = <0x0000 0x0 0x0 0x0 0x0>; interrupt-parent = <&gpio0>; interrupts = <8 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts index 853e88455e75..9e4b12ed62cb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts @@ -34,8 +34,8 @@ &pci_rootport { wifi@0,0 { compatible = "qcom,ath10k"; - reg = <0x00010000 0x0 0x00000000 0x0 0x00000000>, - <0x03010010 0x0 0x00000000 0x0 0x00200000>; + reg = <0x00000000 0x0 0x00000000 0x0 0x00000000>, + <0x03000010 0x0 0x00000000 0x0 0x00200000>; qcom,ath10k-calibration-variant = "GO_DUMO"; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index c9bf1d5c3a42..789fd0dcc88b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -489,6 +489,7 @@ ap_i2c_audio: &i2c8 { #address-cells = <3>; #size-cells = <2>; ranges; + device_type = "pci"; }; }; -- cgit From ef6fae4a13aecfa7966edff0445e5c920ad2ddd9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 1 Dec 2023 22:31:23 -0500 Subject: bcachefs; Don't use btree write buffer until journal replay is finished The keys being replayed by journal replay have to be synchronized with updates by other threads that overwrite them. We rely on btree node locks for synchronizing - but since btree write buffer updates take no btree locks, that won't work. Instead, simply disable using the btree write buffer until journal replay is finished. This fixes a rare backpointers error in the merge_torture_flakey test. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 324767c0ddcc..25fdca00bf7b 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -554,6 +554,19 @@ int __must_check bch2_trans_update_seq(struct btree_trans *trans, u64 seq, BTREE_UPDATE_PREJOURNAL); } +static noinline int bch2_btree_insert_clone_trans(struct btree_trans *trans, + enum btree_id btree, + struct bkey_i *k) +{ + struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(&k->k)); + int ret = PTR_ERR_OR_ZERO(n); + if (ret) + return ret; + + bkey_copy(n, k); + return bch2_btree_insert_trans(trans, btree, n, 0); +} + int __must_check bch2_trans_update_buffered(struct btree_trans *trans, enum btree_id btree, struct bkey_i *k) @@ -564,6 +577,9 @@ int __must_check bch2_trans_update_buffered(struct btree_trans *trans, EBUG_ON(trans->nr_wb_updates > trans->wb_updates_size); EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX); + if (unlikely(trans->journal_replay_not_finished)) + return bch2_btree_insert_clone_trans(trans, btree, k); + trans_for_each_wb_update(trans, i) { if (i->btree == btree && bpos_eq(i->k.k.p, k->k.p)) { bkey_copy(&i->k, k); -- cgit From 87b0d8d3d05028c59b64c0287efeca90c28e1152 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 1 Dec 2023 01:14:50 -0500 Subject: bcachefs: Fix a journal deadlock in replay Recently, journal pre-reservations were removed. They were for reserving space ahead of time in the journal for operations that are required for journal reclaim, e.g. btree key cache flushing and interior node btree updates. Instead we have watermarks - only operations for journal reclaim are allowed when the journal is low on space, and in general we're quite good about doing operations in the order that will free up space in the journal quickest when we're low on space. If we're doing a journal reclaim operation out of order, we usually do it in nonblocking mode if it's not freeing up space at the end of the journal. There's an exceptino though - interior btree node update operations have to be BCH_WATERMARK_reclaim - once they've been started, and they can't be nonblocking. Generally this is fine because they'll only be a very small fraction of transaction commits - but there's an exception, which is during journal replay. Journal replay does many btree operations, but doesn't need to commit them to the journal since they're already in the journal. So killing off of pre-reservation, plus another change to make journal replay more efficient by initially doing the replay in sorted btree order, made it possible for the interior update operations replay generates to fill and deadlock the journal. Fix this by introducing a new check on journal space at the _start_ of an interior update operation. This causes us to block if necessary in exactly the same way as we used to when interior updates took a journal pre-reservaiton, but without all the expensive accounting pre-reservations required. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 6697417273aa..26be38ab6ecb 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1056,6 +1056,17 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, flags &= ~BCH_WATERMARK_MASK; flags |= watermark; + if (!(flags & BTREE_INSERT_JOURNAL_RECLAIM) && + watermark < c->journal.watermark) { + struct journal_res res = { 0 }; + + ret = drop_locks_do(trans, + bch2_journal_res_get(&c->journal, &res, 1, + watermark|JOURNAL_RES_GET_CHECK)); + if (ret) + return ERR_PTR(ret); + } + while (1) { nr_nodes[!!update_level] += 1 + split; update_level++; -- cgit From 131898b0cb4ac6598d3537eeeee2711dec129f51 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 2 Dec 2023 02:43:58 -0500 Subject: bcachefs: Fix bch2_extent_drop_ptrs() call Also, make bch2_extent_drop_ptrs() safer, so it works with extents and non-extents iterators. Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 71aa5e59787b..2418c528c533 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -471,7 +471,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans, * we aren't using the extent overwrite path to delete, we're * just using the normal key deletion path: */ - if (bkey_deleted(&n->k)) + if (bkey_deleted(&n->k) && !(iter->flags & BTREE_ITER_IS_EXTENTS)) n->k.size = 0; return bch2_trans_relock(trans) ?: @@ -591,7 +591,7 @@ int bch2_data_update_init(struct btree_trans *trans, m->data_opts.rewrite_ptrs = 0; /* if iter == NULL, it's just a promote */ if (iter) - ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts); + ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts); goto done; } -- cgit From adcf4ee64291b701d083bacf653eb10a4c46acd7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 4 Dec 2023 00:38:56 -0500 Subject: bcachefs: Convert compression_stats to for_each_btree_key2 for_each_btree_key2() runs each loop iteration in a btree transaction, and thus does not cause SRCU lock hold time problems. Signed-off-by: Kent Overstreet --- fs/bcachefs/sysfs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index ab743115f169..f3cb7115b530 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -276,8 +276,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c if (!btree_type_has_ptrs(id)) continue; - for_each_btree_key(trans, iter, id, POS_MIN, - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + ret = for_each_btree_key2(trans, iter, id, POS_MIN, + BTREE_ITER_ALL_SNAPSHOTS, k, ({ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -309,8 +309,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c nr_uncompressed_extents++; else if (compressed) nr_compressed_extents++; - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); } bch2_trans_put(trans); -- cgit From f88d811a238b12a261a04f125db952cf05c06d0b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 3 Dec 2023 13:05:21 -0500 Subject: bcachefs: Don't run indirect extent trigger unless inserting/deleting This fixes a transaction path overflow reported in the snapshot deletion path, when moving extents to the correct snapshot. The root of the issue is that creating/deleting a reflink pointer can generate an unbounded number of updates, if it is allowed to reference an unbounded number of indirect extents; to prevent this, merging of reflink pointers has been disabled. But there's a hole, which is that copygc/rebalance may fragment existing extents in the course of moving them around, and if an indirect extent becomes too fragmented we'll then become unable to delete the reflink pointer. The eventual solution is going to be to tweak trigger handling so that we can process large reflink pointers incrementally when necessary, and notice that trigger updates don't need to be run for the part of the reflink pointer not changing. That is going to be a bigger project though, for another patch. Signed-off-by: Kent Overstreet --- fs/bcachefs/reflink.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 6e1bfe9feb59..37d16e04e671 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -121,6 +121,14 @@ int bch2_trans_mark_reflink_v(struct btree_trans *trans, { check_indirect_extent_deleting(new, &flags); + if (old.k->type == KEY_TYPE_reflink_v && + new->k.type == KEY_TYPE_reflink_v && + old.k->u64s == new->k.u64s && + !memcmp(bkey_s_c_to_reflink_v(old).v->start, + bkey_i_to_reflink_v(new)->v.start, + bkey_val_bytes(&new->k) - 8)) + return 0; + return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags); } -- cgit From db3fadacaf0c817b222090290d06ca2a338422d0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 1 Dec 2023 14:10:21 +0100 Subject: packet: Move reference count in packet_sock to atomic_long_t In some potential instances the reference count on struct packet_sock could be saturated and cause overflows which gets the kernel a bit confused. To prevent this, move to a 64-bit atomic reference count on 64-bit architectures to prevent the possibility of this type to overflow. Because we can not handle saturation, using refcount_t is not possible in this place. Maybe someday in the future if it changes it could be used. Also, instead of using plain atomic64_t, use atomic_long_t instead. 32-bit machines tend to be memory-limited (i.e. anything that increases a reference uses so much memory that you can't actually get to 2**32 references). 32-bit architectures also tend to have serious problems with 64-bit atomics. Hence, atomic_long_t is the more natural solution. Reported-by: "The UK's National Cyber Security Centre (NCSC)" Co-developed-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Daniel Borkmann Cc: Linus Torvalds Cc: stable@kernel.org Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231201131021.19999-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 16 ++++++++-------- net/packet/internal.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a84e00b5904b..7adf48549a3b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4300,7 +4300,7 @@ static void packet_mm_open(struct vm_area_struct *vma) struct sock *sk = sock->sk; if (sk) - atomic_inc(&pkt_sk(sk)->mapped); + atomic_long_inc(&pkt_sk(sk)->mapped); } static void packet_mm_close(struct vm_area_struct *vma) @@ -4310,7 +4310,7 @@ static void packet_mm_close(struct vm_area_struct *vma) struct sock *sk = sock->sk; if (sk) - atomic_dec(&pkt_sk(sk)->mapped); + atomic_long_dec(&pkt_sk(sk)->mapped); } static const struct vm_operations_struct packet_mmap_ops = { @@ -4405,7 +4405,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, err = -EBUSY; if (!closing) { - if (atomic_read(&po->mapped)) + if (atomic_long_read(&po->mapped)) goto out; if (packet_read_pending(rb)) goto out; @@ -4508,7 +4508,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, err = -EBUSY; mutex_lock(&po->pg_vec_lock); - if (closing || atomic_read(&po->mapped) == 0) { + if (closing || atomic_long_read(&po->mapped) == 0) { err = 0; spin_lock_bh(&rb_queue->lock); swap(rb->pg_vec, pg_vec); @@ -4526,9 +4526,9 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, po->prot_hook.func = (po->rx_ring.pg_vec) ? tpacket_rcv : packet_rcv; skb_queue_purge(rb_queue); - if (atomic_read(&po->mapped)) - pr_err("packet_mmap: vma is busy: %d\n", - atomic_read(&po->mapped)); + if (atomic_long_read(&po->mapped)) + pr_err("packet_mmap: vma is busy: %ld\n", + atomic_long_read(&po->mapped)); } mutex_unlock(&po->pg_vec_lock); @@ -4606,7 +4606,7 @@ static int packet_mmap(struct file *file, struct socket *sock, } } - atomic_inc(&po->mapped); + atomic_long_inc(&po->mapped); vma->vm_ops = &packet_mmap_ops; err = 0; diff --git a/net/packet/internal.h b/net/packet/internal.h index d29c94c45159..d5d70712007a 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -122,7 +122,7 @@ struct packet_sock { __be16 num; struct packet_rollover *rollover; struct packet_mclist *mclist; - atomic_t mapped; + atomic_long_t mapped; enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; -- cgit From 4fbc3a52cd4d14de3793f4b2c721d7306ea84cf9 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 29 Nov 2023 14:21:41 -0600 Subject: RDMA/core: Fix umem iterator when PAGE_SIZE is greater then HCA pgsz 64k pages introduce the situation in this diagram when the HCA 4k page size is being used: +-------------------------------------------+ <--- 64k aligned VA | | | HCA 4k page | | | +-------------------------------------------+ | o | | | | o | | | | o | +-------------------------------------------+ | | | HCA 4k page | | | +-------------------------------------------+ <--- Live HCA page |OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO| <--- offset | | <--- VA | MR data | +-------------------------------------------+ | | | HCA 4k page | | | +-------------------------------------------+ | o | | | | o | | | | o | +-------------------------------------------+ | | | HCA 4k page | | | +-------------------------------------------+ The VA addresses are coming from rdma-core in this diagram can be arbitrary, but for 64k pages, the VA may be offset by some number of HCA 4k pages and followed by some number of HCA 4k pages. The current iterator doesn't account for either the preceding 4k pages or the following 4k pages. Fix the issue by extending the ib_block_iter to contain the number of DMA pages like comment [1] says and by using __sg_advance to start the iterator at the first live HCA page. The changes are contained in a parallel set of iterator start and next functions that are umem aware and specific to umem since there is one user of the rdma_for_each_block() without umem. These two fixes prevents the extra pages before and after the user MR data. Fix the preceding pages by using the __sq_advance field to start at the first 4k page containing MR data. Fix the following pages by saving the number of pgsz blocks in the iterator state and downcounting on each next. This fix allows for the elimination of the small page crutch noted in the Fixes. Fixes: 10c75ccb54e4 ("RDMA/umem: Prevent small pages from being returned by ib_umem_find_best_pgsz()") Link: https://lore.kernel.org/r/20231129202143.1434-2-shiraz.saleem@intel.com Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 6 ------ include/rdma/ib_umem.h | 9 ++++++++- include/rdma/ib_verbs.h | 1 + 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index f9ab671c8eda..07c571c7b699 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -96,12 +96,6 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, return page_size; } - /* rdma_for_each_block() has a bug if the page size is smaller than the - * page size used to build the umem. For now prevent smaller page sizes - * from being returned. - */ - pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT); - /* The best result is the smallest page size that results in the minimum * number of required pages. Compute the largest page size that could * work based on VA address bits that don't change. diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 95896472a82b..565a85044541 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -77,6 +77,13 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter, { __rdma_block_iter_start(biter, umem->sgt_append.sgt.sgl, umem->sgt_append.sgt.nents, pgsz); + biter->__sg_advance = ib_umem_offset(umem) & ~(pgsz - 1); + biter->__sg_numblocks = ib_umem_num_dma_blocks(umem, pgsz); +} + +static inline bool __rdma_umem_block_iter_next(struct ib_block_iter *biter) +{ + return __rdma_block_iter_next(biter) && biter->__sg_numblocks--; } /** @@ -92,7 +99,7 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter, */ #define rdma_umem_for_each_dma_block(umem, biter, pgsz) \ for (__rdma_umem_block_iter_start(biter, umem, pgsz); \ - __rdma_block_iter_next(biter);) + __rdma_umem_block_iter_next(biter);) #ifdef CONFIG_INFINIBAND_USER_MEM diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index fb1a2d6b1969..b7b6b58dd348 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2850,6 +2850,7 @@ struct ib_block_iter { /* internal states */ struct scatterlist *__sg; /* sg holding the current aligned block */ dma_addr_t __dma_addr; /* unaligned DMA address of this block */ + size_t __sg_numblocks; /* ib_umem_num_dma_blocks() */ unsigned int __sg_nents; /* number of SG entries */ unsigned int __sg_advance; /* number of bytes to advance in sg in next step */ unsigned int __pg_bit; /* alignment of current block */ -- cgit From 0a5ec366de7e94192669ba08de6ed336607fd282 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 29 Nov 2023 14:21:42 -0600 Subject: RDMA/irdma: Ensure iWarp QP queue memory is OS paged aligned The SQ is shared for between kernel and used by storing the kernel page pointer and passing that to a kmap_atomic(). This then requires that the alignment is PAGE_SIZE aligned. Fix by adding an iWarp specific alignment check. Fixes: e965ef0e7b2c ("RDMA/irdma: Split QP handler into irdma_reg_user_mr_type_qp") Link: https://lore.kernel.org/r/20231129202143.1434-3-shiraz.saleem@intel.com Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 5fa88e6cca4e..fb9088392b19 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2946,6 +2946,11 @@ static int irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req, int err; u8 lvl; + /* iWarp: Catch page not starting on OS page boundary */ + if (!rdma_protocol_roce(&iwdev->ibdev, 1) && + ib_umem_offset(iwmr->region)) + return -EINVAL; + total = req.sq_pages + req.rq_pages + 1; if (total > iwmr->page_cnt) return -EINVAL; -- cgit From 03769f72d66edab82484449ed594cb6b00ae0223 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 29 Nov 2023 14:21:43 -0600 Subject: RDMA/irdma: Fix support for 64k pages Virtual QP and CQ require a 4K HW page size but the driver passes PAGE_SIZE to ib_umem_find_best_pgsz() instead. Fix this by using the appropriate 4k value in the bitmap passed to ib_umem_find_best_pgsz(). Fixes: 693a5386eff0 ("RDMA/irdma: Split mr alloc and free into new functions") Link: https://lore.kernel.org/r/20231129202143.1434-4-shiraz.saleem@intel.com Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index fb9088392b19..b5eb8d421988 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2914,7 +2914,7 @@ static struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region, iwmr->type = reg_type; pgsz_bitmap = (reg_type == IRDMA_MEMREG_TYPE_MEM) ? - iwdev->rf->sc_dev.hw_attrs.page_size_cap : PAGE_SIZE; + iwdev->rf->sc_dev.hw_attrs.page_size_cap : SZ_4K; iwmr->page_size = ib_umem_find_best_pgsz(region, pgsz_bitmap, virt); if (unlikely(!iwmr->page_size)) { -- cgit From e3e82fcb79eeb3f1a88a89f676831773caff514a Mon Sep 17 00:00:00 2001 From: Shifeng Li Date: Thu, 30 Nov 2023 00:14:15 -0800 Subject: RDMA/irdma: Avoid free the non-cqp_request scratch When creating ceq_0 during probing irdma, cqp.sc_cqp will be sent as a cqp_request to cqp->sc_cqp.sq_ring. If the request is pending when removing the irdma driver or unplugging its aux device, cqp.sc_cqp will be dereferenced as wrong struct in irdma_free_pending_cqp_request(). PID: 3669 TASK: ffff88aef892c000 CPU: 28 COMMAND: "kworker/28:0" #0 [fffffe0000549e38] crash_nmi_callback at ffffffff810e3a34 #1 [fffffe0000549e40] nmi_handle at ffffffff810788b2 #2 [fffffe0000549ea0] default_do_nmi at ffffffff8107938f #3 [fffffe0000549eb8] do_nmi at ffffffff81079582 #4 [fffffe0000549ef0] end_repeat_nmi at ffffffff82e016b4 [exception RIP: native_queued_spin_lock_slowpath+1291] RIP: ffffffff8127e72b RSP: ffff88aa841ef778 RFLAGS: 00000046 RAX: 0000000000000000 RBX: ffff88b01f849700 RCX: ffffffff8127e47e RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff83857ec0 RBP: ffff88afe3e4efc8 R8: ffffed15fc7c9dfa R9: ffffed15fc7c9dfa R10: 0000000000000001 R11: ffffed15fc7c9df9 R12: 0000000000740000 R13: ffff88b01f849708 R14: 0000000000000003 R15: ffffed1603f092e1 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0000 -- -- #5 [ffff88aa841ef778] native_queued_spin_lock_slowpath at ffffffff8127e72b #6 [ffff88aa841ef7b0] _raw_spin_lock_irqsave at ffffffff82c22aa4 #7 [ffff88aa841ef7c8] __wake_up_common_lock at ffffffff81257363 #8 [ffff88aa841ef888] irdma_free_pending_cqp_request at ffffffffa0ba12cc [irdma] #9 [ffff88aa841ef958] irdma_cleanup_pending_cqp_op at ffffffffa0ba1469 [irdma] #10 [ffff88aa841ef9c0] irdma_ctrl_deinit_hw at ffffffffa0b2989f [irdma] #11 [ffff88aa841efa28] irdma_remove at ffffffffa0b252df [irdma] #12 [ffff88aa841efae8] auxiliary_bus_remove at ffffffff8219afdb #13 [ffff88aa841efb00] device_release_driver_internal at ffffffff821882e6 #14 [ffff88aa841efb38] bus_remove_device at ffffffff82184278 #15 [ffff88aa841efb88] device_del at ffffffff82179d23 #16 [ffff88aa841efc48] ice_unplug_aux_dev at ffffffffa0eb1c14 [ice] #17 [ffff88aa841efc68] ice_service_task at ffffffffa0d88201 [ice] #18 [ffff88aa841efde8] process_one_work at ffffffff811c589a #19 [ffff88aa841efe60] worker_thread at ffffffff811c71ff #20 [ffff88aa841eff10] kthread at ffffffff811d87a0 #21 [ffff88aa841eff50] ret_from_fork at ffffffff82e0022f Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Link: https://lore.kernel.org/r/20231130081415.891006-1-lishifeng@sangfor.com.cn Suggested-by: "Ismail, Mustafa" Signed-off-by: Shifeng Li Reviewed-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/hw.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 64a382070e5c..bd4b2b896444 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1184,7 +1184,6 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, int status; struct irdma_ceq_init_info info = {}; struct irdma_sc_dev *dev = &rf->sc_dev; - u64 scratch; u32 ceq_size; info.ceq_id = ceq_id; @@ -1205,14 +1204,13 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, iwceq->sc_ceq.ceq_id = ceq_id; info.dev = dev; info.vsi = vsi; - scratch = (uintptr_t)&rf->cqp.sc_cqp; status = irdma_sc_ceq_init(&iwceq->sc_ceq, &info); if (!status) { if (dev->ceq_valid) status = irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq, IRDMA_OP_CEQ_CREATE); else - status = irdma_sc_cceq_create(&iwceq->sc_ceq, scratch); + status = irdma_sc_cceq_create(&iwceq->sc_ceq, 0); } if (status) { -- cgit From fcc9b50e5517f7d65cdc33d81f223b22536f863f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 5 Dec 2023 09:10:01 +0900 Subject: Revert "greybus: gb-beagleplay: Ensure le for values in transport" This reverts commit 52eb67861ebeb2110318bd9fe33d85ddcf92aac7. Turns out to not be correct, a new version will be generated later. Link: https://lore.kernel.org/r/20231204131008.384583-1-ayushdevel1325@gmail.com Cc: Ayush Singh Signed-off-by: Greg Kroah-Hartman --- drivers/greybus/gb-beagleplay.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/greybus/gb-beagleplay.c b/drivers/greybus/gb-beagleplay.c index 1e70ff7e3da4..43318c1993ba 100644 --- a/drivers/greybus/gb-beagleplay.c +++ b/drivers/greybus/gb-beagleplay.c @@ -93,9 +93,9 @@ static void hdlc_rx_greybus_frame(struct gb_beagleplay *bg, u8 *buf, u16 len) memcpy(&cport_id, hdr->pad, sizeof(cport_id)); dev_dbg(&bg->sd->dev, "Greybus Operation %u type %X cport %u status %u received", - hdr->operation_id, hdr->type, le16_to_cpu(cport_id), hdr->result); + hdr->operation_id, hdr->type, cport_id, hdr->result); - greybus_data_rcvd(bg->gb_hd, le16_to_cpu(cport_id), buf, len); + greybus_data_rcvd(bg->gb_hd, cport_id, buf, len); } static void hdlc_rx_dbg_frame(const struct gb_beagleplay *bg, const char *buf, u16 len) @@ -340,15 +340,14 @@ static int gb_message_send(struct gb_host_device *hd, u16 cport, struct gb_messa { struct gb_beagleplay *bg = dev_get_drvdata(&hd->dev); struct hdlc_payload payloads[2]; - __le16 cport_id = le16_to_cpu(cport); dev_dbg(&hd->dev, "Sending greybus message with Operation %u, Type: %X on Cport %u", msg->header->operation_id, msg->header->type, cport); - if (le16_to_cpu(msg->header->size) > RX_HDLC_PAYLOAD) + if (msg->header->size > RX_HDLC_PAYLOAD) return dev_err_probe(&hd->dev, -E2BIG, "Greybus message too big"); - memcpy(msg->header->pad, &cport_id, sizeof(cport_id)); + memcpy(msg->header->pad, &cport, sizeof(cport)); payloads[0].buf = msg->header; payloads[0].len = sizeof(*msg->header); -- cgit From e05501e8a84eee4f819f31b9ce663bddd01b3b69 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 6 Nov 2023 10:26:45 -0700 Subject: cxl: Add cxl_num_decoders_committed() usage to cxl_test Commit 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper") missed the conversion for cxl_test. Add usage of cxl_num_decoders_committed() to replace the open coding. Suggested-by: Alison Schofield Signed-off-by: Dave Jiang Reviewed-by: Fan Ni Link: https://lore.kernel.org/r/169929160525.824083.11813222229025394254.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- tools/testing/cxl/Kbuild | 1 + tools/testing/cxl/cxl_core_exports.c | 7 +++++++ tools/testing/cxl/test/cxl.c | 5 +++-- 3 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 tools/testing/cxl/cxl_core_exports.c diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 90f3c9802ffb..95dc58b94178 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -62,5 +62,6 @@ cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o +cxl_core-y += cxl_core_exports.o obj-m += test/ diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c new file mode 100644 index 000000000000..077e6883921d --- /dev/null +++ b/tools/testing/cxl/cxl_core_exports.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ + +#include "cxl.h" + +/* Exporting of cxl_core symbols that are only used by cxl_test */ +EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, CXL); diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index b88546299902..f4e517a0c774 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -669,10 +669,11 @@ static int mock_decoder_commit(struct cxl_decoder *cxld) return 0; dev_dbg(&port->dev, "%s commit\n", dev_name(&cxld->dev)); - if (port->commit_end + 1 != id) { + if (cxl_num_decoders_committed(port) != id) { dev_dbg(&port->dev, "%s: out of order commit, expected decoder%d.%d\n", - dev_name(&cxld->dev), port->id, port->commit_end + 1); + dev_name(&cxld->dev), port->id, + cxl_num_decoders_committed(port)); return -EBUSY; } -- cgit From 659aa050a53817157b7459529538598a6449c1d3 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 13 Nov 2023 14:13:24 -0800 Subject: kernel/resource: Increment by align value in get_free_mem_region() Currently get_free_mem_region() searches for available capacity in increments equal to the region size being requested. This can cause the search to take giant steps through the resource leaving needless gaps and missing available space. Specifically 'cxl create-region' fails with ERANGE even though capacity of the given size and CXL's expected 256M x InterleaveWays alignment can be satisfied. Replace the total-request-size increment with a next alignment increment so that the next possible address is always examined for availability. Fixes: 14b80582c43e ("resource: Introduce alloc_free_mem_region()") Reported-by: Dmytro Adamenko Reported-by: Dan Williams Signed-off-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20231113221324.1118092-1-alison.schofield@intel.com Cc: Jason Gunthorpe Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams --- kernel/resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index 866ef3663a0b..91be1bc50b60 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1844,8 +1844,8 @@ get_free_mem_region(struct device *dev, struct resource *base, write_lock(&resource_lock); for (addr = gfr_start(base, size, align, flags); - gfr_continue(base, addr, size, flags); - addr = gfr_next(addr, size, flags)) { + gfr_continue(base, addr, align, flags); + addr = gfr_next(addr, align, flags)) { if (__region_intersects(base, addr, size, 0, IORES_DESC_NONE) != REGION_DISJOINT) continue; -- cgit From 6b17a597fc2f13aaaa0a2780eb7edb9ae7ac9aea Mon Sep 17 00:00:00 2001 From: Thomas Reichinger Date: Thu, 30 Nov 2023 12:35:03 +0100 Subject: arcnet: restoring support for multiple Sohard Arcnet cards Probe of Sohard Arcnet cards fails, if 2 or more cards are installed in a system. See kernel log: [ 2.759203] arcnet: arcnet loaded [ 2.763648] arcnet:com20020: COM20020 chipset support (by David Woodhouse et al.) [ 2.770585] arcnet:com20020_pci: COM20020 PCI support [ 2.772295] com20020 0000:02:00.0: enabling device (0000 -> 0003) [ 2.772354] (unnamed net_device) (uninitialized): PLX-PCI Controls ... [ 3.071301] com20020 0000:02:00.0 arc0-0 (uninitialized): PCI COM20020: station FFh found at F080h, IRQ 101. [ 3.071305] com20020 0000:02:00.0 arc0-0 (uninitialized): Using CKP 64 - data rate 2.5 Mb/s [ 3.071534] com20020 0000:07:00.0: enabling device (0000 -> 0003) [ 3.071581] (unnamed net_device) (uninitialized): PLX-PCI Controls ... [ 3.369501] com20020 0000:07:00.0: Led pci:green:tx:0-0 renamed to pci:green:tx:0-0_1 due to name collision [ 3.369535] com20020 0000:07:00.0: Led pci:red:recon:0-0 renamed to pci:red:recon:0-0_1 due to name collision [ 3.370586] com20020 0000:07:00.0 arc0-0 (uninitialized): PCI COM20020: station E1h found at C000h, IRQ 35. [ 3.370589] com20020 0000:07:00.0 arc0-0 (uninitialized): Using CKP 64 - data rate 2.5 Mb/s [ 3.370608] com20020: probe of 0000:07:00.0 failed with error -5 commit 5ef216c1f848 ("arcnet: com20020-pci: add rotary index support") changes the device name of all COM20020 based PCI cards, even if only some cards support this: snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i); The error happens because all Sohard Arcnet cards would be called arc0-0, since the Sohard Arcnet cards don't have a PLX rotary coder. I.e. EAE Arcnet cards have a PLX rotary coder, which sets the first decimal, ensuring unique devices names. This patch adds two new card feature flags to indicate which cards support LEDs and the PLX rotary coder. For EAE based cards the names still depend on the PLX rotary coder (untested, since missing EAE hardware). For Sohard based cards, this patch will result in devices being called arc0, arc1, ... (tested). Signed-off-by: Thomas Reichinger Fixes: 5ef216c1f848 ("arcnet: com20020-pci: add rotary index support") Link: https://lore.kernel.org/r/20231130113503.6812-1-thomas.reichinger@sohard.de Signed-off-by: Jakub Kicinski --- drivers/net/arcnet/arcdevice.h | 2 + drivers/net/arcnet/com20020-pci.c | 89 ++++++++++++++++++++------------------- 2 files changed, 48 insertions(+), 43 deletions(-) diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index 19e996a829c9..b54275389f8a 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -186,6 +186,8 @@ do { \ #define ARC_IS_5MBIT 1 /* card default speed is 5MBit */ #define ARC_CAN_10MBIT 2 /* card uses COM20022, supporting 10MBit, but default is 2.5MBit. */ +#define ARC_HAS_LED 4 /* card has software controlled LEDs */ +#define ARC_HAS_ROTARY 8 /* card has rotary encoder */ /* information needed to define an encapsulation driver */ struct ArcProto { diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index c580acb8b1d3..7b5c8bb02f11 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -213,12 +213,13 @@ static int com20020pci_probe(struct pci_dev *pdev, if (!strncmp(ci->name, "EAE PLX-PCI FB2", 15)) lp->backplane = 1; - /* Get the dev_id from the PLX rotary coder */ - if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15)) - dev_id_mask = 0x3; - dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask; - - snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i); + if (ci->flags & ARC_HAS_ROTARY) { + /* Get the dev_id from the PLX rotary coder */ + if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15)) + dev_id_mask = 0x3; + dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask; + snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i); + } if (arcnet_inb(ioaddr, COM20020_REG_R_STATUS) == 0xFF) { pr_err("IO address %Xh is empty!\n", ioaddr); @@ -230,6 +231,10 @@ static int com20020pci_probe(struct pci_dev *pdev, goto err_free_arcdev; } + ret = com20020_found(dev, IRQF_SHARED); + if (ret) + goto err_free_arcdev; + card = devm_kzalloc(&pdev->dev, sizeof(struct com20020_dev), GFP_KERNEL); if (!card) { @@ -239,41 +244,39 @@ static int com20020pci_probe(struct pci_dev *pdev, card->index = i; card->pci_priv = priv; - card->tx_led.brightness_set = led_tx_set; - card->tx_led.default_trigger = devm_kasprintf(&pdev->dev, - GFP_KERNEL, "arc%d-%d-tx", - dev->dev_id, i); - card->tx_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "pci:green:tx:%d-%d", - dev->dev_id, i); - - card->tx_led.dev = &dev->dev; - card->recon_led.brightness_set = led_recon_set; - card->recon_led.default_trigger = devm_kasprintf(&pdev->dev, - GFP_KERNEL, "arc%d-%d-recon", - dev->dev_id, i); - card->recon_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "pci:red:recon:%d-%d", - dev->dev_id, i); - card->recon_led.dev = &dev->dev; - card->dev = dev; - - ret = devm_led_classdev_register(&pdev->dev, &card->tx_led); - if (ret) - goto err_free_arcdev; - ret = devm_led_classdev_register(&pdev->dev, &card->recon_led); - if (ret) - goto err_free_arcdev; - - dev_set_drvdata(&dev->dev, card); - - ret = com20020_found(dev, IRQF_SHARED); - if (ret) - goto err_free_arcdev; - - devm_arcnet_led_init(dev, dev->dev_id, i); + if (ci->flags & ARC_HAS_LED) { + card->tx_led.brightness_set = led_tx_set; + card->tx_led.default_trigger = devm_kasprintf(&pdev->dev, + GFP_KERNEL, "arc%d-%d-tx", + dev->dev_id, i); + card->tx_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "pci:green:tx:%d-%d", + dev->dev_id, i); + + card->tx_led.dev = &dev->dev; + card->recon_led.brightness_set = led_recon_set; + card->recon_led.default_trigger = devm_kasprintf(&pdev->dev, + GFP_KERNEL, "arc%d-%d-recon", + dev->dev_id, i); + card->recon_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "pci:red:recon:%d-%d", + dev->dev_id, i); + card->recon_led.dev = &dev->dev; + + ret = devm_led_classdev_register(&pdev->dev, &card->tx_led); + if (ret) + goto err_free_arcdev; + + ret = devm_led_classdev_register(&pdev->dev, &card->recon_led); + if (ret) + goto err_free_arcdev; + + dev_set_drvdata(&dev->dev, card); + devm_arcnet_led_init(dev, dev->dev_id, i); + } + card->dev = dev; list_add(&card->list, &priv->list_dev); continue; @@ -329,7 +332,7 @@ static struct com20020_pci_card_info card_info_5mbit = { }; static struct com20020_pci_card_info card_info_sohard = { - .name = "PLX-PCI", + .name = "SOHARD SH ARC-PCI", .devcount = 1, /* SOHARD needs PCI base addr 4 */ .chan_map_tbl = { @@ -364,7 +367,7 @@ static struct com20020_pci_card_info card_info_eae_arc1 = { }, }, .rotary = 0x0, - .flags = ARC_CAN_10MBIT, + .flags = ARC_HAS_ROTARY | ARC_HAS_LED | ARC_CAN_10MBIT, }; static struct com20020_pci_card_info card_info_eae_ma1 = { @@ -396,7 +399,7 @@ static struct com20020_pci_card_info card_info_eae_ma1 = { }, }, .rotary = 0x0, - .flags = ARC_CAN_10MBIT, + .flags = ARC_HAS_ROTARY | ARC_HAS_LED | ARC_CAN_10MBIT, }; static struct com20020_pci_card_info card_info_eae_fb2 = { @@ -421,7 +424,7 @@ static struct com20020_pci_card_info card_info_eae_fb2 = { }, }, .rotary = 0x0, - .flags = ARC_CAN_10MBIT, + .flags = ARC_HAS_ROTARY | ARC_HAS_LED | ARC_CAN_10MBIT, }; static const struct pci_device_id com20020pci_id_table[] = { -- cgit From adbf100fc47001c93d7e513ecac6fd6e04d5b4a1 Mon Sep 17 00:00:00 2001 From: Naveen Mamindlapalli Date: Fri, 1 Dec 2023 11:03:30 +0530 Subject: octeontx2-pf: consider both Rx and Tx packet stats for adaptive interrupt coalescing The current adaptive interrupt coalescing code updates only rx packet stats for dim algorithm. This patch also updates tx packet stats which will be useful when there is only tx traffic. Also moved configuring hardware adaptive interrupt setting to driver dim callback. Fixes: 6e144b47f560 ("octeontx2-pf: Add support for adaptive interrupt coalescing") Signed-off-by: Naveen Mamindlapalli Signed-off-by: Suman Ghosh Reviewed-by: Wojciech Drewek Link: https://lore.kernel.org/r/20231201053330.3903694-1-sumang@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 9 +++++++++ .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 20 ++++++++++---------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 532e324bdcc8..0c17ebdda148 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1688,6 +1688,14 @@ static void otx2_do_set_rx_mode(struct otx2_nic *pf) mutex_unlock(&pf->mbox.lock); } +static void otx2_set_irq_coalesce(struct otx2_nic *pfvf) +{ + int cint; + + for (cint = 0; cint < pfvf->hw.cint_cnt; cint++) + otx2_config_irq_coalescing(pfvf, cint); +} + static void otx2_dim_work(struct work_struct *w) { struct dim_cq_moder cur_moder; @@ -1703,6 +1711,7 @@ static void otx2_dim_work(struct work_struct *w) CQ_TIMER_THRESH_MAX : cur_moder.usec; pfvf->hw.cq_ecount_wait = (cur_moder.pkts > NAPI_POLL_WEIGHT) ? NAPI_POLL_WEIGHT : cur_moder.pkts; + otx2_set_irq_coalesce(pfvf); dim->state = DIM_START_MEASURE; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 6ee15f3c25ed..4d519ea833b2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -512,11 +512,18 @@ static void otx2_adjust_adaptive_coalese(struct otx2_nic *pfvf, struct otx2_cq_p { struct dim_sample dim_sample; u64 rx_frames, rx_bytes; + u64 tx_frames, tx_bytes; rx_frames = OTX2_GET_RX_STATS(RX_BCAST) + OTX2_GET_RX_STATS(RX_MCAST) + OTX2_GET_RX_STATS(RX_UCAST); rx_bytes = OTX2_GET_RX_STATS(RX_OCTS); - dim_update_sample(pfvf->napi_events, rx_frames, rx_bytes, &dim_sample); + tx_bytes = OTX2_GET_TX_STATS(TX_OCTS); + tx_frames = OTX2_GET_TX_STATS(TX_UCAST); + + dim_update_sample(pfvf->napi_events, + rx_frames + tx_frames, + rx_bytes + tx_bytes, + &dim_sample); net_dim(&cq_poll->dim, dim_sample); } @@ -558,16 +565,9 @@ int otx2_napi_handler(struct napi_struct *napi, int budget) if (pfvf->flags & OTX2_FLAG_INTF_DOWN) return workdone; - /* Check for adaptive interrupt coalesce */ - if (workdone != 0 && - ((pfvf->flags & OTX2_FLAG_ADPTV_INT_COAL_ENABLED) == - OTX2_FLAG_ADPTV_INT_COAL_ENABLED)) { - /* Adjust irq coalese using net_dim */ + /* Adjust irq coalese using net_dim */ + if (pfvf->flags & OTX2_FLAG_ADPTV_INT_COAL_ENABLED) otx2_adjust_adaptive_coalese(pfvf, cq_poll); - /* Update irq coalescing */ - for (i = 0; i < pfvf->hw.cint_cnt; i++) - otx2_config_irq_coalescing(pfvf, i); - } if (unlikely(!filled_cnt)) { struct refill_work *work; -- cgit From 37e4b8df27bc68340f3fc80dbb27e3549c7f881c Mon Sep 17 00:00:00 2001 From: Jianheng Zhang Date: Fri, 1 Dec 2023 03:22:03 +0000 Subject: net: stmmac: fix FPE events losing The status bits of register MAC_FPE_CTRL_STS are clear on read. Using 32-bit read for MAC_FPE_CTRL_STS in dwmac5_fpe_configure() and dwmac5_fpe_send_mpacket() clear the status bits. Then the stmmac interrupt handler missing FPE event status and leads to FPE handshaking failure and retries. To avoid clear status bits of MAC_FPE_CTRL_STS in dwmac5_fpe_configure() and dwmac5_fpe_send_mpacket(), add fpe_csr to stmmac_fpe_cfg structure to cache the control bits of MAC_FPE_CTRL_STS and to avoid reading MAC_FPE_CTRL_STS in those methods. Fixes: 5a5586112b92 ("net: stmmac: support FPE link partner hand-shaking procedure") Reviewed-by: Serge Semin Signed-off-by: Jianheng Zhang Link: https://lore.kernel.org/r/CY5PR12MB637225A7CF529D5BE0FBE59CBF81A@CY5PR12MB6372.namprd12.prod.outlook.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac5.c | 45 +++++++++------------- drivers/net/ethernet/stmicro/stmmac/dwmac5.h | 4 +- .../net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 3 +- drivers/net/ethernet/stmicro/stmmac/hwif.h | 4 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 8 +++- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 1 + include/linux/stmmac.h | 1 + 7 files changed, 36 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index e95d35f1e5a0..8fd167501fa0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -710,28 +710,22 @@ void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev, } } -void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, +void dwmac5_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, + u32 num_txq, u32 num_rxq, bool enable) { u32 value; - if (!enable) { - value = readl(ioaddr + MAC_FPE_CTRL_STS); - - value &= ~EFPE; - - writel(value, ioaddr + MAC_FPE_CTRL_STS); - return; + if (enable) { + cfg->fpe_csr = EFPE; + value = readl(ioaddr + GMAC_RXQ_CTRL1); + value &= ~GMAC_RXQCTRL_FPRQ; + value |= (num_rxq - 1) << GMAC_RXQCTRL_FPRQ_SHIFT; + writel(value, ioaddr + GMAC_RXQ_CTRL1); + } else { + cfg->fpe_csr = 0; } - - value = readl(ioaddr + GMAC_RXQ_CTRL1); - value &= ~GMAC_RXQCTRL_FPRQ; - value |= (num_rxq - 1) << GMAC_RXQCTRL_FPRQ_SHIFT; - writel(value, ioaddr + GMAC_RXQ_CTRL1); - - value = readl(ioaddr + MAC_FPE_CTRL_STS); - value |= EFPE; - writel(value, ioaddr + MAC_FPE_CTRL_STS); + writel(cfg->fpe_csr, ioaddr + MAC_FPE_CTRL_STS); } int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev) @@ -741,6 +735,9 @@ int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev) status = FPE_EVENT_UNKNOWN; + /* Reads from the MAC_FPE_CTRL_STS register should only be performed + * here, since the status flags of MAC_FPE_CTRL_STS are "clear on read" + */ value = readl(ioaddr + MAC_FPE_CTRL_STS); if (value & TRSP) { @@ -766,19 +763,15 @@ int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev) return status; } -void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, enum stmmac_mpacket_type type) +void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, + enum stmmac_mpacket_type type) { - u32 value; + u32 value = cfg->fpe_csr; - value = readl(ioaddr + MAC_FPE_CTRL_STS); - - if (type == MPACKET_VERIFY) { - value &= ~SRSP; + if (type == MPACKET_VERIFY) value |= SVER; - } else { - value &= ~SVER; + else if (type == MPACKET_RESPONSE) value |= SRSP; - } writel(value, ioaddr + MAC_FPE_CTRL_STS); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h index 53c138d0ff48..34e620790eb3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h @@ -153,9 +153,11 @@ int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, unsigned int ptp_rate); void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev, struct stmmac_extra_stats *x, u32 txqcnt); -void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, +void dwmac5_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, + u32 num_txq, u32 num_rxq, bool enable); void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, + struct stmmac_fpe_cfg *cfg, enum stmmac_mpacket_type type); int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 453e88b75be0..a74e71db79f9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -1484,7 +1484,8 @@ static int dwxgmac3_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, return 0; } -static void dwxgmac3_fpe_configure(void __iomem *ioaddr, u32 num_txq, +static void dwxgmac3_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, + u32 num_txq, u32 num_rxq, bool enable) { u32 value; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index b95d3e137813..68aa2d5ca6e5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -412,9 +412,11 @@ struct stmmac_ops { unsigned int ptp_rate); void (*est_irq_status)(void __iomem *ioaddr, struct net_device *dev, struct stmmac_extra_stats *x, u32 txqcnt); - void (*fpe_configure)(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, + void (*fpe_configure)(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, + u32 num_txq, u32 num_rxq, bool enable); void (*fpe_send_mpacket)(void __iomem *ioaddr, + struct stmmac_fpe_cfg *cfg, enum stmmac_mpacket_type type); int (*fpe_irq_status)(void __iomem *ioaddr, struct net_device *dev); }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2afb2bd25977..37e64283f910 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -964,7 +964,8 @@ static void stmmac_fpe_link_state_handle(struct stmmac_priv *priv, bool is_up) bool *hs_enable = &fpe_cfg->hs_enable; if (is_up && *hs_enable) { - stmmac_fpe_send_mpacket(priv, priv->ioaddr, MPACKET_VERIFY); + stmmac_fpe_send_mpacket(priv, priv->ioaddr, fpe_cfg, + MPACKET_VERIFY); } else { *lo_state = FPE_STATE_OFF; *lp_state = FPE_STATE_OFF; @@ -5839,6 +5840,7 @@ static void stmmac_fpe_event_status(struct stmmac_priv *priv, int status) /* If user has requested FPE enable, quickly response */ if (*hs_enable) stmmac_fpe_send_mpacket(priv, priv->ioaddr, + fpe_cfg, MPACKET_RESPONSE); } @@ -7263,6 +7265,7 @@ static void stmmac_fpe_lp_task(struct work_struct *work) if (*lo_state == FPE_STATE_ENTERING_ON && *lp_state == FPE_STATE_ENTERING_ON) { stmmac_fpe_configure(priv, priv->ioaddr, + fpe_cfg, priv->plat->tx_queues_to_use, priv->plat->rx_queues_to_use, *enable); @@ -7281,6 +7284,7 @@ static void stmmac_fpe_lp_task(struct work_struct *work) netdev_info(priv->dev, SEND_VERIFY_MPAKCET_FMT, *lo_state, *lp_state); stmmac_fpe_send_mpacket(priv, priv->ioaddr, + fpe_cfg, MPACKET_VERIFY); } /* Sleep then retry */ @@ -7295,6 +7299,7 @@ void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable) if (priv->plat->fpe_cfg->hs_enable != enable) { if (enable) { stmmac_fpe_send_mpacket(priv, priv->ioaddr, + priv->plat->fpe_cfg, MPACKET_VERIFY); } else { priv->plat->fpe_cfg->lo_fpe_state = FPE_STATE_OFF; @@ -7755,6 +7760,7 @@ int stmmac_suspend(struct device *dev) if (priv->dma_cap.fpesel) { /* Disable FPE */ stmmac_fpe_configure(priv, priv->ioaddr, + priv->plat->fpe_cfg, priv->plat->tx_queues_to_use, priv->plat->rx_queues_to_use, false); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index ac41ef4cbd2f..6ad3e0a11936 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -1079,6 +1079,7 @@ disable: priv->plat->fpe_cfg->enable = false; stmmac_fpe_configure(priv, priv->ioaddr, + priv->plat->fpe_cfg, priv->plat->tx_queues_to_use, priv->plat->rx_queues_to_use, false); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 0b4658a7eceb..dee5ad6e48c5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -175,6 +175,7 @@ struct stmmac_fpe_cfg { bool hs_enable; /* FPE handshake enable */ enum stmmac_fpe_state lp_fpe_state; /* Link Partner FPE state */ enum stmmac_fpe_state lo_fpe_state; /* Local station FPE state */ + u32 fpe_csr; /* MAC_FPE_CTRL_STS reg cache */ }; struct stmmac_safety_feature_cfg { -- cgit From 4b3338aaa74d7d4ec5b6734dc298f0db94ec83d2 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Thu, 30 Nov 2023 12:29:47 +0530 Subject: powerpc/ftrace: Fix stack teardown in ftrace_no_trace Commit 41a506ef71eb ("powerpc/ftrace: Create a dummy stackframe to fix stack unwind") added use of a new stack frame on ftrace entry to fix stack unwind. However, the commit missed updating the offset used while tearing down the ftrace stack when ftrace is disabled. Fix the same. In addition, the commit missed saving the correct stack pointer in pt_regs. Update the same. Fixes: 41a506ef71eb ("powerpc/ftrace: Create a dummy stackframe to fix stack unwind") Cc: stable@vger.kernel.org # v6.5+ Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://msgid.link/20231130065947.2188860-1-naveen@kernel.org --- arch/powerpc/kernel/trace/ftrace_entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 90701885762c..40677416d7b2 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -62,7 +62,7 @@ .endif /* Save previous stack pointer (r1) */ - addi r8, r1, SWITCH_FRAME_SIZE + addi r8, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE PPC_STL r8, GPR1(r1) .if \allregs == 1 @@ -182,7 +182,7 @@ ftrace_no_trace: mflr r3 mtctr r3 REST_GPR(3, r1) - addi r1, r1, SWITCH_FRAME_SIZE + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE mtlr r0 bctr #endif -- cgit From a5e400a985df8041ed4659ed1462aa9134318130 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 20 Aug 2023 20:58:56 +0300 Subject: net/mlx5e: Honor user choice of IPsec replay window size Users can configure IPsec replay window size, but mlx5 driver didn't honor their choice and set always 32bits. Fix assignment logic to configure right size from the beginning. Fixes: 7db21ef4566e ("net/mlx5e: Set IPsec replay sequence numbers") Reviewed-by: Patrisious Haddad Signed-off-by: Leon Romanovsky --- .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 21 +++++++++++++++++++++ .../mellanox/mlx5/core/en_accel/ipsec_offload.c | 2 +- include/linux/mlx5/mlx5_ifc.h | 7 +++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 655496598c68..4028932d93ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -335,6 +335,27 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, attrs->replay_esn.esn = sa_entry->esn_state.esn; attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb; attrs->replay_esn.overlap = sa_entry->esn_state.overlap; + switch (x->replay_esn->replay_window) { + case 32: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_32BIT; + break; + case 64: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_64BIT; + break; + case 128: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_128BIT; + break; + case 256: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_256BIT; + break; + default: + WARN_ON(true); + return; + } } attrs->dir = x->xso.dir; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index a91f772dc981..4e018fba2d5f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -95,7 +95,7 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, if (attrs->dir == XFRM_DEV_OFFLOAD_IN) { MLX5_SET(ipsec_aso, aso_ctx, window_sz, - attrs->replay_esn.replay_window / 64); + attrs->replay_esn.replay_window); MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_REPLAY_PROTECTION); } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6f3631425f38..90ca63f4bf63 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -12001,6 +12001,13 @@ enum { MLX5_IPSEC_ASO_INC_SN = 0x2, }; +enum { + MLX5_IPSEC_ASO_REPLAY_WIN_32BIT = 0x0, + MLX5_IPSEC_ASO_REPLAY_WIN_64BIT = 0x1, + MLX5_IPSEC_ASO_REPLAY_WIN_128BIT = 0x2, + MLX5_IPSEC_ASO_REPLAY_WIN_256BIT = 0x3, +}; + struct mlx5_ifc_ipsec_aso_bits { u8 valid[0x1]; u8 reserved_at_201[0x1]; -- cgit From 3d42c8cc67a8fcbff0181f9ed6d03d353edcee07 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 20 Sep 2023 10:07:13 +0300 Subject: net/mlx5e: Ensure that IPsec sequence packet number starts from 1 According to RFC4303, section "3.3.3. Sequence Number Generation", the first packet sent using a given SA will contain a sequence number of 1. However if user didn't set seq/oseq, the HW used zero as first sequence packet number. Such misconfiguration causes to drop of first packet if replay window protection was enabled in SA. To fix it, set sequence number to be at least 1. Fixes: 7db21ef4566e ("net/mlx5e: Set IPsec replay sequence numbers") Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 4028932d93ce..914b9e6eb7db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -121,7 +121,14 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO) esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom)); - sa_entry->esn_state.esn = esn; + if (sa_entry->esn_state.esn_msb) + sa_entry->esn_state.esn = esn; + else + /* According to RFC4303, section "3.3.3. Sequence Number Generation", + * the first packet sent using a given SA will contain a sequence + * number of 1. + */ + sa_entry->esn_state.esn = max_t(u32, esn, 1); sa_entry->esn_state.esn_msb = esn_msb; if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { -- cgit From 94af50c0a9bb961fe93cf0fdd14eb0883da86721 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 21 Sep 2023 14:06:18 +0300 Subject: net/mlx5e: Unify esw and normal IPsec status table creation/destruction Change normal IPsec flow to use the same creation/destruction functions for status flow table as that of ESW, which first of all refines the code to have less code duplication. And more importantly, the ESW status table handles IPsec syndrome checks at steering by HW, which is more efficient than the previous behaviour we had where it was copied to WQE meta data and checked by the driver. Fixes: 1762f132d542 ("net/mlx5e: Support IPsec packet offload for RX in switchdev mode") Signed-off-by: Patrisious Haddad Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 187 ++++++++++++++++----- .../net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c | 152 ----------------- .../net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h | 15 -- 3 files changed, 141 insertions(+), 213 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index f41c976dc33f..85ed5171e835 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -128,63 +128,166 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns, return mlx5_create_auto_grouped_flow_table(ns, &ft_attr); } -static int ipsec_status_rule(struct mlx5_core_dev *mdev, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest) +static void ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) { - u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + mlx5_del_flow_rules(rx->status_drop.rule); + mlx5_destroy_flow_group(rx->status_drop.group); + mlx5_fc_destroy(ipsec->mdev, rx->status_drop_cnt); +} + +static void ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + mlx5_del_flow_rules(rx->status.rule); + + if (rx != ipsec->rx_esw) + return; + +#ifdef CONFIG_MLX5_ESWITCH + mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0); +#endif +} + +static int ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table *ft = rx->ft.status; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {}; - struct mlx5_modify_hdr *modify_hdr; - struct mlx5_flow_handle *fte; + struct mlx5_flow_handle *rule; + struct mlx5_fc *flow_counter; struct mlx5_flow_spec *spec; - int err; + struct mlx5_flow_group *g; + u32 *flow_group_in; + int err = 0; + flow_group_in = kvzalloc(inlen, GFP_KERNEL); spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return -ENOMEM; + if (!flow_group_in || !spec) { + err = -ENOMEM; + goto err_out; + } - /* Action to copy 7 bit ipsec_syndrome to regB[24:30] */ - MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY); - MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME); - MLX5_SET(copy_action_in, action, src_offset, 0); - MLX5_SET(copy_action_in, action, length, 7); - MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); - MLX5_SET(copy_action_in, action, dst_offset, 24); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop flow group, err=%d\n", err); + goto err_out; + } - modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL, - 1, action); + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule counter, err=%d\n", err); + goto err_cnt; + } - if (IS_ERR(modify_hdr)) { - err = PTR_ERR(modify_hdr); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(flow_counter); + if (rx == ipsec->rx_esw) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); mlx5_core_err(mdev, - "fail to alloc ipsec copy modify_header_id err=%d\n", err); - goto out_spec; + "Failed to add ipsec rx status drop rule, err=%d\n", err); + goto err_rule; } - /* create fte */ - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + rx->status_drop.group = g; + rx->status_drop.rule = rule; + rx->status_drop_cnt = flow_counter; + + kvfree(flow_group_in); + kvfree(spec); + return 0; + +err_rule: + mlx5_fc_destroy(mdev, flow_counter); +err_cnt: + mlx5_destroy_flow_group(g); +err_out: + kvfree(flow_group_in); + kvfree(spec); + return err; +} + +static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_2.ipsec_syndrome); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.ipsec_syndrome, 0); + if (rx == ipsec->rx_esw) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + flow_act.flags = FLOW_ACT_NO_APPEND; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - flow_act.modify_hdr = modify_hdr; - fte = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); - if (IS_ERR(fte)) { - err = PTR_ERR(fte); - mlx5_core_err(mdev, "fail to add ipsec rx err copy rule err=%d\n", err); - goto out; + rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(ipsec->mdev, + "Failed to add ipsec rx status pass rule, err=%d\n", err); + goto err_rule; } + rx->status.rule = rule; kvfree(spec); - rx->status.rule = fte; - rx->status.modify_hdr = modify_hdr; return 0; -out: - mlx5_modify_header_dealloc(mdev, modify_hdr); -out_spec: +err_rule: kvfree(spec); return err; } +static void mlx5_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + ipsec_rx_status_pass_destroy(ipsec, rx); + ipsec_rx_status_drop_destroy(ipsec, rx); +} + +static int mlx5_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest) +{ + int err; + + err = ipsec_rx_status_drop_create(ipsec, rx); + if (err) + return err; + + err = ipsec_rx_status_pass_create(ipsec, rx, dest); + if (err) + goto err_pass_create; + + return 0; + +err_pass_create: + ipsec_rx_status_drop_destroy(ipsec, rx); + return err; +} + static int ipsec_miss_create(struct mlx5_core_dev *mdev, struct mlx5_flow_table *ft, struct mlx5e_ipsec_miss *miss, @@ -333,12 +436,7 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, mlx5_destroy_flow_table(rx->ft.sa); if (rx->allow_tunnel_mode) mlx5_eswitch_unblock_encap(mdev); - if (rx == ipsec->rx_esw) { - mlx5_esw_ipsec_rx_status_destroy(ipsec, rx); - } else { - mlx5_del_flow_rules(rx->status.rule); - mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr); - } + mlx5_ipsec_rx_status_destroy(ipsec, rx); mlx5_destroy_flow_table(rx->ft.status); mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family, mdev); @@ -428,10 +526,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[1].counter_id = mlx5_fc_id(rx->fc->cnt); - if (rx == ipsec->rx_esw) - err = mlx5_esw_ipsec_rx_status_create(ipsec, rx, dest); - else - err = ipsec_status_rule(mdev, rx, dest); + err = mlx5_ipsec_rx_status_create(ipsec, rx, dest); if (err) goto err_add; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 095f31f380fa..13b5916b64e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -21,158 +21,6 @@ enum { MLX5_ESW_IPSEC_TX_ESP_FT_CNT_LEVEL, }; -static void esw_ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) -{ - mlx5_del_flow_rules(rx->status_drop.rule); - mlx5_destroy_flow_group(rx->status_drop.group); - mlx5_fc_destroy(ipsec->mdev, rx->status_drop_cnt); -} - -static void esw_ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) -{ - mlx5_del_flow_rules(rx->status.rule); - mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0); -} - -static int esw_ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_flow_table *ft = rx->ft.status; - struct mlx5_core_dev *mdev = ipsec->mdev; - struct mlx5_flow_destination dest = {}; - struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_handle *rule; - struct mlx5_fc *flow_counter; - struct mlx5_flow_spec *spec; - struct mlx5_flow_group *g; - u32 *flow_group_in; - int err = 0; - - flow_group_in = kvzalloc(inlen, GFP_KERNEL); - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!flow_group_in || !spec) { - err = -ENOMEM; - goto err_out; - } - - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1); - g = mlx5_create_flow_group(ft, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - mlx5_core_err(mdev, - "Failed to add ipsec rx status drop flow group, err=%d\n", err); - goto err_out; - } - - flow_counter = mlx5_fc_create(mdev, false); - if (IS_ERR(flow_counter)) { - err = PTR_ERR(flow_counter); - mlx5_core_err(mdev, - "Failed to add ipsec rx status drop rule counter, err=%d\n", err); - goto err_cnt; - } - - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; - dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(flow_counter); - spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; - rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - mlx5_core_err(mdev, - "Failed to add ipsec rx status drop rule, err=%d\n", err); - goto err_rule; - } - - rx->status_drop.group = g; - rx->status_drop.rule = rule; - rx->status_drop_cnt = flow_counter; - - kvfree(flow_group_in); - kvfree(spec); - return 0; - -err_rule: - mlx5_fc_destroy(mdev, flow_counter); -err_cnt: - mlx5_destroy_flow_group(g); -err_out: - kvfree(flow_group_in); - kvfree(spec); - return err; -} - -static int esw_ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest) -{ - struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_handle *rule; - struct mlx5_flow_spec *spec; - int err; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return -ENOMEM; - - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, - misc_parameters_2.ipsec_syndrome); - MLX5_SET(fte_match_param, spec->match_value, - misc_parameters_2.ipsec_syndrome, 0); - spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; - flow_act.flags = FLOW_ACT_NO_APPEND; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - mlx5_core_warn(ipsec->mdev, - "Failed to add ipsec rx status pass rule, err=%d\n", err); - goto err_rule; - } - - rx->status.rule = rule; - kvfree(spec); - return 0; - -err_rule: - kvfree(spec); - return err; -} - -void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) -{ - esw_ipsec_rx_status_pass_destroy(ipsec, rx); - esw_ipsec_rx_status_drop_destroy(ipsec, rx); -} - -int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest) -{ - int err; - - err = esw_ipsec_rx_status_drop_create(ipsec, rx); - if (err) - return err; - - err = esw_ipsec_rx_status_pass_create(ipsec, rx, dest); - if (err) - goto err_pass_create; - - return 0; - -err_pass_create: - esw_ipsec_rx_status_drop_destroy(ipsec, rx); - return err; -} - void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx_create_attr *attr) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h index 0c90f7a8b0d3..ac9c65b89166 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h @@ -8,11 +8,6 @@ struct mlx5e_ipsec; struct mlx5e_ipsec_sa_entry; #ifdef CONFIG_MLX5_ESWITCH -void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx); -int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest); void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx_create_attr *attr); int mlx5_esw_ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec, @@ -26,16 +21,6 @@ void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx_create_attr *attr); void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev); #else -static inline void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) {} - -static inline int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest) -{ - return -EINVAL; -} - static inline void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx_create_attr *attr) {} -- cgit From 5ad00dee43b98129b86930d457b983d4d04dc553 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 3 Oct 2023 16:44:19 +0300 Subject: net/mlx5e: Remove exposure of IPsec RX flow steering struct After previous commit, which unified various IPsec creation modes, there is no need to have struct mlx5e_ipsec_rx exposed in global IPsec header. Move it to ipsec_fs.c to be placed together with already existing struct mlx5e_ipsec_tx. Fixes: 1762f132d542 ("net/mlx5e: Support IPsec packet offload for RX in switchdev mode") Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h | 14 +------------- .../net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c | 16 ++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c | 8 ++++---- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 8f4a37bceaf4..c3a40bf11952 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -201,19 +201,6 @@ struct mlx5e_ipsec_miss { struct mlx5_flow_handle *rule; }; -struct mlx5e_ipsec_rx { - struct mlx5e_ipsec_ft ft; - struct mlx5e_ipsec_miss pol; - struct mlx5e_ipsec_miss sa; - struct mlx5e_ipsec_rule status; - struct mlx5e_ipsec_miss status_drop; - struct mlx5_fc *status_drop_cnt; - struct mlx5e_ipsec_fc *fc; - struct mlx5_fs_chains *chains; - u8 allow_tunnel_mode : 1; - struct xarray ipsec_obj_id_map; -}; - struct mlx5e_ipsec_tx_create_attr { int prio; int pol_level; @@ -248,6 +235,7 @@ struct mlx5e_ipsec { struct mlx5_ipsec_fs *roce; u8 is_uplink_rep: 1; struct mlx5e_ipsec_mpv_work mpv_work; + struct xarray ipsec_obj_id_map; }; struct mlx5e_ipsec_esn_state { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 85ed5171e835..aa74a2422869 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -32,6 +32,18 @@ struct mlx5e_ipsec_tx { u8 allow_tunnel_mode : 1; }; +struct mlx5e_ipsec_rx { + struct mlx5e_ipsec_ft ft; + struct mlx5e_ipsec_miss pol; + struct mlx5e_ipsec_miss sa; + struct mlx5e_ipsec_rule status; + struct mlx5e_ipsec_miss status_drop; + struct mlx5_fc *status_drop_cnt; + struct mlx5e_ipsec_fc *fc; + struct mlx5_fs_chains *chains; + u8 allow_tunnel_mode : 1; +}; + /* IPsec RX flow steering */ static enum mlx5_traffic_types family2tt(u32 family) { @@ -2052,7 +2064,7 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec) kfree(ipsec->rx_ipv6); if (ipsec->is_uplink_rep) { - xa_destroy(&ipsec->rx_esw->ipsec_obj_id_map); + xa_destroy(&ipsec->ipsec_obj_id_map); mutex_destroy(&ipsec->tx_esw->ft.mutex); WARN_ON(ipsec->tx_esw->ft.refcnt); @@ -2115,7 +2127,7 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec, mutex_init(&ipsec->tx_esw->ft.mutex); mutex_init(&ipsec->rx_esw->ft.mutex); ipsec->tx_esw->ns = ns_esw; - xa_init_flags(&ipsec->rx_esw->ipsec_obj_id_map, XA_FLAGS_ALLOC1); + xa_init_flags(&ipsec->ipsec_obj_id_map, XA_FLAGS_ALLOC1); } else if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ROCE) { ipsec->roce = mlx5_ipsec_fs_roce_init(mdev, devcom); } else { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 13b5916b64e2..5a0047bdcb51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -50,7 +50,7 @@ int mlx5_esw_ipsec_rx_setup_modify_header(struct mlx5e_ipsec_sa_entry *sa_entry, u32 mapped_id; int err; - err = xa_alloc_bh(&ipsec->rx_esw->ipsec_obj_id_map, &mapped_id, + err = xa_alloc_bh(&ipsec->ipsec_obj_id_map, &mapped_id, xa_mk_value(sa_entry->ipsec_obj_id), XA_LIMIT(1, ESW_IPSEC_RX_MAPPED_ID_MASK), 0); if (err) @@ -81,7 +81,7 @@ int mlx5_esw_ipsec_rx_setup_modify_header(struct mlx5e_ipsec_sa_entry *sa_entry, return 0; err_header_alloc: - xa_erase_bh(&ipsec->rx_esw->ipsec_obj_id_map, mapped_id); + xa_erase_bh(&ipsec->ipsec_obj_id_map, mapped_id); return err; } @@ -90,7 +90,7 @@ void mlx5_esw_ipsec_rx_id_mapping_remove(struct mlx5e_ipsec_sa_entry *sa_entry) struct mlx5e_ipsec *ipsec = sa_entry->ipsec; if (sa_entry->rx_mapped_id) - xa_erase_bh(&ipsec->rx_esw->ipsec_obj_id_map, + xa_erase_bh(&ipsec->ipsec_obj_id_map, sa_entry->rx_mapped_id); } @@ -100,7 +100,7 @@ int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv, u32 id, struct mlx5e_ipsec *ipsec = priv->ipsec; void *val; - val = xa_load(&ipsec->rx_esw->ipsec_obj_id_map, id); + val = xa_load(&ipsec->ipsec_obj_id_map, id); if (!val) return -ENOENT; -- cgit From dddb49b63d8683be81cff220b94a0196c1367b74 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Sun, 24 Sep 2023 11:50:31 +0300 Subject: net/mlx5e: Add IPsec and ASO syndromes check in HW After IPsec decryption it isn't enough to only check the IPsec syndrome but need to also check the ASO syndrome in order to verify that the operation was actually successful. Verify that both syndromes are actually zero and in case not drop the packet and increment the appropriate flow counter for the drop reason. Fixes: 6b5c45e16e43 ("net/mlx5e: Configure IPsec packet offload flow steering") Signed-off-by: Patrisious Haddad Signed-off-by: Leon Romanovsky --- .../ethernet/mellanox/mlx5/core/en_accel/ipsec.h | 8 + .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 235 +++++++++++++++++++-- 2 files changed, 223 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index c3a40bf11952..adaea3493193 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -189,11 +189,19 @@ struct mlx5e_ipsec_ft { u32 refcnt; }; +struct mlx5e_ipsec_drop { + struct mlx5_flow_handle *rule; + struct mlx5_fc *fc; +}; + struct mlx5e_ipsec_rule { struct mlx5_flow_handle *rule; struct mlx5_modify_hdr *modify_hdr; struct mlx5_pkt_reformat *pkt_reformat; struct mlx5_fc *fc; + struct mlx5e_ipsec_drop replay; + struct mlx5e_ipsec_drop auth; + struct mlx5e_ipsec_drop trailer; }; struct mlx5e_ipsec_miss { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index aa74a2422869..aeb399d8dae5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -32,13 +32,17 @@ struct mlx5e_ipsec_tx { u8 allow_tunnel_mode : 1; }; +struct mlx5e_ipsec_status_checks { + struct mlx5_flow_group *drop_all_group; + struct mlx5e_ipsec_drop all; +}; + struct mlx5e_ipsec_rx { struct mlx5e_ipsec_ft ft; struct mlx5e_ipsec_miss pol; struct mlx5e_ipsec_miss sa; struct mlx5e_ipsec_rule status; - struct mlx5e_ipsec_miss status_drop; - struct mlx5_fc *status_drop_cnt; + struct mlx5e_ipsec_status_checks status_drops; struct mlx5e_ipsec_fc *fc; struct mlx5_fs_chains *chains; u8 allow_tunnel_mode : 1; @@ -143,9 +147,9 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns, static void ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx) { - mlx5_del_flow_rules(rx->status_drop.rule); - mlx5_destroy_flow_group(rx->status_drop.group); - mlx5_fc_destroy(ipsec->mdev, rx->status_drop_cnt); + mlx5_del_flow_rules(rx->status_drops.all.rule); + mlx5_fc_destroy(ipsec->mdev, rx->status_drops.all.fc); + mlx5_destroy_flow_group(rx->status_drops.drop_all_group); } static void ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, @@ -161,8 +165,149 @@ static void ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, #endif } -static int ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) +static int rx_add_rule_drop_auth_trailer(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5e_ipsec_rx *rx) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5_flow_table *ft = rx->ft.status; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_fc *flow_counter; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + flow_counter = mlx5_fc_create(mdev, true); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule counter, err=%d\n", err); + goto err_cnt; + } + sa_entry->ipsec_rule.auth.fc = flow_counter; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + flow_act.flags = FLOW_ACT_NO_APPEND; + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(flow_counter); + if (rx == ipsec->rx_esw) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.ipsec_syndrome); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.ipsec_syndrome, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_2); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.metadata_reg_c_2, + sa_entry->ipsec_obj_id | BIT(31)); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule, err=%d\n", err); + goto err_rule; + } + sa_entry->ipsec_rule.auth.rule = rule; + + flow_counter = mlx5_fc_create(mdev, true); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule counter, err=%d\n", err); + goto err_cnt_2; + } + sa_entry->ipsec_rule.trailer.fc = flow_counter; + + dest.counter_id = mlx5_fc_id(flow_counter); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.ipsec_syndrome, 2); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule, err=%d\n", err); + goto err_rule_2; + } + sa_entry->ipsec_rule.trailer.rule = rule; + + kvfree(spec); + return 0; + +err_rule_2: + mlx5_fc_destroy(mdev, sa_entry->ipsec_rule.trailer.fc); +err_cnt_2: + mlx5_del_flow_rules(sa_entry->ipsec_rule.auth.rule); +err_rule: + mlx5_fc_destroy(mdev, sa_entry->ipsec_rule.auth.fc); +err_cnt: + kvfree(spec); + return err; +} + +static int rx_add_rule_drop_replay(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5e_ipsec_rx *rx) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5_flow_table *ft = rx->ft.status; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_fc *flow_counter; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + flow_counter = mlx5_fc_create(mdev, true); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule counter, err=%d\n", err); + goto err_cnt; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + flow_act.flags = FLOW_ACT_NO_APPEND; + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(flow_counter); + if (rx == ipsec->rx_esw) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_2); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_2, + sa_entry->ipsec_obj_id | BIT(31)); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule, err=%d\n", err); + goto err_rule; + } + + sa_entry->ipsec_rule.replay.rule = rule; + sa_entry->ipsec_rule.replay.fc = flow_counter; + + kvfree(spec); + return 0; + +err_rule: + mlx5_fc_destroy(mdev, flow_counter); +err_cnt: + kvfree(spec); + return err; +} + +static int ipsec_rx_status_drop_all_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table *ft = rx->ft.status; @@ -214,9 +359,9 @@ static int ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec, goto err_rule; } - rx->status_drop.group = g; - rx->status_drop.rule = rule; - rx->status_drop_cnt = flow_counter; + rx->status_drops.drop_all_group = g; + rx->status_drops.all.rule = rule; + rx->status_drops.all.fc = flow_counter; kvfree(flow_group_in); kvfree(spec); @@ -247,8 +392,12 @@ static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.ipsec_syndrome); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_4); MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.ipsec_syndrome, 0); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.metadata_reg_c_4, 0); if (rx == ipsec->rx_esw) spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; @@ -285,7 +434,7 @@ static int mlx5_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, { int err; - err = ipsec_rx_status_drop_create(ipsec, rx); + err = ipsec_rx_status_drop_all_create(ipsec, rx); if (err) return err; @@ -529,7 +678,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, if (err) return err; - ft = ipsec_ft_create(attr.ns, attr.status_level, attr.prio, 1, 0); + ft = ipsec_ft_create(attr.ns, attr.status_level, attr.prio, 3, 0); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_fs_ft_status; @@ -1159,29 +1308,48 @@ static int setup_modify_header(struct mlx5e_ipsec *ipsec, int type, u32 val, u8 struct mlx5_flow_act *flow_act) { enum mlx5_flow_namespace_type ns_type = ipsec_fs_get_ns(ipsec, type, dir); - u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + u8 action[3][MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; struct mlx5_core_dev *mdev = ipsec->mdev; struct mlx5_modify_hdr *modify_hdr; + u8 num_of_actions = 1; - MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action[0], action_type, MLX5_ACTION_TYPE_SET); switch (dir) { case XFRM_DEV_OFFLOAD_IN: - MLX5_SET(set_action_in, action, field, + MLX5_SET(set_action_in, action[0], field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); + + num_of_actions++; + MLX5_SET(set_action_in, action[1], action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action[1], field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_2); + MLX5_SET(set_action_in, action[1], data, val); + MLX5_SET(set_action_in, action[1], offset, 0); + MLX5_SET(set_action_in, action[1], length, 32); + + if (type == XFRM_DEV_OFFLOAD_CRYPTO) { + num_of_actions++; + MLX5_SET(set_action_in, action[2], action_type, + MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action[2], field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_4); + MLX5_SET(set_action_in, action[2], data, 0); + MLX5_SET(set_action_in, action[2], offset, 0); + MLX5_SET(set_action_in, action[2], length, 32); + } break; case XFRM_DEV_OFFLOAD_OUT: - MLX5_SET(set_action_in, action, field, + MLX5_SET(set_action_in, action[0], field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_4); break; default: return -EINVAL; } - MLX5_SET(set_action_in, action, data, val); - MLX5_SET(set_action_in, action, offset, 0); - MLX5_SET(set_action_in, action, length, 32); + MLX5_SET(set_action_in, action[0], data, val); + MLX5_SET(set_action_in, action[0], offset, 0); + MLX5_SET(set_action_in, action[0], length, 32); - modify_hdr = mlx5_modify_header_alloc(mdev, ns_type, 1, action); + modify_hdr = mlx5_modify_header_alloc(mdev, ns_type, num_of_actions, action); if (IS_ERR(modify_hdr)) { mlx5_core_err(mdev, "Failed to allocate modify_header %ld\n", PTR_ERR(modify_hdr)); @@ -1479,6 +1647,15 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) mlx5_core_err(mdev, "fail to add RX ipsec rule err=%d\n", err); goto err_add_flow; } + if (attrs->type == XFRM_DEV_OFFLOAD_PACKET) + err = rx_add_rule_drop_replay(sa_entry, rx); + if (err) + goto err_add_replay; + + err = rx_add_rule_drop_auth_trailer(sa_entry, rx); + if (err) + goto err_drop_reason; + kvfree(spec); sa_entry->ipsec_rule.rule = rule; @@ -1487,6 +1664,13 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) sa_entry->ipsec_rule.pkt_reformat = flow_act.pkt_reformat; return 0; +err_drop_reason: + if (sa_entry->ipsec_rule.replay.rule) { + mlx5_del_flow_rules(sa_entry->ipsec_rule.replay.rule); + mlx5_fc_destroy(mdev, sa_entry->ipsec_rule.replay.fc); + } +err_add_replay: + mlx5_del_flow_rules(rule); err_add_flow: mlx5_fc_destroy(mdev, counter); err_add_cnt: @@ -1994,6 +2178,17 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry) if (ipsec_rule->modify_hdr) mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr); + + mlx5_del_flow_rules(ipsec_rule->trailer.rule); + mlx5_fc_destroy(mdev, ipsec_rule->trailer.fc); + + mlx5_del_flow_rules(ipsec_rule->auth.rule); + mlx5_fc_destroy(mdev, ipsec_rule->auth.fc); + + if (ipsec_rule->replay.rule) { + mlx5_del_flow_rules(ipsec_rule->replay.rule); + mlx5_fc_destroy(mdev, ipsec_rule->replay.fc); + } mlx5_esw_ipsec_rx_id_mapping_remove(sa_entry); rx_ft_put(sa_entry->ipsec, sa_entry->attrs.family, sa_entry->attrs.type); } -- cgit From c2bf84f1d1a1595dcc45fe867f0e02b331993fee Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 12 Nov 2023 13:50:00 +0200 Subject: net/mlx5e: Tidy up IPsec NAT-T SA discovery IPsec NAT-T packets are UDP encapsulated packets over ESP normal ones. In case they arrive to RX, the SPI and ESP are located in inner header, while the check was performed on outer header instead. That wrong check caused to the situation where received rekeying request was missed and caused to rekey timeout, which "compensated" this failure by completing rekeying. Fixes: d65954934937 ("net/mlx5e: Support IPsec NAT-T functionality") Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 22 ++++++++++++++++------ include/linux/mlx5/mlx5_ifc.h | 2 +- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index aeb399d8dae5..7a789061c998 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -1212,13 +1212,22 @@ static void setup_fte_esp(struct mlx5_flow_spec *spec) MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_ESP); } -static void setup_fte_spi(struct mlx5_flow_spec *spec, u32 spi) +static void setup_fte_spi(struct mlx5_flow_spec *spec, u32 spi, bool encap) { /* SPI number */ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters.outer_esp_spi); - MLX5_SET(fte_match_param, spec->match_value, misc_parameters.outer_esp_spi, spi); + if (encap) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.inner_esp_spi); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters.inner_esp_spi, spi); + } else { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.outer_esp_spi); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters.outer_esp_spi, spi); + } } static void setup_fte_no_frags(struct mlx5_flow_spec *spec) @@ -1596,8 +1605,9 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) else setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); - setup_fte_spi(spec, attrs->spi); - setup_fte_esp(spec); + setup_fte_spi(spec, attrs->spi, attrs->encap); + if (!attrs->encap) + setup_fte_esp(spec); setup_fte_no_frags(spec); setup_fte_upper_proto_match(spec, &attrs->upspec); @@ -1719,7 +1729,7 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) switch (attrs->type) { case XFRM_DEV_OFFLOAD_CRYPTO: - setup_fte_spi(spec, attrs->spi); + setup_fte_spi(spec, attrs->spi, false); setup_fte_esp(spec); setup_fte_reg_a(spec); break; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 90ca63f4bf63..3f7b664d625b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -621,7 +621,7 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_at_140[0x8]; u8 bth_dst_qp[0x18]; - u8 reserved_at_160[0x20]; + u8 inner_esp_spi[0x20]; u8 outer_esp_spi[0x20]; u8 reserved_at_1a0[0x60]; }; -- cgit From baac8351f74c543896b8fd40138b7ad9365587a3 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Wed, 11 Oct 2023 03:38:29 +0000 Subject: net/mlx5e: Reduce eswitch mode_lock protection context Currently eswitch mode_lock is so heavy, for example, it's locked during the whole process of the mode change, which may need to hold other locks. As the mode_lock is also used by IPSec to block mode and encap change now, it is easy to cause lock dependency. Since some of protections are also done by devlink lock, the eswitch mode_lock is not needed at those places, and thus the possibility of lockdep issue is reduced. Fixes: c8e350e62fc5 ("net/mlx5e: Make TC and IPsec offloads mutually exclusive on a netdev") Signed-off-by: Jianbo Liu Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 9 +++-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 35 ++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 ++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 38 +++++++++++++--------- 4 files changed, 52 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 7a789061c998..c1e89dc77db9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -2110,8 +2110,11 @@ static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev) struct mlx5_eswitch *esw = mdev->priv.eswitch; int err = 0; - if (esw) - down_write(&esw->mode_lock); + if (esw) { + err = mlx5_esw_lock(esw); + if (err) + return err; + } if (mdev->num_block_ipsec) { err = -EBUSY; @@ -2122,7 +2125,7 @@ static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev) unlock: if (esw) - up_write(&esw->mode_lock); + mlx5_esw_unlock(esw); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8d0b915a3121..3047d7015c52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1463,7 +1463,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) { int err; - lockdep_assert_held(&esw->mode_lock); + devl_assert_locked(priv_to_devlink(esw->dev)); if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { esw_warn(esw->dev, "FDB is not supported, aborting ...\n"); @@ -1531,7 +1531,6 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) if (toggle_lag) mlx5_lag_disable_change(esw->dev); - down_write(&esw->mode_lock); if (!mlx5_esw_is_fdb_created(esw)) { ret = mlx5_eswitch_enable_locked(esw, num_vfs); } else { @@ -1554,8 +1553,6 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) } } - up_write(&esw->mode_lock); - if (toggle_lag) mlx5_lag_enable_change(esw->dev); @@ -1569,12 +1566,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) return; devl_assert_locked(priv_to_devlink(esw->dev)); - down_write(&esw->mode_lock); /* If driver is unloaded, this function is called twice by remove_one() * and mlx5_unload(). Prevent the second call. */ if (!esw->esw_funcs.num_vfs && !esw->esw_funcs.num_ec_vfs && !clear_vf) - goto unlock; + return; esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", @@ -1603,9 +1599,6 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) esw->esw_funcs.num_vfs = 0; else esw->esw_funcs.num_ec_vfs = 0; - -unlock: - up_write(&esw->mode_lock); } /* Free resources for corresponding eswitch mode. It is called by devlink @@ -1647,10 +1640,8 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw) devl_assert_locked(priv_to_devlink(esw->dev)); mlx5_lag_disable_change(esw->dev); - down_write(&esw->mode_lock); mlx5_eswitch_disable_locked(esw); esw->mode = MLX5_ESWITCH_LEGACY; - up_write(&esw->mode_lock); mlx5_lag_enable_change(esw->dev); } @@ -2254,8 +2245,13 @@ bool mlx5_esw_hold(struct mlx5_core_dev *mdev) if (!mlx5_esw_allowed(esw)) return true; - if (down_read_trylock(&esw->mode_lock) != 0) + if (down_read_trylock(&esw->mode_lock) != 0) { + if (esw->eswitch_operation_in_progress) { + up_read(&esw->mode_lock); + return false; + } return true; + } return false; } @@ -2312,7 +2308,8 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw) if (down_write_trylock(&esw->mode_lock) == 0) return -EINVAL; - if (atomic64_read(&esw->user_count) > 0) { + if (esw->eswitch_operation_in_progress || + atomic64_read(&esw->user_count) > 0) { up_write(&esw->mode_lock); return -EBUSY; } @@ -2320,6 +2317,18 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw) return esw->mode; } +int mlx5_esw_lock(struct mlx5_eswitch *esw) +{ + down_write(&esw->mode_lock); + + if (esw->eswitch_operation_in_progress) { + up_write(&esw->mode_lock); + return -EBUSY; + } + + return 0; +} + /** * mlx5_esw_unlock() - Release write lock on esw mode lock * @esw: eswitch device. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 37ab66e7b403..b674b57d05aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -383,6 +383,7 @@ struct mlx5_eswitch { struct xarray paired; struct mlx5_devcom_comp_dev *devcom; u16 enabled_ipsec_vf_count; + bool eswitch_operation_in_progress; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -827,6 +828,7 @@ void mlx5_esw_release(struct mlx5_core_dev *dev); void mlx5_esw_get(struct mlx5_core_dev *dev); void mlx5_esw_put(struct mlx5_core_dev *dev); int mlx5_esw_try_lock(struct mlx5_eswitch *esw); +int mlx5_esw_lock(struct mlx5_eswitch *esw); void mlx5_esw_unlock(struct mlx5_eswitch *esw); void esw_vport_change_handle_locked(struct mlx5_vport *vport); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 88236e75fd90..bf78eeca401b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3733,13 +3733,16 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, goto unlock; } + esw->eswitch_operation_in_progress = true; + up_write(&esw->mode_lock); + mlx5_eswitch_disable_locked(esw); if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) { if (mlx5_devlink_trap_get_num_active(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, "Can't change mode while devlink traps are active"); err = -EOPNOTSUPP; - goto unlock; + goto skip; } err = esw_offloads_start(esw, extack); } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) { @@ -3749,6 +3752,9 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, err = -EINVAL; } +skip: + down_write(&esw->mode_lock); + esw->eswitch_operation_in_progress = false; unlock: mlx5_esw_unlock(esw); enable_lag: @@ -3759,16 +3765,12 @@ enable_lag: int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) { struct mlx5_eswitch *esw; - int err; esw = mlx5_devlink_eswitch_get(devlink); if (IS_ERR(esw)) return PTR_ERR(esw); - down_read(&esw->mode_lock); - err = esw_mode_to_devlink(esw->mode, mode); - up_read(&esw->mode_lock); - return err; + return esw_mode_to_devlink(esw->mode, mode); } static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode, @@ -3862,11 +3864,15 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (err) goto out; + esw->eswitch_operation_in_progress = true; + up_write(&esw->mode_lock); + err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack); - if (err) - goto out; + if (!err) + esw->offloads.inline_mode = mlx5_mode; - esw->offloads.inline_mode = mlx5_mode; + down_write(&esw->mode_lock); + esw->eswitch_operation_in_progress = false; up_write(&esw->mode_lock); return 0; @@ -3878,16 +3884,12 @@ out: int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) { struct mlx5_eswitch *esw; - int err; esw = mlx5_devlink_eswitch_get(devlink); if (IS_ERR(esw)) return PTR_ERR(esw); - down_read(&esw->mode_lock); - err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); - up_read(&esw->mode_lock); - return err; + return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); } bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev) @@ -3969,6 +3971,9 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, goto unlock; } + esw->eswitch_operation_in_progress = true; + up_write(&esw->mode_lock); + esw_destroy_offloads_fdb_tables(esw); esw->offloads.encap = encap; @@ -3982,6 +3987,9 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, (void)esw_create_offloads_fdb_tables(esw); } + down_write(&esw->mode_lock); + esw->eswitch_operation_in_progress = false; + unlock: up_write(&esw->mode_lock); return err; @@ -3996,9 +4004,7 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, if (IS_ERR(esw)) return PTR_ERR(esw); - down_read(&esw->mode_lock); *encap = esw->offloads.encap; - up_read(&esw->mode_lock); return 0; } -- cgit From 4e25b661f484df54b6751b65f9ea2434a3b67539 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 12 Oct 2023 02:00:44 +0000 Subject: net/mlx5e: Check the number of elements before walk TC rhashtable After IPSec TX tables are destroyed, the flow rules in TC rhashtable, which have the destination to IPSec, are restored to the original one, the uplink. However, when the device is in switchdev mode and unload driver with IPSec rules configured, TC rhashtable cleanup is done before IPSec cleanup, which means tc_ht->tbl is already freed when walking TC rhashtable, in order to restore the destination. So add the checking before walking to avoid unexpected behavior. Fixes: d1569537a837 ("net/mlx5e: Modify and restore TC rules for IPSec TX rules") Signed-off-by: Jianbo Liu Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 5a0047bdcb51..190f10aba170 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -152,7 +152,7 @@ void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) xa_for_each(&esw->offloads.vport_reps, i, rep) { rpriv = rep->rep_data[REP_ETH].priv; - if (!rpriv || !rpriv->netdev) + if (!rpriv || !rpriv->netdev || !atomic_read(&rpriv->tc_ht.nelems)) continue; rhashtable_walk_enter(&rpriv->tc_ht, &iter); -- cgit From 762a55a54eec4217e4cec9265ab6e5d4c11b61bd Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 30 Oct 2023 15:44:47 +0200 Subject: net/mlx5e: Disable IPsec offload support if not FW steering IPsec FDB offload can only work with FW steering as of now, disable the cap upon non FW steering. And since the IPSec cap is dynamic now based on steering mode. Cleanup the resources if they exist instead of checking the IPsec cap again. Fixes: edd8b295f9e2 ("Merge branch 'mlx5-ipsec-packet-offload-support-in-eswitch-mode'") Signed-off-by: Chris Mi Signed-off-by: Leon Romanovsky --- .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 26 +++++++++------------- .../mellanox/mlx5/core/en_accel/ipsec_offload.c | 8 ++++++- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 914b9e6eb7db..161c5190c236 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -935,9 +935,11 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) return; mlx5e_accel_ipsec_fs_cleanup(ipsec); - if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) + if (ipsec->netevent_nb.notifier_call) { unregister_netevent_notifier(&ipsec->netevent_nb); - if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) + ipsec->netevent_nb.notifier_call = NULL; + } + if (ipsec->aso) mlx5e_ipsec_aso_cleanup(ipsec); destroy_workqueue(ipsec->wq); kfree(ipsec); @@ -1046,6 +1048,12 @@ static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev, } } + if (x->xdo.type == XFRM_DEV_OFFLOAD_PACKET && + !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)) { + NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported"); + return -EINVAL; + } + return 0; } @@ -1141,14 +1149,6 @@ static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = { .xdo_dev_state_free = mlx5e_xfrm_free_state, .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, -}; - -static const struct xfrmdev_ops mlx5e_ipsec_packet_xfrmdev_ops = { - .xdo_dev_state_add = mlx5e_xfrm_add_state, - .xdo_dev_state_delete = mlx5e_xfrm_del_state, - .xdo_dev_state_free = mlx5e_xfrm_free_state, - .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, - .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, .xdo_dev_state_update_curlft = mlx5e_xfrm_update_curlft, .xdo_dev_policy_add = mlx5e_xfrm_add_policy, @@ -1166,11 +1166,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n"); - if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) - netdev->xfrmdev_ops = &mlx5e_ipsec_packet_xfrmdev_ops; - else - netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; - + netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; netdev->features |= NETIF_F_HW_ESP; netdev->hw_enc_features |= NETIF_F_HW_ESP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 4e018fba2d5f..6e00afe4671b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -6,6 +6,8 @@ #include "ipsec.h" #include "lib/crypto.h" #include "lib/ipsec_fs_roce.h" +#include "fs_core.h" +#include "eswitch.h" enum { MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET, @@ -38,7 +40,10 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) MLX5_CAP_ETH(mdev, insert_trailer) && MLX5_CAP_ETH(mdev, swp)) caps |= MLX5_IPSEC_CAP_CRYPTO; - if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload)) { + if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload) && + (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS || + (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS && + is_mdev_legacy_mode(mdev)))) { if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_esp_trasport) && MLX5_CAP_FLOWTABLE_NIC_RX(mdev, @@ -559,6 +564,7 @@ void mlx5e_ipsec_aso_cleanup(struct mlx5e_ipsec *ipsec) dma_unmap_single(pdev, aso->dma_addr, sizeof(aso->ctx), DMA_BIDIRECTIONAL); kfree(aso); + ipsec->aso = NULL; } static void mlx5e_ipsec_aso_copy(struct mlx5_wqe_aso_ctrl_seg *ctrl, -- cgit From eab0da38912ebdad922ed0388209f7eb0a5163cd Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 21 Sep 2022 18:45:11 +0300 Subject: net/mlx5e: Fix possible deadlock on mlx5e_tx_timeout_work Due to the cited patch, devlink health commands take devlink lock and this may result in deadlock for mlx5e_tx_reporter as it takes local state_lock before calling devlink health report and on the other hand devlink health commands such as diagnose for same reporter take local state_lock after taking devlink lock (see kernel log below). To fix it, remove local state_lock from mlx5e_tx_timeout_work() before calling devlink_health_report() and take care to cancel the work before any call to close channels, which may free the SQs that should be handled by the work. Before cancel_work_sync(), use current_work() to check we are not calling it from within the work, as mlx5e_tx_timeout_work() itself may close the channels and reopen as part of recovery flow. While removing state_lock from mlx5e_tx_timeout_work() keep rtnl_lock to ensure no change in netdev->real_num_tx_queues, but use rtnl_trylock() and a flag to avoid deadlock by calling cancel_work_sync() before closing the channels while holding rtnl_lock too. Kernel log: ====================================================== WARNING: possible circular locking dependency detected 6.0.0-rc3_for_upstream_debug_2022_08_30_13_10 #1 Not tainted ------------------------------------------------------ kworker/u16:2/65 is trying to acquire lock: ffff888122f6c2f8 (&devlink->lock_key#2){+.+.}-{3:3}, at: devlink_health_report+0x2f1/0x7e0 but task is already holding lock: ffff888121d20be0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&priv->state_lock){+.+.}-{3:3}: __mutex_lock+0x12c/0x14b0 mlx5e_rx_reporter_diagnose+0x71/0x700 [mlx5_core] devlink_nl_cmd_health_reporter_diagnose_doit+0x212/0xa50 genl_family_rcv_msg_doit+0x1e9/0x2f0 genl_rcv_msg+0x2e9/0x530 netlink_rcv_skb+0x11d/0x340 genl_rcv+0x24/0x40 netlink_unicast+0x438/0x710 netlink_sendmsg+0x788/0xc40 sock_sendmsg+0xb0/0xe0 __sys_sendto+0x1c1/0x290 __x64_sys_sendto+0xdd/0x1b0 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 -> #0 (&devlink->lock_key#2){+.+.}-{3:3}: __lock_acquire+0x2c8a/0x6200 lock_acquire+0x1c1/0x550 __mutex_lock+0x12c/0x14b0 devlink_health_report+0x2f1/0x7e0 mlx5e_health_report+0xc9/0xd7 [mlx5_core] mlx5e_reporter_tx_timeout+0x2ab/0x3d0 [mlx5_core] mlx5e_tx_timeout_work+0x1c1/0x280 [mlx5_core] process_one_work+0x7c2/0x1340 worker_thread+0x59d/0xec0 kthread+0x28f/0x330 ret_from_fork+0x1f/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&priv->state_lock); lock(&devlink->lock_key#2); lock(&priv->state_lock); lock(&devlink->lock_key#2); *** DEADLOCK *** 4 locks held by kworker/u16:2/65: #0: ffff88811a55b138 ((wq_completion)mlx5e#2){+.+.}-{0:0}, at: process_one_work+0x6e2/0x1340 #1: ffff888101de7db8 ((work_completion)(&priv->tx_timeout_work)){+.+.}-{0:0}, at: process_one_work+0x70f/0x1340 #2: ffffffff84ce8328 (rtnl_mutex){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x53/0x280 [mlx5_core] #3: ffff888121d20be0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core] stack backtrace: CPU: 1 PID: 65 Comm: kworker/u16:2 Not tainted 6.0.0-rc3_for_upstream_debug_2022_08_30_13_10 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5e mlx5e_tx_timeout_work [mlx5_core] Call Trace: dump_stack_lvl+0x57/0x7d check_noncircular+0x278/0x300 ? print_circular_bug+0x460/0x460 ? find_held_lock+0x2d/0x110 ? __stack_depot_save+0x24c/0x520 ? alloc_chain_hlocks+0x228/0x700 __lock_acquire+0x2c8a/0x6200 ? register_lock_class+0x1860/0x1860 ? kasan_save_stack+0x1e/0x40 ? kasan_set_free_info+0x20/0x30 ? ____kasan_slab_free+0x11d/0x1b0 ? kfree+0x1ba/0x520 ? devlink_health_do_dump.part.0+0x171/0x3a0 ? devlink_health_report+0x3d5/0x7e0 lock_acquire+0x1c1/0x550 ? devlink_health_report+0x2f1/0x7e0 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? find_held_lock+0x2d/0x110 __mutex_lock+0x12c/0x14b0 ? devlink_health_report+0x2f1/0x7e0 ? devlink_health_report+0x2f1/0x7e0 ? mutex_lock_io_nested+0x1320/0x1320 ? trace_hardirqs_on+0x2d/0x100 ? bit_wait_io_timeout+0x170/0x170 ? devlink_health_do_dump.part.0+0x171/0x3a0 ? kfree+0x1ba/0x520 ? devlink_health_do_dump.part.0+0x171/0x3a0 devlink_health_report+0x2f1/0x7e0 mlx5e_health_report+0xc9/0xd7 [mlx5_core] mlx5e_reporter_tx_timeout+0x2ab/0x3d0 [mlx5_core] ? lockdep_hardirqs_on_prepare+0x400/0x400 ? mlx5e_reporter_tx_err_cqe+0x1b0/0x1b0 [mlx5_core] ? mlx5e_tx_reporter_timeout_dump+0x70/0x70 [mlx5_core] ? mlx5e_tx_reporter_dump_sq+0x320/0x320 [mlx5_core] ? mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core] ? mutex_lock_io_nested+0x1320/0x1320 ? process_one_work+0x70f/0x1340 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? lock_downgrade+0x6e0/0x6e0 mlx5e_tx_timeout_work+0x1c1/0x280 [mlx5_core] process_one_work+0x7c2/0x1340 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? pwq_dec_nr_in_flight+0x230/0x230 ? rwlock_bug.part.0+0x90/0x90 worker_thread+0x59d/0xec0 ? process_one_work+0x1340/0x1340 kthread+0x28f/0x330 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Fixes: c90005b5f75c ("devlink: Hold the instance lock in health callbacks") Signed-off-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 27 ++++++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b2a5da9739d2..729a11b5fb25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -826,6 +826,7 @@ enum { MLX5E_STATE_DESTROYING, MLX5E_STATE_XDP_TX_ENABLED, MLX5E_STATE_XDP_ACTIVE, + MLX5E_STATE_CHANNELS_ACTIVE, }; struct mlx5e_modify_sq_param { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ea58c6917433..0c87ddb8a7a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2731,6 +2731,7 @@ void mlx5e_close_channels(struct mlx5e_channels *chs) { int i; + ASSERT_RTNL(); if (chs->ptp) { mlx5e_ptp_close(chs->ptp); chs->ptp = NULL; @@ -3012,17 +3013,29 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) if (mlx5e_is_vport_rep(priv)) mlx5e_rep_activate_channels(priv); + set_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); + mlx5e_wait_channels_min_rx_wqes(&priv->channels); if (priv->rx_res) mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels); } +static void mlx5e_cancel_tx_timeout_work(struct mlx5e_priv *priv) +{ + WARN_ON_ONCE(test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)); + if (current_work() != &priv->tx_timeout_work) + cancel_work_sync(&priv->tx_timeout_work); +} + void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { if (priv->rx_res) mlx5e_rx_res_channels_deactivate(priv->rx_res); + clear_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); + mlx5e_cancel_tx_timeout_work(priv); + if (mlx5e_is_vport_rep(priv)) mlx5e_rep_deactivate_channels(priv); @@ -4801,8 +4814,17 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) struct net_device *netdev = priv->netdev; int i; - rtnl_lock(); - mutex_lock(&priv->state_lock); + /* Take rtnl_lock to ensure no change in netdev->real_num_tx_queues + * through this flow. However, channel closing flows have to wait for + * this work to finish while holding rtnl lock too. So either get the + * lock or find that channels are being closed for other reason and + * this work is not relevant anymore. + */ + while (!rtnl_trylock()) { + if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)) + return; + msleep(20); + } if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) goto unlock; @@ -4821,7 +4843,6 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) } unlock: - mutex_unlock(&priv->state_lock); rtnl_unlock(); } -- cgit From ccbe33003b109f14c4dde2a4fca9c2a50c423601 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 11 Sep 2023 13:28:10 +0300 Subject: net/mlx5e: TC, Don't offload post action rule if not supported If post action is not supported, eg. ignore_flow_level is not supported, don't offload post action rule. Otherwise, will hit panic [1]. Fix it by checking if post action table is valid or not. [1] [445537.863880] BUG: unable to handle page fault for address: ffffffffffffffb1 [445537.864617] #PF: supervisor read access in kernel mode [445537.865244] #PF: error_code(0x0000) - not-present page [445537.865860] PGD 70683a067 P4D 70683a067 PUD 70683c067 PMD 0 [445537.866497] Oops: 0000 [#1] PREEMPT SMP NOPTI [445537.867077] CPU: 19 PID: 248742 Comm: tc Kdump: loaded Tainted: G O 6.5.0+ #1 [445537.867888] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [445537.868834] RIP: 0010:mlx5e_tc_post_act_add+0x51/0x130 [mlx5_core] [445537.869635] Code: c0 0d 00 00 e8 20 96 c6 d3 48 85 c0 0f 84 e5 00 00 00 c7 83 b0 01 00 00 00 00 00 00 49 89 c5 31 c0 31 d2 66 89 83 b4 01 00 00 <49> 8b 44 24 10 83 23 df 83 8b d8 01 00 00 04 48 89 83 c0 01 00 00 [445537.871318] RSP: 0018:ffffb98741cef428 EFLAGS: 00010246 [445537.871962] RAX: 0000000000000000 RBX: ffff8df341167000 RCX: 0000000000000001 [445537.872704] RDX: 0000000000000000 RSI: ffffffff954844e1 RDI: ffffffff9546e9cb [445537.873430] RBP: ffffb98741cef448 R08: 0000000000000020 R09: 0000000000000246 [445537.874160] R10: 0000000000000000 R11: ffffffff943f73ff R12: ffffffffffffffa1 [445537.874893] R13: ffff8df36d336c20 R14: ffffffffffffffa1 R15: ffff8df341167000 [445537.875628] FS: 00007fcd6564f800(0000) GS:ffff8dfa9ea00000(0000) knlGS:0000000000000000 [445537.876425] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [445537.877090] CR2: ffffffffffffffb1 CR3: 00000003b5884001 CR4: 0000000000770ee0 [445537.877832] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [445537.878564] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [445537.879300] PKRU: 55555554 [445537.879797] Call Trace: [445537.880263] [445537.880713] ? show_regs+0x6e/0x80 [445537.881232] ? __die+0x29/0x70 [445537.881731] ? page_fault_oops+0x85/0x160 [445537.882276] ? search_exception_tables+0x65/0x70 [445537.882852] ? kernelmode_fixup_or_oops+0xa2/0x120 [445537.883432] ? __bad_area_nosemaphore+0x18b/0x250 [445537.884019] ? bad_area_nosemaphore+0x16/0x20 [445537.884566] ? do_kern_addr_fault+0x8b/0xa0 [445537.885105] ? exc_page_fault+0xf5/0x1c0 [445537.885623] ? asm_exc_page_fault+0x2b/0x30 [445537.886149] ? __kmem_cache_alloc_node+0x1df/0x2a0 [445537.886717] ? mlx5e_tc_post_act_add+0x51/0x130 [mlx5_core] [445537.887431] ? mlx5e_tc_post_act_add+0x30/0x130 [mlx5_core] [445537.888172] alloc_flow_post_acts+0xfb/0x1c0 [mlx5_core] [445537.888849] parse_tc_actions+0x582/0x5c0 [mlx5_core] [445537.889505] parse_tc_fdb_actions+0xd7/0x1f0 [mlx5_core] [445537.890175] __mlx5e_add_fdb_flow+0x1ab/0x2b0 [mlx5_core] [445537.890843] mlx5e_add_fdb_flow+0x56/0x120 [mlx5_core] [445537.891491] ? debug_smp_processor_id+0x1b/0x30 [445537.892037] mlx5e_tc_add_flow+0x79/0x90 [mlx5_core] [445537.892676] mlx5e_configure_flower+0x305/0x450 [mlx5_core] [445537.893341] mlx5e_rep_setup_tc_cls_flower+0x3d/0x80 [mlx5_core] [445537.894037] mlx5e_rep_setup_tc_cb+0x5c/0xa0 [mlx5_core] [445537.894693] tc_setup_cb_add+0xdc/0x220 [445537.895177] fl_hw_replace_filter+0x15f/0x220 [cls_flower] [445537.895767] fl_change+0xe87/0x1190 [cls_flower] [445537.896302] tc_new_tfilter+0x484/0xa50 Fixes: f0da4daa3413 ("net/mlx5e: Refactor ct to use post action infrastructure") Signed-off-by: Chris Mi Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed Reviewed-by: Automatic Verification Reviewed-by: Maher Sanalla Reviewed-by: Shay Drory Reviewed-by: Moshe Shemesh Reviewed-by: Shachar Kagan Reviewed-by: Tariq Toukan --- .../ethernet/mellanox/mlx5/core/en/tc/post_act.c | 6 ++++++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 25 ++++++++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 4e923a2874ae..86bf007fd05b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -83,6 +83,9 @@ mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act, struct mlx5_flow_spec *spec; int err; + if (IS_ERR(post_act)) + return PTR_ERR(post_act); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return -ENOMEM; @@ -111,6 +114,9 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *po struct mlx5e_post_act_handle *handle; int err; + if (IS_ERR(post_act)) + return ERR_CAST(post_act); + handle = kzalloc(sizeof(*handle), GFP_KERNEL); if (!handle) return ERR_PTR(-ENOMEM); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 7ca9e5b86778..4809a66f3491 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -444,6 +444,9 @@ mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv, struct mlx5e_flow_meter_handle *meter; enum mlx5e_post_meter_type type; + if (IS_ERR(post_act)) + return PTR_ERR(post_act); + meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params); if (IS_ERR(meter)) { mlx5_core_err(priv->mdev, "Failed to get flow meter\n"); @@ -3738,6 +3741,20 @@ out_free: return err; } +static int +set_branch_dest_ft(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) +{ + struct mlx5e_post_act *post_act = get_post_action(priv); + + if (IS_ERR(post_act)) + return PTR_ERR(post_act); + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act); + + return 0; +} + static int alloc_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5e_tc_act_branch_ctrl *cond, @@ -3761,8 +3778,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow, break; case FLOW_ACTION_ACCEPT: case FLOW_ACTION_PIPE: - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); + if (set_branch_dest_ft(flow->priv, attr)) + goto out_err; break; case FLOW_ACTION_JUMP: if (*jump_count) { @@ -3771,8 +3788,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow, goto out_err; } *jump_count = cond->extval; - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); + if (set_branch_dest_ft(flow->priv, attr)) + goto out_err; break; default: err = -EOPNOTSUPP; -- cgit From 3d7a3f2612d75de5f371a681038b089ded6667eb Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 7 Aug 2023 13:11:32 +0300 Subject: net/mlx5: Nack sync reset request when HotPlug is enabled Current sync reset flow is not supported when PCIe bridge connected directly to mlx5 device has HotPlug interrupt enabled and can be triggered on link state change event. Return nack on reset request in such case. Fixes: 92501fa6e421 ("net/mlx5: Ack on sync_reset_request only if PF can do reset_now") Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index b568988e92e3..c4e19d627da2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -325,6 +325,29 @@ static void mlx5_fw_live_patch_event(struct work_struct *work) mlx5_core_err(dev, "Failed to reload FW tracer\n"); } +#if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE) +static int mlx5_check_hotplug_interrupt(struct mlx5_core_dev *dev) +{ + struct pci_dev *bridge = dev->pdev->bus->self; + u16 reg16; + int err; + + if (!bridge) + return -EOPNOTSUPP; + + err = pcie_capability_read_word(bridge, PCI_EXP_SLTCTL, ®16); + if (err) + return err; + + if ((reg16 & PCI_EXP_SLTCTL_HPIE) && (reg16 & PCI_EXP_SLTCTL_DLLSCE)) { + mlx5_core_warn(dev, "FW reset is not supported as HotPlug is enabled\n"); + return -EOPNOTSUPP; + } + + return 0; +} +#endif + static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id) { struct pci_bus *bridge_bus = dev->pdev->bus; @@ -357,6 +380,12 @@ static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev) return false; } +#if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE) + err = mlx5_check_hotplug_interrupt(dev); + if (err) + return false; +#endif + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); if (err) return false; -- cgit From 7aaf975238c47b710fcc4eca0da1e7902a53abe2 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Thu, 31 Aug 2023 05:47:09 +0300 Subject: net/mlx5e: Check netdev pointer before checking its net ns Previously, when comparing the net namespaces, the case where the netdev doesn't exist wasn't taken into account, and therefore can cause a crash. In such a case, the comparing function should return false, as there is no netdev->net to compare the devlink->net to. Furthermore, this will result in an attempt to enter switchdev mode without a netdev to fail, and which is the desired result as there is no meaning in switchdev mode without a net device. Fixes: 662404b24a4c ("net/mlx5e: Block entering switchdev mode with ns inconsistency") Signed-off-by: Gavin Li Reviewed-by: Gavi Teitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index bf78eeca401b..bb8bcb448ae9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3653,14 +3653,18 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink) { + struct mlx5_core_dev *dev = devlink_priv(devlink); struct net *devl_net, *netdev_net; - struct mlx5_eswitch *esw; - - esw = mlx5_devlink_eswitch_nocheck_get(devlink); - netdev_net = dev_net(esw->dev->mlx5e_res.uplink_netdev); - devl_net = devlink_net(devlink); + bool ret = false; - return net_eq(devl_net, netdev_net); + mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); + if (dev->mlx5e_res.uplink_netdev) { + netdev_net = dev_net(dev->mlx5e_res.uplink_netdev); + devl_net = devlink_net(devlink); + ret = net_eq(devl_net, netdev_net); + } + mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); + return ret; } int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) -- cgit From ca4ef28d0ad831d2521fa2b16952f37fd9324ca3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Nov 2023 09:36:20 +0300 Subject: net/mlx5: Fix a NULL vs IS_ERR() check The mlx5_esw_offloads_devlink_port() function returns error pointers, not NULL. Fixes: 7bef147a6ab6 ("net/mlx5: Don't skip vport check") Signed-off-by: Dan Carpenter Reviewed-by: Wojciech Drewek Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3ab682bbcf86..1bf7540a65ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1497,7 +1497,7 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); - if (dl_port) { + if (!IS_ERR(dl_port)) { SET_NETDEV_DEVLINK_PORT(netdev, dl_port); mlx5e_rep_vnic_reporter_create(priv, dl_port); } -- cgit From 26513300978f7285c3e776c144f27ef71be61f57 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Dec 2023 08:27:36 +0100 Subject: drm/bridge: tc358768: select CONFIG_VIDEOMODE_HELPERS A dependency on this feature was recently introduced: x86_64-linux-ld: vmlinux.o: in function `tc358768_bridge_pre_enable': tc358768.c:(.text+0xbe3dae): undefined reference to `drm_display_mode_to_videomode' Make sure this is always enabled. Fixes: e5fb21678136 ("drm/bridge: tc358768: Use struct videomode") Signed-off-by: Arnd Bergmann Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20231204072814.968816-1-arnd@kernel.org Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231204072814.968816-1-arnd@kernel.org --- drivers/gpu/drm/bridge/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index ba82a1142adf..3e6a4e2044c0 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -313,6 +313,7 @@ config DRM_TOSHIBA_TC358768 select REGMAP_I2C select DRM_PANEL select DRM_MIPI_DSI + select VIDEOMODE_HELPERS help Toshiba TC358768AXBG/TC358778XBG DSI bridge chip driver. -- cgit From 209043cf092d7b0d4739921b3f11d6d0b451eabf Mon Sep 17 00:00:00 2001 From: Nathan Rossi Date: Mon, 6 Nov 2023 02:14:36 +0000 Subject: arm64: dts: imx8mp: imx8mq: Add parkmode-disable-ss-quirk on DWC3 The i.MX8MP and i.MX8MQ devices both use the same DWC3 controller and are both affected by a known issue with the controller due to specific behaviour when park mode is enabled in SuperSpeed host mode operation. Under heavy USB traffic from multiple endpoints the controller will sometimes incorrectly process transactions such that some transactions are lost, or the controller may hang when processing transactions. When the controller hangs it does not recover. This issue is documented partially within the linux-imx vendor kernel which references a Synopsys STAR number 9001415732 in commits [1] and additional details in [2]. Those commits provide some additional controller internal implementation specifics around the incorrect behaviour of the SuperSpeed host controller operation when park mode is enabled. The summary of this issue is that the host controller can incorrectly enter/exit park mode such that part of the controller is in a state which behaves as if in park mode even though it is not. In this state the controller incorrectly calculates the number of TRBs available which results in incorrect access of the internal caches causing the overwrite of pending requests in the cache which should have been processed but are ignored. This can cause the controller to drop the requests or hang waiting for the pending state of the dropped requests. The workaround for this issue is to disable park mode for SuperSpeed operation of the controller through the GUCTL1[17] bit. This is already available as a quirk for the DWC3 controller and can be enabled via the 'snps,parkmode-disable-ss-quirk' device tree property. It is possible to replicate this failure on an i.MX8MP EVK with a USB Hub connecting 4 SuperSpeed USB flash drives. Performing continuous small read operations (dd if=/dev/sd... of=/dev/null bs=16) on the block devices will result in device errors initially and will eventually result in the controller hanging. [13240.896936] xhci-hcd xhci-hcd.0.auto: WARN Event TRB for slot 4 ep 2 with no TDs queued? [13240.990708] usb 2-1.3: reset SuperSpeed USB device number 5 using xhci-hcd [13241.015582] sd 2:0:0:0: [sdc] tag#0 UNKNOWN(0x2003) Result: hostbyte=0x07 driverbyte=DRIVER_OK cmd_age=0s [13241.025198] sd 2:0:0:0: [sdc] tag#0 CDB: opcode=0x28 28 00 00 00 03 e0 00 01 00 00 [13241.032949] I/O error, dev sdc, sector 992 op 0x0:(READ) flags 0x80700 phys_seg 25 prio class 2 [13272.150710] usb 2-1.2: reset SuperSpeed USB device number 4 using xhci-hcd [13272.175469] sd 1:0:0:0: [sdb] tag#0 UNKNOWN(0x2003) Result: hostbyte=0x03 driverbyte=DRIVER_OK cmd_age=31s [13272.185365] sd 1:0:0:0: [sdb] tag#0 CDB: opcode=0x28 28 00 00 00 03 e0 00 01 00 00 [13272.193385] I/O error, dev sdb, sector 992 op 0x0:(READ) flags 0x80700 phys_seg 18 prio class 2 [13434.846556] xhci-hcd xhci-hcd.0.auto: xHCI host not responding to stop endpoint command [13434.854592] xhci-hcd xhci-hcd.0.auto: xHCI host controller not responding, assume dead [13434.862553] xhci-hcd xhci-hcd.0.auto: HC died; cleaning up [1] https://github.com/nxp-imx/linux-imx/commit/97a5349d936b08cf301730b59e4e8855283f815c [2] https://github.com/nxp-imx/linux-imx/commit/b4b5cbc5a12d7c3b920d1d7cba0ada3379e4e42b Fixes: fb8587a2c165 ("arm64: dtsi: imx8mp: add usb nodes") Fixes: ad37549cb5dc ("arm64: dts: imx8mq: add USB nodes") Signed-off-by: Nathan Rossi Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 2 ++ arch/arm64/boot/dts/freescale/imx8mq.dtsi | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index c9a610ba4836..1264da6012f9 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -2072,6 +2072,7 @@ phys = <&usb3_phy0>, <&usb3_phy0>; phy-names = "usb2-phy", "usb3-phy"; snps,gfladj-refclk-lpm-sel-quirk; + snps,parkmode-disable-ss-quirk; }; }; @@ -2114,6 +2115,7 @@ phys = <&usb3_phy1>, <&usb3_phy1>; phy-names = "usb2-phy", "usb3-phy"; snps,gfladj-refclk-lpm-sel-quirk; + snps,parkmode-disable-ss-quirk; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 4b1ce9fc1758..c6dc3ba0d43b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -1649,6 +1649,7 @@ phys = <&usb3_phy0>, <&usb3_phy0>; phy-names = "usb2-phy", "usb3-phy"; power-domains = <&pgc_otg1>; + snps,parkmode-disable-ss-quirk; status = "disabled"; }; @@ -1680,6 +1681,7 @@ phys = <&usb3_phy1>, <&usb3_phy1>; phy-names = "usb2-phy", "usb3-phy"; power-domains = <&pgc_otg2>; + snps,parkmode-disable-ss-quirk; status = "disabled"; }; -- cgit From 20c2dbff342aec13bf93c2f6c951da198916a455 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 27 Nov 2023 16:50:25 +0200 Subject: drm/i915: Skip some timing checks on BXT/GLK DSI transcoders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently some BXT/GLK systems have DSI panels whose timings don't agree with the normal cpu transcoder hblank>=32 limitation. This is perhaps fine as there are no specific hblank/etc. limits listed for the BXT/GLK DSI transcoders. Move those checks out from the global intel_mode_valid() into into connector specific .mode_valid() hooks, skipping BXT/GLK DSI connectors. We'll leave the basic [hv]display/[hv]total checks in intel_mode_valid() as those seem like sensible upper limits regardless of the transcoder used. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9720 Fixes: 8f4b1068e7fc ("drm/i915: Check some transcoder timing minimum limits") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit e0ef2daa8ca8ce4dbc2fd0959e383b753a87fd7d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/icl_dsi.c | 7 +++++++ drivers/gpu/drm/i915/display/intel_crt.c | 5 +++++ drivers/gpu/drm/i915/display/intel_display.c | 10 ++++++++++ drivers/gpu/drm/i915/display/intel_display.h | 3 +++ drivers/gpu/drm/i915/display/intel_dp.c | 4 ++++ drivers/gpu/drm/i915/display/intel_dp_mst.c | 4 ++++ drivers/gpu/drm/i915/display/intel_dvo.c | 6 ++++++ drivers/gpu/drm/i915/display/intel_hdmi.c | 4 ++++ drivers/gpu/drm/i915/display/intel_lvds.c | 5 +++++ drivers/gpu/drm/i915/display/intel_sdvo.c | 8 +++++++- drivers/gpu/drm/i915/display/intel_tv.c | 8 +++++++- drivers/gpu/drm/i915/display/vlv_dsi.c | 18 +++++++++++++++++- 12 files changed, 79 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index c4585e445198..67143a0f5189 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1440,6 +1440,13 @@ static void gen11_dsi_post_disable(struct intel_atomic_state *state, static enum drm_mode_status gen11_dsi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *i915 = to_i915(connector->dev); + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + /* FIXME: DSC? */ return intel_dsi_mode_valid(connector, mode); } diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 913e5d230a4d..6f6b348b8a40 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -348,8 +348,13 @@ intel_crt_mode_valid(struct drm_connector *connector, struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = to_i915(dev); int max_dotclk = dev_priv->max_dotclk_freq; + enum drm_mode_status status; int max_clock; + status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 94330108145e..4ee7d569b379 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7869,6 +7869,16 @@ enum drm_mode_status intel_mode_valid(struct drm_device *dev, mode->vtotal > vtotal_max) return MODE_V_ILLEGAL; + return MODE_OK; +} + +enum drm_mode_status intel_cpu_transcoder_mode_valid(struct drm_i915_private *dev_priv, + const struct drm_display_mode *mode) +{ + /* + * Additional transcoder timing limits, + * excluding BXT/GLK DSI transcoders. + */ if (DISPLAY_VER(dev_priv) >= 5) { if (mode->hdisplay < 64 || mode->htotal - mode->hdisplay < 32) diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 0e5dffe8f018..a05c7e2b782e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -403,6 +403,9 @@ enum drm_mode_status intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv, const struct drm_display_mode *mode, bool bigjoiner); +enum drm_mode_status +intel_cpu_transcoder_mode_valid(struct drm_i915_private *i915, + const struct drm_display_mode *mode); enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port); bool is_trans_port_sync_mode(const struct intel_crtc_state *state); bool is_trans_port_sync_master(const struct intel_crtc_state *state); diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 2852958dd4e7..b21bcd40f111 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1172,6 +1172,10 @@ intel_dp_mode_valid(struct drm_connector *_connector, enum drm_mode_status status; bool dsc = false, bigjoiner = false; + status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLCLK) return MODE_H_ILLEGAL; diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 851b312bd844..4816e4e77f83 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -959,6 +959,10 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, return 0; } + *status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (*status != MODE_OK) + return 0; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) { *status = MODE_NO_DBLESCAN; return 0; diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index 55d6743374bd..9111e9d46486 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -217,11 +217,17 @@ intel_dvo_mode_valid(struct drm_connector *_connector, struct drm_display_mode *mode) { struct intel_connector *connector = to_intel_connector(_connector); + struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_dvo *intel_dvo = intel_attached_dvo(connector); const struct drm_display_mode *fixed_mode = intel_panel_fixed_mode(connector, mode); int max_dotclk = to_i915(connector->base.dev)->max_dotclk_freq; int target_clock = mode->clock; + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index ac315f8e7820..bfa456fa7d25 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1983,6 +1983,10 @@ intel_hdmi_mode_valid(struct drm_connector *connector, bool ycbcr_420_only; enum intel_output_format sink_format; + status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (status != MODE_OK) + return status; + if ((mode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING) clock *= 2; diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 2a4ca7e65775..bcbdd1984fd9 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -389,11 +389,16 @@ intel_lvds_mode_valid(struct drm_connector *_connector, struct drm_display_mode *mode) { struct intel_connector *connector = to_intel_connector(_connector); + struct drm_i915_private *i915 = to_i915(connector->base.dev); const struct drm_display_mode *fixed_mode = intel_panel_fixed_mode(connector, mode); int max_pixclk = to_i915(connector->base.dev)->max_dotclk_freq; enum drm_mode_status status; + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index a636f42ceae5..a9ac7d45d1f3 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -1921,13 +1921,19 @@ static enum drm_mode_status intel_sdvo_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *i915 = to_i915(connector->dev); struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); - int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; bool has_hdmi_sink = intel_has_hdmi_sink(intel_sdvo_connector, connector->state); + int max_dotclk = i915->max_dotclk_freq; + enum drm_mode_status status; int clock = mode->clock; + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 31a79fdfc812..2ee4f0d95851 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -958,8 +958,14 @@ static enum drm_mode_status intel_tv_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *i915 = to_i915(connector->dev); const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); - int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; + int max_dotclk = i915->max_dotclk_freq; + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 55da627a8b8d..f488394d3108 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1541,9 +1541,25 @@ static const struct drm_encoder_funcs intel_dsi_funcs = { .destroy = intel_dsi_encoder_destroy, }; +static enum drm_mode_status vlv_dsi_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + struct drm_i915_private *i915 = to_i915(connector->dev); + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + } + + return intel_dsi_mode_valid(connector, mode); +} + static const struct drm_connector_helper_funcs intel_dsi_connector_helper_funcs = { .get_modes = intel_dsi_get_modes, - .mode_valid = intel_dsi_mode_valid, + .mode_valid = vlv_dsi_mode_valid, .atomic_check = intel_digital_connector_atomic_check, }; -- cgit From 7cf82b25dd91d7f330d9df2de868caca14289ba1 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 27 Nov 2023 16:50:26 +0200 Subject: drm/i915/mst: Fix .mode_valid_ctx() return values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .mode_valid_ctx() returns an errno, not the mode status. Fix the code to do the right thing. Cc: stable@vger.kernel.org Cc: Stanislav Lisovskiy Fixes: d51f25eb479a ("drm/i915: Add DSC support to MST path") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit c1799032d2ef6616113b733428dfaa2199a5604b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 4816e4e77f83..1bf21bd9a26b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1031,11 +1031,15 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, * Big joiner configuration needs DSC for TGL which is not true for * XE_LPD where uncompressed joiner is supported. */ - if (DISPLAY_VER(dev_priv) < 13 && bigjoiner && !dsc) - return MODE_CLOCK_HIGH; + if (DISPLAY_VER(dev_priv) < 13 && bigjoiner && !dsc) { + *status = MODE_CLOCK_HIGH; + return 0; + } - if (mode_rate > max_rate && !dsc) - return MODE_CLOCK_HIGH; + if (mode_rate > max_rate && !dsc) { + *status = MODE_CLOCK_HIGH; + return 0; + } *status = intel_mode_valid_max_plane_size(dev_priv, mode, false); return 0; -- cgit From dd7eb65c493615fda7d459501c3d4a46e00ea5ba Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 27 Nov 2023 16:50:27 +0200 Subject: drm/i915/mst: Reject modes that require the bigjoiner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have no bigjoiner support in the MST code, so .mode_valid() pretending otherwise is just going to result black screens for users. Reject any mode that needs the joiner. Cc: stable@vger.kernel.org Cc: Stanislav Lisovskiy Fixes: d51f25eb479a ("drm/i915: Add DSC support to MST path") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-3-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 9c058492b16f90bb772cb0dad567e8acc68e155d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 1bf21bd9a26b..aa1061262613 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -997,6 +997,10 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, if (intel_dp_need_bigjoiner(intel_dp, mode->hdisplay, target_clock)) { bigjoiner = true; max_dotclk *= 2; + + /* TODO: add support for bigjoiner */ + *status = MODE_CLOCK_HIGH; + return 0; } if (DISPLAY_VER(dev_priv) >= 10 && -- cgit From 9f269070abe9c45dc60abc84e29326f855317eac Mon Sep 17 00:00:00 2001 From: heminhong Date: Tue, 14 Nov 2023 10:43:41 +0800 Subject: drm/i915: correct the input parameter on _intel_dsb_commit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current, the dewake_scanline variable is defined as unsigned int, an unsigned int variable that is always greater than or equal to 0. when _intel_dsb_commit function is called by intel_dsb_commit function, the dewake_scanline variable may have an int value. So the dewake_scanline variable is necessary to defined as an int. Fixes: f83b94d23770 ("drm/i915/dsb: Use DEwake to combat PkgC latency") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310052201.AnVbpgPr-lkp@intel.com/ Cc: Ville Syrjälä Cc: Uma Shankar Signed-off-by: heminhong Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231114024341.14524-1-heminhong@kylinos.cn (cherry picked from commit ef32c3cc9c62252986f09e06b4e525742cd91529) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dsb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 78b6fe24dcd8..7fd6280c54a7 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -340,7 +340,7 @@ static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state) } static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, - unsigned int dewake_scanline) + int dewake_scanline) { struct intel_crtc *crtc = dsb->crtc; struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); -- cgit From d951f8f5f23a9417b7952f22b33784c73caa1ebb Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sun, 5 Nov 2023 10:32:19 -0300 Subject: ARM: dts: imx6ul-pico: Describe the Ethernet PHY clock Since commit c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup")thet Ethernet PHY is no longer configured via code in board file. This caused Ethernet to stop working. Fix this problem by describing the clocks and clock-names to the Ethernet PHY node so that the KSZ8081 chip can be clocked correctly. Fixes: c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup") Signed-off-by: Fabio Estevam Reviewed-by: Andrew Lunn Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi index 4ffe99ed55ca..07dcecbe485d 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi @@ -121,6 +121,8 @@ max-speed = <100>; interrupt-parent = <&gpio5>; interrupts = <6 IRQ_TYPE_LEVEL_LOW>; + clocks = <&clks IMX6UL_CLK_ENET_REF>; + clock-names = "rmii-ref"; }; }; }; -- cgit From 4c6f19ab2aed2abc78d788d5418047e5f44b1921 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 6 Nov 2023 16:13:24 +0100 Subject: dt-bindings: pwm: imx-pwm: Unify #pwm-cells for all compatibles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use #pwm-cells for all i.MX variants. Only fsl,imx1-pwm does not support inverted PWM output. Keep it the same for consistency. Signed-off-by: Alexander Stein Reviewed-by: Uwe Kleine-König Acked-by: Conor Dooley Signed-off-by: Shawn Guo --- Documentation/devicetree/bindings/pwm/imx-pwm.yaml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/pwm/imx-pwm.yaml b/Documentation/devicetree/bindings/pwm/imx-pwm.yaml index c01dff3b7f84..a84a240a61dc 100644 --- a/Documentation/devicetree/bindings/pwm/imx-pwm.yaml +++ b/Documentation/devicetree/bindings/pwm/imx-pwm.yaml @@ -14,12 +14,10 @@ allOf: properties: "#pwm-cells": - description: | - Should be 2 for i.MX1 and 3 for i.MX27 and newer SoCs. See pwm.yaml - in this directory for a description of the cells format. - enum: - - 2 - - 3 + description: + The only third cell flag supported by this binding is + PWM_POLARITY_INVERTED. fsl,imx1-pwm does not support this flags. + const: 3 compatible: oneOf: -- cgit From e4d008d49a7135214e0ee70537405b6a069e3a3f Mon Sep 17 00:00:00 2001 From: Yewon Choi Date: Fri, 1 Dec 2023 15:10:52 +0900 Subject: xsk: Skip polling event check for unbound socket In xsk_poll(), checking available events and setting mask bits should be executed only when a socket has been bound. Setting mask bits for unbound socket is meaningless. Currently, it checks events even when xsk_check_common() failed. To prevent this, we move goto location (skip_tx) after that checking. Fixes: 1596dae2f17e ("xsk: check IFF_UP earlier in Tx path") Signed-off-by: Yewon Choi Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20231201061048.GA1510@libra05 --- net/xdp/xsk.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index ae9f8cb611f6..3da0b52f308d 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -947,7 +947,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, rcu_read_lock(); if (xsk_check_common(xs)) - goto skip_tx; + goto out; pool = xs->pool; @@ -959,12 +959,11 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, xsk_generic_xmit(sk); } -skip_tx: if (xs->rx && !xskq_prod_is_empty(xs->rx)) mask |= EPOLLIN | EPOLLRDNORM; if (xs->tx && xsk_tx_writeable(xs)) mask |= EPOLLOUT | EPOLLWRNORM; - +out: rcu_read_unlock(); return mask; } -- cgit From e53f7b54b1fdecae897f25002ff0cff04faab228 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 5 Dec 2023 15:37:17 +0300 Subject: io_uring/kbuf: Fix an NULL vs IS_ERR() bug in io_alloc_pbuf_ring() The io_mem_alloc() function returns error pointers, not NULL. Update the check accordingly. Fixes: b10b73c102a2 ("io_uring/kbuf: recycle freed mapped buffer ring entries") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/5ed268d3-a997-4f64-bd71-47faa92101ab@moroto.mountain Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 268788305b61..bd25bc2deeaf 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -636,8 +636,8 @@ static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx, ibf = io_lookup_buf_free_entry(ctx, ring_size); if (!ibf) { ptr = io_mem_alloc(ring_size); - if (!ptr) - return -ENOMEM; + if (IS_ERR(ptr)) + return PTR_ERR(ptr); /* Allocate and store deferred free entry */ ibf = kmalloc(sizeof(*ibf), GFP_KERNEL_ACCOUNT); -- cgit From 9865346b7e8374b57f1c3ccacdc77846c6352ff4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 5 Dec 2023 07:02:13 -0700 Subject: io_uring/kbuf: check for buffer list readiness after NULL check Move the buffer list 'is_ready' check below the validity check for the buffer list for a given group. Fixes: 5cf4f52e6d8a ("io_uring: free io_buffer_list entries via RCU") Reported-by: Dan Carpenter Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index bd25bc2deeaf..72b6af1d2ed3 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -756,6 +756,8 @@ void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid) bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid); + if (!bl || !bl->is_mmap) + return NULL; /* * Ensure the list is fully setup. Only strictly needed for RCU lookup * via mmap, and in that case only for the array indexed groups. For @@ -763,8 +765,6 @@ void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid) */ if (!smp_load_acquire(&bl->is_ready)) return NULL; - if (!bl || !bl->is_mmap) - return NULL; return bl->buf_ring; } -- cgit From 3c91c909f13f0c32b0d54d75c3f798479b1a84f5 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Sat, 2 Dec 2023 17:59:02 +0800 Subject: octeontx2-af: fix a use-after-free in rvu_npa_register_reporters The rvu_dl will be freed in rvu_npa_health_reporters_destroy(rvu_dl) after the create_workqueue fails, and after that free, the rvu_dl will be translate back through rvu_npa_health_reporters_create, rvu_health_reporters_create, and rvu_register_dl. Finally it goes to the err_dl_health label, being freed again in rvu_health_reporters_destroy(rvu) by rvu_npa_health_reporters_destroy. In the second calls of rvu_npa_health_reporters_destroy, however, it uses rvu_dl->rvu_npa_health_reporter, which is already freed at the end of rvu_npa_health_reporters_destroy in the first call. So this patch prevents the first destroy by instantly returning -ENONMEN when create_workqueue fails. In addition, since the failure of create_workqueue is the only entrence of label err, it has been integrated into the error-handling path of create_workqueue. Fixes: f1168d1e207c ("octeontx2-af: Add devlink health reporters for NPA") Signed-off-by: Zhipeng Lu Acked-by: Paolo Abeni Acked-by: Geethasowjanya Akula Link: https://lore.kernel.org/r/20231202095902.3264863-1-alexious@zju.edu.cn Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index c70932625d0d..62780d8b4f9a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1087,7 +1087,7 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl) rvu_dl->devlink_wq = create_workqueue("rvu_devlink_wq"); if (!rvu_dl->devlink_wq) - goto err; + return -ENOMEM; INIT_WORK(&rvu_reporters->intr_work, rvu_npa_intr_work); INIT_WORK(&rvu_reporters->err_work, rvu_npa_err_work); @@ -1095,9 +1095,6 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl) INIT_WORK(&rvu_reporters->ras_work, rvu_npa_ras_work); return 0; -err: - rvu_npa_health_reporters_destroy(rvu_dl); - return -ENOMEM; } static int rvu_npa_health_reporters_create(struct rvu_devlink *rvu_dl) -- cgit From 55702ec9603ebeffb15e6f7b113623fe1d8872f4 Mon Sep 17 00:00:00 2001 From: Stefan Wiehler Date: Mon, 6 Nov 2023 13:12:07 +0100 Subject: mips/smp: Call rcutree_report_cpu_starting() earlier rcutree_report_cpu_starting() must be called before clockevents_register_device() to avoid the following lockdep splat triggered by calling list_add() when CONFIG_PROVE_RCU_LIST=y: WARNING: suspicious RCU usage ... ----------------------------- kernel/locking/lockdep.c:3680 RCU-list traversed in non-reader section!! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 no locks held by swapper/1/0. ... Call Trace: [] show_stack+0x64/0x158 [] dump_stack_lvl+0x90/0xc4 [] __lock_acquire+0x1404/0x2940 [] lock_acquire+0x14c/0x448 [] _raw_spin_lock_irqsave+0x50/0x88 [] clockevents_register_device+0x60/0x1e8 [] r4k_clockevent_init+0x220/0x3a0 [] start_secondary+0x50/0x3b8 raw_smp_processor_id() is required in order to avoid calling into lockdep before RCU has declared the CPU to be watched for readers. See also commit 29368e093921 ("x86/smpboot: Move rcu_cpu_starting() earlier"), commit de5d9dae150c ("s390/smp: move rcu_cpu_starting() earlier") and commit 99f070b62322 ("powerpc/smp: Call rcu_cpu_starting() earlier"). Signed-off-by: Stefan Wiehler Reviewed-by: Huacai Chen Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 8fbef537fb88..82e2e051b416 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -351,10 +351,11 @@ early_initcall(mips_smp_ipi_init); */ asmlinkage void start_secondary(void) { - unsigned int cpu; + unsigned int cpu = raw_smp_processor_id(); cpu_probe(); per_cpu_trap_init(false); + rcutree_report_cpu_starting(cpu); mips_clockevent_init(); mp_ops->init_secondary(); cpu_report(); @@ -366,7 +367,6 @@ asmlinkage void start_secondary(void) */ calibrate_delay(); - cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; set_cpu_sibling_map(cpu); -- cgit From 8f7aa77a463f47c9e00592d02747a9fcf2271543 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 7 Nov 2023 11:15:18 +0000 Subject: MIPS: Loongson64: Reserve vgabios memory on boot vgabios is passed from firmware to kernel on Loongson64 systems. Sane firmware will keep this pointer in reserved memory space passed from the firmware but insane firmware keeps it in low memory before kernel entry that is not reserved. Previously kernel won't try to allocate memory from low memory before kernel entry on boot, but after converting to memblock it will do that. Fix by resversing those memory on early boot. Cc: stable@vger.kernel.org Fixes: a94e4f24ec83 ("MIPS: init: Drop boot_mem_map") Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/loongson64/init.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index ee8de1735b7c..d62262f93069 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -88,6 +88,11 @@ void __init szmem(unsigned int node) break; } } + + /* Reserve vgabios if it comes from firmware */ + if (loongson_sysconf.vgabios_addr) + memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr), + SZ_256K); } #ifndef CONFIG_NUMA -- cgit From edc0378eee00200a5bedf1bb9f00ad390e0d1bd4 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 7 Nov 2023 11:15:19 +0000 Subject: MIPS: Loongson64: Enable DMA noncoherent support There are some Loongson64 systems come with broken coherent DMA support, firmware will set a bit in boot_param and pass nocoherentio in cmdline. However nonconherent support was missed out when spin off Loongson-2EF form Loongson64, and that boot_param change never made itself into upstream. Support DMA noncoherent properly to get those systems working. Cc: stable@vger.kernel.org Fixes: 71e2f4dd5a65 ("MIPS: Fork loongson2ef from loongson64") Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/Kconfig | 2 ++ arch/mips/include/asm/mach-loongson64/boot_param.h | 3 ++- arch/mips/loongson64/env.c | 10 +++++++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 76db82542519..797ae590ebdb 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -460,6 +460,7 @@ config MACH_LOONGSON2EF config MACH_LOONGSON64 bool "Loongson 64-bit family of machines" + select ARCH_DMA_DEFAULT_COHERENT select ARCH_SPARSEMEM_ENABLE select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO @@ -1251,6 +1252,7 @@ config CPU_LOONGSON64 select CPU_SUPPORTS_MSA select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT select CPU_MIPSR2_IRQ_VI + select DMA_NONCOHERENT select WEAK_ORDERING select WEAK_REORDERING_BEYOND_LLSC select MIPS_ASID_BITS_VARIABLE diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h index 035b1a69e2d0..c454ef734c45 100644 --- a/arch/mips/include/asm/mach-loongson64/boot_param.h +++ b/arch/mips/include/asm/mach-loongson64/boot_param.h @@ -117,7 +117,8 @@ struct irq_source_routing_table { u64 pci_io_start_addr; u64 pci_io_end_addr; u64 pci_config_addr; - u32 dma_mask_bits; + u16 dma_mask_bits; + u16 dma_noncoherent; } __packed; struct interface_info { diff --git a/arch/mips/loongson64/env.c b/arch/mips/loongson64/env.c index c961e2999f15..ef3750a6ffac 100644 --- a/arch/mips/loongson64/env.c +++ b/arch/mips/loongson64/env.c @@ -13,6 +13,8 @@ * Copyright (C) 2009 Lemote Inc. * Author: Wu Zhangjin, wuzhangjin@gmail.com */ + +#include #include #include #include @@ -147,8 +149,14 @@ void __init prom_lefi_init_env(void) loongson_sysconf.dma_mask_bits = eirq_source->dma_mask_bits; if (loongson_sysconf.dma_mask_bits < 32 || - loongson_sysconf.dma_mask_bits > 64) + loongson_sysconf.dma_mask_bits > 64) { loongson_sysconf.dma_mask_bits = 32; + dma_default_coherent = true; + } else { + dma_default_coherent = !eirq_source->dma_noncoherent; + } + + pr_info("Firmware: Coherent DMA: %s\n", dma_default_coherent ? "on" : "off"); loongson_sysconf.restart_addr = boot_p->reset_system.ResetWarm; loongson_sysconf.poweroff_addr = boot_p->reset_system.Shutdown; -- cgit From c7206e7bd214ebb3ca6fa474a4423662327d9beb Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 7 Nov 2023 11:15:20 +0000 Subject: MIPS: Loongson64: Handle more memory types passed from firmware There are many types of revsered memory passed from firmware that should be reserved in memblock, and UMA memory passed from firmware that should be added to system memory for system to use. Also for memblock there is no need to align those space into page, which actually cause problems. Handle them properly to prevent memory corruption on some systems. Cc: stable@vger.kernel.org Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/mach-loongson64/boot_param.h | 6 +++- arch/mips/loongson64/init.c | 42 +++++++++++++--------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h index c454ef734c45..e007edd6b60a 100644 --- a/arch/mips/include/asm/mach-loongson64/boot_param.h +++ b/arch/mips/include/asm/mach-loongson64/boot_param.h @@ -14,7 +14,11 @@ #define ADAPTER_ROM 8 #define ACPI_TABLE 9 #define SMBIOS_TABLE 10 -#define MAX_MEMORY_TYPE 11 +#define UMA_VIDEO_RAM 11 +#define VUMA_VIDEO_RAM 12 +#define MAX_MEMORY_TYPE 13 + +#define MEM_SIZE_IS_IN_BYTES (1 << 31) #define LOONGSON3_BOOT_MEM_MAP_MAX 128 struct efi_memory_map_loongson { diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index d62262f93069..f25caa6aa9d3 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -49,8 +49,7 @@ void virtual_early_config(void) void __init szmem(unsigned int node) { u32 i, mem_type; - static unsigned long num_physpages; - u64 node_id, node_psize, start_pfn, end_pfn, mem_start, mem_size; + phys_addr_t node_id, mem_start, mem_size; /* Otherwise come from DTB */ if (loongson_sysconf.fw_interface != LOONGSON_LEFI) @@ -64,27 +63,38 @@ void __init szmem(unsigned int node) mem_type = loongson_memmap->map[i].mem_type; mem_size = loongson_memmap->map[i].mem_size; - mem_start = loongson_memmap->map[i].mem_start; + + /* Memory size comes in MB if MEM_SIZE_IS_IN_BYTES not set */ + if (mem_size & MEM_SIZE_IS_IN_BYTES) + mem_size &= ~MEM_SIZE_IS_IN_BYTES; + else + mem_size = mem_size << 20; + + mem_start = (node_id << 44) | loongson_memmap->map[i].mem_start; switch (mem_type) { case SYSTEM_RAM_LOW: case SYSTEM_RAM_HIGH: - start_pfn = ((node_id << 44) + mem_start) >> PAGE_SHIFT; - node_psize = (mem_size << 20) >> PAGE_SHIFT; - end_pfn = start_pfn + node_psize; - num_physpages += node_psize; - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", - (u32)node_id, mem_type, mem_start, mem_size); - pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", - start_pfn, end_pfn, num_physpages); - memblock_add_node(PFN_PHYS(start_pfn), - PFN_PHYS(node_psize), node, + case UMA_VIDEO_RAM: + pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes usable\n", + (u32)node_id, mem_type, &mem_start, &mem_size); + memblock_add_node(mem_start, mem_size, node, MEMBLOCK_NONE); break; case SYSTEM_RAM_RESERVED: - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", - (u32)node_id, mem_type, mem_start, mem_size); - memblock_reserve(((node_id << 44) + mem_start), mem_size << 20); + case VIDEO_ROM: + case ADAPTER_ROM: + case ACPI_TABLE: + case SMBIOS_TABLE: + pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes reserved\n", + (u32)node_id, mem_type, &mem_start, &mem_size); + memblock_reserve(mem_start, mem_size); + break; + /* We should not reserve VUMA_VIDEO_RAM as it overlaps with MMIO */ + case VUMA_VIDEO_RAM: + default: + pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes unhandled\n", + (u32)node_id, mem_type, &mem_start, &mem_size); break; } } -- cgit From a58a173444a68412bb08849bd81c679395f20ca0 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 30 Nov 2023 17:36:01 +0100 Subject: MIPS: kernel: Clear FPU states when setting up kernel threads io_uring sets up the io worker kernel thread via a syscall out of an user space prrocess. This process might have used FPU and since copy_thread() didn't clear FPU states for kernel threads a BUG() is triggered for using FPU inside kernel. Move code around to always clear FPU state for user and kernel threads. Cc: stable@vger.kernel.org Reported-by: Aurelien Jarno Closes: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1055021 Suggested-by: Jiaxun Yang Reviewed-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/process.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 5387ed0a5186..b630604c577f 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -121,6 +121,19 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* Put the stack after the struct pt_regs. */ childksp = (unsigned long) childregs; p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK; + + /* + * New tasks lose permission to use the fpu. This accelerates context + * switching for most programs since they don't use the fpu. + */ + clear_tsk_thread_flag(p, TIF_USEDFPU); + clear_tsk_thread_flag(p, TIF_USEDMSA); + clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE); + +#ifdef CONFIG_MIPS_MT_FPAFF + clear_tsk_thread_flag(p, TIF_FPUBOUND); +#endif /* CONFIG_MIPS_MT_FPAFF */ + if (unlikely(args->fn)) { /* kernel thread */ unsigned long status = p->thread.cp0_status; @@ -149,20 +162,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.reg29 = (unsigned long) childregs; p->thread.reg31 = (unsigned long) ret_from_fork; - /* - * New tasks lose permission to use the fpu. This accelerates context - * switching for most programs since they don't use the fpu. - */ childregs->cp0_status &= ~(ST0_CU2|ST0_CU1); - clear_tsk_thread_flag(p, TIF_USEDFPU); - clear_tsk_thread_flag(p, TIF_USEDMSA); - clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE); - -#ifdef CONFIG_MIPS_MT_FPAFF - clear_tsk_thread_flag(p, TIF_FPUBOUND); -#endif /* CONFIG_MIPS_MT_FPAFF */ - #ifdef CONFIG_MIPS_FP_SUPPORT atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE); #endif -- cgit From e2b005d6ec0e738df584190e21d2c7ada37266a0 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 4 Dec 2023 10:23:29 -0800 Subject: perf metrics: Avoid segv if default metricgroup isn't set A metric is default by having "Default" within its groups. The default metricgroup name needn't be set and this can result in segv in default_metricgroup_cmp and perf_stat__print_shadow_stats_metricgroup that assume it has a value when there is a Default metric group. To avoid the segv initialize the value to "". Fixes: 1c0e47956a8e ("perf metrics: Sort the Default metricgroup") Signed-off-by: Ian Rogers Reviewed-and-tested-by: Ilkka Koskinen Cc: James Clark Cc: Will Deacon Cc: Leo Yan Cc: Mike Leach Cc: Kajol Jain Cc: Kan Liang Cc: John Garry Cc: stable@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20231204182330.654255-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/metricgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 0484736d9fe4..ca3e0404f187 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -225,7 +225,7 @@ static struct metric *metric__new(const struct pmu_metric *pm, m->pmu = pm->pmu ?: "cpu"; m->metric_name = pm->metric_name; - m->default_metricgroup_name = pm->default_metricgroup_name; + m->default_metricgroup_name = pm->default_metricgroup_name ?: ""; m->modifier = NULL; if (modifier) { m->modifier = strdup(modifier); -- cgit From 90fe70d4e23cb57253d2668a171d5695c332deb7 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Thu, 30 Nov 2023 18:15:48 -0800 Subject: perf vendor events arm64: AmpereOne: Add missing DefaultMetricgroupName fields AmpereOne metrics were missing DefaultMetricgroupName from metrics with "Default" in group name resulting perf to segfault. Add the missing field to address the issue. Fixes: 59faeaf80d02 ("perf vendor events arm64: Fix for AmpereOne metrics") Signed-off-by: Ilkka Koskinen Reviewed-by: Ian Rogers Cc: James Clark Cc: Will Deacon Cc: Leo Yan Cc: Mike Leach Cc: John Garry Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20231201021550.1109196-2-ilkka@os.amperecomputing.com Signed-off-by: Namhyung Kim --- tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json index e2848a9d4848..afcdad58ef89 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json @@ -231,6 +231,7 @@ "MetricName": "slots_lost_misspeculation_fraction", "MetricExpr": "100 * ((OP_SPEC - OP_RETIRED) / (CPU_CYCLES * #slots))", "BriefDescription": "Fraction of slots lost due to misspeculation", + "DefaultMetricgroupName": "TopdownL1", "MetricGroup": "Default;TopdownL1", "ScaleUnit": "1percent of slots" }, @@ -238,6 +239,7 @@ "MetricName": "retired_fraction", "MetricExpr": "100 * (OP_RETIRED / (CPU_CYCLES * #slots))", "BriefDescription": "Fraction of slots retiring, useful work", + "DefaultMetricgroupName": "TopdownL1", "MetricGroup": "Default;TopdownL1", "ScaleUnit": "1percent of slots" }, -- cgit From f8e9889f54da6e3146c2cb3f5c206cf1a704f9d3 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 24 Oct 2023 16:20:10 +0200 Subject: ice: change vfs.num_msix_per to vf->num_msix vfs::num_msix_per should be only used as default value for vf->num_msix. For other use cases vf->num_msix should be used, as VF can have different MSI-X amount values. Fix incorrect register index calculation. vfs::num_msix_per and pf->sriov_base_vector shouldn't be used after implementation of changing MSI-X amount on VFs. Instead vf->first_vector_idx should be used, as it is storing value for first irq index. Fixes: fe1c5ca2fe76 ("ice: implement num_msix field per VF") Reviewed-by: Jacob Keller Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 7 +------ drivers/net/ethernet/intel/ice/ice_virtchnl.c | 5 ++--- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 2a5e6616cc0a..e1494f24f661 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -374,16 +374,11 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) */ int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) { - struct ice_pf *pf; - if (!vf || !q_vector) return -EINVAL; - pf = vf->pf; - /* always add one to account for the OICR being the first MSIX */ - return pf->sriov_base_vector + pf->vfs.num_msix_per * vf->vf_id + - q_vector->v_idx + 1; + return vf->first_vector_idx + q_vector->v_idx + 1; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index de11b3186bd7..1c7b4ded948b 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -1523,7 +1523,6 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) u16 num_q_vectors_mapped, vsi_id, vector_id; struct virtchnl_irq_map_info *irqmap_info; struct virtchnl_vector_map *map; - struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; int i; @@ -1535,7 +1534,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) * there is actually at least a single VF queue vector mapped */ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || - pf->vfs.num_msix_per < num_q_vectors_mapped || + vf->num_msix < num_q_vectors_mapped || !num_q_vectors_mapped) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -1557,7 +1556,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) /* vector_id is always 0-based for each VF, and can never be * larger than or equal to the max allowed interrupts per VF */ - if (!(vector_id < pf->vfs.num_msix_per) || + if (!(vector_id < vf->num_msix) || !ice_vc_isvalid_vsi_id(vf, vsi_id) || (!vector_id && (map->rxq_map || map->txq_map))) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; -- cgit From 4e7f0087b058cc3cab8f3c32141b51aa5457d298 Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Tue, 7 Nov 2023 14:51:38 +0100 Subject: ice: Restore fix disabling RX VLAN filtering Fix setting dis_rx_filtering depending on whether port vlan is being turned on or off. This was originally fixed in commit c793f8ea15e3 ("ice: Fix disabling Rx VLAN filtering with port VLAN enabled"), but while refactoring ice_vf_vsi_init_vlan_ops(), the fix has been lost. Restore the fix along with the original comment from that change. Also delete duplicate lines in ice_port_vlan_on(). Fixes: 2946204b3fa8 ("ice: implement bridge port vlan") Reviewed-by: Wojciech Drewek Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c index d7b10dc67f03..80dc4bcdd3a4 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c @@ -32,7 +32,6 @@ static void ice_port_vlan_on(struct ice_vsi *vsi) /* setup outer VLAN ops */ vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan; vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan; - vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan; /* setup inner VLAN ops */ vlan_ops = &vsi->inner_vlan_ops; @@ -47,8 +46,13 @@ static void ice_port_vlan_on(struct ice_vsi *vsi) vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan; vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan; - vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan; } + + /* all Rx traffic should be in the domain of the assigned port VLAN, + * so prevent disabling Rx VLAN filtering + */ + vlan_ops->dis_rx_filtering = noop_vlan; + vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering; } @@ -77,6 +81,8 @@ static void ice_port_vlan_off(struct ice_vsi *vsi) vlan_ops->del_vlan = ice_vsi_del_vlan; } + vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering; + if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags)) vlan_ops->ena_rx_filtering = noop_vlan; else @@ -141,7 +147,6 @@ void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi) &vsi->outer_vlan_ops : &vsi->inner_vlan_ops; vlan_ops->add_vlan = ice_vsi_add_vlan; - vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering; vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering; vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering; } -- cgit From 7d9f22b3d3ef379ed05bd3f3e2de83dfa8da8258 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 10 Nov 2023 09:12:09 +0100 Subject: i40e: Fix unexpected MFS warning message Commit 3a2c6ced90e1 ("i40e: Add a check to see if MFS is set") added a warning message that reports unexpected size of port's MFS (max frame size) value. This message use for the port number local variable 'i' that is wrong. In i40e_probe() this 'i' variable is used only to iterate VSIs to find FDIR VSI: ... /* if FDIR VSI was set up, start it now */ for (i = 0; i < pf->num_alloc_vsi; i++) { if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) { i40e_vsi_open(pf->vsi[i]); break; } } ... So the warning message use for the port number index of FDIR VSI if this exists or pf->num_alloc_vsi if not. Fix the message by using 'pf->hw.port' for the port number. Fixes: 3a2c6ced90e1 ("i40e: Add a check to see if MFS is set") Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f7a332e51524..1ab8dbe2d880 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16224,7 +16224,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) I40E_PRTGL_SAH_MFS_MASK) >> I40E_PRTGL_SAH_MFS_SHIFT; if (val < MAX_FRAME_SIZE_DEFAULT) dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n", - i, val); + pf->hw.port, val); /* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out -- cgit From a206d9959f5ccd0fb2d54a997c993947ae0e881c Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 27 Nov 2023 15:33:50 -0800 Subject: iavf: validate tx_coalesce_usecs even if rx_coalesce_usecs is zero In __iavf_set_coalesce, the driver checks both ec->rx_coalesce_usecs and ec->tx_coalesce_usecs for validity. It does this via a chain if if/else-if blocks. If every single branch of the series of if statements exited, this would be fine. However, the rx_coalesce_usecs is checked against zero to print an informative message if use_adaptive_rx_coalesce is enabled. If this check is true, it short circuits the entire chain of statements, preventing validation of the tx_coalesce_usecs field. Indeed, since commit e792779e6b63 ("iavf: Prevent changing static ITR values if adaptive moderation is on") the iavf driver actually rejects any change to the tx_coalesce_usecs or rx_coalesce_usecs when use_adaptive_tx_coalesce or use_adaptive_rx_coalesce is enabled, making this checking a bit redundant. Fix this error by removing the unnecessary and redundant checks for use_adaptive_rx_coalesce and use_adaptive_tx_coalesce. Since zero is a valid value, and since the tx_coalesce_usecs and rx_coalesce_usecs fields are already unsigned, remove the minimum value check. This allows assigning an ITR value ranging from 0-8160 as described by the printed message. Fixes: 65e87c0398f5 ("i40evf: support queue-specific settings for interrupt moderation") Signed-off-by: Jacob Keller Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 12 ++---------- drivers/net/ethernet/intel/iavf/iavf_txrx.h | 1 - 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 6f236d1a6444..19cbfe554689 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -827,18 +827,10 @@ static int __iavf_set_coalesce(struct net_device *netdev, struct iavf_adapter *adapter = netdev_priv(netdev); int i; - if (ec->rx_coalesce_usecs == 0) { - if (ec->use_adaptive_rx_coalesce) - netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); - } else if ((ec->rx_coalesce_usecs < IAVF_MIN_ITR) || - (ec->rx_coalesce_usecs > IAVF_MAX_ITR)) { + if (ec->rx_coalesce_usecs > IAVF_MAX_ITR) { netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); return -EINVAL; - } else if (ec->tx_coalesce_usecs == 0) { - if (ec->use_adaptive_tx_coalesce) - netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); - } else if ((ec->tx_coalesce_usecs < IAVF_MIN_ITR) || - (ec->tx_coalesce_usecs > IAVF_MAX_ITR)) { + } else if (ec->tx_coalesce_usecs > IAVF_MAX_ITR) { netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index 7e6ee32d19b6..10ba36602c0c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -15,7 +15,6 @@ */ #define IAVF_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ #define IAVF_ITR_MASK 0x1FFE /* mask for ITR register value */ -#define IAVF_MIN_ITR 2 /* reg uses 2 usec resolution */ #define IAVF_ITR_100K 10 /* all values below must be even */ #define IAVF_ITR_50K 20 #define IAVF_ITR_20K 50 -- cgit From b1693747487442984050eb0f462b83a3a8307525 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 29 Nov 2023 13:34:26 -0800 Subject: perf list: Fix JSON segfault by setting the used skip_duplicate_pmus callback Json output didn't set the skip_duplicate_pmus callback yielding a segfault. Fixes: cd4e1efbbc40 ("perf pmus: Skip duplicate PMUs and don't print list suffix by default") Signed-off-by: Ian Rogers Cc: James Clark Cc: Kan Liang Cc: Athira Rajeev Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231129213428.2227448-2-irogers@google.com [namhyung: updated subject line according to Arnaldo] Signed-off-by: Namhyung Kim --- tools/perf/builtin-list.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index a343823c8ddf..61c2c96cc070 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -434,6 +434,11 @@ static void json_print_metric(void *ps __maybe_unused, const char *group, strbuf_release(&buf); } +static bool json_skip_duplicate_pmus(void *ps __maybe_unused) +{ + return false; +} + static bool default_skip_duplicate_pmus(void *ps) { struct print_state *print_state = ps; @@ -503,6 +508,7 @@ int cmd_list(int argc, const char **argv) .print_end = json_print_end, .print_event = json_print_event, .print_metric = json_print_metric, + .skip_duplicate_pmus = json_skip_duplicate_pmus, }; ps = &json_ps; } else { -- cgit From 24be0b3c40594a14b65141ced486ae327398faf8 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 5 Dec 2023 11:05:48 +0200 Subject: Revert "xhci: Loosen RPM as default policy to cover for AMD xHC 1.1" This reverts commit 4baf1218150985ee3ab0a27220456a1f027ea0ac. Enabling runtime pm as default for all AMD xHC 1.1 controllers caused regression. An initial attempt to fix those was done in commit a5d6264b638e ("xhci: Enable RPM on controllers that support low-power states") but new issues are still seen. Revert this to get those AMD xHC 1.1 systems working This patch went to stable an needs to be reverted from there as well. Fixes: 4baf12181509 ("xhci: Loosen RPM as default policy to cover for AMD xHC 1.1") Link: https://lore.kernel.org/linux-usb/55c50bf5-bffb-454e-906e-4408c591cb63@molgen.mpg.de Cc: Mario Limonciello Cc: Basavaraj Natikar Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20231205090548.1377667-1-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 95ed9404f6f8..d6fc08e5db8f 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -535,8 +535,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) /* xHC spec requires PCI devices to support D3hot and D3cold */ if (xhci->hci_version >= 0x120) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; - else if (pdev->vendor == PCI_VENDOR_ID_AMD && xhci->hci_version >= 0x110) - xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (xhci->quirks & XHCI_RESET_ON_RESUME) xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, -- cgit From bdefd9913bdd453991ef756b6f7176e8ad80d786 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Fri, 1 Dec 2023 12:32:05 +0000 Subject: powercap: DTPM: Fix missing cpufreq_cpu_put() calls The policy returned by cpufreq_cpu_get() has to be released with the help of cpufreq_cpu_put() to balance its kobject reference counter properly. Add the missing calls to cpufreq_cpu_put() in the code. Fixes: 0aea2e4ec2a2 ("powercap/dtpm_cpu: Reset per_cpu variable in the release function") Fixes: 0e8f68d7f048 ("powercap/drivers/dtpm: Add CPU energy model based support") Cc: v5.16+ # v5.16+ Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- drivers/powercap/dtpm_cpu.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 8a2f18fa3faf..9193c3b8edeb 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -140,6 +140,8 @@ static void pd_release(struct dtpm *dtpm) if (policy) { for_each_cpu(dtpm_cpu->cpu, policy->related_cpus) per_cpu(dtpm_per_cpu, dtpm_cpu->cpu) = NULL; + + cpufreq_cpu_put(policy); } kfree(dtpm_cpu); @@ -191,12 +193,16 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent) return 0; pd = em_cpu_get(cpu); - if (!pd || em_is_artificial(pd)) - return -EINVAL; + if (!pd || em_is_artificial(pd)) { + ret = -EINVAL; + goto release_policy; + } dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL); - if (!dtpm_cpu) - return -ENOMEM; + if (!dtpm_cpu) { + ret = -ENOMEM; + goto release_policy; + } dtpm_init(&dtpm_cpu->dtpm, &dtpm_ops); dtpm_cpu->cpu = cpu; @@ -216,6 +222,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent) if (ret) goto out_dtpm_unregister; + cpufreq_cpu_put(policy); return 0; out_dtpm_unregister: @@ -227,6 +234,8 @@ out_kfree_dtpm_cpu: per_cpu(dtpm_per_cpu, cpu) = NULL; kfree(dtpm_cpu); +release_policy: + cpufreq_cpu_put(policy); return ret; } -- cgit From 7be76461f302ec05cbd62b90b2a05c64299ca01f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Dec 2023 16:52:09 -0500 Subject: tracing: Always update snapshot buffer size It use to be that only the top level instance had a snapshot buffer (for latency tracers like wakeup and irqsoff). The update of the ring buffer size would check if the instance was the top level and if so, it would also update the snapshot buffer as it needs to be the same as the main buffer. Now that lower level instances also has a snapshot buffer, they too need to update their snapshot buffer sizes when the main buffer is changed, otherwise the following can be triggered: # cd /sys/kernel/tracing # echo 1500 > buffer_size_kb # mkdir instances/foo # echo irqsoff > instances/foo/current_tracer # echo 1000 > instances/foo/buffer_size_kb Produces: WARNING: CPU: 2 PID: 856 at kernel/trace/trace.c:1938 update_max_tr_single.part.0+0x27d/0x320 Which is: ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu); if (ret == -EBUSY) { [..] } WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); <== here That's because ring_buffer_swap_cpu() has: int ret = -EINVAL; [..] /* At least make sure the two buffers are somewhat the same */ if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages) goto out; [..] out: return ret; } Instead, update all instances' snapshot buffer sizes when their main buffer size is updated. Link: https://lkml.kernel.org/r/20231205220010.454662151@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Fixes: 6d9b3fa5e7f6 ("tracing: Move tracing_max_latency into trace_array") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9aebf904ff97..231c173ec04f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6392,8 +6392,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, return ret; #ifdef CONFIG_TRACER_MAX_TRACE - if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) || - !tr->current_trace->use_max_tr) + if (!tr->current_trace->use_max_tr) goto out; ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); -- cgit From d78ab792705c7be1b91243b2544d1a79406a2ad7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Dec 2023 16:52:10 -0500 Subject: tracing: Stop current tracer when resizing buffer When the ring buffer is being resized, it can cause side effects to the running tracer. For instance, there's a race with irqsoff tracer that swaps individual per cpu buffers between the main buffer and the snapshot buffer. The resize operation modifies the main buffer and then the snapshot buffer. If a swap happens in between those two operations it will break the tracer. Simply stop the running tracer before resizing the buffers and enable it again when finished. Link: https://lkml.kernel.org/r/20231205220010.748996423@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Fixes: 3928a8a2d9808 ("ftrace: make work with new ring buffer") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 231c173ec04f..e978868b1a22 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6387,9 +6387,12 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, if (!tr->array_buffer.buffer) return 0; + /* Do not allow tracing while resizng ring buffer */ + tracing_stop_tr(tr); + ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); if (ret < 0) - return ret; + goto out_start; #ifdef CONFIG_TRACER_MAX_TRACE if (!tr->current_trace->use_max_tr) @@ -6417,7 +6420,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, WARN_ON(1); tracing_disabled = 1; } - return ret; + goto out_start; } update_buffer_entries(&tr->max_buffer, cpu); @@ -6426,7 +6429,8 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, #endif /* CONFIG_TRACER_MAX_TRACE */ update_buffer_entries(&tr->array_buffer, cpu); - + out_start: + tracing_start_tr(tr); return ret; } -- cgit From b538bf7d0ec11ca49f536dfda742a5f6db90a798 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Dec 2023 16:52:11 -0500 Subject: tracing: Disable snapshot buffer when stopping instance tracers It use to be that only the top level instance had a snapshot buffer (for latency tracers like wakeup and irqsoff). When stopping a tracer in an instance would not disable the snapshot buffer. This could have some unintended consequences if the irqsoff tracer is enabled. Consolidate the tracing_start/stop() with tracing_start/stop_tr() so that all instances behave the same. The tracing_start/stop() functions will just call their respective tracing_start/stop_tr() with the global_array passed in. Link: https://lkml.kernel.org/r/20231205220011.041220035@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Fixes: 6d9b3fa5e7f6 ("tracing: Move tracing_max_latency into trace_array") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 110 ++++++++++++++++----------------------------------- 1 file changed, 34 insertions(+), 76 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e978868b1a22..2492c6c76850 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2360,13 +2360,7 @@ int is_tracing_stopped(void) return global_trace.stop_count; } -/** - * tracing_start - quick start of the tracer - * - * If tracing is enabled but was stopped by tracing_stop, - * this will start the tracer back up. - */ -void tracing_start(void) +static void tracing_start_tr(struct trace_array *tr) { struct trace_buffer *buffer; unsigned long flags; @@ -2374,119 +2368,83 @@ void tracing_start(void) if (tracing_disabled) return; - raw_spin_lock_irqsave(&global_trace.start_lock, flags); - if (--global_trace.stop_count) { - if (global_trace.stop_count < 0) { + raw_spin_lock_irqsave(&tr->start_lock, flags); + if (--tr->stop_count) { + if (WARN_ON_ONCE(tr->stop_count < 0)) { /* Someone screwed up their debugging */ - WARN_ON_ONCE(1); - global_trace.stop_count = 0; + tr->stop_count = 0; } goto out; } /* Prevent the buffers from switching */ - arch_spin_lock(&global_trace.max_lock); + arch_spin_lock(&tr->max_lock); - buffer = global_trace.array_buffer.buffer; + buffer = tr->array_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); #ifdef CONFIG_TRACER_MAX_TRACE - buffer = global_trace.max_buffer.buffer; + buffer = tr->max_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); #endif - arch_spin_unlock(&global_trace.max_lock); - - out: - raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); -} - -static void tracing_start_tr(struct trace_array *tr) -{ - struct trace_buffer *buffer; - unsigned long flags; - - if (tracing_disabled) - return; - - /* If global, we need to also start the max tracer */ - if (tr->flags & TRACE_ARRAY_FL_GLOBAL) - return tracing_start(); - - raw_spin_lock_irqsave(&tr->start_lock, flags); - - if (--tr->stop_count) { - if (tr->stop_count < 0) { - /* Someone screwed up their debugging */ - WARN_ON_ONCE(1); - tr->stop_count = 0; - } - goto out; - } - - buffer = tr->array_buffer.buffer; - if (buffer) - ring_buffer_record_enable(buffer); + arch_spin_unlock(&tr->max_lock); out: raw_spin_unlock_irqrestore(&tr->start_lock, flags); } /** - * tracing_stop - quick stop of the tracer + * tracing_start - quick start of the tracer * - * Light weight way to stop tracing. Use in conjunction with - * tracing_start. + * If tracing is enabled but was stopped by tracing_stop, + * this will start the tracer back up. */ -void tracing_stop(void) +void tracing_start(void) + +{ + return tracing_start_tr(&global_trace); +} + +static void tracing_stop_tr(struct trace_array *tr) { struct trace_buffer *buffer; unsigned long flags; - raw_spin_lock_irqsave(&global_trace.start_lock, flags); - if (global_trace.stop_count++) + raw_spin_lock_irqsave(&tr->start_lock, flags); + if (tr->stop_count++) goto out; /* Prevent the buffers from switching */ - arch_spin_lock(&global_trace.max_lock); + arch_spin_lock(&tr->max_lock); - buffer = global_trace.array_buffer.buffer; + buffer = tr->array_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); #ifdef CONFIG_TRACER_MAX_TRACE - buffer = global_trace.max_buffer.buffer; + buffer = tr->max_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); #endif - arch_spin_unlock(&global_trace.max_lock); + arch_spin_unlock(&tr->max_lock); out: - raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); + raw_spin_unlock_irqrestore(&tr->start_lock, flags); } -static void tracing_stop_tr(struct trace_array *tr) +/** + * tracing_stop - quick stop of the tracer + * + * Light weight way to stop tracing. Use in conjunction with + * tracing_start. + */ +void tracing_stop(void) { - struct trace_buffer *buffer; - unsigned long flags; - - /* If global, we need to also stop the max tracer */ - if (tr->flags & TRACE_ARRAY_FL_GLOBAL) - return tracing_stop(); - - raw_spin_lock_irqsave(&tr->start_lock, flags); - if (tr->stop_count++) - goto out; - - buffer = tr->array_buffer.buffer; - if (buffer) - ring_buffer_record_disable(buffer); - - out: - raw_spin_unlock_irqrestore(&tr->start_lock, flags); + return tracing_stop_tr(&global_trace); } static int trace_save_cmdline(struct task_struct *tsk) -- cgit From 7fed14f7ac9cf5e38c693836fe4a874720141845 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Tue, 5 Dec 2023 17:17:34 +0100 Subject: tracing: Fix incomplete locking when disabling buffered events The following warning appears when using buffered events: [ 203.556451] WARNING: CPU: 53 PID: 10220 at kernel/trace/ring_buffer.c:3912 ring_buffer_discard_commit+0x2eb/0x420 [...] [ 203.670690] CPU: 53 PID: 10220 Comm: stress-ng-sysin Tainted: G E 6.7.0-rc2-default #4 56e6d0fcf5581e6e51eaaecbdaec2a2338c80f3a [ 203.670704] Hardware name: Intel Corp. GROVEPORT/GROVEPORT, BIOS GVPRCRB1.86B.0016.D04.1705030402 05/03/2017 [ 203.670709] RIP: 0010:ring_buffer_discard_commit+0x2eb/0x420 [ 203.735721] Code: 4c 8b 4a 50 48 8b 42 48 49 39 c1 0f 84 b3 00 00 00 49 83 e8 01 75 b1 48 8b 42 10 f0 ff 40 08 0f 0b e9 fc fe ff ff f0 ff 47 08 <0f> 0b e9 77 fd ff ff 48 8b 42 10 f0 ff 40 08 0f 0b e9 f5 fe ff ff [ 203.735734] RSP: 0018:ffffb4ae4f7b7d80 EFLAGS: 00010202 [ 203.735745] RAX: 0000000000000000 RBX: ffffb4ae4f7b7de0 RCX: ffff8ac10662c000 [ 203.735754] RDX: ffff8ac0c750be00 RSI: ffff8ac10662c000 RDI: ffff8ac0c004d400 [ 203.781832] RBP: ffff8ac0c039cea0 R08: 0000000000000000 R09: 0000000000000000 [ 203.781839] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 203.781842] R13: ffff8ac10662c000 R14: ffff8ac0c004d400 R15: ffff8ac10662c008 [ 203.781846] FS: 00007f4cd8a67740(0000) GS:ffff8ad798880000(0000) knlGS:0000000000000000 [ 203.781851] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 203.781855] CR2: 0000559766a74028 CR3: 00000001804c4000 CR4: 00000000001506f0 [ 203.781862] Call Trace: [ 203.781870] [ 203.851949] trace_event_buffer_commit+0x1ea/0x250 [ 203.851967] trace_event_raw_event_sys_enter+0x83/0xe0 [ 203.851983] syscall_trace_enter.isra.0+0x182/0x1a0 [ 203.851990] do_syscall_64+0x3a/0xe0 [ 203.852075] entry_SYSCALL_64_after_hwframe+0x6e/0x76 [ 203.852090] RIP: 0033:0x7f4cd870fa77 [ 203.982920] Code: 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 90 b8 89 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e9 43 0e 00 f7 d8 64 89 01 48 [ 203.982932] RSP: 002b:00007fff99717dd8 EFLAGS: 00000246 ORIG_RAX: 0000000000000089 [ 203.982942] RAX: ffffffffffffffda RBX: 0000558ea1d7b6f0 RCX: 00007f4cd870fa77 [ 203.982948] RDX: 0000000000000000 RSI: 00007fff99717de0 RDI: 0000558ea1d7b6f0 [ 203.982957] RBP: 00007fff99717de0 R08: 00007fff997180e0 R09: 00007fff997180e0 [ 203.982962] R10: 00007fff997180e0 R11: 0000000000000246 R12: 00007fff99717f40 [ 204.049239] R13: 00007fff99718590 R14: 0000558e9f2127a8 R15: 00007fff997180b0 [ 204.049256] For instance, it can be triggered by running these two commands in parallel: $ while true; do echo hist:key=id.syscall:val=hitcount > \ /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger; done $ stress-ng --sysinfo $(nproc) The warning indicates that the current ring_buffer_per_cpu is not in the committing state. It happens because the active ring_buffer_event doesn't actually come from the ring_buffer_per_cpu but is allocated from trace_buffered_event. The bug is in function trace_buffered_event_disable() where the following normally happens: * The code invokes disable_trace_buffered_event() via smp_call_function_many() and follows it by synchronize_rcu(). This increments the per-CPU variable trace_buffered_event_cnt on each target CPU and grants trace_buffered_event_disable() the exclusive access to the per-CPU variable trace_buffered_event. * Maintenance is performed on trace_buffered_event, all per-CPU event buffers get freed. * The code invokes enable_trace_buffered_event() via smp_call_function_many(). This decrements trace_buffered_event_cnt and releases the access to trace_buffered_event. A problem is that smp_call_function_many() runs a given function on all target CPUs except on the current one. The following can then occur: * Task X executing trace_buffered_event_disable() runs on CPU 0. * The control reaches synchronize_rcu() and the task gets rescheduled on another CPU 1. * The RCU synchronization finishes. At this point, trace_buffered_event_disable() has the exclusive access to all trace_buffered_event variables except trace_buffered_event[CPU0] because trace_buffered_event_cnt[CPU0] is never incremented and if the buffer is currently unused, remains set to 0. * A different task Y is scheduled on CPU 0 and hits a trace event. The code in trace_event_buffer_lock_reserve() sees that trace_buffered_event_cnt[CPU0] is set to 0 and decides the use the buffer provided by trace_buffered_event[CPU0]. * Task X continues its execution in trace_buffered_event_disable(). The code incorrectly frees the event buffer pointed by trace_buffered_event[CPU0] and resets the variable to NULL. * Task Y writes event data to the now freed buffer and later detects the created inconsistency. The issue is observable since commit dea499781a11 ("tracing: Fix warning in trace_buffered_event_disable()") which moved the call of trace_buffered_event_disable() in __ftrace_event_enable_disable() earlier, prior to invoking call->class->reg(.. TRACE_REG_UNREGISTER ..). The underlying problem in trace_buffered_event_disable() is however present since the original implementation in commit 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events"). Fix the problem by replacing the two smp_call_function_many() calls with on_each_cpu_mask() which invokes a given callback on all CPUs. Link: https://lore.kernel.org/all/20231127151248.7232-2-petr.pavlu@suse.com/ Link: https://lkml.kernel.org/r/20231205161736.19663-2-petr.pavlu@suse.com Cc: stable@vger.kernel.org Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events") Fixes: dea499781a11 ("tracing: Fix warning in trace_buffered_event_disable()") Signed-off-by: Petr Pavlu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2492c6c76850..6aeffa4a6994 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2781,11 +2781,9 @@ void trace_buffered_event_disable(void) if (--trace_buffered_event_ref) return; - preempt_disable(); /* For each CPU, set the buffer as used. */ - smp_call_function_many(tracing_buffer_mask, - disable_trace_buffered_event, NULL, 1); - preempt_enable(); + on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event, + NULL, true); /* Wait for all current users to finish */ synchronize_rcu(); @@ -2800,11 +2798,9 @@ void trace_buffered_event_disable(void) */ smp_wmb(); - preempt_disable(); /* Do the work on each cpu */ - smp_call_function_many(tracing_buffer_mask, - enable_trace_buffered_event, NULL, 1); - preempt_enable(); + on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, + true); } static struct trace_buffer *temp_buffer; -- cgit From 34209fe83ef8404353f91ab4ea4035dbc9922d04 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Tue, 5 Dec 2023 17:17:35 +0100 Subject: tracing: Fix a warning when allocating buffered events fails Function trace_buffered_event_disable() produces an unexpected warning when the previous call to trace_buffered_event_enable() fails to allocate pages for buffered events. The situation can occur as follows: * The counter trace_buffered_event_ref is at 0. * The soft mode gets enabled for some event and trace_buffered_event_enable() is called. The function increments trace_buffered_event_ref to 1 and starts allocating event pages. * The allocation fails for some page and trace_buffered_event_disable() is called for cleanup. * Function trace_buffered_event_disable() decrements trace_buffered_event_ref back to 0, recognizes that it was the last use of buffered events and frees all allocated pages. * The control goes back to trace_buffered_event_enable() which returns. The caller of trace_buffered_event_enable() has no information that the function actually failed. * Some time later, the soft mode is disabled for the same event. Function trace_buffered_event_disable() is called. It warns on "WARN_ON_ONCE(!trace_buffered_event_ref)" and returns. Buffered events are just an optimization and can handle failures. Make trace_buffered_event_enable() exit on the first failure and left any cleanup later to when trace_buffered_event_disable() is called. Link: https://lore.kernel.org/all/20231127151248.7232-2-petr.pavlu@suse.com/ Link: https://lkml.kernel.org/r/20231205161736.19663-3-petr.pavlu@suse.com Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events") Signed-off-by: Petr Pavlu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6aeffa4a6994..ef72354f61ce 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2728,8 +2728,11 @@ void trace_buffered_event_enable(void) for_each_tracing_cpu(cpu) { page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_NORETRY, 0); - if (!page) - goto failed; + /* This is just an optimization and can handle failures */ + if (!page) { + pr_err("Failed to allocate event buffer\n"); + break; + } event = page_address(page); memset(event, 0, sizeof(*event)); @@ -2743,10 +2746,6 @@ void trace_buffered_event_enable(void) WARN_ON_ONCE(1); preempt_enable(); } - - return; - failed: - trace_buffered_event_disable(); } static void enable_trace_buffered_event(void *data) -- cgit From c0591b1cccf708a47bc465c62436d669a4213323 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Tue, 5 Dec 2023 17:17:36 +0100 Subject: tracing: Fix a possible race when disabling buffered events Function trace_buffered_event_disable() is responsible for freeing pages backing buffered events and this process can run concurrently with trace_event_buffer_lock_reserve(). The following race is currently possible: * Function trace_buffered_event_disable() is called on CPU 0. It increments trace_buffered_event_cnt on each CPU and waits via synchronize_rcu() for each user of trace_buffered_event to complete. * After synchronize_rcu() is finished, function trace_buffered_event_disable() has the exclusive access to trace_buffered_event. All counters trace_buffered_event_cnt are at 1 and all pointers trace_buffered_event are still valid. * At this point, on a different CPU 1, the execution reaches trace_event_buffer_lock_reserve(). The function calls preempt_disable_notrace() and only now enters an RCU read-side critical section. The function proceeds and reads a still valid pointer from trace_buffered_event[CPU1] into the local variable "entry". However, it doesn't yet read trace_buffered_event_cnt[CPU1] which happens later. * Function trace_buffered_event_disable() continues. It frees trace_buffered_event[CPU1] and decrements trace_buffered_event_cnt[CPU1] back to 0. * Function trace_event_buffer_lock_reserve() continues. It reads and increments trace_buffered_event_cnt[CPU1] from 0 to 1. This makes it believe that it can use the "entry" that it already obtained but the pointer is now invalid and any access results in a use-after-free. Fix the problem by making a second synchronize_rcu() call after all trace_buffered_event values are set to NULL. This waits on all potential users in trace_event_buffer_lock_reserve() that still read a previous pointer from trace_buffered_event. Link: https://lore.kernel.org/all/20231127151248.7232-2-petr.pavlu@suse.com/ Link: https://lkml.kernel.org/r/20231205161736.19663-4-petr.pavlu@suse.com Cc: stable@vger.kernel.org Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events") Signed-off-by: Petr Pavlu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ef72354f61ce..fbcd3bafb93e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2791,13 +2791,17 @@ void trace_buffered_event_disable(void) free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); per_cpu(trace_buffered_event, cpu) = NULL; } + /* - * Make sure trace_buffered_event is NULL before clearing - * trace_buffered_event_cnt. + * Wait for all CPUs that potentially started checking if they can use + * their event buffer only after the previous synchronize_rcu() call and + * they still read a valid pointer from trace_buffered_event. It must be + * ensured they don't see cleared trace_buffered_event_cnt else they + * could wrongly decide to use the pointed-to buffer which is now freed. */ - smp_wmb(); + synchronize_rcu(); - /* Do the work on each cpu */ + /* For each CPU, relinquish the buffer */ on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, true); } -- cgit From 235f2b548d7f4ac5931d834f05d3f7f5166a2e72 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Thu, 23 Nov 2023 16:19:41 +0800 Subject: scsi: be2iscsi: Fix a memleak in beiscsi_init_wrb_handle() When an error occurs in the for loop of beiscsi_init_wrb_handle(), we should free phwi_ctxt->be_wrbq before returning an error code to prevent potential memleak. Fixes: a7909b396ba7 ("[SCSI] be2iscsi: Fix dynamic CID allocation Mechanism in driver") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20231123081941.24854-1-dinghao.liu@zju.edu.cn Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/be2iscsi/be_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index e48f14ad6dfd..06acb5ff609e 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -2710,6 +2710,7 @@ init_wrb_hndl_failed: kfree(pwrb_context->pwrb_handle_base); kfree(pwrb_context->pwrb_handle_basestd); } + kfree(phwi_ctxt->be_wrbq); return -ENOMEM; } -- cgit From 8f1752723019db900fb60a5b9d0dfd3a2bdea36c Mon Sep 17 00:00:00 2001 From: Zizhi Wo Date: Fri, 1 Dec 2023 22:50:48 +0800 Subject: ksmbd: fix memory leak in smb2_lock() In smb2_lock(), if setup_async_work() executes successfully, work->cancel_argv will bind the argv that generated by kmalloc(). And release_async_work() is called in ksmbd_conn_try_dequeue_request() or smb2_lock() to release argv. However, when setup_async_work function fails, work->cancel_argv has not been bound to the argv, resulting in the previously allocated argv not being released. Call kfree() to fix it. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Signed-off-by: Zizhi Wo Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index d369b98a6e10..bf1dea10c9e7 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -7080,6 +7080,7 @@ skip: smb2_remove_blocked_lock, argv); if (rc) { + kfree(argv); err = -ENOMEM; goto out; } -- cgit From d045850b628aaf931fc776c90feaf824dca5a1cf Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 4 Dec 2023 22:07:03 +0900 Subject: ksmbd: set epoch in create context v2 lease To support v2 lease(directory lease), ksmbd set epoch in create context v2 lease response. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 5 ++++- fs/smb/server/oplock.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 50c68beb71d6..ff5c83b1fb85 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -104,7 +104,7 @@ static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx) lease->duration = lctx->duration; memcpy(lease->parent_lease_key, lctx->parent_lease_key, SMB2_LEASE_KEY_SIZE); lease->version = lctx->version; - lease->epoch = 0; + lease->epoch = le16_to_cpu(lctx->epoch); INIT_LIST_HEAD(&opinfo->lease_entry); opinfo->o_lease = lease; @@ -1032,6 +1032,7 @@ static void copy_lease(struct oplock_info *op1, struct oplock_info *op2) SMB2_LEASE_KEY_SIZE); lease2->duration = lease1->duration; lease2->flags = lease1->flags; + lease2->epoch = lease1->epoch++; } static int add_lease_global_list(struct oplock_info *opinfo) @@ -1364,6 +1365,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) memcpy(buf->lcontext.LeaseKey, lease->lease_key, SMB2_LEASE_KEY_SIZE); buf->lcontext.LeaseFlags = lease->flags; + buf->lcontext.Epoch = cpu_to_le16(++lease->epoch); buf->lcontext.LeaseState = lease->state; memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key, SMB2_LEASE_KEY_SIZE); @@ -1423,6 +1425,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; lreq->flags = lc->lcontext.LeaseFlags; + lreq->epoch = lc->lcontext.Epoch; lreq->duration = lc->lcontext.LeaseDuration; memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey, SMB2_LEASE_KEY_SIZE); diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index 4b0fe6da7694..ad31439c61fe 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -34,6 +34,7 @@ struct lease_ctx_info { __le32 flags; __le64 duration; __u8 parent_lease_key[SMB2_LEASE_KEY_SIZE]; + __le16 epoch; int version; }; -- cgit From 18dd1c367c31d0a060f737d48345747662369b64 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 4 Dec 2023 22:10:16 +0900 Subject: ksmbd: set v2 lease capability Set SMB2_GLOBAL_CAP_DIRECTORY_LEASING to ->capabilities to inform server support directory lease to client. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 4 ---- fs/smb/server/smb2ops.c | 9 ++++++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index ff5c83b1fb85..5ef6af68d0de 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1105,10 +1105,6 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, bool prev_op_has_lease; __le32 prev_op_state = 0; - /* not support directory lease */ - if (S_ISDIR(file_inode(fp->filp)->i_mode)) - return 0; - opinfo = alloc_opinfo(work, pid, tid); if (!opinfo) return -ENOMEM; diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c index aed7704a0672..27a9dce3e03a 100644 --- a/fs/smb/server/smb2ops.c +++ b/fs/smb/server/smb2ops.c @@ -221,7 +221,8 @@ void init_smb3_0_server(struct ksmbd_conn *conn) conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; + conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | + SMB2_GLOBAL_CAP_DIRECTORY_LEASING; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION && conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION) @@ -245,7 +246,8 @@ void init_smb3_02_server(struct ksmbd_conn *conn) conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; + conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | + SMB2_GLOBAL_CAP_DIRECTORY_LEASING; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && @@ -270,7 +272,8 @@ int init_smb3_11_server(struct ksmbd_conn *conn) conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; + conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | + SMB2_GLOBAL_CAP_DIRECTORY_LEASING; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && -- cgit From eb547407f3572d2110cb1194ecd8865b3371a7a4 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 5 Dec 2023 21:02:03 +0900 Subject: ksmbd: downgrade RWH lease caching state to RH for directory RWH(Read + Write + Handle) caching state is not supported for directory. ksmbd downgrade it to RH for directory if client send RWH caching lease state. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 9 +++++++-- fs/smb/server/oplock.h | 2 +- fs/smb/server/smb2pdu.c | 8 ++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 5ef6af68d0de..57950ba7e925 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1398,10 +1398,11 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) /** * parse_lease_state() - parse lease context containted in file open request * @open_req: buffer containing smb2 file open(create) request + * @is_dir: whether leasing file is directory * * Return: oplock state, -ENOENT if create lease context not found */ -struct lease_ctx_info *parse_lease_state(void *open_req) +struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir) { struct create_context *cc; struct smb2_create_req *req = (struct smb2_create_req *)open_req; @@ -1419,7 +1420,11 @@ struct lease_ctx_info *parse_lease_state(void *open_req) struct create_lease_v2 *lc = (struct create_lease_v2 *)cc; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); - lreq->req_state = lc->lcontext.LeaseState; + if (is_dir) + lreq->req_state = lc->lcontext.LeaseState & + ~SMB2_LEASE_WRITE_CACHING_LE; + else + lreq->req_state = lc->lcontext.LeaseState; lreq->flags = lc->lcontext.LeaseFlags; lreq->epoch = lc->lcontext.Epoch; lreq->duration = lc->lcontext.LeaseDuration; diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index ad31439c61fe..672127318c75 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -109,7 +109,7 @@ void opinfo_put(struct oplock_info *opinfo); /* Lease related functions */ void create_lease_buf(u8 *rbuf, struct lease *lease); -struct lease_ctx_info *parse_lease_state(void *open_req); +struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir); __u8 smb2_map_lease_to_oplock(__le32 lease_state); int lease_read_to_write(struct oplock_info *opinfo); diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index bf1dea10c9e7..2d3b8acb21e7 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2732,10 +2732,6 @@ int smb2_open(struct ksmbd_work *work) } } - req_op_level = req->RequestedOplockLevel; - if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) - lc = parse_lease_state(req); - if (le32_to_cpu(req->ImpersonationLevel) > le32_to_cpu(IL_DELEGATE)) { pr_err("Invalid impersonationlevel : 0x%x\n", le32_to_cpu(req->ImpersonationLevel)); @@ -3215,6 +3211,10 @@ int smb2_open(struct ksmbd_work *work) need_truncate = 1; } + req_op_level = req->RequestedOplockLevel; + if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) + lc = parse_lease_state(req, S_ISDIR(file_inode(filp)->i_mode)); + share_ret = ksmbd_smb_check_shared_mode(fp->filp, fp); if (!test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_OPLOCKS) || (req_op_level == SMB2_OPLOCK_LEVEL_LEASE && -- cgit From 92414333eb375ed64f4ae92d34d579e826936480 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 5 Dec 2023 21:49:29 -0300 Subject: smb: client: fix potential NULL deref in parse_dfs_referrals() If server returned no data for FSCTL_DFS_GET_REFERRALS, @dfs_rsp will remain NULL and then parse_dfs_referrals() will dereference it. Fix this by returning -EIO when no output data is returned. Besides, we can't fix it in SMB2_ioctl() as some FSCTLs are allowed to return no data as per MS-SMB2 2.2.32. Fixes: 9d49640a21bf ("CIFS: implement get_dfs_refer for SMB2+") Cc: stable@vger.kernel.org Reported-by: Robert Morris Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 45931115f475..fcfb6566b899 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2836,6 +2836,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, usleep_range(512, 2048); } while (++retry_count < 5); + if (!rc && !dfs_rsp) + rc = -EIO; if (rc) { if (!is_retryable_error(rc) && rc != -ENOENT && rc != -EOPNOTSUPP) cifs_tcon_dbg(VFS, "%s: ioctl error: rc=%d\n", __func__, rc); -- cgit From 691a41d8da4b34fe72f09393505f55f28a8f34ec Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 4 Dec 2023 14:01:59 +0000 Subject: cifs: Fix non-availability of dedup breaking generic/304 Deduplication isn't supported on cifs, but cifs doesn't reject it, instead treating it as extent duplication/cloning. This can cause generic/304 to go silly and run for hours on end. Fix cifs to indicate EOPNOTSUPP if REMAP_FILE_DEDUP is set in ->remap_file_range(). Note that it's unclear whether or not commit b073a08016a1 is meant to cause cifs to return an error if REMAP_FILE_DEDUP. Fixes: b073a08016a1 ("cifs: fix that return -EINVAL when do dedupe operation") Cc: stable@vger.kernel.org Suggested-by: Dave Chinner cc: Xiaoli Feng cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: Darrick Wong cc: fstests@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/3876191.1701555260@warthog.procyon.org.uk/ Signed-off-by: David Howells Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index c5fc0a35bb19..2131638f26d0 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1277,7 +1277,9 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, unsigned int xid; int rc; - if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + if (remap_flags & REMAP_FILE_DEDUP) + return -EOPNOTSUPP; + if (remap_flags & ~REMAP_FILE_ADVISORY) return -EINVAL; cifs_dbg(FYI, "clone range\n"); -- cgit From a61f46e1102545cf1fb5f19992288265362cefb0 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 4 Dec 2023 16:01:48 +0100 Subject: net: veth: fix packet segmentation in veth_convert_skb_to_xdp_buff Based on the previous allocated packet, page_offset can be not null in veth_convert_skb_to_xdp_buff routine. Take into account page fragment offset during the skb paged area copy in veth_convert_skb_to_xdp_buff(). Fixes: 2d0de67da51a ("net: veth: use newly added page pool API for veth with xdp") Signed-off-by: Lorenzo Bianconi Reviewed-by: Yunsheng Lin Link: https://lore.kernel.org/r/eddfe549e7e626870071930964ac3c38a1dc8068.1701702000.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/veth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 57efb3454c57..977861c46b1f 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -790,7 +790,8 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq, skb_add_rx_frag(nskb, i, page, page_offset, size, truesize); - if (skb_copy_bits(skb, off, page_address(page), + if (skb_copy_bits(skb, off, + page_address(page) + page_offset, size)) { consume_skb(nskb); goto drop; -- cgit From d007caaaf052f82ca2340d4c7b32d04a3f5dbf3f Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 4 Dec 2023 10:40:04 +0800 Subject: net: bnxt: fix a potential use-after-free in bnxt_init_tc When flow_indr_dev_register() fails, bnxt_init_tc will free bp->tc_info through kfree(). However, the caller function bnxt_init_one() will ignore this failure and call bnxt_shutdown_tc() on failure of bnxt_dl_register(), where a use-after-free happens. Fix this issue by setting bp->tc_info to NULL after kfree(). Fixes: 627c89d00fb9 ("bnxt_en: flow_offload: offload tunnel decap rules via indirect callbacks") Signed-off-by: Dinghao Liu Reviewed-by: Pavan Chebbi Reviewed-by: Michael Chan Reviewed-by: Somnath Kotur Link: https://lore.kernel.org/r/20231204024004.8245-1-dinghao.liu@zju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 38d89d80b4a9..273c9ba48f09 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -2075,6 +2075,7 @@ destroy_flow_table: rhashtable_destroy(&tc_info->flow_table); free_tc_info: kfree(tc_info); + bp->tc_info = NULL; return rc; } -- cgit From 58d3aade20cdddbac6c9707ac0f3f5f8c1278b74 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 4 Dec 2023 17:08:05 +0100 Subject: tcp: fix mid stream window clamp. After the blamed commit below, if the user-space application performs window clamping when tp->rcv_wnd is 0, the TCP socket will never be able to announce a non 0 receive window, even after completely emptying the receive buffer and re-setting the window clamp to higher values. Refactor tcp_set_window_clamp() to address the issue: when the user decreases the current clamp value, set rcv_ssthresh according to the same logic used at buffer initialization, but ensuring reserved mem provisioning. To avoid code duplication factor-out the relevant bits from tcp_adjust_rcv_ssthresh() in a new helper and reuse it in the above scenario. When increasing the clamp value, give the rcv_ssthresh a chance to grow according to previously implemented heuristic. Fixes: 3aa7857fe1d7 ("tcp: enable mid stream window clamp") Reported-by: David Gibson Reported-by: Stefano Brivio Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/705dad54e6e6e9a010e571bf58e0b35a8ae70503.1701706073.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 9 +++++++-- net/ipv4/tcp.c | 22 +++++++++++++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index d2f0736b76b8..144ba48bb07b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1514,17 +1514,22 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } -static inline void tcp_adjust_rcv_ssthresh(struct sock *sk) +static inline void __tcp_adjust_rcv_ssthresh(struct sock *sk, u32 new_ssthresh) { int unused_mem = sk_unused_reserved_mem(sk); struct tcp_sock *tp = tcp_sk(sk); - tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); + tp->rcv_ssthresh = min(tp->rcv_ssthresh, new_ssthresh); if (unused_mem) tp->rcv_ssthresh = max_t(u32, tp->rcv_ssthresh, tcp_win_from_space(sk, unused_mem)); } +static inline void tcp_adjust_rcv_ssthresh(struct sock *sk) +{ + __tcp_adjust_rcv_ssthresh(sk, 4U * tcp_sk(sk)->advmss); +} + void tcp_cleanup_rbuf(struct sock *sk, int copied); void __tcp_cleanup_rbuf(struct sock *sk, int copied); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 53bcc17c91e4..c9f078224569 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3368,9 +3368,25 @@ int tcp_set_window_clamp(struct sock *sk, int val) return -EINVAL; tp->window_clamp = 0; } else { - tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ? - SOCK_MIN_RCVBUF / 2 : val; - tp->rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp); + u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp; + u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ? + SOCK_MIN_RCVBUF / 2 : val; + + if (new_window_clamp == old_window_clamp) + return 0; + + tp->window_clamp = new_window_clamp; + if (new_window_clamp < old_window_clamp) { + /* need to apply the reserved mem provisioning only + * when shrinking the window clamp + */ + __tcp_adjust_rcv_ssthresh(sk, tp->window_clamp); + + } else { + new_rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp); + tp->rcv_ssthresh = max(new_rcv_ssthresh, + tp->rcv_ssthresh); + } } return 0; } -- cgit From 0ceb3860a67652f9d36dfdecfcd2cb3eb2f4537d Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 4 Dec 2023 11:22:33 -0800 Subject: ionic: fix snprintf format length warning Our friendly kernel test robot has reminded us that with a new check we have a warning about a potential string truncation. In this case it really doesn't hurt anything, but it is worth addressing especially since there really is no reason to reserve so many bytes for our queue names. It seems that cutting the queue name buffer length in half stops the complaint. Fixes: c06107cabea3 ("ionic: more ionic name tweaks") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311300201.lO8v7mKU-lkp@intel.com/ Signed-off-by: Shannon Nelson Reviewed-by: Brett Creeley Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20231204192234.21017-2-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_dev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index 1dbc3cb50b1d..9b5463040075 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -223,7 +223,7 @@ struct ionic_desc_info { void *cb_arg; }; -#define IONIC_QUEUE_NAME_MAX_SZ 32 +#define IONIC_QUEUE_NAME_MAX_SZ 16 struct ionic_queue { struct device *dev; -- cgit From 4115ba677c35f694b62298e55f0e04ce84eed469 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 4 Dec 2023 11:22:34 -0800 Subject: ionic: Fix dim work handling in split interrupt mode Currently ionic_dim_work() is incorrect when in split interrupt mode. This is because the interrupt rate is only being changed for the Rx side even for dim running on Tx. Fix this by using the qcq from the container_of macro. Also, introduce some local variables for a bit of cleanup. Fixes: a6ff85e0a2d9 ("ionic: remove intr coalesce update from napi") Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20231204192234.21017-3-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index edc14730ce88..bad919343180 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -49,24 +49,24 @@ static void ionic_lif_queue_identify(struct ionic_lif *lif); static void ionic_dim_work(struct work_struct *work) { struct dim *dim = container_of(work, struct dim, work); + struct ionic_intr_info *intr; struct dim_cq_moder cur_moder; struct ionic_qcq *qcq; + struct ionic_lif *lif; u32 new_coal; cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); qcq = container_of(dim, struct ionic_qcq, dim); - new_coal = ionic_coal_usec_to_hw(qcq->q.lif->ionic, cur_moder.usec); + lif = qcq->q.lif; + new_coal = ionic_coal_usec_to_hw(lif->ionic, cur_moder.usec); new_coal = new_coal ? new_coal : 1; - if (qcq->intr.dim_coal_hw != new_coal) { - unsigned int qi = qcq->cq.bound_q->index; - struct ionic_lif *lif = qcq->q.lif; - - qcq->intr.dim_coal_hw = new_coal; + intr = &qcq->intr; + if (intr->dim_coal_hw != new_coal) { + intr->dim_coal_hw = new_coal; ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, - lif->rxqcqs[qi]->intr.index, - qcq->intr.dim_coal_hw); + intr->index, intr->dim_coal_hw); } dim->state = DIM_START_MEASURE; -- cgit From 7aebaabfede75feda5c5d16991da74124aee428d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 4 Dec 2023 15:44:15 -0500 Subject: bcachefs: Fix creating snapshot with implict source When creating a snapshot without specifying the source subvolume, we use the subvolume containing the new snapshot. Previously, this worked if the directory containing the new snapshot was the subvolume root - but we were using the incorrect helper, and got a subvolume ID of 0 when the parent directory wasn't the root of the subvolume, causing an emergency read-only. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 5a39bcb597a3..a70b7a03057d 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -413,7 +413,7 @@ retry: if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) && !arg.src_ptr) - snapshot_src.subvol = to_bch_ei(dir)->ei_inode.bi_subvol; + snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol; inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), dst_dentry, arg.mode|S_IFDIR, -- cgit From 5796230582f6131fa217f0a1700783c459c847d2 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 5 Dec 2023 08:24:38 -0500 Subject: bcachefs: don't attempt rw on unfreeze when shutdown The internal freeze mechanism in bcachefs mostly reuses the generic rw<->ro transition code. If the fs happens to shutdown during or after freeze, a transition back to rw can fail. This is expected, but returning an error from the unfreeze callout prevents the filesystem from being unfrozen. Skip the read write transition if the fs is shutdown. This allows the fs to unfreeze at the vfs level so writes will no longer block, but will still fail due to the emergency read-only state of the fs. Signed-off-by: Brian Foster Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 4d51be813509..371565e02ff2 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1733,6 +1733,9 @@ static int bch2_unfreeze(struct super_block *sb) struct bch_fs *c = sb->s_fs_info; int ret; + if (test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) + return 0; + down_write(&c->state_lock); ret = bch2_fs_read_write(c); up_write(&c->state_lock); -- cgit From d863a2f4f47560d71447650822857fc3d2aea715 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 6 Nov 2023 16:13:25 +0100 Subject: arm64: dts: freescale: imx8-ss-lsio: Fix #pwm-cells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i.MX8QM/QXP supports inverted PWM output, thus #pwm-cells needs to be set to 3. Fixes: 23fa99b205ea ("arm64: dts: freescale: imx8-ss-lsio: add support for lsio_pwm0-3") Signed-off-by: Alexander Stein Reviewed-by: Uwe Kleine-König Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi index 49ad3413db94..7e510b21bbac 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi @@ -29,7 +29,7 @@ lsio_subsys: bus@5d000000 { <&pwm0_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = ; status = "disabled"; }; @@ -42,7 +42,7 @@ lsio_subsys: bus@5d000000 { <&pwm1_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = ; status = "disabled"; }; @@ -55,7 +55,7 @@ lsio_subsys: bus@5d000000 { <&pwm2_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = ; status = "disabled"; }; @@ -68,7 +68,7 @@ lsio_subsys: bus@5d000000 { <&pwm3_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = ; status = "disabled"; }; -- cgit From 7cef7c0b1dea1e17c7913826b74403f4ab7edeb9 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 6 Nov 2023 16:13:26 +0100 Subject: arm64: dts: freescale: imx8-ss-dma: Fix #pwm-cells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i.MX8QXP supports inverted PWM output, thus #pwm-cells needs to be set to 3. Fixes: f1d6a6b991ef9 ("arm64: dts: imx8qxp: add adma_pwm in adma") Signed-off-by: Alexander Stein Reviewed-by: Uwe Kleine-König Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi index ce66d30a4839..b0bb77150adc 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi @@ -149,7 +149,7 @@ dma_subsys: bus@5a000000 { clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; power-domains = <&pd IMX_SC_R_LCD_0_PWM_0>; }; -- cgit From b37e75bddc35d4a40a4caeb9921cb95c33c3eba9 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Fri, 10 Nov 2023 15:25:31 +0800 Subject: arm64: dts: imx8qm: Add imx8qm's own pm to avoid panic during startup Add imx8qm's own pm, otherwise the following panic will occur during the startup process: Kernel panic - not syncing: Asynchronous SError Interrupt Hardware name: Freescale i.MX8QM MEK (DT) Workqueue: events_unbound deferred_probe_work_func Call trace: dump_backtrace+0x98/0xf0 show_stack+0x18/0x24 dump_stack_lvl+0x60/0xac dump_stack+0x18/0x24 panic+0x340/0x3a0 nmi_panic+0x8c/0x90 arm64_serror_panic+0x6c/0x78 do_serror+0x3c/0x78 el1h_64_error_handler+0x38/0x50 el1h_64_error+0x64/0x68 fsl_edma_chan_mux+0x98/0xdc fsl_edma_probe+0x278/0x898 platform_probe+0x68/0xd8 really_probe+0x110/0x27c __driver_probe_device+0x78/0x12c driver_probe_device+0x3c/0x118 __device_attach_driver+0xb8/0xf8 bus_for_each_drv+0x84/0xe4 __device_attach+0xfc/0x18c device_initial_probe+0x14/0x20 Fixes: e4d7a330fb7a ("arm64: dts: imx8: add edma[0..3]") Signed-off-by: Xiaolei Wang Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi index 01539df335f8..8439dd6b3935 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi @@ -96,6 +96,17 @@ status = "okay"; }; +&edma3 { + power-domains = <&pd IMX_SC_R_DMA_1_CH0>, + <&pd IMX_SC_R_DMA_1_CH1>, + <&pd IMX_SC_R_DMA_1_CH2>, + <&pd IMX_SC_R_DMA_1_CH3>, + <&pd IMX_SC_R_DMA_1_CH4>, + <&pd IMX_SC_R_DMA_1_CH5>, + <&pd IMX_SC_R_DMA_1_CH6>, + <&pd IMX_SC_R_DMA_1_CH7>; +}; + &flexcan1 { fsl,clk-source = /bits/ 8 <1>; }; -- cgit From d4cb68a5d3a1ed30ecaf1591eb901523faa13496 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 13 Nov 2023 18:02:29 +0800 Subject: arm64: dts: imx93: correct mediamix power "nic_media" clock should be enabled when power on/off mediamix, otherwise power on/off will fail. Because "media_axi_root" clock is the parent of "nic_media" clock, so replace "media_axi_clock" clock with "nic_media" clock in mediamix node. Link: https://github.com/nxp-imx/linux-imx/commit/ce18e6d0071ae9df5486af8613708ebe920484be Fixes: f2d03ba997cb ("arm64: dts: imx93: reorder device nodes") Fixes: e85d3458a804 ("arm64: dts: imx93: add src node") Reviewed-by: Jacky Bai Signed-off-by: Peng Fan Signed-off-by: Liu Ying Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index ceccf4766440..0053f3c89f5f 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -417,7 +417,7 @@ compatible = "fsl,imx93-src-slice"; reg = <0x44462400 0x400>, <0x44465800 0x400>; #power-domain-cells = <0>; - clocks = <&clk IMX93_CLK_MEDIA_AXI>, + clocks = <&clk IMX93_CLK_NIC_MEDIA_GATE>, <&clk IMX93_CLK_MEDIA_APB>; }; }; -- cgit From 4af1b258b68c5c948601082d5ce858ba31eb64f4 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 15 Nov 2023 11:56:20 +0800 Subject: arm64: dts: imx93: update gpio node name to align with register address Change the gpio node name to align with register address Fixes: c1d0782b5fc3 ("arm64: dts: imx93: update gpio node") Signed-off-by: Haibo Chen Reviewed-by: Marco Felsch Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 0053f3c89f5f..34c0540276d1 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -957,7 +957,7 @@ }; }; - gpio2: gpio@43810080 { + gpio2: gpio@43810000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x43810000 0x1000>; gpio-controller; @@ -972,7 +972,7 @@ gpio-ranges = <&iomuxc 0 4 30>; }; - gpio3: gpio@43820080 { + gpio3: gpio@43820000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x43820000 0x1000>; gpio-controller; @@ -988,7 +988,7 @@ <&iomuxc 26 34 2>, <&iomuxc 28 0 4>; }; - gpio4: gpio@43830080 { + gpio4: gpio@43830000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x43830000 0x1000>; gpio-controller; @@ -1003,7 +1003,7 @@ gpio-ranges = <&iomuxc 0 38 28>, <&iomuxc 28 36 2>; }; - gpio1: gpio@47400080 { + gpio1: gpio@47400000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x47400000 0x1000>; gpio-controller; -- cgit From 8ae06f1366390972fdcce0f0cee3cac0f63b3209 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 15 Nov 2023 11:56:21 +0800 Subject: arm64: dts: imx8ulp: update gpio node name to align with register address Change the gpio node name to align with register address. Fixes: ac7bcf48ddba ("arm64: dts: imx8ulp: update gpio node") Signed-off-by: Haibo Chen Reviewed-by: Marco Felsch Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8ulp.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi index f22c1ac391c9..c4a0082f30d3 100644 --- a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi @@ -483,7 +483,7 @@ }; }; - gpioe: gpio@2d000080 { + gpioe: gpio@2d000000 { compatible = "fsl,imx8ulp-gpio"; reg = <0x2d000000 0x1000>; gpio-controller; @@ -498,7 +498,7 @@ gpio-ranges = <&iomuxc1 0 32 24>; }; - gpiof: gpio@2d010080 { + gpiof: gpio@2d010000 { compatible = "fsl,imx8ulp-gpio"; reg = <0x2d010000 0x1000>; gpio-controller; @@ -534,7 +534,7 @@ }; }; - gpiod: gpio@2e200080 { + gpiod: gpio@2e200000 { compatible = "fsl,imx8ulp-gpio"; reg = <0x2e200000 0x1000>; gpio-controller; -- cgit From 1c2b1049af3f86545fcc5fae0fc725fb64b3a09e Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Wed, 22 Nov 2023 14:46:36 +0800 Subject: ARM: imx: Check return value of devm_kasprintf in imx_mmdc_perf_init devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Release the id allocated in 'mmdc_pmu_init' when 'devm_kasprintf' return NULL Suggested-by: Ahmad Fatoum Fixes: e76bdfd7403a ("ARM: imx: Added perf functionality to mmdc driver") Signed-off-by: Kunwu Chan Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mmdc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 2157493b78a9..df69af932375 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -501,6 +501,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "mmdc%d", ret); + if (!name) { + ret = -ENOMEM; + goto pmu_release_id; + } pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk; pmu_mmdc->devtype_data = (struct fsl_mmdc_devtype_data *)of_id->data; @@ -523,9 +527,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b pmu_register_err: pr_warn("MMDC Perf PMU failed (%d), disabled\n", ret); - ida_simple_remove(&mmdc_ida, pmu_mmdc->id); cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); hrtimer_cancel(&pmu_mmdc->hrtimer); +pmu_release_id: + ida_simple_remove(&mmdc_ida, pmu_mmdc->id); pmu_free: kfree(pmu_mmdc); return ret; -- cgit From 04179605ab604dba32571a05cd06423afc9eca19 Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Thu, 23 Nov 2023 11:48:12 +0100 Subject: arm64: dts: imx8-apalis: set wifi regulator to always-on Make sure that the wifi regulator is always on. The wifi driver itself puts the wifi module into suspend mode. If we cut the power the driver will crash when resuming from suspend. Signed-off-by: Stefan Eichenberger Signed-off-by: Francesco Dolcini Fixes: ad0de4ceb706 ("arm64: dts: freescale: add initial apalis imx8 aka quadmax module support") Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi index 5ce5fbf2b38e..f69b0c17560a 100644 --- a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi @@ -82,12 +82,9 @@ pinctrl-0 = <&pinctrl_wifi_pdn>; gpio = <&lsio_gpio1 28 GPIO_ACTIVE_HIGH>; enable-active-high; + regulator-always-on; regulator-name = "wifi_pwrdn_fake_regulator"; regulator-settling-time-us = <100>; - - regulator-state-mem { - regulator-off-in-suspend; - }; }; reg_pcie_switch: regulator-pcie-switch { -- cgit From 15a1c7f3e8d9c16e65644b83ad96895164fb2988 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 27 Nov 2023 06:34:14 +0100 Subject: MAINTAINERS: reinstate freescale ARM64 DT directory in i.MX entry The MAINTAINERS entry's F: currently only matches the 32-bit device trees, as commit 724ba6751532 ("ARM: dts: Move .dts files to vendor sub-directories") inadvertently dropped the 64-bit DT match when it added the 32 bit matches. The entry has a N: imx, which reduced the impact a bit, but still some board device trees may not contain the substring and would thus not be covered by the entry. Reinstate the missing F: line to restore previous behavior. Fixes: 724ba6751532 ("ARM: dts: Move .dts files to vendor sub-directories") Signed-off-by: Ahmad Fatoum Signed-off-by: Shawn Guo --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 97f51d5ec1cf..339a17370910 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2155,6 +2155,7 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git F: arch/arm/boot/dts/nxp/imx/ F: arch/arm/boot/dts/nxp/mxs/ +F: arch/arm64/boot/dts/freescale/ X: arch/arm64/boot/dts/freescale/fsl-* X: arch/arm64/boot/dts/freescale/qoriq-* X: drivers/media/i2c/ -- cgit From 397caf68e2d36532054cb14ae8995537f27f8b61 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 27 Nov 2023 17:05:01 +0100 Subject: ARM: dts: imx7: Declare timers compatible with fsl,imx6dl-gpt The timer nodes declare compatibility with "fsl,imx6sx-gpt", which itself is compatible with "fsl,imx6dl-gpt". Switch the fallback compatible from "fsl,imx6sx-gpt" to "fsl,imx6dl-gpt". Fixes: 949673450291 ("ARM: dts: add imx7d soc dtsi file") Signed-off-by: Philipp Zabel Signed-off-by: Roland Hieber Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/imx/imx7s.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi index 29b8fd03567a..5387da8a2a0a 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi @@ -454,7 +454,7 @@ }; gpt1: timer@302d0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302d0000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT1_ROOT_CLK>, @@ -463,7 +463,7 @@ }; gpt2: timer@302e0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302e0000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT2_ROOT_CLK>, @@ -473,7 +473,7 @@ }; gpt3: timer@302f0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302f0000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT3_ROOT_CLK>, @@ -483,7 +483,7 @@ }; gpt4: timer@30300000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x30300000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT4_ROOT_CLK>, -- cgit From 2b3a7a302c9804e463f2ea5b54dc3a6ad106a344 Mon Sep 17 00:00:00 2001 From: Jason Zhang Date: Wed, 6 Dec 2023 09:31:39 +0800 Subject: ALSA: pcm: fix out-of-bounds in snd_pcm_state_names The pcm state can be SNDRV_PCM_STATE_DISCONNECTED at disconnect callback, and there is not an entry of SNDRV_PCM_STATE_DISCONNECTED in snd_pcm_state_names. This patch adds the missing entry to resolve this issue. cat /proc/asound/card2/pcm0p/sub0/status That results in stack traces like the following: [ 99.702732][ T5171] Unexpected kernel BRK exception at EL1 [ 99.702774][ T5171] Internal error: BRK handler: f2005512 [#1] PREEMPT SMP [ 99.703858][ T5171] Modules linked in: bcmdhd(E) (...) [ 99.747425][ T5171] CPU: 3 PID: 5171 Comm: cat Tainted: G C OE 5.10.189-android13-4-00003-g4a17384380d8-ab11086999 #1 [ 99.748447][ T5171] Hardware name: Rockchip RK3588 CVTE V10 Board (DT) [ 99.749024][ T5171] pstate: 60400005 (nZCv daif +PAN -UAO -TCO BTYPE=--) [ 99.749616][ T5171] pc : snd_pcm_substream_proc_status_read+0x264/0x2bc [ 99.750204][ T5171] lr : snd_pcm_substream_proc_status_read+0xa4/0x2bc [ 99.750778][ T5171] sp : ffffffc0175abae0 [ 99.751132][ T5171] x29: ffffffc0175abb80 x28: ffffffc009a2c498 [ 99.751665][ T5171] x27: 0000000000000001 x26: ffffff810cbae6e8 [ 99.752199][ T5171] x25: 0000000000400cc0 x24: ffffffc0175abc60 [ 99.752729][ T5171] x23: 0000000000000000 x22: ffffff802f558400 [ 99.753263][ T5171] x21: ffffff81d8d8ff00 x20: ffffff81020cdc00 [ 99.753795][ T5171] x19: ffffff802d110000 x18: ffffffc014fbd058 [ 99.754326][ T5171] x17: 0000000000000000 x16: 0000000000000000 [ 99.754861][ T5171] x15: 000000000000c276 x14: ffffffff9a976fda [ 99.755392][ T5171] x13: 0000000065689089 x12: 000000000000d72e [ 99.755923][ T5171] x11: ffffff802d110000 x10: 00000000000000e0 [ 99.756457][ T5171] x9 : 9c431600c8385d00 x8 : 0000000000000008 [ 99.756990][ T5171] x7 : 0000000000000000 x6 : 000000000000003f [ 99.757522][ T5171] x5 : 0000000000000040 x4 : ffffffc0175abb70 [ 99.758056][ T5171] x3 : 0000000000000001 x2 : 0000000000000001 [ 99.758588][ T5171] x1 : 0000000000000000 x0 : 0000000000000000 [ 99.759123][ T5171] Call trace: [ 99.759404][ T5171] snd_pcm_substream_proc_status_read+0x264/0x2bc [ 99.759958][ T5171] snd_info_seq_show+0x54/0xa4 [ 99.760370][ T5171] seq_read_iter+0x19c/0x7d4 [ 99.760770][ T5171] seq_read+0xf0/0x128 [ 99.761117][ T5171] proc_reg_read+0x100/0x1f8 [ 99.761515][ T5171] vfs_read+0xf4/0x354 [ 99.761869][ T5171] ksys_read+0x7c/0x148 [ 99.762226][ T5171] __arm64_sys_read+0x20/0x30 [ 99.762625][ T5171] el0_svc_common+0xd0/0x1e4 [ 99.763023][ T5171] el0_svc+0x28/0x98 [ 99.763358][ T5171] el0_sync_handler+0x8c/0xf0 [ 99.763759][ T5171] el0_sync+0x1b8/0x1c0 [ 99.764118][ T5171] Code: d65f03c0 b9406102 17ffffae 94191565 (d42aa240) [ 99.764715][ T5171] ---[ end trace 1eeffa3e17c58e10 ]--- [ 99.780720][ T5171] Kernel panic - not syncing: BRK handler: Fatal exception Signed-off-by: Jason Zhang Cc: Link: https://lore.kernel.org/r/20231206013139.20506-1-jason.zhang@rock-chips.com Signed-off-by: Takashi Iwai --- sound/core/pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 20bb2d7c8d4b..6d0c9c37796c 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -253,6 +253,7 @@ static const char * const snd_pcm_state_names[] = { STATE(DRAINING), STATE(PAUSED), STATE(SUSPENDED), + STATE(DISCONNECTED), }; static const char * const snd_pcm_access_names[] = { -- cgit From 80d875cfc9d3711a029f234ef7d680db79e8fa4b Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Sun, 3 Dec 2023 01:14:41 +0900 Subject: ipv4: ip_gre: Avoid skb_pull() failure in ipgre_xmit() In ipgre_xmit(), skb_pull() may fail even if pskb_inet_may_pull() returns true. For example, applications can use PF_PACKET to create a malformed packet with no IP header. This type of packet causes a problem such as uninit-value access. This patch ensures that skb_pull() can pull the required size by checking the skb with pskb_network_may_pull() before skb_pull(). Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Signed-off-by: Shigeru Yoshida Reviewed-by: Eric Dumazet Reviewed-by: Suman Ghosh Link: https://lore.kernel.org/r/20231202161441.221135-1-syoshida@redhat.com Signed-off-by: Paolo Abeni --- net/ipv4/ip_gre.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 22a26d1d29a0..5169c3c72cff 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -635,15 +635,18 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, } if (dev->header_ops) { + int pull_len = tunnel->hlen + sizeof(struct iphdr); + if (skb_cow_head(skb, 0)) goto free_skb; tnl_params = (const struct iphdr *)skb->data; - /* Pull skb since ip_tunnel_xmit() needs skb->data pointing - * to gre header. - */ - skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); + if (!pskb_network_may_pull(skb, pull_len)) + goto free_skb; + + /* ip_tunnel_xmit() needs skb->data pointing to gre header. */ + skb_pull(skb, pull_len); skb_reset_mac_header(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && -- cgit From 63ef8fc9bcee6b73ca445a19a7ac6bd544723c9f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 5 Dec 2023 10:27:35 -0300 Subject: ARM: dts: imx28-xea: Pass the 'model' property Per root-node.yaml, 'model' is a required property. Pass it to fix the following dt-schema warning: imx28-xea.dtb: /: 'model' is a required property from schema $id: http://devicetree.org/schemas/root-node.yaml# Signed-off-by: Fabio Estevam Fixes: 445ae16ac1c5 ("ARM: dts: imx28: Add DTS description of imx28 based XEA board") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/mxs/imx28-xea.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts index a400c108f66a..6c5e6856648a 100644 --- a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts +++ b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts @@ -8,6 +8,7 @@ #include "imx28-lwe.dtsi" / { + model = "Liebherr XEA board"; compatible = "lwn,imx28-xea", "fsl,imx28"; }; -- cgit From 37f3d6108730713c411827ab4af764909f4dfc78 Mon Sep 17 00:00:00 2001 From: Sam Edwards Date: Tue, 5 Dec 2023 12:29:00 -0800 Subject: arm64: dts: rockchip: Fix eMMC Data Strobe PD on rk3588 JEDEC standard JESD84-B51 defines the eMMC Data Strobe line, which is currently used only in HS400 mode, as a device->host clock signal that "is used only in read operation. The Data Strobe is always High-Z (not driven by the device and pulled down by RDS) or Driven Low in write operation, except during CRC status response." RDS is a pull-down resistor specified in the 10K-100K ohm range. Thus per the standard, the Data Strobe is always pulled to ground (by the eMMC and/or RDS) during write operations. Evidently, the eMMC host controller in the RK3588 considers an active voltage on the eMMC-DS line during a write to be an error. The default (i.e. hardware reset, and Rockchip BSP) behavior for the RK3588 is to activate the eMMC-DS pin's builtin pull-down. As a result, many RK3588 board designers do not bother adding a dedicated RDS resistor, instead relying on the RK3588's internal bias. The current devicetree, however, disables this bias (`pcfg_pull_none`), breaking HS400-mode writes for boards without a dedicated RDS, but with an eMMC chip that chooses to High-Z (instead of drive-low) the eMMC-DS line. (The Turing RK1 is one such board.) Fix this by changing the bias in the (common) emmc_data_strobe case to reflect the expected hardware/BSP behavior. This is unlikely to cause regressions elsewhere: the pull-down is only relevant for High-Z eMMCs, and if this is redundant with a (dedicated) RDS resistor, the effective result is only a lower resistance to ground -- where the range of tolerance is quite high. If it does, it's better fixed in the specific devicetrees. Fixes: d85f8a5c798d5 ("arm64: dts: rockchip: Add rk3588 pinctrl data") Signed-off-by: Sam Edwards Link: https://lore.kernel.org/r/20231205202900.4617-2-CFSworks@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi index 63151d9d2377..30db12c4fc82 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi @@ -369,7 +369,7 @@ emmc_data_strobe: emmc-data-strobe { rockchip,pins = /* emmc_data_strobe */ - <2 RK_PA2 1 &pcfg_pull_none>; + <2 RK_PA2 1 &pcfg_pull_down>; }; }; -- cgit From 7037d95a047cd89b1f680eed253c6ab586bef1ed Mon Sep 17 00:00:00 2001 From: Kelly Kane Date: Sat, 2 Dec 2023 17:17:12 -0800 Subject: r8152: add vendor/device ID pair for ASUS USB-C2500 The ASUS USB-C2500 is an RTL8156 based 2.5G Ethernet controller. Add the vendor and product ID values to the driver. This makes Ethernet work with the adapter. Signed-off-by: Kelly Kane Link: https://lore.kernel.org/r/20231203011712.6314-1-kelly@hawknetworks.com Signed-off-by: Paolo Abeni --- drivers/net/usb/r8152.c | 1 + include/linux/usb/r8152.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index fcdc9ba0f826..9bf2140fd0a1 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -10049,6 +10049,7 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff) }, { USB_DEVICE(VENDOR_ID_TPLINK, 0x0601) }, { USB_DEVICE(VENDOR_ID_DLINK, 0xb301) }, + { USB_DEVICE(VENDOR_ID_ASUS, 0x1976) }, {} }; diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h index 287e9d83fb8b..33a4c146dc19 100644 --- a/include/linux/usb/r8152.h +++ b/include/linux/usb/r8152.h @@ -30,6 +30,7 @@ #define VENDOR_ID_NVIDIA 0x0955 #define VENDOR_ID_TPLINK 0x2357 #define VENDOR_ID_DLINK 0x2001 +#define VENDOR_ID_ASUS 0x0b05 #if IS_REACHABLE(CONFIG_USB_RTL8152) extern u8 rtl8152_get_version(struct usb_interface *intf); -- cgit From e0f04e41e8eedd4e5a1275f2318df7e1841855f2 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 4 Dec 2023 09:32:33 +0100 Subject: drm/atomic-helpers: Invoke end_fb_access while owning plane state Invoke drm_plane_helper_funcs.end_fb_access before drm_atomic_helper_commit_hw_done(). The latter function hands over ownership of the plane state to the following commit, which might free it. Releasing resources in end_fb_access then operates on undefined state. This bug has been observed with non-blocking commits when they are being queued up quickly. Here is an example stack trace from the bug report. The plane state has been free'd already, so the pages for drm_gem_fb_vunmap() are gone. Unable to handle kernel paging request at virtual address 0000000100000049 [...] drm_gem_fb_vunmap+0x18/0x74 drm_gem_end_shadow_fb_access+0x1c/0x2c drm_atomic_helper_cleanup_planes+0x58/0xd8 drm_atomic_helper_commit_tail+0x90/0xa0 commit_tail+0x15c/0x188 commit_work+0x14/0x20 Fix this by running end_fb_access immediately after updating all planes in drm_atomic_helper_commit_planes(). The existing clean-up helper drm_atomic_helper_cleanup_planes() now only handles cleanup_fb. For aborted commits, roll back from drm_atomic_helper_prepare_planes() in the new helper drm_atomic_helper_unprepare_planes(). This case is different from regular cleanup, as we have to release the new state; regular cleanup releases the old state. The new helper also invokes cleanup_fb for all planes. The changes mostly involve DRM's atomic helpers. Only two drivers, i915 and nouveau, implement their own commit function. Update them to invoke drm_atomic_helper_unprepare_planes(). Drivers with custom commit_tail function do not require changes. v4: * fix documentation (kernel test robot) v3: * add drm_atomic_helper_unprepare_planes() for rolling back * use correct state for end_fb_access v2: * fix test in drm_atomic_helper_cleanup_planes() Reported-by: Alyssa Ross Closes: https://lore.kernel.org/dri-devel/87leazm0ya.fsf@alyssa.is/ Suggested-by: Daniel Vetter Fixes: 94d879eaf7fb ("drm/atomic-helper: Add {begin,end}_fb_access to plane helpers") Tested-by: Alyssa Ross Reviewed-by: Alyssa Ross Signed-off-by: Thomas Zimmermann Cc: # v6.2+ Link: https://patchwork.freedesktop.org/patch/msgid/20231204083247.22006-1-tzimmermann@suse.de --- drivers/gpu/drm/drm_atomic_helper.c | 78 ++++++++++++++++++---------- drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/nouveau/dispnv50/disp.c | 2 +- include/drm/drm_atomic_helper.h | 2 + 4 files changed, 56 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 2444fc33dd7c..68ffcc0b00dc 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -2012,7 +2012,7 @@ int drm_atomic_helper_commit(struct drm_device *dev, return ret; drm_atomic_helper_async_commit(dev, state); - drm_atomic_helper_cleanup_planes(dev, state); + drm_atomic_helper_unprepare_planes(dev, state); return 0; } @@ -2072,7 +2072,7 @@ int drm_atomic_helper_commit(struct drm_device *dev, return 0; err: - drm_atomic_helper_cleanup_planes(dev, state); + drm_atomic_helper_unprepare_planes(dev, state); return ret; } EXPORT_SYMBOL(drm_atomic_helper_commit); @@ -2650,6 +2650,39 @@ fail_prepare_fb: } EXPORT_SYMBOL(drm_atomic_helper_prepare_planes); +/** + * drm_atomic_helper_unprepare_planes - release plane resources on aborts + * @dev: DRM device + * @state: atomic state object with old state structures + * + * This function cleans up plane state, specifically framebuffers, from the + * atomic state. It undoes the effects of drm_atomic_helper_prepare_planes() + * when aborting an atomic commit. For cleaning up after a successful commit + * use drm_atomic_helper_cleanup_planes(). + */ +void drm_atomic_helper_unprepare_planes(struct drm_device *dev, + struct drm_atomic_state *state) +{ + struct drm_plane *plane; + struct drm_plane_state *new_plane_state; + int i; + + for_each_new_plane_in_state(state, plane, new_plane_state, i) { + const struct drm_plane_helper_funcs *funcs = plane->helper_private; + + if (funcs->end_fb_access) + funcs->end_fb_access(plane, new_plane_state); + } + + for_each_new_plane_in_state(state, plane, new_plane_state, i) { + const struct drm_plane_helper_funcs *funcs = plane->helper_private; + + if (funcs->cleanup_fb) + funcs->cleanup_fb(plane, new_plane_state); + } +} +EXPORT_SYMBOL(drm_atomic_helper_unprepare_planes); + static bool plane_crtc_active(const struct drm_plane_state *state) { return state->crtc && state->crtc->state->active; @@ -2784,6 +2817,17 @@ void drm_atomic_helper_commit_planes(struct drm_device *dev, funcs->atomic_flush(crtc, old_state); } + + /* + * Signal end of framebuffer access here before hw_done. After hw_done, + * a later commit might have already released the plane state. + */ + for_each_old_plane_in_state(old_state, plane, old_plane_state, i) { + const struct drm_plane_helper_funcs *funcs = plane->helper_private; + + if (funcs->end_fb_access) + funcs->end_fb_access(plane, old_plane_state); + } } EXPORT_SYMBOL(drm_atomic_helper_commit_planes); @@ -2911,40 +2955,22 @@ EXPORT_SYMBOL(drm_atomic_helper_disable_planes_on_crtc); * configuration. Hence the old configuration must be perserved in @old_state to * be able to call this function. * - * This function must also be called on the new state when the atomic update - * fails at any point after calling drm_atomic_helper_prepare_planes(). + * This function may not be called on the new state when the atomic update + * fails at any point after calling drm_atomic_helper_prepare_planes(). Use + * drm_atomic_helper_unprepare_planes() in this case. */ void drm_atomic_helper_cleanup_planes(struct drm_device *dev, struct drm_atomic_state *old_state) { struct drm_plane *plane; - struct drm_plane_state *old_plane_state, *new_plane_state; + struct drm_plane_state *old_plane_state; int i; - for_each_oldnew_plane_in_state(old_state, plane, old_plane_state, new_plane_state, i) { + for_each_old_plane_in_state(old_state, plane, old_plane_state, i) { const struct drm_plane_helper_funcs *funcs = plane->helper_private; - if (funcs->end_fb_access) - funcs->end_fb_access(plane, new_plane_state); - } - - for_each_oldnew_plane_in_state(old_state, plane, old_plane_state, new_plane_state, i) { - const struct drm_plane_helper_funcs *funcs; - struct drm_plane_state *plane_state; - - /* - * This might be called before swapping when commit is aborted, - * in which case we have to cleanup the new state. - */ - if (old_plane_state == plane->state) - plane_state = new_plane_state; - else - plane_state = old_plane_state; - - funcs = plane->helper_private; - if (funcs->cleanup_fb) - funcs->cleanup_fb(plane, plane_state); + funcs->cleanup_fb(plane, old_plane_state); } } EXPORT_SYMBOL(drm_atomic_helper_cleanup_planes); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 28d85e1e858e..02f873738905 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7475,7 +7475,7 @@ int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state, for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) intel_color_cleanup_commit(new_crtc_state); - drm_atomic_helper_cleanup_planes(dev, &state->base); + drm_atomic_helper_unprepare_planes(dev, &state->base); intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref); return ret; } diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 7840b6428afb..118807e38422 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2474,7 +2474,7 @@ nv50_disp_atomic_commit(struct drm_device *dev, err_cleanup: if (ret) - drm_atomic_helper_cleanup_planes(dev, state); + drm_atomic_helper_unprepare_planes(dev, state); done: pm_runtime_put_autosuspend(dev->dev); return ret; diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h index 536a0b0091c3..006b5c977ad7 100644 --- a/include/drm/drm_atomic_helper.h +++ b/include/drm/drm_atomic_helper.h @@ -97,6 +97,8 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev, int drm_atomic_helper_prepare_planes(struct drm_device *dev, struct drm_atomic_state *state); +void drm_atomic_helper_unprepare_planes(struct drm_device *dev, + struct drm_atomic_state *state); #define DRM_PLANE_COMMIT_ACTIVE_ONLY BIT(0) #define DRM_PLANE_COMMIT_NO_DISABLE_AFTER_MODESET BIT(1) -- cgit From cbe860be36095e68e4e5561ab43610982fb429fd Mon Sep 17 00:00:00 2001 From: Daniil Maximov Date: Mon, 4 Dec 2023 11:58:10 +0300 Subject: net: atlantic: Fix NULL dereference of skb pointer in If is_ptp_ring == true in the loop of __aq_ring_xdp_clean function, then a timestamp is stored from a packet in a field of skb object, which is not allocated at the moment of the call (skb == NULL). Generalize aq_ptp_extract_ts and other affected functions so they don't work with struct sk_buff*, but with struct skb_shared_hwtstamps*. Found by Linux Verification Center (linuxtesting.org) with SVACE Fixes: 26efaef759a1 ("net: atlantic: Implement xdp data plane") Signed-off-by: Daniil Maximov Reviewed-by: Igor Russkikh Link: https://lore.kernel.org/r/20231204085810.1681386-1-daniil31415it@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/aquantia/atlantic/aq_ptp.c | 10 +++++----- drivers/net/ethernet/aquantia/atlantic/aq_ptp.h | 4 ++-- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 18 ++++++++++++------ 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c index 80b44043e6c5..28c9b6f1a54f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c @@ -553,17 +553,17 @@ void aq_ptp_tx_hwtstamp(struct aq_nic_s *aq_nic, u64 timestamp) /* aq_ptp_rx_hwtstamp - utility function which checks for RX time stamp * @adapter: pointer to adapter struct - * @skb: particular skb to send timestamp with + * @shhwtstamps: particular skb_shared_hwtstamps to save timestamp * * if the timestamp is valid, we convert it into the timecounter ns * value, then store that result into the hwtstamps structure which * is passed up the network stack */ -static void aq_ptp_rx_hwtstamp(struct aq_ptp_s *aq_ptp, struct sk_buff *skb, +static void aq_ptp_rx_hwtstamp(struct aq_ptp_s *aq_ptp, struct skb_shared_hwtstamps *shhwtstamps, u64 timestamp) { timestamp -= atomic_read(&aq_ptp->offset_ingress); - aq_ptp_convert_to_hwtstamp(aq_ptp, skb_hwtstamps(skb), timestamp); + aq_ptp_convert_to_hwtstamp(aq_ptp, shhwtstamps, timestamp); } void aq_ptp_hwtstamp_config_get(struct aq_ptp_s *aq_ptp, @@ -639,7 +639,7 @@ bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring) &aq_ptp->ptp_rx == ring || &aq_ptp->hwts_rx == ring; } -u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p, +u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct skb_shared_hwtstamps *shhwtstamps, u8 *p, unsigned int len) { struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp; @@ -648,7 +648,7 @@ u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p, p, len, ×tamp); if (ret > 0) - aq_ptp_rx_hwtstamp(aq_ptp, skb, timestamp); + aq_ptp_rx_hwtstamp(aq_ptp, shhwtstamps, timestamp); return ret; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h index 28ccb7ca2df9..210b723f2207 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h @@ -67,7 +67,7 @@ int aq_ptp_hwtstamp_config_set(struct aq_ptp_s *aq_ptp, /* Return either ring is belong to PTP or not*/ bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring); -u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p, +u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct skb_shared_hwtstamps *shhwtstamps, u8 *p, unsigned int len); struct ptp_clock *aq_ptp_get_ptp_clock(struct aq_ptp_s *aq_ptp); @@ -143,7 +143,7 @@ static inline bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring) } static inline u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, - struct sk_buff *skb, u8 *p, + struct skb_shared_hwtstamps *shhwtstamps, u8 *p, unsigned int len) { return 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 4de22eed099a..694daeaf3e61 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -647,7 +647,7 @@ static int __aq_ring_rx_clean(struct aq_ring_s *self, struct napi_struct *napi, } if (is_ptp_ring) buff->len -= - aq_ptp_extract_ts(self->aq_nic, skb, + aq_ptp_extract_ts(self->aq_nic, skb_hwtstamps(skb), aq_buf_vaddr(&buff->rxdata), buff->len); @@ -742,6 +742,8 @@ static int __aq_ring_xdp_clean(struct aq_ring_s *rx_ring, struct aq_ring_buff_s *buff = &rx_ring->buff_ring[rx_ring->sw_head]; bool is_ptp_ring = aq_ptp_ring(rx_ring->aq_nic, rx_ring); struct aq_ring_buff_s *buff_ = NULL; + u16 ptp_hwtstamp_len = 0; + struct skb_shared_hwtstamps shhwtstamps; struct sk_buff *skb = NULL; unsigned int next_ = 0U; struct xdp_buff xdp; @@ -810,11 +812,12 @@ static int __aq_ring_xdp_clean(struct aq_ring_s *rx_ring, hard_start = page_address(buff->rxdata.page) + buff->rxdata.pg_off - rx_ring->page_offset; - if (is_ptp_ring) - buff->len -= - aq_ptp_extract_ts(rx_ring->aq_nic, skb, - aq_buf_vaddr(&buff->rxdata), - buff->len); + if (is_ptp_ring) { + ptp_hwtstamp_len = aq_ptp_extract_ts(rx_ring->aq_nic, &shhwtstamps, + aq_buf_vaddr(&buff->rxdata), + buff->len); + buff->len -= ptp_hwtstamp_len; + } xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); xdp_prepare_buff(&xdp, hard_start, rx_ring->page_offset, @@ -834,6 +837,9 @@ static int __aq_ring_xdp_clean(struct aq_ring_s *rx_ring, if (IS_ERR(skb) || !skb) continue; + if (ptp_hwtstamp_len > 0) + *skb_hwtstamps(skb) = shhwtstamps; + if (buff->is_vlan) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), buff->vlan_rx_tag); -- cgit From 84757d0839451b20b11e993128f0a77393ca50c1 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Mon, 4 Dec 2023 22:32:31 +0800 Subject: net: hns: fix wrong head when modify the tx feature when sending packets Upon changing the tx feature, the hns driver will modify the maybe_stop_tx() and fill_desc() functions, if the modify happens during packet sending, will cause the hardware and software pointers do not match, and the port can not work anymore. This patch deletes the maybe_stop_tx() and fill_desc() functions modification when setting tx feature, and use the skb_is_gro() to determine which functions to use in the tx path. Fixes: 38f616da1c28 ("net:hns: Add support of ethtool TSO set option for Hip06 in HNS") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 53 ++++++++++++++++----------- drivers/net/ethernet/hisilicon/hns/hns_enet.h | 3 +- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 0900abf5c508..8a713eed4465 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -142,7 +142,8 @@ MODULE_DEVICE_TABLE(acpi, hns_enet_acpi_match); static void fill_desc(struct hnae_ring *ring, void *priv, int size, dma_addr_t dma, int frag_end, - int buf_num, enum hns_desc_type type, int mtu) + int buf_num, enum hns_desc_type type, int mtu, + bool is_gso) { struct hnae_desc *desc = &ring->desc[ring->next_to_use]; struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; @@ -275,6 +276,15 @@ static int hns_nic_maybe_stop_tso( return 0; } +static int hns_nic_maybe_stop_tx_v2(struct sk_buff **out_skb, int *bnum, + struct hnae_ring *ring) +{ + if (skb_is_gso(*out_skb)) + return hns_nic_maybe_stop_tso(out_skb, bnum, ring); + else + return hns_nic_maybe_stop_tx(out_skb, bnum, ring); +} + static void fill_tso_desc(struct hnae_ring *ring, void *priv, int size, dma_addr_t dma, int frag_end, int buf_num, enum hns_desc_type type, int mtu) @@ -300,6 +310,19 @@ static void fill_tso_desc(struct hnae_ring *ring, void *priv, mtu); } +static void fill_desc_v2(struct hnae_ring *ring, void *priv, + int size, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu, + bool is_gso) +{ + if (is_gso) + fill_tso_desc(ring, priv, size, dma, frag_end, buf_num, type, + mtu); + else + fill_v2_desc(ring, priv, size, dma, frag_end, buf_num, type, + mtu); +} + netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, struct sk_buff *skb, struct hns_nic_ring_data *ring_data) @@ -313,6 +336,7 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, int seg_num; dma_addr_t dma; int size, next_to_use; + bool is_gso; int i; switch (priv->ops.maybe_stop_tx(&skb, &buf_num, ring)) { @@ -339,8 +363,9 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, ring->stats.sw_err_cnt++; goto out_err_tx_ok; } + is_gso = skb_is_gso(skb); priv->ops.fill_desc(ring, skb, size, dma, seg_num == 1 ? 1 : 0, - buf_num, DESC_TYPE_SKB, ndev->mtu); + buf_num, DESC_TYPE_SKB, ndev->mtu, is_gso); /* fill the fragments */ for (i = 1; i < seg_num; i++) { @@ -354,7 +379,7 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, } priv->ops.fill_desc(ring, skb_frag_page(frag), size, dma, seg_num - 1 == i ? 1 : 0, buf_num, - DESC_TYPE_PAGE, ndev->mtu); + DESC_TYPE_PAGE, ndev->mtu, is_gso); } /*complete translate all packets*/ @@ -1776,15 +1801,6 @@ static int hns_nic_set_features(struct net_device *netdev, netdev_info(netdev, "enet v1 do not support tso!\n"); break; default: - if (features & (NETIF_F_TSO | NETIF_F_TSO6)) { - priv->ops.fill_desc = fill_tso_desc; - priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tso; - /* The chip only support 7*4096 */ - netif_set_tso_max_size(netdev, 7 * 4096); - } else { - priv->ops.fill_desc = fill_v2_desc; - priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx; - } break; } netdev->features = features; @@ -2159,16 +2175,9 @@ static void hns_nic_set_priv_ops(struct net_device *netdev) priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx; } else { priv->ops.get_rxd_bnum = get_v2rx_desc_bnum; - if ((netdev->features & NETIF_F_TSO) || - (netdev->features & NETIF_F_TSO6)) { - priv->ops.fill_desc = fill_tso_desc; - priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tso; - /* This chip only support 7*4096 */ - netif_set_tso_max_size(netdev, 7 * 4096); - } else { - priv->ops.fill_desc = fill_v2_desc; - priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx; - } + priv->ops.fill_desc = fill_desc_v2; + priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx_v2; + netif_set_tso_max_size(netdev, 7 * 4096); /* enable tso when init * control tso on/off through TSE bit in bd */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h index ffa9d6573f54..3f3ee032f631 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h @@ -44,7 +44,8 @@ struct hns_nic_ring_data { struct hns_nic_ops { void (*fill_desc)(struct hnae_ring *ring, void *priv, int size, dma_addr_t dma, int frag_end, - int buf_num, enum hns_desc_type type, int mtu); + int buf_num, enum hns_desc_type type, int mtu, + bool is_gso); int (*maybe_stop_tx)(struct sk_buff **out_skb, int *bnum, struct hnae_ring *ring); void (*get_rxd_bnum)(u32 bnum_flag, int *out_bnum); -- cgit From f708aba40f9c1eeb9c7e93ed4863b5f85b09b288 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Mon, 4 Dec 2023 22:32:32 +0800 Subject: net: hns: fix fake link up on xge port If a xge port just connect with an optical module and no fiber, it may have a fake link up because there may be interference on the hardware. This patch adds an anti-shake to avoid the problem. And the time of anti-shake is base on tests. Fixes: b917078c1c10 ("net: hns: Add ACPI support to check SFP present") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 29 +++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 928d934cb21a..f75668c47935 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -66,6 +66,27 @@ static enum mac_mode hns_get_enet_interface(const struct hns_mac_cb *mac_cb) } } +static u32 hns_mac_link_anti_shake(struct mac_driver *mac_ctrl_drv) +{ +#define HNS_MAC_LINK_WAIT_TIME 5 +#define HNS_MAC_LINK_WAIT_CNT 40 + + u32 link_status = 0; + int i; + + if (!mac_ctrl_drv->get_link_status) + return link_status; + + for (i = 0; i < HNS_MAC_LINK_WAIT_CNT; i++) { + msleep(HNS_MAC_LINK_WAIT_TIME); + mac_ctrl_drv->get_link_status(mac_ctrl_drv, &link_status); + if (!link_status) + break; + } + + return link_status; +} + void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status) { struct mac_driver *mac_ctrl_drv; @@ -83,6 +104,14 @@ void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status) &sfp_prsnt); if (!ret) *link_status = *link_status && sfp_prsnt; + + /* for FIBER port, it may have a fake link up. + * when the link status changes from down to up, we need to do + * anti-shake. the anti-shake time is base on tests. + * only FIBER port need to do this. + */ + if (*link_status && !mac_cb->link) + *link_status = hns_mac_link_anti_shake(mac_ctrl_drv); } mac_cb->link = *link_status; -- cgit From 714589c2742209cc228991b115e48548fb8d89cf Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 4 Dec 2023 19:00:40 +0000 Subject: Documentation/tcp: Fix an obvious typo Yep, my VIM spellchecker is not good enough for typos like this one. Fixes: 7fe0e38bb669 ("Documentation/tcp: Add TCP-AO documentation") Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Reported-by: Markus Elfring Closes: https://lore.kernel.org/all/2745ab4e-acac-40d4-83bf-37f2600d0c3d@web.de/ Signed-off-by: Dmitry Safonov Signed-off-by: Paolo Abeni --- Documentation/networking/tcp_ao.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/tcp_ao.rst b/Documentation/networking/tcp_ao.rst index cfa5bf1cc542..8a58321acce7 100644 --- a/Documentation/networking/tcp_ao.rst +++ b/Documentation/networking/tcp_ao.rst @@ -99,7 +99,7 @@ also [6.1]:: when it is no longer considered permitted. Linux TCP-AO will try its best to prevent you from removing a key that's -being used, considering it a key management failure. But sine keeping +being used, considering it a key management failure. But since keeping an outdated key may become a security issue and as a peer may unintentionally prevent the removal of an old key by always setting it as RNextKeyID - a forced key removal mechanism is provided, where -- cgit From da7dfaa6d6f731c30eca6ffa808b83634d43e26f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 4 Dec 2023 19:00:41 +0000 Subject: net/tcp: Consistently align TCP-AO option in the header Currently functions that pre-calculate TCP header options length use unaligned TCP-AO header + MAC-length for skb reservation. And the functions that actually write TCP-AO options into skb do align the header. Nothing good can come out of this for ((maclen % 4) != 0). Provide tcp_ao_len_aligned() helper and use it everywhere for TCP header options space calculations. Fixes: 1e03d32bea8e ("net/tcp: Add TCP-AO sign to outgoing packets") Signed-off-by: Dmitry Safonov Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/tcp_ao.h | 6 ++++++ net/ipv4/tcp_ao.c | 4 ++-- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 6 +++--- net/ipv6/tcp_ipv6.c | 2 +- 6 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index b56be10838f0..647781080613 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -62,11 +62,17 @@ static inline int tcp_ao_maclen(const struct tcp_ao_key *key) return key->maclen; } +/* Use tcp_ao_len_aligned() for TCP header calculations */ static inline int tcp_ao_len(const struct tcp_ao_key *key) { return tcp_ao_maclen(key) + sizeof(struct tcp_ao_hdr); } +static inline int tcp_ao_len_aligned(const struct tcp_ao_key *key) +{ + return round_up(tcp_ao_len(key), 4); +} + static inline unsigned int tcp_ao_digest_size(struct tcp_ao_key *key) { return key->digest_size; diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 7696417d0640..c8be1d526eac 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -1100,7 +1100,7 @@ void tcp_ao_connect_init(struct sock *sk) ao_info->current_key = key; if (!ao_info->rnext_key) ao_info->rnext_key = key; - tp->tcp_header_len += tcp_ao_len(key); + tp->tcp_header_len += tcp_ao_len_aligned(key); ao_info->lisn = htonl(tp->write_seq); ao_info->snd_sne = 0; @@ -1346,7 +1346,7 @@ static int tcp_ao_parse_crypto(struct tcp_ao_add *cmd, struct tcp_ao_key *key) syn_tcp_option_space -= TCPOLEN_MSS_ALIGNED; syn_tcp_option_space -= TCPOLEN_TSTAMP_ALIGNED; syn_tcp_option_space -= TCPOLEN_WSCALE_ALIGNED; - if (tcp_ao_len(key) > syn_tcp_option_space) { + if (tcp_ao_len_aligned(key) > syn_tcp_option_space) { err = -EMSGSIZE; goto err_kfree; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5f693bbd578d..0c50c5a32b84 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -690,7 +690,7 @@ static bool tcp_v4_ao_sign_reset(const struct sock *sk, struct sk_buff *skb, reply_options[0] = htonl((TCPOPT_AO << 24) | (tcp_ao_len(key) << 16) | (aoh->rnext_keyid << 8) | keyid); - arg->iov[0].iov_len += round_up(tcp_ao_len(key), 4); + arg->iov[0].iov_len += tcp_ao_len_aligned(key); reply->doff = arg->iov[0].iov_len / 4; if (tcp_ao_hash_hdr(AF_INET, (char *)&reply_options[1], @@ -978,7 +978,7 @@ static void tcp_v4_send_ack(const struct sock *sk, (tcp_ao_len(key->ao_key) << 16) | (key->ao_key->sndid << 8) | key->rcv_next); - arg.iov[0].iov_len += round_up(tcp_ao_len(key->ao_key), 4); + arg.iov[0].iov_len += tcp_ao_len_aligned(key->ao_key); rep.th.doff = arg.iov[0].iov_len / 4; tcp_ao_hash_hdr(AF_INET, (char *)&rep.opt[offset], diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a9807eeb311c..9e85f2a0bddd 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -615,7 +615,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, ao_key = treq->af_specific->ao_lookup(sk, req, tcp_rsk(req)->ao_keyid, -1); if (ao_key) - newtp->tcp_header_len += tcp_ao_len(ao_key); + newtp->tcp_header_len += tcp_ao_len_aligned(ao_key); #endif if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index eb13a55d660c..93eef1dbbc55 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -825,7 +825,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, timestamps = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps); if (tcp_key_is_ao(key)) { opts->options |= OPTION_AO; - remaining -= tcp_ao_len(key->ao_key); + remaining -= tcp_ao_len_aligned(key->ao_key); } } @@ -915,7 +915,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, ireq->tstamp_ok &= !ireq->sack_ok; } else if (tcp_key_is_ao(key)) { opts->options |= OPTION_AO; - remaining -= tcp_ao_len(key->ao_key); + remaining -= tcp_ao_len_aligned(key->ao_key); ireq->tstamp_ok &= !ireq->sack_ok; } @@ -982,7 +982,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb size += TCPOLEN_MD5SIG_ALIGNED; } else if (tcp_key_is_ao(key)) { opts->options |= OPTION_AO; - size += tcp_ao_len(key->ao_key); + size += tcp_ao_len_aligned(key->ao_key); } if (likely(tp->rx_opt.tstamp_ok)) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 937a02c2e534..8c6623496dd7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -881,7 +881,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 if (tcp_key_is_md5(key)) tot_len += TCPOLEN_MD5SIG_ALIGNED; if (tcp_key_is_ao(key)) - tot_len += tcp_ao_len(key->ao_key); + tot_len += tcp_ao_len_aligned(key->ao_key); #ifdef CONFIG_MPTCP if (rst && !tcp_key_is_md5(key)) { -- cgit From 965c00e4ea2e4df986ecd73c2fe9d3a00a2858db Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 4 Dec 2023 19:00:42 +0000 Subject: net/tcp: Limit TCP_AO_REPAIR to non-listen sockets Listen socket is not an established TCP connection, so setsockopt(TCP_AO_REPAIR) doesn't have any impact. Restrict this uAPI for listen sockets. Fixes: faadfaba5e01 ("net/tcp: Add TCP_AO_REPAIR") Signed-off-by: Dmitry Safonov Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/ipv4/tcp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c9f078224569..ff6838ca2e58 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3610,6 +3610,10 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, break; case TCP_AO_REPAIR: + if (!tcp_can_repair_sock(sk)) { + err = -EPERM; + break; + } err = tcp_ao_set_repair(sk, optval, optlen); break; #ifdef CONFIG_TCP_AO @@ -4309,6 +4313,8 @@ zerocopy_rcv_out: } #endif case TCP_AO_REPAIR: + if (!tcp_can_repair_sock(sk)) + return -EPERM; return tcp_ao_get_repair(sk, optval, optlen); case TCP_AO_GET_KEYS: case TCP_AO_INFO: { -- cgit From 12083d728213285c2d4347fa0ed3b556449703ce Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 4 Dec 2023 19:00:43 +0000 Subject: net/tcp: Don't add key with non-matching VRF on connected sockets If the connection was established, don't allow adding TCP-AO keys that don't match the peer. Currently, there are checks for ip-address matching, but L3 index check is missing. Add it to restrict userspace shooting itself somewhere. Yet, nothing restricts the CAP_NET_RAW user from trying to shoot themselves by performing setsockopt(SO_BINDTODEVICE) or setsockopt(SO_BINDTOIFINDEX) over an established TCP-AO connection. So, this is just "minimum effort" to potentially save someone's debugging time, rather than a full restriction on doing weird things. Fixes: 248411b8cb89 ("net/tcp: Wire up l3index to TCP-AO") Signed-off-by: Dmitry Safonov Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/ipv4/tcp_ao.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index c8be1d526eac..18dacfef7a07 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -1608,6 +1608,15 @@ static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family, if (!dev || !l3index) return -EINVAL; + if (!bound_dev_if || bound_dev_if != cmd.ifindex) { + /* tcp_ao_established_key() doesn't expect having + * non peer-matching key on an established TCP-AO + * connection. + */ + if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) + return -EINVAL; + } + /* It's still possible to bind after adding keys or even * re-bind to a different dev (with CAP_NET_RAW). * So, no reason to return error here, rather try to be -- cgit From 9396c4ee93f9ac03cd0cea0bb345fbc657772943 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 4 Dec 2023 19:00:44 +0000 Subject: net/tcp: Don't store TCP-AO maclen on reqsk This extra check doesn't work for a handshake when SYN segment has (current_key.maclen != rnext_key.maclen). It could be amended to preserve rnext_key.maclen instead of current_key.maclen, but that requires a lookup on listen socket. Originally, this extra maclen check was introduced just because it was cheap. Drop it and convert tcp_request_sock::maclen into boolean tcp_request_sock::used_tcp_ao. Fixes: 06b22ef29591 ("net/tcp: Wire TCP-AO to request sockets") Signed-off-by: Dmitry Safonov Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/linux/tcp.h | 8 ++------ net/ipv4/tcp_ao.c | 4 ++-- net/ipv4/tcp_input.c | 5 +++-- net/ipv4/tcp_output.c | 9 +++------ 4 files changed, 10 insertions(+), 16 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 68f3d315d2e1..b646b574b060 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -169,7 +169,7 @@ struct tcp_request_sock { #ifdef CONFIG_TCP_AO u8 ao_keyid; u8 ao_rcv_next; - u8 maclen; + bool used_tcp_ao; #endif }; @@ -180,14 +180,10 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) static inline bool tcp_rsk_used_ao(const struct request_sock *req) { - /* The real length of MAC is saved in the request socket, - * signing anything with zero-length makes no sense, so here is - * a little hack.. - */ #ifndef CONFIG_TCP_AO return false; #else - return tcp_rsk(req)->maclen != 0; + return tcp_rsk(req)->used_tcp_ao; #endif } diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 18dacfef7a07..f8308d3f565e 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -851,7 +851,7 @@ void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, const struct tcp_ao_hdr *aoh; struct tcp_ao_key *key; - treq->maclen = 0; + treq->used_tcp_ao = false; if (tcp_parse_auth_options(th, NULL, &aoh) || !aoh) return; @@ -863,7 +863,7 @@ void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, treq->ao_rcv_next = aoh->keyid; treq->ao_keyid = aoh->rnext_keyid; - treq->maclen = tcp_ao_maclen(key); + treq->used_tcp_ao = true; } static enum skb_drop_reason diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bcb55d98004c..337c8bb07ccc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7182,11 +7182,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) goto drop_and_release; /* Invalid TCP options */ if (aoh) { - tcp_rsk(req)->maclen = aoh->length - sizeof(struct tcp_ao_hdr); + tcp_rsk(req)->used_tcp_ao = true; tcp_rsk(req)->ao_rcv_next = aoh->keyid; tcp_rsk(req)->ao_keyid = aoh->rnext_keyid; + } else { - tcp_rsk(req)->maclen = 0; + tcp_rsk(req)->used_tcp_ao = false; } #endif tcp_rsk(req)->snt_isn = isn; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 93eef1dbbc55..f5ef15e1d9ac 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3720,7 +3720,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, if (tcp_rsk_used_ao(req)) { #ifdef CONFIG_TCP_AO struct tcp_ao_key *ao_key = NULL; - u8 maclen = tcp_rsk(req)->maclen; u8 keyid = tcp_rsk(req)->ao_keyid; ao_key = tcp_sk(sk)->af_specific->ao_lookup(sk, req_to_sk(req), @@ -3730,13 +3729,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, * for another peer-matching key, but the peer has requested * ao_keyid (RFC5925 RNextKeyID), so let's keep it simple here. */ - if (unlikely(!ao_key || tcp_ao_maclen(ao_key) != maclen)) { - u8 key_maclen = ao_key ? tcp_ao_maclen(ao_key) : 0; - + if (unlikely(!ao_key)) { rcu_read_unlock(); kfree_skb(skb); - net_warn_ratelimited("TCP-AO: the keyid %u with maclen %u|%u from SYN packet is not present - not sending SYNACK\n", - keyid, maclen, key_maclen); + net_warn_ratelimited("TCP-AO: the keyid %u from SYN packet is not present - not sending SYNACK\n", + keyid); return NULL; } key.ao_key = ao_key; -- cgit From 82180b1fae2432ee88b4a54cc6c376ba01e57b22 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Wed, 22 Nov 2023 15:35:54 -0800 Subject: Support rv32 ULEB128 test Use opcodes available to both rv32 and rv64 in uleb128 module linking test. Fixes: af71bc194916 ("riscv: Add tests for riscv module loading") Signed-off-by: Charlie Jenkins Reported-by: Randy Dunlap Closes: https://lore.kernel.org/lkml/1d7c71ee-5742-4df4-b8ef-a2aea0a624eb@infradead.org/ Tested-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/20231122-module_fixup-v2-1-dfb9565e9ea5@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/tests/module_test/test_uleb128.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kernel/tests/module_test/test_uleb128.S b/arch/riscv/kernel/tests/module_test/test_uleb128.S index 90f22049d553..8515ed7cd8c1 100644 --- a/arch/riscv/kernel/tests/module_test/test_uleb128.S +++ b/arch/riscv/kernel/tests/module_test/test_uleb128.S @@ -6,13 +6,13 @@ .text .global test_uleb_basic test_uleb_basic: - ld a0, second + lw a0, second addi a0, a0, -127 ret .global test_uleb_large test_uleb_large: - ld a0, fourth + lw a0, fourth addi a0, a0, -0x07e8 ret @@ -22,10 +22,10 @@ first: second: .reloc second, R_RISCV_SET_ULEB128, second .reloc second, R_RISCV_SUB_ULEB128, first - .dword 0 + .word 0 third: .space 1000 fourth: .reloc fourth, R_RISCV_SET_ULEB128, fourth .reloc fourth, R_RISCV_SUB_ULEB128, third - .dword 0 + .word 0 -- cgit From dca6fa8644b89f54345e55501b1419316ba5cb29 Mon Sep 17 00:00:00 2001 From: Nithin Dabilpuram Date: Tue, 5 Dec 2023 13:34:30 +0530 Subject: octeontx2-af: Adjust Tx credits when MCS external bypass is disabled When MCS external bypass is disabled, MCS returns additional 2 credits(32B) for every packet Tx'ed on LMAC. To account for these extra credits, NIX_AF_TX_LINKX_NORM_CREDIT.CC_MCS_CNT needs to be configured as otherwise NIX Tx credits would overflow and will never be returned to idle state credit count causing issues with credit control and MTU change. This patch fixes the same by configuring CC_MCS_CNT at probe time for MCS enabled SoC's Fixes: bd69476e86fc ("octeontx2-af: cn10k: mcs: Install a default TCAM for normal traffic") Signed-off-by: Nithin Dabilpuram Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Goutham Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/mcs.c | 14 +++++++++++++- drivers/net/ethernet/marvell/octeontx2/af/mcs.h | 2 ++ drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 1 + drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 8 ++++++++ drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h | 1 + 5 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c index c43f19dfbd74..bd87507cf8ea 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c @@ -1219,6 +1219,17 @@ struct mcs *mcs_get_pdata(int mcs_id) return NULL; } +bool is_mcs_bypass(int mcs_id) +{ + struct mcs *mcs_dev; + + list_for_each_entry(mcs_dev, &mcs_list, mcs_list) { + if (mcs_dev->mcs_id == mcs_id) + return mcs_dev->bypass; + } + return true; +} + void mcs_set_port_cfg(struct mcs *mcs, struct mcs_port_cfg_set_req *req) { u64 val = 0; @@ -1436,7 +1447,7 @@ static int mcs_x2p_calibration(struct mcs *mcs) return err; } -static void mcs_set_external_bypass(struct mcs *mcs, u8 bypass) +static void mcs_set_external_bypass(struct mcs *mcs, bool bypass) { u64 val; @@ -1447,6 +1458,7 @@ static void mcs_set_external_bypass(struct mcs *mcs, u8 bypass) else val &= ~BIT_ULL(6); mcs_reg_write(mcs, MCSX_MIL_GLOBAL, val); + mcs->bypass = bypass; } static void mcs_global_cfg(struct mcs *mcs) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.h b/drivers/net/ethernet/marvell/octeontx2/af/mcs.h index 0f89dcb76465..f927cc61dfd2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.h @@ -149,6 +149,7 @@ struct mcs { u16 num_vec; void *rvu; u16 *tx_sa_active; + bool bypass; }; struct mcs_ops { @@ -206,6 +207,7 @@ void mcs_get_custom_tag_cfg(struct mcs *mcs, struct mcs_custom_tag_cfg_get_req * int mcs_alloc_ctrlpktrule(struct rsrc_bmap *rsrc, u16 *pf_map, u16 offset, u16 pcifunc); int mcs_free_ctrlpktrule(struct mcs *mcs, struct mcs_free_ctrl_pkt_rule_req *req); int mcs_ctrlpktrule_write(struct mcs *mcs, struct mcs_ctrl_pkt_rule_write_req *req); +bool is_mcs_bypass(int mcs_id); /* CN10K-B APIs */ void cn10kb_mcs_set_hw_capabilities(struct mcs *mcs); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index c4d999ef5ab4..cce2806aaa50 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -345,6 +345,7 @@ struct nix_hw { struct nix_txvlan txvlan; struct nix_ipolicer *ipolicer; u64 *tx_credits; + u8 cc_mcs_cnt; }; /* RVU block's capabilities or functionality, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index c112c71ff576..4227ebb4a758 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -12,6 +12,7 @@ #include "rvu_reg.h" #include "rvu.h" #include "npc.h" +#include "mcs.h" #include "cgx.h" #include "lmac_common.h" #include "rvu_npc_hash.h" @@ -4389,6 +4390,12 @@ static void nix_link_config(struct rvu *rvu, int blkaddr, SDP_HW_MAX_FRS << 16 | NIC_HW_MIN_FRS); } + /* Get MCS external bypass status for CN10K-B */ + if (mcs_get_blkcnt() == 1) { + /* Adjust for 2 credits when external bypass is disabled */ + nix_hw->cc_mcs_cnt = is_mcs_bypass(0) ? 0 : 2; + } + /* Set credits for Tx links assuming max packet length allowed. * This will be reconfigured based on MTU set for PF/VF. */ @@ -4412,6 +4419,7 @@ static void nix_link_config(struct rvu *rvu, int blkaddr, tx_credits = (lmac_fifo_len - lmac_max_frs) / 16; /* Enable credits and set credit pkt count to max allowed */ cfg = (tx_credits << 12) | (0x1FF << 2) | BIT_ULL(1); + cfg |= FIELD_PREP(NIX_AF_LINKX_MCS_CNT_MASK, nix_hw->cc_mcs_cnt); link = iter + slink; nix_hw->tx_credits[link] = tx_credits; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index b42e631e52d0..18c1c9f361cc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -437,6 +437,7 @@ #define NIX_AF_LINKX_BASE_MASK GENMASK_ULL(11, 0) #define NIX_AF_LINKX_RANGE_MASK GENMASK_ULL(19, 16) +#define NIX_AF_LINKX_MCS_CNT_MASK GENMASK_ULL(33, 32) /* SSO */ #define SSO_AF_CONST (0x1000) -- cgit From 9723b2cca1f0e980c53156b52ea73b93966b3c8a Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 5 Dec 2023 13:34:31 +0530 Subject: octeontx2-af: Fix mcs sa cam entries size On latest silicon versions SA cam entries increased to 256. This patch fixes the datatype of sa_entries in mcs_hw_info struct to u16 to hold 256 entries. Fixes: 080bbd19c9dd ("octeontx2-af: cn10k: mcs: Add mailboxes for port related operations") Signed-off-by: Geetha sowjanya Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 6845556581c3..5df42634ceb8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1945,7 +1945,7 @@ struct mcs_hw_info { u8 tcam_entries; /* RX/TX Tcam entries per mcs block */ u8 secy_entries; /* RX/TX SECY entries per mcs block */ u8 sc_entries; /* RX/TX SC CAM entries per mcs block */ - u8 sa_entries; /* PN table entries = SA entries */ + u16 sa_entries; /* PN table entries = SA entries */ u64 rsvd[16]; }; -- cgit From 3ba98a8c6f8ceb4e01a78f973d8d9017020bbd57 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 5 Dec 2023 13:34:32 +0530 Subject: octeontx2-af: Fix mcs stats register address This patch adds the miss mcs stats register for mcs supported platforms. Fixes: 9312150af8da ("octeontx2-af: cn10k: mcs: Support for stats collection") Signed-off-by: Geetha sowjanya Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/mcs.c | 4 +-- .../net/ethernet/marvell/octeontx2/af/mcs_reg.h | 31 +++++++++++++++++++--- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c index bd87507cf8ea..c1775bd01c2b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c @@ -117,7 +117,7 @@ void mcs_get_rx_secy_stats(struct mcs *mcs, struct mcs_secy_stats *stats, int id reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYTAGGEDCTLX(id); stats->pkt_tagged_ctl_cnt = mcs_reg_read(mcs, reg); - reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDORNOTAGX(id); + reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDX(id); stats->pkt_untaged_cnt = mcs_reg_read(mcs, reg); reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYCTLX(id); @@ -215,7 +215,7 @@ void mcs_get_sc_stats(struct mcs *mcs, struct mcs_sc_stats *stats, reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSCNOTVALIDX(id); stats->pkt_notvalid_cnt = mcs_reg_read(mcs, reg); - reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDOROKX(id); + reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDX(id); stats->pkt_unchecked_cnt = mcs_reg_read(mcs, reg); if (mcs->hw->mcs_blks > 1) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h index f3ab01fc363c..f4c6de89002c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h @@ -810,14 +810,37 @@ offset = 0x9d8ull; \ offset; }) +#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDX(a) ({ \ + u64 offset; \ + \ + offset = 0xee80ull; \ + if (mcs->hw->mcs_blks > 1) \ + offset = 0xe818ull; \ + offset += (a) * 0x8ull; \ + offset; }) + +#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDX(a) ({ \ + u64 offset; \ + \ + offset = 0xa680ull; \ + if (mcs->hw->mcs_blks > 1) \ + offset = 0xd018ull; \ + offset += (a) * 0x8ull; \ + offset; }) + +#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCLATEORDELAYEDX(a) ({ \ + u64 offset; \ + \ + offset = 0xf680ull; \ + if (mcs->hw->mcs_blks > 1) \ + offset = 0xe018ull; \ + offset += (a) * 0x8ull; \ + offset; }) + #define MCSX_CSE_RX_MEM_SLAVE_INOCTETSSCDECRYPTEDX(a) (0xe680ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INOCTETSSCVALIDATEX(a) (0xde80ull + (a) * 0x8ull) -#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDORNOTAGX(a) (0xa680ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYNOTAGX(a) (0xd218 + (a) * 0x8ull) -#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDX(a) (0xd018ull + (a) * 0x8ull) -#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDOROKX(a) (0xee80ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYCTLX(a) (0xb680ull + (a) * 0x8ull) -#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCLATEORDELAYEDX(a) (0xf680ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSAINVALIDX(a) (0x12680ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSANOTUSINGSAERRORX(a) (0x15680ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSANOTVALIDX(a) (0x13680ull + (a) * 0x8ull) -- cgit From d431abd0a9aa27be379fb5f8304062071b0f5a7e Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 5 Dec 2023 13:34:33 +0530 Subject: octeontx2-af: Add missing mcs flr handler call If mcs resources are attached to PF/VF. These resources need to be freed on FLR. This patch add missing mcs flr call on PF FLR. Fixes: bd69476e86fc ("octeontx2-af: cn10k: mcs: Install a default TCAM for normal traffic") Signed-off-by: Geetha sowjanya Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 22c395c7d040..731bb82b577c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2631,6 +2631,9 @@ static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc) rvu_npc_free_mcam_entries(rvu, pcifunc, -1); rvu_mac_reset(rvu, pcifunc); + if (rvu->mcs_blk_cnt) + rvu_mcs_flr_handler(rvu, pcifunc); + mutex_unlock(&rvu->flr_lock); } -- cgit From 7336fc196748f82646b630d5a2e9d283e200b988 Mon Sep 17 00:00:00 2001 From: Rahul Bhansali Date: Tue, 5 Dec 2023 13:34:34 +0530 Subject: octeontx2-af: Update Tx link register range On new silicons the TX channels for transmit level has increased. This patch fixes the respective register offset range to configure the newly added channels. Fixes: b279bbb3314e ("octeontx2-af: NIX Tx scheduler queue config support") Signed-off-by: Rahul Bhansali Signed-off-by: Geetha sowjanya Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c index b3150f053291..d46ac29adb96 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c @@ -31,8 +31,8 @@ static struct hw_reg_map txsch_reg_map[NIX_TXSCH_LVL_CNT] = { {NIX_TXSCH_LVL_TL4, 3, 0xFFFF, {{0x0B00, 0x0B08}, {0x0B10, 0x0B18}, {0x1200, 0x12E0} } }, {NIX_TXSCH_LVL_TL3, 4, 0xFFFF, {{0x1000, 0x10E0}, {0x1600, 0x1608}, - {0x1610, 0x1618}, {0x1700, 0x17B0} } }, - {NIX_TXSCH_LVL_TL2, 2, 0xFFFF, {{0x0E00, 0x0EE0}, {0x1700, 0x17B0} } }, + {0x1610, 0x1618}, {0x1700, 0x17C8} } }, + {NIX_TXSCH_LVL_TL2, 2, 0xFFFF, {{0x0E00, 0x0EE0}, {0x1700, 0x17C8} } }, {NIX_TXSCH_LVL_TL1, 1, 0xFFFF, {{0x0C00, 0x0D98} } }, }; -- cgit From 777c0d761be7d981a2ae5494dfbc636311908dfb Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 1 Nov 2023 15:19:09 +0100 Subject: RISC-V: hwprobe: Always use u64 for extension bits Extensions are getting added quickly and their hwprobe bits will soon exceed 31 (which pair values accommodate, since they're of type u64). However, in one tree, where a bunch of extensions got merged prior to zicboz, zicboz already got pushed to bit 32. Pushing it exposed a 32-bit compilation bug, since unsigned long was used instead of u64. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310311801.hxduISrr-lkp@intel.com/ Fixes: 9c7646d5ffd2 ("RISC-V: hwprobe: Expose Zicboz extension and its block size") Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20231101141908.192198-2-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sys_riscv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index c712037dbe10..a2ca5b7756a5 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -169,7 +169,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, pair->value &= ~missing; } -static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) +static bool hwprobe_ext0_has(const struct cpumask *cpus, u64 ext) { struct riscv_hwprobe pair; -- cgit From c0a2755aced969e0125fd68ccd95269b28d8913a Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 29 Nov 2023 20:12:31 +0100 Subject: dt-bindings: interrupt-controller: Allow #power-domain-cells MPM provides a single genpd. Allow #power-domain-cells = <0>. Fixes: 54fc9851c0e0 ("dt-bindings: interrupt-controller: Add Qualcomm MPM support") Acked-by: Shawn Guo Acked-by: Krzysztof Kozlowski Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20231129-topic-mpmbindingspd-v2-1-acbe909ceee1@linaro.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml index 509d20c091af..6a206111d4e0 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml @@ -62,6 +62,9 @@ properties: - description: MPM pin number - description: GIC SPI number for the MPM pin + '#power-domain-cells': + const: 0 + required: - compatible - reg @@ -93,4 +96,5 @@ examples: <86 183>, <90 260>, <91 260>; + #power-domain-cells = <0>; }; -- cgit From 22e0eb04837a63af111fae35a92f7577676b9bc8 Mon Sep 17 00:00:00 2001 From: Clément Léger Date: Fri, 3 Nov 2023 10:02:23 +0100 Subject: riscv: fix misaligned access handling of C.SWSP and C.SDSP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a backport of a fix that was done in OpenSBI: ec0559eb315b ("lib: sbi_misaligned_ldst: Fix handling of C.SWSP and C.SDSP"). Unlike C.LWSP/C.LDSP, these encodings can be used with the zero register, so checking that the rs2 field is non-zero is unnecessary. Additionally, the previous check was incorrect since it was checking the immediate field of the instruction instead of the rs2 field. Fixes: 956d705dd279 ("riscv: Unaligned load/store handling for M_MODE") Signed-off-by: Clément Léger Link: https://lore.kernel.org/r/20231103090223.702340-1-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps_misaligned.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 5eba37147caa..5255f8134aef 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -550,16 +550,14 @@ int handle_misaligned_store(struct pt_regs *regs) } else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) { len = 8; val.data_ulong = GET_RS2S(insn, regs); - } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP && - ((insn >> SH_RD) & 0x1f)) { + } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP) { len = 8; val.data_ulong = GET_RS2C(insn, regs); #endif } else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) { len = 4; val.data_ulong = GET_RS2S(insn, regs); - } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP && - ((insn >> SH_RD) & 0x1f)) { + } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP) { len = 4; val.data_ulong = GET_RS2C(insn, regs); } else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) { -- cgit From 33038efb64f7576bac635164021f5c984d4c755f Mon Sep 17 00:00:00 2001 From: Tim Bosse Date: Wed, 6 Dec 2023 09:26:29 -0500 Subject: ALSA: hda/realtek: add new Framework laptop to quirks The Framework Laptop 13 (AMD Ryzen 7040Series) has an ALC295 with a disconnected or faulty headset mic presence detect similar to the previous models. It works with the same quirk chain as 309d7363ca3d9fcdb92ff2d958be14d7e8707f68. This model has a VID:PID of f111:0006. Signed-off-by: Tim Bosse Cc: Link: https://lore.kernel.org/r/20231206142629.388615-1-flinn@timbos.se Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d799d0ad7623..51e1bfd7bdde 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10273,6 +10273,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0xf111, 0x0006, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 /* Below is a quirk table taken from the old code. -- cgit From 96ba4a47d147cf8c4b764ec3a66a088a5bb3033a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 22 Nov 2023 15:44:14 -0700 Subject: dt-bindings: perf: riscv,pmu: drop unneeded quotes Drop unneeded quotes over simple string values to fix a soon to be enabled yamllint warning: [error] string value is redundantly quoted with any quotes (quoted-strings) Signed-off-by: Rob Herring Reviewed-by: Krzysztof Kozlowski Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20231122224414.2809184-1-robh@kernel.org Signed-off-by: Palmer Dabbelt --- Documentation/devicetree/bindings/perf/riscv,pmu.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/perf/riscv,pmu.yaml b/Documentation/devicetree/bindings/perf/riscv,pmu.yaml index c8448de2f2a0..d01c677ad3c7 100644 --- a/Documentation/devicetree/bindings/perf/riscv,pmu.yaml +++ b/Documentation/devicetree/bindings/perf/riscv,pmu.yaml @@ -90,7 +90,7 @@ properties: bitmap of all MHPMCOUNTERx that can monitor the range of events dependencies: - "riscv,event-to-mhpmevent": [ "riscv,event-to-mhpmcounters" ] + riscv,event-to-mhpmevent: [ "riscv,event-to-mhpmcounters" ] required: - compatible -- cgit From f40cab8e18ed57d2c7b5213437d83d955f78097f Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 21 Nov 2023 13:19:29 -0800 Subject: riscv: Fix SMP when shadow call stacks are enabled This fixes two bugs in SCS initialization for secondary CPUs. First, the SCS was not initialized at all in the spinwait boot path. Second, the code for the SBI HSM path attempted to initialize the SCS before enabling the MMU. However, that involves dereferencing the thread pointer, which requires the MMU to be enabled. Fix both issues by setting up the SCS in the common secondary entry path, after enabling the MMU. Fixes: d1584d791a29 ("riscv: Implement Shadow Call Stack") Signed-off-by: Samuel Holland Reviewed-by: Sami Tolvanen Link: https://lore.kernel.org/r/20231121211958.3158576-1-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index b77397432403..76ace1e0b46f 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -154,7 +154,6 @@ secondary_start_sbi: XIP_FIXUP_OFFSET a3 add a3, a3, a1 REG_L sp, (a3) - scs_load_current .Lsecondary_start_common: @@ -165,6 +164,7 @@ secondary_start_sbi: call relocate_enable_mmu #endif call .Lsetup_trap_vector + scs_load_current tail smp_callin #endif /* CONFIG_SMP */ -- cgit From ed5b7cfd7839f9280a63365c1133482b42d0981f Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Thu, 30 Nov 2023 21:26:47 +0000 Subject: riscv: errata: andes: Probe for IOCP only once in boot stage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to probe for IOCP only once during boot stage, as we were probing for IOCP for all the stages this caused the below issue during module-init stage, [9.019104] Unable to handle kernel paging request at virtual address ffffffff8100d3a0 [9.027153] Oops [#1] [9.029421] Modules linked in: rcar_canfd renesas_usbhs i2c_riic can_dev spi_rspi i2c_core [9.037686] CPU: 0 PID: 90 Comm: udevd Not tainted 6.7.0-rc1+ #57 [9.043756] Hardware name: Renesas SMARC EVK based on r9a07g043f01 (DT) [9.050339] epc : riscv_noncoherent_supported+0x10/0x3e [9.055558]  ra : andes_errata_patch_func+0x4a/0x52 [9.060418] epc : ffffffff8000d8c2 ra : ffffffff8000d95c sp : ffffffc8003abb00 [9.067607]  gp : ffffffff814e25a0 tp : ffffffd80361e540 t0 : 0000000000000000 [9.074795]  t1 : 000000000900031e t2 : 0000000000000001 s0 : ffffffc8003abb20 [9.081984]  s1 : ffffffff015b57c7 a0 : 0000000000000000 a1 : 0000000000000001 [9.089172]  a2 : 0000000000000000 a3 : 0000000000000000 a4 : ffffffff8100d8be [9.096360]  a5 : 0000000000000001 a6 : 0000000000000001 a7 : 000000000900031e [9.103548]  s2 : ffffffff015b57d7 s3 : 0000000000000001 s4 : 000000000000031e [9.110736]  s5 : 8000000000008a45 s6 : 0000000000000500 s7 : 000000000000003f [9.117924]  s8 : ffffffc8003abd48 s9 : ffffffff015b1140 s10: ffffffff8151a1b0 [9.125113]  s11: ffffffff015b1000 t3 : 0000000000000001 t4 : fefefefefefefeff [9.132301]  t5 : ffffffff015b57c7 t6 : ffffffd8b63a6000 [9.137587] status: 0000000200000120 badaddr: ffffffff8100d3a0 cause: 000000000000000f [9.145468] [] riscv_noncoherent_supported+0x10/0x3e [9.151972] [] _apply_alternatives+0x84/0x86 [9.157784] [] apply_module_alternatives+0x10/0x1a [9.164113] [] module_finalize+0x5e/0x7a [9.169583] [] load_module+0xfd8/0x179c [9.174965] [] init_module_from_file+0x76/0xaa [9.180948] [] __riscv_sys_finit_module+0x176/0x2a8 [9.187365] [] do_trap_ecall_u+0xbe/0x130 [9.192922] [] ret_from_exception+0x0/0x64 [9.198573] Code: 0009 b7e9 6797 014d a783 85a7 c799 4785 0717 0100 (0123) aef7 [9.205994] ---[ end trace 0000000000000000 ]--- This is because we called riscv_noncoherent_supported() for all the stages during IOCP probe. riscv_noncoherent_supported() function sets noncoherent_supported variable to true which has an annotation set to "__ro_after_init" due to which we were seeing the above splat. Fix this by probing for IOCP only once in boot stage by having a boolean variable "done" which will be set to true upon IOCP probe in errata_probe_iocp() and we bail out early if "done" is set to true. While at it make return type of errata_probe_iocp() to void as we were not checking the return value in andes_errata_patch_func(). Fixes: e021ae7f5145 ("riscv: errata: Add Andes alternative ports") Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Reviewed-by: Yu Chien Peter Lin Link: https://lore.kernel.org/r/20231130212647.108746-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/andes/errata.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c index 197db68cc8da..17a904869724 100644 --- a/arch/riscv/errata/andes/errata.c +++ b/arch/riscv/errata/andes/errata.c @@ -38,29 +38,35 @@ static long ax45mp_iocp_sw_workaround(void) return ret.error ? 0 : ret.value; } -static bool errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid) +static void errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid) { + static bool done; + if (!IS_ENABLED(CONFIG_ERRATA_ANDES_CMO)) - return false; + return; + + if (done) + return; + + done = true; if (arch_id != ANDESTECH_AX45MP_MARCHID || impid != ANDESTECH_AX45MP_MIMPID) - return false; + return; if (!ax45mp_iocp_sw_workaround()) - return false; + return; /* Set this just to make core cbo code happy */ riscv_cbom_block_size = 1; riscv_noncoherent_supported(); - - return true; } void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, unsigned long archid, unsigned long impid, unsigned int stage) { - errata_probe_iocp(stage, archid, impid); + if (stage == RISCV_ALTERNATIVES_BOOT) + errata_probe_iocp(stage, archid, impid); /* we have nothing to patch here ATM so just return back */ } -- cgit From fe2b1226656afae56702d1d84c6900f6b67df297 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 1 Dec 2023 11:23:22 +0100 Subject: leds: trigger: netdev: fix RTNL handling to prevent potential deadlock When working on LED support for r8169 I got the following lockdep warning. Easiest way to prevent this scenario seems to be to take the RTNL lock before the trigger_data lock in set_device_name(). ====================================================== WARNING: possible circular locking dependency detected 6.7.0-rc2-next-20231124+ #2 Not tainted ------------------------------------------------------ bash/383 is trying to acquire lock: ffff888103aa1c68 (&trigger_data->lock){+.+.}-{3:3}, at: netdev_trig_notify+0xec/0x190 [ledtrig_netdev] but task is already holding lock: ffffffff8cddf808 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock+0x12/0x20 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (rtnl_mutex){+.+.}-{3:3}: __mutex_lock+0x9b/0xb50 mutex_lock_nested+0x16/0x20 rtnl_lock+0x12/0x20 set_device_name+0xa9/0x120 [ledtrig_netdev] netdev_trig_activate+0x1a1/0x230 [ledtrig_netdev] led_trigger_set+0x172/0x2c0 led_trigger_write+0xf1/0x140 sysfs_kf_bin_write+0x5d/0x80 kernfs_fop_write_iter+0x15d/0x210 vfs_write+0x1f0/0x510 ksys_write+0x6c/0xf0 __x64_sys_write+0x14/0x20 do_syscall_64+0x3f/0xf0 entry_SYSCALL_64_after_hwframe+0x6c/0x74 -> #0 (&trigger_data->lock){+.+.}-{3:3}: __lock_acquire+0x1459/0x25a0 lock_acquire+0xc8/0x2d0 __mutex_lock+0x9b/0xb50 mutex_lock_nested+0x16/0x20 netdev_trig_notify+0xec/0x190 [ledtrig_netdev] call_netdevice_register_net_notifiers+0x5a/0x100 register_netdevice_notifier+0x85/0x120 netdev_trig_activate+0x1d4/0x230 [ledtrig_netdev] led_trigger_set+0x172/0x2c0 led_trigger_write+0xf1/0x140 sysfs_kf_bin_write+0x5d/0x80 kernfs_fop_write_iter+0x15d/0x210 vfs_write+0x1f0/0x510 ksys_write+0x6c/0xf0 __x64_sys_write+0x14/0x20 do_syscall_64+0x3f/0xf0 entry_SYSCALL_64_after_hwframe+0x6c/0x74 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(rtnl_mutex); lock(&trigger_data->lock); lock(rtnl_mutex); lock(&trigger_data->lock); *** DEADLOCK *** 8 locks held by bash/383: #0: ffff888103ff33f0 (sb_writers#3){.+.+}-{0:0}, at: ksys_write+0x6c/0xf0 #1: ffff888103aa1e88 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x114/0x210 #2: ffff8881036f1890 (kn->active#82){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x11d/0x210 #3: ffff888108e2c358 (&led_cdev->led_access){+.+.}-{3:3}, at: led_trigger_write+0x30/0x140 #4: ffffffff8cdd9e10 (triggers_list_lock){++++}-{3:3}, at: led_trigger_write+0x75/0x140 #5: ffff888108e2c270 (&led_cdev->trigger_lock){++++}-{3:3}, at: led_trigger_write+0xe3/0x140 #6: ffffffff8cdde3d0 (pernet_ops_rwsem){++++}-{3:3}, at: register_netdevice_notifier+0x1c/0x120 #7: ffffffff8cddf808 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock+0x12/0x20 stack backtrace: CPU: 0 PID: 383 Comm: bash Not tainted 6.7.0-rc2-next-20231124+ #2 Hardware name: Default string Default string/Default string, BIOS ADLN.M6.SODIMM.ZB.CY.015 08/08/2023 Call Trace: dump_stack_lvl+0x5c/0xd0 dump_stack+0x10/0x20 print_circular_bug+0x2dd/0x410 check_noncircular+0x131/0x150 __lock_acquire+0x1459/0x25a0 lock_acquire+0xc8/0x2d0 ? netdev_trig_notify+0xec/0x190 [ledtrig_netdev] __mutex_lock+0x9b/0xb50 ? netdev_trig_notify+0xec/0x190 [ledtrig_netdev] ? __this_cpu_preempt_check+0x13/0x20 ? netdev_trig_notify+0xec/0x190 [ledtrig_netdev] ? __cancel_work_timer+0x11c/0x1b0 ? __mutex_lock+0x123/0xb50 mutex_lock_nested+0x16/0x20 ? mutex_lock_nested+0x16/0x20 netdev_trig_notify+0xec/0x190 [ledtrig_netdev] call_netdevice_register_net_notifiers+0x5a/0x100 register_netdevice_notifier+0x85/0x120 netdev_trig_activate+0x1d4/0x230 [ledtrig_netdev] led_trigger_set+0x172/0x2c0 ? preempt_count_add+0x49/0xc0 led_trigger_write+0xf1/0x140 sysfs_kf_bin_write+0x5d/0x80 kernfs_fop_write_iter+0x15d/0x210 vfs_write+0x1f0/0x510 ksys_write+0x6c/0xf0 __x64_sys_write+0x14/0x20 do_syscall_64+0x3f/0xf0 entry_SYSCALL_64_after_hwframe+0x6c/0x74 RIP: 0033:0x7f269055d034 Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d 35 c3 0d 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 48 83 ec 28 48 89 54 24 18 48 RSP: 002b:00007ffddb7ef748 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000007 RCX: 00007f269055d034 RDX: 0000000000000007 RSI: 000055bf5f4af3c0 RDI: 0000000000000001 RBP: 000055bf5f4af3c0 R08: 0000000000000073 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000007 R13: 00007f26906325c0 R14: 00007f269062ff20 R15: 0000000000000000 Fixes: d5e01266e7f5 ("leds: trigger: netdev: add additional specific link speed mode") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Acked-by: Lee Jones Link: https://lore.kernel.org/r/fb5c8294-2a10-4bf5-8f10-3d2b77d2757e@gmail.com Signed-off-by: Jakub Kicinski --- drivers/leds/trigger/ledtrig-netdev.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c index e358e77e4b38..d76214fa9ad8 100644 --- a/drivers/leds/trigger/ledtrig-netdev.c +++ b/drivers/leds/trigger/ledtrig-netdev.c @@ -226,6 +226,11 @@ static int set_device_name(struct led_netdev_data *trigger_data, cancel_delayed_work_sync(&trigger_data->work); + /* + * Take RTNL lock before trigger_data lock to prevent potential + * deadlock with netdev notifier registration. + */ + rtnl_lock(); mutex_lock(&trigger_data->lock); if (trigger_data->net_dev) { @@ -245,16 +250,14 @@ static int set_device_name(struct led_netdev_data *trigger_data, trigger_data->carrier_link_up = false; trigger_data->link_speed = SPEED_UNKNOWN; trigger_data->duplex = DUPLEX_UNKNOWN; - if (trigger_data->net_dev != NULL) { - rtnl_lock(); + if (trigger_data->net_dev) get_device_state(trigger_data); - rtnl_unlock(); - } trigger_data->last_activity = 0; set_baseline_state(trigger_data); mutex_unlock(&trigger_data->lock); + rtnl_unlock(); return 0; } -- cgit From 94ea0ed356b45fa2d5d0c08d5495471bde29a1e5 Mon Sep 17 00:00:00 2001 From: Chester Lin Date: Thu, 16 Nov 2023 07:45:08 +0800 Subject: MAINTAINERS: change the S32G2 maintainer's email address. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I am leaving SUSE so the current email address will be disabled soon. will be my new address for handling emails, patches and pull requests from upstream and communities. Cc: Chester Lin Cc: NXP S32 Linux Team Cc: Arnd Bergmann Cc: Olof Johansson Cc: Andreas Färber Cc: Matthias Brugger Signed-off-by: Chester Lin Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20231115234508.11510-1-clin@suse.com Signed-off-by: Arnd Bergmann --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 012df8ccf34e..f78d2b96af01 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2535,7 +2535,7 @@ F: drivers/*/*/*wpcm* F: drivers/*/*wpcm* ARM/NXP S32G ARCHITECTURE -M: Chester Lin +M: Chester Lin R: Andreas Färber R: Matthias Brugger R: NXP S32 Linux Team -- cgit From 1834d62ae88500f37cba4439c3237aa85242272e Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Thu, 30 Nov 2023 15:23:23 +0800 Subject: netfilter: bpf: fix bad registration on nf_defrag We should pass a pointer to global_hook to the get_proto_defrag_hook() instead of its value, since the passed value won't be updated even if the request module was loaded successfully. Log: [ 54.915713] nf_defrag_ipv4 has bad registration [ 54.915779] WARNING: CPU: 3 PID: 6323 at net/netfilter/nf_bpf_link.c:62 get_proto_defrag_hook+0x137/0x160 [ 54.915835] CPU: 3 PID: 6323 Comm: fentry Kdump: loaded Tainted: G E 6.7.0-rc2+ #35 [ 54.915839] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 [ 54.915841] RIP: 0010:get_proto_defrag_hook+0x137/0x160 [ 54.915844] Code: 4f 8c e8 2c cf 68 ff 80 3d db 83 9a 01 00 0f 85 74 ff ff ff 48 89 ee 48 c7 c7 8f 12 4f 8c c6 05 c4 83 9a 01 01 e8 09 ee 5f ff <0f> 0b e9 57 ff ff ff 49 8b 3c 24 4c 63 e5 e8 36 28 6c ff 4c 89 e0 [ 54.915849] RSP: 0018:ffffb676003fbdb0 EFLAGS: 00010286 [ 54.915852] RAX: 0000000000000023 RBX: ffff9596503d5600 RCX: ffff95996fce08c8 [ 54.915854] RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffff95996fce08c0 [ 54.915855] RBP: ffffffff8c4f12de R08: 0000000000000000 R09: 00000000fffeffff [ 54.915859] R10: ffffb676003fbc70 R11: ffffffff8d363ae8 R12: 0000000000000000 [ 54.915861] R13: ffffffff8e1f75c0 R14: ffffb676003c9000 R15: 00007ffd15e78ef0 [ 54.915864] FS: 00007fb6e9cab740(0000) GS:ffff95996fcc0000(0000) knlGS:0000000000000000 [ 54.915867] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 54.915868] CR2: 00007ffd15e75c40 CR3: 0000000101e62006 CR4: 0000000000360ef0 [ 54.915870] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 54.915871] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 54.915873] Call Trace: [ 54.915891] [ 54.915894] ? __warn+0x84/0x140 [ 54.915905] ? get_proto_defrag_hook+0x137/0x160 [ 54.915908] ? __report_bug+0xea/0x100 [ 54.915925] ? report_bug+0x2b/0x80 [ 54.915928] ? handle_bug+0x3c/0x70 [ 54.915939] ? exc_invalid_op+0x18/0x70 [ 54.915942] ? asm_exc_invalid_op+0x1a/0x20 [ 54.915948] ? get_proto_defrag_hook+0x137/0x160 [ 54.915950] bpf_nf_link_attach+0x1eb/0x240 [ 54.915953] link_create+0x173/0x290 [ 54.915969] __sys_bpf+0x588/0x8f0 [ 54.915974] __x64_sys_bpf+0x20/0x30 [ 54.915977] do_syscall_64+0x45/0xf0 [ 54.915989] entry_SYSCALL_64_after_hwframe+0x6e/0x76 [ 54.915998] RIP: 0033:0x7fb6e9daa51d [ 54.916001] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 2b 89 0c 00 f7 d8 64 89 01 48 [ 54.916003] RSP: 002b:00007ffd15e78ed8 EFLAGS: 00000246 ORIG_RAX: 0000000000000141 [ 54.916006] RAX: ffffffffffffffda RBX: 00007ffd15e78fc0 RCX: 00007fb6e9daa51d [ 54.916007] RDX: 0000000000000040 RSI: 00007ffd15e78ef0 RDI: 000000000000001c [ 54.916009] RBP: 000000000000002d R08: 00007fb6e9e73a60 R09: 0000000000000001 [ 54.916010] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000006 [ 54.916012] R13: 0000000000000006 R14: 0000000000000000 R15: 0000000000000000 [ 54.916014] [ 54.916015] ---[ end trace 0000000000000000 ]--- Fixes: 91721c2d02d3 ("netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link") Signed-off-by: D. Wythe Acked-by: Daniel Xu Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_bpf_link.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index e502ec00b2fe..0e4beae421f8 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -31,7 +31,7 @@ struct bpf_nf_link { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) static const struct nf_defrag_hook * get_proto_defrag_hook(struct bpf_nf_link *link, - const struct nf_defrag_hook __rcu *global_hook, + const struct nf_defrag_hook __rcu **ptr_global_hook, const char *mod) { const struct nf_defrag_hook *hook; @@ -39,7 +39,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link, /* RCU protects us from races against module unloading */ rcu_read_lock(); - hook = rcu_dereference(global_hook); + hook = rcu_dereference(*ptr_global_hook); if (!hook) { rcu_read_unlock(); err = request_module(mod); @@ -47,7 +47,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link, return ERR_PTR(err < 0 ? err : -EINVAL); rcu_read_lock(); - hook = rcu_dereference(global_hook); + hook = rcu_dereference(*ptr_global_hook); } if (hook && try_module_get(hook->owner)) { @@ -78,7 +78,7 @@ static int bpf_nf_enable_defrag(struct bpf_nf_link *link) switch (link->hook_ops.pf) { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) case NFPROTO_IPV4: - hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4"); + hook = get_proto_defrag_hook(link, &nf_defrag_v4_hook, "nf_defrag_ipv4"); if (IS_ERR(hook)) return PTR_ERR(hook); @@ -87,7 +87,7 @@ static int bpf_nf_enable_defrag(struct bpf_nf_link *link) #endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) case NFPROTO_IPV6: - hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6"); + hook = get_proto_defrag_hook(link, &nf_defrag_v6_hook, "nf_defrag_ipv6"); if (IS_ERR(hook)) return PTR_ERR(hook); -- cgit From 317eb9685095678f2c9f5a8189de698c5354316a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Dec 2023 15:47:13 +0100 Subject: netfilter: nft_set_pipapo: skip inactive elements during set walk Otherwise set elements can be deactivated twice which will cause a crash. Reported-by: Xingyuan Mo Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 701977af3ee8..7252fcdae349 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -2043,6 +2043,9 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, e = f->mt[r].e; + if (!nft_set_elem_active(&e->ext, iter->genmask)) + goto cont; + iter->err = iter->fn(ctx, set, iter, &e->priv); if (iter->err < 0) goto out; -- cgit From 63331e37fb227e796894b31d713697612c8dee7f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 Dec 2023 12:29:54 +0100 Subject: netfilter: nf_tables: fix 'exist' matching on bigendian arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maze reports "tcp option fastopen exists" fails to match on OpenWrt 22.03.5, r20134-5f15225c1e (5.10.176) router. "tcp option fastopen exists" translates to: inet [ exthdr load tcpopt 1b @ 34 + 0 present => reg 1 ] [ cmp eq reg 1 0x00000001 ] .. but existing nft userspace generates a 1-byte compare. On LSB (x86), "*reg32 = 1" is identical to nft_reg_store8(reg32, 1), but not on MSB, which will place the 1 last. IOW, on bigendian aches the cmp8 is awalys false. Make sure we store this in a consistent fashion, so existing userspace will also work on MSB (bigendian). Regardless of this patch we can also change nft userspace to generate 'reg32 == 0' and 'reg32 != 0' instead of u8 == 0 // u8 == 1 when adding 'option x missing/exists' expressions as well. Fixes: 3c1fece8819e ("netfilter: nft_exthdr: Allow checking TCP option presence, too") Fixes: b9f9a485fb0e ("netfilter: nft_exthdr: add boolean DCCP option matching") Fixes: 055c4b34b94f ("netfilter: nft_fib: Support existence check") Reported-by: Maciej Żenczykowski Closes: https://lore.kernel.org/netfilter-devel/CAHo-OozyEqHUjL2-ntATzeZOiuftLWZ_HU6TOM_js4qLfDEAJg@mail.gmail.com/ Signed-off-by: Florian Westphal Acked-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_exthdr.c | 4 ++-- net/netfilter/nft_fib.c | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 3fbaa7bf41f9..6eb571d0c3fd 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -214,7 +214,7 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr, offset = i + priv->offset; if (priv->flags & NFT_EXTHDR_F_PRESENT) { - *dest = 1; + nft_reg_store8(dest, 1); } else { if (priv->len % NFT_REG32_SIZE) dest[priv->len / NFT_REG32_SIZE] = 0; @@ -461,7 +461,7 @@ static void nft_exthdr_dccp_eval(const struct nft_expr *expr, type = bufp[0]; if (type == priv->type) { - *dest = 1; + nft_reg_store8(dest, 1); return; } diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 1bfe258018da..37cfe6dd712d 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -145,11 +145,15 @@ void nft_fib_store_result(void *reg, const struct nft_fib *priv, switch (priv->result) { case NFT_FIB_RESULT_OIF: index = dev ? dev->ifindex : 0; - *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index; + if (priv->flags & NFTA_FIB_F_PRESENT) + nft_reg_store8(dreg, !!index); + else + *dreg = index; + break; case NFT_FIB_RESULT_OIFNAME: if (priv->flags & NFTA_FIB_F_PRESENT) - *dreg = !!dev; + nft_reg_store8(dreg, !!dev); else strscpy_pad(reg, dev ? dev->name : "", IFNAMSIZ); break; -- cgit From 3701cd390fd731ee7ae8b8006246c8db82c72bea Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Dec 2023 14:25:33 +0100 Subject: netfilter: nf_tables: bail out on mismatching dynset and set expressions If dynset expressions provided by userspace is larger than the declared set expressions, then bail out. Fixes: 48b0ae046ee9 ("netfilter: nftables: netlink support for several set element expressions") Reported-by: Xingyuan Mo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_dynset.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index b18a79039125..c09dba57354c 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -280,10 +280,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx, priv->expr_array[i] = dynset_expr; priv->num_exprs++; - if (set->num_exprs && - dynset_expr->ops != set->exprs[i]->ops) { - err = -EOPNOTSUPP; - goto err_expr_free; + if (set->num_exprs) { + if (i >= set->num_exprs) { + err = -EINVAL; + goto err_expr_free; + } + if (dynset_expr->ops != set->exprs[i]->ops) { + err = -EOPNOTSUPP; + goto err_expr_free; + } } i++; } -- cgit From f6e1532a2697b81da00bfb184e99d15e01e9d98c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Dec 2023 14:51:48 +0100 Subject: netfilter: nf_tables: validate family when identifying table via handle Validate table family when looking up for it via NFTA_TABLE_HANDLE. Fixes: 3ecbfd65f50e ("netfilter: nf_tables: allocate handle and delete objects via handle") Reported-by: Xingyuan Mo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c0a42989b982..c5c17c6e80ed 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -803,7 +803,7 @@ static struct nft_table *nft_table_lookup(const struct net *net, static struct nft_table *nft_table_lookup_byhandle(const struct net *net, const struct nlattr *nla, - u8 genmask, u32 nlpid) + int family, u8 genmask, u32 nlpid) { struct nftables_pernet *nft_net; struct nft_table *table; @@ -811,6 +811,7 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net, nft_net = nft_pernet(net); list_for_each_entry(table, &nft_net->tables, list) { if (be64_to_cpu(nla_get_be64(nla)) == table->handle && + table->family == family && nft_active_genmask(table, genmask)) { if (nft_table_has_owner(table) && nlpid && table->nlpid != nlpid) @@ -1544,7 +1545,7 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_TABLE_HANDLE]) { attr = nla[NFTA_TABLE_HANDLE]; - table = nft_table_lookup_byhandle(net, attr, genmask, + table = nft_table_lookup_byhandle(net, attr, family, genmask, NETLINK_CB(skb).portid); } else { attr = nla[NFTA_TABLE_NAME]; -- cgit From 7ae836a3d630e146b732fe8ef7d86b243748751f Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 5 Dec 2023 21:58:12 +0100 Subject: netfilter: xt_owner: Fix for unsafe access of sk->sk_socket A concurrently running sock_orphan() may NULL the sk_socket pointer in between check and deref. Follow other users (like nft_meta.c for instance) and acquire sk_callback_lock before dereferencing sk_socket. Fixes: 0265ab44bacc ("[NETFILTER]: merge ipt_owner/ip6t_owner in xt_owner") Reported-by: Jann Horn Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_owner.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index e85ce69924ae..50332888c8d2 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -76,18 +76,23 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) */ return false; - filp = sk->sk_socket->file; - if (filp == NULL) + read_lock_bh(&sk->sk_callback_lock); + filp = sk->sk_socket ? sk->sk_socket->file : NULL; + if (filp == NULL) { + read_unlock_bh(&sk->sk_callback_lock); return ((info->match ^ info->invert) & (XT_OWNER_UID | XT_OWNER_GID)) == 0; + } if (info->match & XT_OWNER_UID) { kuid_t uid_min = make_kuid(net->user_ns, info->uid_min); kuid_t uid_max = make_kuid(net->user_ns, info->uid_max); if ((uid_gte(filp->f_cred->fsuid, uid_min) && uid_lte(filp->f_cred->fsuid, uid_max)) ^ - !(info->invert & XT_OWNER_UID)) + !(info->invert & XT_OWNER_UID)) { + read_unlock_bh(&sk->sk_callback_lock); return false; + } } if (info->match & XT_OWNER_GID) { @@ -112,10 +117,13 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) } } - if (match ^ !(info->invert & XT_OWNER_GID)) + if (match ^ !(info->invert & XT_OWNER_GID)) { + read_unlock_bh(&sk->sk_callback_lock); return false; + } } + read_unlock_bh(&sk->sk_callback_lock); return true; } -- cgit From 823342524868168bf681f135d01b4ae10f5863ec Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 6 Dec 2023 16:37:37 +0000 Subject: Revert "cifs: reconnect work should have reference on server struct" This reverts commit 19a4b9d6c372cab6a3b2c9a061a236136fe95274. This earlier commit was making an assumption that each mod_delayed_work called for the reconnect work would result in smb2_reconnect_server being called twice. This assumption turns out to be untrue. So reverting this change for now. I will submit a follow-up patch to fix the actual problem in a different way. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/connect.c | 27 ++++++--------------------- fs/smb/client/smb2pdu.c | 23 ++++++++++------------- 2 files changed, 16 insertions(+), 34 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index f896f60c924b..e07975173cf4 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -402,13 +402,7 @@ static int __cifs_reconnect(struct TCP_Server_Info *server, spin_unlock(&server->srv_lock); cifs_swn_reset_server_dstaddr(server); cifs_server_unlock(server); - - /* increase ref count which reconnect work will drop */ - spin_lock(&cifs_tcp_ses_lock); - server->srv_count++; - spin_unlock(&cifs_tcp_ses_lock); - if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) - cifs_put_tcp_session(server, false); + mod_delayed_work(cifsiod_wq, &server->reconnect, 0); } } while (server->tcpStatus == CifsNeedReconnect); @@ -538,13 +532,7 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) spin_unlock(&server->srv_lock); cifs_swn_reset_server_dstaddr(server); cifs_server_unlock(server); - - /* increase ref count which reconnect work will drop */ - spin_lock(&cifs_tcp_ses_lock); - server->srv_count++; - spin_unlock(&cifs_tcp_ses_lock); - if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) - cifs_put_tcp_session(server, false); + mod_delayed_work(cifsiod_wq, &server->reconnect, 0); } while (server->tcpStatus == CifsNeedReconnect); mutex_lock(&server->refpath_lock); @@ -1626,19 +1614,16 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) cancel_delayed_work_sync(&server->echo); - if (from_reconnect) { + if (from_reconnect) /* * Avoid deadlock here: reconnect work calls * cifs_put_tcp_session() at its end. Need to be sure * that reconnect work does nothing with server pointer after * that step. */ - if (cancel_delayed_work(&server->reconnect)) - cifs_put_tcp_session(server, from_reconnect); - } else { - if (cancel_delayed_work_sync(&server->reconnect)) - cifs_put_tcp_session(server, from_reconnect); - } + cancel_delayed_work(&server->reconnect); + else + cancel_delayed_work_sync(&server->reconnect); spin_lock(&server->srv_lock); server->tcpStatus = CifsExiting; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 395e1230ddbc..9d7289f8d87b 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -3952,6 +3952,12 @@ void smb2_reconnect_server(struct work_struct *work) } spin_unlock(&ses->chan_lock); } + /* + * Get the reference to server struct to be sure that the last call of + * cifs_put_tcon() in the loop below won't release the server pointer. + */ + if (tcon_exist || ses_exist) + server->srv_count++; spin_unlock(&cifs_tcp_ses_lock); @@ -3999,17 +4005,13 @@ void smb2_reconnect_server(struct work_struct *work) done: cifs_dbg(FYI, "Reconnecting tcons and channels finished\n"); - if (resched) { + if (resched) queue_delayed_work(cifsiod_wq, &server->reconnect, 2 * HZ); - mutex_unlock(&pserver->reconnect_mutex); - - /* no need to put tcp session as we're retrying */ - return; - } mutex_unlock(&pserver->reconnect_mutex); /* now we can safely release srv struct */ - cifs_put_tcp_session(server, true); + if (tcon_exist || ses_exist) + cifs_put_tcp_session(server, 1); } int @@ -4029,12 +4031,7 @@ SMB2_echo(struct TCP_Server_Info *server) server->ops->need_neg(server)) { spin_unlock(&server->srv_lock); /* No need to send echo on newly established connections */ - spin_lock(&cifs_tcp_ses_lock); - server->srv_count++; - spin_unlock(&cifs_tcp_ses_lock); - if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) - cifs_put_tcp_session(server, false); - + mod_delayed_work(cifsiod_wq, &server->reconnect, 0); return rc; } spin_unlock(&server->srv_lock); -- cgit From 04909192ada3285070f8ced0af7f07735478b364 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 6 Dec 2023 16:37:38 +0000 Subject: cifs: reconnect worker should take reference on server struct unconditionally Reconnect worker currently assumes that the server struct is alive and only takes reference on the server if it needs to call smb2_reconnect. With the new ability to disable channels based on whether the server has multichannel disabled, this becomes a problem when we need to disable established channels. While disabling the channels and deallocating the server, there could be reconnect work that could not be cancelled (because it started). This change forces the reconnect worker to unconditionally take a reference on the server when it runs. Also, this change now allows smb2_reconnect to know if it was called by the reconnect worker. Based on this, the cifs_put_tcp_session can decide whether it can cancel the reconnect work synchronously or not. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/connect.c | 8 ++++---- fs/smb/client/smb2pdu.c | 29 +++++++++++++++-------------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index e07975173cf4..9dc6dc2754c2 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1608,10 +1608,6 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) list_del_init(&server->tcp_ses_list); spin_unlock(&cifs_tcp_ses_lock); - /* For secondary channels, we pick up ref-count on the primary server */ - if (SERVER_IS_CHAN(server)) - cifs_put_tcp_session(server->primary_server, from_reconnect); - cancel_delayed_work_sync(&server->echo); if (from_reconnect) @@ -1625,6 +1621,10 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) else cancel_delayed_work_sync(&server->reconnect); + /* For secondary channels, we pick up ref-count on the primary server */ + if (SERVER_IS_CHAN(server)) + cifs_put_tcp_session(server->primary_server, from_reconnect); + spin_lock(&server->srv_lock); server->tcpStatus = CifsExiting; spin_unlock(&server->srv_lock); diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 9d7289f8d87b..20634fc6d4f0 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -158,7 +158,7 @@ out: static int smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, - struct TCP_Server_Info *server) + struct TCP_Server_Info *server, bool from_reconnect) { int rc = 0; struct nls_table *nls_codepage = NULL; @@ -331,7 +331,7 @@ again: * as cifs_put_tcp_session takes a higher lock * i.e. cifs_tcp_ses_lock */ - cifs_put_tcp_session(server, 1); + cifs_put_tcp_session(server, from_reconnect); server->terminate = true; cifs_signal_cifsd_for_reconnect(server, false); @@ -499,7 +499,7 @@ static int smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, { int rc; - rc = smb2_reconnect(smb2_command, tcon, server); + rc = smb2_reconnect(smb2_command, tcon, server, false); if (rc) return rc; @@ -3895,6 +3895,15 @@ void smb2_reconnect_server(struct work_struct *work) int rc; bool resched = false; + /* first check if ref count has reached 0, if not inc ref count */ + spin_lock(&cifs_tcp_ses_lock); + if (!server->srv_count) { + spin_unlock(&cifs_tcp_ses_lock); + return; + } + server->srv_count++; + spin_unlock(&cifs_tcp_ses_lock); + /* If server is a channel, select the primary channel */ pserver = SERVER_IS_CHAN(server) ? server->primary_server : server; @@ -3952,17 +3961,10 @@ void smb2_reconnect_server(struct work_struct *work) } spin_unlock(&ses->chan_lock); } - /* - * Get the reference to server struct to be sure that the last call of - * cifs_put_tcon() in the loop below won't release the server pointer. - */ - if (tcon_exist || ses_exist) - server->srv_count++; - spin_unlock(&cifs_tcp_ses_lock); list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) { - rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server); + rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server, true); if (!rc) cifs_reopen_persistent_handles(tcon); else @@ -3995,7 +3997,7 @@ void smb2_reconnect_server(struct work_struct *work) /* now reconnect sessions for necessary channels */ list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) { tcon->ses = ses; - rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server); + rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server, true); if (rc) resched = true; list_del_init(&ses->rlist); @@ -4010,8 +4012,7 @@ done: mutex_unlock(&pserver->reconnect_mutex); /* now we can safely release srv struct */ - if (tcon_exist || ses_exist) - cifs_put_tcp_session(server, 1); + cifs_put_tcp_session(server, true); } int -- cgit From b2dd797543cfa6580eac8408dd67fa02164d9e56 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 6 Dec 2023 10:02:44 -0500 Subject: ring-buffer: Force absolute timestamp on discard of event There's a race where if an event is discarded from the ring buffer and an interrupt were to happen at that time and insert an event, the time stamp is still used from the discarded event as an offset. This can screw up the timings. If the event is going to be discarded, set the "before_stamp" to zero. When a new event comes in, it compares the "before_stamp" with the "write_stamp" and if they are not equal, it will insert an absolute timestamp. This will prevent the timings from getting out of sync due to the discarded event. Link: https://lore.kernel.org/linux-trace-kernel/20231206100244.5130f9b3@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: 6f6be606e763f ("ring-buffer: Force before_stamp and write_stamp to be different on discard") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 43cc47d7faaf..a6da2d765c78 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3030,22 +3030,19 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, local_read(&bpage->write) & ~RB_WRITE_MASK; unsigned long event_length = rb_event_length(event); + /* + * For the before_stamp to be different than the write_stamp + * to make sure that the next event adds an absolute + * value and does not rely on the saved write stamp, which + * is now going to be bogus. + */ + rb_time_set(&cpu_buffer->before_stamp, 0); + /* Something came in, can't discard */ if (!rb_time_cmpxchg(&cpu_buffer->write_stamp, write_stamp, write_stamp - delta)) return false; - /* - * It's possible that the event time delta is zero - * (has the same time stamp as the previous event) - * in which case write_stamp and before_stamp could - * be the same. In such a case, force before_stamp - * to be different than write_stamp. It doesn't - * matter what it is, as long as its different. - */ - if (!delta) - rb_time_set(&cpu_buffer->before_stamp, 0); - /* * If an event were to come in now, it would see that the * write_stamp and the before_stamp are different, and assume -- cgit From f458a1453424e03462b5bb539673c9a3cddda480 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 6 Dec 2023 10:00:50 -0500 Subject: ring-buffer: Test last update in 32bit version of __rb_time_read() Since 64 bit cmpxchg() is very expensive on 32bit architectures, the timestamp used by the ring buffer does some interesting tricks to be able to still have an atomic 64 bit number. It originally just used 60 bits and broke it up into two 32 bit words where the extra 2 bits were used for synchronization. But this was not enough for all use cases, and all 64 bits were required. The 32bit version of the ring buffer timestamp was then broken up into 3 32bit words using the same counter trick. But one update was not done. The check to see if the read operation was done without interruption only checked the first two words and not last one (like it had before this update). Fix it by making sure all three updates happen without interruption by comparing the initial counter with the last updated counter. Link: https://lore.kernel.org/linux-trace-kernel/20231206100050.3100b7bb@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: f03f2abce4f39 ("ring-buffer: Have 32 bit time stamps use all 64 bits") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a6da2d765c78..8d2a4f00eca9 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -644,8 +644,8 @@ static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt) *cnt = rb_time_cnt(top); - /* If top and bottom counts don't match, this interrupted a write */ - if (*cnt != rb_time_cnt(bottom)) + /* If top and msb counts don't match, this interrupted a write */ + if (*cnt != rb_time_cnt(msb)) return false; /* The shift to msb will lose its cnt bits */ -- cgit From d6a57588666301acd9d42d3b00d74240964f07f6 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Fri, 1 Dec 2023 08:38:15 +0800 Subject: drm/amdgpu: disable MCBP by default Disable MCBP(mid command buffer preemption) by default as old Mesa hangs with it. We shall not enable the feature that breaks old usermode driver. Fixes: 50a7c8765ca6 ("drm/amdgpu: enable mcbp by default on gfx9") Signed-off-by: Jiadong Zhu Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5c0817cbc7c2..1cc1ae2743c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3791,10 +3791,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) adev->gfx.mcbp = true; else if (amdgpu_mcbp == 0) adev->gfx.mcbp = false; - else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) && - (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) && - adev->gfx.num_gfx_rings) - adev->gfx.mcbp = true; if (amdgpu_sriov_vf(adev)) adev->gfx.mcbp = true; -- cgit From f2d87a759f6841a132e845e2fafdad37385ddd30 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 5 Dec 2023 17:42:13 +0800 Subject: md: fix missing flush of sync_work Commit ac619781967b ("md: use separate work_struct for md_start_sync()") use a new sync_work to replace del_work, however, stop_sync_thread() and __md_stop_writes() was trying to wait for sync_thread to be done, hence they should switch to use sync_work as well. Noted that md_start_sync() from sync_work will grab 'reconfig_mutex', hence other contex can't held the same lock to flush work, and this will be fixed in later patches. Fixes: ac619781967b ("md: use separate work_struct for md_start_sync()") Signed-off-by: Yu Kuai Acked-by: Xiao Ni Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231205094215.1824240-2-yukuai1@huaweicloud.com --- drivers/md/md.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index c94373d64f2c..5640a948086b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4857,7 +4857,7 @@ static void stop_sync_thread(struct mddev *mddev) return; } - if (work_pending(&mddev->del_work)) + if (work_pending(&mddev->sync_work)) flush_workqueue(md_misc_wq); set_bit(MD_RECOVERY_INTR, &mddev->recovery); @@ -6265,7 +6265,7 @@ static void md_clean(struct mddev *mddev) static void __md_stop_writes(struct mddev *mddev) { set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - if (work_pending(&mddev->del_work)) + if (work_pending(&mddev->sync_work)) flush_workqueue(md_misc_wq); if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); -- cgit From c9f7cb5b2bc968adcdc686c197ed108f47fd8eb0 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 5 Dec 2023 17:42:14 +0800 Subject: md: don't leave 'MD_RECOVERY_FROZEN' in error path of md_set_readonly() If md_set_readonly() failed, the array could still be read-write, however 'MD_RECOVERY_FROZEN' could still be set, which leave the array in an abnormal state that sync or recovery can't continue anymore. Hence make sure the flag is cleared after md_set_readonly() returns. Fixes: 88724bfa68be ("md: wait for pending superblock updates before switching to read-only") Signed-off-by: Yu Kuai Acked-by: Xiao Ni Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231205094215.1824240-3-yukuai1@huaweicloud.com --- drivers/md/md.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 5640a948086b..2d8e45a1af23 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6355,6 +6355,9 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) int err = 0; int did_freeze = 0; + if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) + return -EBUSY; + if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) { did_freeze = 1; set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); @@ -6369,8 +6372,6 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) */ md_wakeup_thread_directly(mddev->sync_thread); - if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) - return -EBUSY; mddev_unlock(mddev); wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); @@ -6383,29 +6384,30 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) mddev->sync_thread || test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { pr_warn("md: %s still in use.\n",mdname(mddev)); - if (did_freeze) { - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); - } err = -EBUSY; goto out; } + if (mddev->pers) { __md_stop_writes(mddev); - err = -ENXIO; - if (mddev->ro == MD_RDONLY) + if (mddev->ro == MD_RDONLY) { + err = -ENXIO; goto out; + } + mddev->ro = MD_RDONLY; set_disk_ro(mddev->gendisk, 1); + } + +out: + if ((mddev->pers && !err) || did_freeze) { clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); sysfs_notify_dirent_safe(mddev->sysfs_state); - err = 0; } -out: + mutex_unlock(&mddev->open_mutex); return err; } -- cgit From f52f5c71f3d4bb0992800139d2f35cf9f6f6e0ee Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 5 Dec 2023 17:42:15 +0800 Subject: md: fix stopping sync thread Currently sync thread is stopped from multiple contex: - idle_sync_thread - frozen_sync_thread - __md_stop_writes - md_set_readonly - do_md_stop And there are some problems: 1) sync_work is flushed while reconfig_mutex is grabbed, this can deadlock because the work function will grab reconfig_mutex as well. 2) md_reap_sync_thread() can't be called directly while md_do_sync() is not finished yet, for example, commit 130443d60b1b ("md: refactor idle/frozen_sync_thread() to fix deadlock"). 3) If MD_RECOVERY_RUNNING is not set, there is no need to stop sync_thread at all because sync_thread must not be registered. Factor out a helper stop_sync_thread(), so that above contex will behave the same. Fix 1) by flushing sync_work after reconfig_mutex is released, before waiting for sync_thread to be done; Fix 2) bt letting daemon thread to unregister sync_thread; Fix 3) by always checking MD_RECOVERY_RUNNING first. Fixes: db5e653d7c9f ("md: delay choosing sync action to md_start_sync()") Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231205094215.1824240-4-yukuai1@huaweicloud.com --- drivers/md/md.c | 90 ++++++++++++++++++++++++--------------------------------- 1 file changed, 37 insertions(+), 53 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 2d8e45a1af23..bc9d67af1961 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4840,25 +4840,29 @@ action_show(struct mddev *mddev, char *page) return sprintf(page, "%s\n", type); } -static void stop_sync_thread(struct mddev *mddev) +/** + * stop_sync_thread() - wait for sync_thread to stop if it's running. + * @mddev: the array. + * @locked: if set, reconfig_mutex will still be held after this function + * return; if not set, reconfig_mutex will be released after this + * function return. + * @check_seq: if set, only wait for curent running sync_thread to stop, noted + * that new sync_thread can still start. + */ +static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq) { - if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) - return; + int sync_seq; - if (mddev_lock(mddev)) - return; + if (check_seq) + sync_seq = atomic_read(&mddev->sync_seq); - /* - * Check again in case MD_RECOVERY_RUNNING is cleared before lock is - * held. - */ if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { - mddev_unlock(mddev); + if (!locked) + mddev_unlock(mddev); return; } - if (work_pending(&mddev->sync_work)) - flush_workqueue(md_misc_wq); + mddev_unlock(mddev); set_bit(MD_RECOVERY_INTR, &mddev->recovery); /* @@ -4866,21 +4870,28 @@ static void stop_sync_thread(struct mddev *mddev) * never happen */ md_wakeup_thread_directly(mddev->sync_thread); + if (work_pending(&mddev->sync_work)) + flush_work(&mddev->sync_work); - mddev_unlock(mddev); + wait_event(resync_wait, + !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + (check_seq && sync_seq != atomic_read(&mddev->sync_seq))); + + if (locked) + mddev_lock_nointr(mddev); } static void idle_sync_thread(struct mddev *mddev) { - int sync_seq = atomic_read(&mddev->sync_seq); - mutex_lock(&mddev->sync_mutex); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - stop_sync_thread(mddev); - wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) || - !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); + if (mddev_lock(mddev)) { + mutex_unlock(&mddev->sync_mutex); + return; + } + stop_sync_thread(mddev, false, true); mutex_unlock(&mddev->sync_mutex); } @@ -4888,11 +4899,13 @@ static void frozen_sync_thread(struct mddev *mddev) { mutex_lock(&mddev->sync_mutex); set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - stop_sync_thread(mddev); - wait_event(resync_wait, mddev->sync_thread == NULL && - !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); + if (mddev_lock(mddev)) { + mutex_unlock(&mddev->sync_mutex); + return; + } + stop_sync_thread(mddev, false, false); mutex_unlock(&mddev->sync_mutex); } @@ -6264,14 +6277,7 @@ static void md_clean(struct mddev *mddev) static void __md_stop_writes(struct mddev *mddev) { - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - if (work_pending(&mddev->sync_work)) - flush_workqueue(md_misc_wq); - if (mddev->sync_thread) { - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - md_reap_sync_thread(mddev); - } - + stop_sync_thread(mddev, true, false); del_timer_sync(&mddev->safemode_timer); if (mddev->pers && mddev->pers->quiesce) { @@ -6363,18 +6369,8 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); md_wakeup_thread(mddev->thread); } - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - - /* - * Thread might be blocked waiting for metadata update which will now - * never happen - */ - md_wakeup_thread_directly(mddev->sync_thread); - mddev_unlock(mddev); - wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING, - &mddev->recovery)); + stop_sync_thread(mddev, false, false); wait_event(mddev->sb_wait, !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); mddev_lock_nointr(mddev); @@ -6428,20 +6424,8 @@ static int do_md_stop(struct mddev *mddev, int mode, set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); md_wakeup_thread(mddev->thread); } - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - /* - * Thread might be blocked waiting for metadata update which will now - * never happen - */ - md_wakeup_thread_directly(mddev->sync_thread); - - mddev_unlock(mddev); - wait_event(resync_wait, (mddev->sync_thread == NULL && - !test_bit(MD_RECOVERY_RUNNING, - &mddev->recovery))); - mddev_lock_nointr(mddev); + stop_sync_thread(mddev, true, false); mutex_lock(&mddev->open_mutex); if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || -- cgit From fec05adc40c25a028c9dfa9d540f800a2d433f80 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Fri, 1 Dec 2023 06:25:07 -0700 Subject: drm/amd/display: Use channel_width = 2 for vram table 3.0 VBIOS has suggested to use channel_width=2 for any ASIC that uses vram info 3.0. This is because channel_width in the vram table no longer represents the memory width Tested-by: Daniel Wheeler Reviewed-by: Samson Tam Acked-by: Rodrigo Siqueira Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 7cdb1a8a0ba0..6a96810a477e 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2386,7 +2386,13 @@ static enum bp_result get_vram_info_v30( return BP_RESULT_BADBIOSTABLE; info->num_chans = info_v30->channel_num; - info->dram_channel_width_bytes = (1 << info_v30->channel_width) / 8; + /* As suggested by VBIOS we should always use + * dram_channel_width_bytes = 2 when using VRAM + * table version 3.0. This is because the channel_width + * param in the VRAM info table is changed in 7000 series and + * no longer represents the memory channel width. + */ + info->dram_channel_width_bytes = 2; return result; } -- cgit From 3d71a8726e05a35beb9de394e86ce896d69e563f Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Fri, 1 Dec 2023 06:25:16 -0700 Subject: drm/amd/display: Add monitor patch for specific eDP [WHY] Some eDP panels's ext caps don't write initial value cause the value of dpcd_addr(0x317) is random. It means that sometimes the eDP will clarify it is OLED, miniLED...etc cause the backlight control interface is incorrect. [HOW] Add a new panel patch to remove sink ext caps(HDR,OLED...etc) Tested-by: Daniel Wheeler Reviewed-by: Sun peng Li Acked-by: Rodrigo Siqueira Signed-off-by: Ivan Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index c7a29bb737e2..aac98f93545a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -63,6 +63,12 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps) DRM_DEBUG_DRIVER("Disabling FAMS on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.disable_fams = true; break; + /* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */ + case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB): + case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B): + DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id); + edid_caps->panel_patch.remove_sink_ext_caps = true; + break; default: return; } -- cgit From 9f7cb03e3c32613fb5891e10ce3ff9169b09ba69 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Fri, 1 Dec 2023 06:25:40 -0700 Subject: drm/amd/display: Fix array-index-out-of-bounds in dml2 [Why] UBSAN errors observed in dmesg. array-index-out-of-bounds in dml2/display_mode_core.c [How] Fix the index. Tested-by: Daniel Wheeler Acked-by: Rodrigo Siqueira Signed-off-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 59718ee33e51..4d1336e5afc2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -9447,12 +9447,12 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc // Output CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark - CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j]; + CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[0]; CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0][0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[] - CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j]; + CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[0]; CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported - CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j]; + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[0]; CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( &mode_lib->scratch, -- cgit From 78825df90d427b26964bf9610eaac30542ee9e2d Mon Sep 17 00:00:00 2001 From: Li Ma Date: Tue, 14 Nov 2023 16:17:51 +0800 Subject: drm/amd/swsmu: update smu v14_0_0 driver if version and metrics table Increment the driver if version and add new mems to the mertics table. Signed-off-by: Li Ma Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 17 +++++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 10 +++ .../pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h | 77 +++++++++++++--------- .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c | 46 ++++++++++++- 4 files changed, 115 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 0d1209f2cf31..1c5049e894e3 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -1085,6 +1085,10 @@ struct gpu_metrics_v3_0 { uint16_t average_dram_reads; /* time filtered DRAM write bandwidth [MB/sec] */ uint16_t average_dram_writes; + /* time filtered IPU read bandwidth [MB/sec] */ + uint16_t average_ipu_reads; + /* time filtered IPU write bandwidth [MB/sec] */ + uint16_t average_ipu_writes; /* Driver attached timestamp (in ns) */ uint64_t system_clock_counter; @@ -1104,6 +1108,8 @@ struct gpu_metrics_v3_0 { uint32_t average_all_core_power; /* calculated core power [mW] */ uint16_t average_core_power[16]; + /* time filtered total system power [mW] */ + uint16_t average_sys_power; /* maximum IRM defined STAPM power limit [mW] */ uint16_t stapm_power_limit; /* time filtered STAPM power limit [mW] */ @@ -1116,6 +1122,8 @@ struct gpu_metrics_v3_0 { uint16_t average_ipuclk_frequency; uint16_t average_fclk_frequency; uint16_t average_vclk_frequency; + uint16_t average_uclk_frequency; + uint16_t average_mpipu_frequency; /* Current clocks */ /* target core frequency [MHz] */ @@ -1125,6 +1133,15 @@ struct gpu_metrics_v3_0 { /* GFXCLK frequency limit enforced on GFX [MHz] */ uint16_t current_gfx_maxfreq; + /* Throttle Residency (ASIC dependent) */ + uint32_t throttle_residency_prochot; + uint32_t throttle_residency_spl; + uint32_t throttle_residency_fppt; + uint32_t throttle_residency_sppt; + uint32_t throttle_residency_thm_core; + uint32_t throttle_residency_thm_gfx; + uint32_t throttle_residency_thm_soc; + /* Metrics table alpha filter time constant [us] */ uint32_t time_filter_alphavalue; }; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 23fa71cafb14..f8b2e6cc2568 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1408,6 +1408,16 @@ typedef enum { METRICS_PCIE_WIDTH, METRICS_CURR_FANPWM, METRICS_CURR_SOCKETPOWER, + METRICS_AVERAGE_VPECLK, + METRICS_AVERAGE_IPUCLK, + METRICS_AVERAGE_MPIPUCLK, + METRICS_THROTTLER_RESIDENCY_PROCHOT, + METRICS_THROTTLER_RESIDENCY_SPL, + METRICS_THROTTLER_RESIDENCY_FPPT, + METRICS_THROTTLER_RESIDENCY_SPPT, + METRICS_THROTTLER_RESIDENCY_THM_CORE, + METRICS_THROTTLER_RESIDENCY_THM_GFX, + METRICS_THROTTLER_RESIDENCY_THM_SOC, } MetricsMember_t; enum smu_cmn2asic_mapping_type { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h index 22f88842a7fd..8f42771e1f0a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h @@ -27,7 +27,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define PMFW_DRIVER_IF_VERSION 6 +#define PMFW_DRIVER_IF_VERSION 7 typedef struct { int32_t value; @@ -150,37 +150,50 @@ typedef struct { } DpmClocks_t; typedef struct { - uint16_t CoreFrequency[16]; //Target core frequency [MHz] - uint16_t CorePower[16]; //CAC calculated core power [mW] - uint16_t CoreTemperature[16]; //TSEN measured core temperature [centi-C] - uint16_t GfxTemperature; //TSEN measured GFX temperature [centi-C] - uint16_t SocTemperature; //TSEN measured SOC temperature [centi-C] - uint16_t StapmOpnLimit; //Maximum IRM defined STAPM power limit [mW] - uint16_t StapmCurrentLimit; //Time filtered STAPM power limit [mW] - uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced on classic cores [MHz] - uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit enforced on GFX [MHz] - uint16_t SkinTemp; //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C] - uint16_t GfxclkFrequency; //Time filtered target GFXCLK frequency [MHz] - uint16_t FclkFrequency; //Time filtered target FCLK frequency [MHz] - uint16_t GfxActivity; //Time filtered GFX busy % [0-100] - uint16_t SocclkFrequency; //Time filtered target SOCCLK frequency [MHz] - uint16_t VclkFrequency; //Time filtered target VCLK frequency [MHz] - uint16_t VcnActivity; //Time filtered VCN busy % [0-100] - uint16_t VpeclkFrequency; //Time filtered target VPECLK frequency [MHz] - uint16_t IpuclkFrequency; //Time filtered target IPUCLK frequency [MHz] - uint16_t IpuBusy[8]; //Time filtered IPU per-column busy % [0-100] - uint16_t DRAMReads; //Time filtered DRAM read bandwidth [MB/sec] - uint16_t DRAMWrites; //Time filtered DRAM write bandwidth [MB/sec] - uint16_t CoreC0Residency[16]; //Time filtered per-core C0 residency % [0-100] - uint16_t IpuPower; //Time filtered IPU power [mW] - uint32_t ApuPower; //Time filtered APU power [mW] - uint32_t GfxPower; //Time filtered GFX power [mW] - uint32_t dGpuPower; //Time filtered dGPU power [mW] - uint32_t SocketPower; //Time filtered power used for PPT/STAPM [APU+dGPU] [mW] - uint32_t AllCorePower; //Time filtered sum of core power across all cores in the socket [mW] - uint32_t FilterAlphaValue; //Metrics table alpha filter time constant [us] - uint32_t MetricsCounter; //Counter that is incremented on every metrics table update [PM_TIMER cycles] - uint32_t spare[16]; + uint16_t CoreFrequency[16]; //Target core frequency [MHz] + uint16_t CorePower[16]; //CAC calculated core power [mW] + uint16_t CoreTemperature[16]; //TSEN measured core temperature [centi-C] + uint16_t GfxTemperature; //TSEN measured GFX temperature [centi-C] + uint16_t SocTemperature; //TSEN measured SOC temperature [centi-C] + uint16_t StapmOpnLimit; //Maximum IRM defined STAPM power limit [mW] + uint16_t StapmCurrentLimit; //Time filtered STAPM power limit [mW] + uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced on classic cores [MHz] + uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit enforced on GFX [MHz] + uint16_t SkinTemp; //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C] + uint16_t GfxclkFrequency; //Time filtered target GFXCLK frequency [MHz] + uint16_t FclkFrequency; //Time filtered target FCLK frequency [MHz] + uint16_t GfxActivity; //Time filtered GFX busy % [0-100] + uint16_t SocclkFrequency; //Time filtered target SOCCLK frequency [MHz] + uint16_t VclkFrequency; //Time filtered target VCLK frequency [MHz] + uint16_t VcnActivity; //Time filtered VCN busy % [0-100] + uint16_t VpeclkFrequency; //Time filtered target VPECLK frequency [MHz] + uint16_t IpuclkFrequency; //Time filtered target IPUCLK frequency [MHz] + uint16_t IpuBusy[8]; //Time filtered IPU per-column busy % [0-100] + uint16_t DRAMReads; //Time filtered DRAM read bandwidth [MB/sec] + uint16_t DRAMWrites; //Time filtered DRAM write bandwidth [MB/sec] + uint16_t CoreC0Residency[16]; //Time filtered per-core C0 residency % [0-100] + uint16_t IpuPower; //Time filtered IPU power [mW] + uint32_t ApuPower; //Time filtered APU power [mW] + uint32_t GfxPower; //Time filtered GFX power [mW] + uint32_t dGpuPower; //Time filtered dGPU power [mW] + uint32_t SocketPower; //Time filtered power used for PPT/STAPM [APU+dGPU] [mW] + uint32_t AllCorePower; //Time filtered sum of core power across all cores in the socket [mW] + uint32_t FilterAlphaValue; //Metrics table alpha filter time constant [us] + uint32_t MetricsCounter; //Counter that is incremented on every metrics table update [PM_TIMER cycles] + uint16_t MemclkFrequency; //Time filtered target MEMCLK frequency [MHz] + uint16_t MpipuclkFrequency; //Time filtered target MPIPUCLK frequency [MHz] + uint16_t IpuReads; //Time filtered IPU read bandwidth [MB/sec] + uint16_t IpuWrites; //Time filtered IPU write bandwidth [MB/sec] + uint32_t ThrottleResidency_PROCHOT; //Counter that is incremented on every metrics table update when PROCHOT was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_SPL; //Counter that is incremented on every metrics table update when SPL was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_FPPT; //Counter that is incremented on every metrics table update when fast PPT was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_SPPT; //Counter that is incremented on every metrics table update when slow PPT was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_THM_CORE; //Counter that is incremented on every metrics table update when CORE thermal throttling was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_THM_GFX; //Counter that is incremented on every metrics table update when GFX thermal throttling was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_THM_SOC; //Counter that is incremented on every metrics table update when SOC thermal throttling was engaged [PM_TIMER cycles] + uint16_t Psys; //Time filtered Psys power [mW] + uint16_t spare1; + uint32_t spare[6]; } SmuMetrics_t; //ISP tile definitions diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index 03b38c3a9968..94ccdbfd7090 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -246,11 +246,20 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, *value = 0; break; case METRICS_AVERAGE_UCLK: - *value = 0; + *value = metrics->MemclkFrequency; break; case METRICS_AVERAGE_FCLK: *value = metrics->FclkFrequency; break; + case METRICS_AVERAGE_VPECLK: + *value = metrics->VpeclkFrequency; + break; + case METRICS_AVERAGE_IPUCLK: + *value = metrics->IpuclkFrequency; + break; + case METRICS_AVERAGE_MPIPUCLK: + *value = metrics->MpipuclkFrequency; + break; case METRICS_AVERAGE_GFXACTIVITY: *value = metrics->GfxActivity / 100; break; @@ -270,8 +279,26 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, *value = metrics->SocTemperature / 100 * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; - case METRICS_THROTTLER_STATUS: - *value = 0; + case METRICS_THROTTLER_RESIDENCY_PROCHOT: + *value = metrics->ThrottleResidency_PROCHOT; + break; + case METRICS_THROTTLER_RESIDENCY_SPL: + *value = metrics->ThrottleResidency_SPL; + break; + case METRICS_THROTTLER_RESIDENCY_FPPT: + *value = metrics->ThrottleResidency_FPPT; + break; + case METRICS_THROTTLER_RESIDENCY_SPPT: + *value = metrics->ThrottleResidency_SPPT; + break; + case METRICS_THROTTLER_RESIDENCY_THM_CORE: + *value = metrics->ThrottleResidency_THM_CORE; + break; + case METRICS_THROTTLER_RESIDENCY_THM_GFX: + *value = metrics->ThrottleResidency_THM_GFX; + break; + case METRICS_THROTTLER_RESIDENCY_THM_SOC: + *value = metrics->ThrottleResidency_THM_SOC; break; case METRICS_VOLTAGE_VDDGFX: *value = 0; @@ -498,6 +525,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, sizeof(uint16_t) * 16); gpu_metrics->average_dram_reads = metrics.DRAMReads; gpu_metrics->average_dram_writes = metrics.DRAMWrites; + gpu_metrics->average_ipu_reads = metrics.IpuReads; + gpu_metrics->average_ipu_writes = metrics.IpuWrites; gpu_metrics->average_socket_power = metrics.SocketPower; gpu_metrics->average_ipu_power = metrics.IpuPower; @@ -505,6 +534,7 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_gfx_power = metrics.GfxPower; gpu_metrics->average_dgpu_power = metrics.dGpuPower; gpu_metrics->average_all_core_power = metrics.AllCorePower; + gpu_metrics->average_sys_power = metrics.Psys; memcpy(&gpu_metrics->average_core_power[0], &metrics.CorePower[0], sizeof(uint16_t) * 16); @@ -515,6 +545,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_fclk_frequency = metrics.FclkFrequency; gpu_metrics->average_vclk_frequency = metrics.VclkFrequency; gpu_metrics->average_ipuclk_frequency = metrics.IpuclkFrequency; + gpu_metrics->average_uclk_frequency = metrics.MemclkFrequency; + gpu_metrics->average_mpipu_frequency = metrics.MpipuclkFrequency; memcpy(&gpu_metrics->current_coreclk[0], &metrics.CoreFrequency[0], @@ -522,6 +554,14 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, gpu_metrics->current_core_maxfreq = metrics.InfrastructureCpuMaxFreq; gpu_metrics->current_gfx_maxfreq = metrics.InfrastructureGfxMaxFreq; + gpu_metrics->throttle_residency_prochot = metrics.ThrottleResidency_PROCHOT; + gpu_metrics->throttle_residency_spl = metrics.ThrottleResidency_SPL; + gpu_metrics->throttle_residency_fppt = metrics.ThrottleResidency_FPPT; + gpu_metrics->throttle_residency_sppt = metrics.ThrottleResidency_SPPT; + gpu_metrics->throttle_residency_thm_core = metrics.ThrottleResidency_THM_CORE; + gpu_metrics->throttle_residency_thm_gfx = metrics.ThrottleResidency_THM_GFX; + gpu_metrics->throttle_residency_thm_soc = metrics.ThrottleResidency_THM_SOC; + gpu_metrics->time_filter_alphavalue = metrics.FilterAlphaValue; gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); -- cgit From 37c57631c18661c4c0dc415e75afd143ed89e098 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 4 Dec 2023 10:17:57 +0800 Subject: drm/amd/pm: support new mca smu error code decoding support new mca smu error code decoding from smu 85.86.0 for smu v13.0.6 Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 2 ++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index 2b488fcf2f95..e51e8918e667 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -46,6 +46,8 @@ #define MCA_REG__STATUS__ERRORCODEEXT(x) MCA_REG_FIELD(x, 21, 16) #define MCA_REG__STATUS__ERRORCODE(x) MCA_REG_FIELD(x, 15, 0) +#define MCA_REG__SYND__ERRORINFORMATION(x) MCA_REG_FIELD(x, 17, 0) + enum amdgpu_mca_ip { AMDGPU_MCA_IP_UNKNOW = -1, AMDGPU_MCA_IP_PSP = 0, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 0e5a77c3c2e2..900a2d9e6d85 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2593,13 +2593,20 @@ static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) { + struct smu_context *smu = adev->powerplay.pp_handle; uint32_t errcode, instlo; instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); if (instlo != 0x03b30400) return false; - errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode); + if (!(adev->flags & AMD_IS_APU) && smu->smc_fw_version >= 0x00555600) { + errcode = MCA_REG__SYND__ERRORINFORMATION(entry->regs[MCA_REG_IDX_SYND]); + errcode &= 0xff; + } else { + errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode); + } + return mca_smu_check_error_code(adev, mca_ras, errcode); } -- cgit From 0e8af20517197934cc04f8e361c6bbe198c327fd Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 20 Nov 2023 10:43:02 +0800 Subject: drm/amdgpu: Update fw version for boot time error query Boot time error query is not available until fw a10109 Signed-off-by: Hawking Zhang Reviewed-by: Stanley Yang Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 3cf4684d0d3f..5f46877f78cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -821,7 +821,7 @@ static int psp_v13_0_query_boot_status(struct psp_context *psp) if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) return 0; - if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10007) + if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10109) return 0; for_each_inst(i, inst_mask) { -- cgit From dbf3850d12baf3ba8a80c302f538d1b01940aef7 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 4 Dec 2023 10:44:32 +0800 Subject: drm/amdgpu: optimize the printing order of error data sort error data list to optimize the printing order. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a3dc68e98910..63fb4cd85e53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_ras.h" @@ -3665,6 +3666,21 @@ static struct ras_err_node *amdgpu_ras_error_node_new(void) return err_node; } +static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b) +{ + struct ras_err_node *nodea = container_of(a, struct ras_err_node, node); + struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node); + struct amdgpu_smuio_mcm_config_info *infoa = &nodea->err_info.mcm_info; + struct amdgpu_smuio_mcm_config_info *infob = &nodeb->err_info.mcm_info; + + if (unlikely(infoa->socket_id != infob->socket_id)) + return infoa->socket_id - infob->socket_id; + else + return infoa->die_id - infob->die_id; + + return 0; +} + static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data, struct amdgpu_smuio_mcm_config_info *mcm_info) { @@ -3682,6 +3698,7 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d err_data->err_list_count++; list_add_tail(&err_node->node, &err_data->err_node_list); + list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp); return &err_node->err_info; } -- cgit From 5b750b22530fe53bf7fd6a30baacd53ada26911b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 30 Nov 2023 17:34:07 -0500 Subject: drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml Does the same thing as: commit 6740ec97bcdb ("drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml2") Reviewed-by: Harry Wentland Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311302107.hUDXVyWT-lkp@intel.com/ Fixes: 67e38874b85b ("drm/amd/display: Increase num voltage states to 40") Signed-off-by: Alex Deucher Cc: Alvin Lee Cc: Hamza Mahfooz Cc: Samson Tam Cc: Harry Wentland --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index ea7d60f9a9b4..6042a5a6a44f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -61,8 +61,12 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) +ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) +frame_warn_flag := -Wframe-larger-than=3072 +else frame_warn_flag := -Wframe-larger-than=2048 endif +endif CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) -- cgit From 6fce23a4d8c5f93bf80b7f122449fbb97f1e40dd Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 29 Nov 2023 18:06:55 +0530 Subject: drm/amdgpu: Restrict extended wait to PSP v13.0.6 Only PSPv13.0.6 SOCs take a longer time to reach steady state. Other PSPv13 based SOCs don't need extended wait. Also, reduce PSPv13.0.6 wait time. Cc: stable@vger.kernel.org Fixes: fc5988907156 ("drm/amdgpu: update retry times for psp vmbx wait") Fixes: d8c1925ba8cd ("drm/amdgpu: update retry times for psp BL wait") Link: https://lore.kernel.org/amd-gfx/34dd4c66-f7bf-44aa-af8f-c82889dd652c@amd.com/ Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 5f46877f78cf..df1844d0800f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -60,7 +60,7 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin"); #define GFX_CMD_USB_PD_USE_LFB 0x480 /* Retry times for vmbx ready wait */ -#define PSP_VMBX_POLLING_LIMIT 20000 +#define PSP_VMBX_POLLING_LIMIT 3000 /* VBIOS gfl defines */ #define MBOX_READY_MASK 0x80000000 @@ -161,14 +161,18 @@ static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp) static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; - int retry_loop, ret; + int retry_loop, retry_cnt, ret; + retry_cnt = + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) ? + PSP_VMBX_POLLING_LIMIT : + 10; /* Wait for bootloader to signify that it is ready having bit 31 of * C2PMSG_35 set to 1. All other bits are expected to be cleared. * If there is an error in processing command, bits[7:0] will be set. * This is applicable for PSP v13.0.6 and newer. */ - for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) { + for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) { ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), 0x80000000, 0xffffffff, false); -- cgit From 81577503efb49f4ad76af22f9941d72900ef4aab Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 29 Nov 2023 12:37:34 +0530 Subject: drm/amdgpu: Add NULL checks for function pointers Check if function is implemented before making the call. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index c82776e5e9aa..edd6344d1e2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1423,9 +1423,11 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - adev->nbio.funcs->get_clockgating_state(adev, flags); + if (adev->nbio.funcs && adev->nbio.funcs->get_clockgating_state) + adev->nbio.funcs->get_clockgating_state(adev, flags); - adev->hdp.funcs->get_clock_gating_state(adev, flags); + if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state) + adev->hdp.funcs->get_clock_gating_state(adev, flags); if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) { /* AMD_CG_SUPPORT_DRM_MGCG */ @@ -1440,9 +1442,11 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) } /* AMD_CG_SUPPORT_ROM_MGCG */ - adev->smuio.funcs->get_clock_gating_state(adev, flags); + if (adev->smuio.funcs && adev->smuio.funcs->get_clock_gating_state) + adev->smuio.funcs->get_clock_gating_state(adev, flags); - adev->df.funcs->get_clockgating_state(adev, flags); + if (adev->df.funcs && adev->df.funcs->get_clockgating_state) + adev->df.funcs->get_clockgating_state(adev, flags); } static int soc15_common_set_powergating_state(void *handle, -- cgit From 555e39f0270b1a1c51224044be9922b4c3a4c27f Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 30 Nov 2023 15:58:21 +0530 Subject: drm/amdgpu: Update HDP 4.4.2 clock gating flags HDP 4.4.2 clockgating is enabled by default, update the flags accordingly. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 49e934975719..4db6bb73ead4 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -129,6 +129,11 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev, { int data; + if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2)) { + /* Default enabled */ + *flags |= AMD_CG_SUPPORT_HDP_MGCG; + return; + } /* AMD_CG_SUPPORT_HDP_LS */ data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK) -- cgit From 27b024a88acba17c8e3a71ff4fd425064851e3b7 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 30 Nov 2023 16:35:58 +0530 Subject: drm/amdgpu: Avoid querying DRM MGCG status MP0 v13.0.6 SOCs don't support DRM MGCG. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index edd6344d1e2b..51342809af03 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1429,7 +1429,8 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state) adev->hdp.funcs->get_clock_gating_state(adev, flags); - if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) { + if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) && + (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))) { /* AMD_CG_SUPPORT_DRM_MGCG */ data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0)); if (!(data & 0x01000000)) -- cgit From dab96d8b61aab1a4f99d0b86964a6c40e7bb1756 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 29 Nov 2023 15:44:25 -0500 Subject: drm/amdgpu: fix buffer funcs setting order on suspend We need to disable this after the last eviction call, but before we disable the SDMA IP. Fixes: b70438004a14 ("drm/amdgpu: move buffer funcs setting up a level") Link: https://lore.kernel.org/r/87edgv4x3i.fsf@vps.thesusis.net Reviewed-by: Luben Tuikov Tested-by: Phillip Susi Signed-off-by: Alex Deucher Cc: Phillip Susi Cc: Luben Tuikov --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1cc1ae2743c1..1f64d8cbb14d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4527,6 +4527,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (r) return r; + amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_fence_driver_hw_fini(adev); amdgpu_device_ip_suspend_phase2(adev); -- cgit From f63e1164b90b385cd832ff0fdfcfa76c3cc15436 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 1 Dec 2023 13:00:09 -0800 Subject: btrfs: free qgroup reserve when ORDERED_IOERR is set An ordered extent completing is a critical moment in qgroup reserve handling, as the ownership of the reservation is handed off from the ordered extent to the delayed ref. In the happy path we release (unlock) but do not free (decrement counter) the reservation, and the delayed ref drives the free. However, on an error, we don't create a delayed ref, since there is no ref to add. Therefore, free on the error path. CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 574e8a55e24a..8620ff402de4 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -599,7 +599,9 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, release = entry->disk_num_bytes; else release = entry->num_bytes; - btrfs_delalloc_release_metadata(btrfs_inode, release, false); + btrfs_delalloc_release_metadata(btrfs_inode, release, + test_bit(BTRFS_ORDERED_IOERR, + &entry->flags)); } percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes, -- cgit From 9e65bfca24cf1d77e4a5c7a170db5867377b3fe7 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 1 Dec 2023 13:00:10 -0800 Subject: btrfs: fix qgroup_free_reserved_data int overflow The reserved data counter and input parameter is a u64, but we inadvertently accumulate it in an int. Overflowing that int results in freeing the wrong amount of data and breaking reserve accounting. Unfortunately, this overflow rot spreads from there, as the qgroup release/free functions rely on returning an int to take advantage of negative values for error codes. Therefore, the full fix is to return the "released" or "freed" amount by a u64 argument and to return 0 or negative error code via the return value. Most of the call sites simply ignore the return value, though some of them handle the error and count the returned bytes. Change all of them accordingly. CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/delalloc-space.c | 2 +- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 16 ++++++++-------- fs/btrfs/ordered-data.c | 7 ++++--- fs/btrfs/qgroup.c | 25 +++++++++++++++---------- fs/btrfs/qgroup.h | 4 ++-- 6 files changed, 31 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index 51453d4928fa..2833e8ef4c09 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -199,7 +199,7 @@ void btrfs_free_reserved_data_space(struct btrfs_inode *inode, start = round_down(start, fs_info->sectorsize); btrfs_free_reserved_data_space_noquota(fs_info, len); - btrfs_qgroup_free_data(inode, reserved, start, len); + btrfs_qgroup_free_data(inode, reserved, start, len, NULL); } /* diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 92419cb8508a..33587c7ac859 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3190,7 +3190,7 @@ static long btrfs_fallocate(struct file *file, int mode, qgroup_reserved -= range->len; } else if (qgroup_reserved > 0) { btrfs_qgroup_free_data(BTRFS_I(inode), data_reserved, - range->start, range->len); + range->start, range->len, NULL); qgroup_reserved -= range->len; } list_del(&range->list); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 137c4824ff91..b9a31df58744 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -688,7 +688,7 @@ out: * And at reserve time, it's always aligned to page size, so * just free one page here. */ - btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE); + btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE, NULL); btrfs_free_path(path); btrfs_end_transaction(trans); return ret; @@ -5132,7 +5132,7 @@ static void evict_inode_truncate_pages(struct inode *inode) */ if (state_flags & EXTENT_DELALLOC) btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start, - end - start + 1); + end - start + 1, NULL); clear_extent_bit(io_tree, start, end, EXTENT_CLEAR_ALL_BITS | EXTENT_DO_ACCOUNTING, @@ -8055,7 +8055,7 @@ next: * reserved data space. * Since the IO will never happen for this page. */ - btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur); + btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur, NULL); if (!inode_evicting) { clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_UPTODATE | @@ -9487,7 +9487,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( struct btrfs_path *path; u64 start = ins->objectid; u64 len = ins->offset; - int qgroup_released; + u64 qgroup_released = 0; int ret; memset(&stack_fi, 0, sizeof(stack_fi)); @@ -9500,9 +9500,9 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE); /* Encryption and other encoding is reserved and all 0 */ - qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len); - if (qgroup_released < 0) - return ERR_PTR(qgroup_released); + ret = btrfs_qgroup_release_data(inode, file_offset, len, &qgroup_released); + if (ret < 0) + return ERR_PTR(ret); if (trans) { ret = insert_reserved_file_extent(trans, inode, @@ -10397,7 +10397,7 @@ out_delalloc_release: btrfs_delalloc_release_metadata(inode, disk_num_bytes, ret < 0); out_qgroup_free_data: if (ret < 0) - btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes); + btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes, NULL); out_free_data_space: /* * If btrfs_reserve_extent() succeeded, then we already decremented diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 8620ff402de4..a82e1417c4d2 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -152,11 +152,12 @@ static struct btrfs_ordered_extent *alloc_ordered_extent( { struct btrfs_ordered_extent *entry; int ret; + u64 qgroup_rsv = 0; if (flags & ((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) { /* For nocow write, we can release the qgroup rsv right now */ - ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes); + ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes, &qgroup_rsv); if (ret < 0) return ERR_PTR(ret); } else { @@ -164,7 +165,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent( * The ordered extent has reserved qgroup space, release now * and pass the reserved number for qgroup_record to free. */ - ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes); + ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes, &qgroup_rsv); if (ret < 0) return ERR_PTR(ret); } @@ -182,7 +183,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent( entry->inode = igrab(&inode->vfs_inode); entry->compress_type = compress_type; entry->truncated_len = (u64)-1; - entry->qgroup_rsv = ret; + entry->qgroup_rsv = qgroup_rsv; entry->flags = flags; refcount_set(&entry->refs, 1); init_waitqueue_head(&entry->wait); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index ce446d9d7f23..a953c16c7eb8 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4057,13 +4057,14 @@ int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, /* Free ranges specified by @reserved, normally in error path */ static int qgroup_free_reserved_data(struct btrfs_inode *inode, - struct extent_changeset *reserved, u64 start, u64 len) + struct extent_changeset *reserved, + u64 start, u64 len, u64 *freed_ret) { struct btrfs_root *root = inode->root; struct ulist_node *unode; struct ulist_iterator uiter; struct extent_changeset changeset; - int freed = 0; + u64 freed = 0; int ret; extent_changeset_init(&changeset); @@ -4104,7 +4105,9 @@ static int qgroup_free_reserved_data(struct btrfs_inode *inode, } btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, freed, BTRFS_QGROUP_RSV_DATA); - ret = freed; + if (freed_ret) + *freed_ret = freed; + ret = 0; out: extent_changeset_release(&changeset); return ret; @@ -4112,7 +4115,7 @@ out: static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, struct extent_changeset *reserved, u64 start, u64 len, - int free) + u64 *released, int free) { struct extent_changeset changeset; int trace_op = QGROUP_RELEASE; @@ -4128,7 +4131,7 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, /* In release case, we shouldn't have @reserved */ WARN_ON(!free && reserved); if (free && reserved) - return qgroup_free_reserved_data(inode, reserved, start, len); + return qgroup_free_reserved_data(inode, reserved, start, len, released); extent_changeset_init(&changeset); ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1, EXTENT_QGROUP_RESERVED, &changeset); @@ -4143,7 +4146,8 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, btrfs_qgroup_free_refroot(inode->root->fs_info, inode->root->root_key.objectid, changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA); - ret = changeset.bytes_changed; + if (released) + *released = changeset.bytes_changed; out: extent_changeset_release(&changeset); return ret; @@ -4162,9 +4166,10 @@ out: * NOTE: This function may sleep for memory allocation. */ int btrfs_qgroup_free_data(struct btrfs_inode *inode, - struct extent_changeset *reserved, u64 start, u64 len) + struct extent_changeset *reserved, + u64 start, u64 len, u64 *freed) { - return __btrfs_qgroup_release_data(inode, reserved, start, len, 1); + return __btrfs_qgroup_release_data(inode, reserved, start, len, freed, 1); } /* @@ -4182,9 +4187,9 @@ int btrfs_qgroup_free_data(struct btrfs_inode *inode, * * NOTE: This function may sleep for memory allocation. */ -int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len) +int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released) { - return __btrfs_qgroup_release_data(inode, NULL, start, len, 0); + return __btrfs_qgroup_release_data(inode, NULL, start, len, released, 0); } static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes, diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 855a4f978761..15b485506104 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -358,10 +358,10 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, /* New io_tree based accurate qgroup reserve API */ int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, struct extent_changeset **reserved, u64 start, u64 len); -int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len); +int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released); int btrfs_qgroup_free_data(struct btrfs_inode *inode, struct extent_changeset *reserved, u64 start, - u64 len); + u64 len, u64 *freed); int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, enum btrfs_qgroup_rsv_type type, bool enforce); int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, -- cgit From b321a52cce062ec7ed385333a33905d22159ce36 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 1 Dec 2023 13:00:11 -0800 Subject: btrfs: free qgroup pertrans reserve on transaction abort If we abort a transaction, we never run the code that frees the pertrans qgroup reservation. This results in warnings on unmount as that reservation has been leaked. The leak isn't a huge issue since the fs is read-only, but it's better to clean it up when we know we can/should. Do it during the cleanup_transaction step of aborting. CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 28 ++++++++++++++++++++++++++++ fs/btrfs/qgroup.c | 5 +++-- fs/btrfs/transaction.c | 2 -- fs/btrfs/transaction.h | 3 +++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bbcc3df77646..62cb97f7c94f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4799,6 +4799,32 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans, } } +static void btrfs_free_all_qgroup_pertrans(struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *gang[8]; + int i; + int ret; + + spin_lock(&fs_info->fs_roots_radix_lock); + while (1) { + ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, + (void **)gang, 0, + ARRAY_SIZE(gang), + BTRFS_ROOT_TRANS_TAG); + if (ret == 0) + break; + for (i = 0; i < ret; i++) { + struct btrfs_root *root = gang[i]; + + btrfs_qgroup_free_meta_all_pertrans(root); + radix_tree_tag_clear(&fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + } + } + spin_unlock(&fs_info->fs_roots_radix_lock); +} + void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, struct btrfs_fs_info *fs_info) { @@ -4827,6 +4853,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, EXTENT_DIRTY); btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents); + btrfs_free_all_qgroup_pertrans(fs_info); + cur_trans->state =TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index a953c16c7eb8..daec90342dad 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4337,8 +4337,9 @@ static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root, qgroup_rsv_release(fs_info, qgroup, num_bytes, BTRFS_QGROUP_RSV_META_PREALLOC); - qgroup_rsv_add(fs_info, qgroup, num_bytes, - BTRFS_QGROUP_RSV_META_PERTRANS); + if (!sb_rdonly(fs_info->sb)) + qgroup_rsv_add(fs_info, qgroup, num_bytes, + BTRFS_QGROUP_RSV_META_PERTRANS); list_for_each_entry(glist, &qgroup->groups, next_group) qgroup_iterator_add(&qgroup_list, glist->group); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7af9665bebae..b5aa83b7345a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -37,8 +37,6 @@ static struct kmem_cache *btrfs_trans_handle_cachep; -#define BTRFS_ROOT_TRANS_TAG 0 - /* * Transaction states and transitions * diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 18c4f6e83b78..2bf8bbdfd0b3 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -12,6 +12,9 @@ #include "ctree.h" #include "misc.h" +/* Radix-tree tag for roots that are part of the trasaction. */ +#define BTRFS_ROOT_TRANS_TAG 0 + enum btrfs_trans_state { TRANS_STATE_RUNNING, TRANS_STATE_COMMIT_PREP, -- cgit From a86805504b88f636a6458520d85afdf0634e3c6b Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 1 Dec 2023 13:00:12 -0800 Subject: btrfs: don't clear qgroup reserved bit in release_folio The EXTENT_QGROUP_RESERVED bit is used to "lock" regions of the file for duplicate reservations. That is two writes to that range in one transaction shouldn't create two reservations, as the reservation will only be freed once when the write finally goes down. Therefore, it is never OK to clear that bit without freeing the associated qgroup reserve. At this point, we don't want to be freeing the reserve, so mask off the bit. CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e6230a6ffa98..8f724c54fc8e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2302,7 +2302,8 @@ static int try_release_extent_state(struct extent_io_tree *tree, ret = 0; } else { u32 clear_bits = ~(EXTENT_LOCKED | EXTENT_NODATASUM | - EXTENT_DELALLOC_NEW | EXTENT_CTLBITS); + EXTENT_DELALLOC_NEW | EXTENT_CTLBITS | + EXTENT_QGROUP_RESERVED); /* * At this point we can safely clear everything except the -- cgit From e85a0adacf170634878fffcbf34b725aff3f49ed Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 1 Dec 2023 13:00:13 -0800 Subject: btrfs: ensure releasing squota reserve on head refs A reservation goes through a 3 step lifetime: - generated during delalloc - released/counted by ordered_extent allocation - freed by running delayed ref That third step depends on must_insert_reserved on the head ref, so the head ref with that field set owns the reservation. Once you prepare to run the head ref, must_insert_reserved is unset, which means that running the ref must free the reservation, whether or not it succeeds, or else the reservation is leaked. That results in either a risk of spurious ENOSPC if the fs stays writeable or a warning on unmount if it is readonly. The existing squota code was aware of these invariants, but missed a few cases. Improve it by adding a helper function to use in the cleanup paths and call it from the existing early returns in running delayed refs. This also simplifies btrfs_record_squota_delta and struct btrfs_quota_delta. This fixes (or at least improves the reliability of) generic/475 with "mkfs -O squota". On my machine, that test failed ~4/10 times without this patch and passed 100/100 times with it. Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 48 ++++++++++++++++++++++++++++++++++-------------- fs/btrfs/qgroup.c | 14 +++++++++++--- fs/btrfs/qgroup.h | 3 +-- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0455935ff558..01423670bc8a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1547,6 +1547,23 @@ out: return ret; } +static void free_head_ref_squota_rsv(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_head *href) +{ + u64 root = href->owning_root; + + /* + * Don't check must_insert_reserved, as this is called from contexts + * where it has already been unset. + */ + if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE || + !href->is_data || !is_fstree(root)) + return; + + btrfs_qgroup_free_refroot(fs_info, root, href->reserved_bytes, + BTRFS_QGROUP_RSV_DATA); +} + static int run_delayed_data_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head *href, struct btrfs_delayed_ref_node *node, @@ -1569,7 +1586,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, struct btrfs_squota_delta delta = { .root = href->owning_root, .num_bytes = node->num_bytes, - .rsv_bytes = href->reserved_bytes, .is_data = true, .is_inc = true, .generation = trans->transid, @@ -1586,11 +1602,9 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, flags, ref->objectid, ref->offset, &key, node->ref_mod, href->owning_root); + free_head_ref_squota_rsv(trans->fs_info, href); if (!ret) ret = btrfs_record_squota_delta(trans->fs_info, &delta); - else - btrfs_qgroup_free_refroot(trans->fs_info, delta.root, - delta.rsv_bytes, BTRFS_QGROUP_RSV_DATA); } else if (node->action == BTRFS_ADD_DELAYED_REF) { ret = __btrfs_inc_extent_ref(trans, node, parent, ref->root, ref->objectid, ref->offset, @@ -1742,7 +1756,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, struct btrfs_squota_delta delta = { .root = href->owning_root, .num_bytes = fs_info->nodesize, - .rsv_bytes = 0, .is_data = false, .is_inc = true, .generation = trans->transid, @@ -1774,8 +1787,10 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, int ret = 0; if (TRANS_ABORTED(trans)) { - if (insert_reserved) + if (insert_reserved) { btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); + free_head_ref_squota_rsv(trans->fs_info, href); + } return 0; } @@ -1871,6 +1886,8 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head) { + u64 ret = 0; + /* * We had csum deletions accounted for in our delayed refs rsv, we need * to drop the csum leaves for this update from our delayed_refs_rsv. @@ -1885,14 +1902,13 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, btrfs_delayed_refs_rsv_release(fs_info, 0, nr_csums); - return btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums); + ret = btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums); } - if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE && - head->must_insert_reserved && head->is_data) - btrfs_qgroup_free_refroot(fs_info, head->owning_root, - head->reserved_bytes, BTRFS_QGROUP_RSV_DATA); + /* must_insert_reserved can be set only if we didn't run the head ref. */ + if (head->must_insert_reserved) + free_head_ref_squota_rsv(fs_info, head); - return 0; + return ret; } static int cleanup_ref_head(struct btrfs_trans_handle *trans, @@ -2033,6 +2049,12 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans, * spin lock. */ must_insert_reserved = locked_ref->must_insert_reserved; + /* + * Unsetting this on the head ref relinquishes ownership of + * the rsv_bytes, so it is critical that every possible code + * path from here forward frees all reserves including qgroup + * reserve. + */ locked_ref->must_insert_reserved = false; extent_op = locked_ref->extent_op; @@ -3292,7 +3314,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_squota_delta delta = { .root = delayed_ref_root, .num_bytes = num_bytes, - .rsv_bytes = 0, .is_data = is_data, .is_inc = false, .generation = btrfs_extent_generation(leaf, ei), @@ -4937,7 +4958,6 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, .root = root_objectid, .num_bytes = ins->offset, .generation = trans->transid, - .rsv_bytes = 0, .is_data = true, .is_inc = true, }; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index daec90342dad..e46774e8f49f 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4661,6 +4661,17 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans) *root = RB_ROOT; } +void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes) +{ + if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE) + return; + + if (!is_fstree(root)) + return; + + btrfs_qgroup_free_refroot(fs_info, root, rsv_bytes, BTRFS_QGROUP_RSV_DATA); +} + int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, struct btrfs_squota_delta *delta) { @@ -4705,8 +4716,5 @@ int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, out: spin_unlock(&fs_info->qgroup_lock); - if (!ret && delta->rsv_bytes) - btrfs_qgroup_free_refroot(fs_info, root, delta->rsv_bytes, - BTRFS_QGROUP_RSV_DATA); return ret; } diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 15b485506104..be18c862e64e 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -274,8 +274,6 @@ struct btrfs_squota_delta { u64 root; /* The number of bytes in the extent being counted. */ u64 num_bytes; - /* The number of bytes reserved for this extent. */ - u64 rsv_bytes; /* The generation the extent was created in. */ u64 generation; /* Whether we are using or freeing the extent. */ @@ -422,6 +420,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb); void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans); bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info); +void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes); int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, struct btrfs_squota_delta *delta); -- cgit From 4b7de801606e504e69689df71475d27e35336fb3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Dec 2023 09:30:40 +0100 Subject: bpf: Fix prog_array_map_poke_run map poke update Lee pointed out issue found by syscaller [0] hitting BUG in prog array map poke update in prog_array_map_poke_run function due to error value returned from bpf_arch_text_poke function. There's race window where bpf_arch_text_poke can fail due to missing bpf program kallsym symbols, which is accounted for with check for -EINVAL in that BUG_ON call. The problem is that in such case we won't update the tail call jump and cause imbalance for the next tail call update check which will fail with -EBUSY in bpf_arch_text_poke. I'm hitting following race during the program load: CPU 0 CPU 1 bpf_prog_load bpf_check do_misc_fixups prog_array_map_poke_track map_update_elem bpf_fd_array_map_update_elem prog_array_map_poke_run bpf_arch_text_poke returns -EINVAL bpf_prog_kallsyms_add After bpf_arch_text_poke (CPU 1) fails to update the tail call jump, the next poke update fails on expected jump instruction check in bpf_arch_text_poke with -EBUSY and triggers the BUG_ON in prog_array_map_poke_run. Similar race exists on the program unload. Fixing this by moving the update to bpf_arch_poke_desc_update function which makes sure we call __bpf_arch_text_poke that skips the bpf address check. Each architecture has slightly different approach wrt looking up bpf address in bpf_arch_text_poke, so instead of splitting the function or adding new 'checkip' argument in previous version, it seems best to move the whole map_poke_run update as arch specific code. [0] https://syzkaller.appspot.com/bug?extid=97a4fe20470e9bc30810 Fixes: ebf7d1f508a7 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT") Reported-by: syzbot+97a4fe20470e9bc30810@syzkaller.appspotmail.com Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Cc: Lee Jones Cc: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20231206083041.1306660-2-jolsa@kernel.org --- arch/x86/net/bpf_jit_comp.c | 46 +++++++++++++++++++++++++++++++++++ include/linux/bpf.h | 3 +++ kernel/bpf/arraymap.c | 58 ++++++++------------------------------------- 3 files changed, 59 insertions(+), 48 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8c10d9abc239..e89e415aa743 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -3025,3 +3025,49 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp #endif WARN(1, "verification of programs using bpf_throw should have failed\n"); } + +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old) +{ + u8 *old_addr, *new_addr, *old_bypass_addr; + int ret; + + old_bypass_addr = old ? NULL : poke->bypass_addr; + old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; + new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL; + + /* + * On program loading or teardown, the program's kallsym entry + * might not be in place, so we use __bpf_arch_text_poke to skip + * the kallsyms check. + */ + if (new) { + ret = __bpf_arch_text_poke(poke->tailcall_target, + BPF_MOD_JUMP, + old_addr, new_addr); + BUG_ON(ret < 0); + if (!old) { + ret = __bpf_arch_text_poke(poke->tailcall_bypass, + BPF_MOD_JUMP, + poke->bypass_addr, + NULL); + BUG_ON(ret < 0); + } + } else { + ret = __bpf_arch_text_poke(poke->tailcall_bypass, + BPF_MOD_JUMP, + old_bypass_addr, + poke->bypass_addr); + BUG_ON(ret < 0); + /* let other CPUs finish the execution of program + * so that it will not possible to expose them + * to invalid nop, stack unwind, nop state + */ + if (!ret) + synchronize_rcu(); + ret = __bpf_arch_text_poke(poke->tailcall_target, + BPF_MOD_JUMP, + old_addr, NULL); + BUG_ON(ret < 0); + } +} diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6762dac3ef76..cff5bb08820e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3175,6 +3175,9 @@ enum bpf_text_poke_type { int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old); + void *bpf_arch_text_copy(void *dst, void *src, size_t len); int bpf_arch_text_invalidate(void *dst, size_t len); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 2058e89b5ddd..c85ff9162a5c 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -1012,11 +1012,16 @@ static void prog_array_map_poke_untrack(struct bpf_map *map, mutex_unlock(&aux->poke_mutex); } +void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old) +{ + WARN_ON_ONCE(1); +} + static void prog_array_map_poke_run(struct bpf_map *map, u32 key, struct bpf_prog *old, struct bpf_prog *new) { - u8 *old_addr, *new_addr, *old_bypass_addr; struct prog_poke_elem *elem; struct bpf_array_aux *aux; @@ -1025,7 +1030,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, list_for_each_entry(elem, &aux->poke_progs, list) { struct bpf_jit_poke_descriptor *poke; - int i, ret; + int i; for (i = 0; i < elem->aux->size_poke_tab; i++) { poke = &elem->aux->poke_tab[i]; @@ -1044,21 +1049,10 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, * activated, so tail call updates can arrive from here * while JIT is still finishing its final fixup for * non-activated poke entries. - * 3) On program teardown, the program's kallsym entry gets - * removed out of RCU callback, but we can only untrack - * from sleepable context, therefore bpf_arch_text_poke() - * might not see that this is in BPF text section and - * bails out with -EINVAL. As these are unreachable since - * RCU grace period already passed, we simply skip them. - * 4) Also programs reaching refcount of zero while patching + * 3) Also programs reaching refcount of zero while patching * is in progress is okay since we're protected under * poke_mutex and untrack the programs before the JIT - * buffer is freed. When we're still in the middle of - * patching and suddenly kallsyms entry of the program - * gets evicted, we just skip the rest which is fine due - * to point 3). - * 5) Any other error happening below from bpf_arch_text_poke() - * is a unexpected bug. + * buffer is freed. */ if (!READ_ONCE(poke->tailcall_target_stable)) continue; @@ -1068,39 +1062,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, poke->tail_call.key != key) continue; - old_bypass_addr = old ? NULL : poke->bypass_addr; - old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; - new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL; - - if (new) { - ret = bpf_arch_text_poke(poke->tailcall_target, - BPF_MOD_JUMP, - old_addr, new_addr); - BUG_ON(ret < 0 && ret != -EINVAL); - if (!old) { - ret = bpf_arch_text_poke(poke->tailcall_bypass, - BPF_MOD_JUMP, - poke->bypass_addr, - NULL); - BUG_ON(ret < 0 && ret != -EINVAL); - } - } else { - ret = bpf_arch_text_poke(poke->tailcall_bypass, - BPF_MOD_JUMP, - old_bypass_addr, - poke->bypass_addr); - BUG_ON(ret < 0 && ret != -EINVAL); - /* let other CPUs finish the execution of program - * so that it will not possible to expose them - * to invalid nop, stack unwind, nop state - */ - if (!ret) - synchronize_rcu(); - ret = bpf_arch_text_poke(poke->tailcall_target, - BPF_MOD_JUMP, - old_addr, NULL); - BUG_ON(ret < 0 && ret != -EINVAL); - } + bpf_arch_poke_desc_update(poke, new, old); } } } -- cgit From ffed24eff9e0e52d8e74df1c18db8ed43b4666e6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Dec 2023 09:30:41 +0100 Subject: selftests/bpf: Add test for early update in prog_array_map_poke_run Adding test that tries to trigger the BUG_IN during early map update in prog_array_map_poke_run function. The idea is to share prog array map between thread that constantly updates it and another one loading a program that uses that prog array. Eventually we will hit a place where the program is ok to be updated (poke->tailcall_target_stable check) but the address is still not registered in kallsyms, so the bpf_arch_text_poke returns -EINVAL and cause imbalance for the next tail call update check, which will fail with -EBUSY in bpf_arch_text_poke as described in previous fix. Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Ilya Leoshkevich Link: https://lore.kernel.org/bpf/20231206083041.1306660-3-jolsa@kernel.org --- tools/testing/selftests/bpf/prog_tests/tailcalls.c | 84 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/tailcall_poke.c | 32 +++++++++ 2 files changed, 116 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/tailcall_poke.c diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index fc6b2954e8f5..59993fc9c0d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -1,6 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include +#include "tailcall_poke.skel.h" + /* test_tailcall_1 checks basic functionality by patching multiple locations * in a single program for a single tail call slot with nop->jmp, jmp->nop @@ -1105,6 +1108,85 @@ out: bpf_object__close(tgt_obj); } +#define JMP_TABLE "/sys/fs/bpf/jmp_table" + +static int poke_thread_exit; + +static void *poke_update(void *arg) +{ + __u32 zero = 0, prog1_fd, prog2_fd, map_fd; + struct tailcall_poke *call = arg; + + map_fd = bpf_map__fd(call->maps.jmp_table); + prog1_fd = bpf_program__fd(call->progs.call1); + prog2_fd = bpf_program__fd(call->progs.call2); + + while (!poke_thread_exit) { + bpf_map_update_elem(map_fd, &zero, &prog1_fd, BPF_ANY); + bpf_map_update_elem(map_fd, &zero, &prog2_fd, BPF_ANY); + } + + return NULL; +} + +/* + * We are trying to hit prog array update during another program load + * that shares the same prog array map. + * + * For that we share the jmp_table map between two skeleton instances + * by pinning the jmp_table to same path. Then first skeleton instance + * periodically updates jmp_table in 'poke update' thread while we load + * the second skeleton instance in the main thread. + */ +static void test_tailcall_poke(void) +{ + struct tailcall_poke *call, *test; + int err, cnt = 10; + pthread_t thread; + + unlink(JMP_TABLE); + + call = tailcall_poke__open_and_load(); + if (!ASSERT_OK_PTR(call, "tailcall_poke__open")) + return; + + err = bpf_map__pin(call->maps.jmp_table, JMP_TABLE); + if (!ASSERT_OK(err, "bpf_map__pin")) + goto out; + + err = pthread_create(&thread, NULL, poke_update, call); + if (!ASSERT_OK(err, "new toggler")) + goto out; + + while (cnt--) { + test = tailcall_poke__open(); + if (!ASSERT_OK_PTR(test, "tailcall_poke__open")) + break; + + err = bpf_map__set_pin_path(test->maps.jmp_table, JMP_TABLE); + if (!ASSERT_OK(err, "bpf_map__pin")) { + tailcall_poke__destroy(test); + break; + } + + bpf_program__set_autoload(test->progs.test, true); + bpf_program__set_autoload(test->progs.call1, false); + bpf_program__set_autoload(test->progs.call2, false); + + err = tailcall_poke__load(test); + tailcall_poke__destroy(test); + if (!ASSERT_OK(err, "tailcall_poke__load")) + break; + } + + poke_thread_exit = 1; + ASSERT_OK(pthread_join(thread, NULL), "pthread_join"); + +out: + bpf_map__unpin(call->maps.jmp_table, JMP_TABLE); + tailcall_poke__destroy(call); +} + void test_tailcalls(void) { if (test__start_subtest("tailcall_1")) @@ -1139,4 +1221,6 @@ void test_tailcalls(void) test_tailcall_bpf2bpf_fentry_fexit(); if (test__start_subtest("tailcall_bpf2bpf_fentry_entry")) test_tailcall_bpf2bpf_fentry_entry(); + if (test__start_subtest("tailcall_poke")) + test_tailcall_poke(); } diff --git a/tools/testing/selftests/bpf/progs/tailcall_poke.c b/tools/testing/selftests/bpf/progs/tailcall_poke.c new file mode 100644 index 000000000000..c78b94b75e83 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_poke.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +SEC("?fentry/bpf_fentry_test1") +int BPF_PROG(test, int a) +{ + bpf_tail_call_static(ctx, &jmp_table, 0); + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(call1, int a) +{ + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(call2, int a) +{ + return 0; +} -- cgit From b6c7ca4d7966c9d6e3deaefa896666cd5f8e2d8d Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 6 Dec 2023 08:23:37 -0300 Subject: dt-bindings: lcdif: Properly describe the i.MX23 interrupts i.MX23 has two LCDIF interrupts instead of a single one like other i.MX devices. Take this into account for properly describing the i.MX23 LCDIF interrupts. This fixes the following dt-schema warning: imx23-olinuxino.dtb: lcdif@80030000: interrupts: [[46], [45]] is too long from schema $id: http://devicetree.org/schemas/display/fsl,lcdif.yaml# Signed-off-by: Fabio Estevam Reviewed-by: Marek Vasut Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20231206112337.2234849-1-festevam@gmail.com Signed-off-by: Rob Herring --- .../devicetree/bindings/display/fsl,lcdif.yaml | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/fsl,lcdif.yaml b/Documentation/devicetree/bindings/display/fsl,lcdif.yaml index fc11ab5fc465..1c2be8d6f633 100644 --- a/Documentation/devicetree/bindings/display/fsl,lcdif.yaml +++ b/Documentation/devicetree/bindings/display/fsl,lcdif.yaml @@ -51,7 +51,10 @@ properties: minItems: 1 interrupts: - maxItems: 1 + items: + - description: LCDIF DMA interrupt + - description: LCDIF Error interrupt + minItems: 1 power-domains: maxItems: 1 @@ -131,6 +134,21 @@ allOf: then: required: - power-domains + - if: + properties: + compatible: + contains: + enum: + - fsl,imx23-lcdif + then: + properties: + interrupts: + minItems: 2 + maxItems: 2 + else: + properties: + interrupts: + maxItems: 1 examples: - | -- cgit From 136c6531ba12e4a658376387e355a09c9b5223e5 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 6 Dec 2023 06:36:43 -0300 Subject: dt-bindings: display: adi,adv75xx: Document #sound-dai-cells When using audio from ADV7533 or ADV7535 and describing the audio card via simple-audio-card, the '#sound-dai-cells' needs to be passed. Document the '#sound-dai-cells' property to fix the following dt-schema warning: imx8mn-beacon-kit.dtb: hdmi@3d: '#sound-dai-cells' does not match any of the regexes: 'pinctrl-[0-9]+' from schema $id: http://devicetree.org/schemas/display/bridge/adi,adv7533.yaml# Signed-off-by: Fabio Estevam Acked-by: Adam Ford Link: https://lore.kernel.org/r/20231206093643.2198562-1-festevam@gmail.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml b/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml index 987aa83c2649..df20a3c9c744 100644 --- a/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml +++ b/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml @@ -9,6 +9,9 @@ title: Analog Devices ADV7533/35 HDMI Encoders maintainers: - Laurent Pinchart +allOf: + - $ref: /schemas/sound/dai-common.yaml# + description: | The ADV7533 and ADV7535 are HDMI audio and video transmitters compatible with HDMI 1.4 and DVI 1.0. They support color space @@ -89,6 +92,9 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32 enum: [ 1, 2, 3, 4 ] + "#sound-dai-cells": + const: 0 + ports: description: The ADV7533/35 has two video ports and one audio port. -- cgit From e59728883943c6820a0aa413db66a38f2e8c27bd Mon Sep 17 00:00:00 2001 From: Daniel Hill Date: Wed, 6 Dec 2023 21:26:00 +1300 Subject: bcachefs: rebalance shouldn't attempt to compress unwritten extents This fixes a bug where rebalance would loop repeatedly on the same extents. Signed-off-by: Daniel Hill Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index f6c92df55270..9d8afcb5979a 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1294,7 +1294,8 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, unsigned i = 0; bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) { + if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || + p.ptr.unwritten) { rewrite_ptrs = 0; goto incompressible; } -- cgit From b197d16669831d3e3240e2b6a3e4f9cf0331d58e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 14 Nov 2023 15:02:04 -0800 Subject: MAINTAINERS: add Andrew Morton for lib/* Add myself as the fallthough maintainer for material under lib/. Cc: Joe Perches Signed-off-by: Andrew Morton --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 509281e9e169..633fc9f22cda 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12208,6 +12208,13 @@ F: include/linux/nd.h F: include/uapi/linux/ndctl.h F: tools/testing/nvdimm/ +LIBRARY CODE +M: Andrew Morton +L: linux-kernel@vger.kernel.org +S: Supported +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-nonmm-unstable +F: lib/* + LICENSES and SPDX stuff M: Thomas Gleixner M: Greg Kroah-Hartman -- cgit From 187da0f8250aa94bd96266096aef6f694e0b4cd2 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Mon, 13 Nov 2023 17:20:33 -0800 Subject: hugetlb: fix null-ptr-deref in hugetlb_vma_lock_write The routine __vma_private_lock tests for the existence of a reserve map associated with a private hugetlb mapping. A pointer to the reserve map is in vma->vm_private_data. __vma_private_lock was checking the pointer for NULL. However, it is possible that the low bits of the pointer could be used as flags. In such instances, vm_private_data is not NULL and not a valid pointer. This results in the null-ptr-deref reported by syzbot: general protection fault, probably for non-canonical address 0xdffffc000000001d: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x00000000000000e8-0x00000000000000ef] CPU: 0 PID: 5048 Comm: syz-executor139 Not tainted 6.6.0-rc7-syzkaller-00142-g88 8cf78c29e2 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 1 0/09/2023 RIP: 0010:__lock_acquire+0x109/0x5de0 kernel/locking/lockdep.c:5004 ... Call Trace: lock_acquire kernel/locking/lockdep.c:5753 [inline] lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5718 down_write+0x93/0x200 kernel/locking/rwsem.c:1573 hugetlb_vma_lock_write mm/hugetlb.c:300 [inline] hugetlb_vma_lock_write+0xae/0x100 mm/hugetlb.c:291 __hugetlb_zap_begin+0x1e9/0x2b0 mm/hugetlb.c:5447 hugetlb_zap_begin include/linux/hugetlb.h:258 [inline] unmap_vmas+0x2f4/0x470 mm/memory.c:1733 exit_mmap+0x1ad/0xa60 mm/mmap.c:3230 __mmput+0x12a/0x4d0 kernel/fork.c:1349 mmput+0x62/0x70 kernel/fork.c:1371 exit_mm kernel/exit.c:567 [inline] do_exit+0x9ad/0x2a20 kernel/exit.c:861 __do_sys_exit kernel/exit.c:991 [inline] __se_sys_exit kernel/exit.c:989 [inline] __x64_sys_exit+0x42/0x50 kernel/exit.c:989 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Mask off low bit flags before checking for NULL pointer. In addition, the reserve map only 'belongs' to the OWNER (parent in parent/child relationships) so also check for the OWNER flag. Link: https://lkml.kernel.org/r/20231114012033.259600-1-mike.kravetz@oracle.com Reported-by: syzbot+6ada951e7c0f7bc8a71e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-mm/00000000000078d1e00608d7878b@google.com/ Fixes: bf4916922c60 ("hugetlbfs: extend hugetlb_vma_lock to private VMAs") Signed-off-by: Mike Kravetz Reviewed-by: Rik van Riel Cc: Edward Adam Davis Cc: Muchun Song Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Tom Rix Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 5 +---- mm/hugetlb.c | 7 +++++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d3acecc5db4b..236ec7b63c54 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -1268,10 +1268,7 @@ static inline bool __vma_shareable_lock(struct vm_area_struct *vma) return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data; } -static inline bool __vma_private_lock(struct vm_area_struct *vma) -{ - return (!(vma->vm_flags & VM_MAYSHARE)) && vma->vm_private_data; -} +bool __vma_private_lock(struct vm_area_struct *vma); /* * Safe version of huge_pte_offset() to check the locks. See comments diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1169ef2f2176..6feb3e0630d1 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1182,6 +1182,13 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag) return (get_vma_private_data(vma) & flag) != 0; } +bool __vma_private_lock(struct vm_area_struct *vma) +{ + return !(vma->vm_flags & VM_MAYSHARE) && + get_vma_private_data(vma) & ~HPAGE_RESV_MASK && + is_vma_resv_set(vma, HPAGE_RESV_OWNER); +} + void hugetlb_dup_vma_private(struct vm_area_struct *vma) { VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma); -- cgit From 727d16f1993bcf46ee2888c13e3fc1463babed8d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 15 Nov 2023 13:54:18 -0800 Subject: mm/memory.c:zap_pte_range() print bad swap entry We have a report of this WARN() triggering. Let's print the offending swp_entry_t to help diagnosis. Link: https://lkml.kernel.org/r/000000000000b0e576060a30ee3b@google.com Cc: Muhammad Usama Anjum Signed-off-by: Andrew Morton --- mm/memory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memory.c b/mm/memory.c index 1f18ed4a5497..5c757fba8858 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1517,6 +1517,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, continue; } else { /* We should have covered all the swap entry types */ + pr_alert("unrecognized swap entry 0x%lx\n", entry.val); WARN_ON_ONCE(1); } pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); -- cgit From 4eff7d62abdeb293233fdda2a2ecc4e0907a9a30 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 15 Nov 2023 16:21:37 +0800 Subject: Revert "mm/kmemleak: move the initialisation of object to __link_object" Patch series "Fix invalid wait context of set_track_prepare()". Geert reported an invalid wait context[1] which is resulted by moving set_track_prepare() inside kmemleak_lock. This is not allowed because in RT mode, the spinlocks can be preempted but raw_spinlocks can not, so it is not allowd to acquire spinlocks while holding raw_spinlocks. The second patch fix same problem in kmemleak_update_trace(). This patch (of 2): Move the initialisation of object back to__alloc_object() because set_track_prepare() attempt to acquire zone->lock(spinlocks) while __link_object is holding kmemleak_lock(raw_spinlocks). This is not right for RT mode. This reverts commit 245245c2fffd00 ("mm/kmemleak: move the initialisation of object to __link_object"). Link: https://lkml.kernel.org/r/20231115082138.2649870-1-liushixin2@huawei.com Link: https://lkml.kernel.org/r/20231115082138.2649870-2-liushixin2@huawei.com Fixes: 245245c2fffd ("mm/kmemleak: move the initialisation of object to __link_object") Signed-off-by: Liu Shixin Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/linux-mm/CAMuHMdWj0UzwNaxUvcocTfh481qRJpOWwXxsJCTJfu1oCqvgdA@mail.gmail.com/ [1] Acked-by: Catalin Marinas Cc: Kefeng Wang Cc: Patrick Wang Signed-off-by: Andrew Morton --- mm/kmemleak.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 1eacca03bedd..22bab3738a9e 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -642,32 +642,16 @@ static struct kmemleak_object *__alloc_object(gfp_t gfp) if (!object) { pr_warn("Cannot allocate a kmemleak_object structure\n"); kmemleak_disable(); + return NULL; } - return object; -} - -static int __link_object(struct kmemleak_object *object, unsigned long ptr, - size_t size, int min_count, bool is_phys) -{ - - struct kmemleak_object *parent; - struct rb_node **link, *rb_parent; - unsigned long untagged_ptr; - unsigned long untagged_objp; - INIT_LIST_HEAD(&object->object_list); INIT_LIST_HEAD(&object->gray_list); INIT_HLIST_HEAD(&object->area_list); raw_spin_lock_init(&object->lock); atomic_set(&object->use_count, 1); - object->flags = OBJECT_ALLOCATED | (is_phys ? OBJECT_PHYS : 0); - object->pointer = ptr; - object->size = kfence_ksize((void *)ptr) ?: size; object->excess_ref = 0; - object->min_count = min_count; object->count = 0; /* white color initially */ - object->jiffies = jiffies; object->checksum = 0; object->del_state = 0; @@ -692,6 +676,24 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr, /* kernel backtrace */ object->trace_handle = set_track_prepare(); + return object; +} + +static int __link_object(struct kmemleak_object *object, unsigned long ptr, + size_t size, int min_count, bool is_phys) +{ + + struct kmemleak_object *parent; + struct rb_node **link, *rb_parent; + unsigned long untagged_ptr; + unsigned long untagged_objp; + + object->flags = OBJECT_ALLOCATED | (is_phys ? OBJECT_PHYS : 0); + object->pointer = ptr; + object->size = kfence_ksize((void *)ptr) ?: size; + object->min_count = min_count; + object->jiffies = jiffies; + untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr); /* * Only update min_addr and max_addr with object -- cgit From d63385a7d3099fedf201b0263282aebf9f9af3e8 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 15 Nov 2023 16:21:38 +0800 Subject: mm/kmemleak: move set_track_prepare() outside raw_spinlocks set_track_prepare() will call __alloc_pages() which attempts to acquire zone->lock(spinlocks), so move it outside object->lock(raw_spinlocks) because it's not right to acquire spinlocks while holding raw_spinlocks in RT mode. Link: https://lkml.kernel.org/r/20231115082138.2649870-3-liushixin2@huawei.com Signed-off-by: Liu Shixin Acked-by: Catalin Marinas Cc: Geert Uytterhoeven Cc: Kefeng Wang Cc: Patrick Wang Signed-off-by: Andrew Morton --- mm/kmemleak.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 22bab3738a9e..5501363d6b31 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1152,6 +1152,7 @@ EXPORT_SYMBOL_GPL(kmemleak_free_percpu); void __ref kmemleak_update_trace(const void *ptr) { struct kmemleak_object *object; + depot_stack_handle_t trace_handle; unsigned long flags; pr_debug("%s(0x%px)\n", __func__, ptr); @@ -1168,8 +1169,9 @@ void __ref kmemleak_update_trace(const void *ptr) return; } + trace_handle = set_track_prepare(); raw_spin_lock_irqsave(&object->lock, flags); - object->trace_handle = set_track_prepare(); + object->trace_handle = trace_handle; raw_spin_unlock_irqrestore(&object->lock, flags); put_object(object); -- cgit From 5f79489a73d77419d18952e0258efbd5ecb74770 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 15 Nov 2023 18:51:09 -0800 Subject: mm: kmem: properly initialize local objcg variable in current_obj_cgroup() Erhard reported that the 6.7-rc1 kernel panics on boot if being built with clang-16. The problem was not reproducible with gcc. [ 5.975049] general protection fault, probably for non-canonical address 0xf555515555555557: 0000 [#1] SMP KASAN PTI [ 5.976422] KASAN: maybe wild-memory-access in range [0xaaaaaaaaaaaaaab8-0xaaaaaaaaaaaaaabf] [ 5.977475] CPU: 3 PID: 1 Comm: systemd Not tainted 6.7.0-rc1-Zen3 #77 [ 5.977860] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 [ 5.977860] RIP: 0010:obj_cgroup_charge_pages+0x27/0x2d5 [ 5.977860] Code: 90 90 90 55 41 57 41 56 41 55 41 54 53 89 d5 41 89 f6 49 89 ff 48 b8 00 00 00 00 00 fc ff df 49 83 c7 10 4d3 [ 5.977860] RSP: 0018:ffffc9000001fb18 EFLAGS: 00010a02 [ 5.977860] RAX: dffffc0000000000 RBX: aaaaaaaaaaaaaaaa RCX: ffff8883eb9a8b08 [ 5.977860] RDX: 0000000000000005 RSI: 0000000000400cc0 RDI: aaaaaaaaaaaaaaaa [ 5.977860] RBP: 0000000000000005 R08: 3333333333333333 R09: 0000000000000000 [ 5.977860] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8883eb9a8b18 [ 5.977860] R13: 1555555555555557 R14: 0000000000400cc0 R15: aaaaaaaaaaaaaaba [ 5.977860] FS: 00007f2976438b40(0000) GS:ffff8883eb980000(0000) knlGS:0000000000000000 [ 5.977860] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5.977860] CR2: 00007f29769e0060 CR3: 0000000107222003 CR4: 0000000000370eb0 [ 5.977860] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 5.977860] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 5.977860] Call Trace: [ 5.977860] [ 5.977860] ? __die_body+0x16/0x75 [ 5.977860] ? die_addr+0x4a/0x70 [ 5.977860] ? exc_general_protection+0x1c9/0x2d0 [ 5.977860] ? cgroup_mkdir+0x455/0x9fb [ 5.977860] ? __x64_sys_mkdir+0x69/0x80 [ 5.977860] ? asm_exc_general_protection+0x26/0x30 [ 5.977860] ? obj_cgroup_charge_pages+0x27/0x2d5 [ 5.977860] obj_cgroup_charge+0x114/0x1ab [ 5.977860] pcpu_alloc+0x1a6/0xa65 [ 5.977860] ? mem_cgroup_css_alloc+0x1eb/0x1140 [ 5.977860] ? cgroup_apply_control_enable+0x26b/0x7c0 [ 5.977860] mem_cgroup_css_alloc+0x23f/0x1140 [ 5.977860] cgroup_apply_control_enable+0x26b/0x7c0 [ 5.977860] ? cgroup_kn_set_ugid+0x2d/0x1a0 [ 5.977860] cgroup_mkdir+0x455/0x9fb [ 5.977860] ? __cfi_cgroup_mkdir+0x10/0x10 [ 5.977860] kernfs_iop_mkdir+0x130/0x170 [ 5.977860] vfs_mkdir+0x405/0x530 [ 5.977860] do_mkdirat+0x188/0x1f0 [ 5.977860] __x64_sys_mkdir+0x69/0x80 [ 5.977860] do_syscall_64+0x7d/0x100 [ 5.977860] ? do_syscall_64+0x89/0x100 [ 5.977860] ? do_syscall_64+0x89/0x100 [ 5.977860] ? do_syscall_64+0x89/0x100 [ 5.977860] ? do_syscall_64+0x89/0x100 [ 5.977860] entry_SYSCALL_64_after_hwframe+0x4b/0x53 [ 5.977860] RIP: 0033:0x7f297671defb [ 5.977860] Code: 8b 05 39 7f 0d 00 bb ff ff ff ff 64 c7 00 16 00 00 00 e9 61 ff ff ff e8 23 0c 02 00 0f 1f 00 f3 0f 1e fa b88 [ 5.977860] RSP: 002b:00007ffee6242bb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000053 [ 5.977860] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f297671defb [ 5.977860] RDX: 0000000000000000 RSI: 00000000000001ed RDI: 000055c6b449f0e0 [ 5.977860] RBP: 00007ffee6242bf0 R08: 000000000000000e R09: 0000000000000000 [ 5.977860] R10: 0000000000000000 R11: 0000000000000246 R12: 000055c6b445db80 [ 5.977860] R13: 00000000000003a0 R14: 00007f2976a68651 R15: 00000000000003a0 [ 5.977860] [ 5.977860] Modules linked in: [ 6.014095] ---[ end trace 0000000000000000 ]--- [ 6.014701] RIP: 0010:obj_cgroup_charge_pages+0x27/0x2d5 [ 6.015348] Code: 90 90 90 55 41 57 41 56 41 55 41 54 53 89 d5 41 89 f6 49 89 ff 48 b8 00 00 00 00 00 fc ff df 49 83 c7 10 4d3 [ 6.017575] RSP: 0018:ffffc9000001fb18 EFLAGS: 00010a02 [ 6.018255] RAX: dffffc0000000000 RBX: aaaaaaaaaaaaaaaa RCX: ffff8883eb9a8b08 [ 6.019120] RDX: 0000000000000005 RSI: 0000000000400cc0 RDI: aaaaaaaaaaaaaaaa [ 6.019983] RBP: 0000000000000005 R08: 3333333333333333 R09: 0000000000000000 [ 6.020849] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8883eb9a8b18 [ 6.021747] R13: 1555555555555557 R14: 0000000000400cc0 R15: aaaaaaaaaaaaaaba [ 6.022609] FS: 00007f2976438b40(0000) GS:ffff8883eb980000(0000) knlGS:0000000000000000 [ 6.023593] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6.024296] CR2: 00007f29769e0060 CR3: 0000000107222003 CR4: 0000000000370eb0 [ 6.025279] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 6.026139] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 6.027000] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b Actually the problem is caused by uninitialized local variable in current_obj_cgroup(). If the root memory cgroup is set as an active memory cgroup for a charging scope (as in the trace, where systemd tries to create the first non-root cgroup, so the parent cgroup is the root cgroup), the "for" loop is skipped and uninitialized objcg is returned, causing a panic down the accounting stack. The fix is trivial: initialize the objcg variable to NULL unconditionally before the "for" loop. [vbabka@suse.cz: remove redundant assignment] Link: https://lkml.kernel.org/r/4bd106d5-c3e3-6731-9a74-cff81e2392de@suse.cz Link: https://lkml.kernel.org/r/20231116025109.3775055-1-roman.gushchin@linux.dev Fixes: e86828e5446d ("mm: kmem: scoped objcg protection") Signed-off-by: Roman Gushchin (Cruise) Signed-off-by: Vlastimil Babka Reported-by: Erhard Furtner Closes: https://github.com/ClangBuiltLinux/linux/issues/1959 Tested-by: Erhard Furtner Acked-by: Vlastimil Babka Acked-by: Shakeel Butt Cc: David Rientjes Cc: Dennis Zhou Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1c1061df9cd1..b226090fd906 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3166,6 +3166,7 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void) return NULL; from_memcg: + objcg = NULL; for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) { /* * Memcg pointer is protected by scope (see set_active_memcg()) @@ -3176,7 +3177,6 @@ from_memcg: objcg = rcu_dereference_check(memcg->objcg, 1); if (likely(objcg)) break; - objcg = NULL; } return objcg; -- cgit From 0dff1b407def32452a0d80148172889862c64fe7 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 16 Nov 2023 15:15:45 -0500 Subject: mm/pagemap: fix ioctl(PAGEMAP_SCAN) on vma check Patch series "mm/pagemap: A few fixes to the recent PAGEMAP_SCAN". This series should fix two known reports from syzbot on the new PAGEMAP_SCAN ioctl(): https://lore.kernel.org/all/000000000000b0e576060a30ee3b@google.com/ https://lore.kernel.org/all/000000000000773fa7060a31e2cc@google.com/ The 3rd patch is something I found when testing these patches. This patch (of 3): The new ioctl(PAGEMAP_SCAN) relies on vma wr-protect capability provided by userfault, however in the vma test it didn't explicitly require the vma to have wr-protect function enabled, even if PM_SCAN_WP_MATCHING flag is set. It means the pagemap code can now apply uffd-wp bit to a page in the vma even if not registered to userfaultfd at all. Then in whatever way as long as the pte got written and page fault resolved, we'll apply the write bit even if uffd-wp bit is set. We'll see a pte that has both UFFD_WP and WRITE bit set. Anything later that looks up the pte for uffd-wp bit will trigger the warning: WARNING: CPU: 1 PID: 5071 at arch/x86/include/asm/pgtable.h:403 pte_uffd_wp arch/x86/include/asm/pgtable.h:403 [inline] Fix it by doing proper check over the vma attributes when PM_SCAN_WP_MATCHING is specified. Link: https://lkml.kernel.org/r/20231116201547.536857-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20231116201547.536857-2-peterx@redhat.com Fixes: 52526ca7fdb9 ("fs/proc/task_mmu: implement IOCTL to get and optionally clear info about PTEs") Signed-off-by: Peter Xu Reported-by: syzbot+e94c5aaf7890901ebf9b@syzkaller.appspotmail.com Reviewed-by: David Hildenbrand Reviewed-by: Andrei Vagin Reviewed-by: Muhammad Usama Anjum Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ef2eb12906da..a6f88cbfb689 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1982,15 +1982,31 @@ static int pagemap_scan_test_walk(unsigned long start, unsigned long end, struct pagemap_scan_private *p = walk->private; struct vm_area_struct *vma = walk->vma; unsigned long vma_category = 0; + bool wp_allowed = userfaultfd_wp_async(vma) && + userfaultfd_wp_use_markers(vma); - if (userfaultfd_wp_async(vma) && userfaultfd_wp_use_markers(vma)) - vma_category |= PAGE_IS_WPALLOWED; - else if (p->arg.flags & PM_SCAN_CHECK_WPASYNC) - return -EPERM; + if (!wp_allowed) { + /* User requested explicit failure over wp-async capability */ + if (p->arg.flags & PM_SCAN_CHECK_WPASYNC) + return -EPERM; + /* + * User requires wr-protect, and allows silently skipping + * unsupported vmas. + */ + if (p->arg.flags & PM_SCAN_WP_MATCHING) + return 1; + /* + * Then the request doesn't involve wr-protects at all, + * fall through to the rest checks, and allow vma walk. + */ + } if (vma->vm_flags & VM_PFNMAP) return 1; + if (wp_allowed) + vma_category |= PAGE_IS_WPALLOWED; + if (!pagemap_scan_is_interesting_vma(vma_category, p)) return 1; -- cgit From 4980e837cab7e2acc4dad18dba656c6896c531aa Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 16 Nov 2023 15:15:46 -0500 Subject: mm/pagemap: fix wr-protect even if PM_SCAN_WP_MATCHING not set The new pagemap ioctl contains a fast path for wr-protections without looking into category masks. It forgets to check PM_SCAN_WP_MATCHING before applying the wr-protections. It can cause, e.g., pte markers installed on archs that do not even support uffd wr-protect. WARNING: CPU: 0 PID: 5059 at mm/memory.c:1520 zap_pte_range mm/memory.c:1520 [inline] Link: https://lkml.kernel.org/r/20231116201547.536857-3-peterx@redhat.com Fixes: 12f6b01a0bcb ("fs/proc/task_mmu: add fast paths to get/clear PAGE_IS_WRITTEN flag") Signed-off-by: Peter Xu Reported-by: syzbot+7ca4b2719dc742b8d0a4@syzkaller.appspotmail.com Reviewed-by: David Hildenbrand Reviewed-by: Andrei Vagin Cc: Muhammad Usama Anjum Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index a6f88cbfb689..435b61054b5b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -2156,7 +2156,7 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, return 0; } - if (!p->vec_out) { + if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) { /* Fast path for performing exclusive WP */ for (addr = start; addr != end; pte++, addr += PAGE_SIZE) { if (pte_uffd_wp(ptep_get(pte))) -- cgit From 3f3cac5c0a5c3b09bdfb919a7518dca83523a52c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 16 Nov 2023 15:15:47 -0500 Subject: mm/selftests: fix pagemap_ioctl memory map test __FILE__ is not guaranteed to exist in current dir. Replace that with argv[0] for memory map test. Link: https://lkml.kernel.org/r/20231116201547.536857-4-peterx@redhat.com Fixes: 46fd75d4a3c9 ("selftests: mm: add pagemap ioctl tests") Signed-off-by: Peter Xu Reviewed-by: David Hildenbrand Cc: Andrei Vagin Cc: David Hildenbrand Cc: Muhammad Usama Anjum Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/pagemap_ioctl.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index befab43719ba..d59517ed3d48 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -36,6 +36,7 @@ int pagemap_fd; int uffd; int page_size; int hpage_size; +const char *progname; #define LEN(region) ((region.end - region.start)/page_size) @@ -1149,11 +1150,11 @@ int sanity_tests(void) munmap(mem, mem_size); /* 9. Memory mapped file */ - fd = open(__FILE__, O_RDONLY); + fd = open(progname, O_RDONLY); if (fd < 0) ksft_exit_fail_msg("%s Memory mapped file\n", __func__); - ret = stat(__FILE__, &sbuf); + ret = stat(progname, &sbuf); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); @@ -1472,12 +1473,14 @@ static void transact_test(int page_size) extra_thread_faults); } -int main(void) +int main(int argc, char *argv[]) { int mem_size, shmid, buf_size, fd, i, ret; char *mem, *map, *fmem; struct stat sbuf; + progname = argv[0]; + ksft_print_header(); if (init_uffd()) -- cgit From eb66b8abae98f869c224f7c852b685ae02144564 Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Thu, 16 Nov 2023 11:13:52 +0800 Subject: squashfs: squashfs_read_data need to check if the length is 0 When the length passed in is 0, the pagemap_scan_test_walk() caller should bail. This error causes at least a WARN_ON(). Link: https://lkml.kernel.org/r/20231116031352.40853-1-lizhi.xu@windriver.com Reported-by: syzbot+32d3767580a1ea339a81@syzkaller.appspotmail.com Closes: https://lkml.kernel.org/r/0000000000000526f2060a30a085@google.com Signed-off-by: Lizhi Xu Reviewed-by: Phillip Lougher Signed-off-by: Andrew Morton --- fs/squashfs/block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 581ce9519339..2dc730800f44 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -321,7 +321,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, TRACE("Block @ 0x%llx, %scompressed size %d\n", index - 2, compressed ? "" : "un", length); } - if (length < 0 || length > output->length || + if (length <= 0 || length > output->length || (index + length) > msblk->bytes_used) { res = -EIO; goto out; -- cgit From 9aa1345d66b8132745ffb99b348b1492088da9e2 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 17 Nov 2023 00:49:18 -0800 Subject: mm: fix oops when filemap_map_pmd() without prealloc_pte MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit syzbot reports oops in lockdep's __lock_acquire(), called from __pte_offset_map_lock() called from filemap_map_pages(); or when I run the repro, the oops comes in pmd_install(), called from filemap_map_pmd() called from filemap_map_pages(), just before the __pte_offset_map_lock(). The problem is that filemap_map_pmd() has been assuming that when it finds pmd_none(), a page table has already been prepared in prealloc_pte; and indeed do_fault_around() has been careful to preallocate one there, when it finds pmd_none(): but what if *pmd became none in between? My 6.6 mods in mm/khugepaged.c, avoiding mmap_lock for write, have made it easy for *pmd to be cleared while servicing a page fault; but even before those, a huge *pmd might be zapped while a fault is serviced. The difference in symptomatic stack traces comes from the "memory model" in use: pmd_install() uses pmd_populate() uses page_to_pfn(): in some models that is strict, and will oops on the NULL prealloc_pte; in other models, it will construct a bogus value to be populated into *pmd, then __pte_offset_map_lock() oops when trying to access split ptlock pointer (or some other symptom in normal case of ptlock embedded not pointer). Link: https://lore.kernel.org/linux-mm/20231115065506.19780-1-jose.pekkarinen@foxhound.fi/ Link: https://lkml.kernel.org/r/6ed0c50c-78ef-0719-b3c5-60c0c010431c@google.com Fixes: f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths") Signed-off-by: Hugh Dickins Reported-and-tested-by: syzbot+89edd67979b52675ddec@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-mm/0000000000005e44550608a0806c@google.com/ Reviewed-by: David Hildenbrand Cc: Jann Horn , Cc: José Pekkarinen Cc: Kirill A. Shutemov Cc: Matthew Wilcox (Oracle) Cc: [5.12+] Signed-off-by: Andrew Morton --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index 32eedf3afd45..f1c8c278310f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3371,7 +3371,7 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct folio *folio, } } - if (pmd_none(*vmf->pmd)) + if (pmd_none(*vmf->pmd) && vmf->prealloc_pte) pmd_install(mm, vmf->pmd, &vmf->prealloc_pte); return false; -- cgit From c540b03828dff96e1b36e9e75714f0a8e807a92e Mon Sep 17 00:00:00 2001 From: Chester Lin Date: Fri, 17 Nov 2023 10:28:07 +0800 Subject: .mailmap: add a new address mapping for Chester Lin My company email address is going to be disabled so let's create a mapping that links to my private/community email just in case people might still try to reach me via the old one. Link: https://lkml.kernel.org/r/20231117022807.29461-1-clin@suse.com Signed-off-by: Chester Lin Cc: Chester Lin Cc: Bjorn Andersson Cc: Greg Kroah-Hartman Cc: Heiko Stuebner Cc: Jakub Kicinski Cc: Konrad Dybcio Cc: Oleksij Rempel Cc: Stephen Hemminger Cc: Conor Dooley Cc: Matthias Brugger Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 43031441b2d9..19eb49e55836 100644 --- a/.mailmap +++ b/.mailmap @@ -117,6 +117,7 @@ Changbin Du Changbin Du Chao Yu Chao Yu +Chester Lin Chris Chiu Chris Chiu Chris Lew -- cgit From 001002e73712cdf6b8d9a103648cda3040ad7647 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Mon, 20 Nov 2023 15:53:52 +0100 Subject: mm/memory_hotplug: add missing mem_hotplug_lock From Documentation/core-api/memory-hotplug.rst: When adding/removing/onlining/offlining memory or adding/removing heterogeneous/device memory, we should always hold the mem_hotplug_lock in write mode to serialise memory hotplug (e.g. access to global/zone variables). mhp_(de)init_memmap_on_memory() functions can change zone stats and struct page content, but they are currently called w/o the mem_hotplug_lock. When memory block is being offlined and when kmemleak goes through each populated zone, the following theoretical race conditions could occur: CPU 0: | CPU 1: memory_offline() | -> offline_pages() | -> mem_hotplug_begin() | ... | -> mem_hotplug_done() | | kmemleak_scan() | -> get_online_mems() | ... -> mhp_deinit_memmap_on_memory() | [not protected by mem_hotplug_begin/done()]| Marks memory section as offline, | Retrieves zone_start_pfn poisons vmemmap struct pages and updates | and struct page members. the zone related data | | ... | -> put_online_mems() Fix this by ensuring mem_hotplug_lock is taken before performing mhp_init_memmap_on_memory(). Also ensure that mhp_deinit_memmap_on_memory() holds the lock. online/offline_pages() are currently only called from memory_block_online/offline(), so it is safe to move the locking there. Link: https://lkml.kernel.org/r/20231120145354.308999-2-sumanthk@linux.ibm.com Fixes: a08a2ae34613 ("mm,memory_hotplug: allocate memmap from the added memory range") Signed-off-by: Sumanth Korikkar Reviewed-by: Gerald Schaefer Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Aneesh Kumar K.V Cc: Anshuman Khandual Cc: Heiko Carstens Cc: Michal Hocko Cc: Oscar Salvador Cc: Vasily Gorbik Cc: kernel test robot Cc: [5.15+] Signed-off-by: Andrew Morton --- drivers/base/memory.c | 18 +++++++++++++++--- mm/memory_hotplug.c | 13 ++++++------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index f3b9a4d0fa3b..8a13babd826c 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -180,6 +180,9 @@ static inline unsigned long memblk_nr_poison(struct memory_block *mem) } #endif +/* + * Must acquire mem_hotplug_lock in write mode. + */ static int memory_block_online(struct memory_block *mem) { unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); @@ -204,10 +207,11 @@ static int memory_block_online(struct memory_block *mem) if (mem->altmap) nr_vmemmap_pages = mem->altmap->free; + mem_hotplug_begin(); if (nr_vmemmap_pages) { ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone); if (ret) - return ret; + goto out; } ret = online_pages(start_pfn + nr_vmemmap_pages, @@ -215,7 +219,7 @@ static int memory_block_online(struct memory_block *mem) if (ret) { if (nr_vmemmap_pages) mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages); - return ret; + goto out; } /* @@ -227,9 +231,14 @@ static int memory_block_online(struct memory_block *mem) nr_vmemmap_pages); mem->zone = zone; +out: + mem_hotplug_done(); return ret; } +/* + * Must acquire mem_hotplug_lock in write mode. + */ static int memory_block_offline(struct memory_block *mem) { unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); @@ -247,6 +256,7 @@ static int memory_block_offline(struct memory_block *mem) if (mem->altmap) nr_vmemmap_pages = mem->altmap->free; + mem_hotplug_begin(); if (nr_vmemmap_pages) adjust_present_page_count(pfn_to_page(start_pfn), mem->group, -nr_vmemmap_pages); @@ -258,13 +268,15 @@ static int memory_block_offline(struct memory_block *mem) if (nr_vmemmap_pages) adjust_present_page_count(pfn_to_page(start_pfn), mem->group, nr_vmemmap_pages); - return ret; + goto out; } if (nr_vmemmap_pages) mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages); mem->zone = NULL; +out: + mem_hotplug_done(); return ret; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ab41a511e20a..bb908289679e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1129,6 +1129,9 @@ void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages) kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages)); } +/* + * Must be called with mem_hotplug_lock in write mode. + */ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone, struct memory_group *group) { @@ -1149,7 +1152,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, !IS_ALIGNED(pfn + nr_pages, PAGES_PER_SECTION))) return -EINVAL; - mem_hotplug_begin(); /* associate pfn range with the zone */ move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE); @@ -1208,7 +1210,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, writeback_set_ratelimit(); memory_notify(MEM_ONLINE, &arg); - mem_hotplug_done(); return 0; failed_addition: @@ -1217,7 +1218,6 @@ failed_addition: (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1); memory_notify(MEM_CANCEL_ONLINE, &arg); remove_pfn_range_from_zone(zone, pfn, nr_pages); - mem_hotplug_done(); return ret; } @@ -1863,6 +1863,9 @@ static int count_system_ram_pages_cb(unsigned long start_pfn, return 0; } +/* + * Must be called with mem_hotplug_lock in write mode. + */ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, struct zone *zone, struct memory_group *group) { @@ -1885,8 +1888,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, !IS_ALIGNED(start_pfn + nr_pages, PAGES_PER_SECTION))) return -EINVAL; - mem_hotplug_begin(); - /* * Don't allow to offline memory blocks that contain holes. * Consequently, memory blocks with holes can never get onlined @@ -2031,7 +2032,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, memory_notify(MEM_OFFLINE, &arg); remove_pfn_range_from_zone(zone, start_pfn, nr_pages); - mem_hotplug_done(); return 0; failed_removal_isolated: @@ -2046,7 +2046,6 @@ failed_removal: (unsigned long long) start_pfn << PAGE_SHIFT, ((unsigned long long) end_pfn << PAGE_SHIFT) - 1, reason); - mem_hotplug_done(); return ret; } -- cgit From f42ce5f087eb69e47294ababd2e7e6f88a82d308 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Mon, 20 Nov 2023 15:53:53 +0100 Subject: mm/memory_hotplug: fix error handling in add_memory_resource() In add_memory_resource(), creation of memory block devices occurs after successful call to arch_add_memory(). However, creation of memory block devices could fail. In that case, arch_remove_memory() is called to perform necessary cleanup. Currently with or without altmap support, arch_remove_memory() is always passed with altmap set to NULL during error handling. This leads to freeing of struct pages using free_pages(), eventhough the allocation might have been performed with altmap support via altmap_alloc_block_buf(). Fix the error handling by passing altmap in arch_remove_memory(). This ensures the following: * When altmap is disabled, deallocation of the struct pages array occurs via free_pages(). * When altmap is enabled, deallocation occurs via vmem_altmap_free(). Link: https://lkml.kernel.org/r/20231120145354.308999-3-sumanthk@linux.ibm.com Fixes: a08a2ae34613 ("mm,memory_hotplug: allocate memmap from the added memory range") Signed-off-by: Sumanth Korikkar Reviewed-by: Gerald Schaefer Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Aneesh Kumar K.V Cc: Anshuman Khandual Cc: Heiko Carstens Cc: kernel test robot Cc: Michal Hocko Cc: Oscar Salvador Cc: Vasily Gorbik Cc: [5.15+] Signed-off-by: Andrew Morton --- mm/memory_hotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index bb908289679e..7a5fc89a8652 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1458,7 +1458,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) /* create memory block devices after memory was added */ ret = create_memory_block_devices(start, size, params.altmap, group); if (ret) { - arch_remove_memory(start, size, NULL); + arch_remove_memory(start, size, params.altmap); goto error_free; } -- cgit From ee34db3f271cea4d4252048617919c2caafe698b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Nov 2023 19:37:17 +0100 Subject: checkstack: fix printed address All addresses printed by checkstack have an extra incorrect 0 appended at the end. This was introduced with commit 677f1410e058 ("scripts/checkstack.pl: don't display $dre as different entity"): since then the address is taken from the line which contains the function name, instead of the line which contains stack consumption. E.g. on s390: 0000000000100a30 : ... 100a44: e3 f0 ff 70 ff 71 lay %r15,-144(%r15) So the used regex which matches spaces and hexadecimal numbers to extract an address now matches a different substring. Subsequently replacing spaces with 0 appends a zero at the and, instead of replacing leading spaces. Fix this by using the proper regex, and simplify the code a bit. Link: https://lkml.kernel.org/r/20231120183719.2188479-2-hca@linux.ibm.com Fixes: 677f1410e058 ("scripts/checkstack.pl: don't display $dre as different entity") Signed-off-by: Heiko Carstens Cc: Maninder Singh Cc: Masahiro Yamada Cc: Vaneet Narang Cc: Signed-off-by: Andrew Morton --- scripts/checkstack.pl | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl index 84f5fb7f1cec..ac74f8629cea 100755 --- a/scripts/checkstack.pl +++ b/scripts/checkstack.pl @@ -139,15 +139,11 @@ $total_size = 0; while (my $line = ) { if ($line =~ m/$funcre/) { $func = $1; - next if $line !~ m/^($xs*)/; + next if $line !~ m/^($x*)/; if ($total_size > $min_stack) { push @stack, "$intro$total_size\n"; } - - $addr = $1; - $addr =~ s/ /0/g; - $addr = "0x$addr"; - + $addr = "0x$1"; $intro = "$addr $func [$file]:"; my $padlen = 56 - length($intro); while ($padlen > 0) { -- cgit From 0263f92fadbb9d294d5971ac57743f882c93b2b3 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 20 Nov 2023 16:35:59 +0800 Subject: lib/group_cpus.c: avoid acquiring cpu hotplug lock in group_cpus_evenly group_cpus_evenly() could be part of storage driver's error handler, such as nvme driver, when may happen during CPU hotplug, in which storage queue has to drain its pending IOs because all CPUs associated with the queue are offline and the queue is becoming inactive. And handling IO needs error handler to provide forward progress. Then deadlock is caused: 1) inside CPU hotplug handler, CPU hotplug lock is held, and blk-mq's handler is waiting for inflight IO 2) error handler is waiting for CPU hotplug lock 3) inflight IO can't be completed in blk-mq's CPU hotplug handler because error handling can't provide forward progress. Solve the deadlock by not holding CPU hotplug lock in group_cpus_evenly(), in which two stage spreads are taken: 1) the 1st stage is over all present CPUs; 2) the end stage is over all other CPUs. Turns out the two stage spread just needs consistent 'cpu_present_mask', and remove the CPU hotplug lock by storing it into one local cache. This way doesn't change correctness, because all CPUs are still covered. Link: https://lkml.kernel.org/r/20231120083559.285174-1-ming.lei@redhat.com Signed-off-by: Ming Lei Reported-by: Yi Zhang Reported-by: Guangwu Zhang Tested-by: Guangwu Zhang Reviewed-by: Chengming Zhou Reviewed-by: Jens Axboe Cc: Keith Busch Cc: Signed-off-by: Andrew Morton --- lib/group_cpus.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/group_cpus.c b/lib/group_cpus.c index aa3f6815bb12..ee272c4cefcc 100644 --- a/lib/group_cpus.c +++ b/lib/group_cpus.c @@ -366,13 +366,25 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) if (!masks) goto fail_node_to_cpumask; - /* Stabilize the cpumasks */ - cpus_read_lock(); build_node_to_cpumask(node_to_cpumask); + /* + * Make a local cache of 'cpu_present_mask', so the two stages + * spread can observe consistent 'cpu_present_mask' without holding + * cpu hotplug lock, then we can reduce deadlock risk with cpu + * hotplug code. + * + * Here CPU hotplug may happen when reading `cpu_present_mask`, and + * we can live with the case because it only affects that hotplug + * CPU is handled in the 1st or 2nd stage, and either way is correct + * from API user viewpoint since 2-stage spread is sort of + * optimization. + */ + cpumask_copy(npresmsk, data_race(cpu_present_mask)); + /* grouping present CPUs first */ ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask, - cpu_present_mask, nmsk, masks); + npresmsk, nmsk, masks); if (ret < 0) goto fail_build_affinity; nr_present = ret; @@ -387,15 +399,13 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) curgrp = 0; else curgrp = nr_present; - cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask); + cpumask_andnot(npresmsk, cpu_possible_mask, npresmsk); ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask, npresmsk, nmsk, masks); if (ret >= 0) nr_others = ret; fail_build_affinity: - cpus_read_unlock(); - if (ret >= 0) WARN_ON(nr_present + nr_others < numgrps); -- cgit From 1f3730fd9e8d4d77fb99c60d0e6ad4b1104e7e04 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 19 Nov 2023 17:15:28 +0000 Subject: mm/damon/core: copy nr_accesses when splitting region Regions split function ('damon_split_region_at()') is called at the beginning of an aggregation interval, and when DAMOS applying the actions and charging quota. Because 'nr_accesses' fields of all regions are reset at the beginning of each aggregation interval, and DAMOS was applying the action at the end of each aggregation interval, there was no need to copy the 'nr_accesses' field to the split-out region. However, commit 42f994b71404 ("mm/damon/core: implement scheme-specific apply interval") made DAMOS applies action on its own timing interval. Hence, 'nr_accesses' should also copied to split-out regions, but the commit didn't. Fix it by copying it. Link: https://lkml.kernel.org/r/20231119171529.66863-1-sj@kernel.org Fixes: 42f994b71404 ("mm/damon/core: implement scheme-specific apply interval") Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index 6262d55904e7..ce1562783e7e 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1225,6 +1225,7 @@ static void damon_split_region_at(struct damon_target *t, new->age = r->age; new->last_nr_accesses = r->last_nr_accesses; new->nr_accesses_bp = r->nr_accesses_bp; + new->nr_accesses = r->nr_accesses; damon_insert_region(new, r, damon_next_region(r), t); } -- cgit From f39fb633fe9b9a4a7572ec32debf766d971a500b Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Mon, 20 Nov 2023 15:29:08 -0700 Subject: selftests/mm: prevent duplicate runs caused by TEST_GEN_PROGS Commit 05f1edac8009 ("selftests/mm: run all tests from run_vmtests.sh") fixed the inconsistency caused by tests being defined as TEST_GEN_PROGS. This issue was leading to tests not being executed via run_vmtests.sh and furthermore some tests running twice due to the kselftests wrapper also executing them. Fix the definition of two tests (soft-dirty and pagemap_ioctl) that are still incorrectly defined. Link: https://lkml.kernel.org/r/20231120222908.28559-1-npache@redhat.com Signed-off-by: Nico Pache Reviewed-by: David Hildenbrand Cc: Joel Savitz Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 78dfec8bc676..dede0bcf97a3 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -60,7 +60,7 @@ TEST_GEN_FILES += mrelease_test TEST_GEN_FILES += mremap_dontunmap TEST_GEN_FILES += mremap_test TEST_GEN_FILES += on-fault-limit -TEST_GEN_PROGS += pagemap_ioctl +TEST_GEN_FILES += pagemap_ioctl TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress @@ -72,7 +72,7 @@ TEST_GEN_FILES += mdwe_test TEST_GEN_FILES += hugetlb_fault_after_madv ifneq ($(ARCH),arm64) -TEST_GEN_PROGS += soft-dirty +TEST_GEN_FILES += soft-dirty endif ifeq ($(ARCH),x86_64) -- cgit From 97219cc358ad156ec5b170c2ed6f44278932c63c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 23 Nov 2023 17:42:04 -0500 Subject: mm/Kconfig: make userfaultfd a menuconfig PTE_MARKER_UFFD_WP is a subconfig for userfaultfd. To make it clear, switch to use menuconfig for userfaultfd. Link: https://lkml.kernel.org/r/20231123224204.1060152-1-peterx@redhat.com Signed-off-by: Peter Xu Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: Mike Rapoport (IBM) Cc: Peter Xu Signed-off-by: Andrew Morton --- mm/Kconfig | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/mm/Kconfig b/mm/Kconfig index 89971a894b60..57cd378c73d6 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1201,13 +1201,6 @@ config ANON_VMA_NAME area from being merged with adjacent virtual memory areas due to the difference in their name. -config USERFAULTFD - bool "Enable userfaultfd() system call" - depends on MMU - help - Enable the userfaultfd() system call that allows to intercept and - handle page faults in userland. - config HAVE_ARCH_USERFAULTFD_WP bool help @@ -1218,6 +1211,14 @@ config HAVE_ARCH_USERFAULTFD_MINOR help Arch has userfaultfd minor fault support +menuconfig USERFAULTFD + bool "Enable userfaultfd() system call" + depends on MMU + help + Enable the userfaultfd() system call that allows to intercept and + handle page faults in userland. + +if USERFAULTFD config PTE_MARKER_UFFD_WP bool "Userfaultfd write protection support for shmem/hugetlbfs" default y @@ -1227,6 +1228,7 @@ config PTE_MARKER_UFFD_WP Allows to create marker PTEs for userfaultfd write protection purposes. It is required to enable userfaultfd write protection on file-backed memory types like shmem and hugetlbfs. +endif # USERFAULTFD # multi-gen LRU { config LRU_GEN -- cgit From 854f2764b5771e450b44e6a5fddb6872deb5bb56 Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Mon, 27 Nov 2023 15:04:01 +0800 Subject: scripts/gdb/tasks: fix lx-ps command error Since commit 8e1f385104ac ("kill task_struct->thread_group") remove the thread_group, we will encounter below issue. (gdb) lx-ps TASK PID COMM 0xffff800086503340 0 swapper/0 Python Exception : There is no member named thread_group. Error occurred in Python: There is no member named thread_group. We use signal->thread_head to iterate all threads instead. [Kuan-Ying.Lee@mediatek.com: v2] Link: https://lkml.kernel.org/r/20231129065142.13375-2-Kuan-Ying.Lee@mediatek.com Link: https://lkml.kernel.org/r/20231127070404.4192-2-Kuan-Ying.Lee@mediatek.com Fixes: 8e1f385104ac ("kill task_struct->thread_group") Signed-off-by: Kuan-Ying Lee Acked-by: Oleg Nesterov Tested-by: Florian Fainelli Cc: AngeloGioacchino Del Regno Cc: Chinwen Chang Cc: Kuan-Ying Lee Cc: Matthias Brugger Cc: Qun-Wei Lin Cc: Andrey Konovalov Signed-off-by: Andrew Morton --- scripts/gdb/linux/tasks.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py index 17ec19e9b5bf..aa5ab6251f76 100644 --- a/scripts/gdb/linux/tasks.py +++ b/scripts/gdb/linux/tasks.py @@ -13,7 +13,7 @@ import gdb -from linux import utils +from linux import utils, lists task_type = utils.CachedType("struct task_struct") @@ -22,19 +22,15 @@ task_type = utils.CachedType("struct task_struct") def task_lists(): task_ptr_type = task_type.get_type().pointer() init_task = gdb.parse_and_eval("init_task").address - t = g = init_task + t = init_task while True: - while True: - yield t + thread_head = t['signal']['thread_head'] + for thread in lists.list_for_each_entry(thread_head, task_ptr_type, 'thread_node'): + yield thread - t = utils.container_of(t['thread_group']['next'], - task_ptr_type, "thread_group") - if t == g: - break - - t = g = utils.container_of(g['tasks']['next'], - task_ptr_type, "tasks") + t = utils.container_of(t['tasks']['next'], + task_ptr_type, "tasks") if t == init_task: return -- cgit From 7d6fa31a2fd7072376b3c8be66bf8527b2c8208c Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 24 Nov 2023 21:38:40 +0000 Subject: mm/damon/sysfs-schemes: add timeout for update_schemes_tried_regions If a scheme is set to not applied to any monitoring target region for any reasons including the target access pattern, quota, filters, or watermarks, writing 'update_schemes_tried_regions' to 'state' DAMON sysfs file can indefinitely hang. Fix the case by implementing a timeout for the operation. The time limit is two apply intervals of each scheme. Link: https://lkml.kernel.org/r/20231124213840.39157-1-sj@kernel.org Fixes: 4d4e41b68299 ("mm/damon/sysfs-schemes: do not update tried regions more than one DAMON snapshot") Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 49 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index be667236b8e6..fe0fe2562000 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -139,6 +139,13 @@ static const struct kobj_type damon_sysfs_scheme_region_ktype = { * damon_sysfs_before_damos_apply() understands the situation by showing the * 'finished' status and do nothing. * + * If DAMOS is not applied to any region due to any reasons including the + * access pattern, the watermarks, the quotas, and the filters, + * ->before_damos_apply() will not be called back. Until the situation is + * changed, the update will not be finished. To avoid this, + * damon_sysfs_after_sampling() set the status as 'finished' if more than two + * apply intervals of the scheme is passed while the state is 'idle'. + * * Finally, the tried regions request handling finisher function * (damon_sysfs_schemes_update_regions_stop()) unregisters the callbacks. */ @@ -154,6 +161,7 @@ struct damon_sysfs_scheme_regions { int nr_regions; unsigned long total_bytes; enum damos_sysfs_regions_upd_status upd_status; + unsigned long upd_timeout_jiffies; }; static struct damon_sysfs_scheme_regions * @@ -1854,7 +1862,9 @@ static int damon_sysfs_after_sampling(struct damon_ctx *ctx) for (i = 0; i < sysfs_schemes->nr; i++) { sysfs_regions = sysfs_schemes->schemes_arr[i]->tried_regions; if (sysfs_regions->upd_status == - DAMOS_TRIED_REGIONS_UPD_STARTED) + DAMOS_TRIED_REGIONS_UPD_STARTED || + time_after(jiffies, + sysfs_regions->upd_timeout_jiffies)) sysfs_regions->upd_status = DAMOS_TRIED_REGIONS_UPD_FINISHED; } @@ -1885,14 +1895,41 @@ int damon_sysfs_schemes_clear_regions( return 0; } +static struct damos *damos_sysfs_nth_scheme(int n, struct damon_ctx *ctx) +{ + struct damos *scheme; + int i = 0; + + damon_for_each_scheme(scheme, ctx) { + if (i == n) + return scheme; + i++; + } + return NULL; +} + static void damos_tried_regions_init_upd_status( - struct damon_sysfs_schemes *sysfs_schemes) + struct damon_sysfs_schemes *sysfs_schemes, + struct damon_ctx *ctx) { int i; + struct damos *scheme; + struct damon_sysfs_scheme_regions *sysfs_regions; - for (i = 0; i < sysfs_schemes->nr; i++) - sysfs_schemes->schemes_arr[i]->tried_regions->upd_status = - DAMOS_TRIED_REGIONS_UPD_IDLE; + for (i = 0; i < sysfs_schemes->nr; i++) { + sysfs_regions = sysfs_schemes->schemes_arr[i]->tried_regions; + scheme = damos_sysfs_nth_scheme(i, ctx); + if (!scheme) { + sysfs_regions->upd_status = + DAMOS_TRIED_REGIONS_UPD_FINISHED; + continue; + } + sysfs_regions->upd_status = DAMOS_TRIED_REGIONS_UPD_IDLE; + sysfs_regions->upd_timeout_jiffies = jiffies + + 2 * usecs_to_jiffies(scheme->apply_interval_us ? + scheme->apply_interval_us : + ctx->attrs.sample_interval); + } } /* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */ @@ -1902,7 +1939,7 @@ int damon_sysfs_schemes_update_regions_start( { damon_sysfs_schemes_clear_regions(sysfs_schemes, ctx); damon_sysfs_schemes_for_damos_callback = sysfs_schemes; - damos_tried_regions_init_upd_status(sysfs_schemes); + damos_tried_regions_init_upd_status(sysfs_schemes, ctx); damos_regions_upd_total_bytes_only = total_bytes_only; ctx->callback.before_damos_apply = damon_sysfs_before_damos_apply; ctx->callback.after_sampling = damon_sysfs_after_sampling; -- cgit From 4e9e2e4c65136dfd32dd0afe555961433d1cf906 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Tue, 28 Nov 2023 13:52:48 +0800 Subject: drivers/base/cpu: crash data showing should depends on KEXEC_CORE After commit 88a6f8994421 ("crash: memory and CPU hotplug sysfs attributes"), on x86_64, if only below kernel configs related to kdump are set, compiling error are triggered. ---- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y CONFIG_CRASH_HOTPLUG=y ------ ------------------------------------------------------ drivers/base/cpu.c: In function `crash_hotplug_show': drivers/base/cpu.c:309:40: error: implicit declaration of function `crash_hotplug_cpu_support'; did you mean `crash_hotplug_show'? [-Werror=implicit-function-declaration] 309 | return sysfs_emit(buf, "%d\n", crash_hotplug_cpu_support()); | ^~~~~~~~~~~~~~~~~~~~~~~~~ | crash_hotplug_show cc1: some warnings being treated as errors ------------------------------------------------------ CONFIG_KEXEC is used to enable kexec_load interface, the crash_notes/crash_notes_size/crash_hotplug showing depends on CONFIG_KEXEC is incorrect. It should depend on KEXEC_CORE instead. Fix it now. Link: https://lkml.kernel.org/r/20231128055248.659808-1-bhe@redhat.com Fixes: 88a6f8994421 ("crash: memory and CPU hotplug sysfs attributes") Signed-off-by: Baoquan He Tested-by: Ignat Korchagin [compile-time only] Tested-by: Alexander Gordeev Reviewed-by: Eric DeVolder Cc: Signed-off-by: Andrew Morton --- drivers/base/cpu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 9ea22e165acd..548491de818e 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -144,7 +144,7 @@ static DEVICE_ATTR(release, S_IWUSR, NULL, cpu_release_store); #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ #endif /* CONFIG_HOTPLUG_CPU */ -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE #include static ssize_t crash_notes_show(struct device *dev, @@ -189,14 +189,14 @@ static const struct attribute_group crash_note_cpu_attr_group = { #endif static const struct attribute_group *common_cpu_attr_groups[] = { -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE &crash_note_cpu_attr_group, #endif NULL }; static const struct attribute_group *hotplugable_cpu_attr_groups[] = { -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE &crash_note_cpu_attr_group, #endif NULL -- cgit From 8e92157d7f6190c86bfd6144a409001469827100 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 28 Nov 2023 19:44:03 +0200 Subject: units: add missing header BITS_PER_BYTE is defined in bits.h. Link: https://lkml.kernel.org/r/20231128174404.393393-1-andriy.shevchenko@linux.intel.com Fixes: e8eed5f7366f ("units: Add BYTES_PER_*BIT") Signed-off-by: Andy Shevchenko Reviewed-by: Randy Dunlap Cc: Damian Muszynski Cc: Rasmus Villemoes Cc: Herbert Xu Signed-off-by: Andrew Morton --- include/linux/units.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/units.h b/include/linux/units.h index ff1bd6b5f5b3..45110daaf8d3 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -2,6 +2,7 @@ #ifndef _LINUX_UNITS_H #define _LINUX_UNITS_H +#include #include /* Metric prefixes in accordance with Système international (d'unités) */ -- cgit From dccf78d39f1069a5ddf4328bf0c97aa5f2f4296e Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Tue, 28 Nov 2023 13:44:57 +0800 Subject: kernel/Kconfig.kexec: drop select of KEXEC for CRASH_DUMP Ignat Korchagin complained that a potential config regression was introduced by commit 89cde455915f ("kexec: consolidate kexec and crash options into kernel/Kconfig.kexec"). Before the commit, CONFIG_CRASH_DUMP has no dependency on CONFIG_KEXEC. After the commit, CRASH_DUMP selects KEXEC. That enforces system to have CONFIG_KEXEC=y as long as CONFIG_CRASH_DUMP=Y which people may not want. In Ignat's case, he sets CONFIG_CRASH_DUMP=y, CONFIG_KEXEC_FILE=y and CONFIG_KEXEC=n because kexec_load interface could have security issue if kernel/initrd has no chance to be signed and verified. CRASH_DUMP has select of KEXEC because Eric, author of above commit, met a LKP report of build failure when posting patch of earlier version. Please see below link to get detail of the LKP report: https://lore.kernel.org/all/3e8eecd1-a277-2cfb-690e-5de2eb7b988e@oracle.com/T/#u In fact, that LKP report is triggered because arm's is wrapped in CONFIG_KEXEC ifdeffery scope. That is wrong. CONFIG_KEXEC controls the enabling/disabling of kexec_load interface, but not kexec feature. Removing the wrongly added CONFIG_KEXEC ifdeffery scope in of arm allows us to drop the select KEXEC for CRASH_DUMP. Meanwhile, change arch/arm/kernel/Makefile to let machine_kexec.o relocate_kernel.o depend on KEXEC_CORE. Link: https://lkml.kernel.org/r/20231128054457.659452-1-bhe@redhat.com Fixes: 89cde455915f ("kexec: consolidate kexec and crash options into kernel/Kconfig.kexec") Signed-off-by: Baoquan He Reported-by: Ignat Korchagin Tested-by: Ignat Korchagin [compile-time only] Tested-by: Alexander Gordeev Reviewed-by: Eric DeVolder Tested-by: Eric DeVolder Signed-off-by: Andrew Morton --- arch/arm/include/asm/kexec.h | 4 ---- arch/arm/kernel/Makefile | 2 +- kernel/Kconfig.kexec | 1 - 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h index e62832dcba76..a8287e7ab9d4 100644 --- a/arch/arm/include/asm/kexec.h +++ b/arch/arm/include/asm/kexec.h @@ -2,8 +2,6 @@ #ifndef _ARM_KEXEC_H #define _ARM_KEXEC_H -#ifdef CONFIG_KEXEC - /* Maximum physical address we can use pages from */ #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) /* Maximum address we can reach in physical address mode */ @@ -82,6 +80,4 @@ static inline struct page *boot_pfn_to_page(unsigned long boot_pfn) #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_KEXEC */ - #endif /* _ARM_KEXEC_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index d53f56d6f840..771264d4726a 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -59,7 +59,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += entry-ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o patch.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o patch.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o # Main staffs in KPROBES are in arch/arm/probes/ . obj-$(CONFIG_KPROBES) += patch.o insn.o obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 7aff28ded2f4..1cc3b1c595d7 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -97,7 +97,6 @@ config CRASH_DUMP depends on ARCH_SUPPORTS_KEXEC select CRASH_CORE select KEXEC_CORE - select KEXEC help Generate crash dump after being started by kexec. This should be normally only set in special crash dump kernels -- cgit From d61d0ab573649789bf9eb909c89a1a193b2e3d10 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 29 Nov 2023 23:15:47 +0900 Subject: nilfs2: fix missing error check for sb_set_blocksize call When mounting a filesystem image with a block size larger than the page size, nilfs2 repeatedly outputs long error messages with stack traces to the kernel log, such as the following: getblk(): invalid block size 8192 requested logical block size: 512 ... Call Trace: dump_stack_lvl+0x92/0xd4 dump_stack+0xd/0x10 bdev_getblk+0x33a/0x354 __breadahead+0x11/0x80 nilfs_search_super_root+0xe2/0x704 [nilfs2] load_nilfs+0x72/0x504 [nilfs2] nilfs_mount+0x30f/0x518 [nilfs2] legacy_get_tree+0x1b/0x40 vfs_get_tree+0x18/0xc4 path_mount+0x786/0xa88 __ia32_sys_mount+0x147/0x1a8 __do_fast_syscall_32+0x56/0xc8 do_fast_syscall_32+0x29/0x58 do_SYSENTER_32+0x15/0x18 entry_SYSENTER_32+0x98/0xf1 ... This overloads the system logger. And to make matters worse, it sometimes crashes the kernel with a memory access violation. This is because the return value of the sb_set_blocksize() call, which should be checked for errors, is not checked. The latter issue is due to out-of-buffer memory being accessed based on a large block size that caused sb_set_blocksize() to fail for buffers read with the initial minimum block size that remained unupdated in the super_block structure. Since nilfs2 mkfs tool does not accept block sizes larger than the system page size, this has been overlooked. However, it is possible to create this situation by intentionally modifying the tool or by passing a filesystem image created on a system with a large page size to a system with a smaller page size and mounting it. Fix this issue by inserting the expected error handling for the call to sb_set_blocksize(). Link: https://lkml.kernel.org/r/20231129141547.4726-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/the_nilfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 0f0667957c81..71400496ed36 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -716,7 +716,11 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) goto failed_sbh; } nilfs_release_super_block(nilfs); - sb_set_blocksize(sb, blocksize); + if (!sb_set_blocksize(sb, blocksize)) { + nilfs_err(sb, "bad blocksize %d", blocksize); + err = -EINVAL; + goto out; + } err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp); if (err) -- cgit From 73424d00dc63ba681856e06cfb0a5abbdb62e2b5 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 30 Nov 2023 11:40:18 +0800 Subject: highmem: fix a memory copy problem in memcpy_from_folio Clang static checker complains that value stored to 'from' is never read. And memcpy_from_folio() only copy the last chunk memory from folio to destination. Use 'to += chunk' to replace 'from += chunk' to fix this typo problem. Link: https://lkml.kernel.org/r/20231130034017.1210429-1-suhui@nfschina.com Fixes: b23d03ef7af5 ("highmem: add memcpy_to_folio() and memcpy_from_folio()") Signed-off-by: Su Hui Reviewed-by: Matthew Wilcox (Oracle) Cc: Ira Weiny Cc: Jiaqi Yan Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Collingbourne Cc: Tom Rix Cc: Tony Luck Cc: Signed-off-by: Andrew Morton --- include/linux/highmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 4cacc0e43b51..be20cff4ba73 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -454,7 +454,7 @@ static inline void memcpy_from_folio(char *to, struct folio *folio, memcpy(to, from, chunk); kunmap_local(from); - from += chunk; + to += chunk; offset += chunk; len -= chunk; } while (len > 0); -- cgit From bc220fe70919d6500811e5e1e07aff43e137065a Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Thu, 30 Nov 2023 15:38:48 +0700 Subject: MAINTAINERS: drop Antti Palosaari MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit He is currently inactive (last message from him is two years ago [1]). His media tree [2] is also dormant (latest activity is 6 years ago), yet his site is still online [3]. Drop him from MAINTAINERS and add CREDITS entry for him. We thank him for maintaining various DVB drivers. [1]: https://lore.kernel.org/all/660772b3-0597-02db-ed94-c6a9be04e8e8@iki.fi/ [2]: https://git.linuxtv.org/anttip/media_tree.git/ [3]: https://palosaari.fi/linux/ Link: https://lkml.kernel.org/r/20231130083848.5396-1-bagasdotme@gmail.com Signed-off-by: Bagas Sanjaya Acked-by: Antti Palosaari Cc: Hans Verkuil Cc: Lukas Bulwahn Cc: Mauro Carvalho Chehab Cc: Uwe Kleine-König Signed-off-by: Andrew Morton --- CREDITS | 8 +++ MAINTAINERS | 179 +++++++++++++----------------------------------------------- 2 files changed, 45 insertions(+), 142 deletions(-) diff --git a/CREDITS b/CREDITS index f33a33fd2371..81845c39e3cf 100644 --- a/CREDITS +++ b/CREDITS @@ -2944,6 +2944,14 @@ D: IPX development and support N: Venkatesh Pallipadi (Venki) D: x86/HPET +N: Antti Palosaari +E: crope@iki.fi +D: Various DVB drivers +W: https://palosaari.fi/linux/ +S: Yliopistokatu 1 D 513 +S: FI-90570 Oulu +S: FINLAND + N: Kyungmin Park E: kyungmin.park@samsung.com D: Samsung S5Pv210 and Exynos4210 mobile platforms diff --git a/MAINTAINERS b/MAINTAINERS index 633fc9f22cda..f60da8fb3f54 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -171,13 +171,10 @@ S: Supported F: drivers/soc/fujitsu/a64fx-diag.c A8293 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/dvb-frontends/a8293* AACRAID SCSI RAID DRIVER @@ -576,23 +573,17 @@ F: drivers/iio/accel/adxl372_i2c.c F: drivers/iio/accel/adxl372_spi.c AF9013 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/dvb-frontends/af9013* AF9033 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/dvb-frontends/af9033* AFFS FILE SYSTEM @@ -650,13 +641,10 @@ F: fs/aio.c F: include/linux/*aio*.h AIRSPY MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/usb/airspy/ ALACRITECH GIGABIT ETHERNET DRIVER @@ -5605,13 +5593,10 @@ F: Documentation/driver-api/media/drivers/cx88* F: drivers/media/pci/cx88/ CXD2820R MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/dvb-frontends/cxd2820r* CXGB3 ETHERNET DRIVER (CXGB3) @@ -5724,13 +5709,10 @@ F: Documentation/devicetree/bindings/input/cypress-sf.yaml F: drivers/input/keyboard/cypress-sf.c CYPRESS_FIRMWARE MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/common/cypress_firmware* CYTTSP TOUCHSCREEN DRIVER @@ -7320,53 +7302,38 @@ T: git git://linuxtv.org/media_tree.git F: drivers/media/pci/dt3155/ DVB_USB_AF9015 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/usb/dvb-usb-v2/af9015* DVB_USB_AF9035 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/usb/dvb-usb-v2/af9035* DVB_USB_ANYSEE MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/usb/dvb-usb-v2/anysee* DVB_USB_AU6610 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/usb/dvb-usb-v2/au6610* DVB_USB_CE6230 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/usb/dvb-usb-v2/ce6230* DVB_USB_CXUSB MEDIA DRIVER @@ -7380,22 +7347,17 @@ T: git git://linuxtv.org/media_tree.git F: drivers/media/usb/dvb-usb/cxusb* DVB_USB_EC168 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/usb/dvb-usb-v2/ec168* DVB_USB_GL861 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/usb/dvb-usb-v2/gl861* DVB_USB_MXL111SF MEDIA DRIVER @@ -7409,23 +7371,18 @@ T: git git://linuxtv.org/mkrufky/mxl111sf.git F: drivers/media/usb/dvb-usb-v2/mxl111sf* DVB_USB_RTL28XXU MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/usb/dvb-usb-v2/rtl28xxu* DVB_USB_V2 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/usb/dvb-usb-v2/dvb_usb* F: drivers/media/usb/dvb-usb-v2/usb_urb.c @@ -7467,13 +7424,10 @@ F: Documentation/devicetree/bindings/input/e3x0-button.txt F: drivers/input/misc/e3x0-button.c E4000 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/tuners/e4000* EARTH_PT1 MEDIA DRIVER @@ -7489,13 +7443,10 @@ S: Odd Fixes F: drivers/media/pci/pt3/ EC100 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/dvb-frontends/ec100* ECRYPT FILE SYSTEM @@ -8113,13 +8064,10 @@ F: drivers/media/tuners/fc0011.c F: drivers/media/tuners/fc0011.h FC2580 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/tuners/fc2580* FCOE SUBSYSTEM (libfc, libfcoe, fcoe) @@ -9249,13 +9197,10 @@ F: include/trace/events/habanalabs.h F: include/uapi/drm/habanalabs_accel.h HACKRF MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/usb/hackrf/ HANDSHAKE UPCALL FOR TRANSPORT LAYER SECURITY @@ -11328,13 +11273,10 @@ F: Documentation/hwmon/it87.rst F: drivers/hwmon/it87.c IT913X MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/tuners/it913x* ITE IT66121 HDMI BRIDGE DRIVER @@ -12694,13 +12636,10 @@ W: http://www.tazenda.demon.co.uk/phil/linux-hp F: arch/m68k/hp300/ M88DS3103 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/dvb-frontends/m88ds3103* M88RS2000 MEDIA DRIVER @@ -14594,20 +14533,16 @@ F: include/asm-generic/tlb.h F: mm/mmu_gather.c MN88472 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ F: drivers/media/dvb-frontends/mn88472* MN88473 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ F: drivers/media/dvb-frontends/mn88473* @@ -14695,23 +14630,17 @@ S: Orphan F: drivers/platform/x86/msi-wmi.c MSI001 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/tuners/msi001* MSI2500 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/usb/msi2500/ MSTAR INTERRUPT CONTROLLER DRIVER @@ -17782,13 +17711,10 @@ F: drivers/bus/fsl-mc/ F: include/uapi/linux/fsl_mc.h QT1010 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/tuners/qt1010* QUALCOMM ATH12K WIRELESS DRIVER @@ -18841,33 +18767,24 @@ S: Maintained F: drivers/tty/rpmsg_tty.c RTL2830 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/dvb-frontends/rtl2830* RTL2832 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/dvb-frontends/rtl2832* RTL2832_SDR MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/dvb-frontends/rtl2832_sdr* RTL8180 WIRELESS DRIVER @@ -19677,13 +19594,10 @@ F: drivers/media/platform/renesas/sh_vou.c F: include/media/drv-intf/sh_vou.h SI2157 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/tuners/si2157* SI2165 MEDIA DRIVER @@ -19695,13 +19609,10 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/ F: drivers/media/dvb-frontends/si2165* SI2168 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/dvb-frontends/si2168* SI470X FM RADIO RECEIVER I2C DRIVER @@ -21203,33 +21114,24 @@ W: http://tcp-lp-mod.sourceforge.net/ F: net/ipv4/tcp_lp.c TDA10071 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/dvb-frontends/tda10071* TDA18212 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/tuners/tda18212* TDA18218 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/tuners/tda18218* TDA18250 MEDIA DRIVER @@ -22164,13 +22066,10 @@ F: include/uapi/linux/serial_core.h F: include/uapi/linux/tty.h TUA9001 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org -W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/tuners/tua9001* TULIP NETWORK DRIVERS @@ -24115,20 +24014,16 @@ S: Orphan F: drivers/net/wireless/zydas/zd1211rw/ ZD1301 MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org/ -W: http://palosaari.fi/linux/ Q: https://patchwork.linuxtv.org/project/linux-media/list/ F: drivers/media/usb/dvb-usb-v2/zd1301* ZD1301_DEMOD MEDIA DRIVER -M: Antti Palosaari L: linux-media@vger.kernel.org -S: Maintained +S: Orphan W: https://linuxtv.org/ -W: http://palosaari.fi/linux/ Q: https://patchwork.linuxtv.org/project/linux-media/list/ F: drivers/media/dvb-frontends/zd1301_demod* -- cgit From 801a2b1b49f4dcf06703130922806e9c639c2ca8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 29 Nov 2023 20:33:16 -0800 Subject: scripts/gdb: fix lx-device-list-bus and lx-device-list-class After the conversion to bus_to_subsys() and class_to_subsys(), the gdb scripts listing the system buses and classes respectively was broken, fix those by returning the subsys_priv pointer and have the various caller de-reference either the 'bus' or 'class' structure members accordingly. Link: https://lkml.kernel.org/r/20231130043317.174188-1-florian.fainelli@broadcom.com Fixes: 7b884b7f24b4 ("driver core: class.c: convert to only use class_to_subsys") Signed-off-by: Florian Fainelli Tested-by: Kuan-Ying Lee Cc: Greg Kroah-Hartman Cc: Jan Kiszka Cc: Kieran Bingham Signed-off-by: Andrew Morton --- scripts/gdb/linux/device.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/gdb/linux/device.py b/scripts/gdb/linux/device.py index 16376c5cfec6..0eabc5f4f8ca 100644 --- a/scripts/gdb/linux/device.py +++ b/scripts/gdb/linux/device.py @@ -36,26 +36,26 @@ def for_each_bus(): for kobj in kset_for_each_object(gdb.parse_and_eval('bus_kset')): subsys = container_of(kobj, kset_type.get_type().pointer(), 'kobj') subsys_priv = container_of(subsys, subsys_private_type.get_type().pointer(), 'subsys') - yield subsys_priv['bus'] + yield subsys_priv def for_each_class(): for kobj in kset_for_each_object(gdb.parse_and_eval('class_kset')): subsys = container_of(kobj, kset_type.get_type().pointer(), 'kobj') subsys_priv = container_of(subsys, subsys_private_type.get_type().pointer(), 'subsys') - yield subsys_priv['class'] + yield subsys_priv def get_bus_by_name(name): for item in for_each_bus(): - if item['name'].string() == name: + if item['bus']['name'].string() == name: return item raise gdb.GdbError("Can't find bus type {!r}".format(name)) def get_class_by_name(name): for item in for_each_class(): - if item['name'].string() == name: + if item['class']['name'].string() == name: return item raise gdb.GdbError("Can't find device class {!r}".format(name)) @@ -70,13 +70,13 @@ def klist_for_each(klist): def bus_for_each_device(bus): - for kn in klist_for_each(bus['p']['klist_devices']): + for kn in klist_for_each(bus['klist_devices']): dp = container_of(kn, device_private_type.get_type().pointer(), 'knode_bus') yield dp['device'] def class_for_each_device(cls): - for kn in klist_for_each(cls['p']['klist_devices']): + for kn in klist_for_each(cls['klist_devices']): dp = container_of(kn, device_private_type.get_type().pointer(), 'knode_class') yield dp['device'] @@ -103,7 +103,7 @@ class LxDeviceListBus(gdb.Command): def invoke(self, arg, from_tty): if not arg: for bus in for_each_bus(): - gdb.write('bus {}:\t{}\n'.format(bus['name'].string(), bus)) + gdb.write('bus {}:\t{}\n'.format(bus['bus']['name'].string(), bus)) for dev in bus_for_each_device(bus): _show_device(dev, level=1) else: @@ -123,7 +123,7 @@ class LxDeviceListClass(gdb.Command): def invoke(self, arg, from_tty): if not arg: for cls in for_each_class(): - gdb.write("class {}:\t{}\n".format(cls['name'].string(), cls)) + gdb.write("class {}:\t{}\n".format(cls['class']['name'].string(), cls)) for dev in class_for_each_device(cls): _show_device(dev, level=1) else: -- cgit From 4a3ef6be03e6700037fc20e63aa5ffd972e435ca Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Mon, 4 Dec 2023 10:32:34 -0800 Subject: mm/hugetlb: have CONFIG_HUGETLB_PAGE select CONFIG_XARRAY_MULTI After commit a08c7193e4f1 "mm/filemap: remove hugetlb special casing in filemap.c", hugetlb pages are stored in the page cache in base page sized indexes. This leads to multi index stores in the xarray which is only supporting through CONFIG_XARRAY_MULTI. The other page cache user of multi index stores ,THP, selects XARRAY_MULTI. Have CONFIG_HUGETLB_PAGE follow this behavior as well to avoid the BUG() with a CONFIG_HUGETLB_PAGE && !CONFIG_XARRAY_MULTI config. Link: https://lkml.kernel.org/r/20231204183234.348697-1-sidhartha.kumar@oracle.com Fixes: a08c7193e4f1 ("mm/filemap: remove hugetlb special casing in filemap.c") Signed-off-by: Sidhartha Kumar Reported-by: Al Viro Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- fs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/Kconfig b/fs/Kconfig index fd1f655b4f1f..42837617a55b 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -268,6 +268,7 @@ config HUGETLBFS config HUGETLB_PAGE def_bool HUGETLBFS + select XARRAY_MULTI config HUGETLB_PAGE_OPTIMIZE_VMEMMAP def_bool HUGETLB_PAGE -- cgit From 675abf8df1353e0e3bde314993e0796c524cfbf0 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 5 Dec 2023 17:59:47 +0900 Subject: nilfs2: prevent WARNING in nilfs_sufile_set_segment_usage() If nilfs2 reads a disk image with corrupted segment usage metadata, and its segment usage information is marked as an error for the segment at the write location, nilfs_sufile_set_segment_usage() can trigger WARN_ONs during log writing. Segments newly allocated for writing with nilfs_sufile_alloc() will not have this error flag set, but this unexpected situation will occur if the segment indexed by either nilfs->ns_segnum or nilfs->ns_nextnum (active segment) was marked in error. Fix this issue by inserting a sanity check to treat it as a file system corruption. Since error returns are not allowed during the execution phase where nilfs_sufile_set_segment_usage() is used, this inserts the sanity check into nilfs_sufile_mark_dirty() which pre-reads the buffer containing the segment usage record to be updated and sets it up in a dirty state for writing. In addition, nilfs_sufile_set_segment_usage() is also called when canceling log writing and undoing segment usage update, so in order to avoid issuing the same kernel warning in that case, in case of cancellation, avoid checking the error flag in nilfs_sufile_set_segment_usage(). Link: https://lkml.kernel.org/r/20231205085947.4431-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+14e9f834f6ddecece094@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=14e9f834f6ddecece094 Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/sufile.c | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 2c6078a6b8ec..58ca7c936393 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -501,15 +501,38 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); - if (!ret) { - mark_buffer_dirty(bh); - nilfs_mdt_mark_dirty(sufile); - kaddr = kmap_atomic(bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + if (ret) + goto out_sem; + + kaddr = kmap_atomic(bh->b_page); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + if (unlikely(nilfs_segment_usage_error(su))) { + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + + kunmap_atomic(kaddr); + brelse(bh); + if (nilfs_segment_is_active(nilfs, segnum)) { + nilfs_error(sufile->i_sb, + "active segment %llu is erroneous", + (unsigned long long)segnum); + } else { + /* + * Segments marked erroneous are never allocated by + * nilfs_sufile_alloc(); only active segments, ie, + * the segments indexed by ns_segnum or ns_nextnum, + * can be erroneous here. + */ + WARN_ON_ONCE(1); + } + ret = -EIO; + } else { nilfs_segment_usage_set_dirty(su); kunmap_atomic(kaddr); + mark_buffer_dirty(bh); + nilfs_mdt_mark_dirty(sufile); brelse(bh); } +out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } @@ -536,9 +559,14 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, kaddr = kmap_atomic(bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); - WARN_ON(nilfs_segment_usage_error(su)); - if (modtime) + if (modtime) { + /* + * Check segusage error and set su_lastmod only when updating + * this entry with a valid timestamp, not for cancellation. + */ + WARN_ON_ONCE(nilfs_segment_usage_error(su)); su->su_lastmod = cpu_to_le64(modtime); + } su->su_nblocks = cpu_to_le32(nblocks); kunmap_atomic(kaddr); -- cgit From b2f557a21bc8fffdcd65794eda8a854e024999f3 Mon Sep 17 00:00:00 2001 From: Jiexun Wang Date: Thu, 21 Sep 2023 20:27:51 +0800 Subject: mm/madvise: add cond_resched() in madvise_cold_or_pageout_pte_range() I conducted real-time testing and observed that madvise_cold_or_pageout_pte_range() causes significant latency under memory pressure, which can be effectively reduced by adding cond_resched() within the loop. I tested on the LicheePi 4A board using Cylictest for latency testing and Ftrace for latency tracing. The board uses TH1520 processor and has a memory size of 8GB. The kernel version is 6.5.0 with the PREEMPT_RT patch applied. The script I tested is as follows: echo wakeup_rt > /sys/kernel/tracing/current_tracer echo 1 > /sys/kernel/tracing/tracing_on echo 0 > /sys/kernel/tracing/tracing_max_latency stress-ng --vm 8 --vm-bytes 2G & cyclictest --mlockall --smp --priority=99 --distance=0 --duration=30m echo 0 > /sys/kernel/tracing/tracing_on cat /sys/kernel/tracing/trace The tracing results before modification are as follows: # tracer: wakeup_rt # # wakeup_rt latency trace v1.1.5 on 6.5.0-rt6-r1208-00003-g999d221864bf # -------------------------------------------------------------------- # latency: 2552 us, #6/6, CPU#3 | (M:preempt_rt VP:0, KP:0, SP:0 HP:0 #P:4) # ----------------- # | task: cyclictest-196 (uid:0 nice:0 policy:1 rt_prio:99) # ----------------- # # _--------=> CPU# # / _-------=> irqs-off/BH-disabled # | / _------=> need-resched # || / _-----=> need-resched-lazy # ||| / _----=> hardirq/softirq # |||| / _---=> preempt-depth # ||||| / _--=> preempt-lazy-depth # |||||| / _-=> migrate-disable # ||||||| / delay # cmd pid |||||||| time | caller # \ / |||||||| \ | / stress-n-206 3dn.h512 2us : 206:120:R + [003] 196: 0:R cyclictest stress-n-206 3dn.h512 7us : => __ftrace_trace_stack => __trace_stack => probe_wakeup => ttwu_do_activate => try_to_wake_up => wake_up_process => hrtimer_wakeup => __hrtimer_run_queues => hrtimer_interrupt => riscv_timer_interrupt => handle_percpu_devid_irq => generic_handle_domain_irq => riscv_intc_irq => handle_riscv_irq => do_irq stress-n-206 3dn.h512 9us#: 0 stress-n-206 3d...3.. 2544us : __schedule stress-n-206 3d...3.. 2545us : 206:120:R ==> [003] 196: 0:R cyclictest stress-n-206 3d...3.. 2551us : => __ftrace_trace_stack => __trace_stack => probe_wakeup_sched_switch => __schedule => preempt_schedule => migrate_enable => rt_spin_unlock => madvise_cold_or_pageout_pte_range => walk_pgd_range => __walk_page_range => walk_page_range => madvise_pageout => madvise_vma_behavior => do_madvise => sys_madvise => do_trap_ecall_u => ret_from_exception The tracing results after modification are as follows: # tracer: wakeup_rt # # wakeup_rt latency trace v1.1.5 on 6.5.0-rt6-r1208-00004-gca3876fc69a6-dirty # -------------------------------------------------------------------- # latency: 1689 us, #6/6, CPU#0 | (M:preempt_rt VP:0, KP:0, SP:0 HP:0 #P:4) # ----------------- # | task: cyclictest-217 (uid:0 nice:0 policy:1 rt_prio:99) # ----------------- # # _--------=> CPU# # / _-------=> irqs-off/BH-disabled # | / _------=> need-resched # || / _-----=> need-resched-lazy # ||| / _----=> hardirq/softirq # |||| / _---=> preempt-depth # ||||| / _--=> preempt-lazy-depth # |||||| / _-=> migrate-disable # ||||||| / delay # cmd pid |||||||| time | caller # \ / |||||||| \ | / stress-n-232 0dn.h413 1us+: 232:120:R + [000] 217: 0:R cyclictest stress-n-232 0dn.h413 12us : => __ftrace_trace_stack => __trace_stack => probe_wakeup => ttwu_do_activate => try_to_wake_up => wake_up_process => hrtimer_wakeup => __hrtimer_run_queues => hrtimer_interrupt => riscv_timer_interrupt => handle_percpu_devid_irq => generic_handle_domain_irq => riscv_intc_irq => handle_riscv_irq => do_irq stress-n-232 0dn.h413 19us#: 0 stress-n-232 0d...3.. 1671us : __schedule stress-n-232 0d...3.. 1676us+: 232:120:R ==> [000] 217: 0:R cyclictest stress-n-232 0d...3.. 1687us : => __ftrace_trace_stack => __trace_stack => probe_wakeup_sched_switch => __schedule => preempt_schedule => migrate_enable => free_unref_page_list => release_pages => free_pages_and_swap_cache => tlb_batch_pages_flush => tlb_flush_mmu => unmap_page_range => unmap_vmas => unmap_region => do_vmi_align_munmap.constprop.0 => do_vmi_munmap => __vm_munmap => sys_munmap => do_trap_ecall_u => ret_from_exception After the modification, the cause of maximum latency is no longer madvise_cold_or_pageout_pte_range(), so this modification can reduce the latency caused by madvise_cold_or_pageout_pte_range(). Currently the madvise_cold_or_pageout_pte_range() function exhibits significant latency under memory pressure, which can be effectively reduced by adding cond_resched() within the loop. When the batch_count reaches SWAP_CLUSTER_MAX, we reschedule the task to ensure fairness and avoid long lock holding times. Link: https://lkml.kernel.org/r/85363861af65fac66c7a98c251906afc0d9c8098.1695291046.git.wangjiexun@tinylab.org Signed-off-by: Jiexun Wang Cc: Zhangjin Wu Signed-off-by: Andrew Morton --- mm/madvise.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mm/madvise.c b/mm/madvise.c index cf4d694280e9..6214a1ab5654 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -335,6 +335,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, struct folio *folio = NULL; LIST_HEAD(folio_list); bool pageout_anon_only_filter; + unsigned int batch_count = 0; if (fatal_signal_pending(current)) return -EINTR; @@ -416,6 +417,7 @@ huge_unlock: regular_folio: #endif tlb_change_page_size(tlb, PAGE_SIZE); +restart: start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (!start_pte) return 0; @@ -424,6 +426,15 @@ regular_folio: for (; addr < end; pte++, addr += PAGE_SIZE) { ptent = ptep_get(pte); + if (++batch_count == SWAP_CLUSTER_MAX) { + batch_count = 0; + if (need_resched()) { + pte_unmap_unlock(start_pte, ptl); + cond_resched(); + goto restart; + } + } + if (pte_none(ptent)) continue; -- cgit From 61890dc28f7d9e9aac8a9471302613824c22fae4 Mon Sep 17 00:00:00 2001 From: Konstantin Aladyshev Date: Wed, 6 Dec 2023 11:07:44 +0300 Subject: usb: gadget: f_hid: fix report descriptor allocation The commit 89ff3dfac604 ("usb: gadget: f_hid: fix f_hidg lifetime vs cdev") has introduced a bug that leads to hid device corruption after the replug operation. Reverse device managed memory allocation for the report descriptor to fix the issue. Tested: This change was tested on the AMD EthanolX CRB server with the BMC based on the OpenBMC distribution. The BMC provides KVM functionality via the USB gadget device: - before: KVM page refresh results in a broken USB device, - after: KVM page refresh works without any issues. Fixes: 89ff3dfac604 ("usb: gadget: f_hid: fix f_hidg lifetime vs cdev") Cc: stable@vger.kernel.org Signed-off-by: Konstantin Aladyshev Link: https://lore.kernel.org/r/20231206080744.253-2-aladyshev22@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index ea85e2c701a1..3c8a9dd585c0 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -92,6 +92,7 @@ static void hidg_release(struct device *dev) { struct f_hidg *hidg = container_of(dev, struct f_hidg, dev); + kfree(hidg->report_desc); kfree(hidg->set_report_buf); kfree(hidg); } @@ -1287,9 +1288,9 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) hidg->report_length = opts->report_length; hidg->report_desc_length = opts->report_desc_length; if (opts->report_desc) { - hidg->report_desc = devm_kmemdup(&hidg->dev, opts->report_desc, - opts->report_desc_length, - GFP_KERNEL); + hidg->report_desc = kmemdup(opts->report_desc, + opts->report_desc_length, + GFP_KERNEL); if (!hidg->report_desc) { ret = -ENOMEM; goto err_put_device; -- cgit From f0b9d97a77fa8f18400450713358303a435ab688 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Mon, 4 Dec 2023 17:38:03 +0100 Subject: serial: ma35d1: Validate console index before assignment The console is immediately assigned to the ma35d1 port without checking its index. This oversight can lead to out-of-bounds errors when the index falls outside the valid '0' to MA35_UART_NR range. Such scenario trigges ran error like the following: UBSAN: array-index-out-of-bounds in drivers/tty/serial/ma35d1_serial.c:555:51 index -1 is out of range for type 'uart_ma35d1_port [17] Check the index before using it and bail out with a warning. Fixes: 930cbf92db01 ("tty: serial: Add Nuvoton ma35d1 serial driver support") Signed-off-by: Andi Shyti Cc: Jacky Huang Cc: # v6.5+ Link: https://lore.kernel.org/r/20231204163804.1331415-2-andi.shyti@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/ma35d1_serial.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/ma35d1_serial.c b/drivers/tty/serial/ma35d1_serial.c index a6a7c405892e..21b574f78b86 100644 --- a/drivers/tty/serial/ma35d1_serial.c +++ b/drivers/tty/serial/ma35d1_serial.c @@ -552,11 +552,19 @@ static void ma35d1serial_console_putchar(struct uart_port *port, unsigned char c */ static void ma35d1serial_console_write(struct console *co, const char *s, u32 count) { - struct uart_ma35d1_port *up = &ma35d1serial_ports[co->index]; + struct uart_ma35d1_port *up; unsigned long flags; int locked = 1; u32 ier; + if ((co->index < 0) || (co->index >= MA35_UART_NR)) { + pr_warn("Failed to write on ononsole port %x, out of range\n", + co->index); + return; + } + + up = &ma35d1serial_ports[co->index]; + if (up->port.sysrq) locked = 0; else if (oops_in_progress) -- cgit From e92fad024929c79460403acf946bc9c09ce5c3a9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 5 Dec 2023 21:55:24 +0200 Subject: serial: 8250_dw: Add ACPI ID for Granite Rapids-D UART Granite Rapids-D has an additional UART that is enumerated via ACPI. Add ACPI ID for it. Signed-off-by: Andy Shevchenko Cc: stable Link: https://lore.kernel.org/r/20231205195524.2705965-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index b94f567647cb..e6218766d0c8 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -777,6 +777,7 @@ static const struct acpi_device_id dw8250_acpi_match[] = { { "INT33C5", (kernel_ulong_t)&dw8250_dw_apb }, { "INT3434", (kernel_ulong_t)&dw8250_dw_apb }, { "INT3435", (kernel_ulong_t)&dw8250_dw_apb }, + { "INTC10EE", (kernel_ulong_t)&dw8250_dw_apb }, { }, }; MODULE_DEVICE_TABLE(acpi, dw8250_acpi_match); -- cgit From 1a031f6edc460e9562098bdedc3918da07c30a6e Mon Sep 17 00:00:00 2001 From: Cameron Williams Date: Thu, 2 Nov 2023 21:10:40 +0000 Subject: parport: Add support for Brainboxes IX/UC/PX parallel cards Adds support for Intashield IX-500/IX-550, UC-146/UC-157, PX-146/PX-157, PX-203 and PX-475 (LPT port) Cc: stable@vger.kernel.org Signed-off-by: Cameron Williams Acked-by: Sudip Mukherjee Link: https://lore.kernel.org/r/AS4PR02MB790389C130410BD864C8DCC9C4A6A@AS4PR02MB7903.eurprd02.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/parport/parport_pc.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 1f236aaf7867..f33b5d1ddfc1 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -2658,6 +2658,8 @@ enum parport_pc_pci_cards { asix_ax99100, quatech_sppxp100, wch_ch382l, + brainboxes_uc146, + brainboxes_px203, }; @@ -2737,6 +2739,8 @@ static struct parport_pc_pci { /* asix_ax99100 */ { 1, { { 0, 1 }, } }, /* quatech_sppxp100 */ { 1, { { 0, 1 }, } }, /* wch_ch382l */ { 1, { { 2, -1 }, } }, + /* brainboxes_uc146 */ { 1, { { 3, -1 }, } }, + /* brainboxes_px203 */ { 1, { { 0, -1 }, } }, }; static const struct pci_device_id parport_pc_pci_tbl[] = { @@ -2833,6 +2837,23 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 }, /* WCH CH382L PCI-E single parallel port card */ { 0x1c00, 0x3050, 0x1c00, 0x3050, 0, 0, wch_ch382l }, + /* Brainboxes IX-500/550 */ + { PCI_VENDOR_ID_INTASHIELD, 0x402a, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, + /* Brainboxes UC-146/UC-157 */ + { PCI_VENDOR_ID_INTASHIELD, 0x0be1, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, brainboxes_uc146 }, + { PCI_VENDOR_ID_INTASHIELD, 0x0be2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, brainboxes_uc146 }, + /* Brainboxes PX-146/PX-257 */ + { PCI_VENDOR_ID_INTASHIELD, 0x401c, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, + /* Brainboxes PX-203 */ + { PCI_VENDOR_ID_INTASHIELD, 0x4007, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, brainboxes_px203 }, + /* Brainboxes PX-475 */ + { PCI_VENDOR_ID_INTASHIELD, 0x401f, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, { 0, } /* terminate list */ }; MODULE_DEVICE_TABLE(pci, parport_pc_pci_tbl); -- cgit From b7c1e53751cb3990153084f31c41f25fde3b629c Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 24 Nov 2023 20:38:14 +0100 Subject: nvmem: Do not expect fixed layouts to grab a layout driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two series lived in parallel for some time, which led to this situation: - The nvmem-layout container is used for dynamic layouts - We now expect fixed layouts to also use the nvmem-layout container but this does not require any additional driver, the support is built-in the nvmem core. Ensure we don't refuse to probe for wrong reasons. Fixes: 27f699e578b1 ("nvmem: core: add support for fixed cells *layout*") Cc: stable@vger.kernel.org Reported-by: Luca Ceresoli Signed-off-by: Miquel Raynal Tested-by: Rafał Miłecki Tested-by: Luca Ceresoli Reviewed-by: Luca Ceresoli Link: https://lore.kernel.org/r/20231124193814.360552-1-miquel.raynal@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index bf42b7e826db..608b352a7d91 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -796,6 +796,12 @@ static struct nvmem_layout *nvmem_layout_get(struct nvmem_device *nvmem) if (!layout_np) return NULL; + /* Fixed layouts don't have a matching driver */ + if (of_device_is_compatible(layout_np, "fixed-layout")) { + of_node_put(layout_np); + return NULL; + } + /* * In case the nvmem device was built-in while the layout was built as a * module, we shall manually request the layout driver loading otherwise -- cgit From 73bf1c9ae6c054c53b8e84452c5e46f86dd28246 Mon Sep 17 00:00:00 2001 From: Xiang Yang Date: Sat, 12 Aug 2023 14:27:48 +0800 Subject: drm/exynos: fix a potential error pointer dereference Smatch reports the warning below: drivers/gpu/drm/exynos/exynos_hdmi.c:1864 hdmi_bind() error: 'crtc' dereferencing possible ERR_PTR() The return value of exynos_drm_crtc_get_by_type maybe ERR_PTR(-ENODEV), which can not be used directly. Fix this by checking the return value before using it. Signed-off-by: Xiang Yang Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_hdmi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index f3aaa4ea3e68..dd9903eab563 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -1861,6 +1861,8 @@ static int hdmi_bind(struct device *dev, struct device *master, void *data) return ret; crtc = exynos_drm_crtc_get_by_type(drm_dev, EXYNOS_DISPLAY_TYPE_HDMI); + if (IS_ERR(crtc)) + return PTR_ERR(crtc); crtc->pipe_clk = &hdata->phy_clk; ret = hdmi_create_connector(encoder); -- cgit From 8d1b7809684c688005706125b804e1f9792d2b1b Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Wed, 1 Nov 2023 18:36:51 +0900 Subject: drm/exynos: fix a wrong error checking Fix a wrong error checking in exynos_drm_dma.c module. In the exynos_drm_register_dma function, both arm_iommu_create_mapping() and iommu_get_domain_for_dev() functions are expected to return NULL as an error. However, the error checking is performed using the statement if(IS_ERR(mapping)), which doesn't provide a suitable error value. So check if 'mapping' is NULL, and if it is, return -ENODEV. This issue[1] was reported by Dan. Changelog v1: - fix build warning. [1] https://lore.kernel.org/all/33e52277-1349-472b-a55b-ab5c3462bfcf@moroto.mountain/ Reported-by : Dan Carpenter Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_dma.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c index a971590b8132..e2c7373f20c6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dma.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c @@ -107,18 +107,16 @@ int exynos_drm_register_dma(struct drm_device *drm, struct device *dev, return 0; if (!priv->mapping) { - void *mapping; + void *mapping = NULL; if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) mapping = arm_iommu_create_mapping(&platform_bus_type, EXYNOS_DEV_ADDR_START, EXYNOS_DEV_ADDR_SIZE); else if (IS_ENABLED(CONFIG_IOMMU_DMA)) mapping = iommu_get_domain_for_dev(priv->dma_dev); - else - mapping = ERR_PTR(-ENODEV); - if (IS_ERR(mapping)) - return PTR_ERR(mapping); + if (!mapping) + return -ENODEV; priv->mapping = mapping; } -- cgit From 3d501dd326fb1c73f1b8206d4c6e1d7b15c07e27 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Dec 2023 16:18:41 +0000 Subject: tcp: do not accept ACK of bytes we never sent This patch is based on a detailed report and ideas from Yepeng Pan and Christian Rossow. ACK seq validation is currently following RFC 5961 5.2 guidelines: The ACK value is considered acceptable only if it is in the range of ((SND.UNA - MAX.SND.WND) <= SEG.ACK <= SND.NXT). All incoming segments whose ACK value doesn't satisfy the above condition MUST be discarded and an ACK sent back. It needs to be noted that RFC 793 on page 72 (fifth check) says: "If the ACK is a duplicate (SEG.ACK < SND.UNA), it can be ignored. If the ACK acknowledges something not yet sent (SEG.ACK > SND.NXT) then send an ACK, drop the segment, and return". The "ignored" above implies that the processing of the incoming data segment continues, which means the ACK value is treated as acceptable. This mitigation makes the ACK check more stringent since any ACK < SND.UNA wouldn't be accepted, instead only ACKs that are in the range ((SND.UNA - MAX.SND.WND) <= SEG.ACK <= SND.NXT) get through. This can be refined for new (and possibly spoofed) flows, by not accepting ACK for bytes that were never sent. This greatly improves TCP security at a little cost. I added a Fixes: tag to make sure this patch will reach stable trees, even if the 'blamed' patch was adhering to the RFC. tp->bytes_acked was added in linux-4.2 Following packetdrill test (courtesy of Yepeng Pan) shows the issue at hand: 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1024) = 0 // ---------------- Handshake ------------------- // // when window scale is set to 14 the window size can be extended to // 65535 * (2^14) = 1073725440. Linux would accept an ACK packet // with ack number in (Server_ISN+1-1073725440. Server_ISN+1) // ,though this ack number acknowledges some data never // sent by the server. +0 < S 0:0(0) win 65535 +0 > S. 0:0(0) ack 1 <...> +0 < . 1:1(0) ack 1 win 65535 +0 accept(3, ..., ...) = 4 // For the established connection, we send an ACK packet, // the ack packet uses ack number 1 - 1073725300 + 2^32, // where 2^32 is used to wrap around. // Note: we used 1073725300 instead of 1073725440 to avoid possible // edge cases. // 1 - 1073725300 + 2^32 = 3221241997 // Oops, old kernels happily accept this packet. +0 < . 1:1001(1000) ack 3221241997 win 65535 // After the kernel fix the following will be replaced by a challenge ACK, // and prior malicious frame would be dropped. +0 > . 1:1(0) ack 1001 Fixes: 354e4aa391ed ("tcp: RFC 5961 5.2 Blind Data Injection Attack Mitigation") Signed-off-by: Eric Dumazet Reported-by: Yepeng Pan Reported-by: Christian Rossow Acked-by: Neal Cardwell Link: https://lore.kernel.org/r/20231205161841.2702925-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 337c8bb07ccc..90de838a2745 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3871,8 +3871,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) * then we can probably ignore it. */ if (before(ack, prior_snd_una)) { + u32 max_window; + + /* do not accept ACK for bytes we never sent. */ + max_window = min_t(u64, tp->max_window, tp->bytes_acked); /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ - if (before(ack, prior_snd_una - tp->max_window)) { + if (before(ack, prior_snd_una - max_window)) { if (!(flag & FLAG_NO_CHALLENGE_ACK)) tcp_send_challenge_ack(sk); return -SKB_DROP_REASON_TCP_TOO_OLD_ACK; -- cgit From 0c7ed1f9197aecada33a08b022e484a97bf584ba Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Tue, 5 Dec 2023 23:13:59 +0100 Subject: net: dsa: mv88e6xxx: Restore USXGMII support for 6393X In 4a56212774ac, USXGMII support was added for 6393X, but this was lost in the PCS conversion (the blamed commit), most likely because these efforts where more or less done in parallel. Restore this feature by porting Michal's patch to fit the new implementation. Reviewed-by: Florian Fainelli Tested-by: Michal Smulski Reviewed-by: Vladimir Oltean Fixes: e5b732a275f5 ("net: dsa: mv88e6xxx: convert 88e639x to phylink_pcs") Signed-off-by: Tobias Waldekranz Link: https://lore.kernel.org/r/20231205221359.3926018-1-tobias@waldekranz.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/pcs-639x.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c index 9a8429f5d09c..d758a6c1b226 100644 --- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c +++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c @@ -465,6 +465,7 @@ mv88e639x_pcs_select(struct mv88e6xxx_chip *chip, int port, case PHY_INTERFACE_MODE_10GBASER: case PHY_INTERFACE_MODE_XAUI: case PHY_INTERFACE_MODE_RXAUI: + case PHY_INTERFACE_MODE_USXGMII: return &mpcs->xg_pcs; default: @@ -873,7 +874,8 @@ static int mv88e6393x_xg_pcs_post_config(struct phylink_pcs *pcs, struct mv88e639x_pcs *mpcs = xg_pcs_to_mv88e639x_pcs(pcs); int err; - if (interface == PHY_INTERFACE_MODE_10GBASER) { + if (interface == PHY_INTERFACE_MODE_10GBASER || + interface == PHY_INTERFACE_MODE_USXGMII) { err = mv88e6393x_erratum_5_2(mpcs); if (err) return err; @@ -886,12 +888,37 @@ static int mv88e6393x_xg_pcs_post_config(struct phylink_pcs *pcs, return mv88e639x_xg_pcs_enable(mpcs); } +static void mv88e6393x_xg_pcs_get_state(struct phylink_pcs *pcs, + struct phylink_link_state *state) +{ + struct mv88e639x_pcs *mpcs = xg_pcs_to_mv88e639x_pcs(pcs); + u16 status, lp_status; + int err; + + if (state->interface != PHY_INTERFACE_MODE_USXGMII) + return mv88e639x_xg_pcs_get_state(pcs, state); + + state->link = false; + + err = mv88e639x_read(mpcs, MV88E6390_USXGMII_PHY_STATUS, &status); + err = err ? : mv88e639x_read(mpcs, MV88E6390_USXGMII_LP_STATUS, &lp_status); + if (err) { + dev_err(mpcs->mdio.dev.parent, + "can't read USXGMII status: %pe\n", ERR_PTR(err)); + return; + } + + state->link = !!(status & MDIO_USXGMII_LINK); + state->an_complete = state->link; + phylink_decode_usxgmii_word(state, lp_status); +} + static const struct phylink_pcs_ops mv88e6393x_xg_pcs_ops = { .pcs_enable = mv88e6393x_xg_pcs_enable, .pcs_disable = mv88e6393x_xg_pcs_disable, .pcs_pre_config = mv88e6393x_xg_pcs_pre_config, .pcs_post_config = mv88e6393x_xg_pcs_post_config, - .pcs_get_state = mv88e639x_xg_pcs_get_state, + .pcs_get_state = mv88e6393x_xg_pcs_get_state, .pcs_config = mv88e639x_xg_pcs_config, }; -- cgit From 8804fa04a492f4176ea407390052292912227820 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 6 Dec 2023 13:39:27 -0600 Subject: ALSA: hda/realtek: Add Framework laptop 16 to quirks The Framework 16" laptop has the same controller as other Framework models. Apply the presence detection quirk. Signed-off-by: Mario Limonciello Cc: Link: https://lore.kernel.org/r/20231206193927.2996-1-mario.limonciello@amd.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 51e1bfd7bdde..5b5f298870be 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10273,6 +10273,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0xf111, 0x0005, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x0006, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 -- cgit From eb99b1b72a424a79f56c972e0fd7ad01fe93a008 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Wed, 6 Dec 2023 22:32:11 +0000 Subject: ALSA: pcmtest: stop timer before buffer is released Stop timer in the 'trigger' and 'sync_stop' callbacks since we want the timer to be stopped before the DMA buffer is released. Otherwise, it could trigger a kernel panic in some circumstances, for instance when the DMA buffer is already released but the timer callback is still running. Signed-off-by: Ivan Orlov Link: https://lore.kernel.org/r/20231206223211.12761-1-ivan.orlov0322@gmail.com Signed-off-by: Takashi Iwai --- sound/drivers/pcmtest.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sound/drivers/pcmtest.c b/sound/drivers/pcmtest.c index b59b78a09224..b8bff5522bce 100644 --- a/sound/drivers/pcmtest.c +++ b/sound/drivers/pcmtest.c @@ -397,7 +397,6 @@ static int snd_pcmtst_pcm_close(struct snd_pcm_substream *substream) struct pcmtst_buf_iter *v_iter = substream->runtime->private_data; timer_shutdown_sync(&v_iter->timer_instance); - v_iter->substream = NULL; playback_capture_test = !v_iter->is_buf_corrupted; kfree(v_iter); return 0; @@ -435,6 +434,7 @@ static int snd_pcmtst_pcm_trigger(struct snd_pcm_substream *substream, int cmd) case SNDRV_PCM_TRIGGER_PAUSE_PUSH: // We can't call timer_shutdown_sync here, as it is forbidden to sleep here v_iter->suspend = true; + timer_delete(&v_iter->timer_instance); break; } @@ -512,12 +512,22 @@ static int snd_pcmtst_ioctl(struct snd_pcm_substream *substream, unsigned int cm return snd_pcm_lib_ioctl(substream, cmd, arg); } +static int snd_pcmtst_sync_stop(struct snd_pcm_substream *substream) +{ + struct pcmtst_buf_iter *v_iter = substream->runtime->private_data; + + timer_delete_sync(&v_iter->timer_instance); + + return 0; +} + static const struct snd_pcm_ops snd_pcmtst_playback_ops = { .open = snd_pcmtst_pcm_open, .close = snd_pcmtst_pcm_close, .trigger = snd_pcmtst_pcm_trigger, .hw_params = snd_pcmtst_pcm_hw_params, .ioctl = snd_pcmtst_ioctl, + .sync_stop = snd_pcmtst_sync_stop, .hw_free = snd_pcmtst_pcm_hw_free, .prepare = snd_pcmtst_pcm_prepare, .pointer = snd_pcmtst_pcm_pointer, @@ -530,6 +540,7 @@ static const struct snd_pcm_ops snd_pcmtst_capture_ops = { .hw_params = snd_pcmtst_pcm_hw_params, .hw_free = snd_pcmtst_pcm_hw_free, .ioctl = snd_pcmtst_ioctl, + .sync_stop = snd_pcmtst_sync_stop, .prepare = snd_pcmtst_pcm_prepare, .pointer = snd_pcmtst_pcm_pointer, }; -- cgit From 0ad722bd9ee3a9bdfca9613148645e4c9b7f26cf Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Tue, 5 Dec 2023 11:26:25 +0200 Subject: nfp: flower: fix for take a mutex lock in soft irq context and rcu lock The neighbour event callback call the function nfp_tun_write_neigh, this function will take a mutex lock and it is in soft irq context, change the work queue to process the neighbour event. Move the nfp_tun_write_neigh function out of range rcu_read_lock/unlock() in function nfp_tunnel_request_route_v4 and nfp_tunnel_request_route_v6. Fixes: abc210952af7 ("nfp: flower: tunnel neigh support bond offload") CC: stable@vger.kernel.org # 6.2+ Signed-off-by: Hui Zhou Signed-off-by: Louis Peens Signed-off-by: David S. Miller --- .../ethernet/netronome/nfp/flower/tunnel_conf.c | 127 +++++++++++++++------ 1 file changed, 95 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 060a77f2265d..e522845c7c21 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -160,6 +160,18 @@ struct nfp_tun_mac_addr_offload { u8 addr[ETH_ALEN]; }; +/** + * struct nfp_neigh_update_work - update neighbour information to nfp + * @work: Work queue for writing neigh to the nfp + * @n: neighbour entry + * @app: Back pointer to app + */ +struct nfp_neigh_update_work { + struct work_struct work; + struct neighbour *n; + struct nfp_app *app; +}; + enum nfp_flower_mac_offload_cmd { NFP_TUNNEL_MAC_OFFLOAD_ADD = 0, NFP_TUNNEL_MAC_OFFLOAD_DEL = 1, @@ -607,38 +619,30 @@ err: nfp_flower_cmsg_warn(app, "Neighbour configuration failed.\n"); } -static int -nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, - void *ptr) +static void +nfp_tun_release_neigh_update_work(struct nfp_neigh_update_work *update_work) { - struct nfp_flower_priv *app_priv; - struct netevent_redirect *redir; - struct neighbour *n; + neigh_release(update_work->n); + kfree(update_work); +} + +static void nfp_tun_neigh_update(struct work_struct *work) +{ + struct nfp_neigh_update_work *update_work; struct nfp_app *app; + struct neighbour *n; bool neigh_invalid; int err; - switch (event) { - case NETEVENT_REDIRECT: - redir = (struct netevent_redirect *)ptr; - n = redir->neigh; - break; - case NETEVENT_NEIGH_UPDATE: - n = (struct neighbour *)ptr; - break; - default: - return NOTIFY_DONE; - } - - neigh_invalid = !(n->nud_state & NUD_VALID) || n->dead; - - app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); - app = app_priv->app; + update_work = container_of(work, struct nfp_neigh_update_work, work); + app = update_work->app; + n = update_work->n; if (!nfp_flower_get_port_id_from_netdev(app, n->dev)) - return NOTIFY_DONE; + goto out; #if IS_ENABLED(CONFIG_INET) + neigh_invalid = !(n->nud_state & NUD_VALID) || n->dead; if (n->tbl->family == AF_INET6) { #if IS_ENABLED(CONFIG_IPV6) struct flowi6 flow6 = {}; @@ -655,13 +659,11 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, dst = ip6_dst_lookup_flow(dev_net(n->dev), NULL, &flow6, NULL); if (IS_ERR(dst)) - return NOTIFY_DONE; + goto out; dst_release(dst); } nfp_tun_write_neigh(n->dev, app, &flow6, n, true, false); -#else - return NOTIFY_DONE; #endif /* CONFIG_IPV6 */ } else { struct flowi4 flow4 = {}; @@ -678,17 +680,71 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, rt = ip_route_output_key(dev_net(n->dev), &flow4); err = PTR_ERR_OR_ZERO(rt); if (err) - return NOTIFY_DONE; + goto out; ip_rt_put(rt); } nfp_tun_write_neigh(n->dev, app, &flow4, n, false, false); } -#else - return NOTIFY_DONE; #endif /* CONFIG_INET */ +out: + nfp_tun_release_neigh_update_work(update_work); +} - return NOTIFY_OK; +static struct nfp_neigh_update_work * +nfp_tun_alloc_neigh_update_work(struct nfp_app *app, struct neighbour *n) +{ + struct nfp_neigh_update_work *update_work; + + update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC); + if (!update_work) + return NULL; + + INIT_WORK(&update_work->work, nfp_tun_neigh_update); + neigh_hold(n); + update_work->n = n; + update_work->app = app; + + return update_work; +} + +static int +nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct nfp_neigh_update_work *update_work; + struct nfp_flower_priv *app_priv; + struct netevent_redirect *redir; + struct neighbour *n; + struct nfp_app *app; + + switch (event) { + case NETEVENT_REDIRECT: + redir = (struct netevent_redirect *)ptr; + n = redir->neigh; + break; + case NETEVENT_NEIGH_UPDATE: + n = (struct neighbour *)ptr; + break; + default: + return NOTIFY_DONE; + } +#if IS_ENABLED(CONFIG_IPV6) + if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) +#else + if (n->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); + app = app_priv->app; + update_work = nfp_tun_alloc_neigh_update_work(app, n); + if (!update_work) + return NOTIFY_DONE; + + queue_work(system_highpri_wq, &update_work->work); + + return NOTIFY_DONE; } void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) @@ -706,6 +762,7 @@ void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL); if (!netdev) goto fail_rcu_unlock; + dev_hold(netdev); flow.daddr = payload->ipv4_addr; flow.flowi4_proto = IPPROTO_UDP; @@ -725,13 +782,16 @@ void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) ip_rt_put(rt); if (!n) goto fail_rcu_unlock; + rcu_read_unlock(); + nfp_tun_write_neigh(n->dev, app, &flow, n, false, true); neigh_release(n); - rcu_read_unlock(); + dev_put(netdev); return; fail_rcu_unlock: rcu_read_unlock(); + dev_put(netdev); nfp_flower_cmsg_warn(app, "Requested route not found.\n"); } @@ -749,6 +809,7 @@ void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL); if (!netdev) goto fail_rcu_unlock; + dev_hold(netdev); flow.daddr = payload->ipv6_addr; flow.flowi6_proto = IPPROTO_UDP; @@ -766,14 +827,16 @@ void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) dst_release(dst); if (!n) goto fail_rcu_unlock; + rcu_read_unlock(); nfp_tun_write_neigh(n->dev, app, &flow, n, true, true); neigh_release(n); - rcu_read_unlock(); + dev_put(netdev); return; fail_rcu_unlock: rcu_read_unlock(); + dev_put(netdev); nfp_flower_cmsg_warn(app, "Requested IPv6 route not found.\n"); } -- cgit From 5a6c9a05e55cb2972396cc991af9d74c8c15029a Mon Sep 17 00:00:00 2001 From: Lingkai Dong Date: Wed, 6 Dec 2023 13:51:58 +0000 Subject: drm: Fix FD ownership check in drm_master_check_perm() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DRM subsystem keeps a record of the owner of a DRM device file descriptor using thread group ID (TGID) instead of process ID (PID), to ensures all threads within the same userspace process are considered the owner. However, the DRM master ownership check compares the current thread's PID against the record, so the thread is incorrectly considered to be not the FD owner if the PID is not equal to the TGID. This causes DRM ioctls to be denied master privileges, even if the same thread that opened the FD performs an ioctl. Fix this by checking TGID. Fixes: 4230cea89cafb ("drm: Track clients by tgid and not tid") Signed-off-by: Lingkai Dong Reviewed-by: Christian König Reviewed-by: Tvrtko Ursulin Cc: # v6.4+ Link: https://patchwork.freedesktop.org/patch/msgid/PA6PR08MB107665920BE9A96658CDA04CE8884A@PA6PR08MB10766.eurprd08.prod.outlook.com Signed-off-by: Christian König --- drivers/gpu/drm/drm_auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c index 2ed2585ded37..6899b3dc1f12 100644 --- a/drivers/gpu/drm/drm_auth.c +++ b/drivers/gpu/drm/drm_auth.c @@ -236,7 +236,7 @@ static int drm_master_check_perm(struct drm_device *dev, struct drm_file *file_priv) { if (file_priv->was_master && - rcu_access_pointer(file_priv->pid) == task_pid(current)) + rcu_access_pointer(file_priv->pid) == task_tgid(current)) return 0; if (!capable(CAP_SYS_ADMIN)) -- cgit From 107b4e063d78c300b21e2d5291b1aa94c514ea5b Mon Sep 17 00:00:00 2001 From: Georg Gottleuber Date: Wed, 20 Sep 2023 10:52:10 +0200 Subject: nvme-pci: Add sleep quirk for Kingston drives Some Kingston NV1 and A2000 are wasting a lot of power on specific TUXEDO platforms in s2idle sleep if 'Simple Suspend' is used. This patch applies a new quirk 'Force No Simple Suspend' to achieve a low power sleep without 'Simple Suspend'. Signed-off-by: Werner Sembach Signed-off-by: Georg Gottleuber Cc: Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 5 +++++ drivers/nvme/host/pci.c | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index efb20a8e23a3..e7411dac00f7 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -156,6 +156,11 @@ enum nvme_quirks { * No temperature thresholds for channels other than 0 (Composite). */ NVME_QUIRK_NO_SECONDARY_TEMP_THRESH = (1 << 19), + + /* + * Disables simple suspend/resume path. + */ + NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND = (1 << 20), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index fad4cccce745..61af7ff1a9d6 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2902,6 +2902,18 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) if ((dmi_match(DMI_BOARD_VENDOR, "LENOVO")) && dmi_match(DMI_BOARD_NAME, "LNVNB161216")) return NVME_QUIRK_SIMPLE_SUSPEND; + } else if (pdev->vendor == 0x2646 && (pdev->device == 0x2263 || + pdev->device == 0x500f)) { + /* + * Exclude some Kingston NV1 and A2000 devices from + * NVME_QUIRK_SIMPLE_SUSPEND. Do a full suspend to save a + * lot fo energy with s2idle sleep on some TUXEDO platforms. + */ + if (dmi_match(DMI_BOARD_NAME, "NS5X_NS7XAU") || + dmi_match(DMI_BOARD_NAME, "NS5x_7xAU") || + dmi_match(DMI_BOARD_NAME, "NS5x_7xPU") || + dmi_match(DMI_BOARD_NAME, "PH4PRX1_PH6PRX1")) + return NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND; } return 0; @@ -2932,7 +2944,9 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, dev->dev = get_device(&pdev->dev); quirks |= check_vendor_combination_bug(pdev); - if (!noacpi && acpi_storage_d3(&pdev->dev)) { + if (!noacpi && + !(quirks & NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND) && + acpi_storage_d3(&pdev->dev)) { /* * Some systems use a bios work around to ask for D3 on * platforms that support kernel managed suspend. -- cgit From 705318a99a138c29a512a72c3e0043b3cd7f55f4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 6 Dec 2023 13:26:47 +0000 Subject: io_uring/af_unix: disable sending io_uring over sockets File reference cycles have caused lots of problems for io_uring in the past, and it still doesn't work exactly right and races with unix_stream_read_generic(). The safest fix would be to completely disallow sending io_uring files via sockets via SCM_RIGHT, so there are no possible cycles invloving registered files and thus rendering SCM accounting on the io_uring side unnecessary. Cc: Fixes: 0091bfc81741b ("io_uring/af_unix: defer registered files gc to io_uring release") Reported-and-suggested-by: Jann Horn Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/c716c88321939156909cfa1bd8b0faaf1c804103.1701868795.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/rsrc.h | 7 ------- net/core/scm.c | 6 ++++++ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 8625181fb87a..08ac0d8e07ef 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -77,17 +77,10 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file); -#if defined(CONFIG_UNIX) -static inline bool io_file_need_scm(struct file *filp) -{ - return !!unix_get_socket(filp); -} -#else static inline bool io_file_need_scm(struct file *filp) { return false; } -#endif static inline int io_scm_file_account(struct io_ring_ctx *ctx, struct file *file) diff --git a/net/core/scm.c b/net/core/scm.c index 880027ecf516..7dc47c17d863 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -103,6 +104,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (fd < 0 || !(file = fget_raw(fd))) return -EBADF; + /* don't allow io_uring files */ + if (io_uring_get_socket(file)) { + fput(file); + return -EINVAL; + } *fpp++ = file; fpl->count++; } -- cgit From b82a8dbd3d2f4563156f7150c6f2ecab6e960b30 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 4 Dec 2023 11:31:38 +0300 Subject: x86/coco: Disable 32-bit emulation by default on TDX and SEV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The INT 0x80 instruction is used for 32-bit x86 Linux syscalls. The kernel expects to receive a software interrupt as a result of the INT 0x80 instruction. However, an external interrupt on the same vector triggers the same handler. The kernel interprets an external interrupt on vector 0x80 as a 32-bit system call that came from userspace. A VMM can inject external interrupts on any arbitrary vector at any time. This remains true even for TDX and SEV guests where the VMM is untrusted. Put together, this allows an untrusted VMM to trigger int80 syscall handling at any given point. The content of the guest register file at that moment defines what syscall is triggered and its arguments. It opens the guest OS to manipulation from the VMM side. Disable 32-bit emulation by default for TDX and SEV. User can override it with the ia32_emulation=y command line option. [ dhansen: reword the changelog ] Reported-by: Supraja Sridhara Reported-by: Benedict Schlüter Reported-by: Mark Kuhne Reported-by: Andrin Bertschi Reported-by: Shweta Shinde Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov (AMD) Cc: # v6.0+: 1da5c9b x86: Introduce ia32_enabled() Cc: # v6.0+ --- arch/x86/coco/tdx/tdx.c | 10 ++++++++++ arch/x86/include/asm/ia32.h | 7 +++++++ arch/x86/mm/mem_encrypt_amd.c | 11 +++++++++++ 3 files changed, 28 insertions(+) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 1b5d17a9f70d..545e54b829b2 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -891,5 +892,14 @@ void __init tdx_early_init(void) */ x86_cpuinit.parallel_bringup = false; + /* + * The VMM is capable of injecting interrupt 0x80 and triggering the + * compatibility syscall path. + * + * By default, the 32-bit emulation is disabled in order to ensure + * the safety of the VM. + */ + ia32_disable(); + pr_info("Guest detected\n"); } diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 5a2ae24b1204..9805629479d9 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -75,6 +75,11 @@ static inline bool ia32_enabled(void) return __ia32_enabled; } +static inline void ia32_disable(void) +{ + __ia32_enabled = false; +} + #else /* !CONFIG_IA32_EMULATION */ static inline bool ia32_enabled(void) @@ -82,6 +87,8 @@ static inline bool ia32_enabled(void) return IS_ENABLED(CONFIG_X86_32); } +static inline void ia32_disable(void) {} + #endif #endif /* _ASM_X86_IA32_H */ diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index a68f2dda0948..70b91de2e053 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "mm_internal.h" @@ -481,6 +482,16 @@ void __init sme_early_init(void) */ if (sev_status & MSR_AMD64_SEV_ES_ENABLED) x86_cpuinit.parallel_bringup = false; + + /* + * The VMM is capable of injecting interrupt 0x80 and triggering the + * compatibility syscall path. + * + * By default, the 32-bit emulation is disabled in order to ensure + * the safety of the VM. + */ + if (sev_status & MSR_AMD64_SEV_ENABLED) + ia32_disable(); } void __init mem_encrypt_free_decrypted_mem(void) -- cgit From be5341eb0d43b1e754799498bd2e8756cc167a41 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2023 11:31:39 +0300 Subject: x86/entry: Convert INT 0x80 emulation to IDTENTRY There is no real reason to have a separate ASM entry point implementation for the legacy INT 0x80 syscall emulation on 64-bit. IDTENTRY provides all the functionality needed with the only difference that it does not: - save the syscall number (AX) into pt_regs::orig_ax - set pt_regs::ax to -ENOSYS Both can be done safely in the C code of an IDTENTRY before invoking any of the syscall related functions which depend on this convention. Aside of ASM code reduction this prepares for detecting and handling a local APIC injected vector 0x80. [ kirill.shutemov: More verbose comments ] Suggested-by: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Borislav Petkov (AMD) Cc: # v6.0+ --- arch/x86/entry/common.c | 58 +++++++++++++++++++++++++++++- arch/x86/entry/entry_64_compat.S | 77 ---------------------------------------- arch/x86/include/asm/idtentry.h | 4 +++ arch/x86/include/asm/proto.h | 4 --- arch/x86/kernel/idt.c | 2 +- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm.S | 2 +- 7 files changed, 64 insertions(+), 85 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index d813160b14d8..8ca4d57d68eb 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -167,7 +167,62 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) } } -/* Handles int $0x80 */ +#ifdef CONFIG_IA32_EMULATION +/** + * int80_emulation - 32-bit legacy syscall entry + * + * This entry point can be used by 32-bit and 64-bit programs to perform + * 32-bit system calls. Instances of INT $0x80 can be found inline in + * various programs and libraries. It is also used by the vDSO's + * __kernel_vsyscall fallback for hardware that doesn't support a faster + * entry method. Restarted 32-bit system calls also fall back to INT + * $0x80 regardless of what instruction was originally used to do the + * system call. + * + * This is considered a slow path. It is not used by most libc + * implementations on modern hardware except during process startup. + * + * The arguments for the INT $0x80 based syscall are on stack in the + * pt_regs structure: + * eax: system call number + * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 + */ +DEFINE_IDTENTRY_RAW(int80_emulation) +{ + int nr; + + /* Establish kernel context. */ + enter_from_user_mode(regs); + + instrumentation_begin(); + add_random_kstack_offset(); + + /* + * The low level idtentry code pushed -1 into regs::orig_ax + * and regs::ax contains the syscall number. + * + * User tracing code (ptrace or signal handlers) might assume + * that the regs::orig_ax contains a 32-bit number on invoking + * a 32-bit syscall. + * + * Establish the syscall convention by saving the 32bit truncated + * syscall number in regs::orig_ax and by invalidating regs::ax. + */ + regs->orig_ax = regs->ax & GENMASK(31, 0); + regs->ax = -ENOSYS; + + nr = syscall_32_enter(regs); + + local_irq_enable(); + nr = syscall_enter_from_user_mode_work(regs, nr); + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} +#else /* CONFIG_IA32_EMULATION */ + +/* Handles int $0x80 on a 32bit kernel */ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { int nr = syscall_32_enter(regs); @@ -186,6 +241,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) instrumentation_end(); syscall_exit_to_user_mode(regs); } +#endif /* !CONFIG_IA32_EMULATION */ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) { diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 27c05d08558a..de94e2e84ecc 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -275,80 +275,3 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR int3 SYM_CODE_END(entry_SYSCALL_compat) - -/* - * 32-bit legacy system call entry. - * - * 32-bit x86 Linux system calls traditionally used the INT $0x80 - * instruction. INT $0x80 lands here. - * - * This entry point can be used by 32-bit and 64-bit programs to perform - * 32-bit system calls. Instances of INT $0x80 can be found inline in - * various programs and libraries. It is also used by the vDSO's - * __kernel_vsyscall fallback for hardware that doesn't support a faster - * entry method. Restarted 32-bit system calls also fall back to INT - * $0x80 regardless of what instruction was originally used to do the - * system call. - * - * This is considered a slow path. It is not used by most libc - * implementations on modern hardware except during process startup. - * - * Arguments: - * eax system call number - * ebx arg1 - * ecx arg2 - * edx arg3 - * esi arg4 - * edi arg5 - * ebp arg6 - */ -SYM_CODE_START(entry_INT80_compat) - UNWIND_HINT_ENTRY - ENDBR - /* - * Interrupts are off on entry. - */ - ASM_CLAC /* Do this early to minimize exposure */ - ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV - - /* - * User tracing code (ptrace or signal handlers) might assume that - * the saved RAX contains a 32-bit number when we're invoking a 32-bit - * syscall. Just in case the high bits are nonzero, zero-extend - * the syscall number. (This could almost certainly be deleted - * with no ill effects.) - */ - movl %eax, %eax - - /* switch to thread stack expects orig_ax and rdi to be pushed */ - pushq %rax /* pt_regs->orig_ax */ - - /* Need to switch before accessing the thread stack. */ - SWITCH_TO_KERNEL_CR3 scratch_reg=%rax - - /* In the Xen PV case we already run on the thread stack. */ - ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV - - movq %rsp, %rax - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp - - pushq 5*8(%rax) /* regs->ss */ - pushq 4*8(%rax) /* regs->rsp */ - pushq 3*8(%rax) /* regs->eflags */ - pushq 2*8(%rax) /* regs->cs */ - pushq 1*8(%rax) /* regs->ip */ - pushq 0*8(%rax) /* regs->orig_ax */ -.Lint80_keep_stack: - - PUSH_AND_CLEAR_REGS rax=$-ENOSYS - UNWIND_HINT_REGS - - cld - - IBRS_ENTER - UNTRAIN_RET - - movq %rsp, %rdi - call do_int80_syscall_32 - jmp swapgs_restore_regs_and_return_to_usermode -SYM_CODE_END(entry_INT80_compat) diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 05fd175cec7d..13639e57e1f8 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -569,6 +569,10 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); +#if defined(CONFIG_IA32_EMULATION) +DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR, int80_emulation); +#endif + #ifdef CONFIG_X86_MCE #ifdef CONFIG_X86_64 DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 4d84122bd643..484f4f0131a5 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -32,10 +32,6 @@ void entry_SYSCALL_compat(void); void entry_SYSCALL_compat_safe_stack(void); void entry_SYSRETL_compat_unsafe_stack(void); void entry_SYSRETL_compat_end(void); -void entry_INT80_compat(void); -#ifdef CONFIG_XEN_PV -void xen_entry_INT80_compat(void); -#endif #else /* !CONFIG_IA32_EMULATION */ #define entry_SYSCALL_compat NULL #define entry_SYSENTER_compat NULL diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 8857abc706e4..660b601f1d6c 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -121,7 +121,7 @@ static const __initconst struct idt_data def_idts[] = { static const struct idt_data ia32_idt[] __initconst = { #if defined(CONFIG_IA32_EMULATION) - SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat), + SYSG(IA32_SYSCALL_VECTOR, asm_int80_emulation), #elif defined(CONFIG_X86_32) SYSG(IA32_SYSCALL_VECTOR, entry_INT80_32), #endif diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index bbbfdd495ebd..aeb33e0a3f76 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -704,7 +704,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_int3, false ), TRAP_ENTRY(exc_overflow, false ), #ifdef CONFIG_IA32_EMULATION - { entry_INT80_compat, xen_entry_INT80_compat, false }, + TRAP_ENTRY(int80_emulation, false ), #endif TRAP_ENTRY(exc_page_fault, false ), TRAP_ENTRY(exc_divide_error, false ), diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 9e5e68008785..1a9cd18dfbd3 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -156,7 +156,7 @@ xen_pv_trap asm_xenpv_exc_machine_check #endif /* CONFIG_X86_MCE */ xen_pv_trap asm_exc_simd_coprocessor_error #ifdef CONFIG_IA32_EMULATION -xen_pv_trap entry_INT80_compat +xen_pv_trap asm_int80_emulation #endif xen_pv_trap asm_exc_xen_unknown_trap xen_pv_trap asm_exc_xen_hypervisor_callback -- cgit From 55617fb991df535f953589586468612351575704 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2023 11:31:40 +0300 Subject: x86/entry: Do not allow external 0x80 interrupts The INT 0x80 instruction is used for 32-bit x86 Linux syscalls. The kernel expects to receive a software interrupt as a result of the INT 0x80 instruction. However, an external interrupt on the same vector also triggers the same codepath. An external interrupt on vector 0x80 will currently be interpreted as a 32-bit system call, and assuming that it was a user context. Panic on external interrupts on the vector. To distinguish software interrupts from external ones, the kernel checks the APIC ISR bit relevant to the 0x80 vector. For software interrupts, this bit will be 0. Signed-off-by: Thomas Gleixner Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Borislav Petkov (AMD) Cc: # v6.0+ --- arch/x86/entry/common.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 8ca4d57d68eb..6356060caaf3 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -26,6 +26,7 @@ #include #endif +#include #include #include #include @@ -168,6 +169,25 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) } #ifdef CONFIG_IA32_EMULATION +static __always_inline bool int80_is_external(void) +{ + const unsigned int offs = (0x80 / 32) * 0x10; + const u32 bit = BIT(0x80 % 32); + + /* The local APIC on XENPV guests is fake */ + if (cpu_feature_enabled(X86_FEATURE_XENPV)) + return false; + + /* + * If vector 0x80 is set in the APIC ISR then this is an external + * interrupt. Either from broken hardware or injected by a VMM. + * + * Note: In guest mode this is only valid for secure guests where + * the secure module fully controls the vAPIC exposed to the guest. + */ + return apic_read(APIC_ISR + offs) & bit; +} + /** * int80_emulation - 32-bit legacy syscall entry * @@ -191,12 +211,27 @@ DEFINE_IDTENTRY_RAW(int80_emulation) { int nr; - /* Establish kernel context. */ + /* Kernel does not use INT $0x80! */ + if (unlikely(!user_mode(regs))) { + irqentry_enter(regs); + instrumentation_begin(); + panic("Unexpected external interrupt 0x80\n"); + } + + /* + * Establish kernel context for instrumentation, including for + * int80_is_external() below which calls into the APIC driver. + * Identical for soft and external interrupts. + */ enter_from_user_mode(regs); instrumentation_begin(); add_random_kstack_offset(); + /* Validate that this is a soft interrupt to the extent possible */ + if (unlikely(int80_is_external())) + panic("Unexpected external interrupt 0x80\n"); + /* * The low level idtentry code pushed -1 into regs::orig_ax * and regs::ax contains the syscall number. -- cgit From f4116bfc44621882556bbf70f5284fbf429a5cf6 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 4 Dec 2023 11:31:41 +0300 Subject: x86/tdx: Allow 32-bit emulation by default 32-bit emulation was disabled on TDX to prevent a possible attack by a VMM injecting an interrupt on vector 0x80. Now that int80_emulation() has a check for external interrupts the limitation can be lifted. To distinguish software interrupts from external ones, int80_emulation() checks the APIC ISR bit relevant to the 0x80 vector. For software interrupts, this bit will be 0. On TDX, the VAPIC state (including ISR) is protected and cannot be manipulated by the VMM. The ISR bit is set by the microcode flow during the handling of posted interrupts. [ dhansen: more changelog tweaks ] Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov (AMD) Cc: # v6.0+ --- arch/x86/coco/tdx/tdx.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 545e54b829b2..cf1f13c82175 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -892,14 +892,5 @@ void __init tdx_early_init(void) */ x86_cpuinit.parallel_bringup = false; - /* - * The VMM is capable of injecting interrupt 0x80 and triggering the - * compatibility syscall path. - * - * By default, the 32-bit emulation is disabled in order to ensure - * the safety of the VM. - */ - ia32_disable(); - pr_info("Guest detected\n"); } -- cgit From c5a595000e2677e865a39f249c056bc05d6e55fd Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 6 Dec 2023 15:27:05 -0800 Subject: net: tls, update curr on splice as well The curr pointer must also be updated on the splice similar to how we do this for other copy types. Fixes: d829e9c4112b ("tls: convert to generic sk_msg interface") Signed-off-by: John Fastabend Reported-by: Jann Horn Link: https://lore.kernel.org/r/20231206232706.374377-2-john.fastabend@gmail.com Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 316f76187962..e37b4d2e2acd 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -952,6 +952,8 @@ static int tls_sw_sendmsg_splice(struct sock *sk, struct msghdr *msg, } sk_msg_page_add(msg_pl, page, part, off); + msg_pl->sg.copybreak = 0; + msg_pl->sg.curr = msg_pl->sg.end; sk_mem_charge(sk, part); *copied += part; try_to_copy -= part; -- cgit From bb9aefde5bbaf6c168c77ba635c155b4980c2287 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 6 Dec 2023 15:27:06 -0800 Subject: bpf: sockmap, updating the sg structure should also update curr Curr pointer should be updated when the sg structure is shifted. Fixes: 7246d8ed4dcce ("bpf: helper to pop data from messages") Signed-off-by: John Fastabend Link: https://lore.kernel.org/r/20231206232706.374377-3-john.fastabend@gmail.com Signed-off-by: Jakub Kicinski --- net/core/filter.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index 7e4d7c3bcc84..1737884be52f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2602,6 +2602,22 @@ BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes) return 0; } +static void sk_msg_reset_curr(struct sk_msg *msg) +{ + u32 i = msg->sg.start; + u32 len = 0; + + do { + len += sk_msg_elem(msg, i)->length; + sk_msg_iter_var_next(i); + if (len >= msg->sg.size) + break; + } while (i != msg->sg.end); + + msg->sg.curr = i; + msg->sg.copybreak = 0; +} + static const struct bpf_func_proto bpf_msg_cork_bytes_proto = { .func = bpf_msg_cork_bytes, .gpl_only = false, @@ -2721,6 +2737,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, msg->sg.end - shift + NR_MSG_FRAG_IDS : msg->sg.end - shift; out: + sk_msg_reset_curr(msg); msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset; msg->data_end = msg->data + bytes; return 0; @@ -2857,6 +2874,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, msg->sg.data[new] = rsge; } + sk_msg_reset_curr(msg); sk_msg_compute_data_pointers(msg); return 0; } @@ -3025,6 +3043,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, sk_mem_uncharge(msg->sk, len - pop); msg->sg.size -= (len - pop); + sk_msg_reset_curr(msg); sk_msg_compute_data_pointers(msg); return 0; } -- cgit From 44ec98ea5ea9cfecd31a5c4cc124703cb5442832 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Dec 2023 23:31:01 +0200 Subject: psample: Require 'CAP_NET_ADMIN' when joining "packets" group The "psample" generic netlink family notifies sampled packets over the "packets" multicast group. This is problematic since by default generic netlink allows non-root users to listen to these notifications. Fix by marking the group with the 'GENL_UNS_ADMIN_PERM' flag. This will prevent non-root users or root without the 'CAP_NET_ADMIN' capability (in the user namespace owning the network namespace) from joining the group. Tested using [1]. Before: # capsh -- -c ./psample_repo # capsh --drop=cap_net_admin -- -c ./psample_repo After: # capsh -- -c ./psample_repo # capsh --drop=cap_net_admin -- -c ./psample_repo Failed to join "packets" multicast group [1] $ cat psample.c #include #include #include #include int join_grp(struct nl_sock *sk, const char *grp_name) { int grp, err; grp = genl_ctrl_resolve_grp(sk, "psample", grp_name); if (grp < 0) { fprintf(stderr, "Failed to resolve \"%s\" multicast group\n", grp_name); return grp; } err = nl_socket_add_memberships(sk, grp, NFNLGRP_NONE); if (err) { fprintf(stderr, "Failed to join \"%s\" multicast group\n", grp_name); return err; } return 0; } int main(int argc, char **argv) { struct nl_sock *sk; int err; sk = nl_socket_alloc(); if (!sk) { fprintf(stderr, "Failed to allocate socket\n"); return -1; } err = genl_connect(sk); if (err) { fprintf(stderr, "Failed to connect socket\n"); return err; } err = join_grp(sk, "config"); if (err) return err; err = join_grp(sk, "packets"); if (err) return err; return 0; } $ gcc -I/usr/include/libnl3 -lnl-3 -lnl-genl-3 -o psample_repo psample.c Fixes: 6ae0a6286171 ("net: Introduce psample, a new genetlink channel for packet sampling") Reported-by: "The UK's National Cyber Security Centre (NCSC)" Signed-off-by: Ido Schimmel Reviewed-by: Jacob Keller Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231206213102.1824398-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/psample/psample.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/psample/psample.c b/net/psample/psample.c index 81a794e36f53..c34e902855db 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -31,7 +31,8 @@ enum psample_nl_multicast_groups { static const struct genl_multicast_group psample_nl_mcgrps[] = { [PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME }, - [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME }, + [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME, + .flags = GENL_UNS_ADMIN_PERM }, }; static struct genl_family psample_nl_family __ro_after_init; -- cgit From e03781879a0d524ce3126678d50a80484a513c4b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Dec 2023 23:31:02 +0200 Subject: drop_monitor: Require 'CAP_SYS_ADMIN' when joining "events" group The "NET_DM" generic netlink family notifies drop locations over the "events" multicast group. This is problematic since by default generic netlink allows non-root users to listen to these notifications. Fix by adding a new field to the generic netlink multicast group structure that when set prevents non-root users or root without the 'CAP_SYS_ADMIN' capability (in the user namespace owning the network namespace) from joining the group. Set this field for the "events" group. Use 'CAP_SYS_ADMIN' rather than 'CAP_NET_ADMIN' because of the nature of the information that is shared over this group. Note that the capability check in this case will always be performed against the initial user namespace since the family is not netns aware and only operates in the initial network namespace. A new field is added to the structure rather than using the "flags" field because the existing field uses uAPI flags and it is inappropriate to add a new uAPI flag for an internal kernel check. In net-next we can rework the "flags" field to use internal flags and fold the new field into it. But for now, in order to reduce the amount of changes, add a new field. Since the information can only be consumed by root, mark the control plane operations that start and stop the tracing as root-only using the 'GENL_ADMIN_PERM' flag. Tested using [1]. Before: # capsh -- -c ./dm_repo # capsh --drop=cap_sys_admin -- -c ./dm_repo After: # capsh -- -c ./dm_repo # capsh --drop=cap_sys_admin -- -c ./dm_repo Failed to join "events" multicast group [1] $ cat dm.c #include #include #include #include int main(int argc, char **argv) { struct nl_sock *sk; int grp, err; sk = nl_socket_alloc(); if (!sk) { fprintf(stderr, "Failed to allocate socket\n"); return -1; } err = genl_connect(sk); if (err) { fprintf(stderr, "Failed to connect socket\n"); return err; } grp = genl_ctrl_resolve_grp(sk, "NET_DM", "events"); if (grp < 0) { fprintf(stderr, "Failed to resolve \"events\" multicast group\n"); return grp; } err = nl_socket_add_memberships(sk, grp, NFNLGRP_NONE); if (err) { fprintf(stderr, "Failed to join \"events\" multicast group\n"); return err; } return 0; } $ gcc -I/usr/include/libnl3 -lnl-3 -lnl-genl-3 -o dm_repo dm.c Fixes: 9a8afc8d3962 ("Network Drop Monitor: Adding drop monitor implementation & Netlink protocol") Reported-by: "The UK's National Cyber Security Centre (NCSC)" Signed-off-by: Ido Schimmel Reviewed-by: Jacob Keller Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231206213102.1824398-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/genetlink.h | 2 ++ net/core/drop_monitor.c | 4 +++- net/netlink/genetlink.c | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index e18a4c0d69ee..c53244f20437 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -12,10 +12,12 @@ * struct genl_multicast_group - generic netlink multicast group * @name: name of the multicast group, names are per-family * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) + * @cap_sys_admin: whether %CAP_SYS_ADMIN is required for binding */ struct genl_multicast_group { char name[GENL_NAMSIZ]; u8 flags; + u8 cap_sys_admin:1; }; struct genl_split_ops; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index aff31cd944c2..b240d9aae4a6 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -183,7 +183,7 @@ out: } static const struct genl_multicast_group dropmon_mcgrps[] = { - { .name = "events", }, + { .name = "events", .cap_sys_admin = 1 }, }; static void send_dm_alert(struct work_struct *work) @@ -1619,11 +1619,13 @@ static const struct genl_small_ops dropmon_ops[] = { .cmd = NET_DM_CMD_START, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, + .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_STOP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, + .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_CONFIG_GET, diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 92ef5ed2e7b0..9c7ffd10df2a 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1691,6 +1691,9 @@ static int genl_bind(struct net *net, int group) if ((grp->flags & GENL_UNS_ADMIN_PERM) && !ns_capable(net->user_ns, CAP_NET_ADMIN)) ret = -EPERM; + if (grp->cap_sys_admin && + !ns_capable(net->user_ns, CAP_SYS_ADMIN)) + ret = -EPERM; break; } -- cgit From 1499b89289bf272fd83cb296c82fb5519d0fe93f Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Wed, 6 Dec 2023 08:16:54 +0100 Subject: net: dsa: microchip: provide a list of valid protocols for xmit handler Provide a list of valid protocols for which the driver will provide it's deferred xmit handler. When using DSA_TAG_PROTO_KSZ8795 protocol, it does not provide a "connect" method, therefor ksz_connect() is not allocating ksz_tagger_data. This avoids the following null pointer dereference: ksz_connect_tag_protocol from dsa_register_switch+0x9ac/0xee0 dsa_register_switch from ksz_switch_register+0x65c/0x828 ksz_switch_register from ksz_spi_probe+0x11c/0x168 ksz_spi_probe from spi_probe+0x84/0xa8 spi_probe from really_probe+0xc8/0x2d8 Fixes: ab32f56a4100 ("net: dsa: microchip: ptp: add packet transmission timestamping") Signed-off-by: Sean Nyekjaer Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20231206071655.1626479-1-sean@geanix.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 3fed406fb46a..ff4b39601c93 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -2713,10 +2713,18 @@ static int ksz_connect_tag_protocol(struct dsa_switch *ds, { struct ksz_tagger_data *tagger_data; - tagger_data = ksz_tagger_data(ds); - tagger_data->xmit_work_fn = ksz_port_deferred_xmit; - - return 0; + switch (proto) { + case DSA_TAG_PROTO_KSZ8795: + return 0; + case DSA_TAG_PROTO_KSZ9893: + case DSA_TAG_PROTO_KSZ9477: + case DSA_TAG_PROTO_LAN937X: + tagger_data = ksz_tagger_data(ds); + tagger_data->xmit_work_fn = ksz_port_deferred_xmit; + return 0; + default: + return -EPROTONOSUPPORT; + } } static int ksz_port_vlan_filtering(struct dsa_switch *ds, int port, -- cgit From c5a10397d4571bcfd4bd7ca211ee47bcb6792ec3 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 7 Dec 2023 01:02:37 +0800 Subject: net/smc: fix missing byte order conversion in CLC handshake The byte order conversions of ISM GID and DMB token are missing in process of CLC accept and confirm. So fix it. Fixes: 3d9725a6a133 ("net/smc: common routine for CLC accept and confirm") Signed-off-by: Wen Gu Reviewed-by: Tony Lu Reviewed-by: Alexandra Winter Reviewed-by: Wenjia Zhang Link: https://lore.kernel.org/r/1701882157-87956-1-git-send-email-guwen@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 4 ++-- net/smc/smc_clc.c | 9 ++++----- net/smc/smc_clc.h | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 2a1388841951..73eebddbbf41 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -723,7 +723,7 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc, int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size); smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx; - smc->conn.peer_token = clc->d0.token; + smc->conn.peer_token = ntohll(clc->d0.token); /* msg header takes up space in the buffer */ smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg); atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); @@ -1415,7 +1415,7 @@ static int smc_connect_ism(struct smc_sock *smc, if (rc) return rc; } - ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid; + ini->ism_peer_gid[ini->ism_selected] = ntohll(aclc->d0.gid); /* there is only one lgr role for SMC-D; use server lock */ mutex_lock(&smc_server_lgr_pending); diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 8deb46c28f1d..72f4d81a3f41 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -1004,6 +1004,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, { struct smc_connection *conn = &smc->conn; struct smc_clc_first_contact_ext_v2x fce; + struct smcd_dev *smcd = conn->lgr->smcd; struct smc_clc_msg_accept_confirm *clc; struct smc_clc_fce_gid_ext gle; struct smc_clc_msg_trail trl; @@ -1021,17 +1022,15 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)); clc->hdr.typev1 = SMC_TYPE_D; - clc->d0.gid = - conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd); - clc->d0.token = conn->rmb_desc->token; + clc->d0.gid = htonll(smcd->ops->get_local_gid(smcd)); + clc->d0.token = htonll(conn->rmb_desc->token); clc->d0.dmbe_size = conn->rmbe_size_comp; clc->d0.dmbe_idx = 0; memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); if (version == SMC_V1) { clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); } else { - clc_v2->d1.chid = - htons(smc_ism_get_chid(conn->lgr->smcd)); + clc_v2->d1.chid = htons(smc_ism_get_chid(smcd)); if (eid && eid[0]) memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN); len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index c5c8e7db775a..08155a96a02a 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -204,8 +204,8 @@ struct smcr_clc_msg_accept_confirm { /* SMCR accept/confirm */ } __packed; struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */ - u64 gid; /* Sender GID */ - u64 token; /* DMB token */ + __be64 gid; /* Sender GID */ + __be64 token; /* DMB token */ u8 dmbe_idx; /* DMBE index */ #if defined(__BIG_ENDIAN_BITFIELD) u8 dmbe_size : 4, /* buf size (compressed) */ -- cgit From b0a930e8d90caf66a94fee7a9d0b8472bc3e7561 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 6 Dec 2023 17:41:43 +0100 Subject: vsock/virtio: fix "comparison of distinct pointer types lacks a cast" warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After backporting commit 581512a6dc93 ("vsock/virtio: MSG_ZEROCOPY flag support") in CentOS Stream 9, CI reported the following error: In file included from ./include/linux/kernel.h:17, from ./include/linux/list.h:9, from ./include/linux/preempt.h:11, from ./include/linux/spinlock.h:56, from net/vmw_vsock/virtio_transport_common.c:9: net/vmw_vsock/virtio_transport_common.c: In function ‘virtio_transport_can_zcopy‘: ./include/linux/minmax.h:20:35: error: comparison of distinct pointer types lacks a cast [-Werror] 20 | (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) | ^~ ./include/linux/minmax.h:26:18: note: in expansion of macro ‘__typecheck‘ 26 | (__typecheck(x, y) && __no_side_effects(x, y)) | ^~~~~~~~~~~ ./include/linux/minmax.h:36:31: note: in expansion of macro ‘__safe_cmp‘ 36 | __builtin_choose_expr(__safe_cmp(x, y), \ | ^~~~~~~~~~ ./include/linux/minmax.h:45:25: note: in expansion of macro ‘__careful_cmp‘ 45 | #define min(x, y) __careful_cmp(x, y, <) | ^~~~~~~~~~~~~ net/vmw_vsock/virtio_transport_common.c:63:37: note: in expansion of macro ‘min‘ 63 | int pages_to_send = min(pages_in_iov, MAX_SKB_FRAGS); We could solve it by using min_t(), but this operation seems entirely unnecessary, because we also pass MAX_SKB_FRAGS to iov_iter_npages(), which performs almost the same check, returning at most MAX_SKB_FRAGS elements. So, let's eliminate this unnecessary comparison. Fixes: 581512a6dc93 ("vsock/virtio: MSG_ZEROCOPY flag support") Cc: avkrasnov@salutedevices.com Signed-off-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Reviewed-by: Arseniy Krasnov Link: https://lore.kernel.org/r/20231206164143.281107-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport_common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index f6dc896bf44c..c8e162c9d1df 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -59,8 +59,7 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, t_ops = virtio_transport_get_ops(info->vsk); if (t_ops->can_msgzerocopy) { - int pages_in_iov = iov_iter_npages(iov_iter, MAX_SKB_FRAGS); - int pages_to_send = min(pages_in_iov, MAX_SKB_FRAGS); + int pages_to_send = iov_iter_npages(iov_iter, MAX_SKB_FRAGS); /* +1 is for packet header. */ return t_ops->can_msgzerocopy(pages_to_send + 1); -- cgit From b39113349de60e9b0bc97c2e129181b193c45054 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 7 Dec 2023 10:07:24 +0800 Subject: md: split MD_RECOVERY_NEEDED out of mddev_resume New mddev_resume() calls are added to synchronize IO with array reconfiguration, however, this introduces a performance regression while adding it in md_start_sync(): 1) someone sets MD_RECOVERY_NEEDED first; 2) daemon thread grabs reconfig_mutex, then clears MD_RECOVERY_NEEDED and queues a new sync work; 3) daemon thread releases reconfig_mutex; 4) in md_start_sync a) check that there are spares that can be added/removed, then suspend the array; b) remove_and_add_spares may not be called, or called without really add/remove spares; c) resume the array, then set MD_RECOVERY_NEEDED again! Loop between 2 - 4, then mddev_suspend() will be called quite often, for consequence, normal IO will be quite slow. Fix this problem by don't set MD_RECOVERY_NEEDED again in md_start_sync(), hence the loop will be broken. Fixes: bc08041b32ab ("md: suspend array in md_start_sync() if array need reconfiguration") Suggested-by: Song Liu Reported-by: Janpieter Sollie Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218200 Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231207020724.2797445-1-yukuai1@huaweicloud.com --- drivers/md/md.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index bc9d67af1961..b066abbffd10 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -490,7 +490,7 @@ int mddev_suspend(struct mddev *mddev, bool interruptible) } EXPORT_SYMBOL_GPL(mddev_suspend); -void mddev_resume(struct mddev *mddev) +static void __mddev_resume(struct mddev *mddev, bool recovery_needed) { lockdep_assert_not_held(&mddev->reconfig_mutex); @@ -507,12 +507,18 @@ void mddev_resume(struct mddev *mddev) percpu_ref_resurrect(&mddev->active_io); wake_up(&mddev->sb_wait); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + if (recovery_needed) + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ mutex_unlock(&mddev->suspend_mutex); } + +void mddev_resume(struct mddev *mddev) +{ + return __mddev_resume(mddev, true); +} EXPORT_SYMBOL_GPL(mddev_resume); /* @@ -9389,7 +9395,15 @@ static void md_start_sync(struct work_struct *ws) goto not_running; } - suspend ? mddev_unlock_and_resume(mddev) : mddev_unlock(mddev); + mddev_unlock(mddev); + /* + * md_start_sync was triggered by MD_RECOVERY_NEEDED, so we should + * not set it again. Otherwise, we may cause issue like this one: + * https://bugzilla.kernel.org/show_bug.cgi?id=218200 + * Therefore, use __mddev_resume(mddev, false). + */ + if (suspend) + __mddev_resume(mddev, false); md_wakeup_thread(mddev->sync_thread); sysfs_notify_dirent_safe(mddev->sysfs_action); md_new_event(); @@ -9401,7 +9415,15 @@ not_running: clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - suspend ? mddev_unlock_and_resume(mddev) : mddev_unlock(mddev); + mddev_unlock(mddev); + /* + * md_start_sync was triggered by MD_RECOVERY_NEEDED, so we should + * not set it again. Otherwise, we may cause issue like this one: + * https://bugzilla.kernel.org/show_bug.cgi?id=218200 + * Therefore, use __mddev_resume(mddev, false). + */ + if (suspend) + __mddev_resume(mddev, false); wake_up(&resync_wait); if (test_and_clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery) && -- cgit From 634e5e1e06f5cdd614a1bc429ecb243a51cc009d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 7 Dec 2023 19:20:35 +0100 Subject: ALSA: hda/realtek: Add quirk for Lenovo Yoga Pro 7 Lenovo Yoga Pro 7 14APH8 (PCI SSID 17aa:3882) seems requiring the similar workaround like Yoga 9 model for the bass speaker. Cc: Link: https://lore.kernel.org/r/CAGGk=CRRQ1L9p771HsXTN_ebZP41Qj+3gw35Gezurn+nokRewg@mail.gmail.com Link: https://lore.kernel.org/r/20231207182035.30248-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5b5f298870be..0377912e9264 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10206,6 +10206,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x387d, "Yoga S780-16 pro Quad AAC", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x387e, "Yoga S780-16 pro Quad YC", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3881, "YB9 dual power mode2 YC", ALC287_FIXUP_TAS2781_I2C), + SND_PCI_QUIRK(0x17aa, 0x3882, "Lenovo Yoga Pro 7 14APH8", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x3884, "Y780 YG DUAL", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3886, "Y780 VECO DUAL", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38a7, "Y780P AMD YG dual", ALC287_FIXUP_TAS2781_I2C), -- cgit From 8f0b960a42badda7a2781e8a33564624200debc9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 7 Dec 2023 19:28:10 +0100 Subject: ACPI: utils: Fix error path in acpi_evaluate_reference() If a pointer to an uninitialized struct acpi_handle_list is passed to acpi_evaluate_reference() and it decides to bail out early, either because acpi_evaluate_object() fails, or because it produces invalid data, the handles pointer from the struct acpi_handle_list will be passed to kfree() and if it is not NULL, the kernel will crash on an attempt to free unallocated memory. Address this by moving the "end" label in acpi_evaluate_reference() to the end of the function, which is sufficient, because no cleanup is needed in that case. Fixes: 2e57d10a6591 ("ACPI: utils: Dynamically determine acpi_handle_list size") Signed-off-by: Rafael J. Wysocki Tested-by: Woody Suwalski --- drivers/acpi/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 28c75242fca9..62944e35fcee 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -399,13 +399,13 @@ acpi_evaluate_reference(acpi_handle handle, acpi_handle_debug(list->handles[i], "Found in reference list\n"); } -end: if (ACPI_FAILURE(status)) { list->count = 0; kfree(list->handles); list->handles = NULL; } +end: kfree(buffer.pointer); return status; -- cgit From 6f5c4eca48ffe18307b4e1d375817691c9005c87 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Dec 2023 19:11:14 -0800 Subject: cxl/hdm: Fix dpa translation locking The helper, cxl_dpa_resource_start(), snapshots the dpa-address of an endpoint-decoder after acquiring the cxl_dpa_rwsem. However, it is sufficient to assert that cxl_dpa_rwsem is held rather than acquire it in the helper. Otherwise, it triggers multiple lockdep reports: 1/ Tracing callbacks are in an atomic context that can not acquire sleeping locks: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1525 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1288, name: bash preempt_count: 2, expected: 0 RCU nest depth: 0, expected: 0 [..] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc38 05/24/2023 Call Trace: dump_stack_lvl+0x71/0x90 __might_resched+0x1b2/0x2c0 down_read+0x1a/0x190 cxl_dpa_resource_start+0x15/0x50 [cxl_core] cxl_trace_hpa+0x122/0x300 [cxl_core] trace_event_raw_event_cxl_poison+0x1c9/0x2d0 [cxl_core] 2/ The rwsem is already held in the inject poison path: WARNING: possible recursive locking detected 6.7.0-rc2+ #12 Tainted: G W OE N -------------------------------------------- bash/1288 is trying to acquire lock: ffffffffc05f73d0 (cxl_dpa_rwsem){++++}-{3:3}, at: cxl_dpa_resource_start+0x15/0x50 [cxl_core] but task is already holding lock: ffffffffc05f73d0 (cxl_dpa_rwsem){++++}-{3:3}, at: cxl_inject_poison+0x7d/0x1e0 [cxl_core] [..] Call Trace: dump_stack_lvl+0x71/0x90 __might_resched+0x1b2/0x2c0 down_read+0x1a/0x190 cxl_dpa_resource_start+0x15/0x50 [cxl_core] cxl_trace_hpa+0x122/0x300 [cxl_core] trace_event_raw_event_cxl_poison+0x1c9/0x2d0 [cxl_core] __traceiter_cxl_poison+0x5c/0x80 [cxl_core] cxl_inject_poison+0x1bc/0x1e0 [cxl_core] This appears to have been an issue since the initial implementation and uncovered by the new cxl-poison.sh test [1]. That test is now passing with these changes. Fixes: 28a3ae4ff66c ("cxl/trace: Add an HPA to cxl_poison trace events") Link: http://lore.kernel.org/r/e4f2716646918135ddbadf4146e92abb659de734.1700615159.git.alison.schofield@intel.com [1] Cc: Cc: Alison Schofield Cc: Jonathan Cameron Cc: Dave Jiang Cc: Ira Weiny Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 3 +-- drivers/cxl/core/port.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 529baa8a1759..7d97790b893d 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -363,10 +363,9 @@ resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled) { resource_size_t base = -1; - down_read(&cxl_dpa_rwsem); + lockdep_assert_held(&cxl_dpa_rwsem); if (cxled->dpa_res) base = cxled->dpa_res->start; - up_read(&cxl_dpa_rwsem); return base; } diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 38441634e4c6..b7c93bb18f6e 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -226,9 +226,9 @@ static ssize_t dpa_resource_show(struct device *dev, struct device_attribute *at char *buf) { struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); - u64 base = cxl_dpa_resource_start(cxled); - return sysfs_emit(buf, "%#llx\n", base); + guard(rwsem_read)(&cxl_dpa_rwsem); + return sysfs_emit(buf, "%#llx\n", (u64)cxl_dpa_resource_start(cxled)); } static DEVICE_ATTR_RO(dpa_resource); -- cgit From 6d1980f0af439b5fd49b1bee2220deff6888792e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 7 Dec 2023 12:39:13 -0500 Subject: bcachefs: Fix deleted inode check for dirs We could delete directories transactionally on rmdir()/unlink(), but we don't; instead, like with regular files we wait for the VFS to call evict(). That means that our check for directories in the deleted inodes btree is wrong - the check should be for non-empty directories. Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 19 +++++++++++-------- fs/bcachefs/dirent.h | 1 + fs/bcachefs/inode.c | 15 ++++++++++----- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 1a0f2d571569..2bfff0da7000 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -485,20 +485,15 @@ retry: return ret; } -int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir) +int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 snapshot) { struct btree_iter iter; struct bkey_s_c k; - u32 snapshot; int ret; - ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot); - if (ret) - return ret; - for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents, - SPOS(dir.inum, 0, snapshot), - POS(dir.inum, U64_MAX), 0, k, ret) + SPOS(dir, 0, snapshot), + POS(dir, U64_MAX), 0, k, ret) if (k.k->type == KEY_TYPE_dirent) { ret = -ENOTEMPTY; break; @@ -508,6 +503,14 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir) return ret; } +int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir) +{ + u32 snapshot; + + return bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot) ?: + bch2_empty_dir_snapshot(trans, dir.inum, snapshot); +} + int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) { struct btree_trans *trans = bch2_trans_get(c); diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index cd262bf4d9c5..1e3431990abd 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -64,6 +64,7 @@ u64 bch2_dirent_lookup(struct bch_fs *, subvol_inum, const struct bch_hash_info *, const struct qstr *, subvol_inum *); +int bch2_empty_dir_snapshot(struct btree_trans *, u64, u32); int bch2_empty_dir_trans(struct btree_trans *, subvol_inum); int bch2_readdir(struct bch_fs *, subvol_inum, struct dir_context *); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index c7849b0753e7..9309cfeecd8d 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -7,6 +7,7 @@ #include "btree_update.h" #include "buckets.h" #include "compress.h" +#include "dirent.h" #include "error.h" #include "extents.h" #include "extent_update.h" @@ -1093,11 +1094,15 @@ static int may_delete_deleted_inode(struct btree_trans *trans, if (ret) goto out; - if (fsck_err_on(S_ISDIR(inode.bi_mode), c, - deleted_inode_is_dir, - "directory %llu:%u in deleted_inodes btree", - pos.offset, pos.snapshot)) - goto delete; + if (S_ISDIR(inode.bi_mode)) { + ret = bch2_empty_dir_snapshot(trans, pos.offset, pos.snapshot); + if (fsck_err_on(ret == -ENOTEMPTY, c, deleted_inode_is_dir, + "non empty directory %llu:%u in deleted_inodes btree", + pos.offset, pos.snapshot)) + goto delete; + if (ret) + goto out; + } if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), c, deleted_inode_not_unlinked, -- cgit From 3823119b9c2b5f9e9b760336f75bc989b805cde6 Mon Sep 17 00:00:00 2001 From: Ziqi Zhao Date: Fri, 21 Jul 2023 09:14:46 -0700 Subject: drm/crtc: Fix uninit-value bug in drm_mode_setcrtc The connector_set contains uninitialized values when allocated with kmalloc_array. However, in the "out" branch, the logic assumes that any element in connector_set would be equal to NULL if failed to initialize, which causes the bug reported by Syzbot. The fix is to use an extra variable to keep track of how many connectors are initialized indeed, and use that variable to decrease any refcounts in the "out" branch. Reported-by: syzbot+4fad2e57beb6397ab2fc@syzkaller.appspotmail.com Signed-off-by: Ziqi Zhao Reported-and-tested-by: syzbot+4fad2e57beb6397ab2fc@syzkaller.appspotmail.com Tested-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20230721161446.8602-1-astrajoan@yahoo.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/drm_crtc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index df9bf3c9206e..d718c17ab1e9 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -715,8 +715,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, struct drm_mode_set set; uint32_t __user *set_connectors_ptr; struct drm_modeset_acquire_ctx ctx; - int ret; - int i; + int ret, i, num_connectors; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; @@ -851,6 +850,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, goto out; } + num_connectors = 0; for (i = 0; i < crtc_req->count_connectors; i++) { connector_set[i] = NULL; set_connectors_ptr = (uint32_t __user *)(unsigned long)crtc_req->set_connectors_ptr; @@ -871,6 +871,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, connector->name); connector_set[i] = connector; + num_connectors++; } } @@ -879,7 +880,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, set.y = crtc_req->y; set.mode = mode; set.connectors = connector_set; - set.num_connectors = crtc_req->count_connectors; + set.num_connectors = num_connectors; set.fb = fb; if (drm_drv_uses_atomic_modeset(dev)) @@ -892,7 +893,7 @@ out: drm_framebuffer_put(fb); if (connector_set) { - for (i = 0; i < crtc_req->count_connectors; i++) { + for (i = 0; i < num_connectors; i++) { if (connector_set[i]) drm_connector_put(connector_set[i]); } -- cgit From e5dc5afff62f3e97e86c3643ec9fcad23de4f2d3 Mon Sep 17 00:00:00 2001 From: Judy Hsiao Date: Wed, 6 Dec 2023 03:38:33 +0000 Subject: neighbour: Don't let neigh_forced_gc() disable preemption for long We are seeing cases where neigh_cleanup_and_release() is called by neigh_forced_gc() many times in a row with preemption turned off. When running on a low powered CPU at a low CPU frequency, this has been measured to keep preemption off for ~10 ms. That's not great on a system with HZ=1000 which expects tasks to be able to schedule in with ~1ms latency. Suggested-by: Douglas Anderson Signed-off-by: Judy Hsiao Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Reviewed-by: Douglas Anderson Signed-off-by: David S. Miller --- net/core/neighbour.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index df81c1f0a570..552719c3bbc3 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -253,9 +253,11 @@ static int neigh_forced_gc(struct neigh_table *tbl) { int max_clean = atomic_read(&tbl->gc_entries) - READ_ONCE(tbl->gc_thresh2); + u64 tmax = ktime_get_ns() + NSEC_PER_MSEC; unsigned long tref = jiffies - 5 * HZ; struct neighbour *n, *tmp; int shrunk = 0; + int loop = 0; NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); @@ -278,11 +280,16 @@ static int neigh_forced_gc(struct neigh_table *tbl) shrunk++; if (shrunk >= max_clean) break; + if (++loop == 16) { + if (ktime_get_ns() > tmax) + goto unlock; + loop = 0; + } } } WRITE_ONCE(tbl->last_flush, jiffies); - +unlock: write_unlock_bh(&tbl->lock); return shrunk; -- cgit From bd4a816752bab609dd6d65ae021387beb9e2ddbd Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Wed, 6 Dec 2023 09:36:12 -0800 Subject: net: ipv6: support reporting otherwise unknown prefix flags in RTM_NEWPREFIX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lorenzo points out that we effectively clear all unknown flags from PIO when copying them to userspace in the netlink RTM_NEWPREFIX notification. We could fix this one at a time as new flags are defined, or in one fell swoop - I choose the latter. We could either define 6 new reserved flags (reserved1..6) and handle them individually (and rename them as new flags are defined), or we could simply copy the entire unmodified byte over - I choose the latter. This unfortunately requires some anonymous union/struct magic, so we add a static assert on the struct size for a little extra safety. Cc: David Ahern Cc: Lorenzo Colitti Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Maciej Żenczykowski Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/addrconf.h | 12 ++++++++++-- include/net/if_inet6.h | 4 ---- net/ipv6/addrconf.c | 6 +----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 82da55101b5a..61ebe723ee4d 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -31,17 +31,22 @@ struct prefix_info { __u8 length; __u8 prefix_len; + union __packed { + __u8 flags; + struct __packed { #if defined(__BIG_ENDIAN_BITFIELD) - __u8 onlink : 1, + __u8 onlink : 1, autoconf : 1, reserved : 6; #elif defined(__LITTLE_ENDIAN_BITFIELD) - __u8 reserved : 6, + __u8 reserved : 6, autoconf : 1, onlink : 1; #else #error "Please fix " #endif + }; + }; __be32 valid; __be32 prefered; __be32 reserved2; @@ -49,6 +54,9 @@ struct prefix_info { struct in6_addr prefix; }; +/* rfc4861 4.6.2: IPv6 PIO is 32 bytes in size */ +static_assert(sizeof(struct prefix_info) == 32); + #include #include #include diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 3e454c4d7ba6..f07642264c1e 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -22,10 +22,6 @@ #define IF_RS_SENT 0x10 #define IF_READY 0x80000000 -/* prefix flags */ -#define IF_PREFIX_ONLINK 0x01 -#define IF_PREFIX_AUTOCONF 0x02 - enum { INET6_IFADDR_STATE_PREDAD, INET6_IFADDR_STATE_DAD, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3aaea56b5166..2692a7b24c40 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6149,11 +6149,7 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, pmsg->prefix_len = pinfo->prefix_len; pmsg->prefix_type = pinfo->type; pmsg->prefix_pad3 = 0; - pmsg->prefix_flags = 0; - if (pinfo->onlink) - pmsg->prefix_flags |= IF_PREFIX_ONLINK; - if (pinfo->autoconf) - pmsg->prefix_flags |= IF_PREFIX_AUTOCONF; + pmsg->prefix_flags = pinfo->flags; if (nla_put(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix)) goto nla_put_failure; -- cgit From 14c200b7ca46b9a9f4af9e81d258a58274320b6f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 4 Dec 2023 16:06:01 +0100 Subject: platform/x86: intel-vbtn: Fix missing tablet-mode-switch events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2 issues have been reported on the Dell Inspiron 7352: 1. Sometimes the tablet-mode-switch stops reporting tablet-mode change events. Add a "VBDL" call to notify_handler() to work around this. 2. Sometimes the tablet-mode is incorrect after suspend/resume Add a detect_tablet_mode() to resume() to fix this. Reported-by: Arnold Gozum Closes: https://lore.kernel.org/platform-driver-x86/87271a74-c831-4eec-b7a4-1371d0e42471@gmail.com/ Tested-by: Arnold Gozum Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20231204150601.46976-1-hdegoede@redhat.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/vbtn.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 6fa1735ad7a4..210b0a81b7ec 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -73,10 +73,10 @@ struct intel_vbtn_priv { bool wakeup_mode; }; -static void detect_tablet_mode(struct platform_device *device) +static void detect_tablet_mode(struct device *dev) { - struct intel_vbtn_priv *priv = dev_get_drvdata(&device->dev); - acpi_handle handle = ACPI_HANDLE(&device->dev); + struct intel_vbtn_priv *priv = dev_get_drvdata(dev); + acpi_handle handle = ACPI_HANDLE(dev); unsigned long long vgbs; acpi_status status; int m; @@ -89,6 +89,8 @@ static void detect_tablet_mode(struct platform_device *device) input_report_switch(priv->switches_dev, SW_TABLET_MODE, m); m = (vgbs & VGBS_DOCK_MODE_FLAG) ? 1 : 0; input_report_switch(priv->switches_dev, SW_DOCK, m); + + input_sync(priv->switches_dev); } /* @@ -134,7 +136,7 @@ static int intel_vbtn_input_setup(struct platform_device *device) priv->switches_dev->id.bustype = BUS_HOST; if (priv->has_switches) { - detect_tablet_mode(device); + detect_tablet_mode(&device->dev); ret = input_register_device(priv->switches_dev); if (ret) @@ -198,6 +200,9 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) autorelease = val && (!ke_rel || ke_rel->type == KE_IGNORE); sparse_keymap_report_event(input_dev, event, val, autorelease); + + /* Some devices need this to report further events */ + acpi_evaluate_object(handle, "VBDL", NULL, NULL); } /* @@ -352,7 +357,13 @@ static void intel_vbtn_pm_complete(struct device *dev) static int intel_vbtn_pm_resume(struct device *dev) { + struct intel_vbtn_priv *priv = dev_get_drvdata(dev); + intel_vbtn_pm_complete(dev); + + if (priv->has_switches) + detect_tablet_mode(dev); + return 0; } -- cgit From 17fe3ec0c110b4afc04052e2a33b146766aac8a1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 5 Dec 2023 22:01:43 -0800 Subject: platform/x86: thinkpad_acpi: fix kernel-doc warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a function's return description and don't misuse "/**" for non-kernel-doc comments to prevent warnings from scripts/kernel-doc. thinkpad_acpi.c:523: warning: No description found for return value of 'tpacpi_check_quirks' thinkpad_acpi.c:9307: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst thinkpad_acpi.c:9307: warning: missing initial short description on line: * This evaluates a ACPI method call specific to the battery Signed-off-by: Randy Dunlap Cc: Henrique de Moraes Holschuh Cc: Hans de Goede Cc: Ilpo Järvinen CC: ibm-acpi-devel@lists.sourceforge.net CC: platform-driver-x86@vger.kernel.org Reviewed-by: mpearson-lenovo@squebb.ca Link: https://lore.kernel.org/r/20231206060144.8260-1-rdunlap@infradead.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/thinkpad_acpi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index d0b5fd4137bc..3392ae99ac3f 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -512,10 +512,10 @@ struct tpacpi_quirk { * Iterates over a quirks list until one is found that matches the * ThinkPad's vendor, BIOS and EC model. * - * Returns 0 if nothing matches, otherwise returns the quirks field of + * Returns: %0 if nothing matches, otherwise returns the quirks field of * the matching &struct tpacpi_quirk entry. * - * The match criteria is: vendor, ec and bios much match. + * The match criteria is: vendor, ec and bios must match. */ static unsigned long __init tpacpi_check_quirks( const struct tpacpi_quirk *qlist, @@ -9303,7 +9303,7 @@ static struct tpacpi_battery_driver_data battery_info; /* ACPI helpers/functions/probes */ -/** +/* * This evaluates a ACPI method call specific to the battery * ACPI extension. The specifics are that an error is marked * in the 32rd bit of the response, so we just check that here. -- cgit From 7bcd032370f88fd4022b6926d101403e96a86309 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 5 Dec 2023 22:01:18 -0800 Subject: platform/x86: intel_ips: fix kernel-doc formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix kernel-doc function notation and comment formatting to prevent warnings from scripts/kernel-doc. for drivers/platform/x86/intel_ips.c: 595: warning: No description found for return value of 'mcp_exceeded' 624: warning: No description found for return value of 'cpu_exceeded' 650: warning: No description found for return value of 'mch_exceeded' 745: warning: bad line: cpu+ gpu+ cpu+gpu- cpu-gpu+ cpu-gpu- 746: warning: bad line: cpu < gpu < cpu+gpu+ cpu+ gpu+ nothing 753: warning: No description found for return value of 'ips_adjust' 747: warning: bad line: cpu < gpu >= cpu+gpu-(mcp<) cpu+gpu-(mcp<) gpu- gpu- 748: warning: bad line: cpu >= gpu < cpu-gpu+(mcp<) cpu- cpu-gpu+(mcp<) cpu- 749: warning: bad line: cpu >= gpu >= cpu-gpu- cpu-gpu- cpu-gpu- cpu-gpu- 945: warning: No description found for return value of 'ips_monitor' 1151: warning: No description found for return value of 'ips_irq_handler' 1301: warning: Function parameter or member 'ips' not described in 'ips_detect_cpu' 1302: warning: No description found for return value of 'ips_detect_cpu' 1358: warning: No description found for return value of 'ips_get_i915_syms' Signed-off-by: Randy Dunlap Cc: Hans de Goede Cc: Ilpo Järvinen Cc: platform-driver-x86@vger.kernel.org Link: https://lore.kernel.org/r/20231206060120.4816-1-rdunlap@infradead.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel_ips.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index 4dfdbfca6841..c66808601fdd 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c @@ -590,6 +590,8 @@ static void ips_disable_gpu_turbo(struct ips_driver *ips) * @ips: IPS driver struct * * Check whether the MCP is over its thermal or power budget. + * + * Returns: %true if the temp or power has exceeded its maximum, else %false */ static bool mcp_exceeded(struct ips_driver *ips) { @@ -619,6 +621,8 @@ static bool mcp_exceeded(struct ips_driver *ips) * @cpu: CPU number to check * * Check a given CPU's average temp or power is over its limit. + * + * Returns: %true if the temp or power has exceeded its maximum, else %false */ static bool cpu_exceeded(struct ips_driver *ips, int cpu) { @@ -645,6 +649,8 @@ static bool cpu_exceeded(struct ips_driver *ips, int cpu) * @ips: IPS driver struct * * Check the MCH temp & power against their maximums. + * + * Returns: %true if the temp or power has exceeded its maximum, else %false */ static bool mch_exceeded(struct ips_driver *ips) { @@ -742,12 +748,13 @@ static void update_turbo_limits(struct ips_driver *ips) * - down (at TDP limit) * - adjust both CPU and GPU down if possible * - cpu+ gpu+ cpu+gpu- cpu-gpu+ cpu-gpu- -cpu < gpu < cpu+gpu+ cpu+ gpu+ nothing -cpu < gpu >= cpu+gpu-(mcp<) cpu+gpu-(mcp<) gpu- gpu- -cpu >= gpu < cpu-gpu+(mcp<) cpu- cpu-gpu+(mcp<) cpu- -cpu >= gpu >= cpu-gpu- cpu-gpu- cpu-gpu- cpu-gpu- + * |cpu+ gpu+ cpu+gpu- cpu-gpu+ cpu-gpu- + * cpu < gpu < |cpu+gpu+ cpu+ gpu+ nothing + * cpu < gpu >= |cpu+gpu-(mcp<) cpu+gpu-(mcp<) gpu- gpu- + * cpu >= gpu < |cpu-gpu+(mcp<) cpu- cpu-gpu+(mcp<) cpu- + * cpu >= gpu >=|cpu-gpu- cpu-gpu- cpu-gpu- cpu-gpu- * + * Returns: %0 */ static int ips_adjust(void *data) { @@ -935,11 +942,13 @@ static void monitor_timeout(struct timer_list *t) * @data: ips driver structure * * This is the main function for the IPS driver. It monitors power and - * tempurature in the MCP and adjusts CPU and GPU power clams accordingly. + * temperature in the MCP and adjusts CPU and GPU power clamps accordingly. * - * We keep a 5s moving average of power consumption and tempurature. Using + * We keep a 5s moving average of power consumption and temperature. Using * that data, along with CPU vs GPU preference, we adjust the power clamps * up or down. + * + * Returns: %0 on success or -errno on error */ static int ips_monitor(void *data) { @@ -1146,6 +1155,8 @@ static void dump_thermal_info(struct ips_driver *ips) * Handle temperature limit trigger events, generally by lowering the clamps. * If we're at a critical limit, we clamp back to the lowest possible value * to prevent emergency shutdown. + * + * Returns: IRQ_NONE or IRQ_HANDLED */ static irqreturn_t ips_irq_handler(int irq, void *arg) { @@ -1293,9 +1304,12 @@ static void ips_debugfs_init(struct ips_driver *ips) /** * ips_detect_cpu - detect whether CPU supports IPS + * @ips: IPS driver struct * * Walk our list and see if we're on a supported CPU. If we find one, * return the limits for it. + * + * Returns: the &ips_mcp_limits struct that matches the boot CPU or %NULL */ static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips) { @@ -1352,6 +1366,8 @@ static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips) * monitor and control graphics turbo mode. If we can find them, we can * enable graphics turbo, otherwise we must disable it to avoid exceeding * thermal and power limits in the MCP. + * + * Returns: %true if the required symbols are found, else %false */ static bool ips_get_i915_syms(struct ips_driver *ips) { -- cgit From 6e455f5dcdd15fa28edf0ffb5b44d3508512dccf Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Dec 2023 15:12:38 +0200 Subject: drm/crtc: fix uninitialized variable use Commit 3823119b9c2b ("drm/crtc: Fix uninit-value bug in drm_mode_setcrtc") was supposed to fix use of an uninitialized variable, but introduced another. num_connectors is only initialized if crtc_req->count_connectors > 0, but it's used regardless. Fix it. Fixes: 3823119b9c2b ("drm/crtc: Fix uninit-value bug in drm_mode_setcrtc") Cc: syzbot+4fad2e57beb6397ab2fc@syzkaller.appspotmail.com Cc: Ziqi Zhao Cc: Maxime Ripard Cc: Maarten Lankhorst Cc: Thomas Zimmermann Signed-off-by: Jani Nikula Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20231208131238.2924571-1-jani.nikula@intel.com --- drivers/gpu/drm/drm_crtc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index d718c17ab1e9..cb90e70d85e8 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -715,7 +715,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, struct drm_mode_set set; uint32_t __user *set_connectors_ptr; struct drm_modeset_acquire_ctx ctx; - int ret, i, num_connectors; + int ret, i, num_connectors = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; @@ -850,7 +850,6 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, goto out; } - num_connectors = 0; for (i = 0; i < crtc_req->count_connectors; i++) { connector_set[i] = NULL; set_connectors_ptr = (uint32_t __user *)(unsigned long)crtc_req->set_connectors_ptr; -- cgit From d47d9886aeef79feba7adac701a510d65f3682b5 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 8 Dec 2023 14:33:41 +0900 Subject: ksmbd: send v2 lease break notification for directory If client send different parent key, different client guid, or there is no parent lease key flags in create context v2 lease, ksmbd send lease break to client. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smb2pdu.h | 1 + fs/smb/server/oplock.c | 56 ++++++++++++++++++++++++++++++++++++++++++----- fs/smb/server/oplock.h | 4 ++++ fs/smb/server/smb2pdu.c | 7 ++++++ fs/smb/server/vfs_cache.c | 13 ++++++++++- fs/smb/server/vfs_cache.h | 2 ++ 6 files changed, 77 insertions(+), 6 deletions(-) diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 9fbaaa387dcc..8b603b13e372 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1253,6 +1253,7 @@ struct create_mxac_rsp { #define SMB2_LEASE_WRITE_CACHING_LE cpu_to_le32(0x04) #define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS_LE cpu_to_le32(0x02) +#define SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE cpu_to_le32(0x04) #define SMB2_LEASE_KEY_SIZE 16 diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 57950ba7e925..147d98427ce8 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -102,6 +102,7 @@ static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx) lease->new_state = 0; lease->flags = lctx->flags; lease->duration = lctx->duration; + lease->is_dir = lctx->is_dir; memcpy(lease->parent_lease_key, lctx->parent_lease_key, SMB2_LEASE_KEY_SIZE); lease->version = lctx->version; lease->epoch = le16_to_cpu(lctx->epoch); @@ -543,12 +544,13 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, /* upgrading lease */ if ((atomic_read(&ci->op_count) + atomic_read(&ci->sop_count)) == 1) { - if (lease->state == - (lctx->req_state & lease->state)) { + if (lease->state != SMB2_LEASE_NONE_LE && + lease->state == (lctx->req_state & lease->state)) { lease->state |= lctx->req_state; if (lctx->req_state & SMB2_LEASE_WRITE_CACHING_LE) lease_read_to_write(opinfo); + } } else if ((atomic_read(&ci->op_count) + atomic_read(&ci->sop_count)) > 1) { @@ -900,7 +902,8 @@ static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level) lease->new_state = SMB2_LEASE_READ_CACHING_LE; } else { - if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE) + if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE && + !lease->is_dir) lease->new_state = SMB2_LEASE_READ_CACHING_LE; else @@ -1082,6 +1085,48 @@ static void set_oplock_level(struct oplock_info *opinfo, int level, } } +void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, + struct lease_ctx_info *lctx) +{ + struct oplock_info *opinfo; + struct ksmbd_inode *p_ci = NULL; + + if (lctx->version != 2) + return; + + p_ci = ksmbd_inode_lookup_lock(fp->filp->f_path.dentry->d_parent); + if (!p_ci) + return; + + read_lock(&p_ci->m_lock); + list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) { + if (!opinfo->is_lease) + continue; + + if (opinfo->o_lease->state != SMB2_OPLOCK_LEVEL_NONE && + (!(lctx->flags & SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE) || + !compare_guid_key(opinfo, fp->conn->ClientGUID, + lctx->parent_lease_key))) { + if (!atomic_inc_not_zero(&opinfo->refcount)) + continue; + + atomic_inc(&opinfo->conn->r_count); + if (ksmbd_conn_releasing(opinfo->conn)) { + atomic_dec(&opinfo->conn->r_count); + continue; + } + + read_unlock(&p_ci->m_lock); + oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); + opinfo_conn_put(opinfo); + read_lock(&p_ci->m_lock); + } + } + read_unlock(&p_ci->m_lock); + + ksmbd_inode_put(p_ci); +} + /** * smb_grant_oplock() - handle oplock/lease request on file open * @work: smb work @@ -1420,10 +1465,11 @@ struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir) struct create_lease_v2 *lc = (struct create_lease_v2 *)cc; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); - if (is_dir) + if (is_dir) { lreq->req_state = lc->lcontext.LeaseState & ~SMB2_LEASE_WRITE_CACHING_LE; - else + lreq->is_dir = true; + } else lreq->req_state = lc->lcontext.LeaseState; lreq->flags = lc->lcontext.LeaseFlags; lreq->epoch = lc->lcontext.Epoch; diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index 672127318c75..b64d1536882a 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -36,6 +36,7 @@ struct lease_ctx_info { __u8 parent_lease_key[SMB2_LEASE_KEY_SIZE]; __le16 epoch; int version; + bool is_dir; }; struct lease_table { @@ -54,6 +55,7 @@ struct lease { __u8 parent_lease_key[SMB2_LEASE_KEY_SIZE]; int version; unsigned short epoch; + bool is_dir; struct lease_table *l_lb; }; @@ -125,4 +127,6 @@ struct oplock_info *lookup_lease_in_table(struct ksmbd_conn *conn, int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci, struct lease_ctx_info *lctx); void destroy_lease_table(struct ksmbd_conn *conn); +void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, + struct lease_ctx_info *lctx); #endif /* __KSMBD_OPLOCK_H */ diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 2d3b8acb21e7..45fc4bc3ac19 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3225,6 +3225,13 @@ int smb2_open(struct ksmbd_work *work) } } else { if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) { + /* + * Compare parent lease using parent key. If there is no + * a lease that has same parent key, Send lease break + * notification. + */ + smb_send_parent_lease_break_noti(fp, lc); + req_op_level = smb2_map_lease_to_oplock(lc->req_state); ksmbd_debug(SMB, "lease req for(%s) req oplock state 0x%x, lease state 0x%x\n", diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index ddf233994ddb..4e82ff627d12 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -87,6 +87,17 @@ static struct ksmbd_inode *ksmbd_inode_lookup(struct ksmbd_file *fp) return __ksmbd_inode_lookup(fp->filp->f_path.dentry); } +struct ksmbd_inode *ksmbd_inode_lookup_lock(struct dentry *d) +{ + struct ksmbd_inode *ci; + + read_lock(&inode_hash_lock); + ci = __ksmbd_inode_lookup(d); + read_unlock(&inode_hash_lock); + + return ci; +} + int ksmbd_query_inode_status(struct dentry *dentry) { struct ksmbd_inode *ci; @@ -199,7 +210,7 @@ static void ksmbd_inode_free(struct ksmbd_inode *ci) kfree(ci); } -static void ksmbd_inode_put(struct ksmbd_inode *ci) +void ksmbd_inode_put(struct ksmbd_inode *ci) { if (atomic_dec_and_test(&ci->m_count)) ksmbd_inode_free(ci); diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 8325cf4527c4..4d4938d6029b 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -138,6 +138,8 @@ struct ksmbd_file *ksmbd_lookup_foreign_fd(struct ksmbd_work *work, u64 id); struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id, u64 pid); void ksmbd_fd_put(struct ksmbd_work *work, struct ksmbd_file *fp); +struct ksmbd_inode *ksmbd_inode_lookup_lock(struct dentry *d); +void ksmbd_inode_put(struct ksmbd_inode *ci); struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id); struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid); struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry); -- cgit From c2a721eead71202a0d8ddd9b56ec8dce652c71d1 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 8 Dec 2023 14:37:56 +0900 Subject: ksmbd: lazy v2 lease break on smb2_write() Don't immediately send directory lease break notification on smb2_write(). Instead, It postpones it until smb2_close(). Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- fs/smb/server/oplock.h | 1 + fs/smb/server/vfs.c | 3 +++ fs/smb/server/vfs_cache.h | 1 + 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 147d98427ce8..562b180459a1 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -396,8 +396,8 @@ void close_id_del_oplock(struct ksmbd_file *fp) { struct oplock_info *opinfo; - if (S_ISDIR(file_inode(fp->filp)->i_mode)) - return; + if (fp->reserve_lease_break) + smb_lazy_parent_lease_break_close(fp); opinfo = opinfo_get(fp); if (!opinfo) @@ -1127,6 +1127,47 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, ksmbd_inode_put(p_ci); } +void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) +{ + struct oplock_info *opinfo; + struct ksmbd_inode *p_ci = NULL; + + rcu_read_lock(); + opinfo = rcu_dereference(fp->f_opinfo); + rcu_read_unlock(); + + if (!opinfo->is_lease || opinfo->o_lease->version != 2) + return; + + p_ci = ksmbd_inode_lookup_lock(fp->filp->f_path.dentry->d_parent); + if (!p_ci) + return; + + read_lock(&p_ci->m_lock); + list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) { + if (!opinfo->is_lease) + continue; + + if (opinfo->o_lease->state != SMB2_OPLOCK_LEVEL_NONE) { + if (!atomic_inc_not_zero(&opinfo->refcount)) + continue; + + atomic_inc(&opinfo->conn->r_count); + if (ksmbd_conn_releasing(opinfo->conn)) { + atomic_dec(&opinfo->conn->r_count); + continue; + } + read_unlock(&p_ci->m_lock); + oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); + opinfo_conn_put(opinfo); + read_lock(&p_ci->m_lock); + } + } + read_unlock(&p_ci->m_lock); + + ksmbd_inode_put(p_ci); +} + /** * smb_grant_oplock() - handle oplock/lease request on file open * @work: smb work diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index b64d1536882a..5b93ea9196c0 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -129,4 +129,5 @@ int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci, void destroy_lease_table(struct ksmbd_conn *conn); void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, struct lease_ctx_info *lctx); +void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp); #endif /* __KSMBD_OPLOCK_H */ diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 9091dcd7a310..4277750a6da1 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -517,6 +517,9 @@ int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp, } } + /* Reserve lease break for parent dir at closing time */ + fp->reserve_lease_break = true; + /* Do we need to break any of a levelII oplock? */ smb_break_all_levII_oplock(work, fp, 1); diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 4d4938d6029b..a528f0cc775a 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -105,6 +105,7 @@ struct ksmbd_file { struct ksmbd_readdir_data readdir_data; int dot_dotdot[2]; unsigned int f_state; + bool reserve_lease_break; }; static inline void set_ctx_actor(struct dir_context *ctx, -- cgit From 658609d9a618d8881bf549b5893c0ba8fcff4526 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 4 Dec 2023 22:20:46 +0900 Subject: ksmbd: avoid duplicate opinfo_put() call on error of smb21_lease_break_ack() opinfo_put() could be called twice on error of smb21_lease_break_ack(). It will cause UAF issue if opinfo is referenced on other places. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 45fc4bc3ac19..f3af83dc49c4 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -8219,6 +8219,11 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) le32_to_cpu(req->LeaseState)); } + if (ret < 0) { + rsp->hdr.Status = err; + goto err_out; + } + lease_state = lease->state; opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); @@ -8226,11 +8231,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) wake_up_interruptible_all(&opinfo->oplock_brk); opinfo_put(opinfo); - if (ret < 0) { - rsp->hdr.Status = err; - goto err_out; - } - rsp->StructureSize = cpu_to_le16(36); rsp->Reserved = 0; rsp->Flags = 0; -- cgit From a9f106c765c12d2f58aa33431bd8ce8e9d8a404a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 4 Dec 2023 22:23:34 +0900 Subject: ksmbd: fix wrong allocation size update in smb2_open() When client send SMB2_CREATE_ALLOCATION_SIZE create context, ksmbd update old size to ->AllocationSize in smb2 create response. ksmbd_vfs_getattr() should be called after it to get updated stat result. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index f3af83dc49c4..652ab429bf2e 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2516,7 +2516,7 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path * da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME | XATTR_DOSINFO_ITIME; - rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da, false); + rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da, true); if (rc) ksmbd_debug(SMB, "failed to store file attribute into xattr\n"); } @@ -3185,23 +3185,6 @@ int smb2_open(struct ksmbd_work *work) goto err_out; } - rc = ksmbd_vfs_getattr(&path, &stat); - if (rc) - goto err_out; - - if (stat.result_mask & STATX_BTIME) - fp->create_time = ksmbd_UnixTimeToNT(stat.btime); - else - fp->create_time = ksmbd_UnixTimeToNT(stat.ctime); - if (req->FileAttributes || fp->f_ci->m_fattr == 0) - fp->f_ci->m_fattr = - cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes))); - - if (!created) - smb2_update_xattrs(tcon, &path, fp); - else - smb2_new_xattrs(tcon, &path, fp); - if (file_present || created) ksmbd_vfs_kern_path_unlock(&parent_path, &path); @@ -3302,6 +3285,23 @@ int smb2_open(struct ksmbd_work *work) } } + rc = ksmbd_vfs_getattr(&path, &stat); + if (rc) + goto err_out1; + + if (stat.result_mask & STATX_BTIME) + fp->create_time = ksmbd_UnixTimeToNT(stat.btime); + else + fp->create_time = ksmbd_UnixTimeToNT(stat.ctime); + if (req->FileAttributes || fp->f_ci->m_fattr == 0) + fp->f_ci->m_fattr = + cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes))); + + if (!created) + smb2_update_xattrs(tcon, &path, fp); + else + smb2_new_xattrs(tcon, &path, fp); + memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE); rsp->StructureSize = cpu_to_le16(89); -- cgit From 13736654481198e519059d4a2e2e3b20fa9fdb3e Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 6 Dec 2023 08:23:49 +0900 Subject: ksmbd: fix wrong name of SMB2_CREATE_ALLOCATION_SIZE MS confirm that "AISi" name of SMB2_CREATE_ALLOCATION_SIZE in MS-SMB2 specification is a typo. cifs/ksmbd have been using this wrong name from MS-SMB2. It should be "AlSi". Also It will cause problem when running smb2.create.open test in smbtorture against ksmbd. Cc: stable@vger.kernel.org Fixes: 12197a7fdda9 ("Clarify SMB2/SMB3 create context and add missing ones") Signed-off-by: Namjae Jeon Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/common/smb2pdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 8b603b13e372..57f2343164a3 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1145,7 +1145,7 @@ struct smb2_server_client_notification { #define SMB2_CREATE_SD_BUFFER "SecD" /* security descriptor */ #define SMB2_CREATE_DURABLE_HANDLE_REQUEST "DHnQ" #define SMB2_CREATE_DURABLE_HANDLE_RECONNECT "DHnC" -#define SMB2_CREATE_ALLOCATION_SIZE "AISi" +#define SMB2_CREATE_ALLOCATION_SIZE "AlSi" #define SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST "MxAc" #define SMB2_CREATE_TIMEWARP_REQUEST "TWrp" #define SMB2_CREATE_QUERY_ON_DISK_ID "QFid" -- cgit From ef61a0405742a9f7f6051bc6fd2f017d87d07911 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 1 Dec 2023 11:50:28 +0000 Subject: PCI: loongson: Limit MRRS to 256 This is a partial revert of 8b3517f88ff2 ("PCI: loongson: Prevent LS7A MRRS increases") for MIPS-based Loongson. Some MIPS Loongson systems don't support arbitrary Max_Read_Request_Size (MRRS) settings. 8b3517f88ff2 ("PCI: loongson: Prevent LS7A MRRS increases") worked around that by (1) assuming that firmware configured MRRS to the maximum supported value and (2) preventing the PCI core from increasing MRRS. Unfortunately, some firmware doesn't set that maximum MRRS correctly, which results in devices not being initialized correctly. One symptom, from the Debian report below, is this: ata4.00: exception Emask 0x0 SAct 0x20000000 SErr 0x0 action 0x6 frozen ata4.00: failed command: WRITE FPDMA QUEUED ata4.00: cmd 61/20:e8:00:f0:e1/00:00:00:00:00/40 tag 29 ncq dma 16384 out res 40/00:00:00:00:00/00:00:00:00:00/00 Emask 0x4 (timeout) ata4.00: status: { DRDY } ata4: hard resetting link Limit MRRS to 256 because MIPS Loongson with higher MRRS support is considered rare. This must be done at device enablement stage because the MRRS setting may get lost if PCI_COMMAND_MASTER on the parent bridge is cleared, and we are only sure parent bridge is enabled at this point. Fixes: 8b3517f88ff2 ("PCI: loongson: Prevent LS7A MRRS increases") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217680 Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1035587 Link: https://lore.kernel.org/r/20231201115028.84351-1-jiaxun.yang@flygoat.com Signed-off-by: Jiaxun Yang Signed-off-by: Bjorn Helgaas Acked-by: Huacai Chen Cc: stable@vger.kernel.org --- drivers/pci/controller/pci-loongson.c | 46 +++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/drivers/pci/controller/pci-loongson.c b/drivers/pci/controller/pci-loongson.c index d45e7b8dc530..8b34ccff073a 100644 --- a/drivers/pci/controller/pci-loongson.c +++ b/drivers/pci/controller/pci-loongson.c @@ -80,13 +80,49 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, DEV_LS7A_LPC, system_bus_quirk); +/* + * Some Loongson PCIe ports have hardware limitations on their Maximum Read + * Request Size. They can't handle anything larger than this. Sane + * firmware will set proper MRRS at boot, so we only need no_inc_mrrs for + * bridges. However, some MIPS Loongson firmware doesn't set MRRS properly, + * so we have to enforce maximum safe MRRS, which is 256 bytes. + */ +#ifdef CONFIG_MIPS +static void loongson_set_min_mrrs_quirk(struct pci_dev *pdev) +{ + struct pci_bus *bus = pdev->bus; + struct pci_dev *bridge; + static const struct pci_device_id bridge_devids[] = { + { PCI_VDEVICE(LOONGSON, DEV_LS2K_PCIE_PORT0) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT0) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT1) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT2) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT3) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT4) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT5) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT6) }, + { 0, }, + }; + + /* look for the matching bridge */ + while (!pci_is_root_bus(bus)) { + bridge = bus->self; + bus = bus->parent; + + if (pci_match_id(bridge_devids, bridge)) { + if (pcie_get_readrq(pdev) > 256) { + pci_info(pdev, "limiting MRRS to 256\n"); + pcie_set_readrq(pdev, 256); + } + break; + } + } +} +DECLARE_PCI_FIXUP_ENABLE(PCI_ANY_ID, PCI_ANY_ID, loongson_set_min_mrrs_quirk); +#endif + static void loongson_mrrs_quirk(struct pci_dev *pdev) { - /* - * Some Loongson PCIe ports have h/w limitations of maximum read - * request size. They can't handle anything larger than this. So - * force this limit on any devices attached under these ports. - */ struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus); bridge->no_inc_mrrs = 1; -- cgit From 4a073e813477be4ae95dfd23cb08baf36e93a29f Mon Sep 17 00:00:00 2001 From: angquan yu Date: Tue, 28 Nov 2023 16:11:05 -0600 Subject: KVM: selftests: Actually print out magic token in NX hugepages skip message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass MAGIC_TOKEN to __TEST_REQUIRE() when printing the help message about needing to pass a magic value to manually run the NX hugepages test, otherwise the help message will contain garbage. In file included from x86_64/nx_huge_pages_test.c:15: x86_64/nx_huge_pages_test.c: In function ‘main’: include/test_util.h:40:32: error: format ‘%d’ expects a matching ‘int’ argument [-Werror=format=] 40 | ksft_exit_skip("- " fmt "\n", ##__VA_ARGS__); \ | ^~~~ x86_64/nx_huge_pages_test.c:259:9: note: in expansion of macro ‘__TEST_REQUIRE’ 259 | __TEST_REQUIRE(token == MAGIC_TOKEN, | ^~~~~~~~~~~~~~ Signed-off-by: angquan yu Link: https://lore.kernel.org/r/20231128221105.63093-1-angquan21@gmail.com [sean: rewrite shortlog+changelog] Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 18ac5c1952a3..83e25bccc139 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -259,7 +259,7 @@ int main(int argc, char **argv) __TEST_REQUIRE(token == MAGIC_TOKEN, "This test must be run with the magic token %d.\n" "This is done by nx_huge_pages_test.sh, which\n" - "also handles environment setup for the test."); + "also handles environment setup for the test.", MAGIC_TOKEN); run_test(reclaim_period_ms, false, reboot_permissions); run_test(reclaim_period_ms, true, reboot_permissions); -- cgit From 96f124015f825a4a186b8497e20cf87f6519c7b4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 28 Oct 2023 20:34:53 +0100 Subject: KVM: selftests: add -MP to CFLAGS Using -MD without -MP causes build failures when a header file is deleted or moved. With -MP, the compiler will emit phony targets for the header files it lists as dependencies, and the Makefiles won't refuse to attempt to rebuild a C unit which no longer includes the deleted header. Signed-off-by: David Woodhouse Link: https://lore.kernel.org/r/9fc8b5395321abbfcaf5d78477a9a7cd350b08e4.camel@infradead.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 52c59bad7213..963435959a92 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -224,7 +224,7 @@ else LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ - -Wno-gnu-variable-sized-type-not-at-end -MD\ + -Wno-gnu-variable-sized-type-not-at-end -MD -MP \ -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \ -fno-builtin-strnlen \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -- cgit From 307004e8b254ad28e150b63f299ab9caa4bc7c3e Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Thu, 7 Dec 2023 22:07:23 +0100 Subject: hwmon: (corsair-psu) Fix probe when built-in It seems that when the driver is built-in, the HID bus is initialized after the driver is loaded, which whould cause module_hid_driver() to fail. Fix this by registering the driver after the HID bus using late_initcall() in accordance with other hwmon HID drivers. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20231207210723.222552-1-W_Armin@gmx.de [groeck: Dropped "compile tested" comment; the patch has been tested but the tester did not provide a Tested-by: tag] Signed-off-by: Guenter Roeck --- drivers/hwmon/corsair-psu.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/corsair-psu.c b/drivers/hwmon/corsair-psu.c index 904890598c11..2c7c92272fe3 100644 --- a/drivers/hwmon/corsair-psu.c +++ b/drivers/hwmon/corsair-psu.c @@ -899,7 +899,23 @@ static struct hid_driver corsairpsu_driver = { .reset_resume = corsairpsu_resume, #endif }; -module_hid_driver(corsairpsu_driver); + +static int __init corsair_init(void) +{ + return hid_register_driver(&corsairpsu_driver); +} + +static void __exit corsair_exit(void) +{ + hid_unregister_driver(&corsairpsu_driver); +} + +/* + * With module_init() the driver would load before the HID bus when + * built-in, so use late_initcall() instead. + */ +late_initcall(corsair_init); +module_exit(corsair_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Wilken Gottwalt "); -- cgit From 4cdf351d3630a640ab6a05721ef055b9df62277f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 7 May 2021 09:59:46 -0700 Subject: KVM: SVM: Update EFER software model on CR0 trap for SEV-ES In general, activating long mode involves setting the EFER_LME bit in the EFER register and then enabling the X86_CR0_PG bit in the CR0 register. At this point, the EFER_LMA bit will be set automatically by hardware. In the case of SVM/SEV guests where writes to CR0 are intercepted, it's necessary for the host to set EFER_LMA on behalf of the guest since hardware does not see the actual CR0 write. In the case of SEV-ES guests where writes to CR0 are trapped instead of intercepted, the hardware *does* see/record the write to CR0 before exiting and passing the value on to the host, so as part of enabling SEV-ES support commit f1c6366e3043 ("KVM: SVM: Add required changes to support intercepts under SEV-ES") dropped special handling of the EFER_LMA bit with the understanding that it would be set automatically. However, since the guest never explicitly sets the EFER_LMA bit, the host never becomes aware that it has been set. This becomes problematic when userspace tries to get/set the EFER values via KVM_GET_SREGS/KVM_SET_SREGS, since the EFER contents tracked by the host will be missing the EFER_LMA bit, and when userspace attempts to pass the EFER value back via KVM_SET_SREGS it will fail a sanity check that asserts that EFER_LMA should always be set when X86_CR0_PG and EFER_LME are set. Fix this by always inferring the value of EFER_LMA based on X86_CR0_PG and EFER_LME, regardless of whether or not SEV-ES is enabled. Fixes: f1c6366e3043 ("KVM: SVM: Add required changes to support intercepts under SEV-ES") Reported-by: Peter Gonda Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210507165947.2502412-2-seanjc@google.com> [A two year old patch that was revived after we noticed the failure in KVM_SET_SREGS and a similar patch was posted by Michael Roth. This is Sean's patch, but with Michael's more complete commit message. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 712146312358..f3bb30b40876 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1855,15 +1855,17 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) bool old_paging = is_paging(vcpu); #ifdef CONFIG_X86_64 - if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) { + if (vcpu->arch.efer & EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { vcpu->arch.efer |= EFER_LMA; - svm->vmcb->save.efer |= EFER_LMA | EFER_LME; + if (!vcpu->arch.guest_state_protected) + svm->vmcb->save.efer |= EFER_LMA | EFER_LME; } if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { vcpu->arch.efer &= ~EFER_LMA; - svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); + if (!vcpu->arch.guest_state_protected) + svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); } } #endif -- cgit From c12296bbecc488623b7d1932080e394d08f3226b Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Wed, 6 Dec 2023 13:37:18 +0100 Subject: team: Fix use-after-free when an option instance allocation fails In __team_options_register, team_options are allocated and appended to the team's option_list. If one option instance allocation fails, the "inst_rollback" cleanup path frees the previously allocated options but doesn't remove them from the team's option_list. This leaves dangling pointers that can be dereferenced later by other parts of the team driver that iterate over options. This patch fixes the cleanup path to remove the dangling pointers from the list. As far as I can tell, this uaf doesn't have much security implications since it would be fairly hard to exploit (an attacker would need to make the allocation of that specific small object fail) but it's still nice to fix. Cc: stable@vger.kernel.org Fixes: 80f7c6683fe0 ("team: add support for per-port options") Signed-off-by: Florent Revest Reviewed-by: Jiri Pirko Reviewed-by: Hangbin Liu Link: https://lore.kernel.org/r/20231206123719.1963153-1-revest@chromium.org Signed-off-by: Jakub Kicinski --- drivers/net/team/team.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 508d9a392ab1..f575f225d417 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -281,8 +281,10 @@ static int __team_options_register(struct team *team, return 0; inst_rollback: - for (i--; i >= 0; i--) + for (i--; i >= 0; i--) { __team_option_inst_del_option(team, dst_opts[i]); + list_del(&dst_opts[i]->list); + } i = option_count; alloc_rollback: -- cgit From a1664b991ac12b872be859ca03529c68c72795a2 Mon Sep 17 00:00:00 2001 From: Shinas Rasheed Date: Wed, 6 Dec 2023 05:52:27 -0800 Subject: octeon_ep: initialise control mbox tasks before using APIs Initialise various workqueue tasks and queue interrupt poll task before the first invocation of any control net APIs. Since octep_ctrl_net_get_info was called before the control net receive work task was initialised or even the interrupt poll task was queued, the function call wasn't returning actual firmware info queried from Octeon. Fixes: 8d6198a14e2b ("octeon_ep: support to fetch firmware info") Signed-off-by: Shinas Rasheed Reviewed-by: Michal Schmidt Link: https://lore.kernel.org/r/20231206135228.2591659-1-srasheed@marvell.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeon_ep/octep_main.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 552970c7dec0..a2b5a3f8df93 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -1193,6 +1193,13 @@ int octep_device_setup(struct octep_device *oct) if (ret) return ret; + INIT_WORK(&oct->tx_timeout_task, octep_tx_timeout_task); + INIT_WORK(&oct->ctrl_mbox_task, octep_ctrl_mbox_task); + INIT_DELAYED_WORK(&oct->intr_poll_task, octep_intr_poll_task); + oct->poll_non_ioq_intr = true; + queue_delayed_work(octep_wq, &oct->intr_poll_task, + msecs_to_jiffies(OCTEP_INTR_POLL_TIME_MSECS)); + atomic_set(&oct->hb_miss_cnt, 0); INIT_DELAYED_WORK(&oct->hb_task, octep_hb_timeout_task); @@ -1325,21 +1332,18 @@ static int octep_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_octep_config; } - octep_ctrl_net_get_info(octep_dev, OCTEP_CTRL_NET_INVALID_VFID, - &octep_dev->conf->fw_info); + err = octep_ctrl_net_get_info(octep_dev, OCTEP_CTRL_NET_INVALID_VFID, + &octep_dev->conf->fw_info); + if (err) { + dev_err(&pdev->dev, "Failed to get firmware info\n"); + goto register_dev_err; + } dev_info(&octep_dev->pdev->dev, "Heartbeat interval %u msecs Heartbeat miss count %u\n", octep_dev->conf->fw_info.hb_interval, octep_dev->conf->fw_info.hb_miss_count); queue_delayed_work(octep_wq, &octep_dev->hb_task, msecs_to_jiffies(octep_dev->conf->fw_info.hb_interval)); - INIT_WORK(&octep_dev->tx_timeout_task, octep_tx_timeout_task); - INIT_WORK(&octep_dev->ctrl_mbox_task, octep_ctrl_mbox_task); - INIT_DELAYED_WORK(&octep_dev->intr_poll_task, octep_intr_poll_task); - octep_dev->poll_non_ioq_intr = true; - queue_delayed_work(octep_wq, &octep_dev->intr_poll_task, - msecs_to_jiffies(OCTEP_INTR_POLL_TIME_MSECS)); - netdev->netdev_ops = &octep_netdev_ops; octep_set_ethtool_ops(netdev); netif_carrier_off(netdev); -- cgit From f4e6064c97c050bd9904925ff7d53d0c9954fc7b Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 6 Dec 2023 15:12:20 +0100 Subject: qca_debug: Prevent crash on TX ring changes The qca_spi driver stop and restart the SPI kernel thread (via ndo_stop & ndo_open) in case of TX ring changes. This is a big issue because it allows userspace to prevent restart of the SPI kernel thread (via signals). A subsequent change of TX ring wrongly assume a valid spi_thread pointer which result in a crash. So prevent this by stopping the network traffic handling and temporary park the SPI thread. Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20231206141222.52029-2-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qualcomm/qca_debug.c | 9 ++++----- drivers/net/ethernet/qualcomm/qca_spi.c | 12 ++++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c index 6f2fa2a42770..a5445252b0c4 100644 --- a/drivers/net/ethernet/qualcomm/qca_debug.c +++ b/drivers/net/ethernet/qualcomm/qca_debug.c @@ -263,7 +263,6 @@ qcaspi_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, struct netlink_ext_ack *extack) { - const struct net_device_ops *ops = dev->netdev_ops; struct qcaspi *qca = netdev_priv(dev); if ((ring->rx_pending) || @@ -271,14 +270,14 @@ qcaspi_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, (ring->rx_jumbo_pending)) return -EINVAL; - if (netif_running(dev)) - ops->ndo_stop(dev); + if (qca->spi_thread) + kthread_park(qca->spi_thread); qca->txr.count = max_t(u32, ring->tx_pending, TX_RING_MIN_LEN); qca->txr.count = min_t(u16, qca->txr.count, TX_RING_MAX_LEN); - if (netif_running(dev)) - ops->ndo_open(dev); + if (qca->spi_thread) + kthread_unpark(qca->spi_thread); return 0; } diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index bec723028e96..b0fad69bb755 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -580,6 +580,18 @@ qcaspi_spi_thread(void *data) netdev_info(qca->net_dev, "SPI thread created\n"); while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_park()) { + netif_tx_disable(qca->net_dev); + netif_carrier_off(qca->net_dev); + qcaspi_flush_tx_ring(qca); + kthread_parkme(); + if (qca->sync == QCASPI_SYNC_READY) { + netif_carrier_on(qca->net_dev); + netif_wake_queue(qca->net_dev); + } + continue; + } + if ((qca->intr_req == qca->intr_svc) && !qca->txr.skb[qca->txr.head]) schedule(); -- cgit From 96a7e861d9e04d07febd3011c30cd84cd141d81f Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 6 Dec 2023 15:12:21 +0100 Subject: qca_debug: Fix ethtool -G iface tx behavior After calling ethtool -g it was not possible to adjust the TX ring size again: # ethtool -g eth1 Ring parameters for eth1: Pre-set maximums: RX: 4 RX Mini: n/a RX Jumbo: n/a TX: 10 Current hardware settings: RX: 4 RX Mini: n/a RX Jumbo: n/a TX: 10 # ethtool -G eth1 tx 8 netlink error: Invalid argument The reason for this is that the readonly setting rx_pending get initialized and after that the range check in qcaspi_set_ringparam() fails regardless of the provided parameter. So fix this by accepting the exposed RX defaults. Instead of adding another magic number better use a new define here. Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") Suggested-by: Paolo Abeni Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20231206141222.52029-3-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qualcomm/qca_debug.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c index a5445252b0c4..1822f2ad8f0d 100644 --- a/drivers/net/ethernet/qualcomm/qca_debug.c +++ b/drivers/net/ethernet/qualcomm/qca_debug.c @@ -30,6 +30,8 @@ #define QCASPI_MAX_REGS 0x20 +#define QCASPI_RX_MAX_FRAMES 4 + static const u16 qcaspi_spi_regs[] = { SPI_REG_BFR_SIZE, SPI_REG_WRBUF_SPC_AVA, @@ -252,9 +254,9 @@ qcaspi_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, { struct qcaspi *qca = netdev_priv(dev); - ring->rx_max_pending = 4; + ring->rx_max_pending = QCASPI_RX_MAX_FRAMES; ring->tx_max_pending = TX_RING_MAX_LEN; - ring->rx_pending = 4; + ring->rx_pending = QCASPI_RX_MAX_FRAMES; ring->tx_pending = qca->txr.count; } @@ -265,7 +267,7 @@ qcaspi_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, { struct qcaspi *qca = netdev_priv(dev); - if ((ring->rx_pending) || + if (ring->rx_pending != QCASPI_RX_MAX_FRAMES || (ring->rx_mini_pending) || (ring->rx_jumbo_pending)) return -EINVAL; -- cgit From 1057812d146dd658c9a9a96d869c2551150207b5 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 6 Dec 2023 15:12:22 +0100 Subject: qca_spi: Fix reset behavior In case of a reset triggered by the QCA7000 itself, the behavior of the qca_spi driver was not quite correct: - in case of a pending RX frame decoding the drop counter must be incremented and decoding state machine reseted - also the reset counter must always be incremented regardless of sync state Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20231206141222.52029-4-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qualcomm/qca_spi.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index b0fad69bb755..5f3c11fb3fa2 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -620,11 +620,17 @@ qcaspi_spi_thread(void *data) if (intr_cause & SPI_INT_CPU_ON) { qcaspi_qca7k_sync(qca, QCASPI_EVENT_CPUON); + /* Frame decoding in progress */ + if (qca->frm_handle.state != qca->frm_handle.init) + qca->net_dev->stats.rx_dropped++; + + qcafrm_fsm_init_spi(&qca->frm_handle); + qca->stats.device_reset++; + /* not synced. */ if (qca->sync != QCASPI_SYNC_READY) continue; - qca->stats.device_reset++; netif_wake_queue(qca->net_dev); netif_carrier_on(qca->net_dev); } -- cgit From c65efe3685f5d150eeca5599afeabdc85da899d1 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 16 Nov 2023 16:03:29 -0800 Subject: cxl/cdat: Free correct buffer on checksum error The new 6.7-rc1 kernel now checks the checksum on CDAT data. While using a branch of Fan's DCD qemu work (and specifying DCD devices), the following splat was observed. WARNING: CPU: 1 PID: 1384 at drivers/base/devres.c:1064 devm_kfree+0x4f/0x60 ... RIP: 0010:devm_kfree+0x4f/0x60 ... ? devm_kfree+0x4f/0x60 read_cdat_data+0x1a0/0x2a0 [cxl_core] cxl_port_probe+0xdf/0x200 [cxl_port] ... The issue in qemu is still unknown but the spat is a straight forward bug in the CDAT checksum processing code. Use a CDAT buffer variable to ensure the devm_free() works correctly on error. Fixes: 670e4e88f3b1 ("cxl: Add checksum verification to CDAT from CXL") Signed-off-by: Ira Weiny Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Reviewed-by: Robert Richter Link: http://lore.kernel.org/r/20231116-fix-cdat-devm-free-v1-1-b148b40707d7@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index eff20e83d0a6..37e1652afbc7 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -620,7 +620,7 @@ void read_cdat_data(struct cxl_port *port) struct pci_dev *pdev = NULL; struct cxl_memdev *cxlmd; size_t cdat_length; - void *cdat_table; + void *cdat_table, *cdat_buf; int rc; if (is_cxl_memdev(uport)) { @@ -651,16 +651,15 @@ void read_cdat_data(struct cxl_port *port) return; } - cdat_table = devm_kzalloc(dev, cdat_length + sizeof(__le32), - GFP_KERNEL); - if (!cdat_table) + cdat_buf = devm_kzalloc(dev, cdat_length + sizeof(__le32), GFP_KERNEL); + if (!cdat_buf) return; - rc = cxl_cdat_read_table(dev, cdat_doe, cdat_table, &cdat_length); + rc = cxl_cdat_read_table(dev, cdat_doe, cdat_buf, &cdat_length); if (rc) goto err; - cdat_table = cdat_table + sizeof(__le32); + cdat_table = cdat_buf + sizeof(__le32); if (cdat_checksum(cdat_table, cdat_length)) goto err; @@ -670,7 +669,7 @@ void read_cdat_data(struct cxl_port *port) err: /* Don't leave table data allocated on error */ - devm_kfree(dev, cdat_table); + devm_kfree(dev, cdat_buf); dev_err(dev, "Failed to read/validate CDAT.\n"); } EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); -- cgit From a45f1e46274256852990c479fbb1198a7d84529b Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 7 Dec 2023 21:23:58 +0100 Subject: MAINTAINERS: remove myself as maintainer of SMC I changed responsibilities some time ago, its time to remove myself as maintainer of the SMC component. Signed-off-by: Karsten Graul Signed-off-by: Wenjia Zhang Link: https://lore.kernel.org/r/20231207202358.53502-1-wenjia@linux.ibm.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a0fb0df07b43..c17c66810598 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19638,7 +19638,6 @@ S: Maintained F: drivers/misc/sgi-xp/ SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS -M: Karsten Graul M: Wenjia Zhang M: Jan Karcher R: D. Wythe -- cgit From 9c25aae0132b8babbe71ebb2160186d69231d9b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Dec 2023 18:13:42 +0000 Subject: tcp: fix tcp_disordered_ack() vs usec TS resolution After commit 939463016b7a ("tcp: change data receiver flowlabel after one dup") we noticed an increase of TCPACKSkippedPAWS events. Neal Cardwell tracked the issue to tcp_disordered_ack() assumption about remote peer TS clock. RFC 1323 & 7323 are suggesting the following: "timestamp clock frequency in the range 1 ms to 1 sec per tick between 1ms and 1sec." This has to be adjusted for 1 MHz clock frequency. This hints at reorders of SACK packets on send side, this might deserve a future patch. (skb->ooo_okay is always set for pure ACK packets) Fixes: 614e8316aa4c ("tcp: add support for usec resolution in TCP TS values") Co-developed-by: Neal Cardwell Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Cc: David Morley Link: https://lore.kernel.org/r/20231207181342.525181-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 90de838a2745..701cb87043f2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4368,6 +4368,23 @@ EXPORT_SYMBOL(tcp_do_parse_auth_options); * up to bandwidth of 18Gigabit/sec. 8) ] */ +/* Estimates max number of increments of remote peer TSval in + * a replay window (based on our current RTO estimation). + */ +static u32 tcp_tsval_replay(const struct sock *sk) +{ + /* If we use usec TS resolution, + * then expect the remote peer to use the same resolution. + */ + if (tcp_sk(sk)->tcp_usec_ts) + return inet_csk(sk)->icsk_rto * (USEC_PER_SEC / HZ); + + /* RFC 7323 recommends a TSval clock between 1ms and 1sec. + * We know that some OS (including old linux) can use 1200 Hz. + */ + return inet_csk(sk)->icsk_rto * 1200 / HZ; +} + static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) { const struct tcp_sock *tp = tcp_sk(sk); @@ -4375,7 +4392,7 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) u32 seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; - return (/* 1. Pure ACK with correct sequence number. */ + return /* 1. Pure ACK with correct sequence number. */ (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) && /* 2. ... and duplicate ACK. */ @@ -4385,7 +4402,8 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) && /* 4. ... and sits in replay window. */ - (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ); + (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= + tcp_tsval_replay(sk); } static inline bool tcp_paws_discard(const struct sock *sk, -- cgit From 9ef7c58f5abe41e6d91f37f28fe2d851ffedd92a Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 7 Dec 2023 16:16:55 -0800 Subject: bnxt_en: Clear resource reservation during resume We are issuing HWRM_FUNC_RESET cmd to reset the device including all reserved resources, but not clearing the reservations within the driver struct. As a result, when the driver re-initializes as part of resume, it believes that there is no need to do any resource reservation and goes ahead and tries to allocate rings which will eventually fail beyond a certain number pre-reserved by the firmware. Fixes: 674f50a5b026 ("bnxt_en: Implement new method to reserve rings.") Reviewed-by: Kalesh AP Reviewed-by: Ajit Khaparde Reviewed-by: Andy Gospodarek Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20231208001658.14230-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d0359b569afe..72f2fc983940 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13940,6 +13940,8 @@ static int bnxt_resume(struct device *device) if (rc) goto resume_exit; + bnxt_clear_reservations(bp, true); + if (bnxt_hwrm_func_drv_rgtr(bp, NULL, 0, false)) { rc = -ENODEV; goto resume_exit; -- cgit From aded5d1feb08e48d544845d3594d70c4d5fe6e54 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Thu, 7 Dec 2023 16:16:56 -0800 Subject: bnxt_en: Fix skb recycling logic in bnxt_deliver_skb() Receive SKBs can go through the VF-rep path or the normal path. skb_mark_for_recycle() is only called for the normal path. Fix it to do it for both paths to fix possible stalled page pool shutdown errors. Fixes: 86b05508f775 ("bnxt_en: Use the unified RX page pool buffers for XDP and non-XDP") Reviewed-by: Somnath Kotur Reviewed-by: Andy Gospodarek Reviewed-by: Vikas Gupta Signed-off-by: Sreekanth Reddy Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20231208001658.14230-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 72f2fc983940..b4a5311bdeb5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1748,13 +1748,14 @@ static void bnxt_tpa_agg(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi, struct sk_buff *skb) { + skb_mark_for_recycle(skb); + if (skb->dev != bp->dev) { /* this packet belongs to a vf-rep */ bnxt_vf_rep_rx(bp, skb); return; } skb_record_rx_queue(skb, bnapi->index); - skb_mark_for_recycle(skb); napi_gro_receive(&bnapi->napi, skb); } -- cgit From bd6781c18cb5b5e5d8c5873fa9a51668e89ec76e Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 7 Dec 2023 16:16:57 -0800 Subject: bnxt_en: Fix wrong return value check in bnxt_close_nic() The wait_event_interruptible_timeout() function returns 0 if the timeout elapsed, -ERESTARTSYS if it was interrupted by a signal, and the remaining jiffies otherwise if the condition evaluated to true before the timeout elapsed. Driver should have checked for zero return value instead of a positive value. MChan: Print a warning for -ERESTARTSYS. The close operation will proceed anyway when wait_event_interruptible_timeout() returns for any reason. Since we do the close no matter what, we should not return this error code to the caller. Change bnxt_close_nic() to a void function and remove all error handling from some of the callers. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Reviewed-by: Andy Gospodarek Reviewed-by: Vikas Gupta Reviewed-by: Somnath Kotur Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20231208001658.14230-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++++++------ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 11 ++--------- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 19 ++++--------------- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 5 ++--- 5 files changed, 16 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b4a5311bdeb5..aa1f5b776b5a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10732,10 +10732,8 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bnxt_free_mem(bp, irq_re_init); } -int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) +void bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { - int rc = 0; - if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { /* If we get here, it means firmware reset is in progress * while we are trying to close. We can safely proceed with @@ -10750,15 +10748,18 @@ int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) #ifdef CONFIG_BNXT_SRIOV if (bp->sriov_cfg) { + int rc; + rc = wait_event_interruptible_timeout(bp->sriov_cfg_wait, !bp->sriov_cfg, BNXT_SRIOV_CFG_WAIT_TMO); - if (rc) - netdev_warn(bp->dev, "timeout waiting for SRIOV config operation to complete!\n"); + if (!rc) + netdev_warn(bp->dev, "timeout waiting for SRIOV config operation to complete, proceeding to close!\n"); + else if (rc < 0) + netdev_warn(bp->dev, "SRIOV config operation interrupted, proceeding to close!\n"); } #endif __bnxt_close_nic(bp, irq_re_init, link_re_init); - return rc; } static int bnxt_close(struct net_device *dev) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index e702dbc3e6b1..0488b0466015 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2375,7 +2375,7 @@ int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); void bnxt_half_close_nic(struct bnxt *bp); void bnxt_reenable_sriov(struct bnxt *bp); -int bnxt_close_nic(struct bnxt *, bool, bool); +void bnxt_close_nic(struct bnxt *, bool, bool); void bnxt_get_ring_err_stats(struct bnxt *bp, struct bnxt_total_ring_err_stats *stats); int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index f302dac56599..89809f1b129c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -449,15 +449,8 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, return -ENODEV; } bnxt_ulp_stop(bp); - if (netif_running(bp->dev)) { - rc = bnxt_close_nic(bp, true, true); - if (rc) { - NL_SET_ERR_MSG_MOD(extack, "Failed to close"); - dev_close(bp->dev); - rtnl_unlock(); - break; - } - } + if (netif_running(bp->dev)) + bnxt_close_nic(bp, true, true); bnxt_vf_reps_free(bp); rc = bnxt_hwrm_func_drv_unrgtr(bp); if (rc) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index f3f384773ac0..5f67a7f94e7d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -165,9 +165,8 @@ static int bnxt_set_coalesce(struct net_device *dev, reset_coalesce: if (test_bit(BNXT_STATE_OPEN, &bp->state)) { if (update_stats) { - rc = bnxt_close_nic(bp, true, false); - if (!rc) - rc = bnxt_open_nic(bp, true, false); + bnxt_close_nic(bp, true, false); + rc = bnxt_open_nic(bp, true, false); } else { rc = bnxt_hwrm_set_coal(bp); } @@ -972,12 +971,7 @@ static int bnxt_set_channels(struct net_device *dev, * before PF unload */ } - rc = bnxt_close_nic(bp, true, false); - if (rc) { - netdev_err(bp->dev, "Set channel failure rc :%x\n", - rc); - return rc; - } + bnxt_close_nic(bp, true, false); } if (sh) { @@ -4042,12 +4036,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, bnxt_run_fw_tests(bp, test_mask, &test_results); } else { bnxt_ulp_stop(bp); - rc = bnxt_close_nic(bp, true, false); - if (rc) { - etest->flags |= ETH_TEST_FL_FAILED; - bnxt_ulp_start(bp, rc); - return; - } + bnxt_close_nic(bp, true, false); bnxt_run_fw_tests(bp, test_mask, &test_results); buf[BNXT_MACLPBK_TEST_IDX] = 1; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index f3886710e778..6e3da3362bd6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -521,9 +521,8 @@ static int bnxt_hwrm_ptp_cfg(struct bnxt *bp) if (netif_running(bp->dev)) { if (ptp->rx_filter == HWTSTAMP_FILTER_ALL) { - rc = bnxt_close_nic(bp, false, false); - if (!rc) - rc = bnxt_open_nic(bp, false, false); + bnxt_close_nic(bp, false, false); + rc = bnxt_open_nic(bp, false, false); } else { bnxt_ptp_cfg_tstamp_filters(bp); } -- cgit From c13e268c0768659cdaae4bfe2fb24860bcc8ddb4 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 7 Dec 2023 16:16:58 -0800 Subject: bnxt_en: Fix HWTSTAMP_FILTER_ALL packet timestamp logic When the chip is configured to timestamp all receive packets, the timestamp in the RX completion is only valid if the metadata present flag is not set for packets received on the wire. In addition, internal loopback packets will never have a valid timestamp and the timestamp field will always be zero. We must exclude any 0 value in the timestamp field because there is no way to determine if it is a loopback packet or not. Add a new function bnxt_rx_ts_valid() to check for all timestamp valid conditions. Fixes: 66ed81dcedc6 ("bnxt_en: Enable packet timestamping for all RX packets") Reviewed-by: Andy Gospodarek Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20231208001658.14230-5-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 20 +++++++++++++++++--- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 8 +++++++- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index aa1f5b776b5a..579eebb6fc56 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1759,6 +1759,21 @@ static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi, napi_gro_receive(&bnapi->napi, skb); } +static bool bnxt_rx_ts_valid(struct bnxt *bp, u32 flags, + struct rx_cmp_ext *rxcmp1, u32 *cmpl_ts) +{ + u32 ts = le32_to_cpu(rxcmp1->rx_cmp_timestamp); + + if (BNXT_PTP_RX_TS_VALID(flags)) + goto ts_valid; + if (!bp->ptp_all_rx_tstamp || !ts || !BNXT_ALL_RX_TS_VALID(flags)) + return false; + +ts_valid: + *cmpl_ts = ts; + return true; +} + /* returns the following: * 1 - 1 packet successfully received * 0 - successful TPA_START, packet not completed yet @@ -1784,6 +1799,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, struct sk_buff *skb; struct xdp_buff xdp; u32 flags, misc; + u32 cmpl_ts; void *data; int rc = 0; @@ -2006,10 +2022,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } } - if (unlikely((flags & RX_CMP_FLAGS_ITYPES_MASK) == - RX_CMP_FLAGS_ITYPE_PTP_W_TS) || bp->ptp_all_rx_tstamp) { + if (bnxt_rx_ts_valid(bp, flags, rxcmp1, &cmpl_ts)) { if (bp->flags & BNXT_FLAG_CHIP_P5) { - u32 cmpl_ts = le32_to_cpu(rxcmp1->rx_cmp_timestamp); u64 ns, ts; if (!bnxt_get_rx_ts_p5(bp, &ts, cmpl_ts)) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 0488b0466015..a7d7b09ea162 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -161,7 +161,7 @@ struct rx_cmp { #define RX_CMP_FLAGS_ERROR (1 << 6) #define RX_CMP_FLAGS_PLACEMENT (7 << 7) #define RX_CMP_FLAGS_RSS_VALID (1 << 10) - #define RX_CMP_FLAGS_UNUSED (1 << 11) + #define RX_CMP_FLAGS_PKT_METADATA_PRESENT (1 << 11) #define RX_CMP_FLAGS_ITYPES_SHIFT 12 #define RX_CMP_FLAGS_ITYPES_MASK 0xf000 #define RX_CMP_FLAGS_ITYPE_UNKNOWN (0 << 12) @@ -188,6 +188,12 @@ struct rx_cmp { __le32 rx_cmp_rss_hash; }; +#define BNXT_PTP_RX_TS_VALID(flags) \ + (((flags) & RX_CMP_FLAGS_ITYPES_MASK) == RX_CMP_FLAGS_ITYPE_PTP_W_TS) + +#define BNXT_ALL_RX_TS_VALID(flags) \ + !((flags) & RX_CMP_FLAGS_PKT_METADATA_PRESENT) + #define RX_CMP_HASH_VALID(rxcmp) \ ((rxcmp)->rx_cmp_len_flags_type & cpu_to_le32(RX_CMP_FLAGS_RSS_VALID)) -- cgit From eea673e9d5ea994c60b550ffb684413d3759b3f4 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Sat, 9 Dec 2023 15:49:15 +0800 Subject: LoongArch: Apply dynamic relocations for LLD For the following assembly code: .text .global func func: nop .data var: .dword func When linked with `-pie`, GNU LD populates the `var` variable with the pre-relocated value of `func`. However, LLVM LLD does not exhibit the same behavior. This issue also arises with the `kernel_entry` in arch/ loongarch/kernel/head.S: _head: .word MZ_MAGIC /* "MZ", MS-DOS header */ .org 0x8 .dword kernel_entry /* Kernel entry point */ The correct kernel entry from the MS-DOS header is crucial for jumping to vmlinux from zboot. This necessity is why the compressed relocatable kernel compiled by Clang encounters difficulties in booting. To address this problem, it is proposed to apply dynamic relocations to place with `--apply-dynamic-relocs`. Link: https://github.com/ClangBuiltLinux/linux/issues/1962 Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 204b94b2e6aa..4ba8d67ddb09 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -83,7 +83,7 @@ endif ifeq ($(CONFIG_RELOCATABLE),y) KBUILD_CFLAGS_KERNEL += -fPIE -LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext +LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext $(call ld-option, --apply-dynamic-relocs) endif cflags-y += $(call cc-option, -mno-check-zero-division) -- cgit From 8146c5b349074da7732f1d45eb4a5f9fd192c7c1 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sat, 9 Dec 2023 15:49:15 +0800 Subject: LoongArch: Slightly clean up drdtime() As we are just discarding the stable clock ID, simply write it into $zero instead of allocating a temporary register. Signed-off-by: Xi Ruoyao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/loongarch.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 9b4957cefa8a..46366e783c84 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -1098,12 +1098,11 @@ static __always_inline u64 drdtime(void) { - int rID = 0; u64 val = 0; __asm__ __volatile__( - "rdtime.d %0, %1 \n\t" - : "=r"(val), "=r"(rID) + "rdtime.d %0, $zero\n\t" + : "=r"(val) : ); return val; -- cgit From 97ceddbc9404a7d1e2c4049435bff29427d762cc Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Sat, 9 Dec 2023 15:49:15 +0800 Subject: LoongArch: Set unwind stack type to unknown rather than set error flag During unwinding, unwind_done() is used as an end condition. Normally it unwind to the user stack and then set the stack type to unknown, which is a normal exit. When something unexpected happens in unwind process and we cannot unwind anymore, we should set the error flag, and also set the stack type to unknown to indicate that the unwind process can not continue. The error flag emphasizes that the unwind process produce an unexpected error. There is no unexpected things when we unwind the PT_REGS in the top of IRQ stack and find out that is an user mode PT_REGS. Thus, we should not set error flag and just set stack type to unknown. Reported-by: Hengqi Chen Acked-by: Hengqi Chen Signed-off-by: Jinyang He Signed-off-by: Huacai Chen --- arch/loongarch/kernel/stacktrace.c | 2 +- arch/loongarch/kernel/unwind.c | 1 - arch/loongarch/kernel/unwind_prologue.c | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c index 92270f14db94..f623feb2129f 100644 --- a/arch/loongarch/kernel/stacktrace.c +++ b/arch/loongarch/kernel/stacktrace.c @@ -32,7 +32,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, } for (unwind_start(&state, task, regs); - !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) { + !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); if (!addr || !consume_entry(cookie, addr)) break; diff --git a/arch/loongarch/kernel/unwind.c b/arch/loongarch/kernel/unwind.c index ba324ba76fa1..a463d6961344 100644 --- a/arch/loongarch/kernel/unwind.c +++ b/arch/loongarch/kernel/unwind.c @@ -28,6 +28,5 @@ bool default_next_frame(struct unwind_state *state) } while (!get_stack_info(state->sp, state->task, info)); - state->error = true; return false; } diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 55afc27320e1..929ae240280a 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -227,7 +227,7 @@ static bool next_frame(struct unwind_state *state) } while (!get_stack_info(state->sp, state->task, info)); out: - state->error = true; + state->stack_info.type = STACK_TYPE_UNKNOWN; return false; } -- cgit From d6c5f06e46a836e6a70c7cfd95bb38a67d9252ec Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sat, 9 Dec 2023 15:49:15 +0800 Subject: LoongArch: Preserve syscall nr across execve() Currently, we store syscall nr in pt_regs::regs[11] and syscall execve() accidentally overrides it during its execution: sys_execve() -> do_execve() -> do_execveat_common() -> bprm_execve() -> exec_binprm() -> search_binary_handler() -> load_elf_binary() -> ELF_PLAT_INIT() ELF_PLAT_INIT() reset regs[11] to 0, so in syscall_exit_to_user_mode() we later get a wrong syscall nr. This breaks tools like execsnoop since it relies on execve() tracepoints. Skip pt_regs::regs[11] reset in ELF_PLAT_INIT() to fix the issue. Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h index b9a4ab54285c..9b16a3b8e706 100644 --- a/arch/loongarch/include/asm/elf.h +++ b/arch/loongarch/include/asm/elf.h @@ -293,7 +293,7 @@ extern const char *__elf_platform; #define ELF_PLAT_INIT(_r, load_addr) do { \ _r->regs[1] = _r->regs[2] = _r->regs[3] = _r->regs[4] = 0; \ _r->regs[5] = _r->regs[6] = _r->regs[7] = _r->regs[8] = 0; \ - _r->regs[9] = _r->regs[10] = _r->regs[11] = _r->regs[12] = 0; \ + _r->regs[9] = _r->regs[10] /* syscall n */ = _r->regs[12] = 0; \ _r->regs[13] = _r->regs[14] = _r->regs[15] = _r->regs[16] = 0; \ _r->regs[17] = _r->regs[18] = _r->regs[19] = _r->regs[20] = 0; \ _r->regs[21] = _r->regs[22] = _r->regs[23] = _r->regs[24] = 0; \ -- cgit From fe5757553bf9ebe45ae8ecab5922f6937c8d8dfc Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sat, 9 Dec 2023 15:49:16 +0800 Subject: LoongArch: BPF: Don't sign extend memory load operand The `cgrp_local_storage` test triggers a kernel panic like: # ./test_progs -t cgrp_local_storage Can't find bpf_testmod.ko kernel module: -2 WARNING! Selftests relying on bpf_testmod.ko will be skipped. [ 550.930632] CPU 1 Unable to handle kernel paging request at virtual address 0000000000000080, era == ffff80000200be34, ra == ffff80000200be00 [ 550.931781] Oops[#1]: [ 550.931966] CPU: 1 PID: 1303 Comm: test_progs Not tainted 6.7.0-rc2-loong-devel-g2f56bb0d2327 #35 a896aca3f4164f09cc346f89f2e09832e07be5f6 [ 550.932215] Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 2/2/2022 [ 550.932403] pc ffff80000200be34 ra ffff80000200be00 tp 9000000108350000 sp 9000000108353dc0 [ 550.932545] a0 0000000000000000 a1 0000000000000517 a2 0000000000000118 a3 00007ffffbb15558 [ 550.932682] a4 00007ffffbb15620 a5 90000001004e7700 a6 0000000000000021 a7 0000000000000118 [ 550.932824] t0 ffff80000200bdc0 t1 0000000000000517 t2 0000000000000517 t3 00007ffff1c06ee0 [ 550.932961] t4 0000555578ae04d0 t5 fffffffffffffff8 t6 0000000000000004 t7 0000000000000020 [ 550.933097] t8 0000000000000040 u0 00000000000007b8 s9 9000000108353e00 s0 90000001004e7700 [ 550.933241] s1 9000000004005000 s2 0000000000000001 s3 0000000000000000 s4 0000555555eb2ec8 [ 550.933379] s5 00007ffffbb15bb8 s6 00007ffff1dafd60 s7 000055555663f610 s8 00007ffff1db0050 [ 550.933520] ra: ffff80000200be00 bpf_prog_98f1b9e767be2a84_on_enter+0x40/0x200 [ 550.933911] ERA: ffff80000200be34 bpf_prog_98f1b9e767be2a84_on_enter+0x74/0x200 [ 550.934105] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) [ 550.934596] PRMD: 00000004 (PPLV0 +PIE -PWE) [ 550.934712] EUEN: 00000003 (+FPE +SXE -ASXE -BTE) [ 550.934836] ECFG: 00071c1c (LIE=2-4,10-12 VS=7) [ 550.934976] ESTAT: 00010000 [PIL] (IS= ECode=1 EsubCode=0) [ 550.935097] BADV: 0000000000000080 [ 550.935181] PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) [ 550.935291] Modules linked in: [ 550.935391] Process test_progs (pid: 1303, threadinfo=000000006c3b1c41, task=0000000061f84a55) [ 550.935643] Stack : 00007ffffbb15bb8 0000555555eb2ec8 0000000000000000 0000000000000001 [ 550.935844] 9000000004005000 ffff80001b864000 00007ffffbb15450 90000000029aa034 [ 550.935990] 0000000000000000 9000000108353ec0 0000000000000118 d07d9dfb09721a09 [ 550.936175] 0000000000000001 0000000000000000 9000000108353ec0 0000000000000118 [ 550.936314] 9000000101d46ad0 900000000290abf0 000055555663f610 0000000000000000 [ 550.936479] 0000000000000003 9000000108353ec0 00007ffffbb15450 90000000029d7288 [ 550.936635] 00007ffff1dafd60 000055555663f610 0000000000000000 0000000000000003 [ 550.936779] 9000000108353ec0 90000000035dd1f0 00007ffff1dafd58 9000000002841c5c [ 550.936939] 0000000000000119 0000555555eea5a8 00007ffff1d78780 00007ffffbb153e0 [ 550.937083] ffffffffffffffda 00007ffffbb15518 0000000000000040 00007ffffbb15558 [ 550.937224] ... [ 550.937299] Call Trace: [ 550.937521] [] bpf_prog_98f1b9e767be2a84_on_enter+0x74/0x200 [ 550.937910] [<90000000029aa034>] bpf_trace_run2+0x90/0x154 [ 550.938105] [<900000000290abf0>] syscall_trace_enter.isra.0+0x1cc/0x200 [ 550.938224] [<90000000035dd1f0>] do_syscall+0x48/0x94 [ 550.938319] [<9000000002841c5c>] handle_syscall+0xbc/0x158 [ 550.938477] [ 550.938607] Code: 580009ae 50016000 262402e4 <28c20085> 14092084 03a00084 16000024 03240084 00150006 [ 550.938851] [ 550.939021] ---[ end trace 0000000000000000 ]--- Further investigation shows that this panic is triggered by memory load operations: ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); The expression `task->cgroups->dfl_cgrp` involves two memory load. Since the field offset fits in imm12 or imm14, we use ldd or ldptrd instructions. But both instructions have the side effect that it will signed-extended the imm operand. Finally, we got the wrong addresses and panics is inevitable. Use a generic ldxd instruction to avoid this kind of issues. With this change, we have: # ./test_progs -t cgrp_local_storage Can't find bpf_testmod.ko kernel module: -2 WARNING! Selftests relying on bpf_testmod.ko will be skipped. test_cgrp_local_storage:PASS:join_cgroup /cgrp_local_storage 0 nsec #48/1 cgrp_local_storage/tp_btf:OK test_attach_cgroup:PASS:skel_open 0 nsec test_attach_cgroup:PASS:prog_attach 0 nsec test_attach_cgroup:PASS:prog_attach 0 nsec libbpf: prog 'update_cookie_tracing': failed to attach: ERROR: strerror_r(-524)=22 test_attach_cgroup:FAIL:prog_attach unexpected error: -524 #48/2 cgrp_local_storage/attach_cgroup:FAIL test_recursion:PASS:skel_open_and_load 0 nsec libbpf: prog 'on_lookup': failed to attach: ERROR: strerror_r(-524)=22 libbpf: prog 'on_lookup': failed to auto-attach: -524 test_recursion:FAIL:skel_attach unexpected error: -524 (errno 524) #48/3 cgrp_local_storage/recursion:FAIL #48/4 cgrp_local_storage/negative:OK #48/5 cgrp_local_storage/cgroup_iter_sleepable:OK test_yes_rcu_lock:PASS:skel_open 0 nsec test_yes_rcu_lock:PASS:skel_load 0 nsec libbpf: prog 'yes_rcu_lock': failed to attach: ERROR: strerror_r(-524)=22 libbpf: prog 'yes_rcu_lock': failed to auto-attach: -524 test_yes_rcu_lock:FAIL:skel_attach unexpected error: -524 (errno 524) #48/6 cgrp_local_storage/yes_rcu_lock:FAIL #48/7 cgrp_local_storage/no_rcu_lock:OK #48 cgrp_local_storage:FAIL All error logs: test_cgrp_local_storage:PASS:join_cgroup /cgrp_local_storage 0 nsec test_attach_cgroup:PASS:skel_open 0 nsec test_attach_cgroup:PASS:prog_attach 0 nsec test_attach_cgroup:PASS:prog_attach 0 nsec libbpf: prog 'update_cookie_tracing': failed to attach: ERROR: strerror_r(-524)=22 test_attach_cgroup:FAIL:prog_attach unexpected error: -524 #48/2 cgrp_local_storage/attach_cgroup:FAIL test_recursion:PASS:skel_open_and_load 0 nsec libbpf: prog 'on_lookup': failed to attach: ERROR: strerror_r(-524)=22 libbpf: prog 'on_lookup': failed to auto-attach: -524 test_recursion:FAIL:skel_attach unexpected error: -524 (errno 524) #48/3 cgrp_local_storage/recursion:FAIL test_yes_rcu_lock:PASS:skel_open 0 nsec test_yes_rcu_lock:PASS:skel_load 0 nsec libbpf: prog 'yes_rcu_lock': failed to attach: ERROR: strerror_r(-524)=22 libbpf: prog 'yes_rcu_lock': failed to auto-attach: -524 test_yes_rcu_lock:FAIL:skel_attach unexpected error: -524 (errno 524) #48/6 cgrp_local_storage/yes_rcu_lock:FAIL #48 cgrp_local_storage:FAIL Summary: 0/4 PASSED, 0 SKIPPED, 1 FAILED No panics any more (The test still failed because lack of BPF trampoline which I am actively working on). Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support") Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 169ff8b3915e..d2161c95ceb4 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -988,14 +988,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext } break; case BPF_DW: - if (is_signed_imm12(off)) { - emit_insn(ctx, ldd, dst, src, off); - } else if (is_signed_imm14(off)) { - emit_insn(ctx, ldptrd, dst, src, off); - } else { - move_imm(ctx, t1, off, is32); - emit_insn(ctx, ldxd, dst, src, t1); - } + move_imm(ctx, t1, off, is32); + emit_insn(ctx, ldxd, dst, src, t1); break; } -- cgit From 5d47ec2e6f4c64e30e392cfe9532df98c9beb106 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sat, 9 Dec 2023 15:49:16 +0800 Subject: LoongArch: BPF: Don't sign extend function return value The `cls_redirect` test triggers a kernel panic like: # ./test_progs -t cls_redirect Can't find bpf_testmod.ko kernel module: -2 WARNING! Selftests relying on bpf_testmod.ko will be skipped. [ 30.938489] CPU 3 Unable to handle kernel paging request at virtual address fffffffffd814de0, era == ffff800002009fb8, ra == ffff800002009f9c [ 30.939331] Oops[#1]: [ 30.939513] CPU: 3 PID: 1260 Comm: test_progs Not tainted 6.7.0-rc2-loong-devel-g2f56bb0d2327 #35 a896aca3f4164f09cc346f89f2e09832e07be5f6 [ 30.939732] Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 2/2/2022 [ 30.939901] pc ffff800002009fb8 ra ffff800002009f9c tp 9000000104da4000 sp 9000000104da7ab0 [ 30.940038] a0 fffffffffd814de0 a1 9000000104da7a68 a2 0000000000000000 a3 9000000104da7c10 [ 30.940183] a4 9000000104da7c14 a5 0000000000000002 a6 0000000000000021 a7 00005555904d7f90 [ 30.940321] t0 0000000000000110 t1 0000000000000000 t2 fffffffffd814de0 t3 0004c4b400000000 [ 30.940456] t4 ffffffffffffffff t5 00000000c3f63600 t6 0000000000000000 t7 0000000000000000 [ 30.940590] t8 000000000006d803 u0 0000000000000020 s9 9000000104da7b10 s0 900000010504c200 [ 30.940727] s1 fffffffffd814de0 s2 900000010504c200 s3 9000000104da7c10 s4 9000000104da7ad0 [ 30.940866] s5 0000000000000000 s6 90000000030e65bc s7 9000000104da7b44 s8 90000000044f6fc0 [ 30.941015] ra: ffff800002009f9c bpf_prog_846803e5ae81417f_cls_redirect+0xa0/0x590 [ 30.941535] ERA: ffff800002009fb8 bpf_prog_846803e5ae81417f_cls_redirect+0xbc/0x590 [ 30.941696] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) [ 30.942224] PRMD: 00000004 (PPLV0 +PIE -PWE) [ 30.942330] EUEN: 00000003 (+FPE +SXE -ASXE -BTE) [ 30.942453] ECFG: 00071c1c (LIE=2-4,10-12 VS=7) [ 30.942612] ESTAT: 00010000 [PIL] (IS= ECode=1 EsubCode=0) [ 30.942764] BADV: fffffffffd814de0 [ 30.942854] PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) [ 30.942974] Modules linked in: [ 30.943078] Process test_progs (pid: 1260, threadinfo=00000000ce303226, task=000000007d10bb76) [ 30.943306] Stack : 900000010a064000 90000000044f6fc0 9000000104da7b48 0000000000000000 [ 30.943495] 0000000000000000 9000000104da7c14 9000000104da7c10 900000010504c200 [ 30.943626] 0000000000000001 ffff80001b88c000 9000000104da7b70 90000000030e6668 [ 30.943785] 0000000000000000 9000000104da7b58 ffff80001b88c048 9000000003d05000 [ 30.943936] 900000000303ac88 0000000000000000 0000000000000000 9000000104da7b70 [ 30.944091] 0000000000000000 0000000000000001 0000000731eeab00 0000000000000000 [ 30.944245] ffff80001b88c000 0000000000000000 0000000000000000 54b99959429f83b8 [ 30.944402] ffff80001b88c000 90000000044f6fc0 9000000101d70000 ffff80001b88c000 [ 30.944538] 000000000000005a 900000010504c200 900000010a064000 900000010a067000 [ 30.944697] 9000000104da7d88 0000000000000000 9000000003d05000 90000000030e794c [ 30.944852] ... [ 30.944924] Call Trace: [ 30.945120] [] bpf_prog_846803e5ae81417f_cls_redirect+0xbc/0x590 [ 30.945650] [<90000000030e6668>] bpf_test_run+0x1ec/0x2f8 [ 30.945958] [<90000000030e794c>] bpf_prog_test_run_skb+0x31c/0x684 [ 30.946065] [<90000000026d4f68>] __sys_bpf+0x678/0x2724 [ 30.946159] [<90000000026d7288>] sys_bpf+0x20/0x2c [ 30.946253] [<90000000032dd224>] do_syscall+0x7c/0x94 [ 30.946343] [<9000000002541c5c>] handle_syscall+0xbc/0x158 [ 30.946492] [ 30.946549] Code: 0015030e 5c0009c0 5001d000 <28c00304> 02c00484 29c00304 00150009 2a42d2e4 0280200d [ 30.946793] [ 30.946971] ---[ end trace 0000000000000000 ]--- [ 32.093225] Kernel panic - not syncing: Fatal exception in interrupt [ 32.093526] Kernel relocated by 0x2320000 [ 32.093630] .text @ 0x9000000002520000 [ 32.093725] .data @ 0x9000000003400000 [ 32.093792] .bss @ 0x9000000004413200 [ 34.971998] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- This is because we signed-extend function return values. When subprog mode is enabled, we have: cls_redirect() -> get_global_metrics() returns pcpu ptr 0xfffffefffc00b480 The pointer returned is later signed-extended to 0xfffffffffc00b480 at `BPF_JMP | BPF_EXIT`. During BPF prog run, this triggers unhandled page fault and a kernel panic. Drop the unnecessary signed-extension on return values like other architectures do. With this change, we have: # ./test_progs -t cls_redirect Can't find bpf_testmod.ko kernel module: -2 WARNING! Selftests relying on bpf_testmod.ko will be skipped. #51/1 cls_redirect/cls_redirect_inlined:OK #51/2 cls_redirect/IPv4 TCP accept unknown (no hops, flags: SYN):OK #51/3 cls_redirect/IPv6 TCP accept unknown (no hops, flags: SYN):OK #51/4 cls_redirect/IPv4 TCP accept unknown (no hops, flags: ACK):OK #51/5 cls_redirect/IPv6 TCP accept unknown (no hops, flags: ACK):OK #51/6 cls_redirect/IPv4 TCP forward unknown (one hop, flags: ACK):OK #51/7 cls_redirect/IPv6 TCP forward unknown (one hop, flags: ACK):OK #51/8 cls_redirect/IPv4 TCP accept known (one hop, flags: ACK):OK #51/9 cls_redirect/IPv6 TCP accept known (one hop, flags: ACK):OK #51/10 cls_redirect/IPv4 UDP accept unknown (no hops, flags: none):OK #51/11 cls_redirect/IPv6 UDP accept unknown (no hops, flags: none):OK #51/12 cls_redirect/IPv4 UDP forward unknown (one hop, flags: none):OK #51/13 cls_redirect/IPv6 UDP forward unknown (one hop, flags: none):OK #51/14 cls_redirect/IPv4 UDP accept known (one hop, flags: none):OK #51/15 cls_redirect/IPv6 UDP accept known (one hop, flags: none):OK #51/16 cls_redirect/cls_redirect_subprogs:OK #51/17 cls_redirect/IPv4 TCP accept unknown (no hops, flags: SYN):OK #51/18 cls_redirect/IPv6 TCP accept unknown (no hops, flags: SYN):OK #51/19 cls_redirect/IPv4 TCP accept unknown (no hops, flags: ACK):OK #51/20 cls_redirect/IPv6 TCP accept unknown (no hops, flags: ACK):OK #51/21 cls_redirect/IPv4 TCP forward unknown (one hop, flags: ACK):OK #51/22 cls_redirect/IPv6 TCP forward unknown (one hop, flags: ACK):OK #51/23 cls_redirect/IPv4 TCP accept known (one hop, flags: ACK):OK #51/24 cls_redirect/IPv6 TCP accept known (one hop, flags: ACK):OK #51/25 cls_redirect/IPv4 UDP accept unknown (no hops, flags: none):OK #51/26 cls_redirect/IPv6 UDP accept unknown (no hops, flags: none):OK #51/27 cls_redirect/IPv4 UDP forward unknown (one hop, flags: none):OK #51/28 cls_redirect/IPv6 UDP forward unknown (one hop, flags: none):OK #51/29 cls_redirect/IPv4 UDP accept known (one hop, flags: none):OK #51/30 cls_redirect/IPv6 UDP accept known (one hop, flags: none):OK #51/31 cls_redirect/cls_redirect_dynptr:OK #51/32 cls_redirect/IPv4 TCP accept unknown (no hops, flags: SYN):OK #51/33 cls_redirect/IPv6 TCP accept unknown (no hops, flags: SYN):OK #51/34 cls_redirect/IPv4 TCP accept unknown (no hops, flags: ACK):OK #51/35 cls_redirect/IPv6 TCP accept unknown (no hops, flags: ACK):OK #51/36 cls_redirect/IPv4 TCP forward unknown (one hop, flags: ACK):OK #51/37 cls_redirect/IPv6 TCP forward unknown (one hop, flags: ACK):OK #51/38 cls_redirect/IPv4 TCP accept known (one hop, flags: ACK):OK #51/39 cls_redirect/IPv6 TCP accept known (one hop, flags: ACK):OK #51/40 cls_redirect/IPv4 UDP accept unknown (no hops, flags: none):OK #51/41 cls_redirect/IPv6 UDP accept unknown (no hops, flags: none):OK #51/42 cls_redirect/IPv4 UDP forward unknown (one hop, flags: none):OK #51/43 cls_redirect/IPv6 UDP forward unknown (one hop, flags: none):OK #51/44 cls_redirect/IPv4 UDP accept known (one hop, flags: none):OK #51/45 cls_redirect/IPv6 UDP accept known (one hop, flags: none):OK #51 cls_redirect:OK Summary: 1/45 PASSED, 0 SKIPPED, 0 FAILED Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support") Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index d2161c95ceb4..d76281efc5b8 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -911,8 +911,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* function return */ case BPF_JMP | BPF_EXIT: - emit_sext_32(ctx, regmap[BPF_REG_0], true); - if (i == ctx->prog->len - 1) break; -- cgit From 772cbe948fb07389639d4e698a2d3299f8e538b8 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 9 Dec 2023 15:49:16 +0800 Subject: LoongArch: BPF: Fix sign-extension mov instructions We can see that "Short form of movsx, dst_reg = (s8,s16,s32)src_reg" in include/linux/filter.h, additionally, for BPF_ALU64 the value of the destination register is unchanged whereas for BPF_ALU the upper 32 bits of the destination register are zeroed, so it should clear the upper 32 bits for BPF_ALU. [root@linux fedora]# echo 1 > /proc/sys/net/core/bpf_jit_enable [root@linux fedora]# modprobe test_bpf Before: test_bpf: #81 ALU_MOVSX | BPF_B jited:1 ret 2 != 1 (0x2 != 0x1)FAIL (1 times) test_bpf: #82 ALU_MOVSX | BPF_H jited:1 ret 2 != 1 (0x2 != 0x1)FAIL (1 times) After: test_bpf: #81 ALU_MOVSX | BPF_B jited:1 6 PASS test_bpf: #82 ALU_MOVSX | BPF_H jited:1 6 PASS By the way, the bpf selftest case "./test_progs -t verifier_movsx" can also be fixed with this patch. Fixes: f48012f16150 ("LoongArch: BPF: Support sign-extension mov instructions") Acked-by: Hengqi Chen Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index d76281efc5b8..cd002f183648 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -480,10 +480,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext case 8: move_reg(ctx, t1, src); emit_insn(ctx, extwb, dst, t1); + emit_zext_32(ctx, dst, is32); break; case 16: move_reg(ctx, t1, src); emit_insn(ctx, extwh, dst, t1); + emit_zext_32(ctx, dst, is32); break; case 32: emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO); -- cgit From e2f7b3d8b4b300956a77fa1ab084c931ba1c7421 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 9 Dec 2023 15:49:16 +0800 Subject: LoongArch: BPF: Fix unconditional bswap instructions We can see that "bswap32: Takes an unsigned 32-bit number in either big- or little-endian format and returns the equivalent number with the same bit width but opposite endianness" in BPF Instruction Set Specification, so it should clear the upper 32 bits in "case 32:" for both BPF_ALU and BPF_ALU64. [root@linux fedora]# echo 1 > /proc/sys/net/core/bpf_jit_enable [root@linux fedora]# modprobe test_bpf Before: test_bpf: #313 BSWAP 32: 0x0123456789abcdef -> 0xefcdab89 jited:1 ret 1460850314 != -271733879 (0x5712ce8a != 0xefcdab89)FAIL (1 times) test_bpf: #317 BSWAP 32: 0xfedcba9876543210 -> 0x10325476 jited:1 ret -1460850316 != 271733878 (0xa8ed3174 != 0x10325476)FAIL (1 times) After: test_bpf: #313 BSWAP 32: 0x0123456789abcdef -> 0xefcdab89 jited:1 4 PASS test_bpf: #317 BSWAP 32: 0xfedcba9876543210 -> 0x10325476 jited:1 4 PASS Fixes: 4ebf9216e7df ("LoongArch: BPF: Support unconditional bswap instructions") Acked-by: Hengqi Chen Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index cd002f183648..4fcd6cd6da23 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -774,8 +774,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext break; case 32: emit_insn(ctx, revb2w, dst, dst); - /* zero-extend 32 bits into 64 bits */ - emit_zext_32(ctx, dst, is32); + /* clear the upper 32 bits */ + emit_zext_32(ctx, dst, true); break; case 64: emit_insn(ctx, revbd, dst, dst); -- cgit From d5dba32b8f6cb39be708b726044ba30dbc088b30 Mon Sep 17 00:00:00 2001 From: Chengfeng Ye Date: Thu, 7 Dec 2023 12:34:37 +0000 Subject: atm: solos-pci: Fix potential deadlock on &cli_queue_lock As &card->cli_queue_lock is acquired under softirq context along the following call chain from solos_bh(), other acquisition of the same lock inside process context should disable at least bh to avoid double lock. console_show() --> spin_lock(&card->cli_queue_lock) --> solos_bh() --> spin_lock(&card->cli_queue_lock) This flaw was found by an experimental static analysis tool I am developing for irq-related deadlock. To prevent the potential deadlock, the patch uses spin_lock_bh() on the card->cli_queue_lock under process context code consistently to prevent the possible deadlock scenario. Fixes: 9c54004ea717 ("atm: Driver for Solos PCI ADSL2+ card.") Signed-off-by: Chengfeng Ye Signed-off-by: David S. Miller --- drivers/atm/solos-pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 94fbc3abe60e..95f768b28a5e 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -449,9 +449,9 @@ static ssize_t console_show(struct device *dev, struct device_attribute *attr, struct sk_buff *skb; unsigned int len; - spin_lock(&card->cli_queue_lock); + spin_lock_bh(&card->cli_queue_lock); skb = skb_dequeue(&card->cli_queue[SOLOS_CHAN(atmdev)]); - spin_unlock(&card->cli_queue_lock); + spin_unlock_bh(&card->cli_queue_lock); if(skb == NULL) return sprintf(buf, "No data.\n"); -- cgit From 15319a4e8ee4b098118591c6ccbd17237f841613 Mon Sep 17 00:00:00 2001 From: Chengfeng Ye Date: Thu, 7 Dec 2023 12:34:53 +0000 Subject: atm: solos-pci: Fix potential deadlock on &tx_queue_lock As &card->tx_queue_lock is acquired under softirq context along the following call chain from solos_bh(), other acquisition of the same lock inside process context should disable at least bh to avoid double lock. pclose() --> spin_lock(&card->tx_queue_lock) --> solos_bh() --> fpga_tx() --> spin_lock(&card->tx_queue_lock) This flaw was found by an experimental static analysis tool I am developing for irq-related deadlock. To prevent the potential deadlock, the patch uses spin_lock_bh() on &card->tx_queue_lock under process context code consistently to prevent the possible deadlock scenario. Fixes: 213e85d38912 ("solos-pci: clean up pclose() function") Signed-off-by: Chengfeng Ye Signed-off-by: David S. Miller --- drivers/atm/solos-pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 95f768b28a5e..d3c30a28c410 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -956,14 +956,14 @@ static void pclose(struct atm_vcc *vcc) struct pkt_hdr *header; /* Remove any yet-to-be-transmitted packets from the pending queue */ - spin_lock(&card->tx_queue_lock); + spin_lock_bh(&card->tx_queue_lock); skb_queue_walk_safe(&card->tx_queue[port], skb, tmpskb) { if (SKB_CB(skb)->vcc == vcc) { skb_unlink(skb, &card->tx_queue[port]); solos_pop(vcc, skb); } } - spin_unlock(&card->tx_queue_lock); + spin_unlock_bh(&card->tx_queue_lock); skb = alloc_skb(sizeof(*header), GFP_KERNEL); if (!skb) { -- cgit From 69db702c83874fbaa2a51af761e35a8e5a593b95 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 6 Dec 2023 13:55:19 +0000 Subject: io_uring/af_unix: disable sending io_uring over sockets File reference cycles have caused lots of problems for io_uring in the past, and it still doesn't work exactly right and races with unix_stream_read_generic(). The safest fix would be to completely disallow sending io_uring files via sockets via SCM_RIGHT, so there are no possible cycles invloving registered files and thus rendering SCM accounting on the io_uring side unnecessary. Cc: stable@vger.kernel.org Fixes: 0091bfc81741b ("io_uring/af_unix: defer registered files gc to io_uring release") Reported-and-suggested-by: Jann Horn Signed-off-by: Pavel Begunkov Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- io_uring/rsrc.h | 7 ------- net/core/scm.c | 6 ++++++ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 8625181fb87a..08ac0d8e07ef 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -77,17 +77,10 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file); -#if defined(CONFIG_UNIX) -static inline bool io_file_need_scm(struct file *filp) -{ - return !!unix_get_socket(filp); -} -#else static inline bool io_file_need_scm(struct file *filp) { return false; } -#endif static inline int io_scm_file_account(struct io_ring_ctx *ctx, struct file *file) diff --git a/net/core/scm.c b/net/core/scm.c index 880027ecf516..7dc47c17d863 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -103,6 +104,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (fd < 0 || !(file = fget_raw(fd))) return -EBADF; + /* don't allow io_uring files */ + if (io_uring_get_socket(file)) { + fput(file); + return -EINVAL; + } *fpp++ = file; fpl->count++; } -- cgit From 3b1ff57e24a7bcd2e2a8426dd2013a80d1fa96eb Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 8 Dec 2023 15:21:26 +0200 Subject: ALSA: hda/hdmi: add force-connect quirk for NUC5CPYB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add one more older NUC model that requires quirk to force all pins to be connected. The display codec pins are not registered properly without the force-connect quirk. The codec will report only one pin as having external connectivity, but i915 finds all three connectors on the system, so the two drivers are not in sync. Issue found with DRM igt-gpu-tools test kms_hdmi_inject@inject-audio. Link: https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/issues/3 Cc: Ville Syrjälä Cc: Jani Saarinen Signed-off-by: Kai Vehmanen Cc: Link: https://lore.kernel.org/r/20231208132127.2438067-2-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 1cde2a69bdb4..b152c941414f 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1994,6 +1994,7 @@ static const struct snd_pci_quirk force_connect_list[] = { SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8715, "HP", 1), SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1), + SND_PCI_QUIRK(0x8086, 0x2060, "Intel NUC5CPYB", 1), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1), {} }; -- cgit From 924f5ca2975b2993ee81a7ecc3c809943a70f334 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 8 Dec 2023 15:21:27 +0200 Subject: ALSA: hda/hdmi: add force-connect quirks for ASUSTeK Z170 variants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ASUSTeK Z170M PLUS and Z170 PRO GAMING systems, the display codec pins are not registered properly without the force-connect quirk. The codec will report only one pin as having external connectivity, but i915 finds all three connectors on the system, so the two drivers are not in sync. Issue found with DRM igt-gpu-tools test kms_hdmi_inject@inject-audio. Link: https://gitlab.freedesktop.org/drm/intel/-/issues/9801 Cc: Ville Syrjälä Cc: Jani Saarinen Signed-off-by: Kai Vehmanen Cc: Link: https://lore.kernel.org/r/20231208132127.2438067-3-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index b152c941414f..78cee53fee02 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1993,6 +1993,8 @@ static const struct snd_pci_quirk force_connect_list[] = { SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8715, "HP", 1), + SND_PCI_QUIRK(0x1043, 0x86ae, "ASUS", 1), /* Z170 PRO */ + SND_PCI_QUIRK(0x1043, 0x86c7, "ASUS", 1), /* Z170M PLUS */ SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1), SND_PCI_QUIRK(0x8086, 0x2060, "Intel NUC5CPYB", 1), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1), -- cgit From 9b726bf6ae11add6a7a52883a21f90ff9cbca916 Mon Sep 17 00:00:00 2001 From: Hartmut Knaack Date: Sat, 9 Dec 2023 15:47:07 +0100 Subject: ALSA: hda/realtek: Apply mute LED quirk for HP15-db The HP laptop 15-db0403ng uses the ALC236 codec and controls the mute LED using COEF 0x07 index 1. Sound card subsystem: Hewlett-Packard Company Device [103c:84ae] Use the existing quirk for this model. Signed-off-by: Hartmut Knaack Cc: Link: https://lore.kernel.org/r/e61815d0-f1c7-b164-e49d-6ca84771476a@gmx.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0377912e9264..e45d4c405f8f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9795,6 +9795,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x841c, "HP Pavilion 15-CK0xx", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), + SND_PCI_QUIRK(0x103c, 0x84ae, "HP 15-db0403ng", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), -- cgit From 75a25d31b80770485641ad2789a854955f5c1e40 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Sat, 9 Dec 2023 22:18:29 +0100 Subject: ALSA: hda/tas2781: leave hda_component in usable state Unloading then loading the module causes a NULL ponter dereference. The hda_unbind zeroes the hda_component, later the hda_bind tries to dereference the codec field. The hda_component is only initialized once by tas2781_generic_fixup. Set only previously modified fields to NULL. BUG: kernel NULL pointer dereference, address: 0000000000000322 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? exc_page_fault+0x7f/0x180 ? asm_exc_page_fault+0x26/0x30 ? tas2781_hda_bind+0x59/0x140 [snd_hda_scodec_tas2781_i2c] component_bind_all+0xf3/0x240 try_to_bring_up_aggregate_device+0x1c3/0x270 __component_add+0xbc/0x1a0 tas2781_hda_i2c_probe+0x289/0x3a0 [snd_hda_scodec_tas2781_i2c] i2c_device_probe+0x136/0x2e0 Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") Cc: stable@vger.kernel.org Signed-off-by: Gergo Koteles Link: https://lore.kernel.org/r/8b8ed2bd5f75fbb32e354a3226c2f966fa85b46b.1702156522.git.soyer@irl.hu Signed-off-by: Takashi Iwai --- sound/pci/hda/tas2781_hda_i2c.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index fb802802939e..b42837105c22 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -612,9 +612,13 @@ static void tas2781_hda_unbind(struct device *dev, { struct tasdevice_priv *tas_priv = dev_get_drvdata(dev); struct hda_component *comps = master_data; + comps = &comps[tas_priv->index]; - if (comps[tas_priv->index].dev == dev) - memset(&comps[tas_priv->index], 0, sizeof(*comps)); + if (comps->dev == dev) { + comps->dev = NULL; + memset(comps->name, 0, sizeof(comps->name)); + comps->playback_hook = NULL; + } tasdevice_config_info_remove(tas_priv); tasdevice_dsp_remove(tas_priv); -- cgit From 4a147af2081070218a4c66523c584e198994528e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 12:21:42 -0500 Subject: bcachefs: Fix uninitialized var in bch2_journal_replay() Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 770ced1c6285..c7d9074c82d9 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -144,7 +144,7 @@ static int bch2_journal_replay(struct bch_fs *c) u64 start_seq = c->journal_replay_seq_start; u64 end_seq = c->journal_replay_seq_start; size_t i; - int ret; + int ret = 0; move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr); keys->gap = keys->nr; -- cgit From 9fc95fe95c3e2a63ced8eeca4b256518ab204b63 Mon Sep 17 00:00:00 2001 From: Radu Bulie Date: Thu, 7 Dec 2023 16:38:01 +0800 Subject: net: fec: correct queue selection The old implementation extracted VLAN TCI info from the payload before the VLAN tag has been pushed in the payload. Another problem was that the VLAN TCI was extracted even if the packet did not have VLAN protocol header. This resulted in invalid VLAN TCI and as a consequence a random queue was computed. This patch fixes the above issues and use the VLAN TCI from the skb if it is present or VLAN TCI from payload if present. If no VLAN header is present queue 0 is selected. Fixes: 52c4a1a85f4b ("net: fec: add ndo_select_queue to fix TX bandwidth fluctuations") Signed-off-by: Radu Bulie Signed-off-by: Wei Fang Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index c3b7694a7485..e08c7b572497 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3731,31 +3731,26 @@ static int fec_set_features(struct net_device *netdev, return 0; } -static u16 fec_enet_get_raw_vlan_tci(struct sk_buff *skb) -{ - struct vlan_ethhdr *vhdr; - unsigned short vlan_TCI = 0; - - if (skb->protocol == htons(ETH_P_ALL)) { - vhdr = (struct vlan_ethhdr *)(skb->data); - vlan_TCI = ntohs(vhdr->h_vlan_TCI); - } - - return vlan_TCI; -} - static u16 fec_enet_select_queue(struct net_device *ndev, struct sk_buff *skb, struct net_device *sb_dev) { struct fec_enet_private *fep = netdev_priv(ndev); - u16 vlan_tag; + u16 vlan_tag = 0; if (!(fep->quirks & FEC_QUIRK_HAS_AVB)) return netdev_pick_tx(ndev, skb, NULL); - vlan_tag = fec_enet_get_raw_vlan_tci(skb); - if (!vlan_tag) + /* VLAN is present in the payload.*/ + if (eth_type_vlan(skb->protocol)) { + struct vlan_ethhdr *vhdr = skb_vlan_eth_hdr(skb); + + vlan_tag = ntohs(vhdr->h_vlan_TCI); + /* VLAN is present in the skb but not yet pushed in the payload.*/ + } else if (skb_vlan_tag_present(skb)) { + vlan_tag = skb->vlan_tci; + } else { return vlan_tag; + } return fec_enet_vlan_pri_to_queue[vlan_tag >> 13]; } -- cgit From 28a7cb045ab700de5554193a1642917602787784 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Thu, 7 Dec 2023 17:49:16 +0800 Subject: octeontx2-af: fix a use-after-free in rvu_nix_register_reporters The rvu_dl will be freed in rvu_nix_health_reporters_destroy(rvu_dl) after the create_workqueue fails, and after that free, the rvu_dl will be translate back through the following call chain: rvu_nix_health_reporters_destroy |-> rvu_nix_health_reporters_create |-> rvu_health_reporters_create |-> rvu_register_dl (label err_dl_health) Finally. in the err_dl_health label, rvu_dl being freed again in rvu_health_reporters_destroy(rvu) by rvu_nix_health_reporters_destroy. In the second calls of rvu_nix_health_reporters_destroy, however, it uses rvu_dl->rvu_nix_health_reporter, which is already freed at the end of rvu_nix_health_reporters_destroy in the first call. So this patch prevents the first destroy by instantly returning -ENONMEN when create_workqueue fails. In addition, since the failure of create_workqueue is the only entrence of label err, it has been integrated into the error-handling path of create_workqueue. Fixes: 5ed66306eab6 ("octeontx2-af: Add devlink health reporters for NIX") Signed-off-by: Zhipeng Lu Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 62780d8b4f9a..21b5d71c1e37 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -538,7 +538,7 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) rvu_dl->devlink_wq = create_workqueue("rvu_devlink_wq"); if (!rvu_dl->devlink_wq) - goto err; + return -ENOMEM; INIT_WORK(&rvu_reporters->intr_work, rvu_nix_intr_work); INIT_WORK(&rvu_reporters->gen_work, rvu_nix_gen_work); @@ -546,9 +546,6 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) INIT_WORK(&rvu_reporters->ras_work, rvu_nix_ras_work); return 0; -err: - rvu_nix_health_reporters_destroy(rvu_dl); - return -ENOMEM; } static int rvu_nix_health_reporters_create(struct rvu_devlink *rvu_dl) -- cgit From a66ff26b0f31189e413a87065c25949c359e4bef Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Dec 2023 15:23:27 -0500 Subject: bcachefs: Close journal entry if necessary when flushing all pins Since outstanding journal buffers hold a journal pin, when flushing all pins we need to close the current journal entry if necessary so its pin can be released. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 8 ++++---- fs/bcachefs/journal.h | 1 + fs/bcachefs/journal_io.c | 1 + fs/bcachefs/journal_reclaim.c | 3 +++ 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 489b34046e78..8cf238be6213 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -249,7 +249,7 @@ static bool journal_entry_want_write(struct journal *j) return ret; } -static bool journal_entry_close(struct journal *j) +bool bch2_journal_entry_close(struct journal *j) { bool ret; @@ -383,7 +383,7 @@ static bool journal_quiesced(struct journal *j) bool ret = atomic64_read(&j->seq) == j->seq_ondisk; if (!ret) - journal_entry_close(j); + bch2_journal_entry_close(j); return ret; } @@ -436,7 +436,7 @@ retry: /* * Recheck after taking the lock, so we don't race with another thread - * that just did journal_entry_open() and call journal_entry_close() + * that just did journal_entry_open() and call bch2_journal_entry_close() * unnecessarily */ if (journal_res_get_fast(j, res, flags)) { @@ -1041,7 +1041,7 @@ void bch2_fs_journal_stop(struct journal *j) bch2_journal_reclaim_stop(j); bch2_journal_flush_all_pins(j); - wait_event(j->wait, journal_entry_close(j)); + wait_event(j->wait, bch2_journal_entry_close(j)); /* * Always write a new journal entry, to make sure the clock hands are up diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 4c513fca5ef2..2f768e11aec9 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -266,6 +266,7 @@ static inline union journal_res_state journal_state_buf_put(struct journal *j, u return s; } +bool bch2_journal_entry_close(struct journal *); void bch2_journal_buf_put_final(struct journal *, u64, bool); static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 0f17fc5f8d68..5de1b68fb8af 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1599,6 +1599,7 @@ static CLOSURE_CALLBACK(journal_write_done) } while ((v = atomic64_cmpxchg(&j->reservations.counter, old.v, new.v)) != old.v); + bch2_journal_reclaim_fast(j); bch2_journal_space_available(j); closure_wake_up(&w->wait); diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index e63c6eda86af..ec712104addb 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -776,6 +776,9 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, (1U << JOURNAL_PIN_btree), 0, 0, 0)) *did_work = true; + if (seq_to_flush > journal_cur_seq(j)) + bch2_journal_entry_close(j); + spin_lock(&j->lock); /* * If journal replay hasn't completed, the unreplayed journal entries -- cgit From a39b6ac3781d46ba18193c9dbb2110f31e9bffe9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 Dec 2023 14:33:40 -0800 Subject: Linux 6.7-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 511b5616aa41..70fc4c11dfc0 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- cgit From 778dfacc903d4b1ef5b7a9726e3a36bc15913d29 Mon Sep 17 00:00:00 2001 From: Guanjun Date: Mon, 11 Dec 2023 13:37:03 +0800 Subject: dmaengine: idxd: Protect int_handle field in hw descriptor The int_handle field in hw descriptor should also be protected by wmb() before possibly triggering a DMA read. Fixes: eb0cf33a91b4 (dmaengine: idxd: move interrupt handle assignment) Signed-off-by: Guanjun Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Reviewed-by: Lijun Pan Link: https://lore.kernel.org/r/20231211053704.2725417-2-guanjun@linux.alibaba.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/submit.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index c01db23e3333..3f922518e3a5 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -182,13 +182,6 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) portal = idxd_wq_portal_addr(wq); - /* - * The wmb() flushes writes to coherent DMA data before - * possibly triggering a DMA read. The wmb() is necessary - * even on UP because the recipient is a device. - */ - wmb(); - /* * Pending the descriptor to the lockless list for the irq_entry * that we designated the descriptor to. @@ -199,6 +192,13 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) llist_add(&desc->llnode, &ie->pending_llist); } + /* + * The wmb() flushes writes to coherent DMA data before + * possibly triggering a DMA read. The wmb() is necessary + * even on UP because the recipient is a device. + */ + wmb(); + if (wq_dedicated(wq)) { iosubmit_cmds512(portal, desc->hw, 1); } else { -- cgit From 0c154698a0fc32957d00c6009d5389e086dc8acf Mon Sep 17 00:00:00 2001 From: Guanjun Date: Mon, 11 Dec 2023 13:37:04 +0800 Subject: dmaengine: idxd: Fix incorrect descriptions for GRPCFG register Fix incorrect descriptions for the GRPCFG register which has three sub-registers (GRPWQCFG, GRPENGCFG and GRPFLGCFG). No functional changes Signed-off-by: Guanjun Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Acked-by: Lijun Pan Link: https://lore.kernel.org/r/20231211053704.2725417-3-guanjun@linux.alibaba.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/registers.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 7b54a3939ea1..315c004f58e4 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -440,12 +440,14 @@ union wqcfg { /* * This macro calculates the offset into the GRPCFG register * idxd - struct idxd * - * n - wq id - * ofs - the index of the 32b dword for the config register + * n - group id + * ofs - the index of the 64b qword for the config register * - * The WQCFG register block is divided into groups per each wq. The n index - * allows us to move to the register group that's for that particular wq. - * Each register is 32bits. The ofs gives us the number of register to access. + * The GRPCFG register block is divided into three sub-registers, which + * are GRPWQCFG, GRPENGCFG and GRPFLGCFG. The n index allows us to move + * to the register block that contains the three sub-registers. + * Each register block is 64bits. And the ofs gives us the offset + * within the GRPWQCFG register to access. */ #define GRPWQCFG_OFFSET(idxd_dev, n, ofs) ((idxd_dev)->grpcfg_offset +\ (n) * GRPCFG_SIZE + sizeof(u64) * (ofs)) -- cgit From 33071422714a4c9587753b0ccc130ca59323bf42 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Mon, 11 Dec 2023 00:37:33 +0100 Subject: ALSA: hda/tas2781: handle missing EFI calibration data The code does not properly check whether the calibration variable is available in the EFI. If it is not available, it causes a NULL pointer dereference. Check the return value of the first get_variable call also. BUG: kernel NULL pointer dereference, address: 0000000000000000 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? srso_alias_return_thunk+0x5/0x7f ? schedule+0x5e/0xd0 ? exc_page_fault+0x7f/0x180 ? asm_exc_page_fault+0x26/0x30 ? crc32_body+0x2c/0x120 ? tas2781_save_calibration+0xe4/0x220 [snd_hda_scodec_tas2781_i2c] tasdev_fw_ready+0x1af/0x280 [snd_hda_scodec_tas2781_i2c] request_firmware_work_func+0x59/0xa0 Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") CC: stable@vger.kernel.org Signed-off-by: Gergo Koteles Link: https://lore.kernel.org/r/f1f6583bda918f78556f67d522ca7b3b91cebbd5.1702251102.git.soyer@irl.hu Signed-off-by: Takashi Iwai --- sound/pci/hda/tas2781_hda_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index b42837105c22..d3dafc9d150b 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -455,9 +455,9 @@ static int tas2781_save_calibration(struct tasdevice_priv *tas_priv) status = efi.get_variable(efi_name, &efi_guid, &attr, &tas_priv->cali_data.total_sz, tas_priv->cali_data.data); - if (status != EFI_SUCCESS) - return -EINVAL; } + if (status != EFI_SUCCESS) + return -EINVAL; tmp_val = (unsigned int *)tas_priv->cali_data.data; -- cgit From 35c49cfc8b702eda7a0d3f05497b16f81b69e289 Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Mon, 4 Dec 2023 13:23:31 +0100 Subject: accel/ivpu/37xx: Fix interrupt_clear_with_0 WA initialization Using PCI Device ID/Revision to initialize the interrupt_clear_with_0 workaround is problematic - there are many pre-production steppings with different behavior, even with the same PCI ID/Revision Instead of checking for PCI Device ID/Revision, check the VPU buttress interrupt status register behavior - if this register is not zero after writing 1s it means there register is RW instead of RW1C and we need to enable the interrupt_clear_with_0 workaround. Fixes: 7f34e01f77f8 ("accel/ivpu: Clear specific interrupt status bits on C0") Signed-off-by: Andrzej Kacprowski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://lore.kernel.org/all/20231204122331.40560-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_37xx.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index 4ccf1994b97a..d530384f8d60 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -53,10 +53,12 @@ #define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK) -#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE)) | \ - (REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ +#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ (REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, UFI_ERR))) +#define BUTTRESS_ALL_IRQ_MASK (BUTTRESS_IRQ_MASK | \ + (REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE))) + #define BUTTRESS_IRQ_ENABLE_MASK ((u32)~BUTTRESS_IRQ_MASK) #define BUTTRESS_IRQ_DISABLE_MASK ((u32)-1) @@ -74,8 +76,12 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev) vdev->wa.clear_runtime_mem = false; vdev->wa.d3hot_after_power_off = true; - if (ivpu_device_id(vdev) == PCI_DEVICE_ID_MTL && ivpu_revision(vdev) < 4) + REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, BUTTRESS_ALL_IRQ_MASK); + if (REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) == BUTTRESS_ALL_IRQ_MASK) { + /* Writing 1s does not clear the interrupt status register */ vdev->wa.interrupt_clear_with_0 = true; + REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, 0x0); + } IVPU_PRINT_WA(punit_disabled); IVPU_PRINT_WA(clear_runtime_mem); -- cgit From 125f1c7f26ffcdbf96177abe75b70c1a6ceb17bc Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 5 Dec 2023 18:25:54 +0100 Subject: net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table The referenced change added custom cleanup code to act_ct to delete any callbacks registered on the parent block when deleting the tcf_ct_flow_table instance. However, the underlying issue is that the drivers don't obtain the reference to the tcf_ct_flow_table instance when registering callbacks which means that not only driver callbacks may still be on the table when deleting it but also that the driver can still have pointers to its internal nf_flowtable and can use it concurrently which results either warning in netfilter[0] or use-after-free. Fix the issue by taking a reference to the underlying struct tcf_ct_flow_table instance when registering the callback and release the reference when unregistering. Expose new API required for such reference counting by adding two new callbacks to nf_flowtable_type and implementing them for act_ct flowtable_ct type. This fixes the issue by extending the lifetime of nf_flowtable until all users have unregistered. [0]: [106170.938634] ------------[ cut here ]------------ [106170.939111] WARNING: CPU: 21 PID: 3688 at include/net/netfilter/nf_flow_table.h:262 mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] [106170.940108] Modules linked in: act_ct nf_flow_table act_mirred act_skbedit act_tunnel_key vxlan cls_matchall nfnetlink_cttimeout act_gact cls_flower sch_ingress mlx5_vdpa vringh vhost_iotlb vdpa bonding openvswitch nsh rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_regis try overlay mlx5_core [106170.943496] CPU: 21 PID: 3688 Comm: kworker/u48:0 Not tainted 6.6.0-rc7_for_upstream_min_debug_2023_11_01_13_02 #1 [106170.944361] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [106170.945292] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core] [106170.945846] RIP: 0010:mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] [106170.946413] Code: 89 ef 48 83 05 71 a4 14 00 01 e8 f4 06 04 e1 48 83 05 6c a4 14 00 01 48 83 c4 28 5b 5d 41 5c 41 5d c3 48 83 05 d1 8b 14 00 01 <0f> 0b 48 83 05 d7 8b 14 00 01 e9 96 fe ff ff 48 83 05 a2 90 14 00 [106170.947924] RSP: 0018:ffff88813ff0fcb8 EFLAGS: 00010202 [106170.948397] RAX: 0000000000000000 RBX: ffff88811eabac40 RCX: ffff88811eabad48 [106170.949040] RDX: ffff88811eab8000 RSI: ffffffffa02cd560 RDI: 0000000000000000 [106170.949679] RBP: ffff88811eab8000 R08: 0000000000000001 R09: ffffffffa0229700 [106170.950317] R10: ffff888103538fc0 R11: 0000000000000001 R12: ffff88811eabad58 [106170.950969] R13: ffff888110c01c00 R14: ffff888106b40000 R15: 0000000000000000 [106170.951616] FS: 0000000000000000(0000) GS:ffff88885fd40000(0000) knlGS:0000000000000000 [106170.952329] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [106170.952834] CR2: 00007f1cefd28cb0 CR3: 000000012181b006 CR4: 0000000000370ea0 [106170.953482] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [106170.954121] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [106170.954766] Call Trace: [106170.955057] [106170.955315] ? __warn+0x79/0x120 [106170.955648] ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] [106170.956172] ? report_bug+0x17c/0x190 [106170.956537] ? handle_bug+0x3c/0x60 [106170.956891] ? exc_invalid_op+0x14/0x70 [106170.957264] ? asm_exc_invalid_op+0x16/0x20 [106170.957666] ? mlx5_del_flow_rules+0x10/0x310 [mlx5_core] [106170.958172] ? mlx5_tc_ct_block_flow_offload_add+0x1240/0x1240 [mlx5_core] [106170.958788] ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] [106170.959339] ? mlx5_tc_ct_del_ft_cb+0xc6/0x2b0 [mlx5_core] [106170.959854] ? mapping_remove+0x154/0x1d0 [mlx5_core] [106170.960342] ? mlx5e_tc_action_miss_mapping_put+0x4f/0x80 [mlx5_core] [106170.960927] mlx5_tc_ct_delete_flow+0x76/0xc0 [mlx5_core] [106170.961441] mlx5_free_flow_attr_actions+0x13b/0x220 [mlx5_core] [106170.962001] mlx5e_tc_del_fdb_flow+0x22c/0x3b0 [mlx5_core] [106170.962524] mlx5e_tc_del_flow+0x95/0x3c0 [mlx5_core] [106170.963034] mlx5e_flow_put+0x73/0xe0 [mlx5_core] [106170.963506] mlx5e_put_flow_list+0x38/0x70 [mlx5_core] [106170.964002] mlx5e_rep_update_flows+0xec/0x290 [mlx5_core] [106170.964525] mlx5e_rep_neigh_update+0x1da/0x310 [mlx5_core] [106170.965056] process_one_work+0x13a/0x2c0 [106170.965443] worker_thread+0x2e5/0x3f0 [106170.965808] ? rescuer_thread+0x410/0x410 [106170.966192] kthread+0xc6/0xf0 [106170.966515] ? kthread_complete_and_exit+0x20/0x20 [106170.966970] ret_from_fork+0x2d/0x50 [106170.967332] ? kthread_complete_and_exit+0x20/0x20 [106170.967774] ret_from_fork_asm+0x11/0x20 [106170.970466] [106170.970726] ---[ end trace 0000000000000000 ]--- Fixes: 77ac5e40c44e ("net/sched: act_ct: remove and free nf_table callbacks") Signed-off-by: Vlad Buslov Reviewed-by: Paul Blakey Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/net/netfilter/nf_flow_table.h | 10 ++++++++++ net/sched/act_ct.c | 34 ++++++++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index fe1507c1db82..692d5955911c 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -62,6 +62,8 @@ struct nf_flowtable_type { enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule); void (*free)(struct nf_flowtable *ft); + void (*get)(struct nf_flowtable *ft); + void (*put)(struct nf_flowtable *ft); nf_hookfn *hook; struct module *owner; }; @@ -240,6 +242,11 @@ nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, } list_add_tail(&block_cb->list, &block->cb_list); + up_write(&flow_table->flow_block_lock); + + if (flow_table->type->get) + flow_table->type->get(flow_table); + return 0; unlock: up_write(&flow_table->flow_block_lock); @@ -262,6 +269,9 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, WARN_ON(true); } up_write(&flow_table->flow_block_lock); + + if (flow_table->type->put) + flow_table->type->put(flow_table); } void flow_offload_route_init(struct flow_offload *flow, diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index b3f4a503ee2b..f69c47945175 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -286,9 +286,31 @@ static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow) !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); } +static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft); + +static void tcf_ct_nf_get(struct nf_flowtable *ft) +{ + struct tcf_ct_flow_table *ct_ft = + container_of(ft, struct tcf_ct_flow_table, nf_ft); + + tcf_ct_flow_table_get_ref(ct_ft); +} + +static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft); + +static void tcf_ct_nf_put(struct nf_flowtable *ft) +{ + struct tcf_ct_flow_table *ct_ft = + container_of(ft, struct tcf_ct_flow_table, nf_ft); + + tcf_ct_flow_table_put(ct_ft); +} + static struct nf_flowtable_type flowtable_ct = { .gc = tcf_ct_flow_is_outdated, .action = tcf_ct_flow_table_fill_actions, + .get = tcf_ct_nf_get, + .put = tcf_ct_nf_put, .owner = THIS_MODULE, }; @@ -337,9 +359,13 @@ err_alloc: return err; } +static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft) +{ + refcount_inc(&ct_ft->ref); +} + static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) { - struct flow_block_cb *block_cb, *tmp_cb; struct tcf_ct_flow_table *ct_ft; struct flow_block *block; @@ -347,13 +373,9 @@ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) rwork); nf_flow_table_free(&ct_ft->nf_ft); - /* Remove any remaining callbacks before cleanup */ block = &ct_ft->nf_ft.flow_block; down_write(&ct_ft->nf_ft.flow_block_lock); - list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) { - list_del(&block_cb->list); - flow_block_cb_free(block_cb); - } + WARN_ON(!list_empty(&block->cb_list)); up_write(&ct_ft->nf_ft.flow_block_lock); kfree(ct_ft); -- cgit From 284f717622417cb267e344a9174f8e5698d1e3c1 Mon Sep 17 00:00:00 2001 From: Shinas Rasheed Date: Thu, 7 Dec 2023 21:56:46 -0800 Subject: octeon_ep: explicitly test for firmware ready value The firmware ready value is 1, and get firmware ready status function should explicitly test for that value. The firmware ready value read will be 2 after driver load, and on unbind till firmware rewrites the firmware ready back to 0, the value seen by driver will be 2, which should be regarded as not ready. Fixes: 10c073e40469 ("octeon_ep: defer probe if firmware not ready") Signed-off-by: Shinas Rasheed Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index a2b5a3f8df93..a9bdf3283a85 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -1265,7 +1265,8 @@ static bool get_fw_ready_status(struct pci_dev *pdev) pci_read_config_byte(pdev, (pos + 8), &status); dev_info(&pdev->dev, "Firmware ready status = %u\n", status); - return status; +#define FW_STATUS_READY 1ULL + return status == FW_STATUS_READY; } return false; } -- cgit From dbda436824ded8ef6a05bb82cd9baa8d42377a49 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 8 Dec 2023 12:26:09 +0530 Subject: octeontx2-pf: Fix promisc mcam entry action Current implementation is such that, promisc mcam entry action is set as multicast even when there are no trusted VFs. multicast action causes the hardware to copy packet data, which reduces the performance. This patch fixes this issue by setting the promisc mcam entry action to unicast instead of multicast when there are no trusted VFs. The same change is made for the 'allmulti' mcam entry action. Fixes: ffd2f89ad05c ("octeontx2-pf: Enable promisc/allmulti match MCAM entries.") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 25 +++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 0c17ebdda148..a57455aebff6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1650,6 +1650,21 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) mutex_unlock(&mbox->lock); } +static bool otx2_promisc_use_mce_list(struct otx2_nic *pfvf) +{ + int vf; + + /* The AF driver will determine whether to allow the VF netdev or not */ + if (is_otx2_vf(pfvf->pcifunc)) + return true; + + /* check if there are any trusted VFs associated with the PF netdev */ + for (vf = 0; vf < pci_num_vf(pfvf->pdev); vf++) + if (pfvf->vf_configs[vf].trusted) + return true; + return false; +} + static void otx2_do_set_rx_mode(struct otx2_nic *pf) { struct net_device *netdev = pf->netdev; @@ -1682,7 +1697,8 @@ static void otx2_do_set_rx_mode(struct otx2_nic *pf) if (netdev->flags & (IFF_ALLMULTI | IFF_MULTICAST)) req->mode |= NIX_RX_MODE_ALLMULTI; - req->mode |= NIX_RX_MODE_USE_MCE; + if (otx2_promisc_use_mce_list(pf)) + req->mode |= NIX_RX_MODE_USE_MCE; otx2_sync_mbox_msg(&pf->mbox); mutex_unlock(&pf->mbox.lock); @@ -2691,11 +2707,14 @@ static int otx2_ndo_set_vf_trust(struct net_device *netdev, int vf, pf->vf_configs[vf].trusted = enable; rc = otx2_set_vf_permissions(pf, vf, OTX2_TRUSTED_VF); - if (rc) + if (rc) { pf->vf_configs[vf].trusted = !enable; - else + } else { netdev_info(pf->netdev, "VF %d is %strusted\n", vf, enable ? "" : "not "); + otx2_set_rx_mode(netdev); + } + return rc; } -- cgit From 570ba37898ecd9069beb58bf0b6cf84daba6e0fe Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 8 Dec 2023 12:26:10 +0530 Subject: octeontx2-af: Update RSS algorithm index The RSS flow algorithm is not set up correctly for promiscuous or all multi MCAM entries. This has an impact on flow distribution. This patch fixes the issue by updating flow algorithm index in above mentioned MCAM entries. Fixes: 967db3529eca ("octeontx2-af: add support for multicast/promisc packet replication feature") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/af/rvu_npc.c | 55 +++++++++++++++++----- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index f65805860c8d..0bcf3e559280 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -671,6 +671,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, int blkaddr, ucast_idx, index; struct nix_rx_action action = { 0 }; u64 relaxed_mask; + u8 flow_key_alg; if (!hw->cap.nix_rx_multicast && is_cgx_vf(rvu, pcifunc)) return; @@ -701,6 +702,8 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, action.op = NIX_RX_ACTIONOP_UCAST; } + flow_key_alg = action.flow_key_alg; + /* RX_ACTION set to MCAST for CGX PF's */ if (hw->cap.nix_rx_multicast && pfvf->use_mce_list && is_pf_cgxmapped(rvu, rvu_get_pf(pcifunc))) { @@ -740,7 +743,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, req.vf = pcifunc; req.index = action.index; req.match_id = action.match_id; - req.flow_key_alg = action.flow_key_alg; + req.flow_key_alg = flow_key_alg; rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); } @@ -854,6 +857,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u8 mac_addr[ETH_ALEN] = { 0 }; struct nix_rx_action action = { 0 }; struct rvu_pfvf *pfvf; + u8 flow_key_alg; u16 vf_func; /* Only CGX PF/VF can add allmulticast entry */ @@ -888,6 +892,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, *(u64 *)&action = npc_get_mcam_action(rvu, mcam, blkaddr, ucast_idx); + flow_key_alg = action.flow_key_alg; if (action.op != NIX_RX_ACTIONOP_RSS) { *(u64 *)&action = 0; action.op = NIX_RX_ACTIONOP_UCAST; @@ -924,7 +929,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, req.vf = pcifunc | vf_func; req.index = action.index; req.match_id = action.match_id; - req.flow_key_alg = action.flow_key_alg; + req.flow_key_alg = flow_key_alg; rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); } @@ -990,11 +995,38 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam, mutex_unlock(&mcam->lock); } +static void npc_update_rx_action_with_alg_idx(struct rvu *rvu, struct nix_rx_action action, + struct rvu_pfvf *pfvf, int mcam_index, int blkaddr, + int alg_idx) + +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + struct rvu_hwinfo *hw = rvu->hw; + int bank, op_rss; + + if (!is_mcam_entry_enabled(rvu, mcam, blkaddr, mcam_index)) + return; + + op_rss = (!hw->cap.nix_rx_multicast || !pfvf->use_mce_list); + + bank = npc_get_bank(mcam, mcam_index); + mcam_index &= (mcam->banksize - 1); + + /* If Rx action is MCAST update only RSS algorithm index */ + if (!op_rss) { + *(u64 *)&action = rvu_read64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_ACTION(mcam_index, bank)); + + action.flow_key_alg = alg_idx; + } + rvu_write64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_ACTION(mcam_index, bank), *(u64 *)&action); +} + void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, int group, int alg_idx, int mcam_index) { struct npc_mcam *mcam = &rvu->hw->mcam; - struct rvu_hwinfo *hw = rvu->hw; struct nix_rx_action action; int blkaddr, index, bank; struct rvu_pfvf *pfvf; @@ -1050,15 +1082,16 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, /* If PF's promiscuous entry is enabled, * Set RSS action for that entry as well */ - if ((!hw->cap.nix_rx_multicast || !pfvf->use_mce_list) && - is_mcam_entry_enabled(rvu, mcam, blkaddr, index)) { - bank = npc_get_bank(mcam, index); - index &= (mcam->banksize - 1); + npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, blkaddr, + alg_idx); - rvu_write64(rvu, blkaddr, - NPC_AF_MCAMEX_BANKX_ACTION(index, bank), - *(u64 *)&action); - } + index = npc_get_nixlf_mcam_index(mcam, pcifunc, + nixlf, NIXLF_ALLMULTI_ENTRY); + /* If PF's allmulti entry is enabled, + * Set RSS action for that entry as well + */ + npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, blkaddr, + alg_idx); } void npc_enadis_default_mce_entry(struct rvu *rvu, u16 pcifunc, -- cgit From 271f2a4a9576b87ed1f8584909d6d270039e52ea Mon Sep 17 00:00:00 2001 From: Wang Yao Date: Wed, 6 Dec 2023 08:24:27 +0800 Subject: efi/loongarch: Use load address to calculate kernel entry address The efi_relocate_kernel() may load the PIE kernel to anywhere, the loaded address may not be equal to link address or EFI_KIMG_PREFERRED_ADDRESS. Acked-by: Huacai Chen Signed-off-by: Wang Yao Signed-off-by: Ard Biesheuvel --- arch/loongarch/include/asm/efi.h | 2 +- drivers/firmware/efi/libstub/loongarch-stub.c | 4 ++-- drivers/firmware/efi/libstub/loongarch.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h index 091897d40b03..91d81f9730ab 100644 --- a/arch/loongarch/include/asm/efi.h +++ b/arch/loongarch/include/asm/efi.h @@ -32,6 +32,6 @@ static inline unsigned long efi_get_kimg_min_align(void) #define EFI_KIMG_PREFERRED_ADDRESS PHYSADDR(VMLINUX_LOAD_ADDRESS) -unsigned long kernel_entry_address(void); +unsigned long kernel_entry_address(unsigned long kernel_addr); #endif /* _ASM_LOONGARCH_EFI_H */ diff --git a/drivers/firmware/efi/libstub/loongarch-stub.c b/drivers/firmware/efi/libstub/loongarch-stub.c index 72c71ae201f0..d6ec5d4b8dbe 100644 --- a/drivers/firmware/efi/libstub/loongarch-stub.c +++ b/drivers/firmware/efi/libstub/loongarch-stub.c @@ -35,9 +35,9 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, return status; } -unsigned long kernel_entry_address(void) +unsigned long kernel_entry_address(unsigned long kernel_addr) { unsigned long base = (unsigned long)&kernel_offset - kernel_offset; - return (unsigned long)&kernel_entry - base + VMLINUX_LOAD_ADDRESS; + return (unsigned long)&kernel_entry - base + kernel_addr; } diff --git a/drivers/firmware/efi/libstub/loongarch.c b/drivers/firmware/efi/libstub/loongarch.c index 807cba2693fc..0e0aa6cda73f 100644 --- a/drivers/firmware/efi/libstub/loongarch.c +++ b/drivers/firmware/efi/libstub/loongarch.c @@ -37,9 +37,9 @@ static efi_status_t exit_boot_func(struct efi_boot_memmap *map, void *priv) return EFI_SUCCESS; } -unsigned long __weak kernel_entry_address(void) +unsigned long __weak kernel_entry_address(unsigned long kernel_addr) { - return *(unsigned long *)(PHYSADDR(VMLINUX_LOAD_ADDRESS) + 8); + return *(unsigned long *)(kernel_addr + 8) - VMLINUX_LOAD_ADDRESS + kernel_addr; } efi_status_t efi_boot_kernel(void *handle, efi_loaded_image_t *image, @@ -73,7 +73,7 @@ efi_status_t efi_boot_kernel(void *handle, efi_loaded_image_t *image, csr_write64(CSR_DMW0_INIT, LOONGARCH_CSR_DMWIN0); csr_write64(CSR_DMW1_INIT, LOONGARCH_CSR_DMWIN1); - real_kernel_entry = (void *)kernel_entry_address(); + real_kernel_entry = (void *)kernel_entry_address(kernel_addr); real_kernel_entry(true, (unsigned long)cmdline_ptr, (unsigned long)efi_system_table); -- cgit From e307b5a845c5951dabafc48d00b6424ee64716c4 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 8 Dec 2023 14:57:54 +0530 Subject: octeontx2-af: Fix pause frame configuration The current implementation's default Pause Forward setting is causing unnecessary network traffic. This patch disables Pause Forward to address this issue. Fixes: 1121f6b02e7a ("octeontx2-af: Priority flow control configuration support") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index af21e2030cff..4728ba34b0e3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -373,6 +373,11 @@ void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable) cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + /* Disable forward pause to driver */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + /* Enable channel mask for all LMACS */ if (is_dev_rpm2(rpm)) rpm_write(rpm, lmac_id, RPM2_CMR_CHAN_MSK_OR, 0xffff); @@ -616,12 +621,10 @@ int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 p if (rx_pause) { cfg &= ~(RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE | - RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE | - RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD); + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE); } else { cfg |= (RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE | - RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE | - RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD); + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE); } if (tx_pause) { -- cgit From 7c7c863bf89c5f76d8c7fda177a81559b61dc15b Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 1 Dec 2023 12:21:08 +0000 Subject: drm/i915/selftests: Fix engine reset count storage for multi-tile Engine->id namespace is per-tile so struct igt_live_test->reset_engine[] needs to be two-dimensional so engine reset counts from all tiles can be stored with no aliasing. With aliasing, if we had a real multi-tile platform, the reset counts would be incorrect for same engine instance on different tiles. Signed-off-by: Tvrtko Ursulin Fixes: 0c29efa23f5c ("drm/i915/selftests: Consider multi-gt instead of to_gt()") Reported-by: Alan Previn Teres Alexis Cc: Tejas Upadhyay Cc: Andi Shyti Cc: Daniele Ceraolo Spurio Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231201122109.729006-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 0647ece3819b018cb62a71c3bcb7c2c3243e78ac) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/selftests/igt_live_test.c | 9 +++++---- drivers/gpu/drm/i915/selftests/igt_live_test.h | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 4ddc6d902752..7d41874a49c5 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -37,8 +37,9 @@ int igt_live_test_begin(struct igt_live_test *t, } for_each_engine(engine, gt, id) - t->reset_engine[id] = - i915_reset_engine_count(&i915->gpu_error, engine); + t->reset_engine[i][id] = + i915_reset_engine_count(&i915->gpu_error, + engine); } t->reset_global = i915_reset_count(&i915->gpu_error); @@ -66,14 +67,14 @@ int igt_live_test_end(struct igt_live_test *t) for_each_gt(gt, i915, i) { for_each_engine(engine, gt, id) { - if (t->reset_engine[id] == + if (t->reset_engine[i][id] == i915_reset_engine_count(&i915->gpu_error, engine)) continue; gt_err(gt, "%s(%s): engine '%s' was reset %d times!\n", t->func, t->name, engine->name, i915_reset_engine_count(&i915->gpu_error, engine) - - t->reset_engine[id]); + t->reset_engine[i][id]); return -EIO; } } diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h index 36ed42736c52..83e3ad430922 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.h +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h @@ -7,6 +7,7 @@ #ifndef IGT_LIVE_TEST_H #define IGT_LIVE_TEST_H +#include "gt/intel_gt_defines.h" /* for I915_MAX_GT */ #include "gt/intel_engine.h" /* for I915_NUM_ENGINES */ struct drm_i915_private; @@ -17,7 +18,7 @@ struct igt_live_test { const char *name; unsigned int reset_global; - unsigned int reset_engine[I915_NUM_ENGINES]; + unsigned int reset_engine[I915_MAX_GT][I915_NUM_ENGINES]; }; /* -- cgit From 1f721a93a528268fa97875cff515d1fcb69f4f44 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 1 Dec 2023 12:21:09 +0000 Subject: drm/i915: Use internal class when counting engine resets Commit 503579448db9 ("drm/i915/gsc: Mark internal GSC engine with reserved uabi class") made the GSC0 engine not have a valid uabi class and so broke the engine reset counting, which in turn was made class based in cb823ed9915b ("drm/i915/gt: Use intel_gt as the primary object for handling resets"). Despite the title and commit text of the latter is not mentioning it (and has left the storage array incorrectly sized), tracking by class, despite it adding aliasing in hypthotetical multi-tile systems, is handy for virtual engines which for instance do not have a valid engine->id. Therefore we keep that but just change it to use the internal class which is always valid. We also add a helper to increment the count, which aligns with the existing getter. What was broken without this fix were out of bounds reads every time a reset would happen on the GSC0 engine, or during selftests when storing and cross-checking the counts in igt_live_test_begin and igt_live_test_end. Signed-off-by: Tvrtko Ursulin Fixes: 503579448db9 ("drm/i915/gsc: Mark internal GSC engine with reserved uabi class") [tursulin: fixed Fixes tag] Reported-by: Alan Previn Teres Alexis Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20231201122109.729006-2-tvrtko.ursulin@linux.intel.com (cherry picked from commit cf9cb028ac56696ff879af1154c4b2f0b12701fd) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 5 +++-- drivers/gpu/drm/i915/i915_gpu_error.h | 12 ++++++++++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index d5ed904f355d..6801f8b95c53 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1293,7 +1293,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) if (msg) drm_notice(&engine->i915->drm, "Resetting %s for %s\n", engine->name, msg); - atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); + i915_increase_reset_engine_count(&engine->i915->gpu_error, engine); ret = intel_gt_reset_engine(engine); if (ret) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index d37698bd6b91..17df71117cc7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -5001,7 +5001,8 @@ static void capture_error_state(struct intel_guc *guc, if (match) { intel_engine_set_hung_context(e, ce); engine_mask |= e->mask; - atomic_inc(&i915->gpu_error.reset_engine_count[e->uabi_class]); + i915_increase_reset_engine_count(&i915->gpu_error, + e); } } @@ -5013,7 +5014,7 @@ static void capture_error_state(struct intel_guc *guc, } else { intel_engine_set_hung_context(ce->engine, ce); engine_mask = ce->engine->mask; - atomic_inc(&i915->gpu_error.reset_engine_count[ce->engine->uabi_class]); + i915_increase_reset_engine_count(&i915->gpu_error, ce->engine); } with_intel_runtime_pm(&i915->runtime_pm, wakeref) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 9f5971f5e980..48f6c00402c4 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -16,6 +16,7 @@ #include "display/intel_display_device.h" #include "gt/intel_engine.h" +#include "gt/intel_engine_types.h" #include "gt/intel_gt_types.h" #include "gt/uc/intel_uc_fw.h" @@ -232,7 +233,7 @@ struct i915_gpu_error { atomic_t reset_count; /** Number of times an engine has been reset */ - atomic_t reset_engine_count[I915_NUM_ENGINES]; + atomic_t reset_engine_count[MAX_ENGINE_CLASS]; }; struct drm_i915_error_state_buf { @@ -255,7 +256,14 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, const struct intel_engine_cs *engine) { - return atomic_read(&error->reset_engine_count[engine->uabi_class]); + return atomic_read(&error->reset_engine_count[engine->class]); +} + +static inline void +i915_increase_reset_engine_count(struct i915_gpu_error *error, + const struct intel_engine_cs *engine) +{ + atomic_inc(&error->reset_engine_count[engine->class]); } #define CORE_DUMP_FLAG_NONE 0x0 -- cgit From 0ccd963fe555451b1f84e6d14d2b3ef03dd5c947 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 5 Dec 2023 20:03:08 +0200 Subject: drm/i915: Fix remapped stride with CCS on ADL+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ADL+ the hardware automagically calculates the CCS AUX surface stride from the main surface stride, so when remapping we can't really play a lot of tricks with the main surface stride, or else the AUX surface stride would get miscalculated and no longer match the actual data layout in memory. Supposedly we could remap in 256 main surface tile units (AUX page(4096)/cachline(64)*4(4x1 main surface tiles per AUX cacheline)=256 main surface tiles), but the extra complexity is probably not worth the hassle. So let's just make sure our mapping stride is calculated from the full framebuffer stride (instead of the framebuffer width). This way the stride we program into PLANE_STRIDE will be the original framebuffer stride, and thus there will be no change to the AUX stride/layout. Cc: stable@vger.kernel.org Cc: Imre Deak Cc: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231205180308.7505-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit 2c12eb36f849256f5eb00ffaee9bf99396fd3814) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fb.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 19b35ece31f1..0dfc1b06255a 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1501,8 +1501,20 @@ static u32 calc_plane_remap_info(const struct intel_framebuffer *fb, int color_p size += remap_info->size; } else { - unsigned int dst_stride = plane_view_dst_stride_tiles(fb, color_plane, - remap_info->width); + unsigned int dst_stride; + + /* + * The hardware automagically calculates the CCS AUX surface + * stride from the main surface stride so can't really remap a + * smaller subset (unless we'd remap in whole AUX page units). + */ + if (intel_fb_needs_pot_stride_remap(fb) && + intel_fb_is_ccs_modifier(fb->base.modifier)) + dst_stride = remap_info->src_stride; + else + dst_stride = remap_info->width; + + dst_stride = plane_view_dst_stride_tiles(fb, color_plane, dst_stride); assign_chk_ovf(i915, remap_info->dst_stride, dst_stride); color_plane_info->mapping_stride = dst_stride * -- cgit From c3070f080f9ba18dea92eaa21730f7ab85b5c8f4 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 7 Dec 2023 21:34:34 +0200 Subject: drm/i915: Fix intel_atomic_setup_scalers() plane_state handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the plane_state variable is declared outside the scaler_users loop in intel_atomic_setup_scalers(), and it's never reset back to NULL inside the loop we may end up calling intel_atomic_setup_scaler() with a non-NULL plane state for the pipe scaling case. That is bad because intel_atomic_setup_scaler() determines whether we are doing plane scaling or pipe scaling based on plane_state!=NULL. The end result is that we may miscalculate the scaler mode for pipe scaling. The hardware becomes somewhat upset if we end up in this situation when scanning out a planar format on a SDR plane. We end up programming the pipe scaler into planar mode as well, and the result is a screenfull of garbage. Fix the situation by making sure we pass the correct plane_state==NULL when calculating the scaler mode for pipe scaling. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit e81144106e21271c619f0c722a09e27ccb8c043d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/skl_scaler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index 1e7c97243fcf..8a934bada624 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -504,7 +504,6 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, { struct drm_plane *plane = NULL; struct intel_plane *intel_plane; - struct intel_plane_state *plane_state = NULL; struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; struct drm_atomic_state *drm_state = crtc_state->uapi.state; @@ -536,6 +535,7 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, /* walkthrough scaler_users bits and start assigning scalers */ for (i = 0; i < sizeof(scaler_state->scaler_users) * 8; i++) { + struct intel_plane_state *plane_state = NULL; int *scaler_id; const char *name; int idx, ret; -- cgit From 324b70e997aab0a7deab8cb90711faccda4e98c8 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 4 Dec 2023 22:24:43 +0200 Subject: drm/i915: Fix ADL+ tiled plane stride when the POT stride is smaller than the original MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit plane_view_scanout_stride() currently assumes that we had to pad the mapping stride with dummy pages in order to align it. But that is not the case if the original fb stride exceeds the aligned stride used to populate the remapped view, which is calculated from the user specified framebuffer width rather than the user specified framebuffer stride. Ignore the original fb stride in this case and just stick to the POT aligned stride. Getting this wrong will cause the plane to fetch the wrong data, and can lead to fault errors if the page tables at the bogus location aren't even populated. TODO: figure out if this is OK for CCS, or if we should instead increase the width of the view to cover the entire user specified fb stride instead... Cc: Imre Deak Cc: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231204202443.31247-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak Reviewed-by: Juha-Pekka Heikkila (cherry picked from commit 01a39f1c4f1220a4e6a25729fae87ff5794cbc52) Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 0dfc1b06255a..646f367a13f5 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1374,7 +1374,8 @@ plane_view_scanout_stride(const struct intel_framebuffer *fb, int color_plane, struct drm_i915_private *i915 = to_i915(fb->base.dev); unsigned int stride_tiles; - if (IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) + if ((IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) && + src_stride_tiles < dst_stride_tiles) stride_tiles = src_stride_tiles; else stride_tiles = dst_stride_tiles; -- cgit From e6861d8264cd43c5eb20196e53df36fd71ec5698 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 5 Dec 2023 20:05:51 +0200 Subject: drm/i915/edp: don't write to DP_LINK_BW_SET when using rate select MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The eDP 1.5 spec adds a clarification for eDP 1.4x: > For eDP v1.4x, if the Source device chooses the Main-Link rate by way > of DPCD 00100h, the Sink device shall ignore DPCD 00115h[2:0]. We write 0 to DP_LINK_BW_SET (DPCD 100h) even when using DP_LINK_RATE_SET (DPCD 114h). Stop doing that, as it can cause the panel to ignore the rate set method. Moreover, 0 is a reserved value for DP_LINK_BW_SET, and should not be used. v2: Improve the comments (Ville) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9081 Tested-by: Animesh Manna Reviewed-by: Uma Shankar Cc: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231205180551.2476228-1-jani.nikula@intel.com (cherry picked from commit 23b392b94acb0499f69706c5808c099f590ebcf4) Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula --- .../gpu/drm/i915/display/intel_dp_link_training.c | 31 +++++++++++++++------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index dbc1b66c8ee4..1abfafbbfa75 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -650,19 +650,30 @@ intel_dp_update_link_bw_set(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, u8 link_bw, u8 rate_select) { - u8 link_config[2]; + u8 lane_count = crtc_state->lane_count; - /* Write the link configuration data */ - link_config[0] = link_bw; - link_config[1] = crtc_state->lane_count; if (crtc_state->enhanced_framing) - link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; - drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2); + lane_count |= DP_LANE_COUNT_ENHANCED_FRAME_EN; + + if (link_bw) { + /* DP and eDP v1.3 and earlier link bw set method. */ + u8 link_config[] = { link_bw, lane_count }; - /* eDP 1.4 rate select method. */ - if (!link_bw) - drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET, - &rate_select, 1); + drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, + ARRAY_SIZE(link_config)); + } else { + /* + * eDP v1.4 and later link rate set method. + * + * eDP v1.4x sinks shall ignore DP_LINK_RATE_SET if + * DP_LINK_BW_SET is set. Avoid writing DP_LINK_BW_SET. + * + * eDP v1.5 sinks allow choosing either, and the last choice + * shall be active. + */ + drm_dp_dpcd_writeb(&intel_dp->aux, DP_LANE_COUNT_SET, lane_count); + drm_dp_dpcd_writeb(&intel_dp->aux, DP_LINK_RATE_SET, rate_select); + } } /* -- cgit From bffa7218dcddb80e7f18dfa545dd4b359b11dd93 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 29 Nov 2023 17:00:00 +0800 Subject: dmaengine: fsl-edma: fix wrong pointer check in fsl_edma3_attach_pd() device_link_add() returns NULL pointer not PTR_ERR() when it fails, so replace the IS_ERR() check with NULL pointer check. Fixes: 72f5801a4e2b ("dmaengine: fsl-edma: integrate v3 support") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20231129090000.841440-1-yangyingliang@huaweicloud.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index aea7a703dda7..238a69bd0d6f 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -396,9 +396,8 @@ static int fsl_edma3_attach_pd(struct platform_device *pdev, struct fsl_edma_eng link = device_link_add(dev, pd_chan, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE); - if (IS_ERR(link)) { - dev_err(dev, "Failed to add device_link to %d: %ld\n", i, - PTR_ERR(link)); + if (!link) { + dev_err(dev, "Failed to add device_link to %d\n", i); return -EINVAL; } -- cgit From 759f14e20891de72e676d9d738eb2c573aa15f52 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 7 Dec 2023 11:38:21 +0200 Subject: drm/edid: also call add modes in EDID connector update fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the separate add modes call was added back in commit c533b5167c7e ("drm/edid: add separate drm_edid_connector_add_modes()"), it failed to address drm_edid_override_connector_update(). Also call add modes there. Reported-by: bbaa Closes: https://lore.kernel.org/r/930E9B4C7D91FDFF+29b34d89-8658-4910-966a-c772f320ea03@bbaa.fun Fixes: c533b5167c7e ("drm/edid: add separate drm_edid_connector_add_modes()") Cc: # v6.3+ Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231207093821.2654267-1-jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 39db08f803ea..3b4065099872 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -2309,7 +2309,8 @@ int drm_edid_override_connector_update(struct drm_connector *connector) override = drm_edid_override_get(connector); if (override) { - num_modes = drm_edid_connector_update(connector, override); + if (drm_edid_connector_update(connector, override) == 0) + num_modes = drm_edid_connector_add_modes(connector); drm_edid_free(override); -- cgit From b6961d187fcd138981b8707dac87b9fcdbfe75d1 Mon Sep 17 00:00:00 2001 From: Stuart Lee Date: Fri, 10 Nov 2023 09:29:14 +0800 Subject: drm/mediatek: Fix access violation in mtk_drm_crtc_dma_dev_get Add error handling to check NULL input in mtk_drm_crtc_dma_dev_get function. While display path is not configured correctly, none of crtc is established. So the caller of mtk_drm_crtc_dma_dev_get may pass input parameter *crtc as NULL, Which may cause coredump when we try to get the container of NULL pointer. Fixes: cb1d6bcca542 ("drm/mediatek: Add dma dev get function") Signed-off-by: Stuart Lee Cc: stable@vger.kernel.org Reviewed-by: AngeloGioacchino DEl Regno Tested-by: Macpaul Lin Link: https://patchwork.kernel.org/project/dri-devel/patch/20231110012914.14884-2-stuart.lee@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 0a7d60c191b8..db43f9dff912 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -926,7 +926,14 @@ static int mtk_drm_crtc_init_comp_planes(struct drm_device *drm_dev, struct device *mtk_drm_crtc_dma_dev_get(struct drm_crtc *crtc) { - struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); + struct mtk_drm_crtc *mtk_crtc = NULL; + + if (!crtc) + return NULL; + + mtk_crtc = to_mtk_crtc(crtc); + if (!mtk_crtc) + return NULL; return mtk_crtc->dma_dev; } -- cgit From 4ee632c82d2dbb9e2dcc816890ef182a151cbd99 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 27 Nov 2023 16:43:25 -0500 Subject: dmaengine: fsl-edma: fix DMA channel leak in eDMAv4 Allocate channel count consistently increases due to a missing source ID (srcid) cleanup in the fsl_edma_free_chan_resources() function at imx93 eDMAv4. Reset 'srcid' at fsl_edma_free_chan_resources(). Cc: stable@vger.kernel.org Fixes: 72f5801a4e2b ("dmaengine: fsl-edma: integrate v3 support") Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20231127214325.2477247-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c index 6a3abe5b1790..b53f46245c37 100644 --- a/drivers/dma/fsl-edma-common.c +++ b/drivers/dma/fsl-edma-common.c @@ -828,6 +828,7 @@ void fsl_edma_free_chan_resources(struct dma_chan *chan) dma_pool_destroy(fsl_chan->tcd_pool); fsl_chan->tcd_pool = NULL; fsl_chan->is_sw = false; + fsl_chan->srcid = 0; } void fsl_edma_cleanup_vchan(struct dma_device *dmadev) -- cgit From 50d7cdf7a9b1ab6f4f74a69c84e974d5dc0c1bf1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 11 Dec 2023 10:00:57 +0100 Subject: efi/x86: Avoid physical KASLR on older Dell systems River reports boot hangs with v6.6 and v6.7, and the bisect points to commit a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") which moves the memory allocation and kernel decompression from the legacy decompressor (which executes *after* ExitBootServices()) to the EFI stub, using boot services for allocating the memory. The memory allocation succeeds but the subsequent call to decompress_kernel() never returns, resulting in a failed boot and a hanging system. As it turns out, this issue only occurs when physical address randomization (KASLR) is enabled, and given that this is a feature we can live without (virtual KASLR is much more important), let's disable the physical part of KASLR when booting on AMI UEFI firmware claiming to implement revision v2.0 of the specification (which was released in 2006), as this is the version these systems advertise. Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218173 Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/x86-stub.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 1bfdae34df39..da9b7b8d0716 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -307,17 +307,20 @@ static void setup_unaccepted_memory(void) efi_err("Memory acceptance protocol failed\n"); } +static efi_char16_t *efistub_fw_vendor(void) +{ + unsigned long vendor = efi_table_attr(efi_system_table, fw_vendor); + + return (efi_char16_t *)vendor; +} + static const efi_char16_t apple[] = L"Apple"; static void setup_quirks(struct boot_params *boot_params) { - efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long) - efi_table_attr(efi_system_table, fw_vendor); - - if (!memcmp(fw_vendor, apple, sizeof(apple))) { - if (IS_ENABLED(CONFIG_APPLE_PROPERTIES)) - retrieve_apple_device_properties(boot_params); - } + if (IS_ENABLED(CONFIG_APPLE_PROPERTIES) && + !memcmp(efistub_fw_vendor(), apple, sizeof(apple))) + retrieve_apple_device_properties(boot_params); } /* @@ -765,11 +768,25 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) { u64 range = KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR - kernel_total_size; + static const efi_char16_t ami[] = L"American Megatrends"; efi_get_seed(seed, sizeof(seed)); virt_addr += (range * seed[1]) >> 32; virt_addr &= ~(CONFIG_PHYSICAL_ALIGN - 1); + + /* + * Older Dell systems with AMI UEFI firmware v2.0 may hang + * while decompressing the kernel if physical address + * randomization is enabled. + * + * https://bugzilla.kernel.org/show_bug.cgi?id=218173 + */ + if (efi_system_table->hdr.revision <= EFI_2_00_SYSTEM_TABLE_REVISION && + !memcmp(efistub_fw_vendor(), ami, sizeof(ami))) { + efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n"); + seed[0] = 0; + } } status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, -- cgit From 9fd2fbaabdb9dba947d1c14e5f4f217bc21afc34 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 11 Dec 2023 11:28:30 -0500 Subject: drm/amdgpu: fix buffer funcs setting order on suspend harder Part of commit dab96d8b61aa ("drm/amdgpu: fix buffer funcs setting order on suspend") got dropped accidently. Add it back. Fixes: dab96d8b61aa ("drm/amdgpu: fix buffer funcs setting order on suspend") Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1f64d8cbb14d..8dee52ce26d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4516,8 +4516,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_ras_suspend(adev); - amdgpu_ttm_set_buffer_funcs_status(adev, false); - amdgpu_device_ip_suspend_phase1(adev); if (!adev->in_s0ix) -- cgit From dbfbf4740e40fbd39ceeb5c42ab301ac2edd7a9f Mon Sep 17 00:00:00 2001 From: Dmitrii Galantsev Date: Wed, 6 Dec 2023 02:04:52 -0600 Subject: drm/amd/pm: fix pp_*clk_od typo Fix pp_dpm_sclk_od and pp_dpm_mclk_od typos. Those were defined as pp_*clk_od but used as pp_dpm_*clk_od instead. This change removes the _dpm part. Fixes: 8cfd6a05750c ("drm/amd/pm: Hide irrelevant pm device attributes") Signed-off-by: Dmitrii Galantsev Reviewed-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index ca2ece24e1e0..49028dde0f87 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2198,10 +2198,10 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } else if (DEVICE_ATTR_IS(xgmi_plpd_policy)) { if (amdgpu_dpm_get_xgmi_plpd_mode(adev, NULL) == XGMI_PLPD_NONE) *states = ATTR_STATE_UNSUPPORTED; - } else if (DEVICE_ATTR_IS(pp_dpm_mclk_od)) { + } else if (DEVICE_ATTR_IS(pp_mclk_od)) { if (amdgpu_dpm_get_mclk_od(adev) == -EOPNOTSUPP) *states = ATTR_STATE_UNSUPPORTED; - } else if (DEVICE_ATTR_IS(pp_dpm_sclk_od)) { + } else if (DEVICE_ATTR_IS(pp_sclk_od)) { if (amdgpu_dpm_get_sclk_od(adev) == -EOPNOTSUPP) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(apu_thermal_cap)) { -- cgit From f528ee145bd0076cd0ed7e7b2d435893e6329e98 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 5 Dec 2023 14:55:04 -0500 Subject: drm/amd/display: fix hw rotated modes when PSR-SU is enabled We currently don't support dirty rectangles on hardware rotated modes. So, if a user is using hardware rotated modes with PSR-SU enabled, use PSR-SU FFU for all rotated planes (including cursor planes). Cc: stable@vger.kernel.org Fixes: 30ebe41582d1 ("drm/amd/display: add FB_DAMAGE_CLIPS support") Reported-by: Kai-Heng Feng Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2952 Tested-by: Kai-Heng Feng Tested-by: Bin Li Reviewed-by: Mario Limonciello Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 1 + drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c | 12 ++++++++++-- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 3 ++- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b452796fc6d3..c8c00c2a5224 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5182,6 +5182,9 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, if (plane->type == DRM_PLANE_TYPE_CURSOR) return; + if (new_plane_state->rotation != DRM_MODE_ROTATE_0) + goto ffu; + num_clips = drm_plane_get_damage_clips_count(new_plane_state); clips = drm_plane_get_damage_clips(new_plane_state); diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 9649934ea186..e2a3aa8812df 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -465,6 +465,7 @@ struct dc_cursor_mi_param { struct fixed31_32 v_scale_ratio; enum dc_rotation_angle rotation; bool mirror; + struct dc_stream_state *stream; }; /* IPP related types */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 139cf31d2e45..89c3bf0fe0c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -1077,8 +1077,16 @@ void hubp2_cursor_set_position( if (src_y_offset < 0) src_y_offset = 0; /* Save necessary cursor info x, y position. w, h is saved in attribute func. */ - hubp->cur_rect.x = src_x_offset + param->viewport.x; - hubp->cur_rect.y = src_y_offset + param->viewport.y; + if (param->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 && + param->rotation != ROTATION_ANGLE_0) { + hubp->cur_rect.x = 0; + hubp->cur_rect.y = 0; + hubp->cur_rect.w = param->stream->timing.h_addressable; + hubp->cur_rect.h = param->stream->timing.v_addressable; + } else { + hubp->cur_rect.x = src_x_offset + param->viewport.x; + hubp->cur_rect.y = src_y_offset + param->viewport.y; + } } void hubp2_clk_cntl(struct hubp *hubp, bool enable) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 2b8b8366538e..cdb903116eb7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3417,7 +3417,8 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz, .v_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.vert, .rotation = pipe_ctx->plane_state->rotation, - .mirror = pipe_ctx->plane_state->horizontal_mirror + .mirror = pipe_ctx->plane_state->horizontal_mirror, + .stream = pipe_ctx->stream, }; bool pipe_split_on = false; bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) || -- cgit From b96ab339ee50470d13a1faa6ad94d2218a7cd49f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 6 Dec 2023 12:08:26 -0600 Subject: drm/amd/display: Restore guard against default backlight value < 1 nit Mark reports that brightness is not restored after Xorg dpms screen blank. This behavior was introduced by commit d9e865826c20 ("drm/amd/display: Simplify brightness initialization") which dropped the cached backlight value in display code, but also removed code for when the default value read back was less than 1 nit. Restore this code so that the backlight brightness is restored to the correct default value in this circumstance. Reported-by: Mark Herbert Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3031 Cc: stable@vger.kernel.org Cc: Camille Cho Cc: Krunoslav Kovac Cc: Hamza Mahfooz Fixes: d9e865826c20 ("drm/amd/display: Simplify brightness initialization") Acked-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 996e4ee99023..e5cfaaef70b3 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -287,8 +287,8 @@ bool set_default_brightness_aux(struct dc_link *link) if (link && link->dpcd_sink_ext_caps.bits.oled == 1) { if (!read_default_bl_aux(link, &default_backlight)) default_backlight = 150000; - // if > 5000, it might be wrong readback - if (default_backlight > 5000000) + // if < 1 nits or > 5000, it might be wrong readback + if (default_backlight < 1000 || default_backlight > 5000000) default_backlight = 150000; return edp_set_backlight_level_nits(link, true, -- cgit From 718ab8226636a1a3a7d281f5d6a7ad7c925efe5a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 28 Nov 2023 09:15:07 +0100 Subject: PCI/ASPM: Add pci_enable_link_state_locked() Add pci_enable_link_state_locked() for enabling link states that can be used in contexts where a pci_bus_sem read lock is already held (e.g. from pci_walk_bus()). This helper will be used to fix a couple of potential deadlocks where the current helper is called with the lock already held, hence the CC stable tag. Fixes: f492edb40b54 ("PCI: vmd: Add quirk to configure PCIe ASPM and LTR") Link: https://lore.kernel.org/r/20231128081512.19387-2-johan+linaro@kernel.org Signed-off-by: Johan Hovold [bhelgaas: include helper name in subject, commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Cc: # 6.3 Cc: Michael Bottini Cc: David E. Box --- drivers/pci/pcie/aspm.c | 53 +++++++++++++++++++++++++++++++++++++------------ include/linux/pci.h | 3 +++ 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 50b04ae5c394..5eb462772354 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1109,17 +1109,7 @@ int pci_disable_link_state(struct pci_dev *pdev, int state) } EXPORT_SYMBOL(pci_disable_link_state); -/** - * pci_enable_link_state - Clear and set the default device link state so that - * the link may be allowed to enter the specified states. Note that if the - * BIOS didn't grant ASPM control to the OS, this does nothing because we can't - * touch the LNKCTL register. Also note that this does not enable states - * disabled by pci_disable_link_state(). Return 0 or a negative errno. - * - * @pdev: PCI device - * @state: Mask of ASPM link states to enable - */ -int pci_enable_link_state(struct pci_dev *pdev, int state) +static int __pci_enable_link_state(struct pci_dev *pdev, int state, bool locked) { struct pcie_link_state *link = pcie_aspm_get_link(pdev); @@ -1136,7 +1126,8 @@ int pci_enable_link_state(struct pci_dev *pdev, int state) return -EPERM; } - down_read(&pci_bus_sem); + if (!locked) + down_read(&pci_bus_sem); mutex_lock(&aspm_lock); link->aspm_default = 0; if (state & PCIE_LINK_STATE_L0S) @@ -1157,12 +1148,48 @@ int pci_enable_link_state(struct pci_dev *pdev, int state) link->clkpm_default = (state & PCIE_LINK_STATE_CLKPM) ? 1 : 0; pcie_set_clkpm(link, policy_to_clkpm_state(link)); mutex_unlock(&aspm_lock); - up_read(&pci_bus_sem); + if (!locked) + up_read(&pci_bus_sem); return 0; } + +/** + * pci_enable_link_state - Clear and set the default device link state so that + * the link may be allowed to enter the specified states. Note that if the + * BIOS didn't grant ASPM control to the OS, this does nothing because we can't + * touch the LNKCTL register. Also note that this does not enable states + * disabled by pci_disable_link_state(). Return 0 or a negative errno. + * + * @pdev: PCI device + * @state: Mask of ASPM link states to enable + */ +int pci_enable_link_state(struct pci_dev *pdev, int state) +{ + return __pci_enable_link_state(pdev, state, false); +} EXPORT_SYMBOL(pci_enable_link_state); +/** + * pci_enable_link_state_locked - Clear and set the default device link state + * so that the link may be allowed to enter the specified states. Note that if + * the BIOS didn't grant ASPM control to the OS, this does nothing because we + * can't touch the LNKCTL register. Also note that this does not enable states + * disabled by pci_disable_link_state(). Return 0 or a negative errno. + * + * @pdev: PCI device + * @state: Mask of ASPM link states to enable + * + * Context: Caller holds pci_bus_sem read lock. + */ +int pci_enable_link_state_locked(struct pci_dev *pdev, int state) +{ + lockdep_assert_held_read(&pci_bus_sem); + + return __pci_enable_link_state(pdev, state, true); +} +EXPORT_SYMBOL(pci_enable_link_state_locked); + static int pcie_aspm_set_policy(const char *val, const struct kernel_param *kp) { diff --git a/include/linux/pci.h b/include/linux/pci.h index 60ca768bc867..dea043bc1e38 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1829,6 +1829,7 @@ extern bool pcie_ports_native; int pci_disable_link_state(struct pci_dev *pdev, int state); int pci_disable_link_state_locked(struct pci_dev *pdev, int state); int pci_enable_link_state(struct pci_dev *pdev, int state); +int pci_enable_link_state_locked(struct pci_dev *pdev, int state); void pcie_no_aspm(void); bool pcie_aspm_support_enabled(void); bool pcie_aspm_enabled(struct pci_dev *pdev); @@ -1839,6 +1840,8 @@ static inline int pci_disable_link_state_locked(struct pci_dev *pdev, int state) { return 0; } static inline int pci_enable_link_state(struct pci_dev *pdev, int state) { return 0; } +static inline int pci_enable_link_state_locked(struct pci_dev *pdev, int state) +{ return 0; } static inline void pcie_no_aspm(void) { } static inline bool pcie_aspm_support_enabled(void) { return false; } static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } -- cgit From 49de0dc87965079a8e2803ee4b39f9d946259423 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 28 Nov 2023 09:15:08 +0100 Subject: PCI: vmd: Fix potential deadlock when enabling ASPM The vmd_pm_enable_quirk() helper is called from pci_walk_bus() during probe to enable ASPM for controllers with VMD_FEAT_BIOS_PM_QUIRK set. Since pci_walk_bus() already holds a pci_bus_sem read lock, use pci_enable_link_state_locked() to enable link states in order to avoid a potential deadlock (e.g. in case someone takes a write lock before reacquiring the read lock). Fixes: f492edb40b54 ("PCI: vmd: Add quirk to configure PCIe ASPM and LTR") Link: https://lore.kernel.org/r/20231128081512.19387-3-johan+linaro@kernel.org Signed-off-by: Johan Hovold [bhelgaas: add "potential" in subject since the deadlock has only been reported by lockdep, include helper name in commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Cc: # 6.3 Cc: Michael Bottini Cc: David E. Box --- drivers/pci/controller/vmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 94ba61fe1c44..0452cbc362ee 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -751,7 +751,7 @@ static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata) if (!(features & VMD_FEAT_BIOS_PM_QUIRK)) return 0; - pci_enable_link_state(pdev, PCIE_LINK_STATE_ALL); + pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL); pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_LTR); if (!pos) -- cgit From f352ce99926048e12aa4281c32471031351aec98 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 28 Nov 2023 09:15:09 +0100 Subject: PCI: qcom: Fix potential deadlock when enabling ASPM The qcom_pcie_enable_aspm() helper is called from pci_walk_bus() during host init to enable ASPM. Since pci_walk_bus() already holds a pci_bus_sem read lock, use pci_enable_link_state_locked() to enable link states in order to avoid a potential deadlock (e.g. in case someone takes a write lock before reacquiring the read lock). This issue was reported by lockdep: ============================================ WARNING: possible recursive locking detected 6.7.0-rc1 #4 Not tainted -------------------------------------------- kworker/u16:6/147 is trying to acquire lock: ffffbf3ff9d2cfa0 (pci_bus_sem){++++}-{3:3}, at: pci_enable_link_state+0x74/0x1e8 but task is already holding lock: ffffbf3ff9d2cfa0 (pci_bus_sem){++++}-{3:3}, at: pci_walk_bus+0x34/0xbc other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(pci_bus_sem); lock(pci_bus_sem); *** DEADLOCK *** Fixes: 9f4f3dfad8cf ("PCI: qcom: Enable ASPM for platforms supporting 1.9.0 ops") Link: https://lore.kernel.org/r/20231128081512.19387-4-johan+linaro@kernel.org Signed-off-by: Johan Hovold [bhelgaas: add "potential" in subject since the deadlock has only been reported by lockdep, include helper name in commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/dwc/pcie-qcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 6902e97719d1..13dbdfac98f2 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -970,7 +970,7 @@ static int qcom_pcie_enable_aspm(struct pci_dev *pdev, void *userdata) { /* Downstream devices need to be in D0 state before enabling PCI PM substates */ pci_set_power_state(pdev, PCI_D0); - pci_enable_link_state(pdev, PCIE_LINK_STATE_ALL); + pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL); return 0; } -- cgit From 780f52e3213e5f05bb41adebe1f2214f2f86f4a3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 28 Nov 2023 09:15:10 +0100 Subject: PCI: qcom: Clean up ASPM comment Break up the newly added ASPM comment so that it fits within the soft 80 character limit and becomes more readable. Link: https://lore.kernel.org/r/20231128081512.19387-5-johan+linaro@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/dwc/pcie-qcom.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 13dbdfac98f2..11c80555d975 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -968,7 +968,10 @@ static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie) static int qcom_pcie_enable_aspm(struct pci_dev *pdev, void *userdata) { - /* Downstream devices need to be in D0 state before enabling PCI PM substates */ + /* + * Downstream devices need to be in D0 state before enabling PCI PM + * substates. + */ pci_set_power_state(pdev, PCI_D0); pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL); -- cgit From e673d383bdba94c9924388086b91988254d39f19 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 28 Nov 2023 09:15:11 +0100 Subject: PCI/ASPM: Clean up __pci_disable_link_state() 'sem' parameter Replace the current 'sem' parameter to the __pci_disable_link_state() helper with a more descriptive 'locked' parameter, which indicates whether a pci_bus_sem read lock is already held. Link: https://lore.kernel.org/r/20231128081512.19387-6-johan+linaro@kernel.org Signed-off-by: Johan Hovold [bhelgaas: include function name in subject, commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- drivers/pci/pcie/aspm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 5eb462772354..d7a3ca555cc1 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1041,7 +1041,7 @@ static struct pcie_link_state *pcie_aspm_get_link(struct pci_dev *pdev) return bridge->link_state; } -static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) +static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool locked) { struct pcie_link_state *link = pcie_aspm_get_link(pdev); @@ -1060,7 +1060,7 @@ static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) return -EPERM; } - if (sem) + if (!locked) down_read(&pci_bus_sem); mutex_lock(&aspm_lock); if (state & PCIE_LINK_STATE_L0S) @@ -1082,7 +1082,7 @@ static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) link->clkpm_disable = 1; pcie_set_clkpm(link, policy_to_clkpm_state(link)); mutex_unlock(&aspm_lock); - if (sem) + if (!locked) up_read(&pci_bus_sem); return 0; @@ -1090,7 +1090,7 @@ static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) int pci_disable_link_state_locked(struct pci_dev *pdev, int state) { - return __pci_disable_link_state(pdev, state, false); + return __pci_disable_link_state(pdev, state, true); } EXPORT_SYMBOL(pci_disable_link_state_locked); @@ -1105,7 +1105,7 @@ EXPORT_SYMBOL(pci_disable_link_state_locked); */ int pci_disable_link_state(struct pci_dev *pdev, int state) { - return __pci_disable_link_state(pdev, state, true); + return __pci_disable_link_state(pdev, state, false); } EXPORT_SYMBOL(pci_disable_link_state); -- cgit From 7ff2b7a1821b61c324626ad57c3664398fb0083d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 28 Nov 2023 09:15:12 +0100 Subject: PCI/ASPM: Add pci_disable_link_state_locked() lockdep assert Add a lockdep assert to pci_disable_link_state_locked() which should only be called with a pci_bus_sem read lock held. Link: https://lore.kernel.org/r/20231128081512.19387-7-johan+linaro@kernel.org Signed-off-by: Johan Hovold [bhelgaas: include function name in subject, commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- drivers/pci/pcie/aspm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index d7a3ca555cc1..5dab531c8654 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1090,6 +1090,8 @@ static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool locked int pci_disable_link_state_locked(struct pci_dev *pdev, int state) { + lockdep_assert_held_read(&pci_bus_sem); + return __pci_disable_link_state(pdev, state, true); } EXPORT_SYMBOL(pci_disable_link_state_locked); -- cgit From eec04ea119691e65227a97ce53c0da6b9b74b0b7 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 11 Dec 2023 10:26:40 -0300 Subject: smb: client: fix OOB in receive_encrypted_standard() Fix potential OOB in receive_encrypted_standard() if server returned a large shdr->NextCommand that would end up writing off the end of @next_buffer. Fixes: b24df3e30cbf ("cifs: update receive_encrypted_standard to handle compounded responses") Cc: stable@vger.kernel.org Reported-by: Robert Morris Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index fcfb6566b899..a6f4948adcbb 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4943,6 +4943,7 @@ receive_encrypted_standard(struct TCP_Server_Info *server, struct smb2_hdr *shdr; unsigned int pdu_length = server->pdu_size; unsigned int buf_size; + unsigned int next_cmd; struct mid_q_entry *mid_entry; int next_is_large; char *next_buffer = NULL; @@ -4971,14 +4972,15 @@ receive_encrypted_standard(struct TCP_Server_Info *server, next_is_large = server->large_buf; one_more: shdr = (struct smb2_hdr *)buf; - if (shdr->NextCommand) { + next_cmd = le32_to_cpu(shdr->NextCommand); + if (next_cmd) { + if (WARN_ON_ONCE(next_cmd > pdu_length)) + return -1; if (next_is_large) next_buffer = (char *)cifs_buf_get(); else next_buffer = (char *)cifs_small_buf_get(); - memcpy(next_buffer, - buf + le32_to_cpu(shdr->NextCommand), - pdu_length - le32_to_cpu(shdr->NextCommand)); + memcpy(next_buffer, buf + next_cmd, pdu_length - next_cmd); } mid_entry = smb2_find_mid(server, buf); @@ -5002,8 +5004,8 @@ one_more: else ret = cifs_handle_standard(server, mid_entry); - if (ret == 0 && shdr->NextCommand) { - pdu_length -= le32_to_cpu(shdr->NextCommand); + if (ret == 0 && next_cmd) { + pdu_length -= next_cmd; server->large_buf = next_is_large; if (next_is_large) server->bigbuf = buf = next_buffer; -- cgit From af1689a9b7701d9907dfc84d2a4b57c4bc907144 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 11 Dec 2023 10:26:41 -0300 Subject: smb: client: fix potential OOBs in smb2_parse_contexts() Validate offsets and lengths before dereferencing create contexts in smb2_parse_contexts(). This fixes following oops when accessing invalid create contexts from server: BUG: unable to handle page fault for address: ffff8881178d8cc3 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 4a01067 P4D 4a01067 PUD 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 3 PID: 1736 Comm: mount.cifs Not tainted 6.7.0-rc4 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 RIP: 0010:smb2_parse_contexts+0xa0/0x3a0 [cifs] Code: f8 10 75 13 48 b8 93 ad 25 50 9c b4 11 e7 49 39 06 0f 84 d2 00 00 00 8b 45 00 85 c0 74 61 41 29 c5 48 01 c5 41 83 fd 0f 76 55 <0f> b7 7d 04 0f b7 45 06 4c 8d 74 3d 00 66 83 f8 04 75 bc ba 04 00 RSP: 0018:ffffc900007939e0 EFLAGS: 00010216 RAX: ffffc90000793c78 RBX: ffff8880180cc000 RCX: ffffc90000793c90 RDX: ffffc90000793cc0 RSI: ffff8880178d8cc0 RDI: ffff8880180cc000 RBP: ffff8881178d8cbf R08: ffffc90000793c22 R09: 0000000000000000 R10: ffff8880180cc000 R11: 0000000000000024 R12: 0000000000000000 R13: 0000000000000020 R14: 0000000000000000 R15: ffffc90000793c22 FS: 00007f873753cbc0(0000) GS:ffff88806bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff8881178d8cc3 CR3: 00000000181ca000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x181/0x480 ? search_module_extables+0x19/0x60 ? srso_alias_return_thunk+0x5/0xfbef5 ? exc_page_fault+0x1b6/0x1c0 ? asm_exc_page_fault+0x26/0x30 ? smb2_parse_contexts+0xa0/0x3a0 [cifs] SMB2_open+0x38d/0x5f0 [cifs] ? smb2_is_path_accessible+0x138/0x260 [cifs] smb2_is_path_accessible+0x138/0x260 [cifs] cifs_is_path_remote+0x8d/0x230 [cifs] cifs_mount+0x7e/0x350 [cifs] cifs_smb3_do_mount+0x128/0x780 [cifs] smb3_get_tree+0xd9/0x290 [cifs] vfs_get_tree+0x2c/0x100 ? capable+0x37/0x70 path_mount+0x2d7/0xb80 ? srso_alias_return_thunk+0x5/0xfbef5 ? _raw_spin_unlock_irqrestore+0x44/0x60 __x64_sys_mount+0x11a/0x150 do_syscall_64+0x47/0xf0 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f8737657b1e Reported-by: Robert Morris Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 17 ++++++--- fs/smb/client/smb2pdu.c | 93 ++++++++++++++++++++++++++++------------------ fs/smb/client/smb2proto.h | 12 +++--- 3 files changed, 75 insertions(+), 47 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 59f6b8e32cc9..d64a306a414b 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -291,16 +291,23 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, oparms.fid->mid = le64_to_cpu(o_rsp->hdr.MessageId); #endif /* CIFS_DEBUG2 */ - rc = -EINVAL; + if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) { + spin_unlock(&cfids->cfid_list_lock); + rc = -EINVAL; + goto oshr_free; + } + + rc = smb2_parse_contexts(server, rsp_iov, + &oparms.fid->epoch, + oparms.fid->lease_key, + &oplock, NULL, NULL); + if (rc) { spin_unlock(&cfids->cfid_list_lock); goto oshr_free; } - smb2_parse_contexts(server, o_rsp, - &oparms.fid->epoch, - oparms.fid->lease_key, &oplock, - NULL, NULL); + rc = -EINVAL; if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) { spin_unlock(&cfids->cfid_list_lock); goto oshr_free; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 20634fc6d4f0..c571760ad39a 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2236,17 +2236,18 @@ parse_posix_ctxt(struct create_context *cc, struct smb2_file_all_info *info, posix->nlink, posix->mode, posix->reparse_tag); } -void -smb2_parse_contexts(struct TCP_Server_Info *server, - struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key, __u8 *oplock, - struct smb2_file_all_info *buf, - struct create_posix_rsp *posix) +int smb2_parse_contexts(struct TCP_Server_Info *server, + struct kvec *rsp_iov, + unsigned int *epoch, + char *lease_key, __u8 *oplock, + struct smb2_file_all_info *buf, + struct create_posix_rsp *posix) { - char *data_offset; + struct smb2_create_rsp *rsp = rsp_iov->iov_base; struct create_context *cc; - unsigned int next; - unsigned int remaining; + size_t rem, off, len; + size_t doff, dlen; + size_t noff, nlen; char *name; static const char smb3_create_tag_posix[] = { 0x93, 0xAD, 0x25, 0x50, 0x9C, @@ -2255,45 +2256,63 @@ smb2_parse_contexts(struct TCP_Server_Info *server, }; *oplock = 0; - data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset); - remaining = le32_to_cpu(rsp->CreateContextsLength); - cc = (struct create_context *)data_offset; + + off = le32_to_cpu(rsp->CreateContextsOffset); + rem = le32_to_cpu(rsp->CreateContextsLength); + if (check_add_overflow(off, rem, &len) || len > rsp_iov->iov_len) + return -EINVAL; + cc = (struct create_context *)((u8 *)rsp + off); /* Initialize inode number to 0 in case no valid data in qfid context */ if (buf) buf->IndexNumber = 0; - while (remaining >= sizeof(struct create_context)) { - name = le16_to_cpu(cc->NameOffset) + (char *)cc; - if (le16_to_cpu(cc->NameLength) == 4 && - strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4) == 0) - *oplock = server->ops->parse_lease_buf(cc, epoch, - lease_key); - else if (buf && (le16_to_cpu(cc->NameLength) == 4) && - strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0) - parse_query_id_ctxt(cc, buf); - else if ((le16_to_cpu(cc->NameLength) == 16)) { - if (posix && - memcmp(name, smb3_create_tag_posix, 16) == 0) + while (rem >= sizeof(*cc)) { + doff = le16_to_cpu(cc->DataOffset); + dlen = le32_to_cpu(cc->DataLength); + if (check_add_overflow(doff, dlen, &len) || len > rem) + return -EINVAL; + + noff = le16_to_cpu(cc->NameOffset); + nlen = le16_to_cpu(cc->NameLength); + if (noff + nlen >= doff) + return -EINVAL; + + name = (char *)cc + noff; + switch (nlen) { + case 4: + if (!strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4)) { + *oplock = server->ops->parse_lease_buf(cc, epoch, + lease_key); + } else if (buf && + !strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4)) { + parse_query_id_ctxt(cc, buf); + } + break; + case 16: + if (posix && !memcmp(name, smb3_create_tag_posix, 16)) parse_posix_ctxt(cc, buf, posix); + break; + default: + cifs_dbg(FYI, "%s: unhandled context (nlen=%zu dlen=%zu)\n", + __func__, nlen, dlen); + if (IS_ENABLED(CONFIG_CIFS_DEBUG2)) + cifs_dump_mem("context data: ", cc, dlen); + break; } - /* else { - cifs_dbg(FYI, "Context not matched with len %d\n", - le16_to_cpu(cc->NameLength)); - cifs_dump_mem("Cctxt name: ", name, 4); - } */ - - next = le32_to_cpu(cc->Next); - if (!next) + + off = le32_to_cpu(cc->Next); + if (!off) break; - remaining -= next; - cc = (struct create_context *)((char *)cc + next); + if (check_sub_overflow(rem, off, &rem)) + return -EINVAL; + cc = (struct create_context *)((u8 *)cc + off); } if (rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) *oplock = rsp->OplockLevel; - return; + return 0; } static int @@ -3124,8 +3143,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, } - smb2_parse_contexts(server, rsp, &oparms->fid->epoch, - oparms->fid->lease_key, oplock, buf, posix); + rc = smb2_parse_contexts(server, &rsp_iov, &oparms->fid->epoch, + oparms->fid->lease_key, oplock, buf, posix); creat_exit: SMB2_open_free(&rqst); free_rsp_buf(resp_buftype, rsp); diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 46eff9ec302a..0e371f7e2854 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -251,11 +251,13 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *); extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *, enum securityEnum); -extern void smb2_parse_contexts(struct TCP_Server_Info *server, - struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key, - __u8 *oplock, struct smb2_file_all_info *buf, - struct create_posix_rsp *posix); +int smb2_parse_contexts(struct TCP_Server_Info *server, + struct kvec *rsp_iov, + unsigned int *epoch, + char *lease_key, __u8 *oplock, + struct smb2_file_all_info *buf, + struct create_posix_rsp *posix); + extern int smb3_encryption_required(const struct cifs_tcon *tcon); extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length, struct kvec *iov, unsigned int min_buf_size); -- cgit From 90d025c2e953c11974e76637977c473200593a46 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 11 Dec 2023 10:26:42 -0300 Subject: smb: client: fix NULL deref in asn1_ber_decoder() If server replied SMB2_NEGOTIATE with a zero SecurityBufferOffset, smb2_get_data_area() sets @len to non-zero but return NULL, so decode_negTokeninit() ends up being called with a NULL @security_blob: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 2 PID: 871 Comm: mount.cifs Not tainted 6.7.0-rc4 #2 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 RIP: 0010:asn1_ber_decoder+0x173/0xc80 Code: 01 4c 39 2c 24 75 09 45 84 c9 0f 85 2f 03 00 00 48 8b 14 24 4c 29 ea 48 83 fa 01 0f 86 1e 07 00 00 48 8b 74 24 28 4d 8d 5d 01 <42> 0f b6 3c 2e 89 fa 40 88 7c 24 5c f7 d2 83 e2 1f 0f 84 3d 07 00 RSP: 0018:ffffc9000063f950 EFLAGS: 00010202 RAX: 0000000000000002 RBX: 0000000000000000 RCX: 000000000000004a RDX: 000000000000004a RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000002 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000000 R14: 000000000000004d R15: 0000000000000000 FS: 00007fce52b0fbc0(0000) GS:ffff88806ba00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000001ae64000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x181/0x480 ? __stack_depot_save+0x1e6/0x480 ? exc_page_fault+0x6f/0x1c0 ? asm_exc_page_fault+0x26/0x30 ? asn1_ber_decoder+0x173/0xc80 ? check_object+0x40/0x340 decode_negTokenInit+0x1e/0x30 [cifs] SMB2_negotiate+0xc99/0x17c0 [cifs] ? smb2_negotiate+0x46/0x60 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 smb2_negotiate+0x46/0x60 [cifs] cifs_negotiate_protocol+0xae/0x130 [cifs] cifs_get_smb_ses+0x517/0x1040 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? queue_delayed_work_on+0x5d/0x90 cifs_mount_get_session+0x78/0x200 [cifs] dfs_mount_share+0x13a/0x9f0 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 ? lock_acquire+0xbf/0x2b0 ? find_nls+0x16/0x80 ? srso_alias_return_thunk+0x5/0xfbef5 cifs_mount+0x7e/0x350 [cifs] cifs_smb3_do_mount+0x128/0x780 [cifs] smb3_get_tree+0xd9/0x290 [cifs] vfs_get_tree+0x2c/0x100 ? capable+0x37/0x70 path_mount+0x2d7/0xb80 ? srso_alias_return_thunk+0x5/0xfbef5 ? _raw_spin_unlock_irqrestore+0x44/0x60 __x64_sys_mount+0x11a/0x150 do_syscall_64+0x47/0xf0 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7fce52c2ab1e Fix this by setting @len to zero when @off == 0 so callers won't attempt to dereference non-existing data areas. Reported-by: Robert Morris Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/smb2misc.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 32dfa0f7a78c..e20b4354e703 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -313,6 +313,9 @@ static const bool has_smb2_data_area[NUMBER_OF_SMB2_COMMANDS] = { char * smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *shdr) { + const int max_off = 4096; + const int max_len = 128 * 1024; + *off = 0; *len = 0; @@ -384,29 +387,20 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *shdr) * Invalid length or offset probably means data area is invalid, but * we have little choice but to ignore the data area in this case. */ - if (*off > 4096) { - cifs_dbg(VFS, "offset %d too large, data area ignored\n", *off); - *len = 0; - *off = 0; - } else if (*off < 0) { - cifs_dbg(VFS, "negative offset %d to data invalid ignore data area\n", - *off); + if (unlikely(*off < 0 || *off > max_off || + *len < 0 || *len > max_len)) { + cifs_dbg(VFS, "%s: invalid data area (off=%d len=%d)\n", + __func__, *off, *len); *off = 0; *len = 0; - } else if (*len < 0) { - cifs_dbg(VFS, "negative data length %d invalid, data area ignored\n", - *len); - *len = 0; - } else if (*len > 128 * 1024) { - cifs_dbg(VFS, "data area larger than 128K: %d\n", *len); + } else if (*off == 0) { *len = 0; } /* return pointer to beginning of data area, ie offset from SMB start */ - if ((*off != 0) && (*len != 0)) + if (*off > 0 && *len > 0) return (char *)shdr + *off; - else - return NULL; + return NULL; } /* -- cgit From 3a42709fa909e22b0be4bb1e2795aa04ada732a3 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 11 Dec 2023 10:26:43 -0300 Subject: smb: client: fix OOB in smb2_query_reparse_point() Validate @ioctl_rsp->OutputOffset and @ioctl_rsp->OutputCount so that their sum does not wrap to a number that is smaller than @reparse_buf and we end up with a wild pointer as follows: BUG: unable to handle page fault for address: ffff88809c5cd45f #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 4a01067 P4D 4a01067 PUD 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 2 PID: 1260 Comm: mount.cifs Not tainted 6.7.0-rc4 #2 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 RIP: 0010:smb2_query_reparse_point+0x3e0/0x4c0 [cifs] Code: ff ff e8 f3 51 fe ff 41 89 c6 58 5a 45 85 f6 0f 85 14 fe ff ff 49 8b 57 48 8b 42 60 44 8b 42 64 42 8d 0c 00 49 39 4f 50 72 40 <8b> 04 02 48 8b 9d f0 fe ff ff 49 8b 57 50 89 03 48 8b 9d e8 fe ff RSP: 0018:ffffc90000347a90 EFLAGS: 00010212 RAX: 000000008000001f RBX: ffff88800ae11000 RCX: 00000000000000ec RDX: ffff88801c5cd440 RSI: 0000000000000000 RDI: ffffffff82004aa4 RBP: ffffc90000347bb0 R08: 00000000800000cd R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000024 R12: ffff8880114d4100 R13: ffff8880114d4198 R14: 0000000000000000 R15: ffff8880114d4000 FS: 00007f02c07babc0(0000) GS:ffff88806ba00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff88809c5cd45f CR3: 0000000011750000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x181/0x480 ? search_module_extables+0x19/0x60 ? srso_alias_return_thunk+0x5/0xfbef5 ? exc_page_fault+0x1b6/0x1c0 ? asm_exc_page_fault+0x26/0x30 ? _raw_spin_unlock_irqrestore+0x44/0x60 ? smb2_query_reparse_point+0x3e0/0x4c0 [cifs] cifs_get_fattr+0x16e/0xa50 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 ? lock_acquire+0xbf/0x2b0 cifs_root_iget+0x163/0x5f0 [cifs] cifs_smb3_do_mount+0x5bd/0x780 [cifs] smb3_get_tree+0xd9/0x290 [cifs] vfs_get_tree+0x2c/0x100 ? capable+0x37/0x70 path_mount+0x2d7/0xb80 ? srso_alias_return_thunk+0x5/0xfbef5 ? _raw_spin_unlock_irqrestore+0x44/0x60 __x64_sys_mount+0x11a/0x150 do_syscall_64+0x47/0xf0 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f02c08d5b1e Fixes: 2e4564b31b64 ("smb3: add support for stat of WSL reparse points for special file types") Cc: stable@vger.kernel.org Reported-by: Robert Morris Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index a6f4948adcbb..8f6f0a38b886 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3003,7 +3003,7 @@ static int smb2_query_reparse_point(const unsigned int xid, struct kvec *rsp_iov; struct smb2_ioctl_rsp *ioctl_rsp; struct reparse_data_buffer *reparse_buf; - u32 plen; + u32 off, count, len; cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); @@ -3084,16 +3084,22 @@ static int smb2_query_reparse_point(const unsigned int xid, */ if (rc == 0) { /* See MS-FSCC 2.3.23 */ + off = le32_to_cpu(ioctl_rsp->OutputOffset); + count = le32_to_cpu(ioctl_rsp->OutputCount); + if (check_add_overflow(off, count, &len) || + len > rsp_iov[1].iov_len) { + cifs_tcon_dbg(VFS, "%s: invalid ioctl: off=%d count=%d\n", + __func__, off, count); + rc = -EIO; + goto query_rp_exit; + } - reparse_buf = (struct reparse_data_buffer *) - ((char *)ioctl_rsp + - le32_to_cpu(ioctl_rsp->OutputOffset)); - plen = le32_to_cpu(ioctl_rsp->OutputCount); - - if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) > - rsp_iov[1].iov_len) { - cifs_tcon_dbg(FYI, "srv returned invalid ioctl len: %d\n", - plen); + reparse_buf = (void *)((u8 *)ioctl_rsp + off); + len = sizeof(*reparse_buf); + if (count < len || + count < le16_to_cpu(reparse_buf->ReparseDataLength) + len) { + cifs_tcon_dbg(VFS, "%s: invalid ioctl: off=%d count=%d\n", + __func__, off, count); rc = -EIO; goto query_rp_exit; } -- cgit From 52bf9f6c09fca8c74388cd41cc24e5d1bff812a9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 11 Dec 2023 21:43:52 +0000 Subject: afs: Fix refcount underflow from error handling race If an AFS cell that has an unreachable (eg. ENETUNREACH) server listed (VL server or fileserver), an asynchronous probe to one of its addresses may fail immediately because sendmsg() returns an error. When this happens, a refcount underflow can happen if certain events hit a very small window. The way this occurs is: (1) There are two levels of "call" object, the afs_call and the rxrpc_call. Each of them can be transitioned to a "completed" state in the event of success or failure. (2) Asynchronous afs_calls are self-referential whilst they are active to prevent them from evaporating when they're not being processed. This reference is disposed of when the afs_call is completed. Note that an afs_call may only be completed once; once completed completing it again will do nothing. (3) When a call transmission is made, the app-side rxrpc code queues a Tx buffer for the rxrpc I/O thread to transmit. The I/O thread invokes sendmsg() to transmit it - and in the case of failure, it transitions the rxrpc_call to the completed state. (4) When an rxrpc_call is completed, the app layer is notified. In this case, the app is kafs and it schedules a work item to process events pertaining to an afs_call. (5) When the afs_call event processor is run, it goes down through the RPC-specific handler to afs_extract_data() to retrieve data from rxrpc - and, in this case, it picks up the error from the rxrpc_call and returns it. The error is then propagated to the afs_call and that is completed too. At this point the self-reference is released. (6) If the rxrpc I/O thread manages to complete the rxrpc_call within the window between rxrpc_send_data() queuing the request packet and checking for call completion on the way out, then rxrpc_kernel_send_data() will return the error from sendmsg() to the app. (7) Then afs_make_call() will see an error and will jump to the error handling path which will attempt to clean up the afs_call. (8) The problem comes when the error handling path in afs_make_call() tries to unconditionally drop an async afs_call's self-reference. This self-reference, however, may already have been dropped by afs_extract_data() completing the afs_call (9) The refcount underflows when we return to afs_do_probe_vlserver() and that tries to drop its reference on the afs_call. Fix this by making afs_make_call() attempt to complete the afs_call rather than unconditionally putting it. That way, if afs_extract_data() manages to complete the call first, afs_make_call() won't do anything. The bug can be forced by making do_udp_sendmsg() return -ENETUNREACH and sticking an msleep() in rxrpc_send_data() after the 'success:' label to widen the race window. The error message looks something like: refcount_t: underflow; use-after-free. WARNING: CPU: 3 PID: 720 at lib/refcount.c:28 refcount_warn_saturate+0xba/0x110 ... RIP: 0010:refcount_warn_saturate+0xba/0x110 ... afs_put_call+0x1dc/0x1f0 [kafs] afs_fs_get_capabilities+0x8b/0xe0 [kafs] afs_fs_probe_fileserver+0x188/0x1e0 [kafs] afs_lookup_server+0x3bf/0x3f0 [kafs] afs_alloc_server_list+0x130/0x2e0 [kafs] afs_create_volume+0x162/0x400 [kafs] afs_get_tree+0x266/0x410 [kafs] vfs_get_tree+0x25/0xc0 fc_mount+0xe/0x40 afs_d_automount+0x1b3/0x390 [kafs] __traverse_mounts+0x8f/0x210 step_into+0x340/0x760 path_openat+0x13a/0x1260 do_filp_open+0xaf/0x160 do_sys_openat2+0xaf/0x170 or something like: refcount_t: underflow; use-after-free. ... RIP: 0010:refcount_warn_saturate+0x99/0xda ... afs_put_call+0x4a/0x175 afs_send_vl_probes+0x108/0x172 afs_select_vlserver+0xd6/0x311 afs_do_cell_detect_alias+0x5e/0x1e9 afs_cell_detect_alias+0x44/0x92 afs_validate_fc+0x9d/0x134 afs_get_tree+0x20/0x2e6 vfs_get_tree+0x1d/0xc9 fc_mount+0xe/0x33 afs_d_automount+0x48/0x9d __traverse_mounts+0xe0/0x166 step_into+0x140/0x274 open_last_lookups+0x1c1/0x1df path_openat+0x138/0x1c3 do_filp_open+0x55/0xb4 do_sys_openat2+0x6c/0xb6 Fixes: 34fa47612bfe ("afs: Fix race in async call refcounting") Reported-by: Bill MacAllister Closes: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1052304 Suggested-by: Jeffrey E Altman Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/2633992.1702073229@warthog.procyon.org.uk/ # v1 Signed-off-by: Linus Torvalds --- fs/afs/rxrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index ed1644e7683f..d642d06a453b 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -424,7 +424,7 @@ error_kill_call: if (call->async) { if (cancel_work_sync(&call->async_work)) afs_put_call(call); - afs_put_call(call); + afs_set_call_complete(call, ret, 0); } ac->error = ret; -- cgit From 1892fe103c3a20fced306c8dafa74f7f6d4ea0a3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 11 Dec 2023 19:27:28 +0000 Subject: perf/arm-cmn: Fail DTC counter allocation correctly Calling arm_cmn_event_clear() before all DTC indices are allocated is wrong, and can lead to arm_cmn_event_add() erroneously clearing live counters from full DTCs where allocation fails. Since the DTC counters are only updated by arm_cmn_init_counter() after all DTC and DTM allocations succeed, nothing actually needs cleaning up in this case anyway, and it should just return directly as it did before. Fixes: 7633ec2c262f ("perf/arm-cmn: Rework DTC counters (again)") Signed-off-by: Robin Murphy Reviewed-by: Ilkka Koskinen Acked-by: Will Deacon Link: https://lore.kernel.org/r/ed589c0d8e4130dc68b8ad1625226d28bdc185d4.1702322847.git.robin.murphy@arm.com Signed-off-by: Catalin Marinas --- drivers/perf/arm-cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 014010d03588..847b0dc41293 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1816,7 +1816,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) idx = 0; while (cmn->dtc[j].counters[idx]) if (++idx == CMN_DT_NUM_COUNTERS) - goto free_dtms; + return -ENOSPC; } hw->dtc_idx[j] = idx; } -- cgit From 24e90b9e34f9e039f56b5f25f6e6eb92cdd8f4b3 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Sat, 9 Dec 2023 04:42:10 -0500 Subject: atm: Fix Use-After-Free in do_vcc_ioctl Because do_vcc_ioctl() accesses sk->sk_receive_queue without holding a sk->sk_receive_queue.lock, it can cause a race with vcc_recvmsg(). A use-after-free for skb occurs with the following flow. ``` do_vcc_ioctl() -> skb_peek() vcc_recvmsg() -> skb_recv_datagram() -> skb_free_datagram() ``` Add sk->sk_receive_queue.lock to do_vcc_ioctl() to fix this issue. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Hyunwoo Kim Link: https://lore.kernel.org/r/20231209094210.GA403126@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Paolo Abeni --- net/atm/ioctl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 838ebf0cabbf..f81f8d56f5c0 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -73,14 +73,17 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, case SIOCINQ: { struct sk_buff *skb; + int amount; if (sock->state != SS_CONNECTED) { error = -EINVAL; goto done; } + spin_lock_irq(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); - error = put_user(skb ? skb->len : 0, - (int __user *)argp) ? -EFAULT : 0; + amount = skb ? skb->len : 0; + spin_unlock_irq(&sk->sk_receive_queue.lock); + error = put_user(amount, (int __user *)argp) ? -EFAULT : 0; goto done; } case ATM_SETSC: -- cgit From 810c38a369a0a0ce625b5c12169abce1dd9ccd53 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Sat, 9 Dec 2023 05:05:38 -0500 Subject: net/rose: Fix Use-After-Free in rose_ioctl Because rose_ioctl() accesses sk->sk_receive_queue without holding a sk->sk_receive_queue.lock, it can cause a race with rose_accept(). A use-after-free for skb occurs with the following flow. ``` rose_ioctl() -> skb_peek() rose_accept() -> skb_dequeue() -> kfree_skb() ``` Add sk->sk_receive_queue.lock to rose_ioctl() to fix this issue. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Hyunwoo Kim Link: https://lore.kernel.org/r/20231209100538.GA407321@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Paolo Abeni --- net/rose/af_rose.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 0cc5a4e19900..ecb91ad4ce63 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1315,9 +1315,11 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case TIOCINQ: { struct sk_buff *skb; long amount = 0L; - /* These two are safe on a single CPU system as only user tasks fiddle here */ + + spin_lock_irq(&sk->sk_receive_queue.lock); if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) amount = skb->len; + spin_unlock_irq(&sk->sk_receive_queue.lock); return put_user(amount, (unsigned int __user *) argp); } -- cgit From 43527a0094c10dfbf0d5a2e7979395a38de3ff65 Mon Sep 17 00:00:00 2001 From: Mikhail Khvainitski Date: Tue, 12 Dec 2023 15:31:48 +0200 Subject: HID: lenovo: Restrict detection of patched firmware only to USB cptkbd Commit 46a0a2c96f0f ("HID: lenovo: Detect quirk-free fw on cptkbd and stop applying workaround") introduced a regression for ThinkPad TrackPoint Keyboard II which has similar quirks to cptkbd (so it uses the same workarounds) but slightly different so that there are false-positives during detecting well-behaving firmware. This commit restricts detecting well-behaving firmware to the only model which known to have one and have stable enough quirks to not cause false-positives. Fixes: 46a0a2c96f0f ("HID: lenovo: Detect quirk-free fw on cptkbd and stop applying workaround") Link: https://lore.kernel.org/linux-input/ZXRiiPsBKNasioqH@jekhomev/ Link: https://bbs.archlinux.org/viewtopic.php?pid=2135468#p2135468 Signed-off-by: Mikhail Khvainitski Tested-by: Yauhen Kharuzhy Signed-off-by: Jiri Kosina --- drivers/hid/hid-lenovo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index 7c1b33be9d13..149a3c74346b 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -692,7 +692,8 @@ static int lenovo_event_cptkbd(struct hid_device *hdev, * so set middlebutton_state to 3 * to never apply workaround anymore */ - if (cptkbd_data->middlebutton_state == 1 && + if (hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD && + cptkbd_data->middlebutton_state == 1 && usage->type == EV_REL && (usage->code == REL_X || usage->code == REL_Y)) { cptkbd_data->middlebutton_state = 3; -- cgit From df83a0df820b9b705e51b9499691b0dafb2f4dcb Mon Sep 17 00:00:00 2001 From: Yan Jun Date: Sun, 3 Dec 2023 19:50:58 +0800 Subject: HID: apple: Add "hfd.cn" and "WKB603" to the list of non-apple keyboards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JingZao(京造) WKB603 keyboard is a rebranded product of Jamesdonkey RS2 keyboard, identified as "hfd.cn WKB603" in wired mode, "WKB603" in bluetooth mode. Adding them to the list of non-apple keyboards fixes function key. Signed-off-by: Yan Jun Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index d9e9829b2200..b9c7c0ed7bcc 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -347,6 +347,8 @@ static const struct apple_non_apple_keyboard non_apple_keyboards[] = { { "Hailuck" }, { "Jamesdonkey" }, { "A3R" }, + { "hfd.cn" }, + { "WKB603" }, }; static bool apple_is_non_apple_keyboard(struct hid_device *hdev) -- cgit From 6c02757c936063f0631b4e43fe156f8c8f1f351f Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Mon, 11 Dec 2023 19:25:44 +0800 Subject: jbd2: fix soft lockup in journal_finish_inode_data_buffers() There's issue when do io test: WARN: soft lockup - CPU#45 stuck for 11s! [jbd2/dm-2-8:4170] CPU: 45 PID: 4170 Comm: jbd2/dm-2-8 Kdump: loaded Tainted: G OE Call trace: dump_backtrace+0x0/0x1a0 show_stack+0x24/0x30 dump_stack+0xb0/0x100 watchdog_timer_fn+0x254/0x3f8 __hrtimer_run_queues+0x11c/0x380 hrtimer_interrupt+0xfc/0x2f8 arch_timer_handler_phys+0x38/0x58 handle_percpu_devid_irq+0x90/0x248 generic_handle_irq+0x3c/0x58 __handle_domain_irq+0x68/0xc0 gic_handle_irq+0x90/0x320 el1_irq+0xcc/0x180 queued_spin_lock_slowpath+0x1d8/0x320 jbd2_journal_commit_transaction+0x10f4/0x1c78 [jbd2] kjournald2+0xec/0x2f0 [jbd2] kthread+0x134/0x138 ret_from_fork+0x10/0x18 Analyzed informations from vmcore as follows: (1) There are about 5k+ jbd2_inode in 'commit_transaction->t_inode_list'; (2) Now is processing the 855th jbd2_inode; (3) JBD2 task has TIF_NEED_RESCHED flag; (4) There's no pags in address_space around the 855th jbd2_inode; (5) There are some process is doing drop caches; (6) Mounted with 'nodioread_nolock' option; (7) 128 CPUs; According to informations from vmcore we know 'journal->j_list_lock' spin lock competition is fierce. So journal_finish_inode_data_buffers() maybe process slowly. Theoretically, there is scheduling point in the filemap_fdatawait_range_keep_errors(). However, if inode's address_space has no pages which taged with PAGECACHE_TAG_WRITEBACK, will not call cond_resched(). So may lead to soft lockup. journal_finish_inode_data_buffers filemap_fdatawait_range_keep_errors __filemap_fdatawait_range while (index <= end) nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, PAGECACHE_TAG_WRITEBACK); if (!nr_pages) break; --> If 'nr_pages' is equal zero will break, then will not call cond_resched() for (i = 0; i < nr_pages; i++) wait_on_page_writeback(page); cond_resched(); To solve above issue, add scheduling point in the journal_finish_inode_data_buffers(); Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20231211112544.3879780-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 9bdb377a348f..5e122586e06e 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -270,6 +270,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal, if (!ret) ret = err; } + cond_resched(); spin_lock(&journal->j_list_lock); jinode->i_flags &= ~JI_COMMIT_RUNNING; smp_mb(); -- cgit From 3c0696076aad60a2f04c019761921954579e1b0e Mon Sep 17 00:00:00 2001 From: James Houghton Date: Mon, 4 Dec 2023 17:26:46 +0000 Subject: arm64: mm: Always make sw-dirty PTEs hw-dirty in pte_modify It is currently possible for a userspace application to enter an infinite page fault loop when using HugeTLB pages implemented with contiguous PTEs when HAFDBS is not available. This happens because: 1. The kernel may sometimes write PTEs that are sw-dirty but hw-clean (PTE_DIRTY | PTE_RDONLY | PTE_WRITE). 2. If, during a write, the CPU uses a sw-dirty, hw-clean PTE in handling the memory access on a system without HAFDBS, we will get a page fault. 3. HugeTLB will check if it needs to update the dirty bits on the PTE. For contiguous PTEs, it will check to see if the pgprot bits need updating. In this case, HugeTLB wants to write a sequence of sw-dirty, hw-dirty PTEs, but it finds that all the PTEs it is about to overwrite are all pte_dirty() (pte_sw_dirty() => pte_dirty()), so it thinks no update is necessary. We can get the kernel to write a sw-dirty, hw-clean PTE with the following steps (showing the relevant VMA flags and pgprot bits): i. Create a valid, writable contiguous PTE. VMA vmflags: VM_SHARED | VM_READ | VM_WRITE VMA pgprot bits: PTE_RDONLY | PTE_WRITE PTE pgprot bits: PTE_DIRTY | PTE_WRITE ii. mprotect the VMA to PROT_NONE. VMA vmflags: VM_SHARED VMA pgprot bits: PTE_RDONLY PTE pgprot bits: PTE_DIRTY | PTE_RDONLY iii. mprotect the VMA back to PROT_READ | PROT_WRITE. VMA vmflags: VM_SHARED | VM_READ | VM_WRITE VMA pgprot bits: PTE_RDONLY | PTE_WRITE PTE pgprot bits: PTE_DIRTY | PTE_WRITE | PTE_RDONLY Make it impossible to create a writeable sw-dirty, hw-clean PTE with pte_modify(). Such a PTE should be impossible to create, and there may be places that assume that pte_dirty() implies pte_hw_dirty(). Signed-off-by: James Houghton Fixes: 031e6e6b4e12 ("arm64: hugetlb: Avoid unnecessary clearing in huge_ptep_set_access_flags") Cc: Acked-by: Will Deacon Reviewed-by: Ryan Roberts Link: https://lore.kernel.org/r/20231204172646.2541916-3-jthoughton@google.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b19a8aee684c..79ce70fbb751 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -834,6 +834,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); + /* + * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware + * dirtiness again. + */ + if (pte_sw_dirty(pte)) + pte = pte_mkdirty(pte); return pte; } -- cgit From 19544aa5f5ece80b12315fa68e51fb2ba6f01fa4 Mon Sep 17 00:00:00 2001 From: Saleemkhan Jamadar Date: Tue, 28 Nov 2023 17:02:06 +0530 Subject: drm/amdgpu/jpeg: configure doorbell for each playback Doorbell is configured during start of each playback. v1 - add comment for the doorbell programming change Signed-off-by: Saleemkhan Jamadar Acked-by: Leo Liu Reviewed-by: Veerabadhran Gopalakrishnan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 9df011323d4b..6ede85b28cc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -155,13 +155,6 @@ static int jpeg_v4_0_5_hw_init(void *handle) struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); - - WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, - ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | - VCN_JPEG_DB_CTRL__EN_MASK); - r = amdgpu_ring_test_helper(ring); if (r) return r; @@ -336,6 +329,14 @@ static int jpeg_v4_0_5_start(struct amdgpu_device *adev) if (adev->pm.dpm_enabled) amdgpu_dpm_enable_jpeg(adev, true); + /* doorbell programming is done for every playback */ + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + + WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + /* disable power gating */ r = jpeg_v4_0_5_disable_static_power_gating(adev); if (r) -- cgit From a409c053b0b0cc0fc1af684d0b23bd5ca010c4cb Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Wed, 6 Dec 2023 14:52:25 -0500 Subject: drm/amd/display: Revert "Fix conversions between bytes and KB" [Why & How] HostVMMinPageSize is expected to be in KB according to spec, the checks later down the line reflect this as well. Reviewed-by: Nicholas Kazlauskas Acked-by: Aurabindo Pillai Signed-off-by: Taimur Hassan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 4d1336e5afc2..180f8a98a361 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -6329,7 +6329,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.NoOfDPPThisState, mode_lib->ms.dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; @@ -6542,7 +6542,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k], mode_lib->ms.MetaRowBytes[j][k], mode_lib->ms.DPTEBytesPerRow[j][k], @@ -7687,7 +7687,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState; @@ -7957,7 +7957,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal; @@ -8699,7 +8699,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0]; @@ -8805,7 +8805,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, locals->dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); locals->TCalc = 24.0 / locals->DCFCLKDeepSleep; @@ -8995,7 +8995,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; @@ -9240,7 +9240,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, locals->PDEAndMetaPTEBytesFrame[k], locals->MetaRowByte[k], locals->PixelPTEBytesPerRow[k], -- cgit From fb01ab528df324a140058a11e9b25e5efdf9671d Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Wed, 6 Dec 2023 14:52:28 -0500 Subject: drm/amd/display: Populate dtbclk from bounding box dtbclk is unavaliable from pmfw. Try to grab the value from bounding box Reviewed-by: Charlene Liu Acked-by: Aurabindo Pillai Signed-off-by: Fangzhi Zuo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 14 +++++++++----- .../gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 5 +++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 39cf1ae3a3e1..f154a3eb1d1a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -124,7 +124,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 600.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 186.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 1, @@ -133,7 +133,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 2, @@ -142,7 +142,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 3, @@ -151,7 +151,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 371.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 4, @@ -160,7 +160,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 417.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, }, .num_states = 5, @@ -348,6 +348,8 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, clock_limits[i].socclk_mhz; dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz = + clock_limits[i].dtbclk_mhz; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = clk_table->num_entries; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels = @@ -360,6 +362,8 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, clk_table->num_entries; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels = clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels = + clk_table->num_entries; } } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index fa8fe5bf7e57..db06a5b749b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -423,8 +423,9 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, } for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels; i++) { - p->in_states->state_array[i].dtbclk_mhz = - dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz; + if (dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz > 0) + p->in_states->state_array[i].dtbclk_mhz = + dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz; } for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels; i++) { -- cgit From e7ab758741672acb21c5d841a9f0309d30e48a06 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 19 Jun 2023 15:04:24 -0500 Subject: drm/amd/display: Disable PSR-SU on Parade 0803 TCON again When screen brightness is rapidly changed and PSR-SU is enabled the display hangs on panels with this TCON even on the latest DCN 3.1.4 microcode (0x8002a81 at this time). This was disabled previously as commit 072030b17830 ("drm/amd: Disable PSR-SU on Parade 0803 TCON") but reverted as commit 1e66a17ce546 ("Revert "drm/amd: Disable PSR-SU on Parade 0803 TCON"") in favor of testing for a new enough microcode (commit cd2e31a9ab93 ("drm/amd/display: Set minimum requirement for using PSR-SU on Phoenix")). As hangs are still happening specifically with this TCON, disable PSR-SU again for it until it can be root caused. Cc: stable@vger.kernel.org Cc: aaron.ma@canonical.com Cc: binli@gnome.org Cc: Marc Rossi Cc: Hamza Mahfooz Signed-off-by: Mario Limonciello Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2046131 Acked-by: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index a522a7c02911..1675314a3ff2 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -839,6 +839,8 @@ bool is_psr_su_specific_panel(struct dc_link *link) ((dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x08) || (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x07))) isPSRSUSupported = false; + else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) + isPSRSUSupported = false; else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1) isPSRSUSupported = true; } -- cgit From 3a0b5a2929fdeda63fc921c2dbed237059acf732 Mon Sep 17 00:00:00 2001 From: Piotr Gardocki Date: Tue, 21 Nov 2023 22:47:15 -0500 Subject: iavf: Introduce new state machines for flow director New states introduced: IAVF_FDIR_FLTR_DIS_REQUEST IAVF_FDIR_FLTR_DIS_PENDING IAVF_FDIR_FLTR_INACTIVE Current FDIR state machines (SM) are not adequate to handle a few scenarios in the link DOWN/UP event, reset event and ntuple-feature. For example, when VF link goes DOWN and comes back UP administratively, the expectation is that previously installed filters should also be restored. But with current SM, filters are not restored. So with new SM, during link DOWN filters are marked as INACTIVE in the iavf list but removed from PF. After link UP, SM will transition from INACTIVE to ADD_REQUEST to restore the filter. Similarly, with VF reset, filters will be removed from the PF, but marked as INACTIVE in the iavf list. Filters will be restored after reset completion. Steps to reproduce: ------------------- 1. Create a VF. Here VF is enp8s0. 2. Assign IP addresses to VF and link partner and ping continuously from remote. Here remote IP is 1.1.1.1. 3. Check default RX Queue of traffic. ethtool -S enp8s0 | grep -E "rx-[[:digit:]]+\.packets" 4. Add filter - change default RX Queue (to 15 here) ethtool -U ens8s0 flow-type ip4 src-ip 1.1.1.1 action 15 loc 5 5. Ensure filter gets added and traffic is received on RX queue 15 now. Link event testing: ------------------- 6. Bring VF link down and up. If traffic flows to configured queue 15, test is success, otherwise it is a failure. Reset event testing: -------------------- 7. Reset the VF. If traffic flows to configured queue 15, test is success, otherwise it is a failure. Fixes: 0dbfbabb840d ("iavf: Add framework to enable ethtool ntuple filters") Signed-off-by: Piotr Gardocki Reviewed-by: Larysa Zaremba Signed-off-by: Ranganatha Rao Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 1 + drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 27 ++++++---- drivers/net/ethernet/intel/iavf/iavf_fdir.h | 15 +++++- drivers/net/ethernet/intel/iavf/iavf_main.c | 48 +++++++++++++---- drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 71 +++++++++++++++++++++++-- 5 files changed, 139 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index e7ab89dc883a..63b45c61cc4a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -292,6 +292,7 @@ struct iavf_adapter { #define IAVF_FLAG_QUEUES_DISABLED BIT(17) #define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18) #define IAVF_FLAG_REINIT_MSIX_NEEDED BIT(20) +#define IAVF_FLAG_FDIR_ENABLED BIT(21) /* duplicates for common code */ #define IAVF_FLAG_DCB_ENABLED 0 /* flags for admin queue service task */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 19cbfe554689..dc499fe7734e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -1061,7 +1061,7 @@ iavf_get_ethtool_fdir_entry(struct iavf_adapter *adapter, struct iavf_fdir_fltr *rule = NULL; int ret = 0; - if (!FDIR_FLTR_SUPPORT(adapter)) + if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) return -EOPNOTSUPP; spin_lock_bh(&adapter->fdir_fltr_lock); @@ -1203,7 +1203,7 @@ iavf_get_fdir_fltr_ids(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd, unsigned int cnt = 0; int val = 0; - if (!FDIR_FLTR_SUPPORT(adapter)) + if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) return -EOPNOTSUPP; cmd->data = IAVF_MAX_FDIR_FILTERS; @@ -1395,7 +1395,7 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx int count = 50; int err; - if (!FDIR_FLTR_SUPPORT(adapter)) + if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) return -EOPNOTSUPP; if (fsp->flow_type & FLOW_MAC_EXT) @@ -1436,12 +1436,16 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx spin_lock_bh(&adapter->fdir_fltr_lock); iavf_fdir_list_add_fltr(adapter, fltr); adapter->fdir_active_fltr++; - fltr->state = IAVF_FDIR_FLTR_ADD_REQUEST; - adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER; + if (adapter->link_up) { + fltr->state = IAVF_FDIR_FLTR_ADD_REQUEST; + adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER; + } else { + fltr->state = IAVF_FDIR_FLTR_INACTIVE; + } spin_unlock_bh(&adapter->fdir_fltr_lock); - mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); - + if (adapter->link_up) + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); ret: if (err && fltr) kfree(fltr); @@ -1463,7 +1467,7 @@ static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx struct iavf_fdir_fltr *fltr = NULL; int err = 0; - if (!FDIR_FLTR_SUPPORT(adapter)) + if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) return -EOPNOTSUPP; spin_lock_bh(&adapter->fdir_fltr_lock); @@ -1472,6 +1476,11 @@ static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx if (fltr->state == IAVF_FDIR_FLTR_ACTIVE) { fltr->state = IAVF_FDIR_FLTR_DEL_REQUEST; adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER; + } else if (fltr->state == IAVF_FDIR_FLTR_INACTIVE) { + list_del(&fltr->list); + kfree(fltr); + adapter->fdir_active_fltr--; + fltr = NULL; } else { err = -EBUSY; } @@ -1780,7 +1789,7 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, ret = 0; break; case ETHTOOL_GRXCLSRLCNT: - if (!FDIR_FLTR_SUPPORT(adapter)) + if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) break; spin_lock_bh(&adapter->fdir_fltr_lock); cmd->rule_cnt = adapter->fdir_active_fltr; diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h index 9eb9f73f6adf..d31bd923ba8c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_fdir.h +++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h @@ -6,12 +6,25 @@ struct iavf_adapter; -/* State of Flow Director filter */ +/* State of Flow Director filter + * + * *_REQUEST states are used to mark filter to be sent to PF driver to perform + * an action (either add or delete filter). *_PENDING states are an indication + * that request was sent to PF and the driver is waiting for response. + * + * Both DELETE and DISABLE states are being used to delete a filter in PF. + * The difference is that after a successful response filter in DEL_PENDING + * state is being deleted from VF driver as well and filter in DIS_PENDING state + * is being changed to INACTIVE state. + */ enum iavf_fdir_fltr_state_t { IAVF_FDIR_FLTR_ADD_REQUEST, /* User requests to add filter */ IAVF_FDIR_FLTR_ADD_PENDING, /* Filter pending add by the PF */ IAVF_FDIR_FLTR_DEL_REQUEST, /* User requests to delete filter */ IAVF_FDIR_FLTR_DEL_PENDING, /* Filter pending delete by the PF */ + IAVF_FDIR_FLTR_DIS_REQUEST, /* Filter scheduled to be disabled */ + IAVF_FDIR_FLTR_DIS_PENDING, /* Filter pending disable by the PF */ + IAVF_FDIR_FLTR_INACTIVE, /* Filter inactive on link down */ IAVF_FDIR_FLTR_ACTIVE, /* Filter is active */ }; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index c862ebcd2e39..567435e23936 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1353,18 +1353,20 @@ static void iavf_clear_cloud_filters(struct iavf_adapter *adapter) **/ static void iavf_clear_fdir_filters(struct iavf_adapter *adapter) { - struct iavf_fdir_fltr *fdir, *fdirtmp; + struct iavf_fdir_fltr *fdir; /* remove all Flow Director filters */ spin_lock_bh(&adapter->fdir_fltr_lock); - list_for_each_entry_safe(fdir, fdirtmp, &adapter->fdir_list_head, - list) { + list_for_each_entry(fdir, &adapter->fdir_list_head, list) { if (fdir->state == IAVF_FDIR_FLTR_ADD_REQUEST) { - list_del(&fdir->list); - kfree(fdir); - adapter->fdir_active_fltr--; - } else { - fdir->state = IAVF_FDIR_FLTR_DEL_REQUEST; + /* Cancel a request, keep filter as inactive */ + fdir->state = IAVF_FDIR_FLTR_INACTIVE; + } else if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING || + fdir->state == IAVF_FDIR_FLTR_ACTIVE) { + /* Disable filters which are active or have a pending + * request to PF to be added + */ + fdir->state = IAVF_FDIR_FLTR_DIS_REQUEST; } } spin_unlock_bh(&adapter->fdir_fltr_lock); @@ -4112,6 +4114,33 @@ static int iavf_setup_tc(struct net_device *netdev, enum tc_setup_type type, } } +/** + * iavf_restore_fdir_filters + * @adapter: board private structure + * + * Restore existing FDIR filters when VF netdev comes back up. + **/ +static void iavf_restore_fdir_filters(struct iavf_adapter *adapter) +{ + struct iavf_fdir_fltr *f; + + spin_lock_bh(&adapter->fdir_fltr_lock); + list_for_each_entry(f, &adapter->fdir_list_head, list) { + if (f->state == IAVF_FDIR_FLTR_DIS_REQUEST) { + /* Cancel a request, keep filter as active */ + f->state = IAVF_FDIR_FLTR_ACTIVE; + } else if (f->state == IAVF_FDIR_FLTR_DIS_PENDING || + f->state == IAVF_FDIR_FLTR_INACTIVE) { + /* Add filters which are inactive or have a pending + * request to PF to be deleted + */ + f->state = IAVF_FDIR_FLTR_ADD_REQUEST; + adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER; + } + } + spin_unlock_bh(&adapter->fdir_fltr_lock); +} + /** * iavf_open - Called when a network interface is made active * @netdev: network interface device structure @@ -4179,8 +4208,9 @@ static int iavf_open(struct net_device *netdev) spin_unlock_bh(&adapter->mac_vlan_list_lock); - /* Restore VLAN filters that were removed with IFF_DOWN */ + /* Restore filters that were removed with IFF_DOWN */ iavf_restore_filters(adapter); + iavf_restore_fdir_filters(adapter); iavf_configure(adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 64c4443dbef9..2d9366be0ec5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -1735,8 +1735,8 @@ void iavf_add_fdir_filter(struct iavf_adapter *adapter) **/ void iavf_del_fdir_filter(struct iavf_adapter *adapter) { + struct virtchnl_fdir_del f = {}; struct iavf_fdir_fltr *fdir; - struct virtchnl_fdir_del f; bool process_fltr = false; int len; @@ -1753,11 +1753,16 @@ void iavf_del_fdir_filter(struct iavf_adapter *adapter) list_for_each_entry(fdir, &adapter->fdir_list_head, list) { if (fdir->state == IAVF_FDIR_FLTR_DEL_REQUEST) { process_fltr = true; - memset(&f, 0, len); f.vsi_id = fdir->vc_add_msg.vsi_id; f.flow_id = fdir->flow_id; fdir->state = IAVF_FDIR_FLTR_DEL_PENDING; break; + } else if (fdir->state == IAVF_FDIR_FLTR_DIS_REQUEST) { + process_fltr = true; + f.vsi_id = fdir->vc_add_msg.vsi_id; + f.flow_id = fdir->flow_id; + fdir->state = IAVF_FDIR_FLTR_DIS_PENDING; + break; } } spin_unlock_bh(&adapter->fdir_fltr_lock); @@ -1901,6 +1906,48 @@ static void iavf_netdev_features_vlan_strip_set(struct net_device *netdev, netdev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; } +/** + * iavf_activate_fdir_filters - Reactivate all FDIR filters after a reset + * @adapter: private adapter structure + * + * Called after a reset to re-add all FDIR filters and delete some of them + * if they were pending to be deleted. + */ +static void iavf_activate_fdir_filters(struct iavf_adapter *adapter) +{ + struct iavf_fdir_fltr *f, *ftmp; + bool add_filters = false; + + spin_lock_bh(&adapter->fdir_fltr_lock); + list_for_each_entry_safe(f, ftmp, &adapter->fdir_list_head, list) { + if (f->state == IAVF_FDIR_FLTR_ADD_REQUEST || + f->state == IAVF_FDIR_FLTR_ADD_PENDING || + f->state == IAVF_FDIR_FLTR_ACTIVE) { + /* All filters and requests have been removed in PF, + * restore them + */ + f->state = IAVF_FDIR_FLTR_ADD_REQUEST; + add_filters = true; + } else if (f->state == IAVF_FDIR_FLTR_DIS_REQUEST || + f->state == IAVF_FDIR_FLTR_DIS_PENDING) { + /* Link down state, leave filters as inactive */ + f->state = IAVF_FDIR_FLTR_INACTIVE; + } else if (f->state == IAVF_FDIR_FLTR_DEL_REQUEST || + f->state == IAVF_FDIR_FLTR_DEL_PENDING) { + /* Delete filters that were pending to be deleted, the + * list on PF is already cleared after a reset + */ + list_del(&f->list); + kfree(f); + adapter->fdir_active_fltr--; + } + } + spin_unlock_bh(&adapter->fdir_fltr_lock); + + if (add_filters) + adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER; +} + /** * iavf_virtchnl_completion * @adapter: adapter structure @@ -2078,7 +2125,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, spin_lock_bh(&adapter->fdir_fltr_lock); list_for_each_entry(fdir, &adapter->fdir_list_head, list) { - if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) { + if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING || + fdir->state == IAVF_FDIR_FLTR_DIS_PENDING) { fdir->state = IAVF_FDIR_FLTR_ACTIVE; dev_info(&adapter->pdev->dev, "Failed to del Flow Director filter, error %s\n", iavf_stat_str(&adapter->hw, @@ -2214,6 +2262,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, spin_unlock_bh(&adapter->mac_vlan_list_lock); + iavf_activate_fdir_filters(adapter); + iavf_parse_vf_resource_msg(adapter); /* negotiated VIRTCHNL_VF_OFFLOAD_VLAN_V2, so wait for the @@ -2390,7 +2440,9 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, list_for_each_entry_safe(fdir, fdir_tmp, &adapter->fdir_list_head, list) { if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) { - if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS) { + if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS || + del_fltr->status == + VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST) { dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is deleted\n", fdir->loc); list_del(&fdir->list); @@ -2402,6 +2454,17 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, del_fltr->status); iavf_print_fdir_fltr(adapter, fdir); } + } else if (fdir->state == IAVF_FDIR_FLTR_DIS_PENDING) { + if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS || + del_fltr->status == + VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST) { + fdir->state = IAVF_FDIR_FLTR_INACTIVE; + } else { + fdir->state = IAVF_FDIR_FLTR_ACTIVE; + dev_info(&adapter->pdev->dev, "Failed to disable Flow Director filter with status: %d\n", + del_fltr->status); + iavf_print_fdir_fltr(adapter, fdir); + } } } spin_unlock_bh(&adapter->fdir_fltr_lock); -- cgit From 09d23b8918f9ab0f8114f6b94f2faf8bde3fb52a Mon Sep 17 00:00:00 2001 From: Piotr Gardocki Date: Tue, 21 Nov 2023 22:47:16 -0500 Subject: iavf: Handle ntuple on/off based on new state machines for flow director ntuple-filter feature on/off: Default is on. If turned off, the filters will be removed from both PF and iavf list. The removal is irrespective of current filter state. Steps to reproduce: ------------------- 1. Ensure ntuple is on. ethtool -K enp8s0 ntuple-filters on 2. Create a filter to receive the traffic into non-default rx-queue like 15 and ensure traffic is flowing into queue into 15. Now, turn off ntuple. Traffic should not flow to configured queue 15. It should flow to default RX queue. Fixes: 0dbfbabb840d ("iavf: Add framework to enable ethtool ntuple filters") Signed-off-by: Piotr Gardocki Reviewed-by: Larysa Zaremba Signed-off-by: Ranganatha Rao Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 59 +++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 567435e23936..98116872f6bd 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -4341,6 +4341,49 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) return ret; } +/** + * iavf_disable_fdir - disable Flow Director and clear existing filters + * @adapter: board private structure + **/ +static void iavf_disable_fdir(struct iavf_adapter *adapter) +{ + struct iavf_fdir_fltr *fdir, *fdirtmp; + bool del_filters = false; + + adapter->flags &= ~IAVF_FLAG_FDIR_ENABLED; + + /* remove all Flow Director filters */ + spin_lock_bh(&adapter->fdir_fltr_lock); + list_for_each_entry_safe(fdir, fdirtmp, &adapter->fdir_list_head, + list) { + if (fdir->state == IAVF_FDIR_FLTR_ADD_REQUEST || + fdir->state == IAVF_FDIR_FLTR_INACTIVE) { + /* Delete filters not registered in PF */ + list_del(&fdir->list); + kfree(fdir); + adapter->fdir_active_fltr--; + } else if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING || + fdir->state == IAVF_FDIR_FLTR_DIS_REQUEST || + fdir->state == IAVF_FDIR_FLTR_ACTIVE) { + /* Filters registered in PF, schedule their deletion */ + fdir->state = IAVF_FDIR_FLTR_DEL_REQUEST; + del_filters = true; + } else if (fdir->state == IAVF_FDIR_FLTR_DIS_PENDING) { + /* Request to delete filter already sent to PF, change + * state to DEL_PENDING to delete filter after PF's + * response, not set as INACTIVE + */ + fdir->state = IAVF_FDIR_FLTR_DEL_PENDING; + } + } + spin_unlock_bh(&adapter->fdir_fltr_lock); + + if (del_filters) { + adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER; + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); + } +} + #define NETIF_VLAN_OFFLOAD_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \ NETIF_F_HW_VLAN_CTAG_TX | \ NETIF_F_HW_VLAN_STAG_RX | \ @@ -4366,6 +4409,13 @@ static int iavf_set_features(struct net_device *netdev, ((netdev->features & NETIF_F_RXFCS) ^ (features & NETIF_F_RXFCS))) iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); + if ((netdev->features & NETIF_F_NTUPLE) ^ (features & NETIF_F_NTUPLE)) { + if (features & NETIF_F_NTUPLE) + adapter->flags |= IAVF_FLAG_FDIR_ENABLED; + else + iavf_disable_fdir(adapter); + } + return 0; } @@ -4715,6 +4765,9 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev, features = iavf_fix_netdev_vlan_features(adapter, features); + if (!FDIR_FLTR_SUPPORT(adapter)) + features &= ~NETIF_F_NTUPLE; + return iavf_fix_strip_features(adapter, features); } @@ -4832,6 +4885,12 @@ int iavf_process_config(struct iavf_adapter *adapter) if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + if (FDIR_FLTR_SUPPORT(adapter)) { + netdev->hw_features |= NETIF_F_NTUPLE; + netdev->features |= NETIF_F_NTUPLE; + adapter->flags |= IAVF_FLAG_FDIR_ENABLED; + } + netdev->priv_flags |= IFF_UNICAST_FLT; /* Do not turn on offloads when they are requested to be turned off. -- cgit From 7ae42ef308ed0f6250b36f43e4eeb182ebbe6215 Mon Sep 17 00:00:00 2001 From: Slawomir Laba Date: Wed, 29 Nov 2023 10:35:26 -0500 Subject: iavf: Fix iavf_shutdown to call iavf_remove instead iavf_close Make the flow for pci shutdown be the same to the pci remove. iavf_shutdown was implementing an incomplete version of iavf_remove. It misses several calls to the kernel like iavf_free_misc_irq, iavf_reset_interrupt_capability, iounmap that might break the system on reboot or hibernation. Implement the call of iavf_remove directly in iavf_shutdown to close this gap. Fixes below error messages (dmesg) during shutdown stress tests - [685814.900917] ice 0000:88:00.0: MAC 02:d0:5f:82:43:5d does not exist for VF 0 [685814.900928] ice 0000:88:00.0: MAC 33:33:00:00:00:01 does not exist for VF 0 Reproduction: 1. Create one VF interface: echo 1 > /sys/class/net//device/sriov_numvfs 2. Run live dmesg on the host: dmesg -wH 3. On SUT, script below steps into vf_namespace_assignment.sh <#!/bin/sh> // Remove <>. Git removes # line if= (edit this per VF name) loop=0 while true; do echo test round $loop let loop++ ip netns add ns$loop ip link set dev $if up ip link set dev $if netns ns$loop ip netns exec ns$loop ip link set dev $if up ip netns exec ns$loop ip link set dev $if netns 1 ip netns delete ns$loop done 4. Run the script for at least 1000 iterations on SUT: ./vf_namespace_assignment.sh Expected result: No errors in dmesg. Fixes: 129cf89e5856 ("iavf: rename functions and structs to new name") Signed-off-by: Slawomir Laba Reviewed-by: Michal Swiatkowski Reviewed-by: Ahmed Zaki Reviewed-by: Jesse Brandeburg Co-developed-by: Ranganatha Rao Signed-off-by: Ranganatha Rao Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 72 +++++++++-------------------- 1 file changed, 21 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 98116872f6bd..e8d5b889addc 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -276,27 +276,6 @@ void iavf_free_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem) kfree(mem->va); } -/** - * iavf_lock_timeout - try to lock mutex but give up after timeout - * @lock: mutex that should be locked - * @msecs: timeout in msecs - * - * Returns 0 on success, negative on failure - **/ -static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) -{ - unsigned int wait, delay = 10; - - for (wait = 0; wait < msecs; wait += delay) { - if (mutex_trylock(lock)) - return 0; - - msleep(delay); - } - - return -1; -} - /** * iavf_schedule_reset - Set the flags and schedule a reset event * @adapter: board private structure @@ -4914,34 +4893,6 @@ int iavf_process_config(struct iavf_adapter *adapter) return 0; } -/** - * iavf_shutdown - Shutdown the device in preparation for a reboot - * @pdev: pci device structure - **/ -static void iavf_shutdown(struct pci_dev *pdev) -{ - struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev); - struct net_device *netdev = adapter->netdev; - - netif_device_detach(netdev); - - if (netif_running(netdev)) - iavf_close(netdev); - - if (iavf_lock_timeout(&adapter->crit_lock, 5000)) - dev_warn(&adapter->pdev->dev, "%s: failed to acquire crit_lock\n", __func__); - /* Prevent the watchdog from running. */ - iavf_change_state(adapter, __IAVF_REMOVE); - adapter->aq_required = 0; - mutex_unlock(&adapter->crit_lock); - -#ifdef CONFIG_PM - pci_save_state(pdev); - -#endif - pci_disable_device(pdev); -} - /** * iavf_probe - Device Initialization Routine * @pdev: PCI device information struct @@ -5152,16 +5103,21 @@ static int __maybe_unused iavf_resume(struct device *dev_d) **/ static void iavf_remove(struct pci_dev *pdev) { - struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev); struct iavf_fdir_fltr *fdir, *fdirtmp; struct iavf_vlan_filter *vlf, *vlftmp; struct iavf_cloud_filter *cf, *cftmp; struct iavf_adv_rss *rss, *rsstmp; struct iavf_mac_filter *f, *ftmp; + struct iavf_adapter *adapter; struct net_device *netdev; struct iavf_hw *hw; - netdev = adapter->netdev; + /* Don't proceed with remove if netdev is already freed */ + netdev = pci_get_drvdata(pdev); + if (!netdev) + return; + + adapter = iavf_pdev_to_adapter(pdev); hw = &adapter->hw; if (test_and_set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) @@ -5273,11 +5229,25 @@ static void iavf_remove(struct pci_dev *pdev) destroy_workqueue(adapter->wq); + pci_set_drvdata(pdev, NULL); + free_netdev(netdev); pci_disable_device(pdev); } +/** + * iavf_shutdown - Shutdown the device in preparation for a reboot + * @pdev: pci device structure + **/ +static void iavf_shutdown(struct pci_dev *pdev) +{ + iavf_remove(pdev); + + if (system_state == SYSTEM_POWER_OFF) + pci_set_power_state(pdev, PCI_D3hot); +} + static SIMPLE_DEV_PM_OPS(iavf_pm_ops, iavf_suspend, iavf_resume); static struct pci_driver iavf_driver = { -- cgit From b65d52ac9c085c0c52dee012a210d4e2f352611b Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sun, 10 Dec 2023 12:52:55 +0800 Subject: qed: Fix a potential use-after-free in qed_cxt_tables_alloc qed_ilt_shadow_alloc() will call qed_ilt_shadow_free() to free p_hwfn->p_cxt_mngr->ilt_shadow on error. However, qed_cxt_tables_alloc() accesses the freed pointer on failure of qed_ilt_shadow_alloc() through calling qed_cxt_mngr_free(), which may lead to use-after-free. Fix this issue by setting p_mngr->ilt_shadow to NULL in qed_ilt_shadow_free(). Fixes: fe56b9e6a8d9 ("qed: Add module with basic common support") Reviewed-by: Przemek Kitszel Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20231210045255.21383-1-dinghao.liu@zju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 65e20693c549..33f4f58ee51c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -933,6 +933,7 @@ static void qed_ilt_shadow_free(struct qed_hwfn *p_hwfn) p_dma->virt_addr = NULL; } kfree(p_mngr->ilt_shadow); + p_mngr->ilt_shadow = NULL; } static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn, -- cgit From f99cd56230f56c8b6b33713c5be4da5d6766be1f Mon Sep 17 00:00:00 2001 From: Dong Chenchen Date: Sun, 10 Dec 2023 10:02:00 +0800 Subject: net: Remove acked SYN flag from packet in the transmit queue correctly syzkaller report: kernel BUG at net/core/skbuff.c:3452! invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.7.0-rc4-00009-gbee0e7762ad2-dirty #135 RIP: 0010:skb_copy_and_csum_bits (net/core/skbuff.c:3452) Call Trace: icmp_glue_bits (net/ipv4/icmp.c:357) __ip_append_data.isra.0 (net/ipv4/ip_output.c:1165) ip_append_data (net/ipv4/ip_output.c:1362 net/ipv4/ip_output.c:1341) icmp_push_reply (net/ipv4/icmp.c:370) __icmp_send (./include/net/route.h:252 net/ipv4/icmp.c:772) ip_fragment.constprop.0 (./include/linux/skbuff.h:1234 net/ipv4/ip_output.c:592 net/ipv4/ip_output.c:577) __ip_finish_output (net/ipv4/ip_output.c:311 net/ipv4/ip_output.c:295) ip_output (net/ipv4/ip_output.c:427) __ip_queue_xmit (net/ipv4/ip_output.c:535) __tcp_transmit_skb (net/ipv4/tcp_output.c:1462) __tcp_retransmit_skb (net/ipv4/tcp_output.c:3387) tcp_retransmit_skb (net/ipv4/tcp_output.c:3404) tcp_retransmit_timer (net/ipv4/tcp_timer.c:604) tcp_write_timer (./include/linux/spinlock.h:391 net/ipv4/tcp_timer.c:716) The panic issue was trigered by tcp simultaneous initiation. The initiation process is as follows: TCP A TCP B 1. CLOSED CLOSED 2. SYN-SENT --> ... 3. SYN-RECEIVED <-- <-- SYN-SENT 4. ... --> SYN-RECEIVED 5. SYN-RECEIVED --> ... // TCP B: not send challenge ack for ack limit or packet loss // TCP A: close tcp_close tcp_send_fin if (!tskb && tcp_under_memory_pressure(sk)) tskb = skb_rb_last(&sk->tcp_rtx_queue); //pick SYN_ACK packet TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; // set FIN flag 6. FIN_WAIT_1 --> ... // TCP B: send challenge ack to SYN_FIN_ACK 7. ... <-- SYN-RECEIVED //challenge ack // TCP A: 8. FIN_WAIT_1 --> ... // retransmit panic __tcp_retransmit_skb //skb->len=0 tcp_trim_head len = tp->snd_una - TCP_SKB_CB(skb)->seq // len=101-100 __pskb_trim_head skb->data_len -= len // skb->len=-1, wrap around ... ... ip_fragment icmp_glue_bits //BUG_ON If we use tcp_trim_head() to remove acked SYN from packet that contains data or other flags, skb->len will be incorrectly decremented. We can remove SYN flag that has been acked from rtx_queue earlier than tcp_trim_head(), which can fix the problem mentioned above. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Co-developed-by: Eric Dumazet Signed-off-by: Eric Dumazet Signed-off-by: Dong Chenchen Link: https://lore.kernel.org/r/20231210020200.1539875-1-dongchenchen2@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_output.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f5ef15e1d9ac..e3167ad96567 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3293,7 +3293,13 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) if (skb_still_in_host_queue(sk, skb)) return -EBUSY; +start: if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { + if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; + TCP_SKB_CB(skb)->seq++; + goto start; + } if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) { WARN_ON_ONCE(1); return -EINVAL; -- cgit From b3ae7b67b87fed771fa5bf95389df06b0433603e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Dec 2023 11:16:17 -0500 Subject: ring-buffer: Fix writing to the buffer with max_data_size The maximum ring buffer data size is the maximum size of data that can be recorded on the ring buffer. Events must be smaller than the sub buffer data size minus any meta data. This size is checked before trying to allocate from the ring buffer because the allocation assumes that the size will fit on the sub buffer. The maximum size was calculated as the size of a sub buffer page (which is currently PAGE_SIZE minus the sub buffer header) minus the size of the meta data of an individual event. But it missed the possible adding of a time stamp for events that are added long enough apart that the event meta data can't hold the time delta. When an event is added that is greater than the current BUF_MAX_DATA_SIZE minus the size of a time stamp, but still less than or equal to BUF_MAX_DATA_SIZE, the ring buffer would go into an infinite loop, looking for a page that can hold the event. Luckily, there's a check for this loop and after 1000 iterations and a warning is emitted and the ring buffer is disabled. But this should never happen. This can happen when a large event is added first, or after a long period where an absolute timestamp is prefixed to the event, increasing its size by 8 bytes. This passes the check and then goes into the algorithm that causes the infinite loop. For events that are the first event on the sub-buffer, it does not need to add a timestamp, because the sub-buffer itself contains an absolute timestamp, and adding one is redundant. The fix is to check if the event is to be the first event on the sub-buffer, and if it is, then do not add a timestamp. This also fixes 32 bit adding a timestamp when a read of before_stamp or write_stamp is interrupted. There's still no need to add that timestamp if the event is going to be the first event on the sub buffer. Also, if the buffer has "time_stamp_abs" set, then also check if the length plus the timestamp is greater than the BUF_MAX_DATA_SIZE. Link: https://lore.kernel.org/all/20231212104549.58863438@gandalf.local.home/ Link: https://lore.kernel.org/linux-trace-kernel/20231212071837.5fdd6c13@gandalf.local.home Link: https://lore.kernel.org/linux-trace-kernel/20231212111617.39e02849@gandalf.local.home Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: a4543a2fa9ef3 ("ring-buffer: Get timestamp after event is allocated") Fixes: 58fbc3c63275c ("ring-buffer: Consolidate add_timestamp to remove some branches") Reported-by: Kent Overstreet # (on IRC) Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 8d2a4f00eca9..b8986f82eccf 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3579,7 +3579,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, * absolute timestamp. * Don't bother if this is the start of a new page (w == 0). */ - if (unlikely(!a_ok || !b_ok || (info->before != info->after && w))) { + if (!w) { + /* Use the sub-buffer timestamp */ + info->delta = 0; + } else if (unlikely(!a_ok || !b_ok || info->before != info->after)) { info->add_timestamp |= RB_ADD_STAMP_FORCE | RB_ADD_STAMP_EXTEND; info->length += RB_LEN_TIME_EXTEND; } else { @@ -3737,6 +3740,8 @@ rb_reserve_next_event(struct trace_buffer *buffer, if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) { add_ts_default = RB_ADD_STAMP_ABSOLUTE; info.length += RB_LEN_TIME_EXTEND; + if (info.length > BUF_MAX_DATA_SIZE) + goto out_fail; } else { add_ts_default = RB_ADD_STAMP_NONE; } -- cgit From b55b0a0d7c4aa2dac3579aa7e6802d1f57445096 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Sat, 9 Dec 2023 17:10:58 -0500 Subject: tracing: Have large events show up as '[LINE TOO BIG]' instead of nothing If a large event was added to the ring buffer that is larger than what the trace_seq can handle, it just drops the output: ~# cat /sys/kernel/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 2/2 #P:8 # # _-----=> irqs-off/BH-disabled # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / _-=> migrate-disable # |||| / delay # TASK-PID CPU# ||||| TIMESTAMP FUNCTION # | | | ||||| | | <...>-859 [001] ..... 141.118951: tracing_mark_write <...>-859 [001] ..... 141.148201: tracing_mark_write: 78901234 Instead, catch this case and add some context: ~# cat /sys/kernel/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 2/2 #P:8 # # _-----=> irqs-off/BH-disabled # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / _-=> migrate-disable # |||| / delay # TASK-PID CPU# ||||| TIMESTAMP FUNCTION # | | | ||||| | | <...>-852 [001] ..... 121.550551: tracing_mark_write[LINE TOO BIG] <...>-852 [001] ..... 121.550581: tracing_mark_write: 78901234 This now emulates the same output as trace_pipe. Link: https://lore.kernel.org/linux-trace-kernel/20231209171058.78c1a026@gandalf.local.home Cc: Mark Rutland Cc: Mathieu Desnoyers Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fbcd3bafb93e..aa8f99f3e5de 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4722,7 +4722,11 @@ static int s_show(struct seq_file *m, void *v) iter->leftover = ret; } else { - print_trace_line(iter); + ret = print_trace_line(iter); + if (ret == TRACE_TYPE_PARTIAL_LINE) { + iter->seq.full = 0; + trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); + } ret = trace_print_seq(m, &iter->seq); /* * If we overflow the seq_file buffer, then it will -- cgit From 5eaf7f0589c0d88178f0fbeebe0e0b7108258707 Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Sun, 10 Dec 2023 21:35:34 +0000 Subject: eventfs: Fix events beyond NAME_MAX blocking tasks Eventfs uses simple_lookup(), however, it will fail if the name of the entry is beyond NAME_MAX length. When this error is encountered, eventfs still tries to create dentries instead of skipping the dentry creation. When the dentry is attempted to be created in this state d_wait_lookup() will loop forever, waiting for the lookup to be removed. Fix eventfs to return the error in simple_lookup() back to the caller instead of continuing to try to create the dentry. Link: https://lore.kernel.org/linux-trace-kernel/20231210213534.497-1-beaub@linux.microsoft.com Fixes: 63940449555e ("eventfs: Implement eventfs lookup, read, open functions") Link: https://lore.kernel.org/linux-trace-kernel/20231208183601.GA46-beaub@linux.microsoft.com/ Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 0b90869fd805..43e237864a42 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -546,6 +546,8 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, if (strcmp(ei_child->name, name) != 0) continue; ret = simple_lookup(dir, dentry, flags); + if (IS_ERR(ret)) + goto out; create_dir_dentry(ei, ei_child, ei_dentry, true); created = true; break; @@ -568,6 +570,8 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, if (r <= 0) continue; ret = simple_lookup(dir, dentry, flags); + if (IS_ERR(ret)) + goto out; create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops, true); break; -- cgit From 17d801758157bec93f26faaf5ff1a8b9a552d67a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Sun, 10 Dec 2023 22:12:50 -0500 Subject: ring-buffer: Fix memory leak of free page Reading the ring buffer does a swap of a sub-buffer within the ring buffer with a empty sub-buffer. This allows the reader to have full access to the content of the sub-buffer that was swapped out without having to worry about contention with the writer. The readers call ring_buffer_alloc_read_page() to allocate a page that will be used to swap with the ring buffer. When the code is finished with the reader page, it calls ring_buffer_free_read_page(). Instead of freeing the page, it stores it as a spare. Then next call to ring_buffer_alloc_read_page() will return this spare instead of calling into the memory management system to allocate a new page. Unfortunately, on freeing of the ring buffer, this spare page is not freed, and causes a memory leak. Link: https://lore.kernel.org/linux-trace-kernel/20231210221250.7b9cc83c@rorschach.local.home Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: 73a757e63114d ("ring-buffer: Return reader page back into existing ring buffer") Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b8986f82eccf..dcd47895b424 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1787,6 +1787,8 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) free_buffer_page(bpage); } + free_page((unsigned long)cpu_buffer->free_page); + kfree(cpu_buffer); } -- cgit From d06aff1cb13d2a0d52b48e605462518149c98c81 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Sun, 10 Dec 2023 22:54:47 -0500 Subject: tracing: Update snapshot buffer on resize if it is allocated The snapshot buffer is to mimic the main buffer so that when a snapshot is needed, the snapshot and main buffer are swapped. When the snapshot buffer is allocated, it is set to the minimal size that the ring buffer may be at and still functional. When it is allocated it becomes the same size as the main ring buffer, and when the main ring buffer changes in size, it should do. Currently, the resize only updates the snapshot buffer if it's used by the current tracer (ie. the preemptirqsoff tracer). But it needs to be updated anytime it is allocated. When changing the size of the main buffer, instead of looking to see if the current tracer is utilizing the snapshot buffer, just check if it is allocated to know if it should be updated or not. Also fix typo in comment just above the code change. Link: https://lore.kernel.org/linux-trace-kernel/20231210225447.48476a6a@rorschach.local.home Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: ad909e21bbe69 ("tracing: Add internal tracing_snapshot() functions") Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index aa8f99f3e5de..6c79548f9574 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6348,7 +6348,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, if (!tr->array_buffer.buffer) return 0; - /* Do not allow tracing while resizng ring buffer */ + /* Do not allow tracing while resizing ring buffer */ tracing_stop_tr(tr); ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); @@ -6356,7 +6356,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, goto out_start; #ifdef CONFIG_TRACER_MAX_TRACE - if (!tr->current_trace->use_max_tr) + if (!tr->allocated_snapshot) goto out; ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); -- cgit From 41db6f99b5489a0d2ef26afe816ef0c6118d1d47 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Mon, 11 Dec 2023 06:27:58 +0000 Subject: net: ena: Destroy correct number of xdp queues upon failure The ena_setup_and_create_all_xdp_queues() function freed all the resources upon failure, after creating only xdp_num_queues queues, instead of freeing just the created ones. In this patch, the only resources that are freed, are the ones allocated right before the failure occurs. Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") Signed-off-by: Shahar Itzko Signed-off-by: David Arinzon Link: https://lore.kernel.org/r/20231211062801.27891-2-darinzon@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index b5bca4814830..9884ef38f18a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -74,6 +74,8 @@ static void ena_unmap_tx_buff(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info); static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, int first_index, int count); +static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, int count); /* Increase a stat by cnt while holding syncp seqlock on 32bit machines */ static void ena_increase_stat(u64 *statp, u64 cnt, @@ -457,23 +459,22 @@ static void ena_init_all_xdp_queues(struct ena_adapter *adapter) static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter) { + u32 xdp_first_ring = adapter->xdp_first_ring; + u32 xdp_num_queues = adapter->xdp_num_queues; int rc = 0; - rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring, - adapter->xdp_num_queues); + rc = ena_setup_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); if (rc) goto setup_err; - rc = ena_create_io_tx_queues_in_range(adapter, - adapter->xdp_first_ring, - adapter->xdp_num_queues); + rc = ena_create_io_tx_queues_in_range(adapter, xdp_first_ring, xdp_num_queues); if (rc) goto create_err; return 0; create_err: - ena_free_all_io_tx_resources(adapter); + ena_free_all_io_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); setup_err: return rc; } -- cgit From 505b1a88d311ff6f8c44a34f94e3be21745cce6f Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Mon, 11 Dec 2023 06:27:59 +0000 Subject: net: ena: Fix xdp drops handling due to multibuf packets Current xdp code drops packets larger than ENA_XDP_MAX_MTU. This is an incorrect condition since the problem is not the size of the packet, rather the number of buffers it contains. This commit: 1. Identifies and drops XDP multi-buffer packets at the beginning of the function. 2. Increases the xdp drop statistic when this drop occurs. 3. Adds a one-time print that such drops are happening to give better indication to the user. Fixes: 838c93dc5449 ("net: ena: implement XDP drop support") Signed-off-by: Arthur Kiyanovski Signed-off-by: David Arinzon Link: https://lore.kernel.org/r/20231211062801.27891-3-darinzon@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 9884ef38f18a..c7bc5720d16d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1672,20 +1672,23 @@ static void ena_set_rx_hash(struct ena_ring *rx_ring, } } -static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) +static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp, u16 num_descs) { struct ena_rx_buffer *rx_info; int ret; + /* XDP multi-buffer packets not supported */ + if (unlikely(num_descs > 1)) { + netdev_err_once(rx_ring->adapter->netdev, + "xdp: dropped unsupported multi-buffer packets\n"); + ena_increase_stat(&rx_ring->rx_stats.xdp_drop, 1, &rx_ring->syncp); + return ENA_XDP_DROP; + } + rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; xdp_prepare_buff(xdp, page_address(rx_info->page), rx_info->buf_offset, rx_ring->ena_bufs[0].len, false); - /* If for some reason we received a bigger packet than - * we expect, then we simply drop it - */ - if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU)) - return ENA_XDP_DROP; ret = ena_xdp_execute(rx_ring, xdp); @@ -1754,7 +1757,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, ena_rx_ctx.l4_proto, ena_rx_ctx.hash); if (ena_xdp_present_ring(rx_ring)) - xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp); + xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp, ena_rx_ctx.descs); /* allocate skb and fill it */ if (xdp_verdict == ENA_XDP_PASS) -- cgit From d760117060cf2e90b5c59c5492cab179a4dbce01 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Mon, 11 Dec 2023 06:28:00 +0000 Subject: net: ena: Fix DMA syncing in XDP path when SWIOTLB is on This patch fixes two issues: Issue 1 ------- Description ``````````` Current code does not call dma_sync_single_for_cpu() to sync data from the device side memory to the CPU side memory before the XDP code path uses the CPU side data. This causes the XDP code path to read the unset garbage data in the CPU side memory, resulting in incorrect handling of the packet by XDP. Solution ```````` 1. Add a call to dma_sync_single_for_cpu() before the XDP code starts to use the data in the CPU side memory. 2. The XDP code verdict can be XDP_PASS, in which case there is a fallback to the non-XDP code, which also calls dma_sync_single_for_cpu(). To avoid calling dma_sync_single_for_cpu() twice: 2.1. Put the dma_sync_single_for_cpu() in the code in such a place where it happens before XDP and non-XDP code. 2.2. Remove the calls to dma_sync_single_for_cpu() in the non-XDP code for the first buffer only (rx_copybreak and non-rx_copybreak cases), since the new call that was added covers these cases. The call to dma_sync_single_for_cpu() for the second buffer and on stays because only the first buffer is handled by the newly added dma_sync_single_for_cpu(). And there is no need for special handling of the second buffer and on for the XDP path since currently the driver supports only single buffer packets. Issue 2 ------- Description ``````````` In case the XDP code forwarded the packet (ENA_XDP_FORWARDED), ena_unmap_rx_buff_attrs() is called with attrs set to 0. This means that before unmapping the buffer, the internal function dma_unmap_page_attrs() will also call dma_sync_single_for_cpu() on the whole buffer (not only on the data part of it). This sync is both wasteful (since a sync was already explicitly called before) and also causes a bug, which will be explained using the below diagram. The following diagram shows the flow of events causing the bug. The order of events is (1)-(4) as shown in the diagram. CPU side memory area (3)convert_to_xdp_frame() initializes the headroom with xdpf metadata || \/ ___________________________________ | | 0 | V 4K --------------------------------------------------------------------- | xdpf->data | other xdpf | < data > | tailroom ||...| | | fields | | GARBAGE || | --------------------------------------------------------------------- /\ /\ || || (4)ena_unmap_rx_buff_attrs() calls (2)dma_sync_single_for_cpu() dma_sync_single_for_cpu() on the copies data from device whole buffer page, overwriting side to CPU side memory the xdpf->data with GARBAGE. || 0 4K --------------------------------------------------------------------- | headroom | < data > | tailroom ||...| | GARBAGE | | GARBAGE || | --------------------------------------------------------------------- Device side memory area /\ || (1) device writes RX packet data After the call to ena_unmap_rx_buff_attrs() in (4), the xdpf->data becomes corrupted, and so when it is later accessed in ena_clean_xdp_irq()->xdp_return_frame(), it causes a page fault, crashing the kernel. Solution ```````` Explicitly tell ena_unmap_rx_buff_attrs() not to call dma_sync_single_for_cpu() by passing it the ENA_DMA_ATTR_SKIP_CPU_SYNC flag. Fixes: f7d625adeb7b ("net: ena: Add dynamic recycling mechanism for rx buffers") Signed-off-by: Arthur Kiyanovski Signed-off-by: David Arinzon Link: https://lore.kernel.org/r/20231211062801.27891-4-darinzon@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index c7bc5720d16d..c44c44e26ddf 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1493,11 +1493,6 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, if (unlikely(!skb)) return NULL; - /* sync this buffer for CPU use */ - dma_sync_single_for_cpu(rx_ring->dev, - dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset, - len, - DMA_FROM_DEVICE); skb_copy_to_linear_data(skb, buf_addr + buf_offset, len); dma_sync_single_for_device(rx_ring->dev, dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset, @@ -1516,17 +1511,10 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom); - pre_reuse_paddr = dma_unmap_addr(&rx_info->ena_buf, paddr); - /* If XDP isn't loaded try to reuse part of the RX buffer */ reuse_rx_buf_page = !is_xdp_loaded && ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset); - dma_sync_single_for_cpu(rx_ring->dev, - pre_reuse_paddr + pkt_offset, - len, - DMA_FROM_DEVICE); - if (!reuse_rx_buf_page) ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC); @@ -1723,6 +1711,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, int xdp_flags = 0; int total_len = 0; int xdp_verdict; + u8 pkt_offset; int rc = 0; int i; @@ -1749,13 +1738,19 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, /* First descriptor might have an offset set by the device */ rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; - rx_info->buf_offset += ena_rx_ctx.pkt_offset; + pkt_offset = ena_rx_ctx.pkt_offset; + rx_info->buf_offset += pkt_offset; netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n", rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, ena_rx_ctx.l4_proto, ena_rx_ctx.hash); + dma_sync_single_for_cpu(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset, + rx_ring->ena_bufs[0].len, + DMA_FROM_DEVICE); + if (ena_xdp_present_ring(rx_ring)) xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp, ena_rx_ctx.descs); @@ -1781,7 +1776,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, if (xdp_verdict & ENA_XDP_FORWARDED) { ena_unmap_rx_buff_attrs(rx_ring, &rx_ring->rx_buffer_info[req_id], - 0); + DMA_ATTR_SKIP_CPU_SYNC); rx_ring->rx_buffer_info[req_id].page = NULL; } } -- cgit From 4ab138ca0a340e6d6e7a6a9bd5004bd8f83127ca Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Mon, 11 Dec 2023 06:28:01 +0000 Subject: net: ena: Fix XDP redirection error When sending TX packets, the meta descriptor can be all zeroes as no meta information is required (as in XDP). This patch removes the validity check, as when `disable_meta_caching` is enabled, such TX packets will be dropped otherwise. Fixes: 0e3a3f6dacf0 ("net: ena: support new LLQ acceleration mode") Signed-off-by: Shay Agroskin Signed-off-by: David Arinzon Link: https://lore.kernel.org/r/20231211062801.27891-5-darinzon@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_eth_com.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c index 3d6f0a466a9e..f9f886289b97 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -328,9 +328,6 @@ static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, * compare it to the stored version, just create the meta */ if (io_sq->disable_meta_caching) { - if (unlikely(!ena_tx_ctx->meta_valid)) - return -EINVAL; - *have_meta = true; return ena_com_create_meta(io_sq, ena_meta); } -- cgit From 65c95f78917ea6fa7ff189a2c19879c4fe161873 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 11 Dec 2023 09:37:58 +0100 Subject: dpll: sanitize possible null pointer dereference in dpll_pin_parent_pin_set() User may not pass DPLL_A_PIN_STATE attribute in the pin set operation message. Sanitize that by checking if the attr pointer is not null and process the passed state attribute value only in that case. Reported-by: Xingyuan Mo Fixes: 9d71b54b65b1 ("dpll: netlink: Add DPLL framework base functions") Signed-off-by: Jiri Pirko Acked-by: Vadim Fedorenko Link: https://lore.kernel.org/r/20231211083758.1082853-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- drivers/dpll/dpll_netlink.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 442a0ebeb953..ce7cf736f020 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -925,7 +925,6 @@ dpll_pin_parent_pin_set(struct dpll_pin *pin, struct nlattr *parent_nest, struct netlink_ext_ack *extack) { struct nlattr *tb[DPLL_A_PIN_MAX + 1]; - enum dpll_pin_state state; u32 ppin_idx; int ret; @@ -936,10 +935,14 @@ dpll_pin_parent_pin_set(struct dpll_pin *pin, struct nlattr *parent_nest, return -EINVAL; } ppin_idx = nla_get_u32(tb[DPLL_A_PIN_PARENT_ID]); - state = nla_get_u32(tb[DPLL_A_PIN_STATE]); - ret = dpll_pin_on_pin_state_set(pin, ppin_idx, state, extack); - if (ret) - return ret; + + if (tb[DPLL_A_PIN_STATE]) { + enum dpll_pin_state state = nla_get_u32(tb[DPLL_A_PIN_STATE]); + + ret = dpll_pin_on_pin_state_set(pin, ppin_idx, state, extack); + if (ret) + return ret; + } return 0; } -- cgit From c41bd2514184d75db087fe4c1221237fb7922875 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Wed, 29 Nov 2023 22:04:09 +0000 Subject: kexec: drop dependency on ARCH_SUPPORTS_KEXEC from CRASH_DUMP In commit f8ff23429c62 ("kernel/Kconfig.kexec: drop select of KEXEC for CRASH_DUMP") we tried to fix a config regression, where CONFIG_CRASH_DUMP required CONFIG_KEXEC. However, it was not enough at least for arm64 platforms. While further testing the patch with our arm64 config I noticed that CONFIG_CRASH_DUMP is unavailable in menuconfig. This is because CONFIG_CRASH_DUMP still depends on the new CONFIG_ARCH_SUPPORTS_KEXEC introduced in commit 91506f7e5d21 ("arm64/kexec: refactor for kernel/Kconfig.kexec") and on arm64 CONFIG_ARCH_SUPPORTS_KEXEC requires CONFIG_PM_SLEEP_SMP=y, which in turn requires either CONFIG_SUSPEND=y or CONFIG_HIBERNATION=y neither of which are set in our config. Given that we already established that CONFIG_KEXEC (which is a switch for kexec system call itself) is not required for CONFIG_CRASH_DUMP drop CONFIG_ARCH_SUPPORTS_KEXEC dependency as well. The arm64 kernel builds just fine with CONFIG_CRASH_DUMP=y and with both CONFIG_KEXEC=n and CONFIG_KEXEC_FILE=n after f8ff23429c62 ("kernel/Kconfig.kexec: drop select of KEXEC for CRASH_DUMP") and this patch are applied given that the necessary shared bits are included via CONFIG_KEXEC_CORE dependency. [bhe@redhat.com: don't export some symbols when CONFIG_MMU=n] Link: https://lkml.kernel.org/r/ZW03ODUKGGhP1ZGU@MiWiFi-R3L-srv [bhe@redhat.com: riscv, kexec: fix dependency of two items] Link: https://lkml.kernel.org/r/ZW04G/SKnhbE5mnX@MiWiFi-R3L-srv Link: https://lkml.kernel.org/r/20231129220409.55006-1-ignat@cloudflare.com Fixes: 91506f7e5d21 ("arm64/kexec: refactor for kernel/Kconfig.kexec") Signed-off-by: Ignat Korchagin Signed-off-by: Baoquan He Acked-by: Baoquan He Cc: Alexander Gordeev Cc: # 6.6+: f8ff234: kernel/Kconfig.kexec: drop select of KEXEC for CRASH_DUMP Cc: # 6.6+ Signed-off-by: Andrew Morton --- arch/riscv/Kconfig | 4 ++-- arch/riscv/kernel/crash_core.c | 4 +++- kernel/Kconfig.kexec | 1 - 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 95a2a06acc6a..24c1799e2ec4 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -685,7 +685,7 @@ config RISCV_BOOT_SPINWAIT If unsure what to do here, say N. config ARCH_SUPPORTS_KEXEC - def_bool MMU + def_bool y config ARCH_SELECTS_KEXEC def_bool y @@ -693,7 +693,7 @@ config ARCH_SELECTS_KEXEC select HOTPLUG_CPU if SMP config ARCH_SUPPORTS_KEXEC_FILE - def_bool 64BIT && MMU + def_bool 64BIT config ARCH_SELECTS_KEXEC_FILE def_bool y diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c index 55f1d7856b54..8706736fd4e2 100644 --- a/arch/riscv/kernel/crash_core.c +++ b/arch/riscv/kernel/crash_core.c @@ -5,17 +5,19 @@ void arch_crash_save_vmcoreinfo(void) { - VMCOREINFO_NUMBER(VA_BITS); VMCOREINFO_NUMBER(phys_ram_base); vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET); vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); +#ifdef CONFIG_MMU + VMCOREINFO_NUMBER(VA_BITS); vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); #ifdef CONFIG_64BIT vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); +#endif #endif vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 1cc3b1c595d7..2fd510256604 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -94,7 +94,6 @@ config KEXEC_JUMP config CRASH_DUMP bool "kernel crash dumps" depends on ARCH_SUPPORTS_CRASH_DUMP - depends on ARCH_SUPPORTS_KEXEC select CRASH_CORE select KEXEC_CORE help -- cgit From ac88ff6b9d7dea9f0907c86bdae204dde7d5c0e6 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Tue, 5 Dec 2023 11:02:55 +0800 Subject: riscv: fix VMALLOC_START definition When below config items are set, compiler complained: -------------------- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y ...... ----------------------- ------------------------------------------------------------------- arch/riscv/kernel/crash_core.c: In function 'arch_crash_save_vmcoreinfo': arch/riscv/kernel/crash_core.c:11:58: warning: format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'int' [-Wformat=] 11 | vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); | ~~^ | | | long unsigned int | %x ---------------------------------------------------------------------- This is because on riscv macro VMALLOC_START has different type when CONFIG_MMU is set or unset. arch/riscv/include/asm/pgtable.h: -------------------------------------------------- Changing it to _AC(0, UL) in case CONFIG_MMU=n can fix the warning. Link: https://lkml.kernel.org/r/ZW7OsX4zQRA3mO4+@MiWiFi-R3L-srv Signed-off-by: Baoquan He Reported-by: Randy Dunlap Acked-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Cc: Eric DeVolder Cc: Ignat Korchagin Cc: Stephen Rothwell Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Signed-off-by: Andrew Morton --- arch/riscv/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 294044429e8e..ab00235b018f 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -899,7 +899,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL #define TASK_SIZE 0xffffffffUL -#define VMALLOC_START 0 +#define VMALLOC_START _AC(0, UL) #define VMALLOC_END TASK_SIZE #endif /* !CONFIG_MMU */ -- cgit From d3bb89ea9c13e5a98d2b7a0ba8e50a77893132cb Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 7 Dec 2023 23:25:25 +0800 Subject: mm: fix VMA heap bounds checking After converting selinux to VMA heap check helper, the gcl triggers an execheap SELinux denial, which is caused by a changed logic check. Previously selinux only checked that the VMA range was within the VMA heap range, and the implementation checks the intersection between the two ranges, but the corner case (vm_end=start_brk, brk=vm_start) isn't handled correctly. Since commit 11250fd12eb8 ("mm: factor out VMA stack and heap checks") was only a function extraction, it seems that the issue was introduced by commit 0db0c01b53a1 ("procfs: fix /proc//maps heap check"). Let's fix above corner cases, meanwhile, correct the wrong indentation of the stack and heap check helpers. Fixes: 11250fd12eb8 ("mm: factor out VMA stack and heap checks") Signed-off-by: Kefeng Wang Reported-by: Ondrej Mosnacek Closes: https://lore.kernel.org/selinux/CAFqZXNv0SVT0fkOK6neP9AXbj3nxJ61JAY4+zJzvxqJaeuhbFw@mail.gmail.com/ Tested-by: Ondrej Mosnacek Link: https://lkml.kernel.org/r/20231207152525.2607420-1-wangkefeng.wang@huawei.com Cc: David Hildenbrand Cc: Paul Moore Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton --- include/linux/mm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 418d26608ece..da5219b48d52 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -886,8 +886,8 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma) */ static inline bool vma_is_initial_heap(const struct vm_area_struct *vma) { - return vma->vm_start <= vma->vm_mm->brk && - vma->vm_end >= vma->vm_mm->start_brk; + return vma->vm_start < vma->vm_mm->brk && + vma->vm_end > vma->vm_mm->start_brk; } /* @@ -901,8 +901,8 @@ static inline bool vma_is_initial_stack(const struct vm_area_struct *vma) * its "stack". It's not even well-defined for programs written * languages like Go. */ - return vma->vm_start <= vma->vm_mm->start_stack && - vma->vm_end >= vma->vm_mm->start_stack; + return vma->vm_start <= vma->vm_mm->start_stack && + vma->vm_end >= vma->vm_mm->start_stack; } static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) -- cgit From a6fcd57cf2df409d35e9225b8dbad6f937b28df0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 6 Dec 2023 11:35:58 +0100 Subject: selftests/mm: cow: print ksft header before printing anything else Doing a ksft_print_msg() before the ksft_print_header() seems to confuse the ksft framework in a strange way: running the test on the cmdline results in the expected output. But piping the output somewhere else, results in some odd output, whereby we repeatedly get the same info printed: # [INFO] detected THP size: 2048 KiB # [INFO] detected hugetlb page size: 2048 KiB # [INFO] detected hugetlb page size: 1048576 KiB # [INFO] huge zeropage is enabled TAP version 13 1..190 # [INFO] Anonymous memory tests in private mappings # [RUN] Basic COW after fork() ... with base page # [INFO] detected THP size: 2048 KiB # [INFO] detected hugetlb page size: 2048 KiB # [INFO] detected hugetlb page size: 1048576 KiB # [INFO] huge zeropage is enabled TAP version 13 1..190 # [INFO] Anonymous memory tests in private mappings # [RUN] Basic COW after fork() ... with base page ok 1 No leak from parent into child # [RUN] Basic COW after fork() ... with swapped out base page # [INFO] detected THP size: 2048 KiB # [INFO] detected hugetlb page size: 2048 KiB # [INFO] detected hugetlb page size: 1048576 KiB # [INFO] huge zeropage is enabled Doing the ksft_print_header() first seems to resolve that and gives us the output we expect: TAP version 13 # [INFO] detected THP size: 2048 KiB # [INFO] detected hugetlb page size: 2048 KiB # [INFO] detected hugetlb page size: 1048576 KiB # [INFO] huge zeropage is enabled 1..190 # [INFO] Anonymous memory tests in private mappings # [RUN] Basic COW after fork() ... with base page ok 1 No leak from parent into child # [RUN] Basic COW after fork() ... with swapped out base page ok 2 No leak from parent into child # [RUN] Basic COW after fork() ... with THP ok 3 No leak from parent into child # [RUN] Basic COW after fork() ... with swapped-out THP ok 4 No leak from parent into child # [RUN] Basic COW after fork() ... with PTE-mapped THP ok 5 No leak from parent into child Link: https://lkml.kernel.org/r/20231206103558.38040-1-david@redhat.com Fixes: f4b5fd6946e2 ("selftests/vm: anon_cow: THP tests") Signed-off-by: David Hildenbrand Reported-by: Nico Pache Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 7324ce5363c0..6f2f83990441 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -1680,6 +1680,8 @@ int main(int argc, char **argv) { int err; + ksft_print_header(); + pagesize = getpagesize(); thpsize = read_pmd_pagesize(); if (thpsize) @@ -1689,7 +1691,6 @@ int main(int argc, char **argv) ARRAY_SIZE(hugetlbsizes)); detect_huge_zeropage(); - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case()); -- cgit From 6376a824595607e99d032a39ba3394988b4fce96 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 8 Dec 2023 17:50:18 +0000 Subject: mm/damon/core: make damon_start() waits until kdamond_fn() starts The cleanup tasks of kdamond threads including reset of corresponding DAMON context's ->kdamond field and decrease of global nr_running_ctxs counter is supposed to be executed by kdamond_fn(). However, commit 0f91d13366a4 ("mm/damon: simplify stop mechanism") made neither damon_start() nor damon_stop() ensure the corresponding kdamond has started the execution of kdamond_fn(). As a result, the cleanup can be skipped if damon_stop() is called fast enough after the previous damon_start(). Especially the skipped reset of ->kdamond could cause a use-after-free. Fix it by waiting for start of kdamond_fn() execution from damon_start(). Link: https://lkml.kernel.org/r/20231208175018.63880-1-sj@kernel.org Fixes: 0f91d13366a4 ("mm/damon: simplify stop mechanism") Signed-off-by: SeongJae Park Reported-by: Jakub Acs Cc: Changbin Du Cc: Jakub Acs Cc: # 5.15.x Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 ++ mm/damon/core.c | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index ab2f17d9926b..e00ddf1ed39c 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -559,6 +559,8 @@ struct damon_ctx { * update */ unsigned long next_ops_update_sis; + /* for waiting until the execution of the kdamond_fn is started */ + struct completion kdamond_started; /* public: */ struct task_struct *kdamond; diff --git a/mm/damon/core.c b/mm/damon/core.c index ce1562783e7e..3a05e71509b9 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -445,6 +445,8 @@ struct damon_ctx *damon_new_ctx(void) if (!ctx) return NULL; + init_completion(&ctx->kdamond_started); + ctx->attrs.sample_interval = 5 * 1000; ctx->attrs.aggr_interval = 100 * 1000; ctx->attrs.ops_update_interval = 60 * 1000 * 1000; @@ -668,11 +670,14 @@ static int __damon_start(struct damon_ctx *ctx) mutex_lock(&ctx->kdamond_lock); if (!ctx->kdamond) { err = 0; + reinit_completion(&ctx->kdamond_started); ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d", nr_running_ctxs); if (IS_ERR(ctx->kdamond)) { err = PTR_ERR(ctx->kdamond); ctx->kdamond = NULL; + } else { + wait_for_completion(&ctx->kdamond_started); } } mutex_unlock(&ctx->kdamond_lock); @@ -1433,6 +1438,7 @@ static int kdamond_fn(void *data) pr_debug("kdamond (%d) starts\n", current->pid); + complete(&ctx->kdamond_started); kdamond_init_intervals_sis(ctx); if (ctx->ops.init) -- cgit From 655fc6cd45521aba4a21c6e607533f1a21e06c2e Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 8 Dec 2023 15:30:32 +0800 Subject: loongarch, kexec: change dependency of object files Patch series "kexec: fix the incorrect ifdeffery and dependency of CONFIG_KEXEC". The select of KEXEC for CRASH_DUMP in kernel/Kconfig.kexec will be dropped, then compiling errors will be triggered if below config items are set: === CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y === E.g on mips, below link error are seen: -------------------------------------------------------------------- mipsel-linux-ld: kernel/kexec_core.o: in function `kimage_free': kernel/kexec_core.c:(.text+0x2200): undefined reference to `machine_kexec_cleanup' mipsel-linux-ld: kernel/kexec_core.o: in function `__crash_kexec': kernel/kexec_core.c:(.text+0x2480): undefined reference to `machine_crash_shutdown' mipsel-linux-ld: kernel/kexec_core.c:(.text+0x2488): undefined reference to `machine_kexec' mipsel-linux-ld: kernel/kexec_core.o: in function `kernel_kexec': kernel/kexec_core.c:(.text+0x29b8): undefined reference to `machine_shutdown' mipsel-linux-ld: kernel/kexec_core.c:(.text+0x29c0): undefined reference to `machine_kexec' -------------------------------------------------------------------- Here, change the incorrect dependency of building kexec_core related object files, and the ifdeffery on architectures from CONFIG_KEXEC to CONFIG_KEXEC_CORE. Testing: ======== Passed on mips and loognarch with the LKP reproducer. This patch (of 5): Currently, in arch/loongarch/kernel/Makefile, building machine_kexec.o relocate_kernel.o depends on CONFIG_KEXEC. Whereas, since we will drop the select of KEXEC for CRASH_DUMP in kernel/Kconfig.kexec, compiling error will be triggered if below config items are set: === CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y === --------------------------------------------------------------- loongarch64-linux-ld: kernel/kexec_core.o: in function `.L209': >> kexec_core.c:(.text+0x1660): undefined reference to `machine_kexec_cleanup' loongarch64-linux-ld: kernel/kexec_core.o: in function `.L287': >> kexec_core.c:(.text+0x1c5c): undefined reference to `machine_crash_shutdown' >> loongarch64-linux-ld: kexec_core.c:(.text+0x1c64): undefined reference to `machine_kexec' loongarch64-linux-ld: kernel/kexec_core.o: in function `.L2^B5': >> kexec_core.c:(.text+0x2090): undefined reference to `machine_shutdown' loongarch64-linux-ld: kexec_core.c:(.text+0x20a0): undefined reference to `machine_kexec' --------------------------------------------------------------- Here, change the dependency of machine_kexec.o relocate_kernel.o to CONFIG_KEXEC_CORE can fix above building error. Link: https://lkml.kernel.org/r/20231208073036.7884-1-bhe@redhat.com Link: https://lkml.kernel.org/r/20231208073036.7884-2-bhe@redhat.com Signed-off-by: Baoquan He Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311300946.kHE9Iu71-lkp@intel.com/ Cc: Eric DeVolder Cc: Ignat Korchagin Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- arch/loongarch/kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 4fcc168f0732..3c808c680370 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o obj-$(CONFIG_RELOCATABLE) += relocate.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o -- cgit From 9bad6b75fca1b38b08d94e93f49a90cda44702b9 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 8 Dec 2023 15:30:33 +0800 Subject: m68k, kexec: fix the incorrect ifdeffery and build dependency of CONFIG_KEXEC The select of KEXEC for CRASH_DUMP in kernel/Kconfig.kexec will be dropped, then compiling errors will be triggered if below config items are set: === CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y === Here, change the dependency of buinding machine_kexec.o relocate_kernel.o and the ifdeffery in asm/kexe.h to CONFIG_KEXEC_CORE. Link: https://lkml.kernel.org/r/20231208073036.7884-3-bhe@redhat.com Signed-off-by: Baoquan He Cc: Eric DeVolder Cc: Ignat Korchagin Cc: kernel test robot Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- arch/m68k/include/asm/kexec.h | 4 ++-- arch/m68k/kernel/Makefile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/m68k/include/asm/kexec.h b/arch/m68k/include/asm/kexec.h index f5a8b2defa4b..3b0b64f0a353 100644 --- a/arch/m68k/include/asm/kexec.h +++ b/arch/m68k/include/asm/kexec.h @@ -2,7 +2,7 @@ #ifndef _ASM_M68K_KEXEC_H #define _ASM_M68K_KEXEC_H -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* Maximum physical address we can use pages from */ #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) @@ -25,6 +25,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs, #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #endif /* _ASM_M68K_KEXEC_H */ diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile index 01fb69a5095f..f335bf3268a1 100644 --- a/arch/m68k/kernel/Makefile +++ b/arch/m68k/kernel/Makefile @@ -25,7 +25,7 @@ obj-$(CONFIG_PCI) += pcibios.o obj-$(CONFIG_M68K_NONCOHERENT_DMA) += dma.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_BOOTINFO_PROC) += bootinfo_proc.o obj-$(CONFIG_UBOOT) += uboot.o -- cgit From 8cd2accb71f5eb8e92d775fc1978d3779875c2e5 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 8 Dec 2023 15:30:34 +0800 Subject: mips, kexec: fix the incorrect ifdeffery and dependency of CONFIG_KEXEC The select of KEXEC for CRASH_DUMP in kernel/Kconfig.kexec will be dropped, then compiling errors will be triggered if below config items are set: === CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y === -------------------------------------------------------------------- mipsel-linux-ld: kernel/kexec_core.o: in function `kimage_free': kernel/kexec_core.c:(.text+0x2200): undefined reference to `machine_kexec_cleanup' mipsel-linux-ld: kernel/kexec_core.o: in function `__crash_kexec': kernel/kexec_core.c:(.text+0x2480): undefined reference to `machine_crash_shutdown' mipsel-linux-ld: kernel/kexec_core.c:(.text+0x2488): undefined reference to `machine_kexec' mipsel-linux-ld: kernel/kexec_core.o: in function `kernel_kexec': kernel/kexec_core.c:(.text+0x29b8): undefined reference to `machine_shutdown' mipsel-linux-ld: kernel/kexec_core.c:(.text+0x29c0): undefined reference to `machine_kexec' -------------------------------------------------------------------- Here, change the dependency of building kexec_core related object files, and the ifdeffery in mips from CONFIG_KEXEC to CONFIG_KEXEC_CORE. Link: https://lkml.kernel.org/r/20231208073036.7884-4-bhe@redhat.com Signed-off-by: Baoquan He Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311302042.sn8cDPIX-lkp@intel.com/ Cc: Eric DeVolder Cc: Ignat Korchagin Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- arch/mips/cavium-octeon/smp.c | 4 ++-- arch/mips/include/asm/kexec.h | 2 +- arch/mips/include/asm/smp-ops.h | 2 +- arch/mips/include/asm/smp.h | 2 +- arch/mips/kernel/Makefile | 2 +- arch/mips/kernel/smp-bmips.c | 4 ++-- arch/mips/kernel/smp-cps.c | 10 +++++----- arch/mips/loongson64/reset.c | 4 ++-- arch/mips/loongson64/smp.c | 2 +- 9 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 33c09688210f..08ea2cde1eb5 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -422,7 +422,7 @@ static const struct plat_smp_ops octeon_smp_ops = { .cpu_disable = octeon_cpu_disable, .cpu_die = octeon_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; @@ -502,7 +502,7 @@ static const struct plat_smp_ops octeon_78xx_smp_ops = { .cpu_disable = octeon_cpu_disable, .cpu_die = octeon_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; diff --git a/arch/mips/include/asm/kexec.h b/arch/mips/include/asm/kexec.h index d6d5fa5cc31d..69e579e41e66 100644 --- a/arch/mips/include/asm/kexec.h +++ b/arch/mips/include/asm/kexec.h @@ -31,7 +31,7 @@ static inline void crash_setup_regs(struct pt_regs *newregs, prepare_frametrace(newregs); } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE struct kimage; extern unsigned long kexec_args[4]; extern int (*_machine_kexec_prepare)(struct kimage *); diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h index 5719ff49eff1..0c59e168f800 100644 --- a/arch/mips/include/asm/smp-ops.h +++ b/arch/mips/include/asm/smp-ops.h @@ -35,7 +35,7 @@ struct plat_smp_ops { void (*cpu_die)(unsigned int cpu); void (*cleanup_dead_cpu)(unsigned cpu); #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void (*kexec_nonboot_cpu)(void); #endif }; diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index a40d8c0e4b87..901bc61fa7ae 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -93,7 +93,7 @@ static inline void __cpu_die(unsigned int cpu) extern void __noreturn play_dead(void); #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static inline void kexec_nonboot_cpu(void) { extern const struct plat_smp_ops *mp_ops; /* private */ diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 853a43ee4b44..ecf3278a32f7 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -90,7 +90,7 @@ obj-$(CONFIG_GPIO_TXX9) += gpio_txx9.o obj-$(CONFIG_RELOCATABLE) += relocate.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_EARLY_PRINTK_8250) += early_printk_8250.o diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index c074ecce3fbf..b3dbf9ecb0d6 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -434,7 +434,7 @@ const struct plat_smp_ops bmips43xx_smp_ops = { .cpu_disable = bmips_cpu_disable, .cpu_die = bmips_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; @@ -451,7 +451,7 @@ const struct plat_smp_ops bmips5000_smp_ops = { .cpu_disable = bmips_cpu_disable, .cpu_die = bmips_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index dd55d59b88db..f6c37d407f36 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -392,7 +392,7 @@ static void cps_smp_finish(void) local_irq_enable(); } -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC) +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE) enum cpu_death { CPU_DEATH_HALT, @@ -429,7 +429,7 @@ static void cps_shutdown_this_cpu(enum cpu_death death) } } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static void cps_kexec_nonboot_cpu(void) { @@ -439,9 +439,9 @@ static void cps_kexec_nonboot_cpu(void) cps_shutdown_this_cpu(CPU_DEATH_POWER); } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ -#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC */ +#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC_CORE */ #ifdef CONFIG_HOTPLUG_CPU @@ -610,7 +610,7 @@ static const struct plat_smp_ops cps_smp_ops = { .cpu_die = cps_cpu_die, .cleanup_dead_cpu = cps_cleanup_dead_cpu, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = cps_kexec_nonboot_cpu, #endif }; diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c index e420800043b0..e01c8d4a805a 100644 --- a/arch/mips/loongson64/reset.c +++ b/arch/mips/loongson64/reset.c @@ -53,7 +53,7 @@ static void loongson_halt(void) } } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* 0X80000000~0X80200000 is safe */ #define MAX_ARGS 64 @@ -158,7 +158,7 @@ static int __init mips_reboot_setup(void) _machine_halt = loongson_halt; pm_power_off = loongson_poweroff; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE kexec_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL); if (WARN_ON(!kexec_argv)) return -ENOMEM; diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c index e015a26a40f7..498bdc1bb0ed 100644 --- a/arch/mips/loongson64/smp.c +++ b/arch/mips/loongson64/smp.c @@ -864,7 +864,7 @@ const struct plat_smp_ops loongson3_smp_ops = { .cpu_disable = loongson3_cpu_disable, .cpu_die = loongson3_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; -- cgit From d70c27b728b8da1ab9c3b7ca117ee1c99dc86d29 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 8 Dec 2023 15:30:35 +0800 Subject: sh, kexec: fix the incorrect ifdeffery and dependency of CONFIG_KEXEC The select of KEXEC for CRASH_DUMP in kernel/Kconfig.kexec will be dropped, then compiling errors will be triggered if below config items are set: === CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y === Here, change the dependency of building kexec_core related object files, and the ifdeffery on SuperH from CONFIG_KEXEC to CONFIG_KEXEC_CORE. Link: https://lkml.kernel.org/r/20231208073036.7884-5-bhe@redhat.com Signed-off-by: Baoquan He Cc: Eric DeVolder Cc: Ignat Korchagin Cc: kernel test robot Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- arch/sh/include/asm/kexec.h | 4 ++-- arch/sh/kernel/Makefile | 2 +- arch/sh/kernel/reboot.c | 4 ++-- arch/sh/kernel/setup.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/sh/include/asm/kexec.h b/arch/sh/include/asm/kexec.h index 927d80ba2332..76631714673c 100644 --- a/arch/sh/include/asm/kexec.h +++ b/arch/sh/include/asm/kexec.h @@ -28,7 +28,7 @@ /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_SH -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* arch/sh/kernel/machine_kexec.c */ void reserve_crashkernel(void); @@ -67,6 +67,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs, } #else static inline void reserve_crashkernel(void) { } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #endif /* __ASM_SH_KEXEC_H */ diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index 69cd9ac4b2ab..2d7e70537de0 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -33,7 +33,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SH_STANDARD_BIOS) += sh_bios.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_MODULES) += sh_ksyms_32.o module.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_IO_TRAPPED) += io_trapped.o diff --git a/arch/sh/kernel/reboot.c b/arch/sh/kernel/reboot.c index e8eeedc9b182..1de006b1c339 100644 --- a/arch/sh/kernel/reboot.c +++ b/arch/sh/kernel/reboot.c @@ -63,7 +63,7 @@ struct machine_ops machine_ops = { .shutdown = native_machine_shutdown, .restart = native_machine_restart, .halt = native_machine_halt, -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .crash_shutdown = native_machine_crash_shutdown, #endif }; @@ -88,7 +88,7 @@ void machine_halt(void) machine_ops.halt(); } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void machine_crash_shutdown(struct pt_regs *regs) { machine_ops.crash_shutdown(regs); diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 3d80515298d2..d3175f09b3aa 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -220,7 +220,7 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn, request_resource(res, &code_resource); request_resource(res, &data_resource); request_resource(res, &bss_resource); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE request_resource(res, &crashk_res); #endif -- cgit From 69f8ca8d36b5e52360f45c3b63bcb3d075da36df Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 8 Dec 2023 15:30:36 +0800 Subject: x86, kexec: fix the wrong ifdeffery CONFIG_KEXEC With the current ifdeffery CONFIG_KEXEC, get_cmdline_acpi_rsdp() is only available when kexec_load interface is taken, while kexec_file_load interface can't make use of it. Now change it to CONFIG_KEXEC_CORE. Link: https://lkml.kernel.org/r/20231208073036.7884-6-bhe@redhat.com Signed-off-by: Baoquan He Cc: Eric DeVolder Cc: Ignat Korchagin Cc: kernel test robot Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- arch/x86/boot/compressed/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 55c98fdd67d2..18d15d1ce87d 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -178,7 +178,7 @@ static unsigned long get_cmdline_acpi_rsdp(void) { unsigned long addr = 0; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE char val[MAX_ADDR_LEN] = { }; int ret; -- cgit From 1dd11e977360ad3493812da0b05ffd9adcdd15a1 Mon Sep 17 00:00:00 2001 From: Yuntao Wang Date: Sat, 9 Dec 2023 22:14:38 +0800 Subject: crash_core: fix the check for whether crashkernel is from high memory If crash_base is equal to CRASH_ADDR_LOW_MAX, it also indicates that the crashkernel memory is allocated from high memory. However, the current check only considers the case where crash_base is greater than CRASH_ADDR_LOW_MAX. Fix it. The runtime effects is that crashkernel high memory is successfully reserved, whereas the crashkernel low memory is bypassed in this case, then kdump kernel bootup will fail because of no low memory under 4G. This patch also includes some minor cleanups. Link: https://lkml.kernel.org/r/20231209141438.77233-1-ytcoode@gmail.com Fixes: 0ab97169aa05 ("crash_core: add generic function to do reservation") Signed-off-by: Yuntao Wang Cc: Baoquan He Cc: Dave Young Cc: Vivek Goyal Cc: Zhen Lei Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton --- kernel/crash_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index efe87d501c8c..d4313b53837e 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -199,7 +199,7 @@ static __initdata char *suffix_tbl[] = { * It returns 0 on success and -EINVAL on failure. */ static int __init parse_crashkernel_suffix(char *cmdline, - unsigned long long *crash_size, + unsigned long long *crash_size, const char *suffix) { char *cur = cmdline; @@ -268,9 +268,9 @@ static int __init __parse_crashkernel(char *cmdline, unsigned long long *crash_base, const char *suffix) { - char *first_colon, *first_space; - char *ck_cmdline; - char *name = "crashkernel="; + char *first_colon, *first_space; + char *ck_cmdline; + char *name = "crashkernel="; BUG_ON(!crash_size || !crash_base); *crash_size = 0; @@ -440,7 +440,7 @@ retry: return; } - if ((crash_base > CRASH_ADDR_LOW_MAX) && + if ((crash_base >= CRASH_ADDR_LOW_MAX) && crash_low_size && reserve_crashkernel_low(crash_low_size)) { memblock_phys_free(crash_base, crash_size); return; -- cgit From 43e8832fed08438e2a27afed9bac21acd0ceffe5 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 8 Dec 2023 18:01:44 -0800 Subject: Revert "selftests: error out if kernel header files are not yet built" This reverts commit 9fc96c7c19df ("selftests: error out if kernel header files are not yet built"). It turns out that requiring the kernel headers to be built as a prerequisite to building selftests, does not work in many cases. For example, Peter Zijlstra writes: "My biggest beef with the whole thing is that I simply do not want to use 'make headers', it doesn't work for me. I have a ton of output directories and I don't care to build tools into the output dirs, in fact some of them flat out refuse to work that way (bpf comes to mind)." [1] Therefore, stop erroring out on the selftests build. Additional patches will be required in order to change over to not requiring the kernel headers. [1] https://lore.kernel.org/20231208221007.GO28727@noisy.programming.kicks-ass.net Link: https://lkml.kernel.org/r/20231209020144.244759-1-jhubbard@nvidia.com Fixes: 9fc96c7c19df ("selftests: error out if kernel header files are not yet built") Signed-off-by: John Hubbard Cc: Anders Roxell Cc: Muhammad Usama Anjum Cc: David Hildenbrand Cc: Peter Xu Cc: Jonathan Corbet Cc: Nathan Chancellor Cc: Shuah Khan Cc: Peter Zijlstra Cc: Marcos Paulo de Souza Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/Makefile | 21 +-------------------- tools/testing/selftests/lib.mk | 40 +++------------------------------------- 2 files changed, 4 insertions(+), 57 deletions(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 3b2061d1c1a5..8247a7c69c36 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -155,12 +155,10 @@ ifneq ($(KBUILD_OUTPUT),) abs_objtree := $(realpath $(abs_objtree)) BUILD := $(abs_objtree)/kselftest KHDR_INCLUDES := -isystem ${abs_objtree}/usr/include - KHDR_DIR := ${abs_objtree}/usr/include else BUILD := $(CURDIR) abs_srctree := $(shell cd $(top_srcdir) && pwd) KHDR_INCLUDES := -isystem ${abs_srctree}/usr/include - KHDR_DIR := ${abs_srctree}/usr/include DEFAULT_INSTALL_HDR_PATH := 1 endif @@ -174,7 +172,7 @@ export KHDR_INCLUDES # all isn't the first target in the file. .DEFAULT_GOAL := all -all: kernel_header_files +all: @ret=1; \ for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ @@ -185,23 +183,6 @@ all: kernel_header_files ret=$$((ret * $$?)); \ done; exit $$ret; -kernel_header_files: - @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \ - if [ $$? -ne 0 ]; then \ - RED='\033[1;31m'; \ - NOCOLOR='\033[0m'; \ - echo; \ - echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \ - echo "Please run this and try again:"; \ - echo; \ - echo " cd $(top_srcdir)"; \ - echo " make headers"; \ - echo; \ - exit 1; \ - fi - -.PHONY: kernel_header_files - run_tests: all @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 118e0964bda9..aa646e0661f3 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -44,26 +44,10 @@ endif selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST)))) top_srcdir = $(selfdir)/../../.. -ifeq ("$(origin O)", "command line") - KBUILD_OUTPUT := $(O) +ifeq ($(KHDR_INCLUDES),) +KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include endif -ifneq ($(KBUILD_OUTPUT),) - # Make's built-in functions such as $(abspath ...), $(realpath ...) cannot - # expand a shell special character '~'. We use a somewhat tedious way here. - abs_objtree := $(shell cd $(top_srcdir) && mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd) - $(if $(abs_objtree),, \ - $(error failed to create output directory "$(KBUILD_OUTPUT)")) - # $(realpath ...) resolves symlinks - abs_objtree := $(realpath $(abs_objtree)) - KHDR_DIR := ${abs_objtree}/usr/include -else - abs_srctree := $(shell cd $(top_srcdir) && pwd) - KHDR_DIR := ${abs_srctree}/usr/include -endif - -KHDR_INCLUDES := -isystem $(KHDR_DIR) - # The following are built by lib.mk common compile rules. # TEST_CUSTOM_PROGS should be used by tests that require # custom build rule and prevent common build rule use. @@ -74,25 +58,7 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED)) TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) -all: kernel_header_files $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) \ - $(TEST_GEN_FILES) - -kernel_header_files: - @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \ - if [ $$? -ne 0 ]; then \ - RED='\033[1;31m'; \ - NOCOLOR='\033[0m'; \ - echo; \ - echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \ - echo "Please run this and try again:"; \ - echo; \ - echo " cd $(top_srcdir)"; \ - echo " make headers"; \ - echo; \ - exit 1; \ - fi - -.PHONY: kernel_header_files +all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) define RUN_TESTS BASE_DIR="$(selfdir)"; \ -- cgit From 55ac8bbe358bdd2f3c044c12f249fd22d48fe015 Mon Sep 17 00:00:00 2001 From: David Stevens Date: Tue, 18 Apr 2023 17:40:31 +0900 Subject: mm/shmem: fix race in shmem_undo_range w/THP Split folios during the second loop of shmem_undo_range. It's not sufficient to only split folios when dealing with partial pages, since it's possible for a THP to be faulted in after that point. Calling truncate_inode_folio in that situation can result in throwing away data outside of the range being targeted. [akpm@linux-foundation.org: tidy up comment layout] Link: https://lkml.kernel.org/r/20230418084031.3439795-1-stevensd@google.com Fixes: b9a8a4195c7d ("truncate,shmem: Handle truncates that split large folios") Signed-off-by: David Stevens Cc: Matthew Wilcox (Oracle) Cc: Suleiman Souhlal Cc: Signed-off-by: Andrew Morton --- mm/shmem.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index 91e2620148b2..0d1ce70bce38 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1080,7 +1080,24 @@ whole_folios: } VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio); - truncate_inode_folio(mapping, folio); + + if (!folio_test_large(folio)) { + truncate_inode_folio(mapping, folio); + } else if (truncate_inode_partial_folio(folio, lstart, lend)) { + /* + * If we split a page, reset the loop so + * that we pick up the new sub pages. + * Otherwise the THP was entirely + * dropped or the target range was + * zeroed, so just continue the loop as + * is. + */ + if (!folio_test_large(folio)) { + folio_unlock(folio); + index = start; + break; + } + } } folio_unlock(folio); } -- cgit From 081488051d28d32569ebb7c7a23572778b2e7d57 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 7 Dec 2023 23:14:04 -0700 Subject: mm/mglru: fix underprotected page cache Unmapped folios accessed through file descriptors can be underprotected. Those folios are added to the oldest generation based on: 1. The fact that they are less costly to reclaim (no need to walk the rmap and flush the TLB) and have less impact on performance (don't cause major PFs and can be non-blocking if needed again). 2. The observation that they are likely to be single-use. E.g., for client use cases like Android, its apps parse configuration files and store the data in heap (anon); for server use cases like MySQL, it reads from InnoDB files and holds the cached data for tables in buffer pools (anon). However, the oldest generation can be very short lived, and if so, it doesn't provide the PID controller with enough time to respond to a surge of refaults. (Note that the PID controller uses weighted refaults and those from evicted generations only take a half of the whole weight.) In other words, for a short lived generation, the moving average smooths out the spike quickly. To fix the problem: 1. For folios that are already on LRU, if they can be beyond the tracking range of tiers, i.e., five accesses through file descriptors, move them to the second oldest generation to give them more time to age. (Note that tiers are used by the PID controller to statistically determine whether folios accessed multiple times through file descriptors are worth protecting.) 2. When adding unmapped folios to LRU, adjust the placement of them so that they are not too close to the tail. The effect of this is similar to the above. On Android, launching 55 apps sequentially: Before After Change workingset_refault_anon 25641024 25598972 0% workingset_refault_file 115016834 106178438 -8% Link: https://lkml.kernel.org/r/20231208061407.2125867-1-yuzhao@google.com Fixes: ac35a4902374 ("mm: multi-gen LRU: minimal implementation") Signed-off-by: Yu Zhao Reported-by: Charan Teja Kalla Tested-by: Kalesh Singh Cc: T.J. Mercier Cc: Kairui Song Cc: Hillf Danton Cc: Jaroslav Pulchart Cc: Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 23 ++++++++++++++--------- mm/vmscan.c | 2 +- mm/workingset.c | 6 +++--- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 9ae7def16cb2..f4fe593c1400 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -232,22 +232,27 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, if (folio_test_unevictable(folio) || !lrugen->enabled) return false; /* - * There are three common cases for this page: - * 1. If it's hot, e.g., freshly faulted in or previously hot and - * migrated, add it to the youngest generation. - * 2. If it's cold but can't be evicted immediately, i.e., an anon page - * not in swapcache or a dirty page pending writeback, add it to the - * second oldest generation. - * 3. Everything else (clean, cold) is added to the oldest generation. + * There are four common cases for this page: + * 1. If it's hot, i.e., freshly faulted in, add it to the youngest + * generation, and it's protected over the rest below. + * 2. If it can't be evicted immediately, i.e., a dirty page pending + * writeback, add it to the second youngest generation. + * 3. If it should be evicted first, e.g., cold and clean from + * folio_rotate_reclaimable(), add it to the oldest generation. + * 4. Everything else falls between 2 & 3 above and is added to the + * second oldest generation if it's considered inactive, or the + * oldest generation otherwise. See lru_gen_is_active(). */ if (folio_test_active(folio)) seq = lrugen->max_seq; else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) || (folio_test_reclaim(folio) && (folio_test_dirty(folio) || folio_test_writeback(folio)))) - seq = lrugen->min_seq[type] + 1; - else + seq = lrugen->max_seq - 1; + else if (reclaiming || lrugen->min_seq[type] + MIN_NR_GENS >= lrugen->max_seq) seq = lrugen->min_seq[type]; + else + seq = lrugen->min_seq[type] + 1; gen = lru_gen_from_seq(seq); flags = (gen + 1UL) << LRU_GEN_PGOFF; diff --git a/mm/vmscan.c b/mm/vmscan.c index 506f8220c5fe..540333f5415c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4232,7 +4232,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c } /* protected */ - if (tier > tier_idx) { + if (tier > tier_idx || refs == BIT(LRU_REFS_WIDTH)) { int hist = lru_hist_from_seq(lrugen->min_seq[type]); gen = folio_inc_gen(lruvec, folio, false); diff --git a/mm/workingset.c b/mm/workingset.c index b192e44a0e7c..33baad203277 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -313,10 +313,10 @@ static void lru_gen_refault(struct folio *folio, void *shadow) * 1. For pages accessed through page tables, hotter pages pushed out * hot pages which refaulted immediately. * 2. For pages accessed multiple times through file descriptors, - * numbers of accesses might have been out of the range. + * they would have been protected by sort_folio(). */ - if (lru_gen_in_fault() || refs == BIT(LRU_REFS_WIDTH)) { - folio_set_workingset(folio); + if (lru_gen_in_fault() || refs >= BIT(LRU_REFS_WIDTH) - 1) { + set_mask_bits(&folio->flags, 0, LRU_REFS_MASK | BIT(PG_workingset)); mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta); } unlock: -- cgit From 5095a2b23987d3c3c47dd16b3d4080e2733b8bb9 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 7 Dec 2023 23:14:05 -0700 Subject: mm/mglru: try to stop at high watermarks The initial MGLRU patchset didn't include the memcg LRU support, and it relied on should_abort_scan(), added by commit f76c83378851 ("mm: multi-gen LRU: optimize multiple memcgs"), to "backoff to avoid overshooting their aggregate reclaim target by too much". Later on when the memcg LRU was added, should_abort_scan() was deemed unnecessary, and the test results [1] showed no side effects after it was removed by commit a579086c99ed ("mm: multi-gen LRU: remove eviction fairness safeguard"). However, that test used memory.reclaim, which sets nr_to_reclaim to SWAP_CLUSTER_MAX. So it can overshoot only by SWAP_CLUSTER_MAX-1 pages, i.e., from nr_reclaimed=nr_to_reclaim-1 to nr_reclaimed=nr_to_reclaim+SWAP_CLUSTER_MAX-1. Compared with the batch size kswapd sets to nr_to_reclaim, SWAP_CLUSTER_MAX is tiny. Therefore that test isn't able to reproduce the worst case scenario, i.e., kswapd overshooting GBs on large systems and "consuming 100% CPU" (see the Closes tag). Bring back a simplified version of should_abort_scan() on top of the memcg LRU, so that kswapd stops when all eligible zones are above their respective high watermarks plus a small delta to lower the chance of KSWAPD_HIGH_WMARK_HIT_QUICKLY. Note that this only applies to order-0 reclaim, meaning compaction-induced reclaim can still run wild (which is a different problem). On Android, launching 55 apps sequentially: Before After Change pgpgin 838377172 802955040 -4% pgpgout 38037080 34336300 -10% [1] https://lore.kernel.org/20221222041905.2431096-1-yuzhao@google.com/ Link: https://lkml.kernel.org/r/20231208061407.2125867-2-yuzhao@google.com Fixes: a579086c99ed ("mm: multi-gen LRU: remove eviction fairness safeguard") Signed-off-by: Yu Zhao Reported-by: Charan Teja Kalla Reported-by: Jaroslav Pulchart Closes: https://lore.kernel.org/CAK8fFZ4DY+GtBA40Pm7Nn5xCHy+51w3sfxPqkqpqakSXYyX+Wg@mail.gmail.com/ Tested-by: Jaroslav Pulchart Tested-by: Kalesh Singh Cc: Hillf Danton Cc: Kairui Song Cc: T.J. Mercier Cc: Signed-off-by: Andrew Morton --- mm/vmscan.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 540333f5415c..bb7dacd8f753 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4648,20 +4648,41 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0; } -static unsigned long get_nr_to_reclaim(struct scan_control *sc) +static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc) { + int i; + enum zone_watermarks mark; + /* don't abort memcg reclaim to ensure fairness */ if (!root_reclaim(sc)) - return -1; + return false; + + if (sc->nr_reclaimed >= max(sc->nr_to_reclaim, compact_gap(sc->order))) + return true; + + /* check the order to exclude compaction-induced reclaim */ + if (!current_is_kswapd() || sc->order) + return false; - return max(sc->nr_to_reclaim, compact_gap(sc->order)); + mark = sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING ? + WMARK_PROMO : WMARK_HIGH; + + for (i = 0; i <= sc->reclaim_idx; i++) { + struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i; + unsigned long size = wmark_pages(zone, mark) + MIN_LRU_BATCH; + + if (managed_zone(zone) && !zone_watermark_ok(zone, 0, size, sc->reclaim_idx, 0)) + return false; + } + + /* kswapd should abort if all eligible zones are safe */ + return true; } static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { long nr_to_scan; unsigned long scanned = 0; - unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); int swappiness = get_swappiness(lruvec, sc); /* clean file folios are more likely to exist */ @@ -4683,7 +4704,7 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) if (scanned >= nr_to_scan) break; - if (sc->nr_reclaimed >= nr_to_reclaim) + if (should_abort_scan(lruvec, sc)) break; cond_resched(); @@ -4744,7 +4765,6 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) struct lru_gen_folio *lrugen; struct mem_cgroup *memcg; const struct hlist_nulls_node *pos; - unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); bin = first_bin = get_random_u32_below(MEMCG_NR_BINS); restart: @@ -4777,7 +4797,7 @@ restart: rcu_read_lock(); - if (sc->nr_reclaimed >= nr_to_reclaim) + if (should_abort_scan(lruvec, sc)) break; } @@ -4788,7 +4808,7 @@ restart: mem_cgroup_put(memcg); - if (sc->nr_reclaimed >= nr_to_reclaim) + if (!is_a_nulls(pos)) return; /* restart if raced with lru_gen_rotate_memcg() */ -- cgit From 8aa420617918d12d1f5d55030a503c9418e73c2c Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 7 Dec 2023 23:14:06 -0700 Subject: mm/mglru: respect min_ttl_ms with memcgs While investigating kswapd "consuming 100% CPU" [1] (also see "mm/mglru: try to stop at high watermarks"), it was discovered that the memcg LRU can breach the thrashing protection imposed by min_ttl_ms. Before the memcg LRU: kswapd() shrink_node_memcgs() mem_cgroup_iter() inc_max_seq() // always hit a different memcg lru_gen_age_node() mem_cgroup_iter() check the timestamp of the oldest generation After the memcg LRU: kswapd() shrink_many() restart: iterate the memcg LRU: inc_max_seq() // occasionally hit the same memcg if raced with lru_gen_rotate_memcg(): goto restart lru_gen_age_node() mem_cgroup_iter() check the timestamp of the oldest generation Specifically, when the restart happens in shrink_many(), it needs to stick with the (memcg LRU) generation it began with. In other words, it should neither re-read memcg_lru->seq nor age an lruvec of a different generation. Otherwise it can hit the same memcg multiple times without giving lru_gen_age_node() a chance to check the timestamp of that memcg's oldest generation (against min_ttl_ms). [1] https://lore.kernel.org/CAK8fFZ4DY+GtBA40Pm7Nn5xCHy+51w3sfxPqkqpqakSXYyX+Wg@mail.gmail.com/ Link: https://lkml.kernel.org/r/20231208061407.2125867-3-yuzhao@google.com Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists") Signed-off-by: Yu Zhao Tested-by: T.J. Mercier Cc: Charan Teja Kalla Cc: Hillf Danton Cc: Jaroslav Pulchart Cc: Kairui Song Cc: Kalesh Singh Cc: Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 30 +++++++++++++++++------------- mm/vmscan.c | 30 ++++++++++++++++-------------- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3c25226beeed..23533b12bee2 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -505,33 +505,37 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); * the old generation, is incremented when all its bins become empty. * * There are four operations: - * 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its + * 1. MEMCG_LRU_HEAD, which moves a memcg to the head of a random bin in its * current generation (old or young) and updates its "seg" to "head"; - * 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its + * 2. MEMCG_LRU_TAIL, which moves a memcg to the tail of a random bin in its * current generation (old or young) and updates its "seg" to "tail"; - * 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old + * 3. MEMCG_LRU_OLD, which moves a memcg to the head of a random bin in the old * generation, updates its "gen" to "old" and resets its "seg" to "default"; - * 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the + * 4. MEMCG_LRU_YOUNG, which moves a memcg to the tail of a random bin in the * young generation, updates its "gen" to "young" and resets its "seg" to * "default". * * The events that trigger the above operations are: * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; - * 2. The first attempt to reclaim an memcg below low, which triggers + * 2. The first attempt to reclaim a memcg below low, which triggers * MEMCG_LRU_TAIL; - * 3. The first attempt to reclaim an memcg below reclaimable size threshold, + * 3. The first attempt to reclaim a memcg below reclaimable size threshold, * which triggers MEMCG_LRU_TAIL; - * 4. The second attempt to reclaim an memcg below reclaimable size threshold, + * 4. The second attempt to reclaim a memcg below reclaimable size threshold, * which triggers MEMCG_LRU_YOUNG; - * 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG; + * 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG; * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; - * 7. Offlining an memcg, which triggers MEMCG_LRU_OLD. + * 7. Offlining a memcg, which triggers MEMCG_LRU_OLD. * - * Note that memcg LRU only applies to global reclaim, and the round-robin - * incrementing of their max_seq counters ensures the eventual fairness to all - * eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). + * Notes: + * 1. Memcg LRU only applies to global reclaim, and the round-robin incrementing + * of their max_seq counters ensures the eventual fairness to all eligible + * memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). + * 2. There are only two valid generations: old (seq) and young (seq+1). + * MEMCG_NR_GENS is set to three so that when reading the generation counter + * locklessly, a stale value (seq-1) does not wraparound to young. */ -#define MEMCG_NR_GENS 2 +#define MEMCG_NR_GENS 3 #define MEMCG_NR_BINS 8 struct lru_gen_memcg { diff --git a/mm/vmscan.c b/mm/vmscan.c index bb7dacd8f753..e5fe4a94345f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4089,6 +4089,9 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) else VM_WARN_ON_ONCE(true); + WRITE_ONCE(lruvec->lrugen.seg, seg); + WRITE_ONCE(lruvec->lrugen.gen, new); + hlist_nulls_del_rcu(&lruvec->lrugen.list); if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD) @@ -4099,9 +4102,6 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) pgdat->memcg_lru.nr_memcgs[old]--; pgdat->memcg_lru.nr_memcgs[new]++; - lruvec->lrugen.gen = new; - WRITE_ONCE(lruvec->lrugen.seg, seg); - if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq)) WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1); @@ -4124,11 +4124,11 @@ void lru_gen_online_memcg(struct mem_cgroup *memcg) gen = get_memcg_gen(pgdat->memcg_lru.seq); + lruvec->lrugen.gen = gen; + hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]); pgdat->memcg_lru.nr_memcgs[gen]++; - lruvec->lrugen.gen = gen; - spin_unlock_irq(&pgdat->memcg_lru.lock); } } @@ -4635,7 +4635,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool DEFINE_MAX_SEQ(lruvec); if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg)) - return 0; + return -1; if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan)) return nr_to_scan; @@ -4710,7 +4710,7 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) cond_resched(); } - /* whether try_to_inc_max_seq() was successful */ + /* whether this lruvec should be rotated */ return nr_to_scan < 0; } @@ -4764,13 +4764,13 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) struct lruvec *lruvec; struct lru_gen_folio *lrugen; struct mem_cgroup *memcg; - const struct hlist_nulls_node *pos; + struct hlist_nulls_node *pos; + gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq)); bin = first_bin = get_random_u32_below(MEMCG_NR_BINS); restart: op = 0; memcg = NULL; - gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq)); rcu_read_lock(); @@ -4781,6 +4781,10 @@ restart: } mem_cgroup_put(memcg); + memcg = NULL; + + if (gen != READ_ONCE(lrugen->gen)) + continue; lruvec = container_of(lrugen, struct lruvec, lrugen); memcg = lruvec_memcg(lruvec); @@ -4865,16 +4869,14 @@ static void set_initial_priority(struct pglist_data *pgdat, struct scan_control if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH) return; /* - * Determine the initial priority based on ((total / MEMCG_NR_GENS) >> - * priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, where the - * estimated reclaimed_to_scanned_ratio = inactive / total. + * Determine the initial priority based on + * (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, + * where reclaimed_to_scanned_ratio = inactive / total. */ reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE); if (get_swappiness(lruvec, sc)) reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON); - reclaimable /= MEMCG_NR_GENS; - /* round down reclaimable and round up sc->nr_to_reclaim */ priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1); -- cgit From 4376807bf2d5371c3e00080c972be568c3f8a7d1 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 7 Dec 2023 23:14:07 -0700 Subject: mm/mglru: reclaim offlined memcgs harder In the effort to reduce zombie memcgs [1], it was discovered that the memcg LRU doesn't apply enough pressure on offlined memcgs. Specifically, instead of rotating them to the tail of the current generation (MEMCG_LRU_TAIL) for a second attempt, it moves them to the next generation (MEMCG_LRU_YOUNG) after the first attempt. Not applying enough pressure on offlined memcgs can cause them to build up, and this can be particularly harmful to memory-constrained systems. On Pixel 8 Pro, launching apps for 50 cycles: Before After Change Zombie memcgs 45 35 -22% [1] https://lore.kernel.org/CABdmKX2M6koq4Q0Cmp_-=wbP0Qa190HdEGGaHfxNS05gAkUtPA@mail.gmail.com/ Link: https://lkml.kernel.org/r/20231208061407.2125867-4-yuzhao@google.com Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists") Signed-off-by: Yu Zhao Reported-by: T.J. Mercier Tested-by: T.J. Mercier Cc: Charan Teja Kalla Cc: Hillf Danton Cc: Jaroslav Pulchart Cc: Kairui Song Cc: Kalesh Singh Cc: Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 8 ++++---- mm/vmscan.c | 24 ++++++++++++++++-------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 23533b12bee2..9db36e197712 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -519,10 +519,10 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; * 2. The first attempt to reclaim a memcg below low, which triggers * MEMCG_LRU_TAIL; - * 3. The first attempt to reclaim a memcg below reclaimable size threshold, - * which triggers MEMCG_LRU_TAIL; - * 4. The second attempt to reclaim a memcg below reclaimable size threshold, - * which triggers MEMCG_LRU_YOUNG; + * 3. The first attempt to reclaim a memcg offlined or below reclaimable size + * threshold, which triggers MEMCG_LRU_TAIL; + * 4. The second attempt to reclaim a memcg offlined or below reclaimable size + * threshold, which triggers MEMCG_LRU_YOUNG; * 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG; * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; * 7. Offlining a memcg, which triggers MEMCG_LRU_OLD. diff --git a/mm/vmscan.c b/mm/vmscan.c index e5fe4a94345f..9dd8977de5a2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4598,7 +4598,12 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, } /* try to scrape all its memory if this memcg was deleted */ - *nr_to_scan = mem_cgroup_online(memcg) ? (total >> sc->priority) : total; + if (!mem_cgroup_online(memcg)) { + *nr_to_scan = total; + return false; + } + + *nr_to_scan = total >> sc->priority; /* * The aging tries to be lazy to reduce the overhead, while the eviction @@ -4719,14 +4724,9 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) bool success; unsigned long scanned = sc->nr_scanned; unsigned long reclaimed = sc->nr_reclaimed; - int seg = lru_gen_memcg_seg(lruvec); struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); - /* see the comment on MEMCG_NR_GENS */ - if (!lruvec_is_sizable(lruvec, sc)) - return seg != MEMCG_LRU_TAIL ? MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG; - mem_cgroup_calculate_protection(NULL, memcg); if (mem_cgroup_below_min(NULL, memcg)) @@ -4734,7 +4734,7 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) if (mem_cgroup_below_low(NULL, memcg)) { /* see the comment on MEMCG_NR_GENS */ - if (seg != MEMCG_LRU_TAIL) + if (lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL) return MEMCG_LRU_TAIL; memcg_memory_event(memcg, MEMCG_LOW); @@ -4750,7 +4750,15 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) flush_reclaim_state(sc); - return success ? MEMCG_LRU_YOUNG : 0; + if (success && mem_cgroup_online(memcg)) + return MEMCG_LRU_YOUNG; + + if (!success && lruvec_is_sizable(lruvec, sc)) + return 0; + + /* one retry if offlined or too small */ + return lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL ? + MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG; } #ifdef CONFIG_MEMCG -- cgit From 9e45e39dc249c970d99d2681f6bcb55736fd725c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 11 Dec 2023 11:44:20 -0500 Subject: ring-buffer: Do not update before stamp when switching sub-buffers The ring buffer timestamps are synchronized by two timestamp placeholders. One is the "before_stamp" and the other is the "write_stamp" (sometimes referred to as the "after stamp" but only in the comments. These two stamps are key to knowing how to handle nested events coming in with a lockless system. When moving across sub-buffers, the before stamp is updated but the write stamp is not. There's an effort to put back the before stamp to something that seems logical in case there's nested events. But as the current event is about to cross sub-buffers, and so will any new nested event that happens, updating the before stamp is useless, and could even introduce new race conditions. The first event on a sub-buffer simply uses the sub-buffer's timestamp and keeps a "delta" of zero. The "before_stamp" and "write_stamp" are not used in the algorithm in this case. There's no reason to try to fix the before_stamp when this happens. As a bonus, it removes a cmpxchg() when crossing sub-buffers! Link: https://lore.kernel.org/linux-trace-kernel/20231211114420.36dde01b@gandalf.local.home Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: a389d86f7fd09 ("ring-buffer: Have nested events still record running time stamp") Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index dcd47895b424..c7abcc215fe2 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3607,14 +3607,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* See if we shot pass the end of this buffer page */ if (unlikely(write > BUF_PAGE_SIZE)) { - /* before and after may now different, fix it up*/ - b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); - a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); - if (a_ok && b_ok && info->before != info->after) - (void)rb_time_cmpxchg(&cpu_buffer->before_stamp, - info->before, info->after); - if (a_ok && b_ok) - check_buffer(cpu_buffer, info, CHECK_FULL_PAGE); + check_buffer(cpu_buffer, info, CHECK_FULL_PAGE); return rb_move_tail(cpu_buffer, tail, info); } -- cgit From b049525855fdd0024881c9b14b8fbec61c3f53d3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Dec 2023 07:25:58 -0500 Subject: ring-buffer: Have saved event hold the entire event For the ring buffer iterator (non-consuming read), the event needs to be copied into the iterator buffer to make sure that a writer does not overwrite it while the user is reading it. If a write happens during the copy, the buffer is simply discarded. But the temp buffer itself was not big enough. The allocation of the buffer was only BUF_MAX_DATA_SIZE, which is the maximum data size that can be passed into the ring buffer and saved. But the temp buffer needs to hold the meta data as well. That would be BUF_PAGE_SIZE and not BUF_MAX_DATA_SIZE. Link: https://lore.kernel.org/linux-trace-kernel/20231212072558.61f76493@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: 785888c544e04 ("ring-buffer: Have rb_iter_head_event() handle concurrent writer") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c7abcc215fe2..1d9caee7f542 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2409,7 +2409,7 @@ rb_iter_head_event(struct ring_buffer_iter *iter) */ barrier(); - if ((iter->head + length) > commit || length > BUF_MAX_DATA_SIZE) + if ((iter->head + length) > commit || length > BUF_PAGE_SIZE) /* Writer corrupted the read? */ goto reset; @@ -5118,7 +5118,8 @@ ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags) if (!iter) return NULL; - iter->event = kmalloc(BUF_MAX_DATA_SIZE, flags); + /* Holds the entire event: data and meta data */ + iter->event = kmalloc(BUF_PAGE_SIZE, flags); if (!iter->event) { kfree(iter); return NULL; -- cgit From 60be76eeabb3d83858cc6577fc65c7d0f36ffd42 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Dec 2023 08:44:44 -0500 Subject: tracing: Add size check when printing trace_marker output If for some reason the trace_marker write does not have a nul byte for the string, it will overflow the print: trace_seq_printf(s, ": %s", field->buf); The field->buf could be missing the nul byte. To prevent overflow, add the max size that the buf can be by using the event size and the field location. int max = iter->ent_size - offsetof(struct print_entry, buf); trace_seq_printf(s, ": %*.s", max, field->buf); Link: https://lore.kernel.org/linux-trace-kernel/20231212084444.4619b8ce@gandalf.local.home Cc: Mark Rutland Cc: Mathieu Desnoyers Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_output.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index d8b302d01083..3e7fa44dc2b2 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1587,11 +1587,12 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter, { struct print_entry *field; struct trace_seq *s = &iter->seq; + int max = iter->ent_size - offsetof(struct print_entry, buf); trace_assign_type(field, iter->ent); seq_print_ip_sym(s, field->ip, flags); - trace_seq_printf(s, ": %s", field->buf); + trace_seq_printf(s, ": %.*s", max, field->buf); return trace_handle_return(s); } @@ -1600,10 +1601,11 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags, struct trace_event *event) { struct print_entry *field; + int max = iter->ent_size - offsetof(struct print_entry, buf); trace_assign_type(field, iter->ent); - trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf); + trace_seq_printf(&iter->seq, "# %lx %.*s", field->ip, max, field->buf); return trace_handle_return(&iter->seq); } -- cgit From f3f32a356c0d2379d4431364e74f101f8f075ce3 Mon Sep 17 00:00:00 2001 From: Salvatore Dipietro Date: Fri, 8 Dec 2023 10:20:49 -0800 Subject: tcp: disable tcp_autocorking for socket when TCP_NODELAY flag is set Based on the tcp man page, if TCP_NODELAY is set, it disables Nagle's algorithm and packets are sent as soon as possible. However in the `tcp_push` function where autocorking is evaluated the `nonagle` value set by TCP_NODELAY is not considered which can trigger unexpected corking of packets and induce delays. For example, if two packets are generated as part of a server's reply, if the first one is not transmitted on the wire quickly enough, the second packet can trigger the autocorking in `tcp_push` and be delayed instead of sent as soon as possible. It will either wait for additional packets to be coalesced or an ACK from the client before transmitting the corked packet. This can interact badly if the receiver has tcp delayed acks enabled, introducing 40ms extra delay in completion times. It is not always possible to control who has delayed acks set, but it is possible to adjust when and how autocorking is triggered. Patch prevents autocorking if the TCP_NODELAY flag is set on the socket. Patch has been tested using an AWS c7g.2xlarge instance with Ubuntu 22.04 and Apache Tomcat 9.0.83 running the basic servlet below: import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintWriter; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; public class HelloWorldServlet extends HttpServlet { @Override protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType("text/html;charset=utf-8"); OutputStreamWriter osw = new OutputStreamWriter(response.getOutputStream(),"UTF-8"); String s = "a".repeat(3096); osw.write(s,0,s.length()); osw.flush(); } } Load was applied using wrk2 (https://github.com/kinvolk/wrk2) from an AWS c6i.8xlarge instance. With the current auto-corking behavior and TCP_NODELAY set an additional 40ms latency from P99.99+ values are observed. With the patch applied we see no occurrences of 40ms latencies. The patch has also been tested with iperf and uperf benchmarks and no regression was observed. # No patch with tcp_autocorking=1 and TCP_NODELAY set on all sockets ./wrk -t32 -c128 -d40s --latency -R10000 http://172.31.49.177:8080/hello/hello' ... 50.000% 0.91ms 75.000% 1.12ms 90.000% 1.46ms 99.000% 1.73ms 99.900% 1.96ms 99.990% 43.62ms <<< 40+ ms extra latency 99.999% 48.32ms 100.000% 49.34ms # With patch ./wrk -t32 -c128 -d40s --latency -R10000 http://172.31.49.177:8080/hello/hello' ... 50.000% 0.89ms 75.000% 1.13ms 90.000% 1.44ms 99.000% 1.67ms 99.900% 1.78ms 99.990% 2.27ms <<< no 40+ ms extra latency 99.999% 3.71ms 100.000% 4.57ms Fixes: f54b311142a9 ("tcp: auto corking") Signed-off-by: Salvatore Dipietro Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ff6838ca2e58..96fc89ccec33 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -716,7 +716,7 @@ void tcp_push(struct sock *sk, int flags, int mss_now, tcp_mark_urg(tp, flags); - if (tcp_should_autocork(sk, skb, size_goal)) { + if (!nonagle && tcp_should_autocork(sk, skb, size_goal)) { /* avoid atomic op if TSQ_THROTTLED bit is already set */ if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) { -- cgit From fa97e21e74df5ef63a442e4cfd13fd113fc8196e Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Tue, 12 Dec 2023 21:09:17 +0100 Subject: dt-bindings: panel-simple-dsi: move LG 5" HD TFT LCD panel into DSI yaml Originally was in the panel-simple, but belongs to panel-simple-dsi. See arch/arm/boot/dts/nvidia/tegra114-roth.dts for more details. Resolves the following warning: ``` arch/arm/boot/dts/tegra114-roth.dt.yaml: panel@0: 'reg' does not match any of the regexes: 'pinctrl-[0-9]+' From schema: Documentation/devicetree/bindings/display/panel/panel-simple.yaml ``` Fixes: 310abcea76e9 ("dt-bindings: display: convert simple lg panels to DT Schema") Signed-off-by: David Heidelberg Acked-by: Krzysztof Kozlowski Acked-by: Jessica Zhang Link: https://lore.kernel.org/r/20231212200934.99262-1-david@ixit.cz Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231212200934.99262-1-david@ixit.cz --- Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml | 2 ++ Documentation/devicetree/bindings/display/panel/panel-simple.yaml | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml index 73674baea75d..f9160d7bac3c 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml @@ -42,6 +42,8 @@ properties: - lg,acx467akm-7 # LG Corporation 7" WXGA TFT LCD panel - lg,ld070wx3-sl01 + # LG Corporation 5" HD TFT LCD panel + - lg,lh500wx1-sd03 # One Stop Displays OSD101T2587-53TS 10.1" 1920x1200 panel - osddisplays,osd101t2587-53ts # Panasonic 10" WUXGA TFT LCD panel diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml index 3ec9ee95045f..11422af3477e 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml @@ -208,8 +208,6 @@ properties: - lemaker,bl035-rgb-002 # LG 7" (800x480 pixels) TFT LCD panel - lg,lb070wv8 - # LG Corporation 5" HD TFT LCD panel - - lg,lh500wx1-sd03 # LG LP079QX1-SP0V 7.9" (1536x2048 pixels) TFT LCD panel - lg,lp079qx1-sp0v # LG 9.7" (2048x1536 pixels) TFT LCD panel -- cgit From e87d3a1370ce9f04770d789bcf7cce44865d2e8d Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Mon, 11 Dec 2023 18:33:11 +0800 Subject: stmmac: dwmac-loongson: Make sure MDIO is initialized before use Generic code will use mdio. If it is not initialized before use, the kernel will Oops. Fixes: 30bba69d7db4 ("stmmac: pci: Add dwmac support for Loongson") Signed-off-by: Yanteng Si Signed-off-by: Feiyang Chen Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 2cd6fce5c993..e7701326adc6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -68,17 +68,15 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id if (!plat) return -ENOMEM; + plat->mdio_bus_data = devm_kzalloc(&pdev->dev, + sizeof(*plat->mdio_bus_data), + GFP_KERNEL); + if (!plat->mdio_bus_data) + return -ENOMEM; + plat->mdio_node = of_get_child_by_name(np, "mdio"); if (plat->mdio_node) { dev_info(&pdev->dev, "Found MDIO subnode\n"); - - plat->mdio_bus_data = devm_kzalloc(&pdev->dev, - sizeof(*plat->mdio_bus_data), - GFP_KERNEL); - if (!plat->mdio_bus_data) { - ret = -ENOMEM; - goto err_put_node; - } plat->mdio_bus_data->needs_reset = true; } -- cgit From 31fea092c6f9f8fb2c40a08137907f5fbeae55dd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 11 Dec 2023 18:33:53 +0800 Subject: stmmac: dwmac-loongson: drop useless check for compatible fallback Device binds to proper PCI ID (LOONGSON, 0x7a03), already listed in DTS, so checking for some other compatible does not make sense. It cannot be bound to unsupported platform. Drop useless, incorrect (space in between) and undocumented compatible. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Yanteng Si Reviewed-by: Conor Dooley Acked-by: Jiaxun Yang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index e7701326adc6..9e40c28d453a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -59,11 +59,6 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id return -ENODEV; } - if (!of_device_is_compatible(np, "loongson, pci-gmac")) { - pr_info("dwmac_loongson_pci: Incompatible OF node\n"); - return -ENODEV; - } - plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL); if (!plat) return -ENOMEM; -- cgit From 4907a3f54b12b8209864572a312cf967befcae80 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 11 Dec 2023 18:33:54 +0800 Subject: MIPS: dts: loongson: drop incorrect dwmac fallback compatible Device binds to proper PCI ID (LOONGSON, 0x7a03), already listed in DTS, so checking for some other compatible does not make sense. It cannot be bound to unsupported platform. Drop useless, incorrect (space in between) and undocumented compatible. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Yanteng Si Reviewed-by: Conor Dooley Acked-by: Jiaxun Yang Signed-off-by: David S. Miller --- arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi | 3 +-- arch/mips/boot/dts/loongson/ls7a-pch.dtsi | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi index f878f47e4501..ee3e2153dd13 100644 --- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi @@ -130,8 +130,7 @@ compatible = "pci0014,7a03.0", "pci0014,7a03", "pciclass0c0320", - "pciclass0c03", - "loongson, pci-gmac"; + "pciclass0c03"; reg = <0x1800 0x0 0x0 0x0 0x0>; interrupts = <12 IRQ_TYPE_LEVEL_LOW>, diff --git a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi index 7c69e8245c2f..cce9428afc41 100644 --- a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi +++ b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi @@ -193,8 +193,7 @@ compatible = "pci0014,7a03.0", "pci0014,7a03", "pciclass020000", - "pciclass0200", - "loongson, pci-gmac"; + "pciclass0200"; reg = <0x1800 0x0 0x0 0x0 0x0>; interrupts = <12 IRQ_TYPE_LEVEL_HIGH>, -- cgit From 0cf72f7f14d12cb065c3d01954cf42fc5638aa69 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Sat, 25 Nov 2023 15:51:04 -0800 Subject: powerpc/pseries/vas: Migration suspend waits for no in-progress open windows The hypervisor returns migration failure if all VAS windows are not closed. During pre-migration stage, vas_migration_handler() sets migration_in_progress flag and closes all windows from the list. The allocate VAS window routine checks the migration flag, setup the window and then add it to the list. So there is possibility of the migration handler missing the window that is still in the process of setup. t1: Allocate and open VAS t2: Migration event window lock vas_pseries_mutex If migration_in_progress set unlock vas_pseries_mutex return open window HCALL unlock vas_pseries_mutex Modify window HCALL lock vas_pseries_mutex setup window migration_in_progress=true Closes all windows from the list // May miss windows that are // not in the list unlock vas_pseries_mutex lock vas_pseries_mutex return if nr_closed_windows == 0 // No DLPAR CPU or migration add window to the list // Window will be added to the // list after the setup is completed unlock vas_pseries_mutex return unlock vas_pseries_mutex Close VAS window // due to DLPAR CPU or migration return -EBUSY This patch resolves the issue with the following steps: - Set the migration_in_progress flag without holding mutex. - Introduce nr_open_wins_progress counter in VAS capabilities struct - This counter tracks the number of open windows are still in progress - The allocate setup window thread closes windows if the migration is set and decrements nr_open_window_progress counter - The migration handler waits for no in-progress open windows. The code flow with the fix is as follows: t1: Allocate and open VAS t2: Migration event window lock vas_pseries_mutex If migration_in_progress set unlock vas_pseries_mutex return open window HCALL nr_open_wins_progress++ // Window opened, but not // added to the list yet unlock vas_pseries_mutex Modify window HCALL migration_in_progress=true setup window lock vas_pseries_mutex Closes all windows from the list While nr_open_wins_progress { unlock vas_pseries_mutex lock vas_pseries_mutex sleep if nr_closed_windows == 0 // Wait if any open window in or migration is not started // progress. The open window // No DLPAR CPU or migration // thread closes the window without add window to the list // adding to the list and return if nr_open_wins_progress-- // the migration is in progress. unlock vas_pseries_mutex return Close VAS window nr_open_wins_progress-- unlock vas_pseries_mutex return -EBUSY lock vas_pseries_mutex } unlock vas_pseries_mutex return Fixes: 37e6764895ef ("powerpc/pseries/vas: Add VAS migration handler") Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://msgid.link/20231125235104.3405008-1-haren@linux.ibm.com --- arch/powerpc/platforms/pseries/vas.c | 51 +++++++++++++++++++++++++++++++----- arch/powerpc/platforms/pseries/vas.h | 2 ++ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index b1f25bac280b..71d52a670d95 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -385,11 +385,15 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * same fault IRQ is not freed by the OS before. */ mutex_lock(&vas_pseries_mutex); - if (migration_in_progress) + if (migration_in_progress) { rc = -EBUSY; - else + } else { rc = allocate_setup_window(txwin, (u64 *)&domain[0], cop_feat_caps->win_type); + if (!rc) + caps->nr_open_wins_progress++; + } + mutex_unlock(&vas_pseries_mutex); if (rc) goto out; @@ -404,8 +408,17 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, goto out_free; txwin->win_type = cop_feat_caps->win_type; - mutex_lock(&vas_pseries_mutex); + /* + * The migration SUSPEND thread sets migration_in_progress and + * closes all open windows from the list. But the window is + * added to the list after open and modify HCALLs. So possible + * that migration_in_progress is set before modify HCALL which + * may cause some windows are still open when the hypervisor + * initiates the migration. + * So checks the migration_in_progress flag again and close all + * open windows. + * * Possible to lose the acquired credit with DLPAR core * removal after the window is opened. So if there are any * closed windows (means with lost credits), do not give new @@ -413,9 +426,11 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * after the existing windows are reopened when credits are * available. */ - if (!caps->nr_close_wins) { + mutex_lock(&vas_pseries_mutex); + if (!caps->nr_close_wins && !migration_in_progress) { list_add(&txwin->win_list, &caps->list); caps->nr_open_windows++; + caps->nr_open_wins_progress--; mutex_unlock(&vas_pseries_mutex); vas_user_win_add_mm_context(&txwin->vas_win.task_ref); return &txwin->vas_win; @@ -433,6 +448,12 @@ out_free: */ free_irq_setup(txwin); h_deallocate_vas_window(txwin->vas_win.winid); + /* + * Hold mutex and reduce nr_open_wins_progress counter. + */ + mutex_lock(&vas_pseries_mutex); + caps->nr_open_wins_progress--; + mutex_unlock(&vas_pseries_mutex); out: atomic_dec(&cop_feat_caps->nr_used_credits); kfree(txwin); @@ -937,14 +958,14 @@ int vas_migration_handler(int action) struct vas_caps *vcaps; int i, rc = 0; + pr_info("VAS migration event %d\n", action); + /* * NX-GZIP is not enabled. Nothing to do for migration. */ if (!copypaste_feat) return rc; - mutex_lock(&vas_pseries_mutex); - if (action == VAS_SUSPEND) migration_in_progress = true; else @@ -990,12 +1011,27 @@ int vas_migration_handler(int action) switch (action) { case VAS_SUSPEND: + mutex_lock(&vas_pseries_mutex); rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, true); + /* + * Windows are included in the list after successful + * open. So wait for closing these in-progress open + * windows in vas_allocate_window() which will be + * done if the migration_in_progress is set. + */ + while (vcaps->nr_open_wins_progress) { + mutex_unlock(&vas_pseries_mutex); + msleep(10); + mutex_lock(&vas_pseries_mutex); + } + mutex_unlock(&vas_pseries_mutex); break; case VAS_RESUME: + mutex_lock(&vas_pseries_mutex); atomic_set(&caps->nr_total_credits, new_nr_creds); rc = reconfig_open_windows(vcaps, new_nr_creds, true); + mutex_unlock(&vas_pseries_mutex); break; default: /* should not happen */ @@ -1011,8 +1047,9 @@ int vas_migration_handler(int action) goto out; } + pr_info("VAS migration event (%d) successful\n", action); + out: - mutex_unlock(&vas_pseries_mutex); return rc; } diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 7115043ec488..45567cd13178 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -91,6 +91,8 @@ struct vas_cop_feat_caps { struct vas_caps { struct vas_cop_feat_caps caps; struct list_head list; /* List of open windows */ + int nr_open_wins_progress; /* Number of open windows in */ + /* progress. Used in migration */ int nr_close_wins; /* closed windows in the hypervisor for DLPAR */ int nr_open_windows; /* Number of successful open windows */ u8 feat; /* Feature type */ -- cgit From d2441d3e8c0c076d0a2e705fa235c76869a85140 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 5 Dec 2023 16:11:05 +1100 Subject: MAINTAINERS: powerpc: Add Aneesh & Naveen Aneesh and Naveen are helping out with some aspects of upstream maintenance, add them as reviewers. Acked-by: "Aneesh Kumar K.V (IBM)" Acked-by: "Naveen N. Rao" Signed-off-by: Michael Ellerman Link: https://msgid.link/20231205051105.736470-1-mpe@ellerman.id.au --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ea790149af79..0a842ee1c50b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12240,6 +12240,8 @@ LINUX FOR POWERPC (32-BIT AND 64-BIT) M: Michael Ellerman R: Nicholas Piggin R: Christophe Leroy +R: Aneesh Kumar K.V +R: Naveen N. Rao L: linuxppc-dev@lists.ozlabs.org S: Supported W: https://github.com/linuxppc/wiki/wiki -- cgit From 595e52284d24adc376890d3fc93bdca4707d9aca Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 13 Dec 2023 08:58:15 -0700 Subject: io_uring/poll: don't enable lazy wake for POLLEXCLUSIVE There are a few quirks around using lazy wake for poll unconditionally, and one of them is related the EPOLLEXCLUSIVE. Those may trigger exclusive wakeups, which wake a limited number of entries in the wait queue. If that wake number is less than the number of entries someone is waiting for (and that someone is also using DEFER_TASKRUN), then we can get stuck waiting for more entries while we should be processing the ones we already got. If we're doing exclusive poll waits, flag the request as not being compatible with lazy wakeups. Reported-by: Pavel Begunkov Fixes: 6ce4a93dbb5b ("io_uring/poll: use IOU_F_TWQ_LAZY_WAKE for wakeups") Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 +++ io_uring/poll.c | 20 +++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 805bb635cdf5..239a4f68801b 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -434,6 +434,7 @@ enum { /* keep async read/write and isreg together and in order */ REQ_F_SUPPORT_NOWAIT_BIT, REQ_F_ISREG_BIT, + REQ_F_POLL_NO_LAZY_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -501,6 +502,8 @@ enum { REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT), /* hashed into ->cancel_hash_locked, protected by ->uring_lock */ REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT), + /* don't use lazy poll wake for this request */ + REQ_F_POLL_NO_LAZY = BIT(REQ_F_POLL_NO_LAZY_BIT), }; typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts); diff --git a/io_uring/poll.c b/io_uring/poll.c index d38d05edb4fa..d59b74a99d4e 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -366,11 +366,16 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts) static void __io_poll_execute(struct io_kiocb *req, int mask) { + unsigned flags = 0; + io_req_set_res(req, mask, 0); req->io_task_work.func = io_poll_task_func; trace_io_uring_task_add(req, mask); - __io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE); + + if (!(req->flags & REQ_F_POLL_NO_LAZY)) + flags = IOU_F_TWQ_LAZY_WAKE; + __io_req_task_work_add(req, flags); } static inline void io_poll_execute(struct io_kiocb *req, int res) @@ -526,10 +531,19 @@ static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt, poll->head = head; poll->wait.private = (void *) wqe_private; - if (poll->events & EPOLLEXCLUSIVE) + if (poll->events & EPOLLEXCLUSIVE) { + /* + * Exclusive waits may only wake a limited amount of entries + * rather than all of them, this may interfere with lazy + * wake if someone does wait(events > 1). Ensure we don't do + * lazy wake for those, as we need to process each one as they + * come in. + */ + req->flags |= REQ_F_POLL_NO_LAZY; add_wait_queue_exclusive(head, &poll->wait); - else + } else { add_wait_queue(head, &poll->wait); + } } static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, -- cgit From 485053bb81c81a122edd982b263277e65d7485c5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 13 Dec 2023 11:14:09 -0500 Subject: fix ufs_get_locked_folio() breakage filemap_lock_folio() returns ERR_PTR(-ENOENT) if the thing is not in cache - not NULL like find_lock_page() used to. Fixes: 5fb7bd50b351 "ufs: add ufs_get_locked_folio and ufs_put_locked_folio" Signed-off-by: Al Viro --- fs/ufs/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 13ba34e6d64f..2acf191eb89e 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -245,7 +245,7 @@ struct folio *ufs_get_locked_folio(struct address_space *mapping, { struct inode *inode = mapping->host; struct folio *folio = filemap_lock_folio(mapping, index); - if (!folio) { + if (IS_ERR(folio)) { folio = read_mapping_folio(mapping, index, NULL); if (IS_ERR(folio)) { -- cgit From 6c9dbee84cd005bed5f9d07b3a2797ae6414b435 Mon Sep 17 00:00:00 2001 From: Farouk Bouabid Date: Wed, 13 Dec 2023 15:50:45 +0100 Subject: drm/panel: ltk050h3146w: Set burst mode for ltk050h3148w The ltk050h3148w variant expects the horizontal component lane byte clock cycle(lbcc) to be calculated using lane_mbps (burst mode) instead of the pixel clock. Using the pixel clock rate by default for this calculation was introduced in commit ac87d23694f4 ("drm/bridge: synopsys: dw-mipi-dsi: Use pixel clock rate to calculate lbcc") and starting from commit 93e82bb4de01 ("drm/bridge: synopsys: dw-mipi-dsi: Fix hcomponent lbcc for burst mode") only panels that support burst mode can keep using the lane_mbps. So add MIPI_DSI_MODE_VIDEO_BURST as part of the mode_flags for the dsi host. Fixes: 93e82bb4de01 ("drm/bridge: synopsys: dw-mipi-dsi: Fix hcomponent lbcc for burst mode") Signed-off-by: Farouk Bouabid Reviewed-by: Jessica Zhang Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20231213145045.41020-1-farouk.bouabid@theobroma-systems.com --- drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c b/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c index 6e3670508e3a..30919c872ac8 100644 --- a/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c +++ b/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c @@ -326,7 +326,7 @@ static const struct drm_display_mode ltk050h3148w_mode = { static const struct ltk050h3146w_desc ltk050h3148w_data = { .mode = <k050h3148w_mode, .init = ltk050h3148w_init_sequence, - .mode_flags = MIPI_DSI_MODE_VIDEO_SYNC_PULSE, + .mode_flags = MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_VIDEO_BURST, }; static int ltk050h3146w_init_sequence(struct ltk050h3146w *ctx) -- cgit From 9702817384aa4a3700643d0b26e71deac0172cfd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 10:56:29 -0800 Subject: Revert "tcp: disable tcp_autocorking for socket when TCP_NODELAY flag is set" This reverts commit f3f32a356c0d2379d4431364e74f101f8f075ce3. Paolo reports that the change disables autocorking even after the userspace sets TCP_CORK. Fixes: f3f32a356c0d ("tcp: disable tcp_autocorking for socket when TCP_NODELAY flag is set") Link: https://lore.kernel.org/r/0d30d5a41d3ac990573016308aaeacb40a9dc79f.camel@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 96fc89ccec33..ff6838ca2e58 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -716,7 +716,7 @@ void tcp_push(struct sock *sk, int flags, int mss_now, tcp_mark_urg(tp, flags); - if (!nonagle && tcp_should_autocork(sk, skb, size_goal)) { + if (tcp_should_autocork(sk, skb, size_goal)) { /* avoid atomic op if TSQ_THROTTLED bit is already set */ if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) { -- cgit From 829649443e78d85db0cff0c37cadb28fbb1a5f6f Mon Sep 17 00:00:00 2001 From: Yusong Gao Date: Wed, 13 Dec 2023 10:31:10 +0000 Subject: sign-file: Fix incorrect return values check There are some wrong return values check in sign-file when call OpenSSL API. The ERR() check cond is wrong because of the program only check the return value is < 0 which ignored the return val is 0. For example: 1. CMS_final() return 1 for success or 0 for failure. 2. i2d_CMS_bio_stream() returns 1 for success or 0 for failure. 3. i2d_TYPEbio() return 1 for success and 0 for failure. 4. BIO_free() return 1 for success and 0 for failure. Link: https://www.openssl.org/docs/manmaster/man3/ Fixes: e5a2e3c84782 ("scripts/sign-file.c: Add support for signing with a raw signature") Signed-off-by: Yusong Gao Reviewed-by: Juerg Haefliger Signed-off-by: David Howells Link: https://lore.kernel.org/r/20231213024405.624692-1-a869920004@gmail.com/ # v5 Signed-off-by: Linus Torvalds --- scripts/sign-file.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/sign-file.c b/scripts/sign-file.c index 598ef5465f82..3edb156ae52c 100644 --- a/scripts/sign-file.c +++ b/scripts/sign-file.c @@ -322,7 +322,7 @@ int main(int argc, char **argv) CMS_NOSMIMECAP | use_keyid | use_signed_attrs), "CMS_add1_signer"); - ERR(CMS_final(cms, bm, NULL, CMS_NOCERTS | CMS_BINARY) < 0, + ERR(CMS_final(cms, bm, NULL, CMS_NOCERTS | CMS_BINARY) != 1, "CMS_final"); #else @@ -341,10 +341,10 @@ int main(int argc, char **argv) b = BIO_new_file(sig_file_name, "wb"); ERR(!b, "%s", sig_file_name); #ifndef USE_PKCS7 - ERR(i2d_CMS_bio_stream(b, cms, NULL, 0) < 0, + ERR(i2d_CMS_bio_stream(b, cms, NULL, 0) != 1, "%s", sig_file_name); #else - ERR(i2d_PKCS7_bio(b, pkcs7) < 0, + ERR(i2d_PKCS7_bio(b, pkcs7) != 1, "%s", sig_file_name); #endif BIO_free(b); @@ -374,9 +374,9 @@ int main(int argc, char **argv) if (!raw_sig) { #ifndef USE_PKCS7 - ERR(i2d_CMS_bio_stream(bd, cms, NULL, 0) < 0, "%s", dest_name); + ERR(i2d_CMS_bio_stream(bd, cms, NULL, 0) != 1, "%s", dest_name); #else - ERR(i2d_PKCS7_bio(bd, pkcs7) < 0, "%s", dest_name); + ERR(i2d_PKCS7_bio(bd, pkcs7) != 1, "%s", dest_name); #endif } else { BIO *b; @@ -396,7 +396,7 @@ int main(int argc, char **argv) ERR(BIO_write(bd, &sig_info, sizeof(sig_info)) < 0, "%s", dest_name); ERR(BIO_write(bd, magic_number, sizeof(magic_number) - 1) < 0, "%s", dest_name); - ERR(BIO_free(bd) < 0, "%s", dest_name); + ERR(BIO_free(bd) != 1, "%s", dest_name); /* Finally, if we're signing in place, replace the original. */ if (replace_orig) -- cgit From ab4750332dbe535243def5dcebc24ca00c1f98ac Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 7 Dec 2023 10:14:41 -0500 Subject: drm/amdgpu/sdma5.2: add begin/end_use ring callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add begin/end_use ring callbacks to disallow GFXOFF when SDMA work is submitted and allow it again afterward. This should avoid corner cases where GFXOFF is erroneously entered when SDMA is still active. For now just allow/disallow GFXOFF in the begin and end helpers until we root cause the issue. This should not impact power as SDMA usage is pretty minimal and GFXOSS should not be active when SDMA is active anyway, this just makes it explicit. v2: move everything into sdma5.2 code. No reason for this to be generic at this point. v3: Add comments in new code Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2220 Reviewed-by: Mario Limonciello (v1) Tested-by: Mario Limonciello (v1) Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.15+ --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 83c240f741b5..0058f3f7cf6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1643,6 +1643,32 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* SDMA 5.2.3 (RMB) FW doesn't seem to properly + * disallow GFXOFF in some cases leading to + * hangs in SDMA. Disallow GFXOFF while SDMA is active. + * We can probably just limit this to 5.2.3, + * but it shouldn't hurt for other parts since + * this GFXOFF will be disallowed anyway when SDMA is + * active, this just makes it explicit. + */ + amdgpu_gfx_off_ctrl(adev, false); +} + +static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* SDMA 5.2.3 (RMB) FW doesn't seem to properly + * disallow GFXOFF in some cases leading to + * hangs in SDMA. Allow GFXOFF when SDMA is complete. + */ + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .name = "sdma_v5_2", .early_init = sdma_v5_2_early_init, @@ -1690,6 +1716,8 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { .test_ib = sdma_v5_2_ring_test_ib, .insert_nop = sdma_v5_2_ring_insert_nop, .pad_ib = sdma_v5_2_ring_pad_ib, + .begin_use = sdma_v5_2_ring_begin_use, + .end_use = sdma_v5_2_ring_end_use, .emit_wreg = sdma_v5_2_ring_emit_wreg, .emit_reg_wait = sdma_v5_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait, -- cgit From 2c7300d357a213d4a4bda691d1d5c06251e552d0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 12 Dec 2023 01:09:16 -0600 Subject: drm/amd: Fix a probing order problem on SDMA 2.4 commit 751e293f2c99 ("drm/amd: Move microcode init from sw_init to early_init for SDMA v2.4") made a fateful mistake in `adev->sdma.num_instances` wasn't declared when sdma_v2_4_init_microcode() was run. This caused probing to fail. Move the declaration to right before sdma_v2_4_init_microcode(). Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3043 Fixes: 751e293f2c99 ("drm/amd: Move microcode init from sw_init to early_init for SDMA v2.4") Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 45377a175250..8d5d86675a7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -813,12 +813,12 @@ static int sdma_v2_4_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; + adev->sdma.num_instances = SDMA_MAX_INSTANCE; + r = sdma_v2_4_init_microcode(adev); if (r) return r; - adev->sdma.num_instances = SDMA_MAX_INSTANCE; - sdma_v2_4_set_ring_funcs(adev); sdma_v2_4_set_buffer_funcs(adev); sdma_v2_4_set_vm_pte_funcs(adev); -- cgit From ceb9a321e7639700844aa3bf234a4e0884f13b77 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 8 Dec 2023 13:43:09 +0100 Subject: drm/amdgpu: fix tear down order in amdgpu_vm_pt_free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When freeing PD/PT with shadows it can happen that the shadow destruction races with detaching the PD/PT from the VM causing a NULL pointer dereference in the invalidation code. Fix this by detaching the the PD/PT from the VM first and then freeing the shadow instead. Signed-off-by: Christian König Fixes: https://gitlab.freedesktop.org/drm/amd/-/issues/2867 Cc: Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index a2287bb25223..a160265ddc07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -642,13 +642,14 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) if (!entry->bo) return; + + entry->bo->vm_bo = NULL; shadow = amdgpu_bo_shadowed(entry->bo); if (shadow) { ttm_bo_set_bulk_move(&shadow->tbo, NULL); amdgpu_bo_unref(&shadow); } ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); - entry->bo->vm_bo = NULL; spin_lock(&entry->vm->status_lock); list_del(&entry->vm_status); -- cgit From a4236c4b410857a70647c410e886c8a0455ec4fb Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 4 Dec 2023 15:51:50 +0100 Subject: drm/amdgpu: warn when there are still mappings when a BO is destroyed v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can only happen when there is a reference counting bug. v2: fix typo Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index d79b4ca1ecfc..5ad03f2afdb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1343,6 +1343,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) abo = ttm_to_amdgpu_bo(bo); + WARN_ON(abo->vm_bo); + if (abo->kfd_bo) amdgpu_amdkfd_release_notify(abo); -- cgit From 60316d7f10b17a7ebb1ead0642fee8710e1560e0 Mon Sep 17 00:00:00 2001 From: Nikolay Kuratov Date: Mon, 11 Dec 2023 19:23:17 +0300 Subject: vsock/virtio: Fix unsigned integer wrap around in virtio_transport_has_space() We need to do signed arithmetic if we expect condition `if (bytes < 0)` to be possible Found by Linux Verification Center (linuxtesting.org) with SVACE Fixes: 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko") Signed-off-by: Nikolay Kuratov Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20231211162317.4116625-1-kniv@yandex-team.ru Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index c8e162c9d1df..6df246b53260 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -843,7 +843,7 @@ static s64 virtio_transport_has_space(struct vsock_sock *vsk) struct virtio_vsock_sock *vvs = vsk->trans; s64 bytes; - bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); + bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); if (bytes < 0) bytes = 0; -- cgit From 23d05d563b7e7b0314e65c8e882bc27eac2da8e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Dec 2023 16:46:21 +0000 Subject: net: prevent mss overflow in skb_segment() Once again syzbot is able to crash the kernel in skb_segment() [1] GSO_BY_FRAGS is a forbidden value, but unfortunately the following computation in skb_segment() can reach it quite easily : mss = mss * partial_segs; 65535 = 3 * 5 * 17 * 257, so many initial values of mss can lead to a bad final result. Make sure to limit segmentation so that the new mss value is smaller than GSO_BY_FRAGS. [1] general protection fault, probably for non-canonical address 0xdffffc000000000e: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000070-0x0000000000000077] CPU: 1 PID: 5079 Comm: syz-executor993 Not tainted 6.7.0-rc4-syzkaller-00141-g1ae4cd3cbdd0 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 RIP: 0010:skb_segment+0x181d/0x3f30 net/core/skbuff.c:4551 Code: 83 e3 02 e9 fb ed ff ff e8 90 68 1c f9 48 8b 84 24 f8 00 00 00 48 8d 78 70 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e 8a 21 00 00 48 8b 84 24 f8 00 RSP: 0018:ffffc900043473d0 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000010046 RCX: ffffffff886b1597 RDX: 000000000000000e RSI: ffffffff886b2520 RDI: 0000000000000070 RBP: ffffc90004347578 R08: 0000000000000005 R09: 000000000000ffff R10: 000000000000ffff R11: 0000000000000002 R12: ffff888063202ac0 R13: 0000000000010000 R14: 000000000000ffff R15: 0000000000000046 FS: 0000555556e7e380(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020010000 CR3: 0000000027ee2000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: udp6_ufo_fragment+0xa0e/0xd00 net/ipv6/udp_offload.c:109 ipv6_gso_segment+0x534/0x17e0 net/ipv6/ip6_offload.c:120 skb_mac_gso_segment+0x290/0x610 net/core/gso.c:53 __skb_gso_segment+0x339/0x710 net/core/gso.c:124 skb_gso_segment include/net/gso.h:83 [inline] validate_xmit_skb+0x36c/0xeb0 net/core/dev.c:3626 __dev_queue_xmit+0x6f3/0x3d60 net/core/dev.c:4338 dev_queue_xmit include/linux/netdevice.h:3134 [inline] packet_xmit+0x257/0x380 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3087 [inline] packet_sendmsg+0x24c6/0x5220 net/packet/af_packet.c:3119 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 __sys_sendto+0x255/0x340 net/socket.c:2190 __do_sys_sendto net/socket.c:2202 [inline] __se_sys_sendto net/socket.c:2198 [inline] __x64_sys_sendto+0xe0/0x1b0 net/socket.c:2198 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7f8692032aa9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 d1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fff8d685418 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f8692032aa9 RDX: 0000000000010048 RSI: 00000000200000c0 RDI: 0000000000000003 RBP: 00000000000f4240 R08: 0000000020000540 R09: 0000000000000014 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fff8d685480 R13: 0000000000000001 R14: 00007fff8d685480 R15: 0000000000000003 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:skb_segment+0x181d/0x3f30 net/core/skbuff.c:4551 Code: 83 e3 02 e9 fb ed ff ff e8 90 68 1c f9 48 8b 84 24 f8 00 00 00 48 8d 78 70 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e 8a 21 00 00 48 8b 84 24 f8 00 RSP: 0018:ffffc900043473d0 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000010046 RCX: ffffffff886b1597 RDX: 000000000000000e RSI: ffffffff886b2520 RDI: 0000000000000070 RBP: ffffc90004347578 R08: 0000000000000005 R09: 000000000000ffff R10: 000000000000ffff R11: 0000000000000002 R12: ffff888063202ac0 R13: 0000000000010000 R14: 000000000000ffff R15: 0000000000000046 FS: 0000555556e7e380(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020010000 CR3: 0000000027ee2000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 3953c46c3ac7 ("sk_buff: allow segmenting based on frag sizes") Signed-off-by: Eric Dumazet Cc: Marcelo Ricardo Leitner Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20231212164621.4131800-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b157efea5dea..83af8aaeb893 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4522,8 +4522,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, /* GSO partial only requires that we trim off any excess that * doesn't fit into an MSS sized block, so take care of that * now. + * Cap len to not accidentally hit GSO_BY_FRAGS. */ - partial_segs = len / mss; + partial_segs = min(len, GSO_BY_FRAGS - 1) / mss; if (partial_segs > 1) mss *= partial_segs; else -- cgit From 2aad7d4189a923b24efa8ea6ad09059882b1bfe4 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 12 Dec 2023 18:43:25 +0200 Subject: dpaa2-switch: fix size of the dma_unmap The size of the DMA unmap was wrongly put as a sizeof of a pointer. Change the value of the DMA unmap to be the actual macro used for the allocation and the DMA map. Fixes: 1110318d83e8 ("dpaa2-switch: add tc flower hardware offload on ingress traffic") Signed-off-by: Ioana Ciornei Link: https://lore.kernel.org/r/20231212164326.2753457-2-ioana.ciornei@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c index 4798fb7fe35d..b6a534a3e0b1 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c @@ -139,7 +139,8 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block, err = dpsw_acl_add_entry(ethsw->mc_io, 0, ethsw->dpsw_handle, filter_block->acl_id, acl_entry_cfg); - dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff), + dma_unmap_single(dev, acl_entry_cfg->key_iova, + DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, DMA_TO_DEVICE); if (err) { dev_err(dev, "dpsw_acl_add_entry() failed %d\n", err); @@ -181,8 +182,8 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block, err = dpsw_acl_remove_entry(ethsw->mc_io, 0, ethsw->dpsw_handle, block->acl_id, acl_entry_cfg); - dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff), - DMA_TO_DEVICE); + dma_unmap_single(dev, acl_entry_cfg->key_iova, + DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, DMA_TO_DEVICE); if (err) { dev_err(dev, "dpsw_acl_remove_entry() failed %d\n", err); kfree(cmd_buff); -- cgit From f24a49a375f65e8e75ee1b19d806f46dbaae57fd Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 12 Dec 2023 18:43:26 +0200 Subject: dpaa2-switch: do not ask for MDB, VLAN and FDB replay Starting with commit 4e51bf44a03a ("net: bridge: move the switchdev object replay helpers to "push" mode") the switchdev_bridge_port_offload() helper was extended with the intention to provide switchdev drivers easy access to object addition and deletion replays. This works by calling the replay helpers with non-NULL notifier blocks. In the same commit, the dpaa2-switch driver was updated so that it passes valid notifier blocks to the helper. At that moment, no regression was identified through testing. In the meantime, the blamed commit changed the behavior in terms of which ports get hit by the replay. Before this commit, only the initial port which identified itself as offloaded through switchdev_bridge_port_offload() got a replay of all port objects and FDBs. After this, the newly joining port will trigger a replay of objects on all bridge ports and on the bridge itself. This behavior leads to errors in dpaa2_switch_port_vlans_add() when a VLAN gets installed on the same interface multiple times. The intended mechanism to address this is to pass a non-NULL ctx to the switchdev_bridge_port_offload() helper and then check it against the port's private structure. But since the driver does not have any use for the replayed port objects and FDBs until it gains support for LAG offload, it's better to fix the issue by reverting the dpaa2-switch driver to not ask for replay. The pointers will be added back when we are prepared to ignore replays on unrelated ports. Fixes: b28d580e2939 ("net: bridge: switchdev: replay all VLAN groups") Signed-off-by: Ioana Ciornei Link: https://lore.kernel.org/r/20231212164326.2753457-3-ioana.ciornei@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 97d3151076d5..e01a246124ac 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -1998,9 +1998,6 @@ static int dpaa2_switch_port_attr_set_event(struct net_device *netdev, return notifier_from_errno(err); } -static struct notifier_block dpaa2_switch_port_switchdev_nb; -static struct notifier_block dpaa2_switch_port_switchdev_blocking_nb; - static int dpaa2_switch_port_bridge_join(struct net_device *netdev, struct net_device *upper_dev, struct netlink_ext_ack *extack) @@ -2043,9 +2040,7 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev, goto err_egress_flood; err = switchdev_bridge_port_offload(netdev, netdev, NULL, - &dpaa2_switch_port_switchdev_nb, - &dpaa2_switch_port_switchdev_blocking_nb, - false, extack); + NULL, NULL, false, extack); if (err) goto err_switchdev_offload; @@ -2079,9 +2074,7 @@ static int dpaa2_switch_port_restore_rxvlan(struct net_device *vdev, int vid, vo static void dpaa2_switch_port_pre_bridge_leave(struct net_device *netdev) { - switchdev_bridge_port_unoffload(netdev, NULL, - &dpaa2_switch_port_switchdev_nb, - &dpaa2_switch_port_switchdev_blocking_nb); + switchdev_bridge_port_unoffload(netdev, NULL, NULL, NULL); } static int dpaa2_switch_port_bridge_leave(struct net_device *netdev) -- cgit From 1cc111b9cddc71ce161cd388f11f0e9048edffdb Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Thu, 14 Dec 2023 09:21:53 +0800 Subject: tracing: Fix uaf issue when open the hist or hist_debug file KASAN report following issue. The root cause is when opening 'hist' file of an instance and accessing 'trace_event_file' in hist_show(), but 'trace_event_file' has been freed due to the instance being removed. 'hist_debug' file has the same problem. To fix it, call tracing_{open,release}_file_tr() in file_operations callback to have the ref count and avoid 'trace_event_file' being freed. BUG: KASAN: slab-use-after-free in hist_show+0x11e0/0x1278 Read of size 8 at addr ffff242541e336b8 by task head/190 CPU: 4 PID: 190 Comm: head Not tainted 6.7.0-rc5-g26aff849438c #133 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x98/0xf8 show_stack+0x1c/0x30 dump_stack_lvl+0x44/0x58 print_report+0xf0/0x5a0 kasan_report+0x80/0xc0 __asan_report_load8_noabort+0x1c/0x28 hist_show+0x11e0/0x1278 seq_read_iter+0x344/0xd78 seq_read+0x128/0x1c0 vfs_read+0x198/0x6c8 ksys_read+0xf4/0x1e0 __arm64_sys_read+0x70/0xa8 invoke_syscall+0x70/0x260 el0_svc_common.constprop.0+0xb0/0x280 do_el0_svc+0x44/0x60 el0_svc+0x34/0x68 el0t_64_sync_handler+0xb8/0xc0 el0t_64_sync+0x168/0x170 Allocated by task 188: kasan_save_stack+0x28/0x50 kasan_set_track+0x28/0x38 kasan_save_alloc_info+0x20/0x30 __kasan_slab_alloc+0x6c/0x80 kmem_cache_alloc+0x15c/0x4a8 trace_create_new_event+0x84/0x348 __trace_add_new_event+0x18/0x88 event_trace_add_tracer+0xc4/0x1a0 trace_array_create_dir+0x6c/0x100 trace_array_create+0x2e8/0x568 instance_mkdir+0x48/0x80 tracefs_syscall_mkdir+0x90/0xe8 vfs_mkdir+0x3c4/0x610 do_mkdirat+0x144/0x200 __arm64_sys_mkdirat+0x8c/0xc0 invoke_syscall+0x70/0x260 el0_svc_common.constprop.0+0xb0/0x280 do_el0_svc+0x44/0x60 el0_svc+0x34/0x68 el0t_64_sync_handler+0xb8/0xc0 el0t_64_sync+0x168/0x170 Freed by task 191: kasan_save_stack+0x28/0x50 kasan_set_track+0x28/0x38 kasan_save_free_info+0x34/0x58 __kasan_slab_free+0xe4/0x158 kmem_cache_free+0x19c/0x508 event_file_put+0xa0/0x120 remove_event_file_dir+0x180/0x320 event_trace_del_tracer+0xb0/0x180 __remove_instance+0x224/0x508 instance_rmdir+0x44/0x78 tracefs_syscall_rmdir+0xbc/0x140 vfs_rmdir+0x1cc/0x4c8 do_rmdir+0x220/0x2b8 __arm64_sys_unlinkat+0xc0/0x100 invoke_syscall+0x70/0x260 el0_svc_common.constprop.0+0xb0/0x280 do_el0_svc+0x44/0x60 el0_svc+0x34/0x68 el0t_64_sync_handler+0xb8/0xc0 el0t_64_sync+0x168/0x170 Link: https://lore.kernel.org/linux-trace-kernel/20231214012153.676155-1-zhengyejian1@huawei.com Suggested-by: Steven Rostedt Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 6 ++++++ kernel/trace/trace.h | 1 + kernel/trace/trace_events_hist.c | 12 ++++++++---- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6c79548f9574..199df497db07 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4968,6 +4968,12 @@ int tracing_release_file_tr(struct inode *inode, struct file *filp) return 0; } +int tracing_single_release_file_tr(struct inode *inode, struct file *filp) +{ + tracing_release_file_tr(inode, filp); + return single_release(inode, filp); +} + static int tracing_mark_open(struct inode *inode, struct file *filp) { stream_open(inode, filp); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b7f4ea25a194..0489e72c8169 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -617,6 +617,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp); int tracing_open_generic_tr(struct inode *inode, struct file *filp); int tracing_open_file_tr(struct inode *inode, struct file *filp); int tracing_release_file_tr(struct inode *inode, struct file *filp); +int tracing_single_release_file_tr(struct inode *inode, struct file *filp); bool tracing_is_disabled(void); bool tracer_tracing_is_on(struct trace_array *tr); void tracer_tracing_on(struct trace_array *tr); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 1abc07fba1b9..5ecf3c8bde20 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -5623,10 +5623,12 @@ static int event_hist_open(struct inode *inode, struct file *file) { int ret; - ret = security_locked_down(LOCKDOWN_TRACEFS); + ret = tracing_open_file_tr(inode, file); if (ret) return ret; + /* Clear private_data to avoid warning in single_open() */ + file->private_data = NULL; return single_open(file, hist_show, file); } @@ -5634,7 +5636,7 @@ const struct file_operations event_hist_fops = { .open = event_hist_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_file_tr, }; #ifdef CONFIG_HIST_TRIGGERS_DEBUG @@ -5900,10 +5902,12 @@ static int event_hist_debug_open(struct inode *inode, struct file *file) { int ret; - ret = security_locked_down(LOCKDOWN_TRACEFS); + ret = tracing_open_file_tr(inode, file); if (ret) return ret; + /* Clear private_data to avoid warning in single_open() */ + file->private_data = NULL; return single_open(file, hist_debug_show, file); } @@ -5911,7 +5915,7 @@ const struct file_operations event_hist_debug_fops = { .open = event_hist_debug_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_file_tr, }; #endif -- cgit From 981d947bcd382c3950a593690e0e13d194d65b1c Mon Sep 17 00:00:00 2001 From: Sneh Shah Date: Tue, 12 Dec 2023 14:52:08 +0530 Subject: net: stmmac: dwmac-qcom-ethqos: Fix drops in 10M SGMII RX In 10M SGMII mode all the packets are being dropped due to wrong Rx clock. SGMII 10MBPS mode needs RX clock divider programmed to avoid drops in Rx. Update configure SGMII function with Rx clk divider programming. Fixes: 463120c31c58 ("net: stmmac: dwmac-qcom-ethqos: add support for SGMII") Tested-by: Andrew Halaney Signed-off-by: Sneh Shah Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20231212092208.22393-1-quic_snehshah@quicinc.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index d3bf42d0fceb..31631e3f89d0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -34,6 +34,7 @@ #define RGMII_CONFIG_LOOPBACK_EN BIT(2) #define RGMII_CONFIG_PROG_SWAP BIT(1) #define RGMII_CONFIG_DDR_MODE BIT(0) +#define RGMII_CONFIG_SGMII_CLK_DVDR GENMASK(18, 10) /* SDCC_HC_REG_DLL_CONFIG fields */ #define SDCC_DLL_CONFIG_DLL_RST BIT(30) @@ -78,6 +79,8 @@ #define ETHQOS_MAC_CTRL_SPEED_MODE BIT(14) #define ETHQOS_MAC_CTRL_PORT_SEL BIT(15) +#define SGMII_10M_RX_CLK_DVDR 0x31 + struct ethqos_emac_por { unsigned int offset; unsigned int value; @@ -598,6 +601,9 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos) return 0; } +/* On interface toggle MAC registers gets reset. + * Configure MAC block for SGMII on ethernet phy link up + */ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos) { int val; @@ -617,6 +623,10 @@ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos) case SPEED_10: val |= ETHQOS_MAC_CTRL_PORT_SEL; val &= ~ETHQOS_MAC_CTRL_SPEED_MODE; + rgmii_updatel(ethqos, RGMII_CONFIG_SGMII_CLK_DVDR, + FIELD_PREP(RGMII_CONFIG_SGMII_CLK_DVDR, + SGMII_10M_RX_CLK_DVDR), + RGMII_IO_MACRO_CONFIG); break; } -- cgit From e23c0d21ce9234fbc31ece35663ababbb83f9347 Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Tue, 12 Dec 2023 16:18:33 -0600 Subject: net: stmmac: Handle disabled MDIO busses from devicetree Many hardware configurations have the MDIO bus disabled, and are instead using some other MDIO bus to talk to the MAC's phy. of_mdiobus_register() returns -ENODEV in this case. Let's handle it gracefully instead of failing to probe the MAC. Fixes: 47dd7a540b8a ("net: add support for STMicroelectronics Ethernet controllers.") Signed-off-by: Andrew Halaney Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20231212-b4-stmmac-handle-mdio-enodev-v2-1-600171acf79f@redhat.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index fa9e7e7040b9..0542cfd1817e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -591,7 +591,11 @@ int stmmac_mdio_register(struct net_device *ndev) new_bus->parent = priv->device; err = of_mdiobus_register(new_bus, mdio_node); - if (err != 0) { + if (err == -ENODEV) { + err = 0; + dev_info(dev, "MDIO bus is disabled\n"); + goto bus_register_fail; + } else if (err) { dev_err_probe(dev, err, "Cannot register the MDIO bus\n"); goto bus_register_fail; } -- cgit From 189ff16722ee36ced4d2a2469d4ab65a8fee4198 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Tue, 12 Dec 2023 23:10:56 -0500 Subject: appletalk: Fix Use-After-Free in atalk_ioctl Because atalk_ioctl() accesses sk->sk_receive_queue without holding a sk->sk_receive_queue.lock, it can cause a race with atalk_recvmsg(). A use-after-free for skb occurs with the following flow. ``` atalk_ioctl() -> skb_peek() atalk_recvmsg() -> skb_recv_datagram() -> skb_free_datagram() ``` Add sk->sk_receive_queue.lock to atalk_ioctl() to fix this issue. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Hyunwoo Kim Link: https://lore.kernel.org/r/20231213041056.GA519680@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Paolo Abeni --- net/appletalk/ddp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 9ba04a69ec2a..a852ec093fa8 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1775,15 +1775,14 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } case TIOCINQ: { - /* - * These two are safe on a single CPU system as only - * user tasks fiddle here - */ - struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + struct sk_buff *skb; long amount = 0; + spin_lock_irq(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); if (skb) amount = skb->len - sizeof(struct ddpehdr); + spin_unlock_irq(&sk->sk_receive_queue.lock); rc = put_user(amount, (int __user *)argp); break; } -- cgit From 6c6fa2641402e8e753262fb61ed9a15a7cb225ad Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Thu, 14 Dec 2023 00:28:16 +0100 Subject: ALSA: hda/tas2781: call cleanup functions only once If the module can load the RCA but not the firmware binary, it will call the cleanup functions. Then unloading the module causes general protection fault due to double free. Do not call the cleanup functions in tasdev_fw_ready. general protection fault, probably for non-canonical address 0x6f2b8a2bff4c8fec: 0000 [#1] PREEMPT SMP NOPTI Call Trace: ? die_addr+0x36/0x90 ? exc_general_protection+0x1c5/0x430 ? asm_exc_general_protection+0x26/0x30 ? tasdevice_config_info_remove+0x6d/0xd0 [snd_soc_tas2781_fmwlib] tas2781_hda_unbind+0xaa/0x100 [snd_hda_scodec_tas2781_i2c] component_unbind+0x2e/0x50 component_unbind_all+0x92/0xa0 component_del+0xa8/0x140 tas2781_hda_remove.isra.0+0x32/0x60 [snd_hda_scodec_tas2781_i2c] i2c_device_remove+0x26/0xb0 Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") CC: stable@vger.kernel.org Signed-off-by: Gergo Koteles Link: https://lore.kernel.org/r/1a0885c424bb21172702d254655882b59ef6477a.1702510018.git.soyer@irl.hu Signed-off-by: Takashi Iwai --- sound/pci/hda/tas2781_hda_i2c.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index d3dafc9d150b..c8ee5f809c38 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -550,11 +550,6 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context) tas2781_save_calibration(tas_priv); out: - if (tas_priv->fw_state == TASDEVICE_DSP_FW_FAIL) { - /*If DSP FW fail, kcontrol won't be created */ - tasdevice_config_info_remove(tas_priv); - tasdevice_dsp_remove(tas_priv); - } mutex_unlock(&tas_priv->codec_lock); if (fmw) release_firmware(fmw); -- cgit From 315deab289924c83ab1ded50022e8db95d6e428b Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Thu, 14 Dec 2023 00:49:20 +0100 Subject: ALSA: hda/tas2781: reset the amp before component_add Calling component_add starts loading the firmware, the callback function writes the program to the amplifiers. If the module resets the amplifiers after component_add, it happens that one of the amplifiers does not work because the reset and program writing are interleaving. Call tas2781_reset before component_add to ensure reliable initialization. Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") CC: stable@vger.kernel.org Signed-off-by: Gergo Koteles Link: https://lore.kernel.org/r/4d23bf58558e23ee8097de01f70f1eb8d9de2d15.1702511246.git.soyer@irl.hu Signed-off-by: Takashi Iwai --- sound/pci/hda/tas2781_hda_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index c8ee5f809c38..63a90c7e8976 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -674,14 +674,14 @@ static int tas2781_hda_i2c_probe(struct i2c_client *clt) pm_runtime_put_autosuspend(tas_priv->dev); + tas2781_reset(tas_priv); + ret = component_add(tas_priv->dev, &tas2781_hda_comp_ops); if (ret) { dev_err(tas_priv->dev, "Register component failed: %d\n", ret); pm_runtime_disable(tas_priv->dev); - goto err; } - tas2781_reset(tas_priv); err: if (ret) tas2781_hda_remove(&clt->dev); -- cgit From 7bb26ea74aa86fdf894b7dbd8c5712c5b4187da7 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Wed, 13 Dec 2023 10:40:44 +0100 Subject: net: atlantic: fix double free in ring reinit logic Driver has a logic leak in ring data allocation/free, where double free may happen in aq_ring_free if system is under stress and driver init/deinit is happening. The probability is higher to get this during suspend/resume cycle. Verification was done simulating same conditions with stress -m 2000 --vm-bytes 20M --vm-hang 10 --backoff 1000 while true; do sudo ifconfig enp1s0 down; sudo ifconfig enp1s0 up; done Fixed by explicitly clearing pointers to NULL on deallocation Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Reported-by: Linus Torvalds Closes: https://lore.kernel.org/netdev/CAHk-=wiZZi7FcvqVSUirHBjx0bBUZ4dFrMDVLc3+3HCrtq0rBA@mail.gmail.com/ Signed-off-by: Igor Russkikh Link: https://lore.kernel.org/r/20231213094044.22988-1-irusskikh@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 694daeaf3e61..e1885c1eb100 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -938,11 +938,14 @@ void aq_ring_free(struct aq_ring_s *self) return; kfree(self->buff_ring); + self->buff_ring = NULL; - if (self->dx_ring) + if (self->dx_ring) { dma_free_coherent(aq_nic_get_dev(self->aq_nic), self->size * self->dx_size, self->dx_ring, self->dx_ring_pa); + self->dx_ring = NULL; + } } unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data) -- cgit From 1ba0e9d69b2000e95267c888cbfa91d823388d47 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Dec 2023 21:34:08 +0000 Subject: io_uring/cmd: fix breakage in SOCKET_URING_OP_SIOC* implementation In 8e9fad0e70b7 "io_uring: Add io_uring command support for sockets" you've got an include of asm-generic/ioctls.h done in io_uring/uring_cmd.c. That had been done for the sake of this chunk - + ret = prot->ioctl(sk, SIOCINQ, &arg); + if (ret) + return ret; + return arg; + case SOCKET_URING_OP_SIOCOUTQ: + ret = prot->ioctl(sk, SIOCOUTQ, &arg); SIOC{IN,OUT}Q are defined to symbols (FIONREAD and TIOCOUTQ) that come from ioctls.h, all right, but the values vary by the architecture. FIONREAD is 0x467F on mips 0x4004667F on alpha, powerpc and sparc 0x8004667F on sh and xtensa 0x541B everywhere else TIOCOUTQ is 0x7472 on mips 0x40047473 on alpha, powerpc and sparc 0x80047473 on sh and xtensa 0x5411 everywhere else ->ioctl() expects the same values it would've gotten from userland; all places where we compare with SIOC{IN,OUT}Q are using asm/ioctls.h, so they pick the correct values. io_uring_cmd_sock(), OTOH, ends up passing the default ones. Fixes: 8e9fad0e70b7 ("io_uring: Add io_uring command support for sockets") Cc: Signed-off-by: Al Viro Link: https://lore.kernel.org/r/20231214213408.GT1674809@ZenIV Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index acbc2924ecd2..7d3ef62e620a 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include "io_uring.h" #include "rsrc.h" -- cgit From 46dec61643d7047c9b5929f98a2b7fa4fa93a7dc Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 8 Dec 2023 11:46:53 +0100 Subject: drm/nouveau: Fixup gk20a instobj hierarchy Commit 12c9b05da918 ("drm/nouveau/imem: support allocations not preserved across suspend") uses container_of() to cast from struct nvkm_memory to struct nvkm_instobj, assuming that all instance objects are derived from struct nvkm_instobj. For the gk20a family that's not the case and they are derived from struct nvkm_memory instead. This causes some subtle data corruption (nvkm_instobj.preserve ends up mapping to gk20a_instobj.vaddr) that causes a NULL pointer dereference in gk20a_instobj_acquire_iommu() (and possibly elsewhere) and also prevents suspend/resume from working. Fix this by making struct gk20a_instobj derive from struct nvkm_instobj instead. Fixes: 12c9b05da918 ("drm/nouveau/imem: support allocations not preserved across suspend") Reported-by: Jonathan Hunter Signed-off-by: Thierry Reding Tested-by: Jon Hunter Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231208104653.1917055-1-thierry.reding@gmail.com --- drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 1b811d6972a1..201022ae9214 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -49,14 +49,14 @@ #include struct gk20a_instobj { - struct nvkm_memory memory; + struct nvkm_instobj base; struct nvkm_mm_node *mn; struct gk20a_instmem *imem; /* CPU mapping */ u32 *vaddr; }; -#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory) +#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, base.memory) /* * Used for objects allocated using the DMA API @@ -148,7 +148,7 @@ gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj) list_del(&obj->vaddr_node); vunmap(obj->base.vaddr); obj->base.vaddr = NULL; - imem->vaddr_use -= nvkm_memory_size(&obj->base.memory); + imem->vaddr_use -= nvkm_memory_size(&obj->base.base.memory); nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use, imem->vaddr_max); } @@ -283,7 +283,7 @@ gk20a_instobj_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, { struct gk20a_instobj *node = gk20a_instobj(memory); struct nvkm_vmm_map map = { - .memory = &node->memory, + .memory = &node->base.memory, .offset = offset, .mem = node->mn, }; @@ -391,8 +391,8 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, return -ENOMEM; *_node = &node->base; - nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory); - node->base.memory.ptrs = &gk20a_instobj_ptrs; + nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.base.memory); + node->base.base.memory.ptrs = &gk20a_instobj_ptrs; node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT, &node->handle, GFP_KERNEL, @@ -438,8 +438,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, *_node = &node->base; node->dma_addrs = (void *)(node->pages + npages); - nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.memory); - node->base.memory.ptrs = &gk20a_instobj_ptrs; + nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.base.memory); + node->base.base.memory.ptrs = &gk20a_instobj_ptrs; /* Allocate backing memory */ for (i = 0; i < npages; i++) { @@ -533,7 +533,7 @@ gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero, else ret = gk20a_instobj_ctor_dma(imem, size >> PAGE_SHIFT, align, &node); - *pmemory = node ? &node->memory : NULL; + *pmemory = node ? &node->base.memory : NULL; if (ret) return ret; -- cgit From 7ba84cbf18c7a53107c64880d9c90f18fa68b481 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 13 Dec 2023 19:43:57 -0500 Subject: drm/nouveau/kms/nv50-: Don't allow inheritance of headless iors Turns out we made a silly mistake when coming up with OR inheritance on nouveau. On pre-DCB 4.1, iors are statically routed to output paths via the DCB. On later generations iors are only routed to an output path if they're actually being used. Unfortunately, it appears with NVIF_OUTP_INHERIT_V0 we make the mistake of assuming the later is true on all generations, which is currently leading us to return bogus ior -> head assignments through nvif, which causes WARN_ON(). So - fix this by verifying that we actually know that there's a head assigned to an ior before allowing it to be inherited through nvif. This -should- hopefully fix the WARN_ON on GT218 reported by Borislav. Signed-off-by: Lyude Paul Cc: Borislav Petkov Reported-by: Borislav Petkov (AMD) Tested-by: Borislav Petkov (AMD) Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231214004359.1028109-1-lyude@redhat.com --- drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c index e4279f1772a1..377d0e0cef84 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c @@ -385,7 +385,7 @@ nvkm_uoutp_mthd_inherit(struct nvkm_outp *outp, void *argv, u32 argc) /* Ensure an ior is hooked up to this outp already */ ior = outp->func->inherit(outp); - if (!ior) + if (!ior || !ior->arm.head) return -ENODEV; /* With iors, there will be a separate output path for each type of connector - and all of -- cgit From ef3d5cf9c59cccb012aa6b93d99f4c6eb5d6648e Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 16 Oct 2023 16:25:05 -0700 Subject: cxl/pmu: Ensure put_device on pmu devices The following kmemleaks were detected when removing the cxl module stack: unreferenced object 0xffff88822616b800 (size 1024): ... backtrace: [<00000000bedc6f83>] kmalloc_trace+0x26/0x90 [<00000000448d1afc>] devm_cxl_pmu_add+0x3a/0x110 [cxl_core] [<00000000ca3bfe16>] 0xffffffffa105213b [<00000000ba7f78dc>] local_pci_probe+0x41/0x90 [<000000005bb027ac>] pci_device_probe+0xb0/0x1c0 ... unreferenced object 0xffff8882260abcc0 (size 16): ... hex dump (first 16 bytes): 70 6d 75 5f 6d 65 6d 30 2e 30 00 26 82 88 ff ff pmu_mem0.0.&.... backtrace: ... [<00000000152b5e98>] dev_set_name+0x43/0x50 [<00000000c228798b>] devm_cxl_pmu_add+0x102/0x110 [cxl_core] [<00000000ca3bfe16>] 0xffffffffa105213b [<00000000ba7f78dc>] local_pci_probe+0x41/0x90 [<000000005bb027ac>] pci_device_probe+0xb0/0x1c0 ... unreferenced object 0xffff8882272af200 (size 256): ... backtrace: [<00000000bedc6f83>] kmalloc_trace+0x26/0x90 [<00000000a14d1813>] device_add+0x4ea/0x890 [<00000000a3f07b47>] devm_cxl_pmu_add+0xbe/0x110 [cxl_core] [<00000000ca3bfe16>] 0xffffffffa105213b [<00000000ba7f78dc>] local_pci_probe+0x41/0x90 [<000000005bb027ac>] pci_device_probe+0xb0/0x1c0 ... devm_cxl_pmu_add() correctly registers a device remove function but it only calls device_del() which is only part of device unregistration. Properly call device_unregister() to free up the memory associated with the device. Fixes: 1ad3f701c399 ("cxl/pci: Find and register CXL PMU devices") Cc: Jonathan Cameron Signed-off-by: Ira Weiny Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20231016-pmu-unregister-fix-v1-1-1e2eb2fa3c69@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/pmu.c b/drivers/cxl/core/pmu.c index 7684c843e5a5..5d8e06b0ba6e 100644 --- a/drivers/cxl/core/pmu.c +++ b/drivers/cxl/core/pmu.c @@ -23,7 +23,7 @@ const struct device_type cxl_pmu_type = { static void remove_dev(void *dev) { - device_del(dev); + device_unregister(dev); } int devm_cxl_pmu_add(struct device *parent, struct cxl_pmu_regs *regs, -- cgit From 7e2c1e4b34f07d9aa8937fab88359d4a0fce468e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 15 Dec 2023 11:24:50 +0000 Subject: perf: Fix perf_event_validate_size() lockdep splat When lockdep is enabled, the for_each_sibling_event(sibling, event) macro checks that event->ctx->mutex is held. When creating a new group leader event, we call perf_event_validate_size() on a partially initialized event where event->ctx is NULL, and so when for_each_sibling_event() attempts to check event->ctx->mutex, we get a splat, as reported by Lucas De Marchi: WARNING: CPU: 8 PID: 1471 at kernel/events/core.c:1950 __do_sys_perf_event_open+0xf37/0x1080 This only happens for a new event which is its own group_leader, and in this case there cannot be any sibling events. Thus it's safe to skip the check for siblings, which avoids having to make invasive and ugly changes to for_each_sibling_event(). Avoid the splat by bailing out early when the new event is its own group_leader. Fixes: 382c27f4ed28f803 ("perf: Fix perf_event_validate_size()") Closes: https://lore.kernel.org/lkml/20231214000620.3081018-1-lucas.demarchi@intel.com/ Closes: https://lore.kernel.org/lkml/ZXpm6gQ%2Fd59jGsuW@xpf.sh.intel.com/ Reported-by: Lucas De Marchi Reported-by: Pengfei Xu Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20231215112450.3972309-1-mark.rutland@arm.com --- kernel/events/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index c9d123e13b57..9efd0d7775e7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1947,6 +1947,16 @@ static bool perf_event_validate_size(struct perf_event *event) group_leader->nr_siblings + 1) > 16*1024) return false; + /* + * When creating a new group leader, group_leader->ctx is initialized + * after the size has been validated, but we cannot safely use + * for_each_sibling_event() until group_leader->ctx is set. A new group + * leader cannot have any siblings yet, so we can safely skip checking + * the non-existent siblings. + */ + if (event == group_leader) + return true; + for_each_sibling_event(sibling, group_leader) { if (__perf_event_read_size(sibling->attr.read_format, group_leader->nr_siblings + 1) > 16*1024) -- cgit From 9483aa44912f26da2b69dade6099c2bf4b50a8c3 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Fri, 15 Dec 2023 11:03:52 +0530 Subject: EDAC/versal: Read num_csrows and num_chans using the correct bitfield macro Fix the extraction of num_csrows and num_chans. The extraction of the num_rows is wrong. Instead of extracting using the FIELD_GET it is calling FIELD_PREP. The issue was masked as the default design has the rows as 0. Fixes: 6f15b178cd63 ("EDAC/versal: Add a Xilinx Versal memory controller driver") Closes: https://lore.kernel.org/all/60ca157e-6eff-d12c-9dc0-8aeab125edda@linux-m68k.org/ Reported-by: Geert Uytterhoeven Signed-off-by: Shubhrajyoti Datta Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20231215053352.8740-1-shubhrajyoti.datta@amd.com --- drivers/edac/versal_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/versal_edac.c b/drivers/edac/versal_edac.c index 87e730dfefa0..8625de20fc71 100644 --- a/drivers/edac/versal_edac.c +++ b/drivers/edac/versal_edac.c @@ -966,10 +966,10 @@ static int mc_probe(struct platform_device *pdev) edac_mc_id = emif_get_id(pdev->dev.of_node); regval = readl(ddrmc_baseaddr + XDDR_REG_CONFIG0_OFFSET); - num_chans = FIELD_PREP(XDDR_REG_CONFIG0_NUM_CHANS_MASK, regval); + num_chans = FIELD_GET(XDDR_REG_CONFIG0_NUM_CHANS_MASK, regval); num_chans++; - num_csrows = FIELD_PREP(XDDR_REG_CONFIG0_NUM_RANKS_MASK, regval); + num_csrows = FIELD_GET(XDDR_REG_CONFIG0_NUM_RANKS_MASK, regval); num_csrows *= 2; if (!num_csrows) num_csrows = 1; -- cgit From dd939425707898da992e59ab0fcfae4652546910 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 14 Dec 2023 22:29:21 -0500 Subject: ring-buffer: Do not try to put back write_stamp If an update to an event is interrupted by another event between the time the initial event allocated its buffer and where it wrote to the write_stamp, the code try to reset the write stamp back to the what it had just overwritten. It knows that it was overwritten via checking the before_stamp, and if it didn't match what it wrote to the before_stamp before it allocated its space, it knows it was overwritten. To put back the write_stamp, it uses the before_stamp it read. The problem here is that by writing the before_stamp to the write_stamp it makes the two equal again, which means that the write_stamp can be considered valid as the last timestamp written to the ring buffer. But this is not necessarily true. The event that interrupted the event could have been interrupted in a way that it was interrupted as well, and can end up leaving with an invalid write_stamp. But if this happens and returns to this context that uses the before_stamp to update the write_stamp again, it can possibly incorrectly make it valid, causing later events to have in correct time stamps. As it is OK to leave this function with an invalid write_stamp (one that doesn't match the before_stamp), there's no reason to try to make it valid again in this case. If this race happens, then just leave with the invalid write_stamp and the next event to come along will just add a absolute timestamp and validate everything again. Bonus points: This gets rid of another cmpxchg64! Link: https://lore.kernel.org/linux-trace-kernel/20231214222921.193037a7@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Joel Fernandes Cc: Vincent Donnefort Fixes: a389d86f7fd09 ("ring-buffer: Have nested events still record running time stamp") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1d9caee7f542..2668dde23343 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3612,14 +3612,14 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, } if (likely(tail == w)) { - u64 save_before; - bool s_ok; - /* Nothing interrupted us between A and C */ /*D*/ rb_time_set(&cpu_buffer->write_stamp, info->ts); - barrier(); - /*E*/ s_ok = rb_time_read(&cpu_buffer->before_stamp, &save_before); - RB_WARN_ON(cpu_buffer, !s_ok); + /* + * If something came in between C and D, the write stamp + * may now not be in sync. But that's fine as the before_stamp + * will be different and then next event will just be forced + * to use an absolute timestamp. + */ if (likely(!(info->add_timestamp & (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE)))) /* This did not interrupt any time update */ @@ -3627,24 +3627,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, else /* Just use full timestamp for interrupting event */ info->delta = info->ts; - barrier(); check_buffer(cpu_buffer, info, tail); - if (unlikely(info->ts != save_before)) { - /* SLOW PATH - Interrupted between C and E */ - - a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); - RB_WARN_ON(cpu_buffer, !a_ok); - - /* Write stamp must only go forward */ - if (save_before > info->after) { - /* - * We do not care about the result, only that - * it gets updated atomically. - */ - (void)rb_time_cmpxchg(&cpu_buffer->write_stamp, - info->after, save_before); - } - } } else { u64 ts; /* SLOW PATH - Interrupted between A and C */ -- cgit From 083e9f65bd215582bf8f6a920db729fadf16704f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 15 Dec 2023 08:18:10 -0500 Subject: ring-buffer: Remove useless update to write_stamp in rb_try_to_discard() When filtering is enabled, a temporary buffer is created to place the content of the trace event output so that the filter logic can decide from the trace event output if the trace event should be filtered out or not. If it is to be filtered out, the content in the temporary buffer is simply discarded, otherwise it is written into the trace buffer. But if an interrupt were to come in while a previous event was using that temporary buffer, the event written by the interrupt would actually go into the ring buffer itself to prevent corrupting the data on the temporary buffer. If the event is to be filtered out, the event in the ring buffer is discarded, or if it fails to discard because another event were to have already come in, it is turned into padding. The update to the write_stamp in the rb_try_to_discard() happens after a fix was made to force the next event after the discard to use an absolute timestamp by setting the before_stamp to zero so it does not match the write_stamp (which causes an event to use the absolute timestamp). But there's an effort in rb_try_to_discard() to put back the write_stamp to what it was before the event was added. But this is useless and wasteful because nothing is going to be using that write_stamp for calculations as it still will not match the before_stamp. Remove this useless update, and in doing so, we remove another cmpxchg64()! Also update the comments to reflect this change as well as remove some extra white space in another comment. Link: https://lore.kernel.org/linux-trace-kernel/20231215081810.1f4f38fe@rorschach.local.home Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Joel Fernandes Cc: Vincent Donnefort Fixes: b2dd797543cf ("ring-buffer: Force absolute timestamp on discard of event") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 47 +++++++++++----------------------------------- 1 file changed, 11 insertions(+), 36 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2668dde23343..ad4af0cba159 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2983,25 +2983,6 @@ static unsigned rb_calculate_event_length(unsigned length) return length; } -static u64 rb_time_delta(struct ring_buffer_event *event) -{ - switch (event->type_len) { - case RINGBUF_TYPE_PADDING: - return 0; - - case RINGBUF_TYPE_TIME_EXTEND: - return rb_event_time_stamp(event); - - case RINGBUF_TYPE_TIME_STAMP: - return 0; - - case RINGBUF_TYPE_DATA: - return event->time_delta; - default: - return 0; - } -} - static inline bool rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) @@ -3009,8 +2990,6 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, unsigned long new_index, old_index; struct buffer_page *bpage; unsigned long addr; - u64 write_stamp; - u64 delta; new_index = rb_event_index(event); old_index = new_index + rb_event_ts_length(event); @@ -3019,14 +2998,10 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, bpage = READ_ONCE(cpu_buffer->tail_page); - delta = rb_time_delta(event); - - if (!rb_time_read(&cpu_buffer->write_stamp, &write_stamp)) - return false; - - /* Make sure the write stamp is read before testing the location */ - barrier(); - + /* + * Make sure the tail_page is still the same and + * the next write location is the end of this event + */ if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { unsigned long write_mask = local_read(&bpage->write) & ~RB_WRITE_MASK; @@ -3037,20 +3012,20 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, * to make sure that the next event adds an absolute * value and does not rely on the saved write stamp, which * is now going to be bogus. + * + * By setting the before_stamp to zero, the next event + * is not going to use the write_stamp and will instead + * create an absolute timestamp. This means there's no + * reason to update the wirte_stamp! */ rb_time_set(&cpu_buffer->before_stamp, 0); - /* Something came in, can't discard */ - if (!rb_time_cmpxchg(&cpu_buffer->write_stamp, - write_stamp, write_stamp - delta)) - return false; - /* * If an event were to come in now, it would see that the * write_stamp and the before_stamp are different, and assume * that this event just added itself before updating * the write stamp. The interrupting event will fix the - * write stamp for us, and use the before stamp as its delta. + * write stamp for us, and use an absolute timestamp. */ /* @@ -3487,7 +3462,7 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer, return; /* - * If this interrupted another event, + * If this interrupted another event, */ if (atomic_inc_return(this_cpu_ptr(&checking)) != 1) goto out; -- cgit From fff88fa0fbc7067ba46dde570912d63da42c59a9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Dec 2023 11:53:01 -0500 Subject: ring-buffer: Fix a race in rb_time_cmpxchg() for 32 bit archs Mathieu Desnoyers pointed out an issue in the rb_time_cmpxchg() for 32 bit architectures. That is: static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) { unsigned long cnt, top, bottom, msb; unsigned long cnt2, top2, bottom2, msb2; u64 val; /* The cmpxchg always fails if it interrupted an update */ if (!__rb_time_read(t, &val, &cnt2)) return false; if (val != expect) return false; <<<< interrupted here! cnt = local_read(&t->cnt); The problem is that the synchronization counter in the rb_time_t is read *after* the value of the timestamp is read. That means if an interrupt were to come in between the value being read and the counter being read, it can change the value and the counter and the interrupted process would be clueless about it! The counter needs to be read first and then the value. That way it is easy to tell if the value is stale or not. If the counter hasn't been updated, then the value is still good. Link: https://lore.kernel.org/linux-trace-kernel/20231211201324.652870-1-mathieu.desnoyers@efficios.com/ Link: https://lore.kernel.org/linux-trace-kernel/20231212115301.7a9c9a64@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Fixes: 10464b4aa605e ("ring-buffer: Add rb_time_t 64 bit operations for speeding up 32 bit") Reported-by: Mathieu Desnoyers Reviewed-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index ad4af0cba159..b8ab0557bd1b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -706,6 +706,9 @@ static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) unsigned long cnt2, top2, bottom2, msb2; u64 val; + /* Any interruptions in this function should cause a failure */ + cnt = local_read(&t->cnt); + /* The cmpxchg always fails if it interrupted an update */ if (!__rb_time_read(t, &val, &cnt2)) return false; @@ -713,7 +716,6 @@ static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) if (val != expect) return false; - cnt = local_read(&t->cnt); if ((cnt & 3) != cnt2) return false; -- cgit From dec890089bf79a4954b61482715ee2d084364856 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 12 Dec 2023 14:30:49 -0500 Subject: ring-buffer: Fix 32-bit rb_time_read() race with rb_time_cmpxchg() The following race can cause rb_time_read() to observe a corrupted time stamp: rb_time_cmpxchg() [...] if (!rb_time_read_cmpxchg(&t->msb, msb, msb2)) return false; if (!rb_time_read_cmpxchg(&t->top, top, top2)) return false; __rb_time_read() [...] do { c = local_read(&t->cnt); top = local_read(&t->top); bottom = local_read(&t->bottom); msb = local_read(&t->msb); } while (c != local_read(&t->cnt)); *cnt = rb_time_cnt(top); /* If top and msb counts don't match, this interrupted a write */ if (*cnt != rb_time_cnt(msb)) return false; ^ this check fails to catch that "bottom" is still not updated. So the old "bottom" value is returned, which is wrong. Fix this by checking that all three of msb, top, and bottom 2-bit cnt values match. The reason to favor checking all three fields over requiring a specific update order for both rb_time_set() and rb_time_cmpxchg() is because checking all three fields is more robust to handle partial failures of rb_time_cmpxchg() when interrupted by nested rb_time_set(). Link: https://lore.kernel.org/lkml/20231211201324.652870-1-mathieu.desnoyers@efficios.com/ Link: https://lore.kernel.org/linux-trace-kernel/20231212193049.680122-1-mathieu.desnoyers@efficios.com Fixes: f458a1453424e ("ring-buffer: Test last update in 32bit version of __rb_time_read()") Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b8ab0557bd1b..f22a849da179 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -644,8 +644,8 @@ static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt) *cnt = rb_time_cnt(top); - /* If top and msb counts don't match, this interrupted a write */ - if (*cnt != rb_time_cnt(msb)) + /* If top, msb or bottom counts don't match, this interrupted a write */ + if (*cnt != rb_time_cnt(msb) || *cnt != rb_time_cnt(bottom)) return false; /* The shift to msb will lose its cnt bits */ -- cgit From 0aa0e5289cfe984a8a9fdd79ccf46ccf080151f7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 15 Dec 2023 08:41:14 -0500 Subject: ring-buffer: Have rb_time_cmpxchg() set the msb counter too The rb_time_cmpxchg() on 32-bit architectures requires setting three 32-bit words to represent the 64-bit timestamp, with some salt for synchronization. Those are: msb, top, and bottom The issue is, the rb_time_cmpxchg() did not properly salt the msb portion, and the msb that was written was stale. Link: https://lore.kernel.org/linux-trace-kernel/20231215084114.20899342@rorschach.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: f03f2abce4f39 ("ring-buffer: Have 32 bit time stamps use all 64 bits") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f22a849da179..f4679013289b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -722,10 +722,12 @@ static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) cnt2 = cnt + 1; rb_time_split(val, &top, &bottom, &msb); + msb = rb_time_val_cnt(msb, cnt); top = rb_time_val_cnt(top, cnt); bottom = rb_time_val_cnt(bottom, cnt); rb_time_split(set, &top2, &bottom2, &msb2); + msb2 = rb_time_val_cnt(msb2, cnt); top2 = rb_time_val_cnt(top2, cnt2); bottom2 = rb_time_val_cnt(bottom2, cnt2); -- cgit From 712292308af2265cd9b126aedfa987f10f452a33 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 13 Dec 2023 17:54:03 -0500 Subject: ring-buffer: Do not record in NMI if the arch does not support cmpxchg in NMI As the ring buffer recording requires cmpxchg() to work, if the architecture does not support cmpxchg in NMI, then do not do any recording within an NMI. Link: https://lore.kernel.org/linux-trace-kernel/20231213175403.6fc18540@gandalf.local.home Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f4679013289b..5a114e752f11 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3674,6 +3674,12 @@ rb_reserve_next_event(struct trace_buffer *buffer, int nr_loops = 0; int add_ts_default; + /* ring buffer does cmpxchg, make sure it is safe in NMI context */ + if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && + (unlikely(in_nmi()))) { + return NULL; + } + rb_start_commit(cpu_buffer); /* The commit page can not change after this */ -- cgit From 5df12742b7e3aae2594a30a9d14d5d6e9e7699f4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 14 Dec 2023 09:08:56 -0600 Subject: Revert "PCI: acpiphp: Reassign resources on bridge if necessary" This reverts commit 40613da52b13fb21c5566f10b287e0ca8c12c4e9 and the subsequent fix to it: cc22522fd55e ("PCI: acpiphp: Use pci_assign_unassigned_bridge_resources() only for non-root bus") 40613da52b13 fixed a problem where hot-adding a device with large BARs failed if the bridge windows programmed by firmware were not large enough. cc22522fd55e ("PCI: acpiphp: Use pci_assign_unassigned_bridge_resources() only for non-root bus") fixed a problem with 40613da52b13: an ACPI hot-add of a device on a PCI root bus (common in the virt world) or firmware sending ACPI Bus Check to non-existent Root Ports (e.g., on Dell Inspiron 7352/0W6WV0) caused a NULL pointer dereference and suspend/resume hangs. Unfortunately the combination of 40613da52b13 and cc22522fd55e caused other problems: - Fiona reported that hot-add of SCSI disks in QEMU virtual machine fails sometimes. - Dongli reported a similar problem with hot-add of SCSI disks. - Jonathan reported a console freeze during boot on bare metal due to an error in radeon GPU initialization. Revert both patches to avoid adding these problems. This means we will again see the problems with hot-adding devices with large BARs and the NULL pointer dereferences and suspend/resume issues that 40613da52b13 and cc22522fd55e were intended to fix. Fixes: 40613da52b13 ("PCI: acpiphp: Reassign resources on bridge if necessary") Fixes: cc22522fd55e ("PCI: acpiphp: Use pci_assign_unassigned_bridge_resources() only for non-root bus") Reported-by: Fiona Ebner Closes: https://lore.kernel.org/r/9eb669c0-d8f2-431d-a700-6da13053ae54@proxmox.com Reported-by: Dongli Zhang Closes: https://lore.kernel.org/r/3c4a446a-b167-11b8-f36f-d3c1b49b42e9@oracle.com Reported-by: Jonathan Woithe Closes: https://lore.kernel.org/r/ZXpaNCLiDM+Kv38H@marvin.atrad.com.au Signed-off-by: Bjorn Helgaas Acked-by: Michael S. Tsirkin Acked-by: Igor Mammedov Cc: --- drivers/pci/hotplug/acpiphp_glue.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 601129772b2d..5b1f271c6034 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -512,15 +512,12 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge) if (pass && dev->subordinate) { check_hotplug_bridge(slot, dev); pcibios_resource_survey_bus(dev->subordinate); - if (pci_is_root_bus(bus)) - __pci_bus_size_bridges(dev->subordinate, &add_list); + __pci_bus_size_bridges(dev->subordinate, + &add_list); } } } - if (pci_is_root_bus(bus)) - __pci_bus_assign_resources(bus, &add_list, NULL); - else - pci_assign_unassigned_bridge_resources(bus->self); + __pci_bus_assign_resources(bus, &add_list, NULL); } acpiphp_sanitize_bus(bus); -- cgit From f8fa5d76925991976b3e7076f9d1052515ec1fca Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Dec 2023 13:24:10 -0700 Subject: cred: switch to using atomic_long_t There are multiple ways to grab references to credentials, and the only protection we have against overflowing it is the memory required to do so. With memory sizes only moving in one direction, let's bump the reference count to 64-bit and move it outside the realm of feasibly overflowing. Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- include/linux/cred.h | 8 +++---- kernel/cred.c | 64 ++++++++++++++++++++++++++-------------------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/include/linux/cred.h b/include/linux/cred.h index af8d353a4b86..a3383f8efb8f 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -109,7 +109,7 @@ static inline int groups_search(const struct group_info *group_info, kgid_t grp) * same context as task->real_cred. */ struct cred { - atomic_t usage; + atomic_long_t usage; #ifdef CONFIG_DEBUG_CREDENTIALS atomic_t subscribers; /* number of processes subscribed */ void *put_addr; @@ -229,7 +229,7 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) */ static inline struct cred *get_new_cred_many(struct cred *cred, int nr) { - atomic_add(nr, &cred->usage); + atomic_long_add(nr, &cred->usage); return cred; } @@ -288,7 +288,7 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return NULL; - if (!atomic_inc_not_zero(&nonconst_cred->usage)) + if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) return NULL; validate_creds(cred); nonconst_cred->non_rcu = 0; @@ -313,7 +313,7 @@ static inline void put_cred_many(const struct cred *_cred, int nr) if (cred) { validate_creds(cred); - if (atomic_sub_and_test(nr, &cred->usage)) + if (atomic_long_sub_and_test(nr, &cred->usage)) __put_cred(cred); } } diff --git a/kernel/cred.c b/kernel/cred.c index 3c714cb31660..4a6cd0f0fef5 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -102,17 +102,17 @@ static void put_cred_rcu(struct rcu_head *rcu) #ifdef CONFIG_DEBUG_CREDENTIALS if (cred->magic != CRED_MAGIC_DEAD || - atomic_read(&cred->usage) != 0 || + atomic_long_read(&cred->usage) != 0 || read_cred_subscribers(cred) != 0) panic("CRED: put_cred_rcu() sees %p with" - " mag %x, put %p, usage %d, subscr %d\n", + " mag %x, put %p, usage %ld, subscr %d\n", cred, cred->magic, cred->put_addr, - atomic_read(&cred->usage), + atomic_long_read(&cred->usage), read_cred_subscribers(cred)); #else - if (atomic_read(&cred->usage) != 0) - panic("CRED: put_cred_rcu() sees %p with usage %d\n", - cred, atomic_read(&cred->usage)); + if (atomic_long_read(&cred->usage) != 0) + panic("CRED: put_cred_rcu() sees %p with usage %ld\n", + cred, atomic_long_read(&cred->usage)); #endif security_cred_free(cred); @@ -137,11 +137,11 @@ static void put_cred_rcu(struct rcu_head *rcu) */ void __put_cred(struct cred *cred) { - kdebug("__put_cred(%p{%d,%d})", cred, - atomic_read(&cred->usage), + kdebug("__put_cred(%p{%ld,%d})", cred, + atomic_long_read(&cred->usage), read_cred_subscribers(cred)); - BUG_ON(atomic_read(&cred->usage) != 0); + BUG_ON(atomic_long_read(&cred->usage) != 0); #ifdef CONFIG_DEBUG_CREDENTIALS BUG_ON(read_cred_subscribers(cred) != 0); cred->magic = CRED_MAGIC_DEAD; @@ -164,8 +164,8 @@ void exit_creds(struct task_struct *tsk) { struct cred *real_cred, *cred; - kdebug("exit_creds(%u,%p,%p,{%d,%d})", tsk->pid, tsk->real_cred, tsk->cred, - atomic_read(&tsk->cred->usage), + kdebug("exit_creds(%u,%p,%p,{%ld,%d})", tsk->pid, tsk->real_cred, tsk->cred, + atomic_long_read(&tsk->cred->usage), read_cred_subscribers(tsk->cred)); real_cred = (struct cred *) tsk->real_cred; @@ -230,7 +230,7 @@ struct cred *cred_alloc_blank(void) if (!new) return NULL; - atomic_set(&new->usage, 1); + atomic_long_set(&new->usage, 1); #ifdef CONFIG_DEBUG_CREDENTIALS new->magic = CRED_MAGIC; #endif @@ -276,7 +276,7 @@ struct cred *prepare_creds(void) memcpy(new, old, sizeof(struct cred)); new->non_rcu = 0; - atomic_set(&new->usage, 1); + atomic_long_set(&new->usage, 1); set_cred_subscribers(new, 0); get_group_info(new->group_info); get_uid(new->user); @@ -363,8 +363,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) ) { p->real_cred = get_cred_many(p->cred, 2); alter_cred_subscribers(p->cred, 2); - kdebug("share_creds(%p{%d,%d})", - p->cred, atomic_read(&p->cred->usage), + kdebug("share_creds(%p{%ld,%d})", + p->cred, atomic_long_read(&p->cred->usage), read_cred_subscribers(p->cred)); inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); return 0; @@ -457,8 +457,8 @@ int commit_creds(struct cred *new) struct task_struct *task = current; const struct cred *old = task->real_cred; - kdebug("commit_creds(%p{%d,%d})", new, - atomic_read(&new->usage), + kdebug("commit_creds(%p{%ld,%d})", new, + atomic_long_read(&new->usage), read_cred_subscribers(new)); BUG_ON(task->cred != old); @@ -467,7 +467,7 @@ int commit_creds(struct cred *new) validate_creds(old); validate_creds(new); #endif - BUG_ON(atomic_read(&new->usage) < 1); + BUG_ON(atomic_long_read(&new->usage) < 1); get_cred(new); /* we will require a ref for the subj creds too */ @@ -539,14 +539,14 @@ EXPORT_SYMBOL(commit_creds); */ void abort_creds(struct cred *new) { - kdebug("abort_creds(%p{%d,%d})", new, - atomic_read(&new->usage), + kdebug("abort_creds(%p{%ld,%d})", new, + atomic_long_read(&new->usage), read_cred_subscribers(new)); #ifdef CONFIG_DEBUG_CREDENTIALS BUG_ON(read_cred_subscribers(new) != 0); #endif - BUG_ON(atomic_read(&new->usage) < 1); + BUG_ON(atomic_long_read(&new->usage) < 1); put_cred(new); } EXPORT_SYMBOL(abort_creds); @@ -562,8 +562,8 @@ const struct cred *override_creds(const struct cred *new) { const struct cred *old = current->cred; - kdebug("override_creds(%p{%d,%d})", new, - atomic_read(&new->usage), + kdebug("override_creds(%p{%ld,%d})", new, + atomic_long_read(&new->usage), read_cred_subscribers(new)); validate_creds(old); @@ -585,8 +585,8 @@ const struct cred *override_creds(const struct cred *new) rcu_assign_pointer(current->cred, new); alter_cred_subscribers(old, -1); - kdebug("override_creds() = %p{%d,%d}", old, - atomic_read(&old->usage), + kdebug("override_creds() = %p{%ld,%d}", old, + atomic_long_read(&old->usage), read_cred_subscribers(old)); return old; } @@ -603,8 +603,8 @@ void revert_creds(const struct cred *old) { const struct cred *override = current->cred; - kdebug("revert_creds(%p{%d,%d})", old, - atomic_read(&old->usage), + kdebug("revert_creds(%p{%ld,%d})", old, + atomic_long_read(&old->usage), read_cred_subscribers(old)); validate_creds(old); @@ -735,7 +735,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) *new = *old; new->non_rcu = 0; - atomic_set(&new->usage, 1); + atomic_long_set(&new->usage, 1); set_cred_subscribers(new, 0); get_uid(new->user); get_user_ns(new->user_ns); @@ -849,8 +849,8 @@ static void dump_invalid_creds(const struct cred *cred, const char *label, cred == tsk->cred ? "[eff]" : ""); pr_err("->magic=%x, put_addr=%p\n", cred->magic, cred->put_addr); - pr_err("->usage=%d, subscr=%d\n", - atomic_read(&cred->usage), + pr_err("->usage=%ld, subscr=%d\n", + atomic_long_read(&cred->usage), read_cred_subscribers(cred)); pr_err("->*uid = { %d,%d,%d,%d }\n", from_kuid_munged(&init_user_ns, cred->uid), @@ -922,9 +922,9 @@ EXPORT_SYMBOL(__validate_process_creds); */ void validate_creds_for_do_exit(struct task_struct *tsk) { - kdebug("validate_creds_for_do_exit(%p,%p{%d,%d})", + kdebug("validate_creds_for_do_exit(%p,%p{%ld,%d})", tsk->real_cred, tsk->cred, - atomic_read(&tsk->cred->usage), + atomic_long_read(&tsk->cred->usage), read_cred_subscribers(tsk->cred)); __validate_process_creds(tsk, __FILE__, __LINE__); -- cgit From ae1914174a63a558113e80d24ccac2773f9f7b2b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Dec 2023 13:40:57 -0700 Subject: cred: get rid of CONFIG_DEBUG_CREDENTIALS This code is rarely (never?) enabled by distros, and it hasn't caught anything in decades. Let's kill off this legacy debug code. Suggested-by: Linus Torvalds Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- arch/powerpc/configs/skiroot_defconfig | 1 - arch/s390/configs/debug_defconfig | 1 - fs/nfsd/auth.c | 4 - fs/nfsd/nfssvc.c | 1 - fs/nfsd/vfs.c | 9 +- fs/open.c | 3 - include/linux/cred.h | 50 ------- kernel/cred.c | 231 +++--------------------------- kernel/exit.c | 3 - lib/Kconfig.debug | 15 -- net/sunrpc/auth.c | 3 - security/selinux/hooks.c | 6 - tools/objtool/noreturns.h | 1 - tools/testing/selftests/bpf/config.x86_64 | 1 - tools/testing/selftests/hid/config.common | 1 - 15 files changed, 17 insertions(+), 313 deletions(-) diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 8d3eacb50d56..9d44e6630908 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -301,7 +301,6 @@ CONFIG_WQ_WATCHDOG=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_BUG_ON_DATA_CORRUPTION=y -CONFIG_DEBUG_CREDENTIALS=y # CONFIG_FTRACE is not set CONFIG_XMON=y # CONFIG_RUNTIME_TESTING_MENU is not set diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 438cd92e6080..dd0608629310 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -834,7 +834,6 @@ CONFIG_DEBUG_IRQFLAGS=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index fdf2aad73470..e6beaaf4f170 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -26,8 +26,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) int i; int flags = nfsexp_flags(rqstp, exp); - validate_process_creds(); - /* discard any old override before preparing the new set */ revert_creds(get_cred(current_real_cred())); new = prepare_creds(); @@ -81,10 +79,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); - validate_process_creds(); put_cred(override_creds(new)); put_cred(new); - validate_process_creds(); return 0; oom: diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index fe61d9bbcc1f..5014ab87d313 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -955,7 +955,6 @@ nfsd(void *vrqstp) rqstp->rq_server->sv_maxconn = nn->max_connections; svc_recv(rqstp); - validate_process_creds(); } atomic_dec(&nfsdstats.th_cnt); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fbbea7498f02..e01e4e2acbd9 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -901,7 +901,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int host_err; bool retried = false; - validate_process_creds(); /* * If we get here, then the client has already done an "open", * and (hopefully) checked permission - so allow OWNER_OVERRIDE @@ -926,7 +925,6 @@ retry: } err = nfserrno(host_err); } - validate_process_creds(); return err; } @@ -943,12 +941,7 @@ int nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags, struct file **filp) { - int err; - - validate_process_creds(); - err = __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp); - validate_process_creds(); - return err; + return __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp); } /* diff --git a/fs/open.c b/fs/open.c index 02dc608d40d8..3494a9cd8046 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1088,8 +1088,6 @@ struct file *dentry_open(const struct path *path, int flags, int error; struct file *f; - validate_creds(cred); - /* We must always pass in a valid mount pointer. */ BUG_ON(!path->mnt); @@ -1128,7 +1126,6 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode, struct file *f; int error; - validate_creds(cred); f = alloc_empty_file(flags, cred); if (IS_ERR(f)) return f; diff --git a/include/linux/cred.h b/include/linux/cred.h index a3383f8efb8f..2976f534a7a3 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -110,13 +110,6 @@ static inline int groups_search(const struct group_info *group_info, kgid_t grp) */ struct cred { atomic_long_t usage; -#ifdef CONFIG_DEBUG_CREDENTIALS - atomic_t subscribers; /* number of processes subscribed */ - void *put_addr; - unsigned magic; -#define CRED_MAGIC 0x43736564 -#define CRED_MAGIC_DEAD 0x44656144 -#endif kuid_t uid; /* real UID of the task */ kgid_t gid; /* real GID of the task */ kuid_t suid; /* saved UID of the task */ @@ -172,46 +165,6 @@ extern int cred_fscmp(const struct cred *, const struct cred *); extern void __init cred_init(void); extern int set_cred_ucounts(struct cred *); -/* - * check for validity of credentials - */ -#ifdef CONFIG_DEBUG_CREDENTIALS -extern void __noreturn __invalid_creds(const struct cred *, const char *, unsigned); -extern void __validate_process_creds(struct task_struct *, - const char *, unsigned); - -extern bool creds_are_invalid(const struct cred *cred); - -static inline void __validate_creds(const struct cred *cred, - const char *file, unsigned line) -{ - if (unlikely(creds_are_invalid(cred))) - __invalid_creds(cred, file, line); -} - -#define validate_creds(cred) \ -do { \ - __validate_creds((cred), __FILE__, __LINE__); \ -} while(0) - -#define validate_process_creds() \ -do { \ - __validate_process_creds(current, __FILE__, __LINE__); \ -} while(0) - -extern void validate_creds_for_do_exit(struct task_struct *); -#else -static inline void validate_creds(const struct cred *cred) -{ -} -static inline void validate_creds_for_do_exit(struct task_struct *tsk) -{ -} -static inline void validate_process_creds(void) -{ -} -#endif - static inline bool cap_ambient_invariant_ok(const struct cred *cred) { return cap_issubset(cred->cap_ambient, @@ -264,7 +217,6 @@ static inline const struct cred *get_cred_many(const struct cred *cred, int nr) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return cred; - validate_creds(cred); nonconst_cred->non_rcu = 0; return get_new_cred_many(nonconst_cred, nr); } @@ -290,7 +242,6 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) return NULL; if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) return NULL; - validate_creds(cred); nonconst_cred->non_rcu = 0; return cred; } @@ -312,7 +263,6 @@ static inline void put_cred_many(const struct cred *_cred, int nr) struct cred *cred = (struct cred *) _cred; if (cred) { - validate_creds(cred); if (atomic_long_sub_and_test(nr, &cred->usage)) __put_cred(cred); } diff --git a/kernel/cred.c b/kernel/cred.c index 4a6cd0f0fef5..c033a201c808 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -43,10 +43,6 @@ static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) }; */ struct cred init_cred = { .usage = ATOMIC_INIT(4), -#ifdef CONFIG_DEBUG_CREDENTIALS - .subscribers = ATOMIC_INIT(2), - .magic = CRED_MAGIC, -#endif .uid = GLOBAL_ROOT_UID, .gid = GLOBAL_ROOT_GID, .suid = GLOBAL_ROOT_UID, @@ -66,31 +62,6 @@ struct cred init_cred = { .ucounts = &init_ucounts, }; -static inline void set_cred_subscribers(struct cred *cred, int n) -{ -#ifdef CONFIG_DEBUG_CREDENTIALS - atomic_set(&cred->subscribers, n); -#endif -} - -static inline int read_cred_subscribers(const struct cred *cred) -{ -#ifdef CONFIG_DEBUG_CREDENTIALS - return atomic_read(&cred->subscribers); -#else - return 0; -#endif -} - -static inline void alter_cred_subscribers(const struct cred *_cred, int n) -{ -#ifdef CONFIG_DEBUG_CREDENTIALS - struct cred *cred = (struct cred *) _cred; - - atomic_add(n, &cred->subscribers); -#endif -} - /* * The RCU callback to actually dispose of a set of credentials */ @@ -100,20 +71,9 @@ static void put_cred_rcu(struct rcu_head *rcu) kdebug("put_cred_rcu(%p)", cred); -#ifdef CONFIG_DEBUG_CREDENTIALS - if (cred->magic != CRED_MAGIC_DEAD || - atomic_long_read(&cred->usage) != 0 || - read_cred_subscribers(cred) != 0) - panic("CRED: put_cred_rcu() sees %p with" - " mag %x, put %p, usage %ld, subscr %d\n", - cred, cred->magic, cred->put_addr, - atomic_long_read(&cred->usage), - read_cred_subscribers(cred)); -#else if (atomic_long_read(&cred->usage) != 0) panic("CRED: put_cred_rcu() sees %p with usage %ld\n", cred, atomic_long_read(&cred->usage)); -#endif security_cred_free(cred); key_put(cred->session_keyring); @@ -137,16 +97,10 @@ static void put_cred_rcu(struct rcu_head *rcu) */ void __put_cred(struct cred *cred) { - kdebug("__put_cred(%p{%ld,%d})", cred, - atomic_long_read(&cred->usage), - read_cred_subscribers(cred)); + kdebug("__put_cred(%p{%ld})", cred, + atomic_long_read(&cred->usage)); BUG_ON(atomic_long_read(&cred->usage) != 0); -#ifdef CONFIG_DEBUG_CREDENTIALS - BUG_ON(read_cred_subscribers(cred) != 0); - cred->magic = CRED_MAGIC_DEAD; - cred->put_addr = __builtin_return_address(0); -#endif BUG_ON(cred == current->cred); BUG_ON(cred == current->real_cred); @@ -164,9 +118,8 @@ void exit_creds(struct task_struct *tsk) { struct cred *real_cred, *cred; - kdebug("exit_creds(%u,%p,%p,{%ld,%d})", tsk->pid, tsk->real_cred, tsk->cred, - atomic_long_read(&tsk->cred->usage), - read_cred_subscribers(tsk->cred)); + kdebug("exit_creds(%u,%p,%p,{%ld})", tsk->pid, tsk->real_cred, tsk->cred, + atomic_long_read(&tsk->cred->usage)); real_cred = (struct cred *) tsk->real_cred; tsk->real_cred = NULL; @@ -174,15 +127,10 @@ void exit_creds(struct task_struct *tsk) cred = (struct cred *) tsk->cred; tsk->cred = NULL; - validate_creds(cred); if (real_cred == cred) { - alter_cred_subscribers(cred, -2); put_cred_many(cred, 2); } else { - validate_creds(real_cred); - alter_cred_subscribers(real_cred, -1); put_cred(real_cred); - alter_cred_subscribers(cred, -1); put_cred(cred); } @@ -231,9 +179,6 @@ struct cred *cred_alloc_blank(void) return NULL; atomic_long_set(&new->usage, 1); -#ifdef CONFIG_DEBUG_CREDENTIALS - new->magic = CRED_MAGIC; -#endif if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) goto error; @@ -264,8 +209,6 @@ struct cred *prepare_creds(void) const struct cred *old; struct cred *new; - validate_process_creds(); - new = kmem_cache_alloc(cred_jar, GFP_KERNEL); if (!new) return NULL; @@ -277,7 +220,6 @@ struct cred *prepare_creds(void) new->non_rcu = 0; atomic_long_set(&new->usage, 1); - set_cred_subscribers(new, 0); get_group_info(new->group_info); get_uid(new->user); get_user_ns(new->user_ns); @@ -300,7 +242,6 @@ struct cred *prepare_creds(void) if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) goto error; - validate_creds(new); return new; error: @@ -362,10 +303,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) clone_flags & CLONE_THREAD ) { p->real_cred = get_cred_many(p->cred, 2); - alter_cred_subscribers(p->cred, 2); - kdebug("share_creds(%p{%ld,%d})", - p->cred, atomic_long_read(&p->cred->usage), - read_cred_subscribers(p->cred)); + kdebug("share_creds(%p{%ld})", + p->cred, atomic_long_read(&p->cred->usage)); inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); return 0; } @@ -404,8 +343,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) p->cred = p->real_cred = get_cred(new); inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); - alter_cred_subscribers(new, 2); - validate_creds(new); return 0; error_put: @@ -457,16 +394,10 @@ int commit_creds(struct cred *new) struct task_struct *task = current; const struct cred *old = task->real_cred; - kdebug("commit_creds(%p{%ld,%d})", new, - atomic_long_read(&new->usage), - read_cred_subscribers(new)); + kdebug("commit_creds(%p{%ld})", new, + atomic_long_read(&new->usage)); BUG_ON(task->cred != old); -#ifdef CONFIG_DEBUG_CREDENTIALS - BUG_ON(read_cred_subscribers(old) < 2); - validate_creds(old); - validate_creds(new); -#endif BUG_ON(atomic_long_read(&new->usage) < 1); get_cred(new); /* we will require a ref for the subj creds too */ @@ -502,14 +433,12 @@ int commit_creds(struct cred *new) * RLIMIT_NPROC limits on user->processes have already been checked * in set_user(). */ - alter_cred_subscribers(new, 2); if (new->user != old->user || new->user_ns != old->user_ns) inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1); rcu_assign_pointer(task->real_cred, new); rcu_assign_pointer(task->cred, new); if (new->user != old->user || new->user_ns != old->user_ns) dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1); - alter_cred_subscribers(old, -2); /* send notifications */ if (!uid_eq(new->uid, old->uid) || @@ -539,13 +468,9 @@ EXPORT_SYMBOL(commit_creds); */ void abort_creds(struct cred *new) { - kdebug("abort_creds(%p{%ld,%d})", new, - atomic_long_read(&new->usage), - read_cred_subscribers(new)); + kdebug("abort_creds(%p{%ld})", new, + atomic_long_read(&new->usage)); -#ifdef CONFIG_DEBUG_CREDENTIALS - BUG_ON(read_cred_subscribers(new) != 0); -#endif BUG_ON(atomic_long_read(&new->usage) < 1); put_cred(new); } @@ -562,12 +487,8 @@ const struct cred *override_creds(const struct cred *new) { const struct cred *old = current->cred; - kdebug("override_creds(%p{%ld,%d})", new, - atomic_long_read(&new->usage), - read_cred_subscribers(new)); - - validate_creds(old); - validate_creds(new); + kdebug("override_creds(%p{%ld})", new, + atomic_long_read(&new->usage)); /* * NOTE! This uses 'get_new_cred()' rather than 'get_cred()'. @@ -576,18 +497,12 @@ const struct cred *override_creds(const struct cred *new) * we are only installing the cred into the thread-synchronous * '->cred' pointer, not the '->real_cred' pointer that is * visible to other threads under RCU. - * - * Also note that we did validate_creds() manually, not depending - * on the validation in 'get_cred()'. */ get_new_cred((struct cred *)new); - alter_cred_subscribers(new, 1); rcu_assign_pointer(current->cred, new); - alter_cred_subscribers(old, -1); - kdebug("override_creds() = %p{%ld,%d}", old, - atomic_long_read(&old->usage), - read_cred_subscribers(old)); + kdebug("override_creds() = %p{%ld}", old, + atomic_long_read(&old->usage)); return old; } EXPORT_SYMBOL(override_creds); @@ -603,15 +518,10 @@ void revert_creds(const struct cred *old) { const struct cred *override = current->cred; - kdebug("revert_creds(%p{%ld,%d})", old, - atomic_long_read(&old->usage), - read_cred_subscribers(old)); + kdebug("revert_creds(%p{%ld})", old, + atomic_long_read(&old->usage)); - validate_creds(old); - validate_creds(override); - alter_cred_subscribers(old, 1); rcu_assign_pointer(current->cred, old); - alter_cred_subscribers(override, -1); put_cred(override); } EXPORT_SYMBOL(revert_creds); @@ -731,12 +641,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) kdebug("prepare_kernel_cred() alloc %p", new); old = get_task_cred(daemon); - validate_creds(old); *new = *old; new->non_rcu = 0; atomic_long_set(&new->usage, 1); - set_cred_subscribers(new, 0); get_uid(new->user); get_user_ns(new->user_ns); get_group_info(new->group_info); @@ -760,7 +668,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) goto error; put_cred(old); - validate_creds(new); return new; error: @@ -825,109 +732,3 @@ int set_create_files_as(struct cred *new, struct inode *inode) return security_kernel_create_files_as(new, inode); } EXPORT_SYMBOL(set_create_files_as); - -#ifdef CONFIG_DEBUG_CREDENTIALS - -bool creds_are_invalid(const struct cred *cred) -{ - if (cred->magic != CRED_MAGIC) - return true; - return false; -} -EXPORT_SYMBOL(creds_are_invalid); - -/* - * dump invalid credentials - */ -static void dump_invalid_creds(const struct cred *cred, const char *label, - const struct task_struct *tsk) -{ - pr_err("%s credentials: %p %s%s%s\n", - label, cred, - cred == &init_cred ? "[init]" : "", - cred == tsk->real_cred ? "[real]" : "", - cred == tsk->cred ? "[eff]" : ""); - pr_err("->magic=%x, put_addr=%p\n", - cred->magic, cred->put_addr); - pr_err("->usage=%ld, subscr=%d\n", - atomic_long_read(&cred->usage), - read_cred_subscribers(cred)); - pr_err("->*uid = { %d,%d,%d,%d }\n", - from_kuid_munged(&init_user_ns, cred->uid), - from_kuid_munged(&init_user_ns, cred->euid), - from_kuid_munged(&init_user_ns, cred->suid), - from_kuid_munged(&init_user_ns, cred->fsuid)); - pr_err("->*gid = { %d,%d,%d,%d }\n", - from_kgid_munged(&init_user_ns, cred->gid), - from_kgid_munged(&init_user_ns, cred->egid), - from_kgid_munged(&init_user_ns, cred->sgid), - from_kgid_munged(&init_user_ns, cred->fsgid)); -#ifdef CONFIG_SECURITY - pr_err("->security is %p\n", cred->security); - if ((unsigned long) cred->security >= PAGE_SIZE && - (((unsigned long) cred->security & 0xffffff00) != - (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))) - pr_err("->security {%x, %x}\n", - ((u32*)cred->security)[0], - ((u32*)cred->security)[1]); -#endif -} - -/* - * report use of invalid credentials - */ -void __noreturn __invalid_creds(const struct cred *cred, const char *file, unsigned line) -{ - pr_err("Invalid credentials\n"); - pr_err("At %s:%u\n", file, line); - dump_invalid_creds(cred, "Specified", current); - BUG(); -} -EXPORT_SYMBOL(__invalid_creds); - -/* - * check the credentials on a process - */ -void __validate_process_creds(struct task_struct *tsk, - const char *file, unsigned line) -{ - if (tsk->cred == tsk->real_cred) { - if (unlikely(read_cred_subscribers(tsk->cred) < 2 || - creds_are_invalid(tsk->cred))) - goto invalid_creds; - } else { - if (unlikely(read_cred_subscribers(tsk->real_cred) < 1 || - read_cred_subscribers(tsk->cred) < 1 || - creds_are_invalid(tsk->real_cred) || - creds_are_invalid(tsk->cred))) - goto invalid_creds; - } - return; - -invalid_creds: - pr_err("Invalid process credentials\n"); - pr_err("At %s:%u\n", file, line); - - dump_invalid_creds(tsk->real_cred, "Real", tsk); - if (tsk->cred != tsk->real_cred) - dump_invalid_creds(tsk->cred, "Effective", tsk); - else - pr_err("Effective creds == Real creds\n"); - BUG(); -} -EXPORT_SYMBOL(__validate_process_creds); - -/* - * check creds for do_exit() - */ -void validate_creds_for_do_exit(struct task_struct *tsk) -{ - kdebug("validate_creds_for_do_exit(%p,%p{%ld,%d})", - tsk->real_cred, tsk->cred, - atomic_long_read(&tsk->cred->usage), - read_cred_subscribers(tsk->cred)); - - __validate_process_creds(tsk, __FILE__, __LINE__); -} - -#endif /* CONFIG_DEBUG_CREDENTIALS */ diff --git a/kernel/exit.c b/kernel/exit.c index ee9f43bed49a..aedc0832c9f4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -824,8 +824,6 @@ void __noreturn do_exit(long code) ptrace_event(PTRACE_EVENT_EXIT, code); user_events_exit(tsk); - validate_creds_for_do_exit(tsk); - io_uring_files_cancel(); exit_signals(tsk); /* sets PF_EXITING */ @@ -909,7 +907,6 @@ void __noreturn do_exit(long code) if (tsk->task_frag.page) put_page(tsk->task_frag.page); - validate_creds_for_do_exit(tsk); exit_task_stack_account(tsk); check_stack_usage(); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cc7d53d9dc01..4405f81248fb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1739,21 +1739,6 @@ config DEBUG_MAPLE_TREE endmenu -config DEBUG_CREDENTIALS - bool "Debug credential management" - depends on DEBUG_KERNEL - help - Enable this to turn on some debug checking for credential - management. The additional code keeps track of the number of - pointers from task_structs to any given cred struct, and checks to - see that this number never exceeds the usage count of the cred - struct. - - Furthermore, if SELinux is enabled, this also checks that the - security pointer in the cred struct is never seen to be invalid. - - If unsure, say N. - source "kernel/rcu/Kconfig.debug" config DEBUG_WQ_FORCE_RR_CPU diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 7bfe7d9a32aa..04534ea537c8 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -40,9 +40,6 @@ static unsigned long number_cred_unused; static struct cred machine_cred = { .usage = ATOMIC_INIT(1), -#ifdef CONFIG_DEBUG_CREDENTIALS - .magic = CRED_MAGIC, -#endif }; /* diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index feda711c6b7b..340b2bbbb2dd 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1660,8 +1660,6 @@ static int inode_has_perm(const struct cred *cred, struct inode_security_struct *isec; u32 sid; - validate_creds(cred); - if (unlikely(IS_PRIVATE(inode))) return 0; @@ -3056,8 +3054,6 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct inode *inode, struct inode_security_struct *isec; u32 sid; - validate_creds(cred); - ad.type = LSM_AUDIT_DATA_DENTRY; ad.u.dentry = dentry; sid = cred_sid(cred); @@ -3101,8 +3097,6 @@ static int selinux_inode_permission(struct inode *inode, int mask) if (!mask) return 0; - validate_creds(cred); - if (unlikely(IS_PRIVATE(inode))) return 0; diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index 649ebdef9c3f..1685d7ea6a9f 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -6,7 +6,6 @@ * * Yes, this is unfortunate. A better solution is in the works. */ -NORETURN(__invalid_creds) NORETURN(__kunit_abort) NORETURN(__module_put_and_kthread_exit) NORETURN(__reiserfs_panic) diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 2e70a6048278..49a29dbc1910 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -50,7 +50,6 @@ CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_MEMORY_INIT=y diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common index 0617275d93cc..0f456dbab62f 100644 --- a/tools/testing/selftests/hid/config.common +++ b/tools/testing/selftests/hid/config.common @@ -46,7 +46,6 @@ CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEFAULT_FQ_CODEL=y -- cgit From a8892fd71933126ebae3d60aec5918d4dceaae76 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 15 Dec 2023 10:01:44 -0500 Subject: btrfs: do not allow non subvolume root targets for snapshot Our btrfs subvolume snapshot utility enforces that is the root of the subvolume, however this isn't enforced in the kernel. Update the kernel to also enforce this limitation to avoid problems with other users of this ioctl that don't have the appropriate checks in place. Reported-by: Martin Michaelis CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Neal Gompa Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2429ae87ad44..f70ee61dfb70 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1290,6 +1290,15 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file, * are limited to own subvolumes only */ ret = -EPERM; + } else if (btrfs_ino(BTRFS_I(src_inode)) != BTRFS_FIRST_FREE_OBJECTID) { + /* + * Snapshots must be made with the src_inode referring + * to the subvolume inode, otherwise the permission + * checking above is useless because we may have + * permission on a lower directory but not the subvol + * itself. + */ + ret = -EINVAL; } else { ret = btrfs_mksnapshot(&file->f_path, idmap, name, namelen, -- cgit From ceb6a6f023fd3e8b07761ed900352ef574010bcb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 17 Dec 2023 15:19:28 -0800 Subject: Linux 6.7-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 70fc4c11dfc0..e78ee7db0729 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- cgit From 60e43fe5285e2077ce9904d78cd42a230d03b788 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:02:31 -0700 Subject: lib/firmware_table: tables: Add CDAT table parsing support The CDAT table is very similar to ACPI tables when it comes to sub-table and entry structures. The helper functions can be also used to parse the CDAT table. Add support to the helper functions to deal with an external CDAT table, and also handle the endieness since CDAT can be processed by a BE host. Export a function cdat_table_parse() for CXL driver to parse a CDAT table. In order to minimize ACPICA code changes, __force is being utilized to deal with the case of a big endian (BE) host parsing a CDAT. All CDAT data structure variables are being force casted to __leX as appropriate. Cc: Rafael J. Wysocki Cc: Len Brown Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/170319615131.2212653.10932785667981494238.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/acpi/tables.c | 5 ++-- include/linux/fw_table.h | 21 +++++++++++++- lib/fw_table.c | 75 ++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 86 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index c1516337f668..b07f7d091d13 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -251,8 +251,9 @@ int __init_or_acpilib acpi_table_parse_entries_array( return -ENODEV; } - count = acpi_parse_entries_array(id, table_size, table_header, - proc, proc_num, max_entries); + count = acpi_parse_entries_array(id, table_size, + (union fw_table_header *)table_header, + proc, proc_num, max_entries); acpi_put_table(table_header); return count; diff --git a/include/linux/fw_table.h b/include/linux/fw_table.h index ca49947f0a77..95421860397a 100644 --- a/include/linux/fw_table.h +++ b/include/linux/fw_table.h @@ -25,16 +25,35 @@ struct acpi_subtable_proc { int count; }; +union fw_table_header { + struct acpi_table_header acpi; + struct acpi_table_cdat cdat; +}; + union acpi_subtable_headers { struct acpi_subtable_header common; struct acpi_hmat_structure hmat; struct acpi_prmt_module_header prmt; struct acpi_cedt_header cedt; + struct acpi_cdat_header cdat; }; int acpi_parse_entries_array(char *id, unsigned long table_size, - struct acpi_table_header *table_header, + union fw_table_header *table_header, struct acpi_subtable_proc *proc, int proc_num, unsigned int max_entries); +int cdat_table_parse(enum acpi_cdat_type type, + acpi_tbl_entry_handler_arg handler_arg, void *arg, + struct acpi_table_cdat *table_header); + +/* CXL is the only non-ACPI consumer of the FIRMWARE_TABLE library */ +#if IS_ENABLED(CONFIG_ACPI) && !IS_ENABLED(CONFIG_CXL_BUS) +#define EXPORT_SYMBOL_FWTBL_LIB(x) EXPORT_SYMBOL_ACPI_LIB(x) +#define __init_or_fwtbl_lib __init_or_acpilib +#else +#define EXPORT_SYMBOL_FWTBL_LIB(x) EXPORT_SYMBOL_NS_GPL(x, CXL) +#define __init_or_fwtbl_lib +#endif + #endif diff --git a/lib/fw_table.c b/lib/fw_table.c index 294df54e33b6..1e5e0b2f7012 100644 --- a/lib/fw_table.c +++ b/lib/fw_table.c @@ -12,12 +12,14 @@ #include #include #include +#include enum acpi_subtable_type { ACPI_SUBTABLE_COMMON, ACPI_SUBTABLE_HMAT, ACPI_SUBTABLE_PRMT, ACPI_SUBTABLE_CEDT, + CDAT_SUBTABLE, }; struct acpi_subtable_entry { @@ -25,7 +27,7 @@ struct acpi_subtable_entry { enum acpi_subtable_type type; }; -static unsigned long __init_or_acpilib +static unsigned long __init_or_fwtbl_lib acpi_get_entry_type(struct acpi_subtable_entry *entry) { switch (entry->type) { @@ -37,11 +39,13 @@ acpi_get_entry_type(struct acpi_subtable_entry *entry) return 0; case ACPI_SUBTABLE_CEDT: return entry->hdr->cedt.type; + case CDAT_SUBTABLE: + return entry->hdr->cdat.type; } return 0; } -static unsigned long __init_or_acpilib +static unsigned long __init_or_fwtbl_lib acpi_get_entry_length(struct acpi_subtable_entry *entry) { switch (entry->type) { @@ -53,11 +57,16 @@ acpi_get_entry_length(struct acpi_subtable_entry *entry) return entry->hdr->prmt.length; case ACPI_SUBTABLE_CEDT: return entry->hdr->cedt.length; + case CDAT_SUBTABLE: { + __le16 length = (__force __le16)entry->hdr->cdat.length; + + return le16_to_cpu(length); + } } return 0; } -static unsigned long __init_or_acpilib +static unsigned long __init_or_fwtbl_lib acpi_get_subtable_header_length(struct acpi_subtable_entry *entry) { switch (entry->type) { @@ -69,11 +78,13 @@ acpi_get_subtable_header_length(struct acpi_subtable_entry *entry) return sizeof(entry->hdr->prmt); case ACPI_SUBTABLE_CEDT: return sizeof(entry->hdr->cedt); + case CDAT_SUBTABLE: + return sizeof(entry->hdr->cdat); } return 0; } -static enum acpi_subtable_type __init_or_acpilib +static enum acpi_subtable_type __init_or_fwtbl_lib acpi_get_subtable_type(char *id) { if (strncmp(id, ACPI_SIG_HMAT, 4) == 0) @@ -82,17 +93,32 @@ acpi_get_subtable_type(char *id) return ACPI_SUBTABLE_PRMT; if (strncmp(id, ACPI_SIG_CEDT, 4) == 0) return ACPI_SUBTABLE_CEDT; + if (strncmp(id, ACPI_SIG_CDAT, 4) == 0) + return CDAT_SUBTABLE; return ACPI_SUBTABLE_COMMON; } -static __init_or_acpilib bool has_handler(struct acpi_subtable_proc *proc) +static unsigned long __init_or_fwtbl_lib +acpi_table_get_length(enum acpi_subtable_type type, + union fw_table_header *header) +{ + if (type == CDAT_SUBTABLE) { + __le32 length = (__force __le32)header->cdat.length; + + return le32_to_cpu(length); + } + + return header->acpi.length; +} + +static __init_or_fwtbl_lib bool has_handler(struct acpi_subtable_proc *proc) { return proc->handler || proc->handler_arg; } -static __init_or_acpilib int call_handler(struct acpi_subtable_proc *proc, - union acpi_subtable_headers *hdr, - unsigned long end) +static __init_or_fwtbl_lib int call_handler(struct acpi_subtable_proc *proc, + union acpi_subtable_headers *hdr, + unsigned long end) { if (proc->handler) return proc->handler(hdr, end); @@ -124,23 +150,26 @@ static __init_or_acpilib int call_handler(struct acpi_subtable_proc *proc, * On success returns sum of all matching entries for all proc handlers. * Otherwise, -ENODEV or -EINVAL is returned. */ -int __init_or_acpilib +int __init_or_fwtbl_lib acpi_parse_entries_array(char *id, unsigned long table_size, - struct acpi_table_header *table_header, + union fw_table_header *table_header, struct acpi_subtable_proc *proc, int proc_num, unsigned int max_entries) { unsigned long table_end, subtable_len, entry_len; struct acpi_subtable_entry entry; + enum acpi_subtable_type type; int count = 0; int errs = 0; int i; - table_end = (unsigned long)table_header + table_header->length; + type = acpi_get_subtable_type(id); + table_end = (unsigned long)table_header + + acpi_table_get_length(type, table_header); /* Parse all entries looking for a match. */ - entry.type = acpi_get_subtable_type(id); + entry.type = type; entry.hdr = (union acpi_subtable_headers *) ((unsigned long)table_header + table_size); subtable_len = acpi_get_subtable_header_length(&entry); @@ -186,3 +215,25 @@ acpi_parse_entries_array(char *id, unsigned long table_size, return errs ? -EINVAL : count; } + +int __init_or_fwtbl_lib +cdat_table_parse(enum acpi_cdat_type type, + acpi_tbl_entry_handler_arg handler_arg, + void *arg, + struct acpi_table_cdat *table_header) +{ + struct acpi_subtable_proc proc = { + .id = type, + .handler_arg = handler_arg, + .arg = arg, + }; + + if (!table_header) + return -EINVAL; + + return acpi_parse_entries_array(ACPI_SIG_CDAT, + sizeof(struct acpi_table_cdat), + (union fw_table_header *)table_header, + &proc, 1, 0); +} +EXPORT_SYMBOL_FWTBL_LIB(cdat_table_parse); -- cgit From 6a954e94d038f41d79c4e04348c95774d1c9337d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:02:37 -0700 Subject: base/node / acpi: Change 'node_hmem_attrs' to 'access_coordinates' Dan Williams suggested changing the struct 'node_hmem_attrs' to 'access_coordinates' [1]. The struct is a container of r/w-latency and r/w-bandwidth numbers. Moving forward, this container will also be used by CXL to store the performance characteristics of each link hop in the PCIE/CXL topology. So, where node_hmem_attrs is just the access parameters of a memory-node, access_coordinates applies more broadly to hardware topology characteristics. The observation is that seemed like an exercise in having the application identify "where" it falls on a spectrum of bandwidth and latency needs. For the tuple of read/write-latency and read/write-bandwidth, "coordinates" is not a perfect fit. Sometimes it is just conveying values in isolation and not a "location" relative to other performance points, but in the end this data is used to identify the performance operation point of a given memory-node. [2] Link: http://lore.kernel.org/r/64471313421f7_1b66294d5@dwillia2-xfh.jf.intel.com.notmuch/ Link: https://lore.kernel.org/linux-cxl/645e6215ee0de_1e6f2945e@dwillia2-xfh.jf.intel.com.notmuch/ Suggested-by: Dan Williams Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/170319615734.2212653.15319394025985499185.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/acpi/numa/hmat.c | 28 ++++++++++++++-------------- drivers/base/node.c | 12 ++++++------ include/linux/memory-tiers.h | 10 +++++----- include/linux/node.h | 8 ++++---- mm/memory-tiers.c | 12 ++++++------ 5 files changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 9ef5f1bdcfdb..83bc2b69401b 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -63,7 +63,7 @@ struct memory_target { unsigned int memory_pxm; unsigned int processor_pxm; struct resource memregions; - struct node_hmem_attrs hmem_attrs[2]; + struct access_coordinate coord[2]; struct list_head caches; struct node_cache_attrs cache_attrs; bool registered; @@ -228,24 +228,24 @@ static void hmat_update_target_access(struct memory_target *target, { switch (type) { case ACPI_HMAT_ACCESS_LATENCY: - target->hmem_attrs[access].read_latency = value; - target->hmem_attrs[access].write_latency = value; + target->coord[access].read_latency = value; + target->coord[access].write_latency = value; break; case ACPI_HMAT_READ_LATENCY: - target->hmem_attrs[access].read_latency = value; + target->coord[access].read_latency = value; break; case ACPI_HMAT_WRITE_LATENCY: - target->hmem_attrs[access].write_latency = value; + target->coord[access].write_latency = value; break; case ACPI_HMAT_ACCESS_BANDWIDTH: - target->hmem_attrs[access].read_bandwidth = value; - target->hmem_attrs[access].write_bandwidth = value; + target->coord[access].read_bandwidth = value; + target->coord[access].write_bandwidth = value; break; case ACPI_HMAT_READ_BANDWIDTH: - target->hmem_attrs[access].read_bandwidth = value; + target->coord[access].read_bandwidth = value; break; case ACPI_HMAT_WRITE_BANDWIDTH: - target->hmem_attrs[access].write_bandwidth = value; + target->coord[access].write_bandwidth = value; break; default: break; @@ -681,7 +681,7 @@ static void hmat_register_target_cache(struct memory_target *target) static void hmat_register_target_perf(struct memory_target *target, int access) { unsigned mem_nid = pxm_to_node(target->memory_pxm); - node_set_perf_attrs(mem_nid, &target->hmem_attrs[access], access); + node_set_perf_attrs(mem_nid, &target->coord[access], access); } static void hmat_register_target_devices(struct memory_target *target) @@ -765,7 +765,7 @@ static int hmat_set_default_dram_perf(void) int rc; int nid, pxm; struct memory_target *target; - struct node_hmem_attrs *attrs; + struct access_coordinate *attrs; if (!default_dram_type) return -EIO; @@ -775,7 +775,7 @@ static int hmat_set_default_dram_perf(void) target = find_mem_target(pxm); if (!target) continue; - attrs = &target->hmem_attrs[1]; + attrs = &target->coord[1]; rc = mt_set_default_dram_perf(nid, attrs, "ACPI HMAT"); if (rc) return rc; @@ -789,7 +789,7 @@ static int hmat_calculate_adistance(struct notifier_block *self, { static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); struct memory_target *target; - struct node_hmem_attrs *perf; + struct access_coordinate *perf; int *adist = data; int pxm; @@ -802,7 +802,7 @@ static int hmat_calculate_adistance(struct notifier_block *self, hmat_update_target_attrs(target, p_nodes, 1); mutex_unlock(&target_lock); - perf = &target->hmem_attrs[1]; + perf = &target->coord[1]; if (mt_perf_to_adistance(perf, adist)) return NOTIFY_OK; diff --git a/drivers/base/node.c b/drivers/base/node.c index 493d533f8375..cb2b6cc7f6e6 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -74,14 +74,14 @@ static BIN_ATTR_RO(cpulist, CPULIST_FILE_MAX_BYTES); * @dev: Device for this memory access class * @list_node: List element in the node's access list * @access: The access class rank - * @hmem_attrs: Heterogeneous memory performance attributes + * @coord: Heterogeneous memory performance coordinates */ struct node_access_nodes { struct device dev; struct list_head list_node; unsigned int access; #ifdef CONFIG_HMEM_REPORTING - struct node_hmem_attrs hmem_attrs; + struct access_coordinate coord; #endif }; #define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev) @@ -167,7 +167,7 @@ static ssize_t property##_show(struct device *dev, \ char *buf) \ { \ return sysfs_emit(buf, "%u\n", \ - to_access_nodes(dev)->hmem_attrs.property); \ + to_access_nodes(dev)->coord.property); \ } \ static DEVICE_ATTR_RO(property) @@ -187,10 +187,10 @@ static struct attribute *access_attrs[] = { /** * node_set_perf_attrs - Set the performance values for given access class * @nid: Node identifier to be set - * @hmem_attrs: Heterogeneous memory performance attributes + * @coord: Heterogeneous memory performance coordinates * @access: The access class the for the given attributes */ -void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, +void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, unsigned int access) { struct node_access_nodes *c; @@ -205,7 +205,7 @@ void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, if (!c) return; - c->hmem_attrs = *hmem_attrs; + c->coord = *coord; for (i = 0; access_attrs[i] != NULL; i++) { if (sysfs_add_file_to_group(&c->dev.kobj, access_attrs[i], "initiators")) { diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 1e39d27bee41..69e781900082 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -33,7 +33,7 @@ struct memory_dev_type { struct kref kref; }; -struct node_hmem_attrs; +struct access_coordinate; #ifdef CONFIG_NUMA extern bool numa_demotion_enabled; @@ -45,9 +45,9 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype); int register_mt_adistance_algorithm(struct notifier_block *nb); int unregister_mt_adistance_algorithm(struct notifier_block *nb); int mt_calc_adistance(int node, int *adist); -int mt_set_default_dram_perf(int nid, struct node_hmem_attrs *perf, +int mt_set_default_dram_perf(int nid, struct access_coordinate *perf, const char *source); -int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist); +int mt_perf_to_adistance(struct access_coordinate *perf, int *adist); #ifdef CONFIG_MIGRATION int next_demotion_node(int node); void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); @@ -126,13 +126,13 @@ static inline int mt_calc_adistance(int node, int *adist) return NOTIFY_DONE; } -static inline int mt_set_default_dram_perf(int nid, struct node_hmem_attrs *perf, +static inline int mt_set_default_dram_perf(int nid, struct access_coordinate *perf, const char *source) { return -EIO; } -static inline int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist) +static inline int mt_perf_to_adistance(struct access_coordinate *perf, int *adist) { return -EIO; } diff --git a/include/linux/node.h b/include/linux/node.h index 427a5975cf40..25b66d705ee2 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -20,14 +20,14 @@ #include /** - * struct node_hmem_attrs - heterogeneous memory performance attributes + * struct access_coordinate - generic performance coordinates container * * @read_bandwidth: Read bandwidth in MB/s * @write_bandwidth: Write bandwidth in MB/s * @read_latency: Read latency in nanoseconds * @write_latency: Write latency in nanoseconds */ -struct node_hmem_attrs { +struct access_coordinate { unsigned int read_bandwidth; unsigned int write_bandwidth; unsigned int read_latency; @@ -65,7 +65,7 @@ struct node_cache_attrs { #ifdef CONFIG_HMEM_REPORTING void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs); -void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, +void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, unsigned access); #else static inline void node_add_cache(unsigned int nid, @@ -74,7 +74,7 @@ static inline void node_add_cache(unsigned int nid, } static inline void node_set_perf_attrs(unsigned int nid, - struct node_hmem_attrs *hmem_attrs, + struct access_coordinate *coord, unsigned access) { } diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index 8d5291add2bc..5462d9e3c84c 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -109,7 +109,7 @@ static struct demotion_nodes *node_demotion __read_mostly; static BLOCKING_NOTIFIER_HEAD(mt_adistance_algorithms); static bool default_dram_perf_error; -static struct node_hmem_attrs default_dram_perf; +static struct access_coordinate default_dram_perf; static int default_dram_perf_ref_nid = NUMA_NO_NODE; static const char *default_dram_perf_ref_source; @@ -601,15 +601,15 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype) } EXPORT_SYMBOL_GPL(clear_node_memory_type); -static void dump_hmem_attrs(struct node_hmem_attrs *attrs, const char *prefix) +static void dump_hmem_attrs(struct access_coordinate *coord, const char *prefix) { pr_info( "%sread_latency: %u, write_latency: %u, read_bandwidth: %u, write_bandwidth: %u\n", - prefix, attrs->read_latency, attrs->write_latency, - attrs->read_bandwidth, attrs->write_bandwidth); + prefix, coord->read_latency, coord->write_latency, + coord->read_bandwidth, coord->write_bandwidth); } -int mt_set_default_dram_perf(int nid, struct node_hmem_attrs *perf, +int mt_set_default_dram_perf(int nid, struct access_coordinate *perf, const char *source) { int rc = 0; @@ -666,7 +666,7 @@ out: return rc; } -int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist) +int mt_perf_to_adistance(struct access_coordinate *perf, int *adist) { if (default_dram_perf_error) return -EIO; -- cgit From 69b789b64456093819f730b3f9c13a593a5485d9 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:02:43 -0700 Subject: acpi: numa: Create enum for memory_target access coordinates indexing Create enums to provide named indexing for the access coordinate array. This is in preparation for adding generic port support which will add a third index in the array to keep the generic port attributes separate from the memory attributes. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/170319616332.2212653.3872789279950567889.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/acpi/numa/hmat.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 83bc2b69401b..ca7aedfbb5f2 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -58,12 +58,18 @@ struct target_cache { struct node_cache_attrs cache_attrs; }; +enum { + NODE_ACCESS_CLASS_0 = 0, + NODE_ACCESS_CLASS_1, + NODE_ACCESS_CLASS_MAX, +}; + struct memory_target { struct list_head node; unsigned int memory_pxm; unsigned int processor_pxm; struct resource memregions; - struct access_coordinate coord[2]; + struct access_coordinate coord[NODE_ACCESS_CLASS_MAX]; struct list_head caches; struct node_cache_attrs cache_attrs; bool registered; @@ -339,10 +345,12 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header, if (mem_hier == ACPI_HMAT_MEMORY) { target = find_mem_target(targs[targ]); if (target && target->processor_pxm == inits[init]) { - hmat_update_target_access(target, type, value, 0); + hmat_update_target_access(target, type, value, + NODE_ACCESS_CLASS_0); /* If the node has a CPU, update access 1 */ if (node_state(pxm_to_node(inits[init]), N_CPU)) - hmat_update_target_access(target, type, value, 1); + hmat_update_target_access(target, type, value, + NODE_ACCESS_CLASS_1); } } } @@ -726,8 +734,8 @@ static void hmat_register_target(struct memory_target *target) if (!target->registered) { hmat_register_target_initiators(target); hmat_register_target_cache(target); - hmat_register_target_perf(target, 0); - hmat_register_target_perf(target, 1); + hmat_register_target_perf(target, NODE_ACCESS_CLASS_0); + hmat_register_target_perf(target, NODE_ACCESS_CLASS_1); target->registered = true; } mutex_unlock(&target_lock); -- cgit From 6373c48b8c9dfb5c1e09fdb538e700d9cc91c45e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:02:49 -0700 Subject: acpi: numa: Add genport target allocation to the HMAT parsing Add SRAT parsing for the HMAT init in order to collect the device handle from the Generic Port Affinity Structure. The device handle will serve as the key to search for target data. Consolidate the common code with alloc_memory_target() in a helper function alloc_target(). Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/170319616951.2212653.14862375982250406464.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/acpi/numa/hmat.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index ca7aedfbb5f2..21722cbec324 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -72,6 +72,7 @@ struct memory_target { struct access_coordinate coord[NODE_ACCESS_CLASS_MAX]; struct list_head caches; struct node_cache_attrs cache_attrs; + u8 gen_port_device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE]; bool registered; }; @@ -126,8 +127,7 @@ static __init void alloc_memory_initiator(unsigned int cpu_pxm) list_add_tail(&initiator->node, &initiators); } -static __init void alloc_memory_target(unsigned int mem_pxm, - resource_size_t start, resource_size_t len) +static __init struct memory_target *alloc_target(unsigned int mem_pxm) { struct memory_target *target; @@ -135,7 +135,7 @@ static __init void alloc_memory_target(unsigned int mem_pxm, if (!target) { target = kzalloc(sizeof(*target), GFP_KERNEL); if (!target) - return; + return NULL; target->memory_pxm = mem_pxm; target->processor_pxm = PXM_INVAL; target->memregions = (struct resource) { @@ -148,6 +148,19 @@ static __init void alloc_memory_target(unsigned int mem_pxm, INIT_LIST_HEAD(&target->caches); } + return target; +} + +static __init void alloc_memory_target(unsigned int mem_pxm, + resource_size_t start, + resource_size_t len) +{ + struct memory_target *target; + + target = alloc_target(mem_pxm); + if (!target) + return; + /* * There are potentially multiple ranges per PXM, so record each * in the per-target memregions resource tree. @@ -158,6 +171,18 @@ static __init void alloc_memory_target(unsigned int mem_pxm, start, start + len, mem_pxm); } +static __init void alloc_genport_target(unsigned int mem_pxm, u8 *handle) +{ + struct memory_target *target; + + target = alloc_target(mem_pxm); + if (!target) + return; + + memcpy(target->gen_port_device_handle, handle, + ACPI_SRAT_DEVICE_HANDLE_SIZE); +} + static __init const char *hmat_data_type(u8 type) { switch (type) { @@ -499,6 +524,27 @@ static __init int srat_parse_mem_affinity(union acpi_subtable_headers *header, return 0; } +static __init int srat_parse_genport_affinity(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_srat_generic_affinity *ga = (void *)header; + + if (!ga) + return -EINVAL; + + if (!(ga->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED)) + return 0; + + /* Skip PCI device_handle for now */ + if (ga->device_handle_type != 0) + return 0; + + alloc_genport_target(ga->proximity_domain, + (u8 *)ga->device_handle); + + return 0; +} + static u32 hmat_initiator_perf(struct memory_target *target, struct memory_initiator *initiator, struct acpi_hmat_locality *hmat_loc) @@ -878,6 +924,13 @@ static __init int hmat_init(void) ACPI_SRAT_TYPE_MEMORY_AFFINITY, srat_parse_mem_affinity, 0) < 0) goto out_put; + + if (acpi_table_parse_entries(ACPI_SIG_SRAT, + sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_GENERIC_PORT_AFFINITY, + srat_parse_genport_affinity, 0) < 0) + goto out_put; + acpi_put_table(tbl); status = acpi_get_table(ACPI_SIG_HMAT, 0, &tbl); -- cgit From 79205651120620c2683f90c25ef3d2ac8e454026 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:02:55 -0700 Subject: acpi: Break out nesting for hmat_parse_locality() Refactor hmat_parse_locality() to break up the deep nesting of the function. Suggested-by: Jonathan Cameron Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/170319617537.2212653.10625501075519862509.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/acpi/numa/hmat.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 21722cbec324..4cae2e84251a 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -322,11 +322,28 @@ static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc) } } +static __init void hmat_update_target(unsigned int tgt_pxm, unsigned int init_pxm, + u8 mem_hier, u8 type, u32 value) +{ + struct memory_target *target = find_mem_target(tgt_pxm); + + if (mem_hier != ACPI_HMAT_MEMORY) + return; + + if (target && target->processor_pxm == init_pxm) { + hmat_update_target_access(target, type, value, + NODE_ACCESS_CLASS_0); + /* If the node has a CPU, update access 1 */ + if (node_state(pxm_to_node(init_pxm), N_CPU)) + hmat_update_target_access(target, type, value, + NODE_ACCESS_CLASS_1); + } +} + static __init int hmat_parse_locality(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_hmat_locality *hmat_loc = (void *)header; - struct memory_target *target; unsigned int init, targ, total_size, ipds, tpds; u32 *inits, *targs, value; u16 *entries; @@ -367,17 +384,8 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header, inits[init], targs[targ], value, hmat_data_type_suffix(type)); - if (mem_hier == ACPI_HMAT_MEMORY) { - target = find_mem_target(targs[targ]); - if (target && target->processor_pxm == inits[init]) { - hmat_update_target_access(target, type, value, - NODE_ACCESS_CLASS_0); - /* If the node has a CPU, update access 1 */ - if (node_state(pxm_to_node(inits[init]), N_CPU)) - hmat_update_target_access(target, type, value, - NODE_ACCESS_CLASS_1); - } - } + hmat_update_target(targs[targ], inits[init], + mem_hier, type, value); } } -- cgit From a3a3e341f169511823f7b2d140a0bdfbd620dcbd Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:01 -0700 Subject: acpi: numa: Add setting of generic port system locality attributes Add generic port support for the parsing of HMAT system locality sub-table. The attributes will be added to the third array member of the access coordinates in order to not mix with the existing memory attributes. It only provides the system locality attributes from initiator to the generic port targets and is missing the rest of the data to the actual memory device. The complete attributes will be updated when a memory device is attached and the system locality information is calculated end to end. Through hmat_update_target_attrs(), the best performance attributes will be setup in target->coord. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/170319618135.2212653.13778540010384821833.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/acpi/numa/hmat.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 4cae2e84251a..8a1802e078f3 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -61,6 +61,7 @@ struct target_cache { enum { NODE_ACCESS_CLASS_0 = 0, NODE_ACCESS_CLASS_1, + NODE_ACCESS_CLASS_GENPORT_SINK, NODE_ACCESS_CLASS_MAX, }; @@ -654,6 +655,11 @@ static void hmat_update_target_attrs(struct memory_target *target, u32 best = 0; int i; + /* Don't update for generic port if there's no device handle */ + if (access == NODE_ACCESS_CLASS_GENPORT_SINK && + !(*(u16 *)target->gen_port_device_handle)) + return; + bitmap_zero(p_nodes, MAX_NUMNODES); /* * If the Address Range Structure provides a local processor pxm, set @@ -723,6 +729,14 @@ static void __hmat_register_target_initiators(struct memory_target *target, } } +static void hmat_register_generic_target_initiators(struct memory_target *target) +{ + static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); + + __hmat_register_target_initiators(target, p_nodes, + NODE_ACCESS_CLASS_GENPORT_SINK); +} + static void hmat_register_target_initiators(struct memory_target *target) { static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); @@ -774,6 +788,17 @@ static void hmat_register_target(struct memory_target *target) */ hmat_register_target_devices(target); + /* + * Register generic port perf numbers. The nid may not be + * initialized and is still NUMA_NO_NODE. + */ + mutex_lock(&target_lock); + if (*(u16 *)target->gen_port_device_handle) { + hmat_register_generic_target_initiators(target); + target->registered = true; + } + mutex_unlock(&target_lock); + /* * Skip offline nodes. This can happen when memory * marked EFI_MEMORY_SP, "specific purpose", is applied -- cgit From ca53543d8e340070fb37fde93f36ed9012c76b90 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:07 -0700 Subject: acpi: numa: Add helper function to retrieve the performance attributes Add helper to retrieve the performance attributes based on the device handle. The helper function is exported so the CXL driver can use that to acquire the performance data between the CPU and the CXL host bridge. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/170319618721.2212653.5552947472849081786.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/acpi/numa/hmat.c | 41 +++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 11 +++++++++++ 2 files changed, 52 insertions(+) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 8a1802e078f3..d6b85f0f6082 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -108,6 +108,47 @@ static struct memory_target *find_mem_target(unsigned int mem_pxm) return NULL; } +static struct memory_target *acpi_find_genport_target(u32 uid) +{ + struct memory_target *target; + u32 target_uid; + u8 *uid_ptr; + + list_for_each_entry(target, &targets, node) { + uid_ptr = target->gen_port_device_handle + 8; + target_uid = *(u32 *)uid_ptr; + if (uid == target_uid) + return target; + } + + return NULL; +} + +/** + * acpi_get_genport_coordinates - Retrieve the access coordinates for a generic port + * @uid: ACPI unique id + * @coord: The access coordinates written back out for the generic port + * + * Return: 0 on success. Errno on failure. + * + * Only supports device handles that are ACPI. Assume ACPI0016 HID for CXL. + */ +int acpi_get_genport_coordinates(u32 uid, + struct access_coordinate *coord) +{ + struct memory_target *target; + + guard(mutex)(&target_lock); + target = acpi_find_genport_target(uid); + if (!target) + return -ENOENT; + + *coord = target->coord[NODE_ACCESS_CLASS_GENPORT_SINK]; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(acpi_get_genport_coordinates, CXL); + static __init void alloc_memory_initiator(unsigned int cpu_pxm) { struct memory_initiator *initiator; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4db54e928b36..8b0761c682f9 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -15,6 +15,7 @@ #include #include #include +#include struct irq_domain; struct irq_domain_ops; @@ -424,6 +425,16 @@ extern int acpi_blacklisted(void); extern void acpi_osi_setup(char *str); extern bool acpi_osi_is_win8(void); +#ifdef CONFIG_ACPI_HMAT +int acpi_get_genport_coordinates(u32 uid, struct access_coordinate *coord); +#else +static inline int acpi_get_genport_coordinates(u32 uid, + struct access_coordinate *coord) +{ + return -EOPNOTSUPP; +} +#endif + #ifdef CONFIG_ACPI_NUMA int acpi_map_pxm_to_node(int pxm); int acpi_get_node(acpi_handle handle); -- cgit From ad6f04c0269b0b7908f09621d3b3c90def39a297 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:13 -0700 Subject: cxl: Add callback to parse the DSMAS subtables from CDAT Provide a callback function to the CDAT parser in order to parse the Device Scoped Memory Affinity Structure (DSMAS). Each DSMAS structure contains the DPA range and its associated attributes in each entry. See the CDAT specification for details. The device handle and the DPA range is saved and to be associated with the DSLBIS locality data when the DSLBIS entries are parsed. The xarray is a local variable. When the total path performance data is calculated and storred this xarray can be discarded. Coherent Device Attribute Table 1.03 2.1 Device Scoped memory Affinity Structure (DSMAS) Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319619355.2212653.2675953129671561293.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/Kconfig | 3 ++ drivers/cxl/core/Makefile | 1 + drivers/cxl/core/cdat.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 2 ++ drivers/cxl/port.c | 1 + tools/testing/cxl/Kbuild | 1 + 6 files changed, 100 insertions(+) create mode 100644 drivers/cxl/core/cdat.c diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 8ea1d340e438..67998dbd1d46 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -5,6 +5,7 @@ menuconfig CXL_BUS select FW_LOADER select FW_UPLOAD select PCI_DOE + select FIRMWARE_TABLE help CXL is a bus that is electrically compatible with PCI Express, but layers three protocols on that signalling (CXL.io, CXL.cache, and @@ -54,8 +55,10 @@ config CXL_MEM_RAW_COMMANDS config CXL_ACPI tristate "CXL ACPI: Platform Support" depends on ACPI + depends on ACPI_NUMA default CXL_BUS select ACPI_TABLE_LIB + select ACPI_HMAT help Enable support for host managed device memory (HDM) resources published by a platform's ACPI CXL memory layout description. See diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 1f66b5d4d935..9259bcc6773c 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -13,5 +13,6 @@ cxl_core-y += mbox.o cxl_core-y += pci.o cxl_core-y += hdm.o cxl_core-y += pmu.o +cxl_core-y += cdat.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c new file mode 100644 index 000000000000..9bf4f53bf77f --- /dev/null +++ b/drivers/cxl/core/cdat.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation. All rights reserved. */ +#include +#include +#include +#include "cxlpci.h" +#include "cxl.h" + +struct dsmas_entry { + struct range dpa_range; + u8 handle; +}; + +static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg, + const unsigned long end) +{ + struct acpi_cdat_header *hdr = &header->cdat; + struct acpi_cdat_dsmas *dsmas; + int size = sizeof(*hdr) + sizeof(*dsmas); + struct xarray *dsmas_xa = arg; + struct dsmas_entry *dent; + u16 len; + int rc; + + len = le16_to_cpu((__force __le16)hdr->length); + if (len != size || (unsigned long)hdr + len > end) { + pr_warn("Malformed DSMAS table length: (%u:%u)\n", size, len); + return -EINVAL; + } + + /* Skip common header */ + dsmas = (struct acpi_cdat_dsmas *)(hdr + 1); + + dent = kzalloc(sizeof(*dent), GFP_KERNEL); + if (!dent) + return -ENOMEM; + + dent->handle = dsmas->dsmad_handle; + dent->dpa_range.start = le64_to_cpu((__force __le64)dsmas->dpa_base_address); + dent->dpa_range.end = le64_to_cpu((__force __le64)dsmas->dpa_base_address) + + le64_to_cpu((__force __le64)dsmas->dpa_length) - 1; + + rc = xa_insert(dsmas_xa, dent->handle, dent, GFP_KERNEL); + if (rc) { + kfree(dent); + return rc; + } + + return 0; +} + +static int cxl_cdat_endpoint_process(struct cxl_port *port, + struct xarray *dsmas_xa) +{ + return cdat_table_parse(ACPI_CDAT_TYPE_DSMAS, cdat_dsmas_handler, + dsmas_xa, port->cdat.table); +} + +static void discard_dsmas(struct xarray *xa) +{ + unsigned long index; + void *ent; + + xa_for_each(xa, index, ent) { + xa_erase(xa, index); + kfree(ent); + } + xa_destroy(xa); +} +DEFINE_FREE(dsmas, struct xarray *, if (_T) discard_dsmas(_T)) + +void cxl_endpoint_parse_cdat(struct cxl_port *port) +{ + struct xarray __dsmas_xa; + struct xarray *dsmas_xa __free(dsmas) = &__dsmas_xa; + int rc; + + xa_init(&__dsmas_xa); + if (!port->cdat.table) + return; + + rc = cxl_cdat_endpoint_process(port, dsmas_xa); + if (rc < 0) { + dev_dbg(&port->dev, "Failed to parse CDAT: %d\n", rc); + return; + } + + /* Performance data processing */ +} +EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL); + +MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 687043ece101..be3b5eda875c 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -839,6 +839,8 @@ static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev) } #endif +void cxl_endpoint_parse_cdat(struct cxl_port *port); + /* * Unit test builds overrides this to __weak, find the 'strong' version * of these symbols in tools/testing/cxl/. diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 47bc8e0b8590..a889c4e6cb27 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -109,6 +109,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) /* Cache the data early to ensure is_visible() works */ read_cdat_data(port); + cxl_endpoint_parse_cdat(port); get_device(&cxlmd->dev); rc = devm_add_action_or_reset(&port->dev, schedule_detach, cxlmd); diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 95dc58b94178..0b12c36902d8 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -58,6 +58,7 @@ cxl_core-y += $(CXL_CORE_SRC)/mbox.o cxl_core-y += $(CXL_CORE_SRC)/pci.o cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += $(CXL_CORE_SRC)/pmu.o +cxl_core-y += $(CXL_CORE_SRC)/cdat.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o cxl_core-y += config_check.o -- cgit From 63cef81b9dca6ddf1c34d697016f830ddcfadf28 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:20 -0700 Subject: cxl: Add callback to parse the DSLBIS subtable from CDAT Provide a callback to parse the Device Scoped Latency and Bandwidth Information Structure (DSLBIS) in the CDAT structures. The DSLBIS contains the bandwidth and latency information that's tied to a DSMAS handle. The driver will retrieve the read and write latency and bandwidth associated with the DSMAS which is tied to a DPA range. Coherent Device Attribute Table 1.03 2.1 Device Scoped Latency and Bandwidth Information Structure (DSLBIS) Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319620005.2212653.7475488478229720542.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/core/cdat.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 9bf4f53bf77f..97d8ef8848c6 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -3,12 +3,15 @@ #include #include #include +#include +#include #include "cxlpci.h" #include "cxl.h" struct dsmas_entry { struct range dpa_range; u8 handle; + struct access_coordinate coord; }; static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg, @@ -49,11 +52,106 @@ static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg, return 0; } +static void cxl_access_coordinate_set(struct access_coordinate *coord, + int access, unsigned int val) +{ + switch (access) { + case ACPI_HMAT_ACCESS_LATENCY: + coord->read_latency = val; + coord->write_latency = val; + break; + case ACPI_HMAT_READ_LATENCY: + coord->read_latency = val; + break; + case ACPI_HMAT_WRITE_LATENCY: + coord->write_latency = val; + break; + case ACPI_HMAT_ACCESS_BANDWIDTH: + coord->read_bandwidth = val; + coord->write_bandwidth = val; + break; + case ACPI_HMAT_READ_BANDWIDTH: + coord->read_bandwidth = val; + break; + case ACPI_HMAT_WRITE_BANDWIDTH: + coord->write_bandwidth = val; + break; + } +} + +static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg, + const unsigned long end) +{ + struct acpi_cdat_header *hdr = &header->cdat; + struct acpi_cdat_dslbis *dslbis; + int size = sizeof(*hdr) + sizeof(*dslbis); + struct xarray *dsmas_xa = arg; + struct dsmas_entry *dent; + __le64 le_base; + __le16 le_val; + u64 val; + u16 len; + int rc; + + len = le16_to_cpu((__force __le16)hdr->length); + if (len != size || (unsigned long)hdr + len > end) { + pr_warn("Malformed DSLBIS table length: (%u:%u)\n", size, len); + return -EINVAL; + } + + /* Skip common header */ + dslbis = (struct acpi_cdat_dslbis *)(hdr + 1); + + /* Skip unrecognized data type */ + if (dslbis->data_type > ACPI_HMAT_WRITE_BANDWIDTH) + return 0; + + /* Not a memory type, skip */ + if ((dslbis->flags & ACPI_HMAT_MEMORY_HIERARCHY) != ACPI_HMAT_MEMORY) + return 0; + + dent = xa_load(dsmas_xa, dslbis->handle); + if (!dent) { + pr_warn("No matching DSMAS entry for DSLBIS entry.\n"); + return 0; + } + + le_base = (__force __le64)dslbis->entry_base_unit; + le_val = (__force __le16)dslbis->entry[0]; + rc = check_mul_overflow(le64_to_cpu(le_base), + le16_to_cpu(le_val), &val); + if (rc) + pr_warn("DSLBIS value overflowed.\n"); + + cxl_access_coordinate_set(&dent->coord, dslbis->data_type, val); + + return 0; +} + +static int cdat_table_parse_output(int rc) +{ + if (rc < 0) + return rc; + if (rc == 0) + return -ENOENT; + + return 0; +} + static int cxl_cdat_endpoint_process(struct cxl_port *port, struct xarray *dsmas_xa) { - return cdat_table_parse(ACPI_CDAT_TYPE_DSMAS, cdat_dsmas_handler, - dsmas_xa, port->cdat.table); + int rc; + + rc = cdat_table_parse(ACPI_CDAT_TYPE_DSMAS, cdat_dsmas_handler, + dsmas_xa, port->cdat.table); + rc = cdat_table_parse_output(rc); + if (rc) + return rc; + + rc = cdat_table_parse(ACPI_CDAT_TYPE_DSLBIS, cdat_dslbis_handler, + dsmas_xa, port->cdat.table); + return cdat_table_parse_output(rc); } static void discard_dsmas(struct xarray *xa) -- cgit From 80aa780dda20618be76162bf991d49cf962fda38 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:26 -0700 Subject: cxl: Add callback to parse the SSLBIS subtable from CDAT Provide a callback to parse the Switched Scoped Latency and Bandwidth Information Structure (SSLBIS) in the CDAT structures. The SSLBIS contains the bandwidth and latency information that's tied to the CXL switch that the data table has been read from. The extracted values are stored to the cxl_dport correlated by the port_id depending on the SSLBIS entry. Coherent Device Attribute Table 1.03 2.1 Switched Scoped Latency and Bandwidth Information Structure (DSLBIS) Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319620635.2212653.5194389158785365150.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/core/cdat.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 4 ++ drivers/cxl/port.c | 2 + 3 files changed, 104 insertions(+) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 97d8ef8848c6..b3ab47d250e1 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -187,4 +187,102 @@ void cxl_endpoint_parse_cdat(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL); +static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, + const unsigned long end) +{ + struct acpi_cdat_sslbis *sslbis; + int size = sizeof(header->cdat) + sizeof(*sslbis); + struct cxl_port *port = arg; + struct device *dev = &port->dev; + struct acpi_cdat_sslbe *entry; + int remain, entries, i; + u16 len; + + len = le16_to_cpu((__force __le16)header->cdat.length); + remain = len - size; + if (!remain || remain % sizeof(*entry) || + (unsigned long)header + len > end) { + dev_warn(dev, "Malformed SSLBIS table length: (%u)\n", len); + return -EINVAL; + } + + /* Skip common header */ + sslbis = (struct acpi_cdat_sslbis *)((unsigned long)header + + sizeof(header->cdat)); + + /* Unrecognized data type, we can skip */ + if (sslbis->data_type > ACPI_HMAT_WRITE_BANDWIDTH) + return 0; + + entries = remain / sizeof(*entry); + entry = (struct acpi_cdat_sslbe *)((unsigned long)header + sizeof(*sslbis)); + + for (i = 0; i < entries; i++) { + u16 x = le16_to_cpu((__force __le16)entry->portx_id); + u16 y = le16_to_cpu((__force __le16)entry->porty_id); + __le64 le_base; + __le16 le_val; + struct cxl_dport *dport; + unsigned long index; + u16 dsp_id; + u64 val; + + switch (x) { + case ACPI_CDAT_SSLBIS_US_PORT: + dsp_id = y; + break; + case ACPI_CDAT_SSLBIS_ANY_PORT: + switch (y) { + case ACPI_CDAT_SSLBIS_US_PORT: + dsp_id = x; + break; + case ACPI_CDAT_SSLBIS_ANY_PORT: + dsp_id = ACPI_CDAT_SSLBIS_ANY_PORT; + break; + default: + dsp_id = y; + break; + } + break; + default: + dsp_id = x; + break; + } + + le_base = (__force __le64)sslbis->entry_base_unit; + le_val = (__force __le16)entry->latency_or_bandwidth; + + if (check_mul_overflow(le64_to_cpu(le_base), + le16_to_cpu(le_val), &val)) + dev_warn(dev, "SSLBIS value overflowed!\n"); + + xa_for_each(&port->dports, index, dport) { + if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT || + dsp_id == dport->port_id) + cxl_access_coordinate_set(&dport->sw_coord, + sslbis->data_type, + val); + } + + entry++; + } + + return 0; +} + +void cxl_switch_parse_cdat(struct cxl_port *port) +{ + int rc; + + if (!port->cdat.table) + return; + + rc = cdat_table_parse(ACPI_CDAT_TYPE_SSLBIS, cdat_sslbis_handler, + port, port->cdat.table); + rc = cdat_table_parse_output(rc); + if (rc) + dev_dbg(&port->dev, "Failed to parse SSLBIS: %d\n", rc); +} +EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL); + MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index be3b5eda875c..22f664b9f4c6 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -8,6 +8,7 @@ #include #include #include +#include #include /** @@ -634,6 +635,7 @@ struct cxl_rcrb_info { * @rch: Indicate whether this dport was enumerated in RCH or VH mode * @port: reference to cxl_port that contains this downstream port * @regs: Dport parsed register blocks + * @sw_coord: access coordinates (performance) for switch from CDAT */ struct cxl_dport { struct device *dport_dev; @@ -643,6 +645,7 @@ struct cxl_dport { bool rch; struct cxl_port *port; struct cxl_regs regs; + struct access_coordinate sw_coord; }; /** @@ -840,6 +843,7 @@ static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev) #endif void cxl_endpoint_parse_cdat(struct cxl_port *port); +void cxl_switch_parse_cdat(struct cxl_port *port); /* * Unit test builds overrides this to __weak, find the 'strong' version diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index a889c4e6cb27..da3c3a08bd62 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -69,6 +69,8 @@ static int cxl_switch_port_probe(struct cxl_port *port) if (rc < 0) return rc; + cxl_switch_parse_cdat(port); + cxlhdm = devm_cxl_setup_hdm(port, NULL); if (!IS_ERR(cxlhdm)) return devm_cxl_enumerate_decoders(cxlhdm, NULL); -- cgit From 790815902ec61ba1715fd67d3cb9036e13c942bc Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:32 -0700 Subject: cxl: Add support for _DSM Function for retrieving QTG ID CXL spec v3.0 9.17.3 CXL Root Device Specific Methods (_DSM) Add support to retrieve QTG ID via ACPI _DSM call. The _DSM call requires an input of an ACPI package with 4 dwords (read latency, write latency, read bandwidth, write bandwidth). The call returns a package with 1 WORD that provides the max supported QTG ID and a package that may contain 0 or more WORDs as the recommended QTG IDs in the recommended order. Create a cxl_root container for the root cxl_port and provide a callback ->get_qos_class() in order to retrieve the QoS class. For the ACPI case, the _DSM helper is used to retrieve the QTG ID and returned. A devm_cxl_add_root() function is added for root port setup and registration of the cxl_root callback operation(s). Signed-off-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/170319621294.2212653.1649682083061569256.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++-- drivers/cxl/core/port.c | 49 ++++++++++++++---- drivers/cxl/cxl.h | 25 +++++++++ 3 files changed, 193 insertions(+), 13 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 2034eb4ce83f..2f7de910ce57 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "cxlpci.h" #include "cxl.h" @@ -17,6 +18,10 @@ struct cxl_cxims_data { u64 xormaps[] __counted_by(nr_maps); }; +static const guid_t acpi_cxl_qtg_id_guid = + GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071, + 0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52); + /* * Find a targets entry (n) in the host bridge interleave list. * CXL Specification 3.0 Table 9-22 @@ -194,6 +199,125 @@ struct cxl_cfmws_context { int id; }; +/** + * cxl_acpi_evaluate_qtg_dsm - Retrieve QTG ids via ACPI _DSM + * @handle: ACPI handle + * @coord: performance access coordinates + * @entries: number of QTG IDs to return + * @qos_class: int array provided by caller to return QTG IDs + * + * Return: number of QTG IDs returned, or -errno for errors + * + * Issue QTG _DSM with accompanied bandwidth and latency data in order to get + * the QTG IDs that are suitable for the performance point in order of most + * suitable to least suitable. Write back array of QTG IDs and return the + * actual number of QTG IDs written back. + */ +static int +cxl_acpi_evaluate_qtg_dsm(acpi_handle handle, struct access_coordinate *coord, + int entries, int *qos_class) +{ + union acpi_object *out_obj, *out_buf, *obj; + union acpi_object in_array[4] = { + [0].integer = { ACPI_TYPE_INTEGER, coord->read_latency }, + [1].integer = { ACPI_TYPE_INTEGER, coord->write_latency }, + [2].integer = { ACPI_TYPE_INTEGER, coord->read_bandwidth }, + [3].integer = { ACPI_TYPE_INTEGER, coord->write_bandwidth }, + }; + union acpi_object in_obj = { + .package = { + .type = ACPI_TYPE_PACKAGE, + .count = 4, + .elements = in_array, + }, + }; + int count, pkg_entries, i; + u16 max_qtg; + int rc; + + if (!entries) + return -EINVAL; + + out_obj = acpi_evaluate_dsm(handle, &acpi_cxl_qtg_id_guid, 1, 1, &in_obj); + if (!out_obj) + return -ENXIO; + + if (out_obj->type != ACPI_TYPE_PACKAGE) { + rc = -ENXIO; + goto out; + } + + /* Check Max QTG ID */ + obj = &out_obj->package.elements[0]; + if (obj->type != ACPI_TYPE_INTEGER) { + rc = -ENXIO; + goto out; + } + + max_qtg = obj->integer.value; + + /* It's legal to have 0 QTG entries */ + pkg_entries = out_obj->package.count; + if (pkg_entries <= 1) { + rc = 0; + goto out; + } + + /* Retrieve QTG IDs package */ + obj = &out_obj->package.elements[1]; + if (obj->type != ACPI_TYPE_PACKAGE) { + rc = -ENXIO; + goto out; + } + + pkg_entries = obj->package.count; + count = min(entries, pkg_entries); + for (i = 0; i < count; i++) { + u16 qtg_id; + + out_buf = &obj->package.elements[i]; + if (out_buf->type != ACPI_TYPE_INTEGER) { + rc = -ENXIO; + goto out; + } + + qtg_id = out_buf->integer.value; + if (qtg_id > max_qtg) + pr_warn("QTG ID %u greater than MAX %u\n", + qtg_id, max_qtg); + + qos_class[i] = qtg_id; + } + rc = count; + +out: + ACPI_FREE(out_obj); + return rc; +} + +static int cxl_acpi_qos_class(struct cxl_port *root_port, + struct access_coordinate *coord, int entries, + int *qos_class) +{ + acpi_handle handle; + struct device *dev; + + dev = root_port->uport_dev; + + if (!dev_is_platform(dev)) + return -ENODEV; + + handle = ACPI_HANDLE(dev); + if (!handle) + return -ENODEV; + + return cxl_acpi_evaluate_qtg_dsm(handle, coord, entries, qos_class); +} + +static const struct cxl_root_ops acpi_root_ops = { + .qos_class = cxl_acpi_qos_class, +}; + static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, const unsigned long end) { @@ -656,6 +780,7 @@ static int cxl_acpi_probe(struct platform_device *pdev) { int rc; struct resource *cxl_res; + struct cxl_root *cxl_root; struct cxl_port *root_port; struct device *host = &pdev->dev; struct acpi_device *adev = ACPI_COMPANION(host); @@ -675,9 +800,10 @@ static int cxl_acpi_probe(struct platform_device *pdev) cxl_res->end = -1; cxl_res->flags = IORESOURCE_MEM; - root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); - if (IS_ERR(root_port)) - return PTR_ERR(root_port); + cxl_root = devm_cxl_add_root(host, &acpi_root_ops); + if (IS_ERR(cxl_root)) + return PTR_ERR(cxl_root); + root_port = &cxl_root->port; rc = bus_for_each_dev(adev->dev.bus, NULL, root_port, add_host_bridge_dport); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index b7c93bb18f6e..9393cbf04652 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -541,7 +541,10 @@ static void cxl_port_release(struct device *dev) xa_destroy(&port->dports); xa_destroy(&port->regions); ida_free(&cxl_port_ida, port->id); - kfree(port); + if (is_cxl_root(port)) + kfree(to_cxl_root(port)); + else + kfree(port); } static ssize_t decoders_committed_show(struct device *dev, @@ -669,17 +672,31 @@ static struct lock_class_key cxl_port_key; static struct cxl_port *cxl_port_alloc(struct device *uport_dev, struct cxl_dport *parent_dport) { - struct cxl_port *port; + struct cxl_root *cxl_root __free(kfree) = NULL; + struct cxl_port *port, *_port __free(kfree) = NULL; struct device *dev; int rc; - port = kzalloc(sizeof(*port), GFP_KERNEL); - if (!port) - return ERR_PTR(-ENOMEM); + /* No parent_dport, root cxl_port */ + if (!parent_dport) { + cxl_root = kzalloc(sizeof(*cxl_root), GFP_KERNEL); + if (!cxl_root) + return ERR_PTR(-ENOMEM); + } else { + _port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!_port) + return ERR_PTR(-ENOMEM); + } rc = ida_alloc(&cxl_port_ida, GFP_KERNEL); if (rc < 0) - goto err; + return ERR_PTR(rc); + + if (cxl_root) + port = &no_free_ptr(cxl_root)->port; + else + port = no_free_ptr(_port); + port->id = rc; port->uport_dev = uport_dev; @@ -731,10 +748,6 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev, dev->type = &cxl_port_type; return port; - -err: - kfree(port); - return ERR_PTR(rc); } static int cxl_setup_comp_regs(struct device *host, struct cxl_register_map *map, @@ -884,6 +897,22 @@ struct cxl_port *devm_cxl_add_port(struct device *host, } EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL); +struct cxl_root *devm_cxl_add_root(struct device *host, + const struct cxl_root_ops *ops) +{ + struct cxl_root *cxl_root; + struct cxl_port *port; + + port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); + if (IS_ERR(port)) + return (struct cxl_root *)port; + + cxl_root = to_cxl_root(port); + cxl_root->ops = ops; + return cxl_root; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_root, CXL); + struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port) { /* There is no pci_bus associated with a CXL platform-root port */ diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 22f664b9f4c6..abbdcd3a7596 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -615,6 +615,29 @@ struct cxl_port { bool cdat_available; }; +struct cxl_root_ops { + int (*qos_class)(struct cxl_port *root_port, + struct access_coordinate *coord, int entries, + int *qos_class); +}; + +/** + * struct cxl_root - logical collection of root cxl_port items + * + * @port: cxl_port member + * @ops: cxl root operations + */ +struct cxl_root { + struct cxl_port port; + const struct cxl_root_ops *ops; +}; + +static inline struct cxl_root * +to_cxl_root(const struct cxl_port *port) +{ + return container_of(port, struct cxl_root, port); +} + static inline struct cxl_dport * cxl_find_dport_by_dev(struct cxl_port *port, const struct device *dport_dev) { @@ -703,6 +726,8 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport_dev, resource_size_t component_reg_phys, struct cxl_dport *parent_dport); +struct cxl_root *devm_cxl_add_root(struct device *host, + const struct cxl_root_ops *ops); struct cxl_port *find_cxl_root(struct cxl_port *port); int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd); void cxl_bus_rescan(void); -- cgit From 4d07a05397c8c15c37c8c3abb7afaea1dcd2f0e7 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:39 -0700 Subject: cxl: Calculate and store PCI link latency for the downstream ports The latency is calculated by dividing the flit size over the bandwidth. Add support to retrieve the flit size for the CXL switch device and calculate the latency of the PCIe link. Cache the latency number with cxl_dport. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319621931.2212653.6800240203604822886.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/core/core.h | 2 ++ drivers/cxl/core/pci.c | 36 ++++++++++++++++++++++++++++++++++++ drivers/cxl/core/port.c | 6 ++++++ drivers/cxl/cxl.h | 4 ++++ drivers/cxl/cxlpci.h | 13 +++++++++++++ drivers/pci/pci.c | 38 ++++++++++++++++++++++++++++++++++++-- include/linux/pci.h | 1 + 7 files changed, 98 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 86d7ba23235e..3b64fb1b9ed0 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -88,4 +88,6 @@ enum cxl_poison_trace_type { CXL_POISON_TRACE_CLEAR, }; +long cxl_pci_get_latency(struct pci_dev *pdev); + #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 37e1652afbc7..6c9c8d92f8f7 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2021 Intel Corporation. All rights reserved. */ +#include #include #include #include @@ -979,3 +980,38 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, return PCI_ERS_RESULT_NEED_RESET; } EXPORT_SYMBOL_NS_GPL(cxl_error_detected, CXL); + +static int cxl_flit_size(struct pci_dev *pdev) +{ + if (cxl_pci_flit_256(pdev)) + return 256; + + return 68; +} + +/** + * cxl_pci_get_latency - calculate the link latency for the PCIe link + * @pdev: PCI device + * + * return: calculated latency or 0 for no latency + * + * CXL Memory Device SW Guide v1.0 2.11.4 Link latency calculation + * Link latency = LinkPropagationLatency + FlitLatency + RetimerLatency + * LinkProgationLatency is negligible, so 0 will be used + * RetimerLatency is assumed to be negligible and 0 will be used + * FlitLatency = FlitSize / LinkBandwidth + * FlitSize is defined by spec. CXL rev3.0 4.2.1. + * 68B flit is used up to 32GT/s. >32GT/s, 256B flit size is used. + * The FlitLatency is converted to picoseconds. + */ +long cxl_pci_get_latency(struct pci_dev *pdev) +{ + long bw; + + bw = pcie_link_speed_mbps(pdev); + if (bw < 0) + return 0; + bw /= BITS_PER_BYTE; + + return cxl_flit_size(pdev) * MEGA / bw; +} diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 9393cbf04652..b5ad227fe0d8 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -854,6 +854,9 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host, if (rc) return ERR_PTR(rc); + if (parent_dport && dev_is_pci(uport_dev)) + port->pci_latency = cxl_pci_get_latency(to_pci_dev(uport_dev)); + return port; err: @@ -1137,6 +1140,9 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, if (rc) return ERR_PTR(rc); + if (dev_is_pci(dport_dev)) + dport->link_latency = cxl_pci_get_latency(to_pci_dev(dport_dev)); + return dport; } diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index abbdcd3a7596..7da8db919a20 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -591,6 +591,7 @@ struct cxl_dax_region { * @depth: How deep this port is relative to the root. depth 0 is the root. * @cdat: Cached CDAT data * @cdat_available: Should a CDAT attribute be available in sysfs + * @pci_latency: Upstream latency in picoseconds */ struct cxl_port { struct device dev; @@ -613,6 +614,7 @@ struct cxl_port { size_t length; } cdat; bool cdat_available; + long pci_latency; }; struct cxl_root_ops { @@ -659,6 +661,7 @@ struct cxl_rcrb_info { * @port: reference to cxl_port that contains this downstream port * @regs: Dport parsed register blocks * @sw_coord: access coordinates (performance) for switch from CDAT + * @link_latency: calculated PCIe downstream latency */ struct cxl_dport { struct device *dport_dev; @@ -669,6 +672,7 @@ struct cxl_dport { struct cxl_port *port; struct cxl_regs regs; struct access_coordinate sw_coord; + long link_latency; }; /** diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 0fa4799ea316..711b05d9a370 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -85,6 +85,19 @@ struct cdat_entry_header { __le16 length; } __packed; +/* + * CXL v3.0 6.2.3 Table 6-4 + * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits + * mode, otherwise it's 68B flits mode. + */ +static inline bool cxl_pci_flit_256(struct pci_dev *pdev) +{ + u16 lnksta2; + + pcie_capability_read_word(pdev, PCI_EXP_LNKSTA2, &lnksta2); + return lnksta2 & PCI_EXP_LNKSTA2_FLIT; +} + int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 55bc3576a985..00817d403ad4 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6224,6 +6224,41 @@ int pcie_set_mps(struct pci_dev *dev, int mps) } EXPORT_SYMBOL(pcie_set_mps); +static enum pci_bus_speed to_pcie_link_speed(u16 lnksta) +{ + return pcie_link_speed[FIELD_GET(PCI_EXP_LNKSTA_CLS, lnksta)]; +} + +int pcie_link_speed_mbps(struct pci_dev *pdev) +{ + u16 lnksta; + int err; + + err = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta); + if (err) + return err; + + switch (to_pcie_link_speed(lnksta)) { + case PCIE_SPEED_2_5GT: + return 2500; + case PCIE_SPEED_5_0GT: + return 5000; + case PCIE_SPEED_8_0GT: + return 8000; + case PCIE_SPEED_16_0GT: + return 16000; + case PCIE_SPEED_32_0GT: + return 32000; + case PCIE_SPEED_64_0GT: + return 64000; + default: + break; + } + + return -EINVAL; +} +EXPORT_SYMBOL(pcie_link_speed_mbps); + /** * pcie_bandwidth_available - determine minimum link settings of a PCIe * device and its bandwidth limitation @@ -6257,8 +6292,7 @@ u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, while (dev) { pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); - next_speed = pcie_link_speed[FIELD_GET(PCI_EXP_LNKSTA_CLS, - lnksta)]; + next_speed = to_pcie_link_speed(lnksta); next_width = FIELD_GET(PCI_EXP_LNKSTA_NLW, lnksta); next_bw = next_width * PCIE_SPEED2MBS_ENC(next_speed); diff --git a/include/linux/pci.h b/include/linux/pci.h index dea043bc1e38..504a4ba2c29e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1364,6 +1364,7 @@ int pcie_set_mps(struct pci_dev *dev, int mps); u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, enum pci_bus_speed *speed, enum pcie_link_width *width); +int pcie_link_speed_mbps(struct pci_dev *pdev); void pcie_print_link_status(struct pci_dev *dev); int pcie_reset_flr(struct pci_dev *dev, bool probe); int pcie_flr(struct pci_dev *dev); -- cgit From f2202f990456acc52b50f6b14c95b232ac14429b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:45 -0700 Subject: tools/testing/cxl: Add hostbridge UID string for cxl_test mock hb devices In order to support acpi_device_uid() call, add static string to acpi_device->pnp.unique_id. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319622564.2212653.1534465446670631698.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- tools/testing/cxl/test/cxl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index f4e517a0c774..a3cdbb2be038 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -68,15 +68,19 @@ static struct acpi_device acpi0017_mock; static struct acpi_device host_bridge[NR_BRIDGES] = { [0] = { .handle = &host_bridge[0], + .pnp.unique_id = "0", }, [1] = { .handle = &host_bridge[1], + .pnp.unique_id = "1", }, [2] = { .handle = &host_bridge[2], + .pnp.unique_id = "2", }, [3] = { .handle = &host_bridge[3], + .pnp.unique_id = "3", }, }; -- cgit From 1037b82fccfe9c001ffa7a883651bb4cde7b705c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:51 -0700 Subject: cxl: Store the access coordinates for the generic ports Each CXL host bridge is represented by an ACPI0016 device. A generic port device handle that is an ACPI device is represented by a string of ACPI0016 device HID and UID. Create a device handle from the ACPI device and retrieve the access coordinates from the stored memory targets. The access coordinates are stored under the cxl_dport that is associated with the CXL host bridge. The access coordinates struct is dynamically allocated under cxl_dport in order for code later on to detect whether the data exists or not. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319623196.2212653.17916695743464172534.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 25 +++++++++++++++++++++++++ drivers/cxl/cxl.h | 2 ++ 2 files changed, 27 insertions(+) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 2f7de910ce57..afc712264d1c 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -513,8 +513,29 @@ static int cxl_get_chbs(struct device *dev, struct acpi_device *hb, return 0; } +static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport) +{ + struct acpi_device *hb = to_cxl_host_bridge(NULL, dev); + u32 uid; + int rc; + + if (kstrtou32(acpi_device_uid(hb), 0, &uid)) + return -EINVAL; + + rc = acpi_get_genport_coordinates(uid, &dport->hb_coord); + if (rc < 0) + return rc; + + /* Adjust back to picoseconds from nanoseconds */ + dport->hb_coord.read_latency *= 1000; + dport->hb_coord.write_latency *= 1000; + + return 0; +} + static int add_host_bridge_dport(struct device *match, void *arg) { + int ret; acpi_status rc; struct device *bridge; struct cxl_dport *dport; @@ -564,6 +585,10 @@ static int add_host_bridge_dport(struct device *match, void *arg) if (IS_ERR(dport)) return PTR_ERR(dport); + ret = get_genport_coordinates(match, dport); + if (ret) + dev_dbg(match, "Failed to get generic port perf coordinates.\n"); + return 0; } diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 7da8db919a20..dd234f3b9ed4 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -661,6 +661,7 @@ struct cxl_rcrb_info { * @port: reference to cxl_port that contains this downstream port * @regs: Dport parsed register blocks * @sw_coord: access coordinates (performance) for switch from CDAT + * @hb_coord: access coordinates (performance) from ACPI generic port (host bridge) * @link_latency: calculated PCIe downstream latency */ struct cxl_dport { @@ -672,6 +673,7 @@ struct cxl_dport { struct cxl_port *port; struct cxl_regs regs; struct access_coordinate sw_coord; + struct access_coordinate hb_coord; long link_latency; }; -- cgit From 14a6960b3e928ccea22d687fb0626237885a20bd Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:58 -0700 Subject: cxl: Add helper function that calculate performance data for downstream ports The CDAT information from the switch, Switch Scoped Latency and Bandwidth Information Structure (SSLBIS), is parsed and stored under a cxl_dport based on the correlated downstream port id from the SSLBIS entry. Walk the entire CXL port paths and collect all the performance data. Also pick up the link latency number that's stored under the dports. The entire path PCIe bandwidth can be retrieved using the pcie_bandwidth_available() call. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319623824.2212653.10302079766473698427.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 3 ++ 2 files changed, 78 insertions(+) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index b5ad227fe0d8..8c00fd6be730 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -2094,6 +2095,80 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd) } EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL); +static void combine_coordinates(struct access_coordinate *c1, + struct access_coordinate *c2) +{ + if (c2->write_bandwidth) + c1->write_bandwidth = min(c1->write_bandwidth, + c2->write_bandwidth); + c1->write_latency += c2->write_latency; + + if (c2->read_bandwidth) + c1->read_bandwidth = min(c1->read_bandwidth, + c2->read_bandwidth); + c1->read_latency += c2->read_latency; +} + +/** + * cxl_endpoint_get_perf_coordinates - Retrieve performance numbers stored in dports + * of CXL path + * @port: endpoint cxl_port + * @coord: output performance data + * + * Return: errno on failure, 0 on success. + */ +int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, + struct access_coordinate *coord) +{ + struct access_coordinate c = { + .read_bandwidth = UINT_MAX, + .write_bandwidth = UINT_MAX, + }; + struct cxl_port *iter = port; + struct cxl_dport *dport; + struct pci_dev *pdev; + unsigned int bw; + + if (!is_cxl_endpoint(port)) + return -EINVAL; + + dport = iter->parent_dport; + + /* + * Exit the loop when the parent port of the current port is cxl root. + * The iterative loop starts at the endpoint and gathers the + * latency of the CXL link from the current iter to the next downstream + * port each iteration. If the parent is cxl root then there is + * nothing to gather. + */ + while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) { + combine_coordinates(&c, &dport->sw_coord); + c.write_latency += dport->link_latency; + c.read_latency += dport->link_latency; + + iter = to_cxl_port(iter->dev.parent); + dport = iter->parent_dport; + } + + /* Augment with the generic port (host bridge) perf data */ + combine_coordinates(&c, &dport->hb_coord); + + /* Get the calculated PCI paths bandwidth */ + pdev = to_pci_dev(port->uport_dev->parent); + bw = pcie_bandwidth_available(pdev, NULL, NULL, NULL); + if (bw == 0) + return -ENXIO; + bw /= BITS_PER_BYTE; + + c.write_bandwidth = min(c.write_bandwidth, bw); + c.read_bandwidth = min(c.read_bandwidth, bw); + + *coord = c; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_endpoint_get_perf_coordinates, CXL); + /* for user tooling to ensure port disable work has completed */ static ssize_t flush_store(const struct bus_type *bus, const char *buf, size_t count) { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index dd234f3b9ed4..492dbf63935f 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -876,6 +876,9 @@ static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev) void cxl_endpoint_parse_cdat(struct cxl_port *port); void cxl_switch_parse_cdat(struct cxl_port *port); +int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, + struct access_coordinate *coord); + /* * Unit test builds overrides this to __weak, find the 'strong' version * of these symbols in tools/testing/cxl/. -- cgit From 7a4f148dd8d518bc1e012aa738b0ed6244959293 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:04:04 -0700 Subject: cxl: Compute the entire CXL path latency and bandwidth data CXL Memory Device SW Guide [1] rev1.0 2.11.2 provides instruction on how to calculate latency and bandwidth for CXL memory device. Calculate minimum bandwidth and total latency for the path from the CXL device to the root port. The QTG id is retrieved by providing the performance data as input and calling the root port callback ->get_qos_class(). The retrieved id is stored with the cxl_port of the CXL device. For example for a device that is directly attached to a host bus: Total Latency = Device Latency (from CDAT) + Dev to Host Bus (HB) Link Latency + Generic Port Latency Min Bandwidth = Min bandwidth for link bandwidth between HB and CXL device, device CDAT bandwidth, and Generic Port Bandwidth For a device that has a switch in between host bus and CXL device: Total Latency = Device (CDAT) Latency + Dev to Switch Link Latency + Switch (CDAT) Latency + Switch to HB Link Latency + Generic Port Latency Min Bandwidth = Min bandwidth for link bandwidth between CXL device to CXL switch, CXL device CDAT bandwidth, CXL switch CDAT bandwidth, CXL switch to HB bandwidth, and Generic Port Bandwidth. [1]: https://cdrdv2-public.intel.com/643805/643805_CXL%20Memory%20Device%20SW%20Guide_Rev1p0.pdf Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319624458.2212653.13252496567443656371.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/core/cdat.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index b3ab47d250e1..43dfef80fb84 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -12,6 +12,9 @@ struct dsmas_entry { struct range dpa_range; u8 handle; struct access_coordinate coord; + + int entries; + int qos_class; }; static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg, @@ -154,6 +157,55 @@ static int cxl_cdat_endpoint_process(struct cxl_port *port, return cdat_table_parse_output(rc); } +static int cxl_port_perf_data_calculate(struct cxl_port *port, + struct xarray *dsmas_xa) +{ + struct access_coordinate c; + struct cxl_port *root_port; + struct cxl_root *cxl_root; + struct dsmas_entry *dent; + int valid_entries = 0; + unsigned long index; + int rc; + + rc = cxl_endpoint_get_perf_coordinates(port, &c); + if (rc) { + dev_dbg(&port->dev, "Failed to retrieve perf coordinates.\n"); + return rc; + } + + root_port = find_cxl_root(port); + cxl_root = to_cxl_root(root_port); + if (!cxl_root->ops || !cxl_root->ops->qos_class) + return -EOPNOTSUPP; + + xa_for_each(dsmas_xa, index, dent) { + int qos_class; + + dent->coord.read_latency = dent->coord.read_latency + + c.read_latency; + dent->coord.write_latency = dent->coord.write_latency + + c.write_latency; + dent->coord.read_bandwidth = min_t(int, c.read_bandwidth, + dent->coord.read_bandwidth); + dent->coord.write_bandwidth = min_t(int, c.write_bandwidth, + dent->coord.write_bandwidth); + + dent->entries = 1; + rc = cxl_root->ops->qos_class(root_port, &dent->coord, 1, &qos_class); + if (rc != 1) + continue; + + valid_entries++; + dent->qos_class = qos_class; + } + + if (!valid_entries) + return -ENOENT; + + return 0; +} + static void discard_dsmas(struct xarray *xa) { unsigned long index; @@ -183,7 +235,12 @@ void cxl_endpoint_parse_cdat(struct cxl_port *port) return; } - /* Performance data processing */ + rc = cxl_port_perf_data_calculate(port, dsmas_xa); + if (rc) { + dev_dbg(&port->dev, "Failed to do perf coord calculations.\n"); + return; + } + } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL); -- cgit From 86557b7edf77d2a3835136c325c8baa6fe803234 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:04:11 -0700 Subject: cxl: Store QTG IDs and related info to the CXL memory device context Once the QTG ID _DSM is executed successfully, the QTG ID is retrieved from the return package. Create a list of entries in the cxl_memdev context and store the QTG ID as qos_class token and the associated DPA range. This information can be exposed to user space via sysfs in order to help region setup for hot-plugged CXL memory devices. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319625109.2212653.11872111896220384056.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/core/cdat.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/core/mbox.c | 2 ++ drivers/cxl/cxlmem.h | 21 +++++++++++++++ 3 files changed, 92 insertions(+) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 43dfef80fb84..6189d967f399 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -6,6 +6,7 @@ #include #include #include "cxlpci.h" +#include "cxlmem.h" #include "cxl.h" struct dsmas_entry { @@ -206,6 +207,71 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port, return 0; } +static void add_perf_entry(struct device *dev, struct dsmas_entry *dent, + struct list_head *list) +{ + struct cxl_dpa_perf *dpa_perf; + + dpa_perf = kzalloc(sizeof(*dpa_perf), GFP_KERNEL); + if (!dpa_perf) + return; + + dpa_perf->dpa_range = dent->dpa_range; + dpa_perf->coord = dent->coord; + dpa_perf->qos_class = dent->qos_class; + list_add_tail(&dpa_perf->list, list); + dev_dbg(dev, + "DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n", + dent->dpa_range.start, dpa_perf->qos_class, + dent->coord.read_bandwidth, dent->coord.write_bandwidth, + dent->coord.read_latency, dent->coord.write_latency); +} + +static void free_perf_ents(void *data) +{ + struct cxl_memdev_state *mds = data; + struct cxl_dpa_perf *dpa_perf, *n; + LIST_HEAD(discard); + + list_splice_tail_init(&mds->ram_perf_list, &discard); + list_splice_tail_init(&mds->pmem_perf_list, &discard); + list_for_each_entry_safe(dpa_perf, n, &discard, list) { + list_del(&dpa_perf->list); + kfree(dpa_perf); + } +} + +static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds, + struct xarray *dsmas_xa) +{ + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct device *dev = cxlds->dev; + struct range pmem_range = { + .start = cxlds->pmem_res.start, + .end = cxlds->pmem_res.end, + }; + struct range ram_range = { + .start = cxlds->ram_res.start, + .end = cxlds->ram_res.end, + }; + struct dsmas_entry *dent; + unsigned long index; + + xa_for_each(dsmas_xa, index, dent) { + if (resource_size(&cxlds->ram_res) && + range_contains(&ram_range, &dent->dpa_range)) + add_perf_entry(dev, dent, &mds->ram_perf_list); + else if (resource_size(&cxlds->pmem_res) && + range_contains(&pmem_range, &dent->dpa_range)) + add_perf_entry(dev, dent, &mds->pmem_perf_list); + else + dev_dbg(dev, "no partition for dsmas dpa: %#llx\n", + dent->dpa_range.start); + } + + devm_add_action_or_reset(&cxlds->cxlmd->dev, free_perf_ents, mds); +} + static void discard_dsmas(struct xarray *xa) { unsigned long index; @@ -221,6 +287,8 @@ DEFINE_FREE(dsmas, struct xarray *, if (_T) discard_dsmas(_T)) void cxl_endpoint_parse_cdat(struct cxl_port *port) { + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; struct xarray __dsmas_xa; struct xarray *dsmas_xa __free(dsmas) = &__dsmas_xa; int rc; @@ -241,6 +309,7 @@ void cxl_endpoint_parse_cdat(struct cxl_port *port) return; } + cxl_memdev_set_qos_class(cxlds, dsmas_xa); } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL); diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 36270dcfb42e..fbaa508ab245 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1404,6 +1404,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) mds->cxlds.reg_map.host = dev; mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; + INIT_LIST_HEAD(&mds->ram_perf_list); + INIT_LIST_HEAD(&mds->pmem_perf_list); return mds; } diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index a2fcbca253f3..205bc2a016b2 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "cxl.h" /* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */ @@ -391,6 +392,20 @@ enum cxl_devtype { CXL_DEVTYPE_CLASSMEM, }; +/** + * struct cxl_dpa_perf - DPA performance property entry + * @list - list entry + * @dpa_range - range for DPA address + * @coord - QoS performance data (i.e. latency, bandwidth) + * @qos_class - QoS Class cookies + */ +struct cxl_dpa_perf { + struct list_head list; + struct range dpa_range; + struct access_coordinate coord; + int qos_class; +}; + /** * struct cxl_dev_state - The driver device state * @@ -455,6 +470,8 @@ struct cxl_dev_state { * @security: security driver state info * @fw: firmware upload / activation state * @mbox_send: @dev specific transport for transmitting mailbox commands + * @ram_perf_list: performance data entries matched to RAM + * @pmem_perf_list: performance data entries matched to PMEM * * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for * details on capacity parameters. @@ -475,6 +492,10 @@ struct cxl_memdev_state { u64 active_persistent_bytes; u64 next_volatile_bytes; u64 next_persistent_bytes; + + struct list_head ram_perf_list; + struct list_head pmem_perf_list; + struct cxl_event_state event; struct cxl_poison_state poison; struct cxl_security_state security; -- cgit From 42834b17cf1f00fa79ff1f02134f9c576a125252 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:04:16 -0700 Subject: cxl: Export sysfs attributes for memory device QoS class Export qos_class sysfs attributes for the CXL memory device. The QoS clas should show up as /sys/bus/cxl/devices/memX/ram/qos_class for the volatile partition and /sys/bus/cxl/devices/memX/pmem/qos_class for the persistent partition. The QTG ID is retrieved via _DSM after supplying the calculated bandwidth and latency for the entire CXL path from device to the CPU. This ID is used to match up to the root decoder QoS class to determine which CFMWS the memory range of a hotplugged CXL mem device should be assigned under. While there may be multiple DSMAS exported by the device CDAT, the driver will only expose the first QTG ID per partition in sysfs for now. In the future when multiple QTG IDs are necessary, they can be exposed. [1] [1]: https://lore.kernel.org/linux-cxl/167571650007.587790.10040913293130712882.stgit@djiang5-mobl3.local/T/#md2a47b1ead3e1ba08f50eab29a4af1aed1d215ab Suggested-by: Dan Williams Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319625698.2212653.17544381274847420961.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- Documentation/ABI/testing/sysfs-bus-cxl | 34 +++++++++++++++++ drivers/cxl/mem.c | 67 ++++++++++++++++++++++++++++++--- 2 files changed, 95 insertions(+), 6 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index e76c3600607f..fff2581b8033 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -28,6 +28,23 @@ Description: Payload in the CXL-2.0 specification. +What: /sys/bus/cxl/devices/memX/ram/qos_class +Date: May, 2023 +KernelVersion: v6.8 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) For CXL host platforms that support "QoS Telemmetry" + this attribute conveys a comma delimited list of platform + specific cookies that identifies a QoS performance class + for the volatile partition of the CXL mem device. These + class-ids can be compared against a similar "qos_class" + published for a root decoder. While it is not required + that the endpoints map their local memory-class to a + matching platform class, mismatches are not recommended + and there are platform specific performance related + side-effects that may result. First class-id is displayed. + + What: /sys/bus/cxl/devices/memX/pmem/size Date: December, 2020 KernelVersion: v5.12 @@ -38,6 +55,23 @@ Description: Payload in the CXL-2.0 specification. +What: /sys/bus/cxl/devices/memX/pmem/qos_class +Date: May, 2023 +KernelVersion: v6.8 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) For CXL host platforms that support "QoS Telemmetry" + this attribute conveys a comma delimited list of platform + specific cookies that identifies a QoS performance class + for the persistent partition of the CXL mem device. These + class-ids can be compared against a similar "qos_class" + published for a root decoder. While it is not required + that the endpoints map their local memory-class to a + matching platform class, mismatches are not recommended + and there are platform specific performance related + side-effects that may result. First class-id is displayed. + + What: /sys/bus/cxl/devices/memX/serial Date: January, 2022 KernelVersion: v5.18 diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index e087febf9af0..c5c9d8e0d88d 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -215,23 +215,78 @@ static ssize_t trigger_poison_list_store(struct device *dev, } static DEVICE_ATTR_WO(trigger_poison_list); +static ssize_t ram_qos_class_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct cxl_dpa_perf *dpa_perf; + + if (!dev->driver) + return -ENOENT; + + if (list_empty(&mds->ram_perf_list)) + return -ENOENT; + + dpa_perf = list_first_entry(&mds->ram_perf_list, struct cxl_dpa_perf, + list); + + return sysfs_emit(buf, "%d\n", dpa_perf->qos_class); +} + +static struct device_attribute dev_attr_ram_qos_class = + __ATTR(qos_class, 0444, ram_qos_class_show, NULL); + +static ssize_t pmem_qos_class_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct cxl_dpa_perf *dpa_perf; + + if (!dev->driver) + return -ENOENT; + + if (list_empty(&mds->pmem_perf_list)) + return -ENOENT; + + dpa_perf = list_first_entry(&mds->pmem_perf_list, struct cxl_dpa_perf, + list); + + return sysfs_emit(buf, "%d\n", dpa_perf->qos_class); +} + +static struct device_attribute dev_attr_pmem_qos_class = + __ATTR(qos_class, 0444, pmem_qos_class_show, NULL); + static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) { - if (a == &dev_attr_trigger_poison_list.attr) { - struct device *dev = kobj_to_dev(kobj); - struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = - to_cxl_memdev_state(cxlmd->cxlds); + struct device *dev = kobj_to_dev(kobj); + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + if (a == &dev_attr_trigger_poison_list.attr) if (!test_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds)) return 0; - } + + if (a == &dev_attr_pmem_qos_class.attr) + if (list_empty(&mds->pmem_perf_list)) + return 0; + + if (a == &dev_attr_ram_qos_class.attr) + if (list_empty(&mds->ram_perf_list)) + return 0; + return a->mode; } static struct attribute *cxl_mem_attrs[] = { &dev_attr_trigger_poison_list.attr, + &dev_attr_ram_qos_class.attr, + &dev_attr_pmem_qos_class.attr, NULL }; -- cgit From 185c1a489f873cb71520fc089401e02dbf302dcd Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:04:23 -0700 Subject: cxl: Check qos_class validity on memdev probe Add a check to make sure the qos_class for the device will match one of the root decoders qos_class. If no match is found, then the qos_class for the device is set to invalid. Also add a check to ensure that the device's host bridge matches to one of the root decoder's downstream targets. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319626313.2212653.9021004640856081917.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/core/cdat.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 6189d967f399..cd84d87f597a 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -272,6 +272,108 @@ static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds, devm_add_action_or_reset(&cxlds->cxlmd->dev, free_perf_ents, mds); } +static int match_cxlrd_qos_class(struct device *dev, void *data) +{ + int dev_qos_class = *(int *)data; + struct cxl_root_decoder *cxlrd; + + if (!is_root_decoder(dev)) + return 0; + + cxlrd = to_cxl_root_decoder(dev); + if (cxlrd->qos_class == CXL_QOS_CLASS_INVALID) + return 0; + + if (cxlrd->qos_class == dev_qos_class) + return 1; + + return 0; +} + +static void cxl_qos_match(struct cxl_port *root_port, + struct list_head *work_list, + struct list_head *discard_list) +{ + struct cxl_dpa_perf *dpa_perf, *n; + + list_for_each_entry_safe(dpa_perf, n, work_list, list) { + int rc; + + if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID) + return; + + rc = device_for_each_child(&root_port->dev, + (void *)&dpa_perf->qos_class, + match_cxlrd_qos_class); + if (!rc) + list_move_tail(&dpa_perf->list, discard_list); + } +} + +static int match_cxlrd_hb(struct device *dev, void *data) +{ + struct device *host_bridge = data; + struct cxl_switch_decoder *cxlsd; + struct cxl_root_decoder *cxlrd; + unsigned int seq; + + if (!is_root_decoder(dev)) + return 0; + + cxlrd = to_cxl_root_decoder(dev); + cxlsd = &cxlrd->cxlsd; + + do { + seq = read_seqbegin(&cxlsd->target_lock); + for (int i = 0; i < cxlsd->nr_targets; i++) { + if (host_bridge == cxlsd->target[i]->dport_dev) + return 1; + } + } while (read_seqretry(&cxlsd->target_lock, seq)); + + return 0; +} + +static void discard_dpa_perf(struct list_head *list) +{ + struct cxl_dpa_perf *dpa_perf, *n; + + list_for_each_entry_safe(dpa_perf, n, list, list) { + list_del(&dpa_perf->list); + kfree(dpa_perf); + } +} +DEFINE_FREE(dpa_perf, struct list_head *, if (!list_empty(_T)) discard_dpa_perf(_T)) + +static int cxl_qos_class_verify(struct cxl_memdev *cxlmd) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct cxl_port *root_port __free(put_device) = NULL; + LIST_HEAD(__discard); + struct list_head *discard __free(dpa_perf) = &__discard; + int rc; + + root_port = find_cxl_root(cxlmd->endpoint); + if (!root_port) + return -ENODEV; + + /* Check that the QTG IDs are all sane between end device and root decoders */ + cxl_qos_match(root_port, &mds->ram_perf_list, discard); + cxl_qos_match(root_port, &mds->pmem_perf_list, discard); + + /* Check to make sure that the device's host bridge is under a root decoder */ + rc = device_for_each_child(&root_port->dev, + (void *)cxlmd->endpoint->host_bridge, + match_cxlrd_hb); + if (!rc) { + list_splice_tail_init(&mds->ram_perf_list, discard); + list_splice_tail_init(&mds->pmem_perf_list, discard); + } + + return rc; +} + static void discard_dsmas(struct xarray *xa) { unsigned long index; @@ -310,6 +412,7 @@ void cxl_endpoint_parse_cdat(struct cxl_port *port) } cxl_memdev_set_qos_class(cxlds, dsmas_xa); + cxl_qos_class_verify(cxlmd); } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL); -- cgit