From 024b32db43a359e0ded3fcc6cd86247cbbed4224 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 21 Dec 2023 13:55:48 -0800 Subject: [PATCH 001/347] drm/bridge: parade-ps8640: Wait for HPD when doing an AUX transfer Unlike what is claimed in commit f5aa7d46b0ee ("drm/bridge: parade-ps8640: Provide wait_hpd_asserted() in struct drm_dp_aux"), if someone manually tries to do an AUX transfer (like via `i2cdump ${bus} 0x50 i`) while the panel is off we don't just get a simple transfer error. Instead, the whole ps8640 gets thrown for a loop and goes into a bad state. Let's put the function to wait for the HPD (and the magical 50 ms after first reset) back in when we're doing an AUX transfer. This shouldn't actually make things much slower (assuming the panel is on) because we should immediately poll and see the HPD high. Mostly this is just an extra i2c transfer to the bridge. Fixes: f5aa7d46b0ee ("drm/bridge: parade-ps8640: Provide wait_hpd_asserted() in struct drm_dp_aux") Tested-by: Pin-yen Lin Reviewed-by: Pin-yen Lin Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231221135548.1.I10f326a9305d57ad32cee7f8d9c60518c8be20fb@changeid --- drivers/gpu/drm/bridge/parade-ps8640.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index 541e4f5afc4c..fb5e9ae9ad81 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -346,6 +346,11 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, int ret; pm_runtime_get_sync(dev); + ret = _ps8640_wait_hpd_asserted(ps_bridge, 200 * 1000); + if (ret) { + pm_runtime_put_sync_suspend(dev); + return ret; + } ret = ps8640_aux_transfer_msg(aux, msg); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); From 571c7ed0baa928447bac29ef79a90bd6525d1ebc Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 13 Dec 2023 16:42:01 -0600 Subject: [PATCH 002/347] dt-bindings: display: samsung,exynos-mixer: Fix 'regs' typo The correct property name is 'reg' not 'regs'. Fixes: 68e89bb36d58 ("dt-bindings: display: samsung,exynos-mixer: convert to dtschema") Signed-off-by: Rob Herring Signed-off-by: Inki Dae --- .../bindings/display/samsung/samsung,exynos-mixer.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml b/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml index 25d53fde92e1..597c9cc6a312 100644 --- a/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml +++ b/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml @@ -85,7 +85,7 @@ allOf: clocks: minItems: 6 maxItems: 6 - regs: + reg: minItems: 2 maxItems: 2 @@ -99,7 +99,7 @@ allOf: clocks: minItems: 4 maxItems: 4 - regs: + reg: minItems: 2 maxItems: 2 @@ -116,7 +116,7 @@ allOf: clocks: minItems: 3 maxItems: 3 - regs: + reg: minItems: 1 maxItems: 1 From 2ad62d16cd24b5e2f18318e97e1f06bef9f1ce7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Tue, 9 Jan 2024 11:28:24 -0300 Subject: [PATCH 003/347] drm/v3d: Free the job and assign it to NULL if initialization fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, if `v3d_job_init()` fails (e.g. in the IGT test "bad-in-sync", where we submit an invalid in-sync to the IOCTL), then we end up with the following NULL pointer dereference: [ 34.146279] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000078 [ 34.146301] Mem abort info: [ 34.146306] ESR = 0x0000000096000005 [ 34.146315] EC = 0x25: DABT (current EL), IL = 32 bits [ 34.146322] SET = 0, FnV = 0 [ 34.146328] EA = 0, S1PTW = 0 [ 34.146334] FSC = 0x05: level 1 translation fault [ 34.146340] Data abort info: [ 34.146345] ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 [ 34.146351] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 34.146357] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 34.146366] user pgtable: 4k pages, 39-bit VAs, pgdp=00000001232e6000 [ 34.146375] [0000000000000078] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 [ 34.146399] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP [ 34.146406] Modules linked in: rfcomm snd_seq_dummy snd_hrtimer snd_seq snd_seq_device algif_hash aes_neon_bs aes_neon_blk algif_skcipher af_alg bnep hid_logitech_hidpp brcmfmac_wcc brcmfmac brcmutil hci_uart vc4 btbcm cfg80211 bluetooth bcm2835_v4l2(C) snd_soc_hdmi_codec binfmt_misc cec drm_display_helper hid_logitech_dj bcm2835_mmal_vchiq(C) drm_dma_helper drm_kms_helper videobuf2_v4l2 raspberrypi_hwmon ecdh_generic videobuf2_vmalloc videobuf2_memops ecc videobuf2_common rfkill videodev libaes snd_soc_core dwc2 i2c_brcmstb snd_pcm_dmaengine snd_bcm2835(C) i2c_bcm2835 pwm_bcm2835 snd_pcm mc v3d snd_timer snd gpu_sched drm_shmem_helper nvmem_rmem uio_pdrv_genirq uio i2c_dev drm fuse dm_mod drm_panel_orientation_quirks backlight configfs ip_tables x_tables ipv6 [ 34.146556] CPU: 1 PID: 1890 Comm: v3d_submit_csd Tainted: G C 6.7.0-rc3-g49ddab089611 #68 [ 34.146563] Hardware name: Raspberry Pi 4 Model B Rev 1.5 (DT) [ 34.146569] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 34.146575] pc : drm_sched_job_cleanup+0x3c/0x190 [gpu_sched] [ 34.146611] lr : v3d_submit_csd_ioctl+0x1b4/0x460 [v3d] [ 34.146653] sp : ffffffc083cbbb80 [ 34.146658] x29: ffffffc083cbbb90 x28: ffffff81035afc00 x27: ffffffe77a641168 [ 34.146668] x26: ffffff81056a8000 x25: 0000000000000058 x24: 0000000000000000 [ 34.146677] x23: ffffff81065e2000 x22: ffffff81035afe00 x21: ffffffc083cbbcf0 [ 34.146686] x20: ffffff81035afe00 x19: 00000000ffffffea x18: 0000000000000000 [ 34.146694] x17: 0000000000000000 x16: ffffffe7989e34b0 x15: 0000000000000000 [ 34.146703] x14: 0000000004000004 x13: ffffff81035afe80 x12: ffffffc083cb8000 [ 34.146711] x11: cc57e05dfbe5ef00 x10: cc57e05dfbe5ef00 x9 : ffffffe77a64131c [ 34.146719] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 000000000000003f [ 34.146727] x5 : 0000000000000040 x4 : ffffff81fefb03f0 x3 : ffffffc083cbba40 [ 34.146736] x2 : ffffff81056a8000 x1 : ffffffe7989e35e8 x0 : 0000000000000000 [ 34.146745] Call trace: [ 34.146748] drm_sched_job_cleanup+0x3c/0x190 [gpu_sched] [ 34.146768] v3d_submit_csd_ioctl+0x1b4/0x460 [v3d] [ 34.146791] drm_ioctl_kernel+0xe0/0x120 [drm] [ 34.147029] drm_ioctl+0x264/0x408 [drm] [ 34.147135] __arm64_sys_ioctl+0x9c/0xe0 [ 34.147152] invoke_syscall+0x4c/0x118 [ 34.147162] el0_svc_common+0xb8/0xf0 [ 34.147168] do_el0_svc+0x28/0x40 [ 34.147174] el0_svc+0x38/0x88 [ 34.147184] el0t_64_sync_handler+0x84/0x100 [ 34.147191] el0t_64_sync+0x190/0x198 [ 34.147201] Code: aa0003f4 f90007e8 f9401008 aa0803e0 (b8478c09) [ 34.147210] ---[ end trace 0000000000000000 ]--- This happens because we are calling `drm_sched_job_cleanup()` twice: once at `v3d_job_init()` and again when we call `v3d_job_cleanup()`. To mitigate this issue, we can return to the same approach that we used to use before 464c61e76de8: deallocate the job after `v3d_job_init()` fails and assign it to NULL. Then, when we call `v3d_job_cleanup()`, job is NULL and the function returns. Fixes: 464c61e76de8 ("drm/v3d: Decouple job allocation from job initiation") Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240109142857.1122704-1-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_submit.c | 35 +++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index fcff41dd2315..88f63d526b22 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -147,6 +147,13 @@ v3d_job_allocate(void **container, size_t size) return 0; } +static void +v3d_job_deallocate(void **container) +{ + kfree(*container); + *container = NULL; +} + static int v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, struct v3d_job *job, void (*free)(struct kref *ref), @@ -273,8 +280,10 @@ v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv, ret = v3d_job_init(v3d, file_priv, &(*job)->base, v3d_job_free, args->in_sync, se, V3D_CSD); - if (ret) + if (ret) { + v3d_job_deallocate((void *)job); return ret; + } ret = v3d_job_allocate((void *)clean_job, sizeof(**clean_job)); if (ret) @@ -282,8 +291,10 @@ v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv, ret = v3d_job_init(v3d, file_priv, *clean_job, v3d_job_free, 0, NULL, V3D_CACHE_CLEAN); - if (ret) + if (ret) { + v3d_job_deallocate((void *)clean_job); return ret; + } (*job)->args = *args; @@ -860,8 +871,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, ret = v3d_job_init(v3d, file_priv, &render->base, v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER); - if (ret) + if (ret) { + v3d_job_deallocate((void *)&render); goto fail; + } render->start = args->rcl_start; render->end = args->rcl_end; @@ -874,8 +887,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, ret = v3d_job_init(v3d, file_priv, &bin->base, v3d_job_free, args->in_sync_bcl, &se, V3D_BIN); - if (ret) + if (ret) { + v3d_job_deallocate((void *)&bin); goto fail; + } bin->start = args->bcl_start; bin->end = args->bcl_end; @@ -892,8 +907,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0, NULL, V3D_CACHE_CLEAN); - if (ret) + if (ret) { + v3d_job_deallocate((void *)&clean_job); goto fail; + } last_job = clean_job; } else { @@ -1015,8 +1032,10 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, ret = v3d_job_init(v3d, file_priv, &job->base, v3d_job_free, args->in_sync, &se, V3D_TFU); - if (ret) + if (ret) { + v3d_job_deallocate((void *)&job); goto fail; + } job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles), sizeof(*job->base.bo), GFP_KERNEL); @@ -1233,8 +1252,10 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data, ret = v3d_job_init(v3d, file_priv, &cpu_job->base, v3d_job_free, 0, &se, V3D_CPU); - if (ret) + if (ret) { + v3d_job_deallocate((void *)&cpu_job); goto fail; + } clean_job = cpu_job->indirect_csd.clean_job; csd_job = cpu_job->indirect_csd.job; From 45dd7df26cee741b31c25ffdd44fb8794eb45ccd Mon Sep 17 00:00:00 2001 From: Markus Niebel Date: Thu, 12 Oct 2023 10:42:08 +0200 Subject: [PATCH 004/347] drm: panel-simple: add missing bus flags for Tianma tm070jvhg[30/33] The DE signal is active high on this display, fill in the missing bus_flags. This aligns panel_desc with its display_timing. Fixes: 9a2654c0f62a ("drm/panel: Add and fill drm_panel type field") Fixes: b3bfcdf8a3b6 ("drm/panel: simple: add Tianma TM070JVHG33") Signed-off-by: Markus Niebel Signed-off-by: Alexander Stein Reviewed-by: Sam Ravnborg Link: https://lore.kernel.org/r/20231012084208.2731650-1-alexander.stein@ew.tq-group.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231012084208.2731650-1-alexander.stein@ew.tq-group.com --- drivers/gpu/drm/panel/panel-simple.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 9367a4572dcf..1ba633fd5696 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -3861,6 +3861,7 @@ static const struct panel_desc tianma_tm070jdhg30 = { }, .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, .connector_type = DRM_MODE_CONNECTOR_LVDS, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, }; static const struct panel_desc tianma_tm070jvhg33 = { @@ -3873,6 +3874,7 @@ static const struct panel_desc tianma_tm070jvhg33 = { }, .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, .connector_type = DRM_MODE_CONNECTOR_LVDS, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, }; static const struct display_timing tianma_tm070rvhg71_timing = { From 62b143b5ec4a14e1ae0dede5aabaf1832e3b0073 Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Fri, 5 Jan 2024 07:53:02 +0100 Subject: [PATCH 005/347] drm/panel: samsung-s6d7aa0: drop DRM_BUS_FLAG_DE_HIGH for lsl080al02 It turns out that I had misconfigured the device I was using the panel with; the bus data polarity is not high for this panel, I had to change the config on the display controller's side. Fix the panel config to properly reflect its accurate settings. Fixes: 6810bb390282 ("drm/panel: Add Samsung S6D7AA0 panel controller driver") Reviewed-by: Jessica Zhang Signed-off-by: Artur Weber Link: https://lore.kernel.org/r/20240105-tab3-display-fixes-v2-2-904d1207bf6f@gmail.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240105-tab3-display-fixes-v2-2-904d1207bf6f@gmail.com --- drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c index ea5a85779382..f23d8832a1ad 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c @@ -309,7 +309,7 @@ static const struct s6d7aa0_panel_desc s6d7aa0_lsl080al02_desc = { .off_func = s6d7aa0_lsl080al02_off, .drm_mode = &s6d7aa0_lsl080al02_mode, .mode_flags = MIPI_DSI_MODE_VSYNC_FLUSH | MIPI_DSI_MODE_VIDEO_NO_HFP, - .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .bus_flags = 0, .has_backlight = false, .use_passwd3 = false, From 589830b13ac21bddf99b9bc5a4ec17813d0869ef Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 23 Oct 2023 13:55:58 +0200 Subject: [PATCH 006/347] drm/panel/raydium-rm692e5: select CONFIG_DRM_DISPLAY_DP_HELPER As with several other panel drivers, this fails to link without the DP helper library: ld: drivers/gpu/drm/panel/panel-raydium-rm692e5.o: in function `rm692e5_prepare': panel-raydium-rm692e5.c:(.text+0x11f4): undefined reference to `drm_dsc_pps_payload_pack' Select the same symbols that the others already use. Fixes: 988d0ff29ecf7 ("drm/panel: Add driver for BOE RM692E5 AMOLED panel") Signed-off-by: Arnd Bergmann Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20231023115619.3551348-1-arnd@kernel.org Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231023115619.3551348-1-arnd@kernel.org --- drivers/gpu/drm/panel/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index 99e14dc212ec..a4ac4b47777f 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -530,6 +530,8 @@ config DRM_PANEL_RAYDIUM_RM692E5 depends on OF depends on DRM_MIPI_DSI depends on BACKLIGHT_CLASS_DEVICE + select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER help Say Y here if you want to enable support for Raydium RM692E5-based display panels, such as the one found in the Fairphone 5 smartphone. From a9668169961106f3598384fe95004106ec191201 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Jan 2024 17:34:14 +0300 Subject: [PATCH 007/347] HID: hid-steam: remove pointless error message This error message doesn't really add any information. If modprobe fails then the user will already know what the error code is. In the case of kmalloc() it's a style violation to print an error message for that because kmalloc has it's own better error messages built in. Signed-off-by: Dan Carpenter Reviewed-by: Vicki Pfau Link: https://lore.kernel.org/r/305898fb-6bd4-4749-806c-05ec51bbeb80@moroto.mountain Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-steam.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index b3c4e50e248a..59df6ead7b54 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -1109,10 +1109,9 @@ static int steam_probe(struct hid_device *hdev, return hid_hw_start(hdev, HID_CONNECT_DEFAULT); steam = devm_kzalloc(&hdev->dev, sizeof(*steam), GFP_KERNEL); - if (!steam) { - ret = -ENOMEM; - goto steam_alloc_fail; - } + if (!steam) + return -ENOMEM; + steam->hdev = hdev; hid_set_drvdata(hdev, steam); spin_lock_init(&steam->lock); @@ -1179,9 +1178,6 @@ hid_hw_start_fail: cancel_work_sync(&steam->work_connect); cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->rumble_work); -steam_alloc_fail: - hid_err(hdev, "%s: failed with error %d\n", - __func__, ret); return ret; } From a9f1da09c69f13ef471db8b22107a28042d230ca Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Jan 2024 17:35:06 +0300 Subject: [PATCH 008/347] HID: hid-steam: Fix cleanup in probe() There are a number of issues in this code. First of all if steam_create_client_hid() fails then it leads to an error pointer dereference when we call hid_destroy_device(steam->client_hdev). Also there are a number of leaks. hid_hw_stop() is not called if hid_hw_open() fails for example. And it doesn't call steam_unregister() or hid_hw_close(). Fixes: 691ead124a0c ("HID: hid-steam: Clean up locking") Signed-off-by: Dan Carpenter Reviewed-by: Vicki Pfau Link: https://lore.kernel.org/r/1fd87904-dabf-4879-bb89-72d13ebfc91e@moroto.mountain Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-steam.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index 59df6ead7b54..b08a5ab58528 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -1128,14 +1128,14 @@ static int steam_probe(struct hid_device *hdev, */ ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_HIDRAW); if (ret) - goto hid_hw_start_fail; + goto err_cancel_work; ret = hid_hw_open(hdev); if (ret) { hid_err(hdev, "%s:hid_hw_open\n", __func__); - goto hid_hw_open_fail; + goto err_hw_stop; } if (steam->quirks & STEAM_QUIRK_WIRELESS) { @@ -1151,33 +1151,37 @@ static int steam_probe(struct hid_device *hdev, hid_err(hdev, "%s:steam_register failed with error %d\n", __func__, ret); - goto input_register_fail; + goto err_hw_close; } } steam->client_hdev = steam_create_client_hid(hdev); if (IS_ERR(steam->client_hdev)) { ret = PTR_ERR(steam->client_hdev); - goto client_hdev_fail; + goto err_stream_unregister; } steam->client_hdev->driver_data = steam; ret = hid_add_device(steam->client_hdev); if (ret) - goto client_hdev_add_fail; + goto err_destroy; return 0; -client_hdev_add_fail: - hid_hw_stop(hdev); -client_hdev_fail: +err_destroy: hid_destroy_device(steam->client_hdev); -input_register_fail: -hid_hw_open_fail: -hid_hw_start_fail: +err_stream_unregister: + if (steam->connected) + steam_unregister(steam); +err_hw_close: + hid_hw_close(hdev); +err_hw_stop: + hid_hw_stop(hdev); +err_cancel_work: cancel_work_sync(&steam->work_connect); cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->rumble_work); + return ret; } From 1f1626ac0428820f998245478610f452650bcab5 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Sun, 14 Jan 2024 00:33:45 +0300 Subject: [PATCH 009/347] drm/ttm: fix ttm pool initialization for no-dma-device drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QXL driver doesn't use any device for DMA mappings or allocations so dev_to_node() will panic inside ttm_device_init() on NUMA systems: general protection fault, probably for non-canonical address 0xdffffc000000007a: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x00000000000003d0-0x00000000000003d7] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 6.7.0+ #9 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 RIP: 0010:ttm_device_init+0x10e/0x340 Call Trace: qxl_ttm_init+0xaa/0x310 qxl_device_init+0x1071/0x2000 qxl_pci_probe+0x167/0x3f0 local_pci_probe+0xe1/0x1b0 pci_device_probe+0x29d/0x790 really_probe+0x251/0x910 __driver_probe_device+0x1ea/0x390 driver_probe_device+0x4e/0x2e0 __driver_attach+0x1e3/0x600 bus_for_each_dev+0x12d/0x1c0 bus_add_driver+0x25a/0x590 driver_register+0x15c/0x4b0 qxl_pci_driver_init+0x67/0x80 do_one_initcall+0xf5/0x5d0 kernel_init_freeable+0x637/0xb10 kernel_init+0x1c/0x2e0 ret_from_fork+0x48/0x80 ret_from_fork_asm+0x1b/0x30 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:ttm_device_init+0x10e/0x340 Fall back to NUMA_NO_NODE if there is no device for DMA. Found by Linux Verification Center (linuxtesting.org). Fixes: b0a7ce53d494 ("drm/ttm: Schedule delayed_delete worker closer") Signed-off-by: Fedor Pchelkin Link: https://patchwork.freedesktop.org/patch/msgid/20240113213347.9562-1-pchelkin@ispras.ru Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_device.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index f5187b384ae9..4130945052ed 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -195,7 +195,7 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func bool use_dma_alloc, bool use_dma32) { struct ttm_global *glob = &ttm_glob; - int ret; + int ret, nid; if (WARN_ON(vma_manager == NULL)) return -EINVAL; @@ -215,7 +215,12 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func ttm_sys_man_init(bdev); - ttm_pool_init(&bdev->pool, dev, dev_to_node(dev), use_dma_alloc, use_dma32); + if (dev) + nid = dev_to_node(dev); + else + nid = NUMA_NO_NODE; + + ttm_pool_init(&bdev->pool, dev, nid, use_dma_alloc, use_dma32); bdev->vma_manager = vma_manager; spin_lock_init(&bdev->lru_lock); From 08ac6f132dd77e40f786d8af51140c96c6d739c9 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 3 Jan 2024 15:31:07 +0200 Subject: [PATCH 010/347] drm/bridge: sii902x: Fix probing race issue A null pointer dereference crash has been observed rarely on TI platforms using sii9022 bridge: [ 53.271356] sii902x_get_edid+0x34/0x70 [sii902x] [ 53.276066] sii902x_bridge_get_edid+0x14/0x20 [sii902x] [ 53.281381] drm_bridge_get_edid+0x20/0x34 [drm] [ 53.286305] drm_bridge_connector_get_modes+0x8c/0xcc [drm_kms_helper] [ 53.292955] drm_helper_probe_single_connector_modes+0x190/0x538 [drm_kms_helper] [ 53.300510] drm_client_modeset_probe+0x1f0/0xbd4 [drm] [ 53.305958] __drm_fb_helper_initial_config_and_unlock+0x50/0x510 [drm_kms_helper] [ 53.313611] drm_fb_helper_initial_config+0x48/0x58 [drm_kms_helper] [ 53.320039] drm_fbdev_dma_client_hotplug+0x84/0xd4 [drm_dma_helper] [ 53.326401] drm_client_register+0x5c/0xa0 [drm] [ 53.331216] drm_fbdev_dma_setup+0xc8/0x13c [drm_dma_helper] [ 53.336881] tidss_probe+0x128/0x264 [tidss] [ 53.341174] platform_probe+0x68/0xc4 [ 53.344841] really_probe+0x188/0x3c4 [ 53.348501] __driver_probe_device+0x7c/0x16c [ 53.352854] driver_probe_device+0x3c/0x10c [ 53.357033] __device_attach_driver+0xbc/0x158 [ 53.361472] bus_for_each_drv+0x88/0xe8 [ 53.365303] __device_attach+0xa0/0x1b4 [ 53.369135] device_initial_probe+0x14/0x20 [ 53.373314] bus_probe_device+0xb0/0xb4 [ 53.377145] deferred_probe_work_func+0xcc/0x124 [ 53.381757] process_one_work+0x1f0/0x518 [ 53.385770] worker_thread+0x1e8/0x3dc [ 53.389519] kthread+0x11c/0x120 [ 53.392750] ret_from_fork+0x10/0x20 The issue here is as follows: - tidss probes, but is deferred as sii902x is still missing. - sii902x starts probing and enters sii902x_init(). - sii902x calls drm_bridge_add(). Now the sii902x bridge is ready from DRM's perspective. - sii902x calls sii902x_audio_codec_init() and platform_device_register_data() - The registration of the audio platform device causes probing of the deferred devices. - tidss probes, which eventually causes sii902x_bridge_get_edid() to be called. - sii902x_bridge_get_edid() tries to use the i2c to read the edid. However, the sii902x driver has not set up the i2c part yet, leading to the crash. Fix this by moving the drm_bridge_add() to the end of the sii902x_init(), which is also at the very end of sii902x_probe(). Signed-off-by: Tomi Valkeinen Fixes: 21d808405fe4 ("drm/bridge/sii902x: Fix EDID readback") Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20240103-si902x-fixes-v1-1-b9fd3e448411@ideasonboard.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240103-si902x-fixes-v1-1-b9fd3e448411@ideasonboard.com --- drivers/gpu/drm/bridge/sii902x.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 2bdc5b439beb..69da73e414a9 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -1080,16 +1080,6 @@ static int sii902x_init(struct sii902x *sii902x) return ret; } - sii902x->bridge.funcs = &sii902x_bridge_funcs; - sii902x->bridge.of_node = dev->of_node; - sii902x->bridge.timings = &default_sii902x_timings; - sii902x->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID; - - if (sii902x->i2c->irq > 0) - sii902x->bridge.ops |= DRM_BRIDGE_OP_HPD; - - drm_bridge_add(&sii902x->bridge); - sii902x_audio_codec_init(sii902x, dev); i2c_set_clientdata(sii902x->i2c, sii902x); @@ -1102,7 +1092,21 @@ static int sii902x_init(struct sii902x *sii902x) return -ENOMEM; sii902x->i2cmux->priv = sii902x; - return i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); + ret = i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); + if (ret) + return ret; + + sii902x->bridge.funcs = &sii902x_bridge_funcs; + sii902x->bridge.of_node = dev->of_node; + sii902x->bridge.timings = &default_sii902x_timings; + sii902x->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID; + + if (sii902x->i2c->irq > 0) + sii902x->bridge.ops |= DRM_BRIDGE_OP_HPD; + + drm_bridge_add(&sii902x->bridge); + + return 0; } static int sii902x_probe(struct i2c_client *client) @@ -1170,12 +1174,11 @@ static int sii902x_probe(struct i2c_client *client) } static void sii902x_remove(struct i2c_client *client) - { struct sii902x *sii902x = i2c_get_clientdata(client); - i2c_mux_del_adapters(sii902x->i2cmux); drm_bridge_remove(&sii902x->bridge); + i2c_mux_del_adapters(sii902x->i2cmux); } static const struct of_device_id sii902x_dt_ids[] = { From 3fc6c76a8d208d3955c9e64b382d0ff370bc61fc Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 3 Jan 2024 15:31:08 +0200 Subject: [PATCH 011/347] drm/bridge: sii902x: Fix audio codec unregistration The driver never unregisters the audio codec platform device, which can lead to a crash on module reloading, nor does it handle the return value from sii902x_audio_codec_init(). Signed-off-by: Tomi Valkeinen Fixes: ff5781634c41 ("drm/bridge: sii902x: Implement HDMI audio support") Cc: Jyri Sarha Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20240103-si902x-fixes-v1-2-b9fd3e448411@ideasonboard.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240103-si902x-fixes-v1-2-b9fd3e448411@ideasonboard.com --- drivers/gpu/drm/bridge/sii902x.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 69da73e414a9..4560ae9cbce1 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -1080,7 +1080,9 @@ static int sii902x_init(struct sii902x *sii902x) return ret; } - sii902x_audio_codec_init(sii902x, dev); + ret = sii902x_audio_codec_init(sii902x, dev); + if (ret) + return ret; i2c_set_clientdata(sii902x->i2c, sii902x); @@ -1088,13 +1090,15 @@ static int sii902x_init(struct sii902x *sii902x) 1, 0, I2C_MUX_GATE, sii902x_i2c_bypass_select, sii902x_i2c_bypass_deselect); - if (!sii902x->i2cmux) - return -ENOMEM; + if (!sii902x->i2cmux) { + ret = -ENOMEM; + goto err_unreg_audio; + } sii902x->i2cmux->priv = sii902x; ret = i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); if (ret) - return ret; + goto err_unreg_audio; sii902x->bridge.funcs = &sii902x_bridge_funcs; sii902x->bridge.of_node = dev->of_node; @@ -1107,6 +1111,12 @@ static int sii902x_init(struct sii902x *sii902x) drm_bridge_add(&sii902x->bridge); return 0; + +err_unreg_audio: + if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev)) + platform_device_unregister(sii902x->audio.pdev); + + return ret; } static int sii902x_probe(struct i2c_client *client) @@ -1179,6 +1189,9 @@ static void sii902x_remove(struct i2c_client *client) drm_bridge_remove(&sii902x->bridge); i2c_mux_del_adapters(sii902x->i2cmux); + + if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev)) + platform_device_unregister(sii902x->audio.pdev); } static const struct of_device_id sii902x_dt_ids[] = { From 0a8c1feed387f8460b8b65fc46fb3608afa7512e Mon Sep 17 00:00:00 2001 From: Yangyu Chen Date: Wed, 17 Jan 2024 02:50:34 +0800 Subject: [PATCH 012/347] drm/ttm: allocate dummy_read_page without DMA32 on fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some platforms may not have any memory in ZONE_DMA32 and use IOMMU to allow 32-bit-DMA-only device to work. Forcing GFP_DMA32 on dummy_read_page will fail on such platforms. Retry after fail will get this works on such platforms. Signed-off-by: Yangyu Chen Link: https://patchwork.freedesktop.org/patch/msgid/tencent_8637383EE0A2C7CC870036AAF01909B26A0A@qq.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_device.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index d48b39132b32..c9fa8561f71f 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -95,11 +95,17 @@ static int ttm_global_init(void) ttm_pool_mgr_init(num_pages); ttm_tt_mgr_init(num_pages, num_dma32); - glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); + glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32 | + __GFP_NOWARN); + /* Retry without GFP_DMA32 for platforms DMA32 is not available */ if (unlikely(glob->dummy_read_page == NULL)) { - ret = -ENOMEM; - goto out; + glob->dummy_read_page = alloc_page(__GFP_ZERO); + if (unlikely(glob->dummy_read_page == NULL)) { + ret = -ENOMEM; + goto out; + } + pr_warn("Using GFP_DMA32 fallback for dummy_read_page\n"); } INIT_LIST_HEAD(&glob->device_list); From 3d9e9020b92288871b02f194c3ec88e03a1afa88 Mon Sep 17 00:00:00 2001 From: Khaled Almahallawy Date: Wed, 13 Dec 2023 13:15:42 -0800 Subject: [PATCH 013/347] drm/i915/dp: Fix passing the correct DPCD_REV for drm_dp_set_phy_test_pattern Using link_status to get DPCD_REV fails when disabling/defaulting phy pattern. Use intel_dp->dpcd to access DPCD_REV correctly. Fixes: 8cdf72711928 ("drm/i915/dp: Program vswing, pre-emphasis, test-pattern") Cc: Jani Nikula Cc: Imre Deak Cc: Lee Shawn C Signed-off-by: Khaled Almahallawy Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231213211542.3585105-3-khaled.almahallawy@intel.com (cherry picked from commit 3ee302ec22d6e1d7d1e6d381b0d507ee80f2135c) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index c3b906ebe542..f5ef95da5534 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4764,7 +4764,7 @@ static void intel_dp_process_phy_request(struct intel_dp *intel_dp, intel_dp->train_set, crtc_state->lane_count); drm_dp_set_phy_test_pattern(&intel_dp->aux, data, - link_status[DP_DPCD_REV]); + intel_dp->dpcd[DP_DPCD_REV]); } static u8 intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp) From 0103b4496087c99c195800456bf6a4fcf24fd444 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Mon, 18 Dec 2023 16:05:43 -0800 Subject: [PATCH 014/347] drm/i915/perf: Update handling of MMIO triggered reports On XEHP platforms user is not able to find MMIO triggered reports in the OA buffer since i915 squashes the context ID fields. These context ID fields hold the MMIO trigger markers. Update logic to not squash the context ID fields of MMIO triggered reports. Fixes: cba94bbcff08 ("drm/i915/perf: Determine context valid in OA reports") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20231219000543.1087706-1-umesh.nerlige.ramappa@intel.com (cherry picked from commit 0c68132df6e66244acec1bb5b9e19b0751414389) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_perf.c | 39 ++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 7b1c8de2f9cb..2d695818f006 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -772,10 +772,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * The reason field includes flags identifying what * triggered this specific report (mostly timer * triggered or e.g. due to a context switch). - * - * In MMIO triggered reports, some platforms do not set the - * reason bit in this field and it is valid to have a reason - * field of zero. */ reason = oa_report_reason(stream, report); ctx_id = oa_context_id(stream, report32); @@ -787,8 +783,41 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. + * + * Update: + * + * On XEHP platforms the behavior of context id valid bit has + * changed compared to prior platforms. To describe this, we + * define a few terms: + * + * context-switch-report: This is a report with the reason type + * being context-switch. It is generated when a context switches + * out. + * + * context-valid-bit: A bit that is set in the report ID field + * to indicate that a valid context has been loaded. + * + * gpu-idle: A condition characterized by a + * context-switch-report with context-valid-bit set to 0. + * + * On prior platforms, context-id-valid bit is set to 0 only + * when GPU goes idle. In all other reports, it is set to 1. + * + * On XEHP platforms, context-valid-bit is set to 1 in a context + * switch report if a new context switched in. For all other + * reports it is set to 0. + * + * This change in behavior causes an issue with MMIO triggered + * reports. MMIO triggered reports have the markers in the + * context ID field and the context-valid-bit is 0. The logic + * below to squash the context ID would render the report + * useless since the user will not be able to find it in the OA + * buffer. Since MMIO triggered reports exist only on XEHP, + * we should avoid squashing these for XEHP platforms. */ - if (oa_report_ctx_invalid(stream, report)) { + + if (oa_report_ctx_invalid(stream, report) && + GRAPHICS_VER_FULL(stream->engine->i915) < IP_VER(12, 50)) { ctx_id = INVALID_CTX_ID; oa_context_id_squash(stream, report32); } From 3eb791c891aa91603a5fbbfea940f8acf5f17d45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Tue, 16 Jan 2024 18:46:02 +0100 Subject: [PATCH 015/347] drm/tests: mm: Call drm_mm_print in drm_test_mm_debug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original intent behind the test was to sanity check whether calling the debug iterator (drm_mm_print) doesn't cause any problems. Unfortunately - this call got accidentally removed during KUnit transition. Restore it. Signed-off-by: Michał Winiarski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240116174602.1019512-1-michal.winiarski@intel.com --- drivers/gpu/drm/tests/drm_mm_test.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tests/drm_mm_test.c b/drivers/gpu/drm/tests/drm_mm_test.c index 05d5e7af6d25..6803cb2eb8fd 100644 --- a/drivers/gpu/drm/tests/drm_mm_test.c +++ b/drivers/gpu/drm/tests/drm_mm_test.c @@ -247,13 +247,13 @@ out: static void drm_test_mm_debug(struct kunit *test) { + struct drm_printer p = drm_debug_printer(test->name); struct drm_mm mm; struct drm_mm_node nodes[2]; /* Create a small drm_mm with a couple of nodes and a few holes, and * check that the debug iterator doesn't explode over a trivial drm_mm. */ - drm_mm_init(&mm, 0, 4096); memset(nodes, 0, sizeof(nodes)); @@ -268,6 +268,9 @@ static void drm_test_mm_debug(struct kunit *test) KUNIT_ASSERT_FALSE_MSG(test, drm_mm_reserve_node(&mm, &nodes[1]), "failed to reserve node[0] {start=%lld, size=%lld)\n", nodes[0].start, nodes[0].size); + + drm_mm_print(&mm, &p); + KUNIT_SUCCEED(test); } static struct drm_mm_node *set_node(struct drm_mm_node *node, From 26db46bc9c675e43230cc6accd110110a7654299 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Tue, 9 Jan 2024 20:04:57 +0800 Subject: [PATCH 016/347] drm/bridge: parade-ps8640: Ensure bridge is suspended in .post_disable() The ps8640 bridge seems to expect everything to be power cycled at the disable process, but sometimes ps8640_aux_transfer() holds the runtime PM reference and prevents the bridge from suspend. Prevent that by introducing a mutex lock between ps8640_aux_transfer() and .post_disable() to make sure the bridge is really powered off. Fixes: 826cff3f7ebb ("drm/bridge: parade-ps8640: Enable runtime power management") Signed-off-by: Pin-yen Lin Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240109120528.1292601-1-treapking@chromium.org --- drivers/gpu/drm/bridge/parade-ps8640.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index fb5e9ae9ad81..166bfc725ef4 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -107,6 +107,7 @@ struct ps8640 { struct device_link *link; bool pre_enabled; bool need_post_hpd_delay; + struct mutex aux_lock; }; static const struct regmap_config ps8640_regmap_config[] = { @@ -345,6 +346,7 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, struct device *dev = &ps_bridge->page[PAGE0_DP_CNTL]->dev; int ret; + mutex_lock(&ps_bridge->aux_lock); pm_runtime_get_sync(dev); ret = _ps8640_wait_hpd_asserted(ps_bridge, 200 * 1000); if (ret) { @@ -354,6 +356,7 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, ret = ps8640_aux_transfer_msg(aux, msg); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); + mutex_unlock(&ps_bridge->aux_lock); return ret; } @@ -475,7 +478,18 @@ static void ps8640_atomic_post_disable(struct drm_bridge *bridge, ps_bridge->pre_enabled = false; ps8640_bridge_vdo_control(ps_bridge, DISABLE); + + /* + * The bridge seems to expect everything to be power cycled at the + * disable process, so grab a lock here to make sure + * ps8640_aux_transfer() is not holding a runtime PM reference and + * preventing the bridge from suspend. + */ + mutex_lock(&ps_bridge->aux_lock); + pm_runtime_put_sync_suspend(&ps_bridge->page[PAGE0_DP_CNTL]->dev); + + mutex_unlock(&ps_bridge->aux_lock); } static int ps8640_bridge_attach(struct drm_bridge *bridge, @@ -624,6 +638,8 @@ static int ps8640_probe(struct i2c_client *client) if (!ps_bridge) return -ENOMEM; + mutex_init(&ps_bridge->aux_lock); + ps_bridge->supplies[0].supply = "vdd12"; ps_bridge->supplies[1].supply = "vdd33"; ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ps_bridge->supplies), From 658365c6b0857e6a306436e315a8633937e3af42 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Fri, 12 Jan 2024 12:19:27 +0800 Subject: [PATCH 017/347] scsi: isci: Fix an error code problem in isci_io_request_build() Clang static complains that Value stored to 'status' is never read. Return 'status' rather than 'SCI_SUCCESS'. Fixes: f1f52e75939b ("isci: uplevel request infrastructure") Signed-off-by: Su Hui Link: https://lore.kernel.org/r/20240112041926.3924315-1-suhui@nfschina.com Reviewed-by: Artur Paszkiewicz Signed-off-by: Martin K. Petersen --- drivers/scsi/isci/request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index 71f711cb0628..355a0bc0828e 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -3387,7 +3387,7 @@ static enum sci_status isci_io_request_build(struct isci_host *ihost, return SCI_FAILURE; } - return SCI_SUCCESS; + return status; } static struct isci_request *isci_request_from_tag(struct isci_host *ihost, u16 tag) From d6b75ba5218915be48542bcd7e2a09776b7c66c9 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 16 Jan 2024 12:58:36 +0800 Subject: [PATCH 018/347] scsi: virtio_scsi: Remove duplicate check if queue is broken virtqueue_enable_cb() will call virtqueue_poll() which will check if queue is broken at beginning, so remove the virtqueue_is_broken() call Signed-off-by: Li RongQing Link: https://lore.kernel.org/r/20240116045836.12475-1-lirongqing@baidu.com Reviewed-by: Stefan Hajnoczi Acked-by: Michael S. Tsirkin Signed-off-by: Martin K. Petersen --- drivers/scsi/virtio_scsi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 9d1bdcdc1331..e15b3809d059 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -182,8 +182,6 @@ static void virtscsi_vq_done(struct virtio_scsi *vscsi, while ((buf = virtqueue_get_buf(vq, &len)) != NULL) fn(vscsi, buf); - if (unlikely(virtqueue_is_broken(vq))) - break; } while (!virtqueue_enable_cb(vq)); spin_unlock_irqrestore(&virtscsi_vq->vq_lock, flags); } From e6f3799de2f2b2cd23c3894a127921dfb6c6a512 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Jan 2024 11:26:06 +0000 Subject: [PATCH 019/347] scsi: initio: Remove redundant variable 'rb' The variable 'rb' is being assigned a value but it isn't being read afterwards. The assignment is redundant and so 'rb' can be removed. Cleans up clang scan build warning: warning: Although the value stored to 'rb' is used in the enclosing expression, the value is never actually read from 'rb'[deadcode.DeadStores] Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20240116112606.2263738-1-colin.i.king@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/initio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c index 2a50fda3a628..625fd547ee60 100644 --- a/drivers/scsi/initio.c +++ b/drivers/scsi/initio.c @@ -371,7 +371,6 @@ static u16 initio_se2_rd(unsigned long base, u8 addr) */ static void initio_se2_wr(unsigned long base, u8 addr, u16 val) { - u8 rb; u8 instr; int i; @@ -400,7 +399,7 @@ static void initio_se2_wr(unsigned long base, u8 addr, u16 val) udelay(30); outb(SE2CS, base + TUL_NVRAM); /* -CLK */ udelay(30); - if ((rb = inb(base + TUL_NVRAM)) & SE2DI) + if (inb(base + TUL_NVRAM) & SE2DI) break; /* write complete */ } outb(0, base + TUL_NVRAM); /* -CS */ From 7d1ae55ffed0ff78ed33b1465c1233e05024e135 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Tue, 16 Jan 2024 13:55:09 -0800 Subject: [PATCH 020/347] scsi: MAINTAINERS: Update ibmvscsi_tgt maintainer Michael has not been responsible for this code as an IBMer for quite some time. Seeing as the rest of the IBM Virtual SCSI related drivers already fall under my purview replace Michael with myself as maintainer. Signed-off-by: Tyrel Datwyler Link: https://lore.kernel.org/r/20240116215509.1155787-1-tyreld@linux.ibm.com Signed-off-by: Martin K. Petersen --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index b5ac8c4ae434..afbd909258df 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10203,7 +10203,7 @@ F: drivers/scsi/ibmvscsi/ibmvscsi* F: include/scsi/viosrp.h IBM Power Virtual SCSI Device Target Driver -M: Michael Cyr +M: Tyrel Datwyler L: linux-scsi@vger.kernel.org L: target-devel@vger.kernel.org S: Supported From a20f1b02bafcbf5a32d96a1d4185d6981cf7d016 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 17 Jan 2024 10:35:03 -0800 Subject: [PATCH 021/347] drm/bridge: parade-ps8640: Make sure we drop the AUX mutex in the error case After commit 26db46bc9c67 ("drm/bridge: parade-ps8640: Ensure bridge is suspended in .post_disable()"), if we hit the error case in ps8640_aux_transfer() then we return without dropping the mutex. Fix this oversight. Fixes: 26db46bc9c67 ("drm/bridge: parade-ps8640: Ensure bridge is suspended in .post_disable()") Reviewed-by: Hsin-Yi Wang Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240117103502.1.Ib726a0184913925efc7e99c4d4fc801982e1bc24@changeid --- drivers/gpu/drm/bridge/parade-ps8640.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index 166bfc725ef4..14d4dcf239da 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -351,11 +351,13 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, ret = _ps8640_wait_hpd_asserted(ps_bridge, 200 * 1000); if (ret) { pm_runtime_put_sync_suspend(dev); - return ret; + goto exit; } ret = ps8640_aux_transfer_msg(aux, msg); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); + +exit: mutex_unlock(&ps_bridge->aux_lock); return ret; From 4d695869d3fb952d6f690bec0260ffe2a8b99b37 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 17 Jan 2024 14:27:15 +0100 Subject: [PATCH 022/347] selftests/hid: wacom: fix confidence tests The device is exported with a fuzz of 4, meaning that the `+ t` here is removed by the fuzz algorithm, making those tests failing. Not sure why, but when I run this locally it was passing, but not in the VM of the CI. Fixes: b0fb904d074e ("HID: wacom: Add additional tests of confidence behavior") Link: https://gitlab.freedesktop.org/bentiss/hid/-/jobs/53692957#L3315 Acked-by: Jason Gerecke Link: https://lore.kernel.org/r/20240117-b4-wip-wacom-tests-fixes-v1-1-f317784f3c36@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_wacom_generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py index 352fc39f3c6c..b62c7dba6777 100644 --- a/tools/testing/selftests/hid/tests/test_wacom_generic.py +++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py @@ -880,8 +880,8 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest does not overlap with other contacts. The value of `t` may be incremented over time to move the point along a linear path. """ - x = 50 + 10 * contact_id + t - y = 100 + 100 * contact_id + t + x = 50 + 10 * contact_id + t * 11 + y = 100 + 100 * contact_id + t * 11 return test_multitouch.Touch(contact_id, x, y) def make_contacts(self, n, t=0): @@ -902,8 +902,8 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest tracking_id = contact_ids.tracking_id slot_num = contact_ids.slot_num - x = 50 + 10 * contact_id + t - y = 100 + 100 * contact_id + t + x = 50 + 10 * contact_id + t * 11 + y = 100 + 100 * contact_id + t * 11 # If the data isn't supposed to be stored in any slots, there is # nothing we can check for in the evdev stream. From 6992eb815d087858f8d7e4020529c2fe800456b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 16 Jan 2024 23:08:21 +0200 Subject: [PATCH 023/347] Revert "drm/i915/dsi: Do display on sequence later on icl+" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 88b065943cb583e890324d618e8d4b23460d51a3. Lenovo 82TQ is unhappy if we do the display on sequence this late. The display output shows severe corruption. It's unclear if this is a failure on our part (perhaps something to do with sending commands in LP mode after HS /video mode transmission has been started? Though the backlight on command at least seems to work) or simply that there are some commands in the sequence that are needed to be done earlier (eg. could be some DSC init stuff?). If the latter then I don't think the current Windows code would work either, but maybe this was originally tested with an older driver, who knows. Root causing this fully would likely require a lot of experimentation which isn't really feasible without direct access to the machine, so let's just accept failure and go back to the original sequence. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10071 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240116210821.30194-1-ville.syrjala@linux.intel.com Acked-by: Jani Nikula (cherry picked from commit dc524d05974f615b145404191fcf91b478950499) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/icl_dsi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index ac456a2275db..eda4a8b88590 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1155,6 +1155,7 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder) } intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); /* ensure all panel commands dispatched before enabling transcoder */ wait_for_cmds_dispatched_to_panel(encoder); @@ -1255,8 +1256,6 @@ static void gen11_dsi_enable(struct intel_atomic_state *state, /* step6d: enable dsi transcoder */ gen11_dsi_enable_transcoder(encoder); - intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); - /* step7: enable backlight */ intel_backlight_enable(crtc_state, conn_state); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); From 84b5ece64477df4394d362d494a2496bf0878985 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 12 Jan 2024 07:49:12 -0800 Subject: [PATCH 024/347] drm/i915: Drop -Wstringop-overflow -Wstringop-overflow is broken on GCC11. In future changes it will be moved to the normal C flags in the top level Makefile (out of Makefile.extrawarn), but accounting for the compiler support. Just remove it out of i915's forced extra warnings, preparing for the upcoming change and avoiding build warnings to show up. Fixes: 2250c7ead8ad ("drm/i915: enable W=1 warnings by default") References: https://lore.kernel.org/all/45ad1d0f-a10f-483e-848a-76a30252edbe@paulmck-laptop/ Signed-off-by: Lucas De Marchi Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240112154912.1775199-1-lucas.demarchi@intel.com (cherry picked from commit 05ae67d95bade8b7facd5612baea21c12d243149) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e777686190ca..c13f14edb508 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -17,7 +17,6 @@ subdir-ccflags-y += $(call cc-option, -Wunused-const-variable) subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned) subdir-ccflags-y += $(call cc-option, -Wformat-overflow) subdir-ccflags-y += $(call cc-option, -Wformat-truncation) -subdir-ccflags-y += $(call cc-option, -Wstringop-overflow) subdir-ccflags-y += $(call cc-option, -Wstringop-truncation) # The following turn off the warnings enabled by -Wextra ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),) From 0991abeddefa118479b0af32c28bcd662dec1561 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Thu, 18 Jan 2024 09:52:52 +0800 Subject: [PATCH 025/347] exfat: fix zero the unwritten part for dio read For dio read, bio will be leave in flight when a successful partial aio read have been setup, blockdev_direct_IO() will return -EIOCBQUEUED. In the case, iter->iov_offset will be not advanced, the oops reported by syzbot will occur if revert iter->iov_offset with iov_iter_revert(). The unwritten part had been zeroed by aio read, so there is no need to zero it in dio read. Reported-by: syzbot+fd404f6b03a58e8bc403@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fd404f6b03a58e8bc403 Fixes: 11a347fb6cef ("exfat: change to get file size from DataLength") Signed-off-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/inode.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 522edcbb2ce4..0687f952956c 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -501,7 +501,7 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = mapping->host; struct exfat_inode_info *ei = EXFAT_I(inode); loff_t pos = iocb->ki_pos; - loff_t size = iocb->ki_pos + iov_iter_count(iter); + loff_t size = pos + iov_iter_count(iter); int rw = iov_iter_rw(iter); ssize_t ret; @@ -525,11 +525,10 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) */ ret = blockdev_direct_IO(iocb, inode, iter, exfat_get_block); if (ret < 0) { - if (rw == WRITE) + if (rw == WRITE && ret != -EIOCBQUEUED) exfat_write_failed(mapping, size); - if (ret != -EIOCBQUEUED) - return ret; + return ret; } else size = pos + ret; From 6a9531c3a88096a26cf3ac582f7ec44f94a7dcb2 Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Thu, 18 Jan 2024 14:18:53 +0800 Subject: [PATCH 026/347] memblock: fix crash when reserved memory is not added to memory After commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") nid of a reserved region is used by init_reserved_page() (with CONFIG_DEFERRED_STRUCT_PAGE_INIT=y) to access node strucure. In many cases the nid of the reserved memory is not set and this causes a crash. When the nid of a reserved region is not set, fall back to early_pfn_to_nid(), so that nid of the first_online_node will be passed to init_reserved_page(). Fixes: 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") Signed-off-by: Yajun Deng Link: https://lore.kernel.org/r/20240118061853.2652295-1-yajun.deng@linux.dev [rppt: massaged the commit message] Signed-off-by: Mike Rapoport (IBM) --- mm/memblock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memblock.c b/mm/memblock.c index 5a88d6d24d79..4823ad979b72 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2141,6 +2141,9 @@ static void __init memmap_init_reserved_pages(void) start = region->base; end = start + region->size; + if (nid == NUMA_NO_NODE || nid >= MAX_NUMNODES) + nid = early_pfn_to_nid(PFN_DOWN(start)); + reserve_bootmem_region(start, end, nid); } } From e626cb02ee8399fd42c415e542d031d185783903 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 18 Jan 2024 12:54:51 +0100 Subject: [PATCH 027/347] futex: Prevent the reuse of stale pi_state Jiri Slaby reported a futex state inconsistency resulting in -EINVAL during a lock operation for a PI futex. It requires that the a lock process is interrupted by a timeout or signal: T1 Owns the futex in user space. T2 Tries to acquire the futex in kernel (futex_lock_pi()). Allocates a pi_state and attaches itself to it. T2 Times out and removes its rt_waiter from the rt_mutex. Drops the rtmutex lock and tries to acquire the hash bucket lock to remove the futex_q. The lock is contended and T2 schedules out. T1 Unlocks the futex (futex_unlock_pi()). Finds a futex_q but no rt_waiter. Unlocks the futex (do_uncontended) and makes it available to user space. T3 Acquires the futex in user space. T4 Tries to acquire the futex in kernel (futex_lock_pi()). Finds the existing futex_q of T2 and tries to attach itself to the existing pi_state. This (attach_to_pi_state()) fails with -EINVAL because uval contains the TID of T3 but pi_state points to T1. It's incorrect to unlock the futex and make it available for user space to acquire as long as there is still an existing state attached to it in the kernel. T1 cannot hand over the futex to T2 because T2 already gave up and started to clean up and is blocked on the hash bucket lock, so T2's futex_q with the pi_state pointing to T1 is still queued. T2 observes the futex_q, but ignores it as there is no waiter on the corresponding rt_mutex and takes the uncontended path which allows the subsequent caller of futex_lock_pi() (T4) to observe that stale state. To prevent this the unlock path must dequeue all futex_q entries which point to the same pi_state when there is no waiter on the rt mutex. This requires obviously to make the dequeue conditional in the locking path to prevent a double dequeue. With that it's guaranteed that user space cannot observe an uncontended futex which has kernel state attached. Fixes: fbeb558b0dd0d ("futex/pi: Fix recursive rt_mutex waiter state") Reported-by: Jiri Slaby Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Tested-by: Jiri Slaby Link: https://lore.kernel.org/r/20240118115451.0TkD_ZhB@linutronix.de Closes: https://lore.kernel.org/all/4611bcf2-44d0-4c34-9b84-17406f881003@kernel.org --- kernel/futex/core.c | 15 ++++++++++++--- kernel/futex/pi.c | 11 ++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/kernel/futex/core.c b/kernel/futex/core.c index e0e853412c15..1e78ef24321e 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -627,12 +627,21 @@ retry: } /* - * PI futexes can not be requeued and must remove themselves from the - * hash bucket. The hash bucket lock (i.e. lock_ptr) is held. + * PI futexes can not be requeued and must remove themselves from the hash + * bucket. The hash bucket lock (i.e. lock_ptr) is held. */ void futex_unqueue_pi(struct futex_q *q) { - __futex_unqueue(q); + /* + * If the lock was not acquired (due to timeout or signal) then the + * rt_waiter is removed before futex_q is. If this is observed by + * an unlocker after dropping the rtmutex wait lock and before + * acquiring the hash bucket lock, then the unlocker dequeues the + * futex_q from the hash bucket list to guarantee consistent state + * vs. userspace. Therefore the dequeue here must be conditional. + */ + if (!plist_node_empty(&q->list)) + __futex_unqueue(q); BUG_ON(!q->pi_state); put_pi_state(q->pi_state); diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index 90e5197f4e56..5722467f2737 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -1135,6 +1135,7 @@ retry: hb = futex_hash(&key); spin_lock(&hb->lock); +retry_hb: /* * Check waiters first. We do not trust user space values at @@ -1177,12 +1178,17 @@ retry: /* * Futex vs rt_mutex waiter state -- if there are no rt_mutex * waiters even though futex thinks there are, then the waiter - * is leaving and the uncontended path is safe to take. + * is leaving. The entry needs to be removed from the list so a + * new futex_lock_pi() is not using this stale PI-state while + * the futex is available in user space again. + * There can be more than one task on its way out so it needs + * to retry. */ rt_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex); if (!rt_waiter) { + __futex_unqueue(top_waiter); raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - goto do_uncontended; + goto retry_hb; } get_pi_state(pi_state); @@ -1217,7 +1223,6 @@ retry: return ret; } -do_uncontended: /* * We have no kernel internal state, i.e. no waiters in the * kernel. Waiters which are about to queue themselves are stuck From 72b0cbf6b81003c01d63c60180b335f7692d170e Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 19 Jan 2024 17:57:07 +0800 Subject: [PATCH 028/347] smb: Fix some kernel-doc comments Fix some kernel-doc comments to silence the warnings: fs/smb/server/transport_tcp.c:374: warning: Function parameter or struct member 'max_retries' not described in 'ksmbd_tcp_read' fs/smb/server/transport_tcp.c:423: warning: Function parameter or struct member 'iface' not described in 'create_socket' Signed-off-by: Yang Li Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index 9d4222154dcc..002a3f0dc7c5 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -365,6 +365,7 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, * @t: TCP transport instance * @buf: buffer to store read data from socket * @to_read: number of bytes to read from socket + * @max_retries: number of retries if reading from socket fails * * Return: on success return number of bytes read from socket, * otherwise return error number @@ -416,6 +417,7 @@ static void tcp_destroy_socket(struct socket *ksmbd_socket) /** * create_socket - create socket for ksmbd/0 + * @iface: interface to bind the created socket to * * Return: 0 on success, error number otherwise */ From 0086ffec768bec6f5d61fc7e406af640eb912a24 Mon Sep 17 00:00:00 2001 From: Stanley Chan Date: Mon, 27 Nov 2023 15:20:48 -0600 Subject: [PATCH 029/347] tools cpupower bench: Override CFLAGS assignments Allow user to specify outside CFLAGS values as make argument Corrects an issue where CFLAGS is passed as a make argument for cpupower, but bench's makefile does not inherit and append to them. Signed-off-by: Stanley Chan Signed-off-by: Shuah Khan --- tools/power/cpupower/bench/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile index d9d9923af85c..a4b902f9e1c4 100644 --- a/tools/power/cpupower/bench/Makefile +++ b/tools/power/cpupower/bench/Makefile @@ -15,7 +15,7 @@ LIBS = -L../ -L$(OUTPUT) -lm -lcpupower OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o endif -CFLAGS += -D_GNU_SOURCE -I../lib -DDEFAULT_CONFIG_FILE=\"$(confdir)/cpufreq-bench.conf\" +override CFLAGS += -D_GNU_SOURCE -I../lib -DDEFAULT_CONFIG_FILE=\"$(confdir)/cpufreq-bench.conf\" $(OUTPUT)%.o : %.c $(ECHO) " CC " $@ From 477552e1d339d9fa654a363c9c0f1e9c4f087d2b Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Fri, 15 Dec 2023 09:38:19 +0900 Subject: [PATCH 030/347] drm/exynos: fix incorrect type issue Fix incorrect type issue in fimd_commit() of exynos_drm_fimd.c module. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312140930.Me9yWf8F-lkp@intel.com/ Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 8dde7b1e9b35..a090ff837075 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -480,7 +480,7 @@ static void fimd_commit(struct exynos_drm_crtc *crtc) struct fimd_context *ctx = crtc->ctx; struct drm_display_mode *mode = &crtc->base.state->adjusted_mode; const struct fimd_driver_data *driver_data = ctx->driver_data; - void *timing_base = ctx->regs + driver_data->timing_base; + void __iomem *timing_base = ctx->regs + driver_data->timing_base; u32 val; if (ctx->suspended) From 960b537e91725bcb17dd1b19e48950e62d134078 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Dec 2023 13:32:15 +0100 Subject: [PATCH 031/347] drm/exynos: fix accidental on-stack copy of exynos_drm_plane gcc rightfully complains about excessive stack usage in the fimd_win_set_pixfmt() function: drivers/gpu/drm/exynos/exynos_drm_fimd.c: In function 'fimd_win_set_pixfmt': drivers/gpu/drm/exynos/exynos_drm_fimd.c:750:1: error: the frame size of 1032 bytes is larger than 1024 byte drivers/gpu/drm/exynos/exynos5433_drm_decon.c: In function 'decon_win_set_pixfmt': drivers/gpu/drm/exynos/exynos5433_drm_decon.c:381:1: error: the frame size of 1032 bytes is larger than 1024 bytes There is really no reason to copy the large exynos_drm_plane structure to the stack before using one of its members, so just use a pointer instead. Fixes: 6f8ee5c21722 ("drm/exynos: fimd: Make plane alpha configurable") Signed-off-by: Arnd Bergmann Reviewed-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 ++-- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 4d986077738b..bce027552474 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -319,9 +319,9 @@ static void decon_win_set_bldmod(struct decon_context *ctx, unsigned int win, static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win, struct drm_framebuffer *fb) { - struct exynos_drm_plane plane = ctx->planes[win]; + struct exynos_drm_plane *plane = &ctx->planes[win]; struct exynos_drm_plane_state *state = - to_exynos_plane_state(plane.base.state); + to_exynos_plane_state(plane->base.state); unsigned int alpha = state->base.alpha; unsigned int pixel_alpha; unsigned long val; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index a090ff837075..65489e18ab91 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -661,9 +661,9 @@ static void fimd_win_set_bldmod(struct fimd_context *ctx, unsigned int win, static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win, struct drm_framebuffer *fb, int width) { - struct exynos_drm_plane plane = ctx->planes[win]; + struct exynos_drm_plane *plane = &ctx->planes[win]; struct exynos_drm_plane_state *state = - to_exynos_plane_state(plane.base.state); + to_exynos_plane_state(plane->base.state); uint32_t pixel_format = fb->format->format; unsigned int alpha = state->base.alpha; u32 val = WINCONx_ENWIN; From 4050957c7c2c14aa795dbf423b4180d5ac04e113 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 20 Dec 2023 12:53:15 +0300 Subject: [PATCH 032/347] drm/exynos: gsc: minor fix for loop iteration in gsc_runtime_resume Do not forget to call clk_disable_unprepare() on the first element of ctx->clocks array. Found by Linux Verification Center (linuxtesting.org). Fixes: 8b7d3ec83aba ("drm/exynos: gsc: Convert driver to IPP v2 core API") Signed-off-by: Fedor Pchelkin Reviewed-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 34cdabc30b4f..5302bebbe38c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1342,7 +1342,7 @@ static int __maybe_unused gsc_runtime_resume(struct device *dev) for (i = 0; i < ctx->num_clocks; i++) { ret = clk_prepare_enable(ctx->clocks[i]); if (ret) { - while (--i > 0) + while (--i >= 0) clk_disable_unprepare(ctx->clocks[i]); return ret; } From d8d222e09dab84a17bb65dda4b94d01c565f5327 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 16 Jan 2024 15:33:07 +1100 Subject: [PATCH 033/347] xfs: read only mounts with fsopen mount API are busted Recently xfs/513 started failing on my test machines testing "-o ro,norecovery" mount options. This was being emitted in dmesg: [ 9906.932724] XFS (pmem0): no-recovery mounts must be read-only. Turns out, readonly mounts with the fsopen()/fsconfig() mount API have been busted since day zero. It's only taken 5 years for debian unstable to start using this "new" mount API, and shortly after this I noticed xfs/513 had started to fail as per above. The syscall trace is: fsopen("xfs", FSOPEN_CLOEXEC) = 3 mount_setattr(-1, NULL, 0, NULL, 0) = -1 EINVAL (Invalid argument) ..... fsconfig(3, FSCONFIG_SET_STRING, "source", "/dev/pmem0", 0) = 0 fsconfig(3, FSCONFIG_SET_FLAG, "ro", NULL, 0) = 0 fsconfig(3, FSCONFIG_SET_FLAG, "norecovery", NULL, 0) = 0 fsconfig(3, FSCONFIG_CMD_CREATE, NULL, NULL, 0) = -1 EINVAL (Invalid argument) close(3) = 0 Showing that the actual mount instantiation (FSCONFIG_CMD_CREATE) is what threw out the error. During mount instantiation, we call xfs_fs_validate_params() which does: /* No recovery flag requires a read-only mount */ if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) { xfs_warn(mp, "no-recovery mounts must be read-only."); return -EINVAL; } and xfs_is_readonly() checks internal mount flags for read only state. This state is set in xfs_init_fs_context() from the context superblock flag state: /* * Copy binary VFS mount flags we are interested in. */ if (fc->sb_flags & SB_RDONLY) set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); With the old mount API, all of the VFS specific superblock flags had already been parsed and set before xfs_init_fs_context() is called, so this all works fine. However, in the brave new fsopen/fsconfig world, xfs_init_fs_context() is called from fsopen() context, before any VFS superblock have been set or parsed. Hence if we use fsopen(), the internal XFS readonly state is *never set*. Hence anything that depends on xfs_is_readonly() actually returning true for read only mounts is broken if fsopen() has been used to mount the filesystem. Fix this by moving this internal state initialisation to xfs_fs_fill_super() before we attempt to validate the parameters that have been set prior to the FSCONFIG_CMD_CREATE call being made. Signed-off-by: Dave Chinner Fixes: 73e5fff98b64 ("xfs: switch to use the new mount-api") cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/xfs_super.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index aff20ddd4a9f..5a2512d20bd0 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1496,6 +1496,18 @@ xfs_fs_fill_super( mp->m_super = sb; + /* + * Copy VFS mount flags from the context now that all parameter parsing + * is guaranteed to have been completed by either the old mount API or + * the newer fsopen/fsconfig API. + */ + if (fc->sb_flags & SB_RDONLY) + set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); + if (fc->sb_flags & SB_DIRSYNC) + mp->m_features |= XFS_FEAT_DIRSYNC; + if (fc->sb_flags & SB_SYNCHRONOUS) + mp->m_features |= XFS_FEAT_WSYNC; + error = xfs_fs_validate_params(mp); if (error) return error; @@ -1965,6 +1977,11 @@ static const struct fs_context_operations xfs_context_ops = { .free = xfs_fs_free, }; +/* + * WARNING: do not initialise any parameters in this function that depend on + * mount option parsing having already been performed as this can be called from + * fsopen() before any parameters have been set. + */ static int xfs_init_fs_context( struct fs_context *fc) { @@ -1996,16 +2013,6 @@ static int xfs_init_fs_context( mp->m_logbsize = -1; mp->m_allocsize_log = 16; /* 64k */ - /* - * Copy binary VFS mount flags we are interested in. - */ - if (fc->sb_flags & SB_RDONLY) - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); - if (fc->sb_flags & SB_DIRSYNC) - mp->m_features |= XFS_FEAT_DIRSYNC; - if (fc->sb_flags & SB_SYNCHRONOUS) - mp->m_features |= XFS_FEAT_WSYNC; - fc->s_fs_info = mp; fc->ops = &xfs_context_ops; From 805c74eac8cb306dc69b87b6b066ab4da77ceaf1 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 17 Jan 2024 08:29:42 -0600 Subject: [PATCH 034/347] gpiolib: acpi: Ignore touchpad wakeup on GPD G1619-04 Spurious wakeups are reported on the GPD G1619-04 which can be absolved by programming the GPIO to ignore wakeups. Cc: stable@vger.kernel.org Reported-and-tested-by: George Melikov Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3073 Signed-off-by: Mario Limonciello Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-acpi.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 88066826d8e5..cd3e9657cc36 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1651,6 +1651,20 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { .ignore_interrupt = "INT33FC:00@3", }, }, + { + /* + * Spurious wakeups from TP_ATTN# pin + * Found in BIOS 0.35 + * https://gitlab.freedesktop.org/drm/amd/-/issues/3073 + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GPD"), + DMI_MATCH(DMI_PRODUCT_NAME, "G1619-04"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "PNP0C50:00@8", + }, + }, {} /* Terminating entry */ }; From 30cf36bb0408a163eb3d58ea6b883c612c029286 Mon Sep 17 00:00:00 2001 From: "Wachowski, Karol" Date: Mon, 15 Jan 2024 14:44:26 +0100 Subject: [PATCH 035/347] accel/ivpu: Dump MMU events in case of VPU boot timeout Add ivpu_mmu_evtq_dump() function that dumps existing MMU events from MMU event queue. Call this function if VPU boot failed. Previously MMU events were only checked in interrupt handler, but if VPU failed to boot due to MMU faults, those faults were missed because of interrupts not yet being enabled. This will allow checking potential fault reason of VPU not booting. Signed-off-by: Wachowski, Karol Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-2-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 1 + drivers/accel/ivpu/ivpu_mmu.c | 8 ++++++++ drivers/accel/ivpu/ivpu_mmu.h | 1 + 3 files changed, 10 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 64927682161b..0c3180411b0e 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -369,6 +369,7 @@ int ivpu_boot(struct ivpu_device *vdev) ret = ivpu_wait_for_ready(vdev); if (ret) { ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret); + ivpu_mmu_evtq_dump(vdev); return ret; } diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index 2228c44b115f..92ef651098d8 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -905,6 +905,14 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) ivpu_pm_schedule_recovery(vdev); } +void ivpu_mmu_evtq_dump(struct ivpu_device *vdev) +{ + u32 *event; + + while ((event = ivpu_mmu_get_event(vdev)) != NULL) + ivpu_mmu_dump_event(vdev, event); +} + void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev) { u32 gerror_val, gerrorn_val, active; diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h index cb551126806b..6fa35c240710 100644 --- a/drivers/accel/ivpu/ivpu_mmu.h +++ b/drivers/accel/ivpu/ivpu_mmu.h @@ -46,5 +46,6 @@ int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid); void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev); void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev); +void ivpu_mmu_evtq_dump(struct ivpu_device *vdev); #endif /* __IVPU_MMU_H__ */ From 929acfb9c53986d5ba37cfb9e1172ad79735f8eb Mon Sep 17 00:00:00 2001 From: "Wachowski, Karol" Date: Mon, 15 Jan 2024 14:44:27 +0100 Subject: [PATCH 036/347] accel/ivpu: Call diagnose failure in ivpu_mmu_cmdq_sync() Check for possible failure reasons in the buttress. Some errors (like external abort) should have corresponding buttress errors registers set indicating the real reason of failure. Signed-off-by: Wachowski, Karol Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-3-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_mmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index 92ef651098d8..1f813625aab3 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -7,6 +7,7 @@ #include #include "ivpu_drv.h" +#include "ivpu_hw.h" #include "ivpu_hw_reg_io.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" @@ -518,6 +519,7 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev) ivpu_err(vdev, "Timed out waiting for MMU consumer: %d, error: %s\n", ret, ivpu_mmu_cmdq_err_to_str(err)); + ivpu_hw_diagnose_failure(vdev); } return ret; From 8047d36fe563bf7ee7b28a284a0892506a6000a9 Mon Sep 17 00:00:00 2001 From: "Wachowski, Karol" Date: Mon, 15 Jan 2024 14:44:28 +0100 Subject: [PATCH 037/347] accel/ivpu: Add debug prints for MMU map/unmap operations It is common need to be able to see IOVA/physical to VPU addresses mappings. Especially when debugging different kind of memory related issues. Lack of such logs forces user to modify and recompile KMD manually. This commit adds those logs under MMU debug mask which can be turned on dynamically with module param during KMD load. Signed-off-by: Wachowski, Karol Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-4-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.h | 1 + drivers/accel/ivpu/ivpu_mmu_context.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index ebc4b84f27b2..9b6e336626e3 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -56,6 +56,7 @@ #define IVPU_DBG_JSM BIT(10) #define IVPU_DBG_KREF BIT(11) #define IVPU_DBG_RPM BIT(12) +#define IVPU_DBG_MMU_MAP BIT(13) #define ivpu_err(vdev, fmt, ...) \ drm_err(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__) diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c index 12a8c09d4547..fe6161299236 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.c +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -355,6 +355,9 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset; size_t size = sg_dma_len(sg) + sg->offset; + ivpu_dbg(vdev, MMU_MAP, "Map ctx: %u dma_addr: 0x%llx vpu_addr: 0x%llx size: %lu\n", + ctx->id, dma_addr, vpu_addr, size); + ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot); if (ret) { ivpu_err(vdev, "Failed to map context pages\n"); @@ -366,6 +369,7 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, /* Ensure page table modifications are flushed from wc buffers to memory */ wmb(); + mutex_unlock(&ctx->lock); ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); @@ -388,14 +392,19 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct mutex_lock(&ctx->lock); for_each_sgtable_dma_sg(sgt, sg, i) { + dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset; size_t size = sg_dma_len(sg) + sg->offset; + ivpu_dbg(vdev, MMU_MAP, "Unmap ctx: %u dma_addr: 0x%llx vpu_addr: 0x%llx size: %lu\n", + ctx->id, dma_addr, vpu_addr, size); + ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size); vpu_addr += size; } /* Ensure page table modifications are flushed from wc buffers to memory */ wmb(); + mutex_unlock(&ctx->lock); ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); From 2a20b857dd654595d332f2521d80bd67b03536a0 Mon Sep 17 00:00:00 2001 From: "Wachowski, Karol" Date: Mon, 15 Jan 2024 14:44:29 +0100 Subject: [PATCH 038/347] accel/ivpu: Add diagnostic messages when VPU fails to boot or suspend Make boot/suspend failure debugging easier by dumping FW logs and error registers. Signed-off-by: Wachowski, Karol Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-5-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 5 +++-- drivers/accel/ivpu/ivpu_pm.c | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 0c3180411b0e..ec66c2c39877 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -17,6 +17,7 @@ #include "ivpu_debugfs.h" #include "ivpu_drv.h" #include "ivpu_fw.h" +#include "ivpu_fw_log.h" #include "ivpu_gem.h" #include "ivpu_hw.h" #include "ivpu_ipc.h" @@ -340,8 +341,6 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev) if (!ret) ivpu_dbg(vdev, PM, "VPU ready message received successfully\n"); - else - ivpu_hw_diagnose_failure(vdev); return ret; } @@ -369,7 +368,9 @@ int ivpu_boot(struct ivpu_device *vdev) ret = ivpu_wait_for_ready(vdev); if (ret) { ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret); + ivpu_hw_diagnose_failure(vdev); ivpu_mmu_evtq_dump(vdev); + ivpu_fw_log_dump(vdev); return ret; } diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 0af8864cb3b5..8407f1d8c99c 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -13,6 +13,7 @@ #include "ivpu_drv.h" #include "ivpu_hw.h" #include "ivpu_fw.h" +#include "ivpu_fw_log.h" #include "ivpu_ipc.h" #include "ivpu_job.h" #include "ivpu_jsm_msg.h" @@ -247,7 +248,8 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev) ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret); if (!hw_is_idle) { - ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n"); + ivpu_err(vdev, "VPU failed to enter idle, force suspended.\n"); + ivpu_fw_log_dump(vdev); ivpu_pm_prepare_cold_boot(vdev); } else { ivpu_pm_prepare_warm_boot(vdev); From 7f66319927a8ced3be715eb7616a890b9bf0348b Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 15 Jan 2024 14:44:30 +0100 Subject: [PATCH 039/347] accel/ivpu: Fix for missing lock around drm_gem_shmem_vmap() drm_gem_shmem_vmap/vunmap requires dma resv lock to be held. This was missed during conversion to shmem helper. Fixes: 8d88e4cdce4f ("accel/ivpu: Use GEM shmem helper for all buffers") Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-6-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_gem.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 1dda4f38ea25..6890d33cf352 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -361,7 +361,9 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla if (ret) goto err_put; + dma_resv_lock(bo->base.base.resv, NULL); ret = drm_gem_shmem_vmap(&bo->base, &map); + dma_resv_unlock(bo->base.base.resv); if (ret) goto err_put; @@ -376,7 +378,10 @@ void ivpu_bo_free_internal(struct ivpu_bo *bo) { struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr); + dma_resv_lock(bo->base.base.resv, NULL); drm_gem_shmem_vunmap(&bo->base, &map); + dma_resv_unlock(bo->base.base.resv); + drm_gem_object_put(&bo->base.base); } From a8c099d5d0e4b0400cfddfd95b881c8bd9349a88 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 15 Jan 2024 14:44:31 +0100 Subject: [PATCH 040/347] accel/ivpu: Free buffer sgt on unbind Call dma_unmap() on all buffers before the VPU is unbinded to avoid "device driver has pending DMA allocations while released from device" warning when DMA-API debug is enabled. Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-7-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_gem.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 6890d33cf352..4de454bfbf91 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -112,8 +112,6 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) ivpu_dbg_bo(vdev, bo, "unbind"); - /* TODO: dma_unmap */ - if (bo->mmu_mapped) { drm_WARN_ON(&vdev->drm, !bo->ctx); drm_WARN_ON(&vdev->drm, !bo->vpu_addr); @@ -127,6 +125,18 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) bo->vpu_addr = 0; bo->ctx = NULL; } + + if (bo->base.base.import_attach) + return; + + dma_resv_lock(bo->base.base.resv, NULL); + if (bo->base.sgt) { + dma_unmap_sgtable(vdev->drm.dev, bo->base.sgt, DMA_BIDIRECTIONAL, 0); + sg_free_table(bo->base.sgt); + kfree(bo->base.sgt); + bo->base.sgt = NULL; + } + dma_resv_unlock(bo->base.base.resv); } static void ivpu_bo_unbind(struct ivpu_bo *bo) From b7a0e75632eb6568c82de9108bd29e130dd082d9 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 15 Jan 2024 14:44:32 +0100 Subject: [PATCH 041/347] accel/ivpu: Disable buffer sharing among VPU contexts This was not supported properly. A buffer was imported to another VPU context as a separate buffer object with duplicated sgt. Both exported and imported buffers could be DMA mapped causing a double mapping on the same device. Buffers imported from another VPU context will now just increase reference count, leaving only a single sgt, fixing the problem above. Buffers still can't be shared among VPU contexts because each has its own MMU mapping and ivpu_bo only supports single MMU mappings. The solution would be to use a mapping list as in panfrost or etnaviv drivers and it will be implemented in future if required. Signed-off-by: Jacek Lawrynowicz Reviewed-by: Andrzej Kacprowski Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-8-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_gem.c | 44 +++++------------------------------ 1 file changed, 6 insertions(+), 38 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 4de454bfbf91..95e731e13941 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -222,6 +222,12 @@ static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file) struct ivpu_bo *bo = to_ivpu_bo(obj); struct ivpu_addr_range *range; + if (bo->ctx) { + ivpu_warn(vdev, "Can't add BO to ctx %u: already in ctx %u\n", + file_priv->ctx.id, bo->ctx->id); + return -EALREADY; + } + if (bo->flags & DRM_IVPU_BO_SHAVE_MEM) range = &vdev->hw->ranges.shave; else if (bo->flags & DRM_IVPU_BO_DMA_MEM) @@ -252,47 +258,9 @@ static void ivpu_bo_free(struct drm_gem_object *obj) drm_gem_shmem_free(&bo->base); } -static const struct dma_buf_ops ivpu_bo_dmabuf_ops = { - .cache_sgt_mapping = true, - .attach = drm_gem_map_attach, - .detach = drm_gem_map_detach, - .map_dma_buf = drm_gem_map_dma_buf, - .unmap_dma_buf = drm_gem_unmap_dma_buf, - .release = drm_gem_dmabuf_release, - .mmap = drm_gem_dmabuf_mmap, - .vmap = drm_gem_dmabuf_vmap, - .vunmap = drm_gem_dmabuf_vunmap, -}; - -static struct dma_buf *ivpu_bo_export(struct drm_gem_object *obj, int flags) -{ - struct drm_device *dev = obj->dev; - struct dma_buf_export_info exp_info = { - .exp_name = KBUILD_MODNAME, - .owner = dev->driver->fops->owner, - .ops = &ivpu_bo_dmabuf_ops, - .size = obj->size, - .flags = flags, - .priv = obj, - .resv = obj->resv, - }; - void *sgt; - - /* - * Make sure that pages are allocated and dma-mapped before exporting the bo. - * DMA-mapping is required if the bo will be imported to the same device. - */ - sgt = drm_gem_shmem_get_pages_sgt(to_drm_gem_shmem_obj(obj)); - if (IS_ERR(sgt)) - return sgt; - - return drm_gem_dmabuf_export(dev, &exp_info); -} - static const struct drm_gem_object_funcs ivpu_gem_funcs = { .free = ivpu_bo_free, .open = ivpu_bo_open, - .export = ivpu_bo_export, .print_info = drm_gem_shmem_object_print_info, .pin = drm_gem_shmem_object_pin, .unpin = drm_gem_shmem_object_unpin, From 37dee2a2f4330a030abc5674bcec25ccc4addbcc Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 15 Jan 2024 14:44:33 +0100 Subject: [PATCH 042/347] accel/ivpu: Improve buffer object debug logs Make debug logs more readable and consistent: - don't print handle as it is not always available for all buffers - use hashed ivpu_bo ptr as main buffer identifier - remove unused fields from ivpu_bo_print_info() Signed-off-by: Jacek Lawrynowicz Reviewed-by: Wachowski, Karol Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-9-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_gem.c | 63 ++++++++++++----------------------- drivers/accel/ivpu/ivpu_gem.h | 1 - 2 files changed, 22 insertions(+), 42 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 95e731e13941..16f3035b91c0 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -24,14 +24,11 @@ static const struct drm_gem_object_funcs ivpu_gem_funcs; static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, const char *action) { - if (bo->ctx) - ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u ctx %d vpu_addr 0x%llx mmu_mapped %d\n", - action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt, - bo->handle, bo->ctx->id, bo->vpu_addr, bo->mmu_mapped); - else - ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u (not added to context)\n", - action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt, - bo->handle); + ivpu_dbg(vdev, BO, + "%6s: bo %8p vpu_addr %9llx size %8zu ctx %d has_pages %d dma_mapped %d mmu_mapped %d wc %d imported %d\n", + action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx ? bo->ctx->id : 0, + (bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc, + (bool)bo->base.base.import_attach); } /* @@ -49,12 +46,7 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo) mutex_lock(&bo->lock); ivpu_dbg_bo(vdev, bo, "pin"); - - if (!bo->ctx) { - ivpu_err(vdev, "vpu_addr not allocated for BO %d\n", bo->handle); - ret = -EINVAL; - goto unlock; - } + drm_WARN_ON(&vdev->drm, !bo->ctx); if (!bo->mmu_mapped) { struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(&bo->base); @@ -108,9 +100,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) { struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - lockdep_assert_held(&bo->lock); - - ivpu_dbg_bo(vdev, bo, "unbind"); + lockdep_assert(lockdep_is_held(&bo->lock) || !kref_read(&bo->base.base.refcount)); if (bo->mmu_mapped) { drm_WARN_ON(&vdev->drm, !bo->ctx); @@ -122,7 +112,6 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) if (bo->ctx) { ivpu_mmu_context_remove_node(bo->ctx, &bo->mm_node); - bo->vpu_addr = 0; bo->ctx = NULL; } @@ -156,8 +145,10 @@ void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_m mutex_lock(&vdev->bo_list_lock); list_for_each_entry(bo, &vdev->bo_list, bo_list_node) { mutex_lock(&bo->lock); - if (bo->ctx == ctx) + if (bo->ctx == ctx) { + ivpu_dbg_bo(vdev, bo, "unbind"); ivpu_bo_unbind_locked(bo); + } mutex_unlock(&bo->lock); } mutex_unlock(&vdev->bo_list_lock); @@ -209,9 +200,6 @@ ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags) list_add_tail(&bo->bo_list_node, &vdev->bo_list); mutex_unlock(&vdev->bo_list_lock); - ivpu_dbg(vdev, BO, "create: vpu_addr 0x%llx size %zu flags 0x%x\n", - bo->vpu_addr, bo->base.base.size, flags); - return bo; } @@ -243,14 +231,14 @@ static void ivpu_bo_free(struct drm_gem_object *obj) struct ivpu_device *vdev = to_ivpu_device(obj->dev); struct ivpu_bo *bo = to_ivpu_bo(obj); + ivpu_dbg_bo(vdev, bo, "free"); + mutex_lock(&vdev->bo_list_lock); list_del(&bo->bo_list_node); mutex_unlock(&vdev->bo_list_lock); drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ)); - ivpu_dbg_bo(vdev, bo, "free"); - ivpu_bo_unbind(bo); mutex_destroy(&bo->lock); @@ -293,11 +281,9 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi return PTR_ERR(bo); } - ret = drm_gem_handle_create(file, &bo->base.base, &bo->handle); - if (!ret) { + ret = drm_gem_handle_create(file, &bo->base.base, &args->handle); + if (!ret) args->vpu_addr = bo->vpu_addr; - args->handle = bo->handle; - } drm_gem_object_put(&bo->base.base); @@ -415,19 +401,11 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) { - unsigned long dma_refcount = 0; - mutex_lock(&bo->lock); - if (bo->base.base.dma_buf && bo->base.base.dma_buf->file) - dma_refcount = atomic_long_read(&bo->base.base.dma_buf->file->f_count); - - drm_printf(p, "%-3u %-6d 0x%-12llx %-10lu 0x%-8x %-4u %-8lu", - bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.base.size, - bo->flags, kref_read(&bo->base.base.refcount), dma_refcount); - - if (bo->base.base.import_attach) - drm_printf(p, " imported"); + drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u", + bo, bo->ctx->id, bo->vpu_addr, bo->base.base.size, + bo->flags, kref_read(&bo->base.base.refcount)); if (bo->base.pages) drm_printf(p, " has_pages"); @@ -435,6 +413,9 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) if (bo->mmu_mapped) drm_printf(p, " mmu_mapped"); + if (bo->base.base.import_attach) + drm_printf(p, " imported"); + drm_printf(p, "\n"); mutex_unlock(&bo->lock); @@ -445,8 +426,8 @@ void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p) struct ivpu_device *vdev = to_ivpu_device(dev); struct ivpu_bo *bo; - drm_printf(p, "%-3s %-6s %-14s %-10s %-10s %-4s %-8s %s\n", - "ctx", "handle", "vpu_addr", "size", "flags", "refs", "dma_refs", "attribs"); + drm_printf(p, "%-9s %-3s %-14s %-10s %-10s %-4s %s\n", + "bo", "ctx", "vpu_addr", "size", "flags", "refs", "attribs"); mutex_lock(&vdev->bo_list_lock); list_for_each_entry(bo, &vdev->bo_list, bo_list_node) diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index d75cad0d3c74..5cb1dda3e58e 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -19,7 +19,6 @@ struct ivpu_bo { struct mutex lock; /* Protects: ctx, mmu_mapped, vpu_addr */ u64 vpu_addr; - u32 handle; u32 flags; u32 job_status; /* Valid only for command buffer */ bool mmu_mapped; From b246271d257b4b0573e88f443ed8091f8b044895 Mon Sep 17 00:00:00 2001 From: "Wachowski, Karol" Date: Mon, 15 Jan 2024 14:44:34 +0100 Subject: [PATCH 043/347] accel/ivpu: Deprecate DRM_IVPU_PARAM_CONTEXT_PRIORITY param DRM_IVPU_PARAM_CONTEXT_PRIORITY has been deprecated because it has been replaced with DRM_IVPU_JOB_PRIORITY levels set with submit IOCTL and was unused anyway. Signed-off-by: Wachowski, Karol Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-10-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 11 ----------- drivers/accel/ivpu/ivpu_drv.h | 1 - drivers/accel/ivpu/ivpu_job.c | 3 +++ include/uapi/drm/ivpu_accel.h | 25 ++++++++++++++++++++----- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index ec66c2c39877..546c0899bb9e 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -177,9 +177,6 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f case DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS: args->value = vdev->hw->ranges.user.start; break; - case DRM_IVPU_PARAM_CONTEXT_PRIORITY: - args->value = file_priv->priority; - break; case DRM_IVPU_PARAM_CONTEXT_ID: args->value = file_priv->ctx.id; break; @@ -219,17 +216,10 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f static int ivpu_set_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct ivpu_file_priv *file_priv = file->driver_priv; struct drm_ivpu_param *args = data; int ret = 0; switch (args->param) { - case DRM_IVPU_PARAM_CONTEXT_PRIORITY: - if (args->value <= DRM_IVPU_CONTEXT_PRIORITY_REALTIME) - file_priv->priority = args->value; - else - ret = -EINVAL; - break; default: ret = -EINVAL; } @@ -258,7 +248,6 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) } file_priv->vdev = vdev; - file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL; kref_init(&file_priv->ref); mutex_init(&file_priv->lock); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 9b6e336626e3..7a6bc1918780 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -146,7 +146,6 @@ struct ivpu_file_priv { struct mutex lock; /* Protects cmdq */ struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES]; struct ivpu_mmu_context ctx; - u32 priority; bool has_mmu_faults; }; diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 7206cf9cdb4a..82e40bb4803c 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -488,6 +488,9 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (params->engine > DRM_IVPU_ENGINE_COPY) return -EINVAL; + if (params->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) + return -EINVAL; + if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT) return -EINVAL; diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index de1944e42c65..63c49318a863 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -53,7 +53,7 @@ extern "C" { #define DRM_IVPU_PARAM_CORE_CLOCK_RATE 3 #define DRM_IVPU_PARAM_NUM_CONTEXTS 4 #define DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS 5 -#define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6 +#define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6 /* Deprecated */ #define DRM_IVPU_PARAM_CONTEXT_ID 7 #define DRM_IVPU_PARAM_FW_API_VERSION 8 #define DRM_IVPU_PARAM_ENGINE_HEARTBEAT 9 @@ -64,11 +64,18 @@ extern "C" { #define DRM_IVPU_PLATFORM_TYPE_SILICON 0 +/* Deprecated, use DRM_IVPU_JOB_PRIORITY */ #define DRM_IVPU_CONTEXT_PRIORITY_IDLE 0 #define DRM_IVPU_CONTEXT_PRIORITY_NORMAL 1 #define DRM_IVPU_CONTEXT_PRIORITY_FOCUS 2 #define DRM_IVPU_CONTEXT_PRIORITY_REALTIME 3 +#define DRM_IVPU_JOB_PRIORITY_DEFAULT 0 +#define DRM_IVPU_JOB_PRIORITY_IDLE 1 +#define DRM_IVPU_JOB_PRIORITY_NORMAL 2 +#define DRM_IVPU_JOB_PRIORITY_FOCUS 3 +#define DRM_IVPU_JOB_PRIORITY_REALTIME 4 + /** * DRM_IVPU_CAP_METRIC_STREAMER * @@ -112,10 +119,6 @@ struct drm_ivpu_param { * %DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS: * Lowest VPU virtual address available in the current context (read-only) * - * %DRM_IVPU_PARAM_CONTEXT_PRIORITY: - * Value of current context scheduling priority (read-write). - * See DRM_IVPU_CONTEXT_PRIORITY_* for possible values. - * * %DRM_IVPU_PARAM_CONTEXT_ID: * Current context ID, always greater than 0 (read-only) * @@ -286,6 +289,18 @@ struct drm_ivpu_submit { * to be executed. The offset has to be 8-byte aligned. */ __u32 commands_offset; + + /** + * @priority: + * + * Priority to be set for related job command queue, can be one of the following: + * %DRM_IVPU_JOB_PRIORITY_DEFAULT + * %DRM_IVPU_JOB_PRIORITY_IDLE + * %DRM_IVPU_JOB_PRIORITY_NORMAL + * %DRM_IVPU_JOB_PRIORITY_FOCUS + * %DRM_IVPU_JOB_PRIORITY_REALTIME + */ + __u32 priority; }; /* drm_ivpu_bo_wait job status codes */ From 0dd20a48a541ea5485b8bfc0e0bdbc6ae29b389f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 16 Jan 2024 20:30:59 -0800 Subject: [PATCH 044/347] MIPS: Cobalt: Fix missing prototypes Fix missing prototypes warnings for cobalt_machine_halt() and cobalt_machine_restart() by moving their prototypes to cobalt.h which is included by setup.c. Signed-off-by: Florian Fainelli Signed-off-by: Thomas Bogendoerfer --- arch/mips/cobalt/setup.c | 3 --- arch/mips/include/asm/mach-cobalt/cobalt.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/cobalt/setup.c b/arch/mips/cobalt/setup.c index 2e099d55a564..9a266bf78339 100644 --- a/arch/mips/cobalt/setup.c +++ b/arch/mips/cobalt/setup.c @@ -23,9 +23,6 @@ #include -extern void cobalt_machine_restart(char *command); -extern void cobalt_machine_halt(void); - const char *get_system_type(void) { switch (cobalt_board_id) { diff --git a/arch/mips/include/asm/mach-cobalt/cobalt.h b/arch/mips/include/asm/mach-cobalt/cobalt.h index 5b9fce73f11d..97f9d5e9446d 100644 --- a/arch/mips/include/asm/mach-cobalt/cobalt.h +++ b/arch/mips/include/asm/mach-cobalt/cobalt.h @@ -19,4 +19,7 @@ extern int cobalt_board_id; #define COBALT_BRD_ID_QUBE2 0x5 #define COBALT_BRD_ID_RAQ2 0x6 +void cobalt_machine_halt(void); +void cobalt_machine_restart(char *command); + #endif /* __ASM_COBALT_H */ From feab19143a1c8c5efdf1934dd0b1defb29bb5026 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 17 Jan 2024 15:49:44 -0800 Subject: [PATCH 045/347] MIPS: Alchemy: Fix missing prototypes We have a number of missing prototypes warnings for board_setup(), alchemy_set_lpj() and prom_init_cmdline(), prom_getenv() and prom_get_ethernet_addr(). Fix those by providing definitions for the first two functions in au1000.h which is included everywhere relevant, and including prom.h for the last three. Signed-off-by: Florian Fainelli Signed-off-by: Thomas Bogendoerfer --- arch/mips/alchemy/common/prom.c | 1 + arch/mips/alchemy/common/setup.c | 4 +--- arch/mips/include/asm/mach-au1x00/au1000.h | 3 +++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/mips/alchemy/common/prom.c b/arch/mips/alchemy/common/prom.c index b13d8adf3be4..20d30f6265cd 100644 --- a/arch/mips/alchemy/common/prom.c +++ b/arch/mips/alchemy/common/prom.c @@ -40,6 +40,7 @@ #include #include +#include int prom_argc; char **prom_argv; diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c index 2388d68786f4..a7a6d31a7a41 100644 --- a/arch/mips/alchemy/common/setup.c +++ b/arch/mips/alchemy/common/setup.c @@ -30,13 +30,11 @@ #include #include /* for dma_default_coherent */ +#include #include #include -extern void __init board_setup(void); -extern void __init alchemy_set_lpj(void); - static bool alchemy_dma_coherent(void) { switch (alchemy_get_cputype()) { diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h index a7eec3364a64..41546777902b 100644 --- a/arch/mips/include/asm/mach-au1x00/au1000.h +++ b/arch/mips/include/asm/mach-au1x00/au1000.h @@ -597,6 +597,9 @@ #include +void alchemy_set_lpj(void); +void board_setup(void); + /* helpers to access the SYS_* registers */ static inline unsigned long alchemy_rdsys(int regofs) { From 437a310b22244d4e0b78665c3042e5d1c0f45306 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 20 Dec 2023 17:21:12 +0000 Subject: [PATCH 046/347] firmware: arm_scmi: Check mailbox/SMT channel for consistency On reception of a completion interrupt the shared memory area is accessed to retrieve the message header at first and then, if the message sequence number identifies a transaction which is still pending, the related payload is fetched too. When an SCMI command times out the channel ownership remains with the platform until eventually a late reply is received and, as a consequence, any further transmission attempt remains pending, waiting for the channel to be relinquished by the platform. Once that late reply is received the channel ownership is given back to the agent and any pending request is then allowed to proceed and overwrite the SMT area of the just delivered late reply; then the wait for the reply to the new request starts. It has been observed that the spurious IRQ related to the late reply can be wrongly associated with the freshly enqueued request: when that happens the SCMI stack in-flight lookup procedure is fooled by the fact that the message header now present in the SMT area is related to the new pending transaction, even though the real reply has still to arrive. This race-condition on the A2P channel can be detected by looking at the channel status bits: a genuine reply from the platform will have set the channel free bit before triggering the completion IRQ. Add a consistency check to validate such condition in the A2P ISR. Reported-by: Xinglong Yang Closes: https://lore.kernel.org/all/PUZPR06MB54981E6FA00D82BFDBB864FBF08DA@PUZPR06MB5498.apcprd06.prod.outlook.com/ Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type") Cc: stable@vger.kernel.org # 5.15+ Signed-off-by: Cristian Marussi Tested-by: Xinglong Yang Link: https://lore.kernel.org/r/20231220172112.763539-1-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/common.h | 1 + drivers/firmware/arm_scmi/mailbox.c | 14 ++++++++++++++ drivers/firmware/arm_scmi/shmem.c | 6 ++++++ 3 files changed, 21 insertions(+) diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index c46dc5215af7..00b165d1f502 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -314,6 +314,7 @@ void shmem_fetch_notification(struct scmi_shared_mem __iomem *shmem, void shmem_clear_channel(struct scmi_shared_mem __iomem *shmem); bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem, struct scmi_xfer *xfer); +bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem); /* declarations for message passing transports */ struct scmi_msg_payld; diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c index 19246ed1f01f..b8d470417e8f 100644 --- a/drivers/firmware/arm_scmi/mailbox.c +++ b/drivers/firmware/arm_scmi/mailbox.c @@ -45,6 +45,20 @@ static void rx_callback(struct mbox_client *cl, void *m) { struct scmi_mailbox *smbox = client_to_scmi_mailbox(cl); + /* + * An A2P IRQ is NOT valid when received while the platform still has + * the ownership of the channel, because the platform at first releases + * the SMT channel and then sends the completion interrupt. + * + * This addresses a possible race condition in which a spurious IRQ from + * a previous timed-out reply which arrived late could be wrongly + * associated with the next pending transaction. + */ + if (cl->knows_txdone && !shmem_channel_free(smbox->shmem)) { + dev_warn(smbox->cinfo->dev, "Ignoring spurious A2P IRQ !\n"); + return; + } + scmi_rx_callback(smbox->cinfo, shmem_read_header(smbox->shmem), NULL); } diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c index 87b4f4d35f06..517d52fb3bcb 100644 --- a/drivers/firmware/arm_scmi/shmem.c +++ b/drivers/firmware/arm_scmi/shmem.c @@ -122,3 +122,9 @@ bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem, (SCMI_SHMEM_CHAN_STAT_CHANNEL_ERROR | SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE); } + +bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem) +{ + return (ioread32(&shmem->channel_status) & + SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE); +} From 0726fcc8d4af75441b38aaa082f820e63b3a8748 Mon Sep 17 00:00:00 2001 From: Tanzir Hasan Date: Tue, 26 Dec 2023 22:52:03 +0000 Subject: [PATCH 047/347] firmware: arm_scmi: Replace asm-generic/bug.h with linux/bug.h linux/bug.h includes asm-generic/bug.h already and hence replacing asm-generic/bug.h with linux/bug.h will not regress any build. Also, it is always better to avoid header file inclusion from asm-generic if possible. Suggested-by: Al Viro Signed-off-by: Tanzir Hasan Link: https://lore.kernel.org/r/20231226-shmem-v1-1-ea15ce81d8ba@google.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c index 517d52fb3bcb..8bf495bcad09 100644 --- a/drivers/firmware/arm_scmi/shmem.c +++ b/drivers/firmware/arm_scmi/shmem.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include "common.h" From e8ef4bbe39b9576a73f104f6af743fb9c7b624ba Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 18:50:49 +0000 Subject: [PATCH 048/347] firmware: arm_scmi: Use xa_insert() to store opps When storing opps by level or index use xa_insert() instead of xa_store() and add error-checking to spot bad duplicates indexes possibly wrongly provided by the platform firmware. Fixes: 31c7c1397a33 ("firmware: arm_scmi: Add v3.2 perf level indexing mode support") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108185050.1628687-1-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/perf.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 8ea2a7b3d35d..211e8e0aef2c 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -350,8 +350,8 @@ process_response_opp(struct scmi_opp *opp, unsigned int loop_idx, } static inline void -process_response_opp_v4(struct perf_dom_info *dom, struct scmi_opp *opp, - unsigned int loop_idx, +process_response_opp_v4(struct device *dev, struct perf_dom_info *dom, + struct scmi_opp *opp, unsigned int loop_idx, const struct scmi_msg_resp_perf_describe_levels_v4 *r) { opp->perf = le32_to_cpu(r->opp[loop_idx].perf_val); @@ -362,10 +362,23 @@ process_response_opp_v4(struct perf_dom_info *dom, struct scmi_opp *opp, /* Note that PERF v4 reports always five 32-bit words */ opp->indicative_freq = le32_to_cpu(r->opp[loop_idx].indicative_freq); if (dom->level_indexing_mode) { + int ret; + opp->level_index = le32_to_cpu(r->opp[loop_idx].level_index); - xa_store(&dom->opps_by_idx, opp->level_index, opp, GFP_KERNEL); - xa_store(&dom->opps_by_lvl, opp->perf, opp, GFP_KERNEL); + ret = xa_insert(&dom->opps_by_idx, opp->level_index, opp, + GFP_KERNEL); + if (ret) + dev_warn(dev, + "Failed to add opps_by_idx at %d - ret:%d\n", + opp->level_index, ret); + + ret = xa_insert(&dom->opps_by_lvl, opp->perf, opp, GFP_KERNEL); + if (ret) + dev_warn(dev, + "Failed to add opps_by_lvl at %d - ret:%d\n", + opp->perf, ret); + hash_add(dom->opps_by_freq, &opp->hash, opp->indicative_freq); } } @@ -382,7 +395,7 @@ iter_perf_levels_process_response(const struct scmi_protocol_handle *ph, if (PROTOCOL_REV_MAJOR(p->version) <= 0x3) process_response_opp(opp, st->loop_idx, response); else - process_response_opp_v4(p->perf_dom, opp, st->loop_idx, + process_response_opp_v4(ph->dev, p->perf_dom, opp, st->loop_idx, response); p->perf_dom->opp_count++; From b5dc0ffd36560dbadaed9a3d9fd7838055d62d74 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 18:50:50 +0000 Subject: [PATCH 049/347] firmware: arm_scmi: Use xa_insert() when saving raw queues Use xa_insert() when saving per-channel raw queues to better check for duplicates. Fixes: 7860701d1e6e ("firmware: arm_scmi: Add per-channel raw injection support") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108185050.1628687-2-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/raw_mode.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c index 0493aa3c12bf..350573518503 100644 --- a/drivers/firmware/arm_scmi/raw_mode.c +++ b/drivers/firmware/arm_scmi/raw_mode.c @@ -1111,7 +1111,6 @@ static int scmi_raw_mode_setup(struct scmi_raw_mode_info *raw, int i; for (i = 0; i < num_chans; i++) { - void *xret; struct scmi_raw_queue *q; q = scmi_raw_queue_init(raw); @@ -1120,13 +1119,12 @@ static int scmi_raw_mode_setup(struct scmi_raw_mode_info *raw, goto err_xa; } - xret = xa_store(&raw->chans_q, channels[i], q, + ret = xa_insert(&raw->chans_q, channels[i], q, GFP_KERNEL); - if (xa_err(xret)) { + if (ret) { dev_err(dev, "Fail to allocate Raw queue 0x%02X\n", channels[i]); - ret = xa_err(xret); goto err_xa; } } @@ -1322,6 +1320,12 @@ void scmi_raw_message_report(void *r, struct scmi_xfer *xfer, dev = raw->handle->dev; q = scmi_raw_queue_select(raw, idx, SCMI_XFER_IS_CHAN_SET(xfer) ? chan_id : 0); + if (!q) { + dev_warn(dev, + "RAW[%d] - NO queue for chan 0x%X. Dropping report.\n", + idx, chan_id); + return; + } /* * Grab the msg_q_lock upfront to avoid a possible race between From 27600c96e2ffa6c1b2cb378ddc75c6620c628d04 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 9 Jan 2024 15:01:06 +0000 Subject: [PATCH 050/347] firmware: arm_scmi: Fix the clock protocol version for v3.2 The clock protocol version as per the SCMI v3.2 specification is 0x30000. Enable the v3.0 clock protocol features only when clock protocol version equals 0x30000. The previous beta version of the spec had this value set to 0x20001 and th same value trickled down from the initial development. The version update were missed in the driver. Fixes: e49e314a2cf7 ("firmware: arm_scmi: Add clock v3.2 CONFIG_SET support") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240109150106.2066739-1-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/clock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index c0644558042a..d6357513163c 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -954,8 +954,7 @@ static int scmi_clock_protocol_init(const struct scmi_protocol_handle *ph) scmi_clock_describe_rates_get(ph, clkid, clk); } - if (PROTOCOL_REV_MAJOR(version) >= 0x2 && - PROTOCOL_REV_MINOR(version) >= 0x1) { + if (PROTOCOL_REV_MAJOR(version) >= 0x3) { cinfo->clock_config_set = scmi_clock_config_set_v2; cinfo->clock_config_get = scmi_clock_config_get_v2; } else { From 6bd1b3fede83d8ba5314886062a9bfdada5102a9 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 9 Jan 2024 18:17:16 +0000 Subject: [PATCH 051/347] firmware: arm_scmi: Fix the clock protocol supported version Rollback currently supported SCMI clock protocol version to v2.0 since some of the mandatory v3.0 features are indeed still not supported yet. Fixes: b5efc28a754d ("firmware: arm_scmi: Add protocol versioning checks") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240109181716.2338636-1-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index d6357513163c..e2050adbf85c 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -13,7 +13,7 @@ #include "notify.h" /* Updated only after ALL the mandatory features for that version are merged */ -#define SCMI_PROTOCOL_SUPPORTED_VERSION 0x20001 +#define SCMI_PROTOCOL_SUPPORTED_VERSION 0x20000 enum scmi_clock_protocol_cmd { CLOCK_ATTRIBUTES = 0x3, From 59b2e242b13192e50bf47df3780bf8a7e2260e98 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 12:34:11 +0000 Subject: [PATCH 052/347] firmware: arm_ffa: Add missing rwlock_init() in ffa_setup_partitions() Add the missing rwlock initialization for the individual FF-A partition information in ffa_setup_partitions(). Fixes: 0184450b8b1e ("firmware: arm_ffa: Add schedule receiver callback mechanism") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108-ffa_fixes_6-8-v1-1-75bf7035bc50@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 6146b2927d5c..ed1d6a24934e 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1226,6 +1226,7 @@ static void ffa_setup_partitions(void) ffa_device_unregister(ffa_dev); continue; } + rwlock_init(&info->rw_lock); xa_store(&drv_info->partition_info, tpbuf->id, info, GFP_KERNEL); } drv_info->partition_count = count; From 5ff30ade16cd9efc2466d3ea22bbaf370772941a Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 12:34:12 +0000 Subject: [PATCH 053/347] firmware: arm_ffa: Add missing rwlock_init() for the driver partition Add the missing rwlock initialization for the FF-A partition associated the driver in ffa_setup_partitions(). It will the primary scheduler partition in the host or the VM partition in the virtualised environment. IOW, it corresponds to the partition with VM ID == drv_info->vm_id. Fixes: 1b6bf41b7a65 ("firmware: arm_ffa: Add notification handling mechanism") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108-ffa_fixes_6-8-v1-2-75bf7035bc50@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index ed1d6a24934e..8df92c9521f4 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1237,6 +1237,7 @@ static void ffa_setup_partitions(void) info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return; + rwlock_init(&info->rw_lock); xa_store(&drv_info->partition_info, drv_info->vm_id, info, GFP_KERNEL); drv_info->partition_count++; } From c00d9738fd5fce15dc5494d05b7599dce23e8146 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 12:34:13 +0000 Subject: [PATCH 054/347] firmware: arm_ffa: Check xa_load() return value Add a check to verify the result of xa_load() during the partition lookups done while registering/unregistering the scheduler receiver interrupt callbacks and while executing the main scheduler receiver interrupt callback handler. Fixes: 0184450b8b1e ("firmware: arm_ffa: Add schedule receiver callback mechanism") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108-ffa_fixes_6-8-v1-3-75bf7035bc50@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 8df92c9521f4..0ea1dd6e55c4 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -733,6 +733,11 @@ static void __do_sched_recv_cb(u16 part_id, u16 vcpu, bool is_per_vcpu) void *cb_data; partition = xa_load(&drv_info->partition_info, part_id); + if (!partition) { + pr_err("%s: Invalid partition ID 0x%x\n", __func__, part_id); + return; + } + read_lock(&partition->rw_lock); callback = partition->callback; cb_data = partition->cb_data; @@ -915,6 +920,11 @@ static int ffa_sched_recv_cb_update(u16 part_id, ffa_sched_recv_cb callback, return -EOPNOTSUPP; partition = xa_load(&drv_info->partition_info, part_id); + if (!partition) { + pr_err("%s: Invalid partition ID 0x%x\n", __func__, part_id); + return -EINVAL; + } + write_lock(&partition->rw_lock); cb_valid = !!partition->callback; From ad9d9a107a4308e75ec34890547447c7095b4781 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 12:34:14 +0000 Subject: [PATCH 055/347] firmware: arm_ffa: Simplify ffa_partitions_cleanup() On cleanup iterate the XArrays with xa_for_each() and remove the existent entries with xa_erase(), finally destroy the XArray itself. Remove partition_count field from drv_info since no more used anywhwere. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108-ffa_fixes_6-8-v1-4-75bf7035bc50@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 0ea1dd6e55c4..2426021dbb58 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -107,7 +107,6 @@ struct ffa_drv_info { struct work_struct notif_pcpu_work; struct work_struct irq_work; struct xarray partition_info; - unsigned int partition_count; DECLARE_HASHTABLE(notifier_hash, ilog2(FFA_MAX_NOTIFICATIONS)); struct mutex notify_lock; /* lock to protect notifier hashtable */ }; @@ -1239,7 +1238,6 @@ static void ffa_setup_partitions(void) rwlock_init(&info->rw_lock); xa_store(&drv_info->partition_info, tpbuf->id, info, GFP_KERNEL); } - drv_info->partition_count = count; kfree(pbuf); @@ -1249,29 +1247,18 @@ static void ffa_setup_partitions(void) return; rwlock_init(&info->rw_lock); xa_store(&drv_info->partition_info, drv_info->vm_id, info, GFP_KERNEL); - drv_info->partition_count++; } static void ffa_partitions_cleanup(void) { - struct ffa_dev_part_info **info; - int idx, count = drv_info->partition_count; + struct ffa_dev_part_info *info; + unsigned long idx; - if (!count) - return; + xa_for_each(&drv_info->partition_info, idx, info) { + xa_erase(&drv_info->partition_info, idx); + kfree(info); + } - info = kcalloc(count, sizeof(*info), GFP_KERNEL); - if (!info) - return; - - xa_extract(&drv_info->partition_info, (void **)info, 0, VM_ID_MASK, - count, XA_PRESENT); - - for (idx = 0; idx < count; idx++) - kfree(info[idx]); - kfree(info); - - drv_info->partition_count = 0; xa_destroy(&drv_info->partition_info); } @@ -1547,7 +1534,6 @@ static void __exit ffa_exit(void) ffa_rxtx_unmap(drv_info->vm_id); free_pages_exact(drv_info->tx_buffer, RXTX_BUFFER_SIZE); free_pages_exact(drv_info->rx_buffer, RXTX_BUFFER_SIZE); - xa_destroy(&drv_info->partition_info); kfree(drv_info); arm_ffa_bus_exit(); } From ace760d9c0498fb226269ed34f0e86417d90f91b Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 12:34:15 +0000 Subject: [PATCH 056/347] firmware: arm_ffa: Use xa_insert() and check for result While adding new partitions descriptors to the XArray the outcome of the stores should be checked and, in particular, it has also to be ensured that an existing entry with the same index was not already present, since partitions IDs are expected to be unique. Use xa_insert() instead of xa_store() since it returns -EBUSY when the index is already in use and log an error when that happens. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108-ffa_fixes_6-8-v1-5-75bf7035bc50@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 2426021dbb58..c613b57747cf 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1197,7 +1197,7 @@ void ffa_device_match_uuid(struct ffa_device *ffa_dev, const uuid_t *uuid) static void ffa_setup_partitions(void) { - int count, idx; + int count, idx, ret; uuid_t uuid; struct ffa_device *ffa_dev; struct ffa_dev_part_info *info; @@ -1236,7 +1236,14 @@ static void ffa_setup_partitions(void) continue; } rwlock_init(&info->rw_lock); - xa_store(&drv_info->partition_info, tpbuf->id, info, GFP_KERNEL); + ret = xa_insert(&drv_info->partition_info, tpbuf->id, + info, GFP_KERNEL); + if (ret) { + pr_err("%s: failed to save partition ID 0x%x - ret:%d\n", + __func__, tpbuf->id, ret); + ffa_device_unregister(ffa_dev); + kfree(info); + } } kfree(pbuf); @@ -1246,7 +1253,13 @@ static void ffa_setup_partitions(void) if (!info) return; rwlock_init(&info->rw_lock); - xa_store(&drv_info->partition_info, drv_info->vm_id, info, GFP_KERNEL); + ret = xa_insert(&drv_info->partition_info, drv_info->vm_id, + info, GFP_KERNEL); + if (ret) { + pr_err("%s: failed to save Host partition ID 0x%x - ret:%d. Abort.\n", + __func__, drv_info->vm_id, ret); + kfree(info); + } } static void ffa_partitions_cleanup(void) From 0c565d16b80074e57e3e56240d13fc6cd6ed0334 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 12:34:16 +0000 Subject: [PATCH 057/347] firmware: arm_ffa: Handle partitions setup failures Make ffa_setup_partitions() fail, cleanup and return an error when the Host partition setup fails: in such a case ffa_init() itself will fail. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108-ffa_fixes_6-8-v1-6-75bf7035bc50@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index c613b57747cf..f2556a8e9401 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -112,6 +112,7 @@ struct ffa_drv_info { }; static struct ffa_drv_info *drv_info; +static void ffa_partitions_cleanup(void); /* * The driver must be able to support all the versions from the earliest @@ -1195,7 +1196,7 @@ void ffa_device_match_uuid(struct ffa_device *ffa_dev, const uuid_t *uuid) kfree(pbuf); } -static void ffa_setup_partitions(void) +static int ffa_setup_partitions(void) { int count, idx, ret; uuid_t uuid; @@ -1206,7 +1207,7 @@ static void ffa_setup_partitions(void) count = ffa_partition_probe(&uuid_null, &pbuf); if (count <= 0) { pr_info("%s: No partitions found, error %d\n", __func__, count); - return; + return -EINVAL; } xa_init(&drv_info->partition_info); @@ -1250,8 +1251,14 @@ static void ffa_setup_partitions(void) /* Allocate for the host */ info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return; + if (!info) { + pr_err("%s: failed to alloc Host partition ID 0x%x. Abort.\n", + __func__, drv_info->vm_id); + /* Already registered devices are freed on bus_exit */ + ffa_partitions_cleanup(); + return -ENOMEM; + } + rwlock_init(&info->rw_lock); ret = xa_insert(&drv_info->partition_info, drv_info->vm_id, info, GFP_KERNEL); @@ -1259,7 +1266,11 @@ static void ffa_setup_partitions(void) pr_err("%s: failed to save Host partition ID 0x%x - ret:%d. Abort.\n", __func__, drv_info->vm_id, ret); kfree(info); + /* Already registered devices are freed on bus_exit */ + ffa_partitions_cleanup(); } + + return ret; } static void ffa_partitions_cleanup(void) @@ -1520,7 +1531,11 @@ static int __init ffa_init(void) ffa_notifications_setup(); - ffa_setup_partitions(); + ret = ffa_setup_partitions(); + if (ret) { + pr_err("failed to setup partitions\n"); + goto cleanup_notifs; + } ret = ffa_sched_recv_cb_update(drv_info->vm_id, ffa_self_notif_handle, drv_info, true); @@ -1528,6 +1543,9 @@ static int __init ffa_init(void) pr_info("Failed to register driver sched callback %d\n", ret); return 0; + +cleanup_notifs: + ffa_notifications_cleanup(); free_pages: if (drv_info->tx_buffer) free_pages_exact(drv_info->tx_buffer, RXTX_BUFFER_SIZE); From f134bd1ebc28d40010328e5b314e10575e3550e0 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 19 Jan 2024 14:32:16 +0100 Subject: [PATCH 058/347] MIPS: sgi-ip27: Fix missing prototypes Fix missing prototypes by making not shared functions static and adding others to ip27-common.h. Also drop ip27-hubio.c as it's not used for a long time. Signed-off-by: Thomas Bogendoerfer Reviewed-by: Florian Fainelli --- arch/mips/sgi-ip27/Makefile | 2 +- arch/mips/sgi-ip27/ip27-berr.c | 4 +- arch/mips/sgi-ip27/ip27-common.h | 2 + arch/mips/sgi-ip27/ip27-hubio.c | 185 ------------------------------- arch/mips/sgi-ip27/ip27-irq.c | 2 + arch/mips/sgi-ip27/ip27-memory.c | 1 + arch/mips/sgi-ip27/ip27-nmi.c | 25 ++--- 7 files changed, 17 insertions(+), 204 deletions(-) delete mode 100644 arch/mips/sgi-ip27/ip27-hubio.c diff --git a/arch/mips/sgi-ip27/Makefile b/arch/mips/sgi-ip27/Makefile index 27c14ede191e..9877fcc512b1 100644 --- a/arch/mips/sgi-ip27/Makefile +++ b/arch/mips/sgi-ip27/Makefile @@ -5,7 +5,7 @@ obj-y := ip27-berr.o ip27-irq.o ip27-init.o ip27-klconfig.o \ ip27-klnuma.o ip27-memory.o ip27-nmi.o ip27-reset.o ip27-timer.o \ - ip27-hubio.o ip27-xtalk.o + ip27-xtalk.o obj-$(CONFIG_EARLY_PRINTK) += ip27-console.o obj-$(CONFIG_SMP) += ip27-smp.o diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index 923a63a51cda..9eb497cb5d52 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -22,6 +22,8 @@ #include #include +#include "ip27-common.h" + static void dump_hub_information(unsigned long errst0, unsigned long errst1) { static char *err_type[2][8] = { @@ -57,7 +59,7 @@ static void dump_hub_information(unsigned long errst0, unsigned long errst1) [st0.pi_stat0_fmt.s0_err_type] ? : "invalid"); } -int ip27_be_handler(struct pt_regs *regs, int is_fixup) +static int ip27_be_handler(struct pt_regs *regs, int is_fixup) { unsigned long errst0, errst1; int data = regs->cp0_cause & 4; diff --git a/arch/mips/sgi-ip27/ip27-common.h b/arch/mips/sgi-ip27/ip27-common.h index ed008a08464c..a0059fa13934 100644 --- a/arch/mips/sgi-ip27/ip27-common.h +++ b/arch/mips/sgi-ip27/ip27-common.h @@ -10,6 +10,7 @@ extern void hub_rt_clock_event_init(void); extern void hub_rtc_init(nasid_t nasid); extern void install_cpu_nmi_handler(int slice); extern void install_ipi(void); +extern void ip27_be_init(void); extern void ip27_reboot_setup(void); extern const struct plat_smp_ops ip27_smp_ops; extern unsigned long node_getfirstfree(nasid_t nasid); @@ -17,4 +18,5 @@ extern void per_cpu_init(void); extern void replicate_kernel_text(void); extern void setup_replication_mask(void); + #endif /* __IP27_COMMON_H */ diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c deleted file mode 100644 index c57f0d8f3218..000000000000 --- a/arch/mips/sgi-ip27/ip27-hubio.c +++ /dev/null @@ -1,185 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 1992-1997, 2000-2003 Silicon Graphics, Inc. - * Copyright (C) 2004 Christoph Hellwig. - * - * Support functions for the HUB ASIC - mostly PIO mapping related. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - - -static int force_fire_and_forget = 1; - -/** - * hub_pio_map - establish a HUB PIO mapping - * - * @nasid: nasid to perform PIO mapping on - * @widget: widget ID to perform PIO mapping for - * @xtalk_addr: xtalk_address that needs to be mapped - * @size: size of the PIO mapping - * - **/ -unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget, - unsigned long xtalk_addr, size_t size) -{ - unsigned i; - - /* use small-window mapping if possible */ - if ((xtalk_addr % SWIN_SIZE) + size <= SWIN_SIZE) - return NODE_SWIN_BASE(nasid, widget) + (xtalk_addr % SWIN_SIZE); - - if ((xtalk_addr % BWIN_SIZE) + size > BWIN_SIZE) { - printk(KERN_WARNING "PIO mapping at hub %d widget %d addr 0x%lx" - " too big (%ld)\n", - nasid, widget, xtalk_addr, size); - return 0; - } - - xtalk_addr &= ~(BWIN_SIZE-1); - for (i = 0; i < HUB_NUM_BIG_WINDOW; i++) { - if (test_and_set_bit(i, hub_data(nasid)->h_bigwin_used)) - continue; - - /* - * The code below does a PIO write to setup an ITTE entry. - * - * We need to prevent other CPUs from seeing our updated - * memory shadow of the ITTE (in the piomap) until the ITTE - * entry is actually set up; otherwise, another CPU might - * attempt a PIO prematurely. - * - * Also, the only way we can know that an entry has been - * received by the hub and can be used by future PIO reads/ - * writes is by reading back the ITTE entry after writing it. - * - * For these two reasons, we PIO read back the ITTE entry - * after we write it. - */ - IIO_ITTE_PUT(nasid, i, HUB_PIO_MAP_TO_MEM, widget, xtalk_addr); - __raw_readq(IIO_ITTE_GET(nasid, i)); - - return NODE_BWIN_BASE(nasid, widget) + (xtalk_addr % BWIN_SIZE); - } - - printk(KERN_WARNING "unable to establish PIO mapping for at" - " hub %d widget %d addr 0x%lx\n", - nasid, widget, xtalk_addr); - return 0; -} - - -/* - * hub_setup_prb(nasid, prbnum, credits, conveyor) - * - * Put a PRB into fire-and-forget mode if conveyor isn't set. Otherwise, - * put it into conveyor belt mode with the specified number of credits. - */ -static void hub_setup_prb(nasid_t nasid, int prbnum, int credits) -{ - union iprb_u prb; - int prb_offset; - - /* - * Get the current register value. - */ - prb_offset = IIO_IOPRB(prbnum); - prb.iprb_regval = REMOTE_HUB_L(nasid, prb_offset); - - /* - * Clear out some fields. - */ - prb.iprb_ovflow = 1; - prb.iprb_bnakctr = 0; - prb.iprb_anakctr = 0; - - /* - * Enable or disable fire-and-forget mode. - */ - prb.iprb_ff = force_fire_and_forget ? 1 : 0; - - /* - * Set the appropriate number of PIO credits for the widget. - */ - prb.iprb_xtalkctr = credits; - - /* - * Store the new value to the register. - */ - REMOTE_HUB_S(nasid, prb_offset, prb.iprb_regval); -} - -/** - * hub_set_piomode - set pio mode for a given hub - * - * @nasid: physical node ID for the hub in question - * - * Put the hub into either "PIO conveyor belt" mode or "fire-and-forget" mode. - * To do this, we have to make absolutely sure that no PIOs are in progress - * so we turn off access to all widgets for the duration of the function. - * - * XXX - This code should really check what kind of widget we're talking - * to. Bridges can only handle three requests, but XG will do more. - * How many can crossbow handle to widget 0? We're assuming 1. - * - * XXX - There is a bug in the crossbow that link reset PIOs do not - * return write responses. The easiest solution to this problem is to - * leave widget 0 (xbow) in fire-and-forget mode at all times. This - * only affects pio's to xbow registers, which should be rare. - **/ -static void hub_set_piomode(nasid_t nasid) -{ - u64 ii_iowa; - union hubii_wcr_u ii_wcr; - unsigned i; - - ii_iowa = REMOTE_HUB_L(nasid, IIO_OUTWIDGET_ACCESS); - REMOTE_HUB_S(nasid, IIO_OUTWIDGET_ACCESS, 0); - - ii_wcr.wcr_reg_value = REMOTE_HUB_L(nasid, IIO_WCR); - - if (ii_wcr.iwcr_dir_con) { - /* - * Assume a bridge here. - */ - hub_setup_prb(nasid, 0, 3); - } else { - /* - * Assume a crossbow here. - */ - hub_setup_prb(nasid, 0, 1); - } - - /* - * XXX - Here's where we should take the widget type into - * when account assigning credits. - */ - for (i = HUB_WIDGET_ID_MIN; i <= HUB_WIDGET_ID_MAX; i++) - hub_setup_prb(nasid, i, 3); - - REMOTE_HUB_S(nasid, IIO_OUTWIDGET_ACCESS, ii_iowa); -} - -/* - * hub_pio_init - PIO-related hub initialization - * - * @hub: hubinfo structure for our hub - */ -void hub_pio_init(nasid_t nasid) -{ - unsigned i; - - /* initialize big window piomaps for this hub */ - bitmap_zero(hub_data(nasid)->h_bigwin_used, HUB_NUM_BIG_WINDOW); - for (i = 0; i < HUB_NUM_BIG_WINDOW; i++) - IIO_ITTE_DISABLE(nasid, i); - - hub_set_piomode(nasid); -} diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index a0dd3bd2b81b..8f5299b269e7 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -23,6 +23,8 @@ #include #include +#include "ip27-common.h" + struct hub_irq_data { u64 *irq_mask[2]; cpuid_t cpu; diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index f79c48393716..b8ca94cfb4fe 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/sgi-ip27/ip27-nmi.c b/arch/mips/sgi-ip27/ip27-nmi.c index 84889b57d5ff..fc2816398d0c 100644 --- a/arch/mips/sgi-ip27/ip27-nmi.c +++ b/arch/mips/sgi-ip27/ip27-nmi.c @@ -11,6 +11,8 @@ #include #include +#include "ip27-common.h" + #if 0 #define NODE_NUM_CPUS(n) CNODE_NUM_CPUS(n) #else @@ -23,16 +25,7 @@ typedef unsigned long machreg_t; static arch_spinlock_t nmi_lock = __ARCH_SPIN_LOCK_UNLOCKED; - -/* - * Let's see what else we need to do here. Set up sp, gp? - */ -void nmi_dump(void) -{ - void cont_nmi_dump(void); - - cont_nmi_dump(); -} +static void nmi_dump(void); void install_cpu_nmi_handler(int slice) { @@ -53,7 +46,7 @@ void install_cpu_nmi_handler(int slice) * into the eframe format for the node under consideration. */ -void nmi_cpu_eframe_save(nasid_t nasid, int slice) +static void nmi_cpu_eframe_save(nasid_t nasid, int slice) { struct reg_struct *nr; int i; @@ -129,7 +122,7 @@ void nmi_cpu_eframe_save(nasid_t nasid, int slice) pr_emerg("\n"); } -void nmi_dump_hub_irq(nasid_t nasid, int slice) +static void nmi_dump_hub_irq(nasid_t nasid, int slice) { u64 mask0, mask1, pend0, pend1; @@ -153,7 +146,7 @@ void nmi_dump_hub_irq(nasid_t nasid, int slice) * Copy the cpu registers which have been saved in the IP27prom format * into the eframe format for the node under consideration. */ -void nmi_node_eframe_save(nasid_t nasid) +static void nmi_node_eframe_save(nasid_t nasid) { int slice; @@ -170,8 +163,7 @@ void nmi_node_eframe_save(nasid_t nasid) /* * Save the nmi cpu registers for all cpus in the system. */ -void -nmi_eframes_save(void) +static void nmi_eframes_save(void) { nasid_t nasid; @@ -179,8 +171,7 @@ nmi_eframes_save(void) nmi_node_eframe_save(nasid); } -void -cont_nmi_dump(void) +static void nmi_dump(void) { #ifndef REAL_NMI_SIGNAL static atomic_t nmied_cpus = ATOMIC_INIT(0); From e3a4f1b7ada8360c1838ac3aad9837749a698c7a Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 19 Jan 2024 14:36:34 +0100 Subject: [PATCH 059/347] MIPS: fw arc: Fix missing prototypes Make ArcGetMemoryDescriptor() static since it's only needed internally. Signed-off-by: Thomas Bogendoerfer Reviewed-by: Florian Fainelli --- arch/mips/fw/arc/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c index 66188739f54d..fb78e6fd5de4 100644 --- a/arch/mips/fw/arc/memory.c +++ b/arch/mips/fw/arc/memory.c @@ -37,7 +37,7 @@ static unsigned int nr_prom_mem __initdata; */ #define ARC_PAGE_SHIFT 12 -struct linux_mdesc * __init ArcGetMemoryDescriptor(struct linux_mdesc *Current) +static struct linux_mdesc * __init ArcGetMemoryDescriptor(struct linux_mdesc *Current) { return (struct linux_mdesc *) ARC_CALL1(get_mdesc, Current); } From ab58a2f319de945f631150a190653a90e307df8e Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 19 Jan 2024 14:37:57 +0100 Subject: [PATCH 060/347] MIPS: sgi-ip30: Fix missing prototypes Include needed header files. Signed-off-by: Thomas Bogendoerfer Reviewed-by: Florian Fainelli --- arch/mips/sgi-ip30/ip30-console.c | 1 + arch/mips/sgi-ip30/ip30-setup.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/mips/sgi-ip30/ip30-console.c b/arch/mips/sgi-ip30/ip30-console.c index b91f8c4fdc78..7c6dcf6e73f7 100644 --- a/arch/mips/sgi-ip30/ip30-console.c +++ b/arch/mips/sgi-ip30/ip30-console.c @@ -3,6 +3,7 @@ #include #include +#include static inline struct ioc3_uartregs *console_uart(void) { diff --git a/arch/mips/sgi-ip30/ip30-setup.c b/arch/mips/sgi-ip30/ip30-setup.c index 75a34684e704..e8547636a748 100644 --- a/arch/mips/sgi-ip30/ip30-setup.c +++ b/arch/mips/sgi-ip30/ip30-setup.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include From f64fdde9bc771d7d790e8bcc30a7e03bbe0b1a9f Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 19 Jan 2024 14:52:51 +0100 Subject: [PATCH 061/347] MIPS: sgi-ip32: Fix missing prototypes Fix interrupt function prototypes, move all prototypes into a new file ip32-common.h and include it where needed. Signed-off-by: Thomas Bogendoerfer Reviewed-by: Florian Fainelli --- arch/mips/sgi-ip32/crime.c | 6 ++++-- arch/mips/sgi-ip32/ip32-berr.c | 2 ++ arch/mips/sgi-ip32/ip32-common.h | 15 +++++++++++++++ arch/mips/sgi-ip32/ip32-irq.c | 6 ++---- arch/mips/sgi-ip32/ip32-memory.c | 1 + arch/mips/sgi-ip32/ip32-reset.c | 2 ++ arch/mips/sgi-ip32/ip32-setup.c | 3 +-- 7 files changed, 27 insertions(+), 8 deletions(-) create mode 100644 arch/mips/sgi-ip32/ip32-common.h diff --git a/arch/mips/sgi-ip32/crime.c b/arch/mips/sgi-ip32/crime.c index a8e0c776ca6c..b8a0e4cfa9ce 100644 --- a/arch/mips/sgi-ip32/crime.c +++ b/arch/mips/sgi-ip32/crime.c @@ -18,6 +18,8 @@ #include #include +#include "ip32-common.h" + struct sgi_crime __iomem *crime; struct sgi_mace __iomem *mace; @@ -39,7 +41,7 @@ void __init crime_init(void) id, rev, field, (unsigned long) CRIME_BASE); } -irqreturn_t crime_memerr_intr(unsigned int irq, void *dev_id) +irqreturn_t crime_memerr_intr(int irq, void *dev_id) { unsigned long stat, addr; int fatal = 0; @@ -90,7 +92,7 @@ irqreturn_t crime_memerr_intr(unsigned int irq, void *dev_id) return IRQ_HANDLED; } -irqreturn_t crime_cpuerr_intr(unsigned int irq, void *dev_id) +irqreturn_t crime_cpuerr_intr(int irq, void *dev_id) { unsigned long stat = crime->cpu_error_stat & CRIME_CPU_ERROR_MASK; unsigned long addr = crime->cpu_error_addr & CRIME_CPU_ERROR_ADDR_MASK; diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c index 478b63b4c808..7cbc27941f92 100644 --- a/arch/mips/sgi-ip32/ip32-berr.c +++ b/arch/mips/sgi-ip32/ip32-berr.c @@ -18,6 +18,8 @@ #include #include +#include "ip32-common.h" + static int ip32_be_handler(struct pt_regs *regs, int is_fixup) { int data = regs->cp0_cause & 4; diff --git a/arch/mips/sgi-ip32/ip32-common.h b/arch/mips/sgi-ip32/ip32-common.h new file mode 100644 index 000000000000..cfc0225b1419 --- /dev/null +++ b/arch/mips/sgi-ip32/ip32-common.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __IP32_COMMON_H +#define __IP32_COMMON_H + +#include +#include + +void __init crime_init(void); +irqreturn_t crime_memerr_intr(int irq, void *dev_id); +irqreturn_t crime_cpuerr_intr(int irq, void *dev_id); +void __init ip32_be_init(void); +void ip32_prepare_poweroff(void); + +#endif /* __IP32_COMMON_H */ diff --git a/arch/mips/sgi-ip32/ip32-irq.c b/arch/mips/sgi-ip32/ip32-irq.c index e21ea1de05e3..29d04468a06b 100644 --- a/arch/mips/sgi-ip32/ip32-irq.c +++ b/arch/mips/sgi-ip32/ip32-irq.c @@ -28,6 +28,8 @@ #include #include +#include "ip32-common.h" + /* issue a PIO read to make sure no PIO writes are pending */ static inline void flush_crime_bus(void) { @@ -107,10 +109,6 @@ static inline void flush_mace_bus(void) * is quite different anyway. */ -/* Some initial interrupts to set up */ -extern irqreturn_t crime_memerr_intr(int irq, void *dev_id); -extern irqreturn_t crime_cpuerr_intr(int irq, void *dev_id); - /* * This is for pure CRIME interrupts - ie not MACE. The advantage? * We get to split the register in half and do faster lookups. diff --git a/arch/mips/sgi-ip32/ip32-memory.c b/arch/mips/sgi-ip32/ip32-memory.c index 3fc8d0a0bdfa..5fee33744f67 100644 --- a/arch/mips/sgi-ip32/ip32-memory.c +++ b/arch/mips/sgi-ip32/ip32-memory.c @@ -15,6 +15,7 @@ #include #include #include +#include extern void crime_init(void); diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c index 18d1c115cd53..6bdc1421cda4 100644 --- a/arch/mips/sgi-ip32/ip32-reset.c +++ b/arch/mips/sgi-ip32/ip32-reset.c @@ -29,6 +29,8 @@ #include #include +#include "ip32-common.h" + #define POWERDOWN_TIMEOUT 120 /* * Blink frequency during reboot grace period and when panicked. diff --git a/arch/mips/sgi-ip32/ip32-setup.c b/arch/mips/sgi-ip32/ip32-setup.c index 8019dae1721a..aeb0805aae57 100644 --- a/arch/mips/sgi-ip32/ip32-setup.c +++ b/arch/mips/sgi-ip32/ip32-setup.c @@ -26,8 +26,7 @@ #include #include -extern void ip32_be_init(void); -extern void crime_init(void); +#include "ip32-common.h" #ifdef CONFIG_SGI_O2MACE_ETH /* From 6ba7843b59b77360812617d071313c7f35f3757a Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 3 Jan 2024 20:27:04 +0100 Subject: [PATCH 062/347] platform/x86: wmi: Fix error handling in legacy WMI notify handler functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When wmi_install_notify_handler()/wmi_remove_notify_handler() are unable to enable/disable the WMI device, they unconditionally return an error to the caller. When registering legacy WMI notify handlers, this means that the callback remains registered despite wmi_install_notify_handler() having returned an error. When removing legacy WMI notify handlers, this means that the callback is removed despite wmi_remove_notify_handler() having returned an error. Fix this by only warning when the WMI device could not be enabled. This behaviour matches the bus-based WMI interface. Tested on a Dell Inspiron 3505 and a Acer Aspire E1-731. Fixes: 58f6425eb92f ("WMI: Cater for multiple events with same GUID") Signed-off-by: Armin Wolf Reviewed-by: Ilpo Järvinen Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240103192707.115512-2-W_Armin@gmx.de Signed-off-by: Hans de Goede --- drivers/platform/x86/wmi.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index bd271a5730aa..23b4043c0fe7 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -593,9 +593,10 @@ acpi_status wmi_install_notify_handler(const char *guid, block->handler_data = data; wmi_status = wmi_method_enable(block, true); - if ((wmi_status != AE_OK) || - ((wmi_status == AE_OK) && (status == AE_NOT_EXIST))) - status = wmi_status; + if (ACPI_FAILURE(wmi_status)) + dev_warn(&block->dev.dev, "Failed to enable device\n"); + + status = AE_OK; } } @@ -631,10 +632,13 @@ acpi_status wmi_remove_notify_handler(const char *guid) return AE_NULL_ENTRY; wmi_status = wmi_method_enable(block, false); + if (ACPI_FAILURE(wmi_status)) + dev_warn(&block->dev.dev, "Failed to disable device\n"); + block->handler = NULL; block->handler_data = NULL; - if (wmi_status != AE_OK || (wmi_status == AE_OK && status == AE_NOT_EXIST)) - status = wmi_status; + + status = AE_OK; } } From 3d8a29fec2cb96b3aa75a595f20c4b73ff294a97 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 3 Jan 2024 20:27:05 +0100 Subject: [PATCH 063/347] platform/x86: wmi: Return immediately if an suitable WMI event is found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 58f6425eb92f ("WMI: Cater for multiple events with same GUID") allowed legacy WMI notify handlers to be installed for multiple WMI devices with the same GUID. However this is useless since the legacy GUID-based interface is blacklisted from seeing WMI devices with duplicated GUIDs. Return immediately if a suitable WMI event is found in wmi_install/remove_notify_handler() since searching for other suitable events is pointless. Tested on a Dell Inspiron 3505 and a Acer Aspire E1-731. Signed-off-by: Armin Wolf Reviewed-by: Ilpo Järvinen Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240103192707.115512-3-W_Armin@gmx.de Signed-off-by: Hans de Goede --- drivers/platform/x86/wmi.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 23b4043c0fe7..4e2d0e15b155 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -573,7 +573,6 @@ acpi_status wmi_install_notify_handler(const char *guid, void *data) { struct wmi_block *block; - acpi_status status = AE_NOT_EXIST; guid_t guid_input; if (!guid || !handler) @@ -596,11 +595,11 @@ acpi_status wmi_install_notify_handler(const char *guid, if (ACPI_FAILURE(wmi_status)) dev_warn(&block->dev.dev, "Failed to enable device\n"); - status = AE_OK; + return AE_OK; } } - return status; + return AE_NOT_EXIST; } EXPORT_SYMBOL_GPL(wmi_install_notify_handler); @@ -615,7 +614,6 @@ EXPORT_SYMBOL_GPL(wmi_install_notify_handler); acpi_status wmi_remove_notify_handler(const char *guid) { struct wmi_block *block; - acpi_status status = AE_NOT_EXIST; guid_t guid_input; if (!guid) @@ -638,11 +636,11 @@ acpi_status wmi_remove_notify_handler(const char *guid) block->handler = NULL; block->handler_data = NULL; - status = AE_OK; + return AE_OK; } } - return status; + return AE_NOT_EXIST; } EXPORT_SYMBOL_GPL(wmi_remove_notify_handler); From 3ea7f59af8ffa17ce5f5173d6f4bfbc73334187d Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 3 Jan 2024 20:27:06 +0100 Subject: [PATCH 064/347] platform/x86: wmi: Decouple legacy WMI notify handlers from wmi_block_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until now, legacy WMI notify handler functions where using the wmi_block_list, which did no refcounting on the returned WMI device. This meant that the WMI device could disappear at any moment, potentially leading to various errors. Fix this by using bus_find_device() which returns an actual reference to the found WMI device. Tested on a Dell Inspiron 3505 and a Acer Aspire E1-731. Signed-off-by: Armin Wolf Reviewed-by: Ilpo Järvinen Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240103192707.115512-4-W_Armin@gmx.de Signed-off-by: Hans de Goede --- drivers/platform/x86/wmi.c | 118 +++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 50 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 4e2d0e15b155..699157ce347c 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -219,6 +219,17 @@ static int wmidev_match_guid(struct device *dev, const void *data) return 0; } +static int wmidev_match_notify_id(struct device *dev, const void *data) +{ + struct wmi_block *wblock = dev_to_wblock(dev); + const u32 *notify_id = data; + + if (wblock->gblock.flags & ACPI_WMI_EVENT && wblock->gblock.notify_id == *notify_id) + return 1; + + return 0; +} + static struct bus_type wmi_bus_type; static struct wmi_device *wmi_find_device_by_guid(const char *guid_string) @@ -238,6 +249,17 @@ static struct wmi_device *wmi_find_device_by_guid(const char *guid_string) return dev_to_wdev(dev); } +static struct wmi_device *wmi_find_event_by_notify_id(const u32 notify_id) +{ + struct device *dev; + + dev = bus_find_device(&wmi_bus_type, NULL, ¬ify_id, wmidev_match_notify_id); + if (!dev) + return ERR_PTR(-ENODEV); + + return to_wmi_device(dev); +} + static void wmi_device_put(struct wmi_device *wdev) { put_device(&wdev->dev); @@ -572,34 +594,30 @@ acpi_status wmi_install_notify_handler(const char *guid, wmi_notify_handler handler, void *data) { - struct wmi_block *block; - guid_t guid_input; + struct wmi_block *wblock; + struct wmi_device *wdev; + acpi_status status; - if (!guid || !handler) - return AE_BAD_PARAMETER; + wdev = wmi_find_device_by_guid(guid); + if (IS_ERR(wdev)) + return AE_ERROR; - if (guid_parse(guid, &guid_input)) - return AE_BAD_PARAMETER; + wblock = container_of(wdev, struct wmi_block, dev); + if (wblock->handler) { + status = AE_ALREADY_ACQUIRED; + } else { + wblock->handler = handler; + wblock->handler_data = data; - list_for_each_entry(block, &wmi_block_list, list) { - acpi_status wmi_status; + if (ACPI_FAILURE(wmi_method_enable(wblock, true))) + dev_warn(&wblock->dev.dev, "Failed to enable device\n"); - if (guid_equal(&block->gblock.guid, &guid_input)) { - if (block->handler) - return AE_ALREADY_ACQUIRED; - - block->handler = handler; - block->handler_data = data; - - wmi_status = wmi_method_enable(block, true); - if (ACPI_FAILURE(wmi_status)) - dev_warn(&block->dev.dev, "Failed to enable device\n"); - - return AE_OK; - } + status = AE_OK; } - return AE_NOT_EXIST; + wmi_device_put(wdev); + + return status; } EXPORT_SYMBOL_GPL(wmi_install_notify_handler); @@ -613,34 +631,30 @@ EXPORT_SYMBOL_GPL(wmi_install_notify_handler); */ acpi_status wmi_remove_notify_handler(const char *guid) { - struct wmi_block *block; - guid_t guid_input; + struct wmi_block *wblock; + struct wmi_device *wdev; + acpi_status status; - if (!guid) - return AE_BAD_PARAMETER; + wdev = wmi_find_device_by_guid(guid); + if (IS_ERR(wdev)) + return AE_ERROR; - if (guid_parse(guid, &guid_input)) - return AE_BAD_PARAMETER; + wblock = container_of(wdev, struct wmi_block, dev); + if (!wblock->handler) { + status = AE_NULL_ENTRY; + } else { + if (ACPI_FAILURE(wmi_method_enable(wblock, false))) + dev_warn(&wblock->dev.dev, "Failed to disable device\n"); - list_for_each_entry(block, &wmi_block_list, list) { - acpi_status wmi_status; + wblock->handler = NULL; + wblock->handler_data = NULL; - if (guid_equal(&block->gblock.guid, &guid_input)) { - if (!block->handler) - return AE_NULL_ENTRY; - - wmi_status = wmi_method_enable(block, false); - if (ACPI_FAILURE(wmi_status)) - dev_warn(&block->dev.dev, "Failed to disable device\n"); - - block->handler = NULL; - block->handler_data = NULL; - - return AE_OK; - } + status = AE_OK; } - return AE_NOT_EXIST; + wmi_device_put(wdev); + + return status; } EXPORT_SYMBOL_GPL(wmi_remove_notify_handler); @@ -657,15 +671,19 @@ EXPORT_SYMBOL_GPL(wmi_remove_notify_handler); acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out) { struct wmi_block *wblock; + struct wmi_device *wdev; + acpi_status status; - list_for_each_entry(wblock, &wmi_block_list, list) { - struct guid_block *gblock = &wblock->gblock; + wdev = wmi_find_event_by_notify_id(event); + if (IS_ERR(wdev)) + return AE_NOT_FOUND; - if ((gblock->flags & ACPI_WMI_EVENT) && gblock->notify_id == event) - return get_event_data(wblock, out); - } + wblock = container_of(wdev, struct wmi_block, dev); + status = get_event_data(wblock, out); - return AE_NOT_FOUND; + wmi_device_put(wdev); + + return status; } EXPORT_SYMBOL_GPL(wmi_get_event_data); From 29e473f4b51ee56b5808323e274a8369b4d181cb Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 3 Jan 2024 20:27:07 +0100 Subject: [PATCH 065/347] platform/x86: wmi: Fix notify callback locking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an legacy WMI event handler is removed, an WMI event could have called the handler just before it was removed, meaning the handler could still be running after wmi_remove_notify_handler() returns. Something similar could also happens when using the WMI bus, as the WMI core might still call the notify() callback from an WMI driver even if its remove() callback was just called. Fix this by introducing a rw semaphore which ensures that the event state of a WMI device does not change while the WMI core is handling an event for it. Tested on a Dell Inspiron 3505 and a Acer Aspire E1-731. Fixes: 1686f5444546 ("platform/x86: wmi: Incorporate acpi_install_notify_handler") Signed-off-by: Armin Wolf Reviewed-by: Ilpo Järvinen Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240103192707.115512-5-W_Armin@gmx.de Signed-off-by: Hans de Goede --- drivers/platform/x86/wmi.c | 71 +++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 699157ce347c..d1df1a31de00 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -56,7 +57,6 @@ static_assert(__alignof__(struct guid_block) == 1); enum { /* wmi_block flags */ WMI_READ_TAKES_NO_ARGS, - WMI_PROBED, }; struct wmi_block { @@ -64,8 +64,10 @@ struct wmi_block { struct list_head list; struct guid_block gblock; struct acpi_device *acpi_device; + struct rw_semaphore notify_lock; /* Protects notify callback add/remove */ wmi_notify_handler handler; void *handler_data; + bool driver_ready; unsigned long flags; }; @@ -603,6 +605,8 @@ acpi_status wmi_install_notify_handler(const char *guid, return AE_ERROR; wblock = container_of(wdev, struct wmi_block, dev); + + down_write(&wblock->notify_lock); if (wblock->handler) { status = AE_ALREADY_ACQUIRED; } else { @@ -614,6 +618,7 @@ acpi_status wmi_install_notify_handler(const char *guid, status = AE_OK; } + up_write(&wblock->notify_lock); wmi_device_put(wdev); @@ -640,6 +645,8 @@ acpi_status wmi_remove_notify_handler(const char *guid) return AE_ERROR; wblock = container_of(wdev, struct wmi_block, dev); + + down_write(&wblock->notify_lock); if (!wblock->handler) { status = AE_NULL_ENTRY; } else { @@ -651,6 +658,7 @@ acpi_status wmi_remove_notify_handler(const char *guid) status = AE_OK; } + up_write(&wblock->notify_lock); wmi_device_put(wdev); @@ -896,7 +904,9 @@ static int wmi_dev_probe(struct device *dev) } } - set_bit(WMI_PROBED, &wblock->flags); + down_write(&wblock->notify_lock); + wblock->driver_ready = true; + up_write(&wblock->notify_lock); return 0; } @@ -906,7 +916,9 @@ static void wmi_dev_remove(struct device *dev) struct wmi_block *wblock = dev_to_wblock(dev); struct wmi_driver *wdriver = drv_to_wdrv(dev->driver); - clear_bit(WMI_PROBED, &wblock->flags); + down_write(&wblock->notify_lock); + wblock->driver_ready = false; + up_write(&wblock->notify_lock); if (wdriver->remove) wdriver->remove(dev_to_wdev(dev)); @@ -1019,6 +1031,8 @@ static int wmi_create_device(struct device *wmi_bus_dev, wblock->dev.setable = true; out_init: + init_rwsem(&wblock->notify_lock); + wblock->driver_ready = false; wblock->dev.dev.bus = &wmi_bus_type; wblock->dev.dev.parent = wmi_bus_dev; @@ -1191,6 +1205,26 @@ acpi_wmi_ec_space_handler(u32 function, acpi_physical_address address, } } +static void wmi_notify_driver(struct wmi_block *wblock) +{ + struct wmi_driver *driver = drv_to_wdrv(wblock->dev.dev.driver); + struct acpi_buffer data = { ACPI_ALLOCATE_BUFFER, NULL }; + acpi_status status; + + if (!driver->no_notify_data) { + status = get_event_data(wblock, &data); + if (ACPI_FAILURE(status)) { + dev_warn(&wblock->dev.dev, "Failed to get event data\n"); + return; + } + } + + if (driver->notify) + driver->notify(&wblock->dev, data.pointer); + + kfree(data.pointer); +} + static int wmi_notify_device(struct device *dev, void *data) { struct wmi_block *wblock = dev_to_wblock(dev); @@ -1199,28 +1233,17 @@ static int wmi_notify_device(struct device *dev, void *data) if (!(wblock->gblock.flags & ACPI_WMI_EVENT && wblock->gblock.notify_id == *event)) return 0; - /* If a driver is bound, then notify the driver. */ - if (test_bit(WMI_PROBED, &wblock->flags) && wblock->dev.dev.driver) { - struct wmi_driver *driver = drv_to_wdrv(wblock->dev.dev.driver); - struct acpi_buffer evdata = { ACPI_ALLOCATE_BUFFER, NULL }; - acpi_status status; - - if (!driver->no_notify_data) { - status = get_event_data(wblock, &evdata); - if (ACPI_FAILURE(status)) { - dev_warn(&wblock->dev.dev, "failed to get event data\n"); - return -EIO; - } - } - - if (driver->notify) - driver->notify(&wblock->dev, evdata.pointer); - - kfree(evdata.pointer); - } else if (wblock->handler) { - /* Legacy handler */ - wblock->handler(*event, wblock->handler_data); + down_read(&wblock->notify_lock); + /* The WMI driver notify handler conflicts with the legacy WMI handler. + * Because of this the WMI driver notify handler takes precedence. + */ + if (wblock->dev.dev.driver && wblock->driver_ready) { + wmi_notify_driver(wblock); + } else { + if (wblock->handler) + wblock->handler(*event, wblock->handler_data); } + up_read(&wblock->notify_lock); acpi_bus_generate_netlink_event(wblock->acpi_device->pnp.device_class, dev_name(&wblock->dev.dev), *event, 0); From 8446f9d11678bc268897882e86733d73204f83c4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Jan 2024 16:47:54 +0300 Subject: [PATCH 066/347] platform/x86: wmi: Fix wmi_dev_probe() This has a reversed if statement so it accidentally disables the wmi method before returning. Fixes: 704af3a40747 ("platform/x86: wmi: Remove chardev interface") Signed-off-by: Dan Carpenter Reviewed-by: Armin Wolf Link: https://lore.kernel.org/r/9c81251b-bc87-4ca3-bb86-843dc85e5145@moroto.mountain Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index d1df1a31de00..3c288e8f404b 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -896,7 +896,7 @@ static int wmi_dev_probe(struct device *dev) if (wdriver->probe) { ret = wdriver->probe(dev_to_wdev(dev), find_guid_context(wblock, wdriver)); - if (!ret) { + if (ret) { if (ACPI_FAILURE(wmi_method_enable(wblock, false))) dev_warn(dev, "Failed to disable device\n"); From 416de0246f35f43d871a57939671fe814f4455ee Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 4 Jan 2024 15:59:03 -0700 Subject: [PATCH 067/347] platform/x86: intel-uncore-freq: Fix types in sysfs callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When booting a kernel with CONFIG_CFI_CLANG, there is a CFI failure when accessing any of the values under /sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00: $ cat /sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/max_freq_khz fish: Job 1, 'cat /sys/devices/system/cpu/int…' terminated by signal SIGSEGV (Address boundary error) $ sudo dmesg &| grep 'CFI failure' [ 170.953925] CFI failure at kobj_attr_show+0x19/0x30 (target: show_max_freq_khz+0x0/0xc0 [intel_uncore_frequency_common]; expected type: 0xd34078c5 The sysfs callback functions such as show_domain_id() are written as if they are going to be called by dev_attr_show() but as the above message shows, they are instead called by kobj_attr_show(). kCFI checks that the destination of an indirect jump has the exact same type as the prototype of the function pointer it is called through and fails when they do not. These callbacks are called through kobj_attr_show() because uncore_root_kobj was initialized with kobject_create_and_add(), which means uncore_root_kobj has a ->sysfs_ops of kobj_sysfs_ops from kobject_create(), which uses kobj_attr_show() as its ->show() value. The only reason there has not been a more noticeable problem until this point is that 'struct kobj_attribute' and 'struct device_attribute' have the same layout, so getting the callback from container_of() works the same with either value. Change all the callbacks and their uses to be compatible with kobj_attr_show() and kobj_attr_store(), which resolves the kCFI failure and allows the sysfs files to work properly. Closes: https://github.com/ClangBuiltLinux/linux/issues/1974 Fixes: ae7b2ce57851 ("platform/x86/intel/uncore-freq: Use sysfs API to create attributes") Cc: stable@vger.kernel.org Signed-off-by: Nathan Chancellor Reviewed-by: Sami Tolvanen Acked-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20240104-intel-uncore-freq-kcfi-fix-v1-1-bf1e8939af40@kernel.org Signed-off-by: Hans de Goede --- .../uncore-frequency-common.c | 82 +++++++++---------- .../uncore-frequency-common.h | 32 ++++---- 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c index 33ab207493e3..33bb58dc3f78 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c @@ -23,23 +23,23 @@ static int (*uncore_read)(struct uncore_data *data, unsigned int *min, unsigned static int (*uncore_write)(struct uncore_data *data, unsigned int input, unsigned int min_max); static int (*uncore_read_freq)(struct uncore_data *data, unsigned int *freq); -static ssize_t show_domain_id(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t show_domain_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - struct uncore_data *data = container_of(attr, struct uncore_data, domain_id_dev_attr); + struct uncore_data *data = container_of(attr, struct uncore_data, domain_id_kobj_attr); return sprintf(buf, "%u\n", data->domain_id); } -static ssize_t show_fabric_cluster_id(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t show_fabric_cluster_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - struct uncore_data *data = container_of(attr, struct uncore_data, fabric_cluster_id_dev_attr); + struct uncore_data *data = container_of(attr, struct uncore_data, fabric_cluster_id_kobj_attr); return sprintf(buf, "%u\n", data->cluster_id); } -static ssize_t show_package_id(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t show_package_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - struct uncore_data *data = container_of(attr, struct uncore_data, package_id_dev_attr); + struct uncore_data *data = container_of(attr, struct uncore_data, package_id_kobj_attr); return sprintf(buf, "%u\n", data->package_id); } @@ -97,30 +97,30 @@ static ssize_t show_perf_status_freq_khz(struct uncore_data *data, char *buf) } #define store_uncore_min_max(name, min_max) \ - static ssize_t store_##name(struct device *dev, \ - struct device_attribute *attr, \ + static ssize_t store_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, \ const char *buf, size_t count) \ { \ - struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ + struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ \ return store_min_max_freq_khz(data, buf, count, \ min_max); \ } #define show_uncore_min_max(name, min_max) \ - static ssize_t show_##name(struct device *dev, \ - struct device_attribute *attr, char *buf)\ + static ssize_t show_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf)\ { \ - struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ + struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ \ return show_min_max_freq_khz(data, buf, min_max); \ } #define show_uncore_perf_status(name) \ - static ssize_t show_##name(struct device *dev, \ - struct device_attribute *attr, char *buf)\ + static ssize_t show_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf)\ { \ - struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ + struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ \ return show_perf_status_freq_khz(data, buf); \ } @@ -134,11 +134,11 @@ show_uncore_min_max(max_freq_khz, 1); show_uncore_perf_status(current_freq_khz); #define show_uncore_data(member_name) \ - static ssize_t show_##member_name(struct device *dev, \ - struct device_attribute *attr, char *buf)\ + static ssize_t show_##member_name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf)\ { \ struct uncore_data *data = container_of(attr, struct uncore_data,\ - member_name##_dev_attr);\ + member_name##_kobj_attr);\ \ return sysfs_emit(buf, "%u\n", \ data->member_name); \ @@ -149,29 +149,29 @@ show_uncore_data(initial_max_freq_khz); #define init_attribute_rw(_name) \ do { \ - sysfs_attr_init(&data->_name##_dev_attr.attr); \ - data->_name##_dev_attr.show = show_##_name; \ - data->_name##_dev_attr.store = store_##_name; \ - data->_name##_dev_attr.attr.name = #_name; \ - data->_name##_dev_attr.attr.mode = 0644; \ + sysfs_attr_init(&data->_name##_kobj_attr.attr); \ + data->_name##_kobj_attr.show = show_##_name; \ + data->_name##_kobj_attr.store = store_##_name; \ + data->_name##_kobj_attr.attr.name = #_name; \ + data->_name##_kobj_attr.attr.mode = 0644; \ } while (0) #define init_attribute_ro(_name) \ do { \ - sysfs_attr_init(&data->_name##_dev_attr.attr); \ - data->_name##_dev_attr.show = show_##_name; \ - data->_name##_dev_attr.store = NULL; \ - data->_name##_dev_attr.attr.name = #_name; \ - data->_name##_dev_attr.attr.mode = 0444; \ + sysfs_attr_init(&data->_name##_kobj_attr.attr); \ + data->_name##_kobj_attr.show = show_##_name; \ + data->_name##_kobj_attr.store = NULL; \ + data->_name##_kobj_attr.attr.name = #_name; \ + data->_name##_kobj_attr.attr.mode = 0444; \ } while (0) #define init_attribute_root_ro(_name) \ do { \ - sysfs_attr_init(&data->_name##_dev_attr.attr); \ - data->_name##_dev_attr.show = show_##_name; \ - data->_name##_dev_attr.store = NULL; \ - data->_name##_dev_attr.attr.name = #_name; \ - data->_name##_dev_attr.attr.mode = 0400; \ + sysfs_attr_init(&data->_name##_kobj_attr.attr); \ + data->_name##_kobj_attr.show = show_##_name; \ + data->_name##_kobj_attr.store = NULL; \ + data->_name##_kobj_attr.attr.name = #_name; \ + data->_name##_kobj_attr.attr.mode = 0400; \ } while (0) static int create_attr_group(struct uncore_data *data, char *name) @@ -186,21 +186,21 @@ static int create_attr_group(struct uncore_data *data, char *name) if (data->domain_id != UNCORE_DOMAIN_ID_INVALID) { init_attribute_root_ro(domain_id); - data->uncore_attrs[index++] = &data->domain_id_dev_attr.attr; + data->uncore_attrs[index++] = &data->domain_id_kobj_attr.attr; init_attribute_root_ro(fabric_cluster_id); - data->uncore_attrs[index++] = &data->fabric_cluster_id_dev_attr.attr; + data->uncore_attrs[index++] = &data->fabric_cluster_id_kobj_attr.attr; init_attribute_root_ro(package_id); - data->uncore_attrs[index++] = &data->package_id_dev_attr.attr; + data->uncore_attrs[index++] = &data->package_id_kobj_attr.attr; } - data->uncore_attrs[index++] = &data->max_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->min_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->initial_min_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->initial_max_freq_khz_dev_attr.attr; + data->uncore_attrs[index++] = &data->max_freq_khz_kobj_attr.attr; + data->uncore_attrs[index++] = &data->min_freq_khz_kobj_attr.attr; + data->uncore_attrs[index++] = &data->initial_min_freq_khz_kobj_attr.attr; + data->uncore_attrs[index++] = &data->initial_max_freq_khz_kobj_attr.attr; ret = uncore_read_freq(data, &freq); if (!ret) - data->uncore_attrs[index++] = &data->current_freq_khz_dev_attr.attr; + data->uncore_attrs[index++] = &data->current_freq_khz_kobj_attr.attr; data->uncore_attrs[index] = NULL; diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h index 7afb69977c7e..0e5bf507e555 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h @@ -26,14 +26,14 @@ * @instance_id: Unique instance id to append to directory name * @name: Sysfs entry name for this instance * @uncore_attr_group: Attribute group storage - * @max_freq_khz_dev_attr: Storage for device attribute max_freq_khz - * @mix_freq_khz_dev_attr: Storage for device attribute min_freq_khz - * @initial_max_freq_khz_dev_attr: Storage for device attribute initial_max_freq_khz - * @initial_min_freq_khz_dev_attr: Storage for device attribute initial_min_freq_khz - * @current_freq_khz_dev_attr: Storage for device attribute current_freq_khz - * @domain_id_dev_attr: Storage for device attribute domain_id - * @fabric_cluster_id_dev_attr: Storage for device attribute fabric_cluster_id - * @package_id_dev_attr: Storage for device attribute package_id + * @max_freq_khz_kobj_attr: Storage for kobject attribute max_freq_khz + * @mix_freq_khz_kobj_attr: Storage for kobject attribute min_freq_khz + * @initial_max_freq_khz_kobj_attr: Storage for kobject attribute initial_max_freq_khz + * @initial_min_freq_khz_kobj_attr: Storage for kobject attribute initial_min_freq_khz + * @current_freq_khz_kobj_attr: Storage for kobject attribute current_freq_khz + * @domain_id_kobj_attr: Storage for kobject attribute domain_id + * @fabric_cluster_id_kobj_attr: Storage for kobject attribute fabric_cluster_id + * @package_id_kobj_attr: Storage for kobject attribute package_id * @uncore_attrs: Attribute storage for group creation * * This structure is used to encapsulate all data related to uncore sysfs @@ -53,14 +53,14 @@ struct uncore_data { char name[32]; struct attribute_group uncore_attr_group; - struct device_attribute max_freq_khz_dev_attr; - struct device_attribute min_freq_khz_dev_attr; - struct device_attribute initial_max_freq_khz_dev_attr; - struct device_attribute initial_min_freq_khz_dev_attr; - struct device_attribute current_freq_khz_dev_attr; - struct device_attribute domain_id_dev_attr; - struct device_attribute fabric_cluster_id_dev_attr; - struct device_attribute package_id_dev_attr; + struct kobj_attribute max_freq_khz_kobj_attr; + struct kobj_attribute min_freq_khz_kobj_attr; + struct kobj_attribute initial_max_freq_khz_kobj_attr; + struct kobj_attribute initial_min_freq_khz_kobj_attr; + struct kobj_attribute current_freq_khz_kobj_attr; + struct kobj_attribute domain_id_kobj_attr; + struct kobj_attribute fabric_cluster_id_kobj_attr; + struct kobj_attribute package_id_kobj_attr; struct attribute *uncore_attrs[9]; }; From 5913320eb0b3ec88158cfcb0fa5e996bf4ef681b Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Mon, 8 Jan 2024 15:20:58 +0900 Subject: [PATCH 068/347] platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit p2sb_bar() unhides P2SB device to get resources from the device. It guards the operation by locking pci_rescan_remove_lock so that parallel rescans do not find the P2SB device. However, this lock causes deadlock when PCI bus rescan is triggered by /sys/bus/pci/rescan. The rescan locks pci_rescan_remove_lock and probes PCI devices. When PCI devices call p2sb_bar() during probe, it locks pci_rescan_remove_lock again. Hence the deadlock. To avoid the deadlock, do not lock pci_rescan_remove_lock in p2sb_bar(). Instead, do the lock at fs_initcall. Introduce p2sb_cache_resources() for fs_initcall which gets and caches the P2SB resources. At p2sb_bar(), refer the cache and return to the caller. Before operating the device at P2SB DEVFN for resource cache, check that its device class is PCI_CLASS_MEMORY_OTHER 0x0580 that PCH specifications define. This avoids unexpected operation to other devices at the same DEVFN. Link: https://lore.kernel.org/linux-pci/6xb24fjmptxxn5js2fjrrddjae6twex5bjaftwqsuawuqqqydx@7cl3uik5ef6j/ Fixes: 9745fb07474f ("platform/x86/intel: Add Primary to Sideband (P2SB) bridge support") Cc: stable@vger.kernel.org Suggested-by: Andy Shevchenko Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20240108062059.3583028-2-shinichiro.kawasaki@wdc.com Reviewed-by: Andy Shevchenko Reviewed-by: Ilpo Järvinen Tested-by Klara Modin Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/p2sb.c | 202 ++++++++++++++++++++++++++---------- 1 file changed, 150 insertions(+), 52 deletions(-) diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index 1cf2471d54dd..17cc4b45e023 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -26,6 +26,21 @@ static const struct x86_cpu_id p2sb_cpu_ids[] = { {} }; +/* + * Cache BAR0 of P2SB device functions 0 to 7. + * TODO: The constant 8 is the number of functions that PCI specification + * defines. Same definitions exist tree-wide. Unify this definition and + * the other definitions then move to include/uapi/linux/pci.h. + */ +#define NR_P2SB_RES_CACHE 8 + +struct p2sb_res_cache { + u32 bus_dev_id; + struct resource res; +}; + +static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE]; + static int p2sb_get_devfn(unsigned int *devfn) { unsigned int fn = P2SB_DEVFN_DEFAULT; @@ -39,8 +54,16 @@ static int p2sb_get_devfn(unsigned int *devfn) return 0; } +static bool p2sb_valid_resource(struct resource *res) +{ + if (res->flags) + return true; + + return false; +} + /* Copy resource from the first BAR of the device in question */ -static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) +static void p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) { struct resource *bar0 = &pdev->resource[0]; @@ -56,22 +79,108 @@ static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) mem->end = bar0->end; mem->flags = bar0->flags; mem->desc = bar0->desc; +} + +static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) +{ + struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)]; + struct pci_dev *pdev; + + pdev = pci_scan_single_device(bus, devfn); + if (!pdev) + return; + + p2sb_read_bar0(pdev, &cache->res); + cache->bus_dev_id = bus->dev.id; + + pci_stop_and_remove_bus_device(pdev); +} + +static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) +{ + unsigned int slot, fn; + + if (PCI_FUNC(devfn) == 0) { + /* + * When function number of the P2SB device is zero, scan it and + * other function numbers, and if devices are available, cache + * their BAR0s. + */ + slot = PCI_SLOT(devfn); + for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++) + p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn)); + } else { + /* Scan the P2SB device and cache its BAR0 */ + p2sb_scan_and_cache_devfn(bus, devfn); + } + + if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) + return -ENOENT; return 0; } -static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct resource *mem) +static struct pci_bus *p2sb_get_bus(struct pci_bus *bus) { - struct pci_dev *pdev; + static struct pci_bus *p2sb_bus; + + bus = bus ?: p2sb_bus; + if (bus) + return bus; + + /* Assume P2SB is on the bus 0 in domain 0 */ + p2sb_bus = pci_find_bus(0, 0); + return p2sb_bus; +} + +static int p2sb_cache_resources(void) +{ + unsigned int devfn_p2sb; + u32 value = P2SBC_HIDE; + struct pci_bus *bus; + u16 class; int ret; - pdev = pci_scan_single_device(bus, devfn); - if (!pdev) + /* Get devfn for P2SB device itself */ + ret = p2sb_get_devfn(&devfn_p2sb); + if (ret) + return ret; + + bus = p2sb_get_bus(NULL); + if (!bus) return -ENODEV; - ret = p2sb_read_bar0(pdev, mem); + /* + * When a device with same devfn exists and its device class is not + * PCI_CLASS_MEMORY_OTHER for P2SB, do not touch it. + */ + pci_bus_read_config_word(bus, devfn_p2sb, PCI_CLASS_DEVICE, &class); + if (!PCI_POSSIBLE_ERROR(class) && class != PCI_CLASS_MEMORY_OTHER) + return -ENODEV; + + /* + * Prevent concurrent PCI bus scan from seeing the P2SB device and + * removing via sysfs while it is temporarily exposed. + */ + pci_lock_rescan_remove(); + + /* + * The BIOS prevents the P2SB device from being enumerated by the PCI + * subsystem, so we need to unhide and hide it back to lookup the BAR. + * Unhide the P2SB device here, if needed. + */ + pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); + if (value & P2SBC_HIDE) + pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); + + ret = p2sb_scan_and_cache(bus, devfn_p2sb); + + /* Hide the P2SB device, if it was hidden */ + if (value & P2SBC_HIDE) + pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE); + + pci_unlock_rescan_remove(); - pci_stop_and_remove_bus_device(pdev); return ret; } @@ -81,64 +190,53 @@ static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct re * @devfn: PCI slot and function to communicate with * @mem: memory resource to be filled in * - * The BIOS prevents the P2SB device from being enumerated by the PCI - * subsystem, so we need to unhide and hide it back to lookup the BAR. - * - * if @bus is NULL, the bus 0 in domain 0 will be used. + * If @bus is NULL, the bus 0 in domain 0 will be used. * If @devfn is 0, it will be replaced by devfn of the P2SB device. * * Caller must provide a valid pointer to @mem. * - * Locking is handled by pci_rescan_remove_lock mutex. - * * Return: * 0 on success or appropriate errno value on error. */ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) { - struct pci_dev *pdev_p2sb; - unsigned int devfn_p2sb; - u32 value = P2SBC_HIDE; + struct p2sb_res_cache *cache; int ret; - /* Get devfn for P2SB device itself */ - ret = p2sb_get_devfn(&devfn_p2sb); - if (ret) - return ret; - - /* if @bus is NULL, use bus 0 in domain 0 */ - bus = bus ?: pci_find_bus(0, 0); - - /* - * Prevent concurrent PCI bus scan from seeing the P2SB device and - * removing via sysfs while it is temporarily exposed. - */ - pci_lock_rescan_remove(); - - /* Unhide the P2SB device, if needed */ - pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); - if (value & P2SBC_HIDE) - pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); - - pdev_p2sb = pci_scan_single_device(bus, devfn_p2sb); - if (devfn) - ret = p2sb_scan_and_read(bus, devfn, mem); - else - ret = p2sb_read_bar0(pdev_p2sb, mem); - pci_stop_and_remove_bus_device(pdev_p2sb); - - /* Hide the P2SB device, if it was hidden */ - if (value & P2SBC_HIDE) - pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE); - - pci_unlock_rescan_remove(); - - if (ret) - return ret; - - if (mem->flags == 0) + bus = p2sb_get_bus(bus); + if (!bus) return -ENODEV; + if (!devfn) { + ret = p2sb_get_devfn(&devfn); + if (ret) + return ret; + } + + cache = &p2sb_resources[PCI_FUNC(devfn)]; + if (cache->bus_dev_id != bus->dev.id) + return -ENODEV; + + if (!p2sb_valid_resource(&cache->res)) + return -ENOENT; + + memcpy(mem, &cache->res, sizeof(*mem)); return 0; } EXPORT_SYMBOL_GPL(p2sb_bar); + +static int __init p2sb_fs_init(void) +{ + p2sb_cache_resources(); + return 0; +} + +/* + * pci_rescan_remove_lock to avoid access to unhidden P2SB devices can + * not be locked in sysfs pci bus rescan path because of deadlock. To + * avoid the deadlock, access to P2SB devices with the lock at an early + * step in kernel initialization and cache required resources. This + * should happen after subsys_initcall which initializes PCI subsystem + * and before device_initcall which requires P2SB resources. + */ +fs_initcall(p2sb_fs_init); From 9e054ed05ddae2c1b4b2bcb5dfcae49c5ac7637e Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Mon, 8 Jan 2024 15:20:59 +0900 Subject: [PATCH 069/347] platform/x86: p2sb: Use pci_resource_n() in p2sb_read_bar0() Accesses to resource[] member of struct pci_dev shall be wrapped with pci_resource_n() for future compatibility. Call the helper function in p2sb_read_bar0(). Suggested-by: Andy Shevchenko Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20240108062059.3583028-3-shinichiro.kawasaki@wdc.com Tested-by Klara Modin Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/p2sb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index 17cc4b45e023..6bd14d0132db 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -65,7 +65,7 @@ static bool p2sb_valid_resource(struct resource *res) /* Copy resource from the first BAR of the device in question */ static void p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) { - struct resource *bar0 = &pdev->resource[0]; + struct resource *bar0 = pci_resource_n(pdev, 0); /* Make sure we have no dangling pointers in the output */ memset(mem, 0, sizeof(*mem)); From 348d9cc7bde30852aa4f54aa70a22c3ad5dd081a Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sat, 6 Jan 2024 23:41:26 +0100 Subject: [PATCH 070/347] platform/x86: intel-wmi-sbl-fw-update: Fix function name in error message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since when the driver was converted to use the bus-based WMI interface, the old GUID-based WMI functions are not used anymore. Update the error message to avoid confusing users. Compile-tested only. Fixes: 75c487fcb69c ("platform/x86: intel-wmi-sbl-fw-update: Use bus-based WMI interface") Signed-off-by: Armin Wolf Acked-by: Randy Dunlap Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240106224126.13803-1-W_Armin@gmx.de Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/wmi/sbl-fw-update.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/wmi/sbl-fw-update.c b/drivers/platform/x86/intel/wmi/sbl-fw-update.c index 9cf5ed0f8dc2..040153ad67c1 100644 --- a/drivers/platform/x86/intel/wmi/sbl-fw-update.c +++ b/drivers/platform/x86/intel/wmi/sbl-fw-update.c @@ -32,7 +32,7 @@ static int get_fwu_request(struct device *dev, u32 *out) return -ENODEV; if (obj->type != ACPI_TYPE_INTEGER) { - dev_warn(dev, "wmi_query_block returned invalid value\n"); + dev_warn(dev, "wmidev_block_query returned invalid value\n"); kfree(obj); return -EINVAL; } @@ -55,7 +55,7 @@ static int set_fwu_request(struct device *dev, u32 in) status = wmidev_block_set(to_wmi_device(dev), 0, &input); if (ACPI_FAILURE(status)) { - dev_err(dev, "wmi_set_block failed\n"); + dev_err(dev, "wmidev_block_set failed\n"); return -ENODEV; } From 41237735ccde2cc3fe1d83ae0b776a085be6a22f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 8 Jan 2024 15:06:55 +0100 Subject: [PATCH 071/347] platform/x86: silicom-platform: Add missing "Description:" for power_cycle sysfs attr The Documentation/ABI/testing/sysfs-platform-silicom entry for the power_cycle sysfs attr is missing the "Description:" keyword, add this. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240108140655.547261-1-hdegoede@redhat.com --- Documentation/ABI/testing/sysfs-platform-silicom | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/ABI/testing/sysfs-platform-silicom b/Documentation/ABI/testing/sysfs-platform-silicom index 2288b3665d16..4d1cc5bdbcc5 100644 --- a/Documentation/ABI/testing/sysfs-platform-silicom +++ b/Documentation/ABI/testing/sysfs-platform-silicom @@ -10,6 +10,7 @@ What: /sys/devices/platform/silicom-platform/power_cycle Date: November 2023 KernelVersion: 6.7 Contact: Henry Shi +Description: This file allow user to power cycle the platform. Default value is 0; when set to 1, it powers down the platform, waits 5 seconds, then powers on the From 452c314988dbccc491a32b674a3945d93a23c87c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 14 Jan 2024 17:53:16 +0100 Subject: [PATCH 072/347] MAINTAINERS: Remove Perry Yuan as DELL WMI HARDWARE PRIVACY SUPPORT maintainer Recent mails to his Dell address bounced with "user unknown". So remove him as maintainer. Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/c9757d0a-2046-464b-93e1-a2d9ab0ce36b@gmail.com Signed-off-by: Hans de Goede --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..30e231c8da37 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5958,7 +5958,6 @@ S: Maintained F: drivers/platform/x86/dell/dell-wmi-descriptor.c DELL WMI HARDWARE PRIVACY SUPPORT -M: Perry Yuan L: Dell.Client.Kernel@dell.com L: platform-driver-x86@vger.kernel.org S: Maintained From 20fe5e9be47efc952c66374334f7bb94e4a51d90 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Tue, 16 Jan 2024 10:18:29 +1300 Subject: [PATCH 073/347] MAINTAINERS: add Luke Jones as maintainer for asus notebooks Add myself as maintainer for "ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS" as suggested by Hans de Goede based on my history of contributions. Signed-off-by: Luke D. Jones Link: https://lore.kernel.org/r/20240115211829.48251-1-luke@ljones.dev Signed-off-by: Hans de Goede --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 30e231c8da37..8be1d172cc06 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3168,6 +3168,7 @@ F: drivers/hwmon/asus-ec-sensors.c ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS M: Corentin Chary +M: Luke D. Jones L: acpi4asus-user@lists.sourceforge.net L: platform-driver-x86@vger.kernel.org S: Maintained From 8530ecaf35f23d02fdce49aded7227fc8f14ebcc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 16 Jan 2024 11:39:21 +0100 Subject: [PATCH 074/347] MAINTAINERS: remove defunct acpi4asus project info from asus notebooks section The acpi4asus project appears to be defunct, according to: https://sourceforge.net/p/acpi4asus/mailman/acpi4asus-user/ the last posts to the list were done in May 2020 and even then they were mostly spam. And the http://acpi4asus.sf.net website still talks about 2.6.x kernels. Drop the defunct mailing-list and update the W: entry to point to the new up2date https://asus-linux.org/ site. Cc: Corentin Chary Cc: Luke D. Jones Signed-off-by: Hans de Goede --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8be1d172cc06..d9f66b5dec6b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3169,10 +3169,9 @@ F: drivers/hwmon/asus-ec-sensors.c ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS M: Corentin Chary M: Luke D. Jones -L: acpi4asus-user@lists.sourceforge.net L: platform-driver-x86@vger.kernel.org S: Maintained -W: http://acpi4asus.sf.net +W: https://asus-linux.org/ F: drivers/platform/x86/asus*.c F: drivers/platform/x86/eeepc*.c From 84aef4ed59705585d629e81d633a83b7d416f5fb Mon Sep 17 00:00:00 2001 From: Wenhua Lin Date: Tue, 9 Jan 2024 15:38:48 +0800 Subject: [PATCH 075/347] gpio: eic-sprd: Clear interrupt after set the interrupt type The raw interrupt status of eic maybe set before the interrupt is enabled, since the eic interrupt has a latch function, which would trigger the interrupt event once enabled it from user side. To solve this problem, interrupts generated before setting the interrupt trigger type are ignored. Fixes: 25518e024e3a ("gpio: Add Spreadtrum EIC driver support") Acked-by: Chunyan Zhang Signed-off-by: Wenhua Lin Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-eic-sprd.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index be7f2fa5aa7b..806b88d8dfb7 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -330,20 +330,27 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) switch (flow_type) { case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 1); + sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IC, 1); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 0); + sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IC, 1); break; case IRQ_TYPE_EDGE_RISING: case IRQ_TYPE_EDGE_FALLING: case IRQ_TYPE_EDGE_BOTH: state = sprd_eic_get(chip, offset); - if (state) + if (state) { sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 0); - else + sprd_eic_update(chip, offset, + SPRD_EIC_DBNC_IC, 1); + } else { sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 1); + sprd_eic_update(chip, offset, + SPRD_EIC_DBNC_IC, 1); + } break; default: return -ENOTSUPP; @@ -355,20 +362,27 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) switch (flow_type) { case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTCLR, 1); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTCLR, 1); break; case IRQ_TYPE_EDGE_RISING: case IRQ_TYPE_EDGE_FALLING: case IRQ_TYPE_EDGE_BOTH: state = sprd_eic_get(chip, offset); - if (state) + if (state) { sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 0); - else + sprd_eic_update(chip, offset, + SPRD_EIC_LATCH_INTCLR, 1); + } else { sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 1); + sprd_eic_update(chip, offset, + SPRD_EIC_LATCH_INTCLR, 1); + } break; default: return -ENOTSUPP; @@ -382,29 +396,34 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_FALLING: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 1); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; default: @@ -417,29 +436,34 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_FALLING: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 1); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; default: From 80c86ff6800b857c8008cebe7b8d22a6e574e68d Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 9 Jan 2024 11:49:07 +0000 Subject: [PATCH 076/347] arm64: dts: exynos: gs101: comply with the new cmu_misc clock names The cmu_misc clock-names were renamed to just "bus" and "sss" because naming is local to the module, so cmu_misc is implied. As the bindings and the device tree have not made a release yet, comply with the renamed clocks. Suggested-by: Rob Herring Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20240109114908.3623645-3-tudor.ambarus@linaro.org Signed-off-by: Krzysztof Kozlowski --- arch/arm64/boot/dts/exynos/google/gs101.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/exynos/google/gs101.dtsi b/arch/arm64/boot/dts/exynos/google/gs101.dtsi index 9747cb3fa03a..d838e3a7af6e 100644 --- a/arch/arm64/boot/dts/exynos/google/gs101.dtsi +++ b/arch/arm64/boot/dts/exynos/google/gs101.dtsi @@ -289,7 +289,7 @@ #clock-cells = <1>; clocks = <&cmu_top CLK_DOUT_CMU_MISC_BUS>, <&cmu_top CLK_DOUT_CMU_MISC_SSS>; - clock-names = "dout_cmu_misc_bus", "dout_cmu_misc_sss"; + clock-names = "bus", "sss"; }; watchdog_cl0: watchdog@10060000 { From eab4f56d3e75dad697acf8dc2c8be3c341d6c63e Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Fri, 5 Jan 2024 07:53:01 +0100 Subject: [PATCH 077/347] ARM: dts: exynos4212-tab3: add samsung,invert-vclk flag to fimd After more investigation, I've found that it's not the panel driver config that needs to be modified to invert the data polarity, but the FIMD config. Add the missing invert-vclk option that is required to get the display to work correctly. Fixes: ee37a457af1d ("ARM: dts: exynos: Add Samsung Galaxy Tab 3 8.0 boards") Signed-off-by: Artur Weber Link: https://lore.kernel.org/r/20240105-tab3-display-fixes-v2-1-904d1207bf6f@gmail.com Signed-off-by: Krzysztof Kozlowski --- arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi b/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi index d7954ff466b4..e5254e32aa8f 100644 --- a/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi +++ b/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi @@ -434,6 +434,7 @@ }; &fimd { + samsung,invert-vclk; status = "okay"; }; From c6a783be82c893c6f124a5853bef2edeaf26dadf Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 18 Jan 2024 04:23:40 -0800 Subject: [PATCH 078/347] thermal: intel: powerclamp: Remove dead code for target mwait value After conversion of this driver to use powercap idle_inject core, this driver doesn't use target_mwait value. So remove dead code. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_powerclamp.c | 32 ------------------------ 1 file changed, 32 deletions(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 5ac5cb60bae6..bc6eb0dd66a4 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -49,7 +49,6 @@ */ #define DEFAULT_DURATION_JIFFIES (6) -static unsigned int target_mwait; static struct dentry *debug_dir; static bool poll_pkg_cstate_enable; @@ -312,34 +311,6 @@ MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n" "\twindow size results in slower response time but more smooth\n" "\tclamping results. default to 2."); -static void find_target_mwait(void) -{ - unsigned int eax, ebx, ecx, edx; - unsigned int highest_cstate = 0; - unsigned int highest_subcstate = 0; - int i; - - if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) - return; - - cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); - - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || - !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) - return; - - edx >>= MWAIT_SUBSTATE_SIZE; - for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { - if (edx & MWAIT_SUBSTATE_MASK) { - highest_cstate = i; - highest_subcstate = edx & MWAIT_SUBSTATE_MASK; - } - } - target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) | - (highest_subcstate - 1); - -} - struct pkg_cstate_info { bool skip; int msr_index; @@ -759,9 +730,6 @@ static int __init powerclamp_probe(void) return -ENODEV; } - /* find the deepest mwait value */ - find_target_mwait(); - return 0; } From 8cbc756b802605dee3dd40019bd75960772bacf5 Mon Sep 17 00:00:00 2001 From: Liming Sun Date: Thu, 11 Jan 2024 12:31:06 -0500 Subject: [PATCH 079/347] platform/mellanox: mlxbf-tmfifo: Drop Tx network packet when Tx TmFIFO is full MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting from Linux 5.16 kernel, Tx timeout mechanism was added in the virtio_net driver which prints the "Tx timeout" warning message when a packet stays in Tx queue for too long. Below is an example of the reported message: "[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since last trans: 3079892256". This issue could happen when external host driver which drains the FIFO is restared, stopped or upgraded. To avoid such confusing "Tx timeout" messages, this commit adds logic to drop the outstanding Tx packet if it's not able to transmit in two seconds due to Tx FIFO full, which can be considered as congestion or out-of-resource drop. This commit also handles the special case that the packet is half- transmitted into the Tx FIFO. In such case, the packet is discarded with remaining length stored in vring->rem_padding. So paddings with zeros can be sent out when Tx space is available to maintain the integrity of the packet format. The padded packet will be dropped on the receiving side. Signed-off-by: Liming Sun Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240111173106.96958-1-limings@nvidia.com Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxbf-tmfifo.c | 67 ++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index ed16ec422a7b..b8d1e32e97eb 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -47,6 +47,9 @@ /* Message with data needs at least two words (for header & data). */ #define MLXBF_TMFIFO_DATA_MIN_WORDS 2 +/* Tx timeout in milliseconds. */ +#define TMFIFO_TX_TIMEOUT 2000 + /* ACPI UID for BlueField-3. */ #define TMFIFO_BF3_UID 1 @@ -62,12 +65,14 @@ struct mlxbf_tmfifo; * @drop_desc: dummy desc for packet dropping * @cur_len: processed length of the current descriptor * @rem_len: remaining length of the pending packet + * @rem_padding: remaining bytes to send as paddings * @pkt_len: total length of the pending packet * @next_avail: next avail descriptor id * @num: vring size (number of descriptors) * @align: vring alignment size * @index: vring index * @vdev_id: vring virtio id (VIRTIO_ID_xxx) + * @tx_timeout: expire time of last tx packet * @fifo: pointer to the tmfifo structure */ struct mlxbf_tmfifo_vring { @@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring { struct vring_desc drop_desc; int cur_len; int rem_len; + int rem_padding; u32 pkt_len; u16 next_avail; int num; int align; int index; int vdev_id; + unsigned long tx_timeout; struct mlxbf_tmfifo *fifo; }; @@ -819,6 +826,50 @@ mlxbf_tmfifo_desc_done: return true; } +static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring *vring) +{ + unsigned long flags; + + /* Only handle Tx timeout for network vdev. */ + if (vring->vdev_id != VIRTIO_ID_NET) + return; + + /* Initialize the timeout or return if not expired. */ + if (!vring->tx_timeout) { + /* Initialize the timeout. */ + vring->tx_timeout = jiffies + + msecs_to_jiffies(TMFIFO_TX_TIMEOUT); + return; + } else if (time_before(jiffies, vring->tx_timeout)) { + /* Return if not timeout yet. */ + return; + } + + /* + * Drop the packet after timeout. The outstanding packet is + * released and the remaining bytes will be sent with padding byte 0x00 + * as a recovery. On the peer(host) side, the padding bytes 0x00 will be + * either dropped directly, or appended into existing outstanding packet + * thus dropped as corrupted network packet. + */ + vring->rem_padding = round_up(vring->rem_len, sizeof(u64)); + mlxbf_tmfifo_release_pkt(vring); + vring->cur_len = 0; + vring->rem_len = 0; + vring->fifo->vring[0] = NULL; + + /* + * Make sure the load/store are in order before + * returning back to virtio. + */ + virtio_mb(false); + + /* Notify upper layer. */ + spin_lock_irqsave(&vring->fifo->spin_lock[0], flags); + vring_interrupt(0, vring->vq); + spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags); +} + /* Rx & Tx processing of a queue. */ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) { @@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) return; do { +retry: /* Get available FIFO space. */ if (avail == 0) { if (is_rx) @@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) break; } + /* Insert paddings for discarded Tx packet. */ + if (!is_rx) { + vring->tx_timeout = 0; + while (vring->rem_padding >= sizeof(u64)) { + writeq(0, vring->fifo->tx.data); + vring->rem_padding -= sizeof(u64); + if (--avail == 0) + goto retry; + } + } + /* Console output always comes from the Tx buffer. */ if (!is_rx && devid == VIRTIO_ID_CONSOLE) { mlxbf_tmfifo_console_tx(fifo, avail); @@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) /* Handle one descriptor. */ more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail); } while (more); + + /* Check Tx timeout. */ + if (avail <= 0 && !is_rx) + mlxbf_tmfifo_check_tx_timeout(vring); } /* Handle Rx or Tx queues. */ From 732c35ce6d4892f7b07cc9aca61a6ad0fd400a26 Mon Sep 17 00:00:00 2001 From: Shravan Kumar Ramani Date: Wed, 17 Jan 2024 05:01:34 -0500 Subject: [PATCH 080/347] platform/mellanox: mlxbf-pmc: Fix offset calculation for crspace events The event selector fields for 2 counters are contained in one 32-bit register and the current logic does not account for this. Fixes: 423c3361855c ("platform/mellanox: mlxbf-pmc: Add support for BlueField-3") Signed-off-by: Shravan Kumar Ramani Reviewed-by: David Thompson Reviewed-by: Vadim Pasternak Link: https://lore.kernel.org/r/8834cfa496c97c7c2fcebcfca5a2aa007e20ae96.1705485095.git.shravankr@nvidia.com Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxbf-pmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index 1dd84c7a79de..b1995ac268d7 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -1170,7 +1170,7 @@ static int mlxbf_pmc_program_crspace_counter(int blk_num, uint32_t cnt_num, int ret; addr = pmc->block[blk_num].mmio_base + - (rounddown(cnt_num, 2) * MLXBF_PMC_CRSPACE_PERFSEL_SZ); + ((cnt_num / 2) * MLXBF_PMC_CRSPACE_PERFSEL_SZ); ret = mlxbf_pmc_readl(addr, &word); if (ret) return ret; @@ -1413,7 +1413,7 @@ static int mlxbf_pmc_read_crspace_event(int blk_num, uint32_t cnt_num, int ret; addr = pmc->block[blk_num].mmio_base + - (rounddown(cnt_num, 2) * MLXBF_PMC_CRSPACE_PERFSEL_SZ); + ((cnt_num / 2) * MLXBF_PMC_CRSPACE_PERFSEL_SZ); ret = mlxbf_pmc_readl(addr, &word); if (ret) return ret; From 4b5581f112075e46d73b34b9848be041e6c1e489 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 24 Oct 2023 18:53:53 +0200 Subject: [PATCH 081/347] accel/ivpu: Disable PLL after VPU IP reset during FLR IP reset has to followed by ivpu_pll_disable() to properly enter reset state. Fixes: 828d63042aec ("accel/ivpu: Don't enter d0i3 during FLR") Signed-off-by: Jacek Lawrynowicz Reviewed-by: Stanislaw Gruszka Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20231024165353.761507-1-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_40xx.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index eba2fdef2ace..c02061f299e2 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -746,7 +746,7 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev) return 0; } -static int ivpu_hw_40xx_reset(struct ivpu_device *vdev) +static int ivpu_hw_40xx_ip_reset(struct ivpu_device *vdev) { int ret; u32 val; @@ -768,6 +768,23 @@ static int ivpu_hw_40xx_reset(struct ivpu_device *vdev) return ret; } +static int ivpu_hw_40xx_reset(struct ivpu_device *vdev) +{ + int ret = 0; + + if (ivpu_hw_40xx_ip_reset(vdev)) { + ivpu_err(vdev, "Failed to reset VPU IP\n"); + ret = -EIO; + } + + if (ivpu_pll_disable(vdev)) { + ivpu_err(vdev, "Failed to disable PLL\n"); + ret = -EIO; + } + + return ret; +} + static int ivpu_hw_40xx_d0i3_enable(struct ivpu_device *vdev) { int ret; @@ -913,7 +930,7 @@ static int ivpu_hw_40xx_power_down(struct ivpu_device *vdev) ivpu_hw_40xx_save_d0i3_entry_timestamp(vdev); - if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_reset(vdev)) + if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_ip_reset(vdev)) ivpu_warn(vdev, "Failed to reset the VPU\n"); if (ivpu_pll_disable(vdev)) { From 192cdb1c907fd8df2d764c5bb17496e415e59391 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 22 Jan 2024 15:18:11 +0100 Subject: [PATCH 082/347] cpufreq: intel_pstate: Refine computation of P-state for given frequency On systems using HWP, if a given frequency is equal to the maximum turbo frequency or the maximum non-turbo frequency, the HWP performance level corresponding to it is already known and can be used directly without any computation. Accordingly, adjust the code to use the known HWP performance levels in the cases mentioned above. This also helps to avoid limiting CPU capacity artificially in some cases when the BIOS produces the HWP_CAP numbers using a different E-core-to-P-core performance scaling factor than expected by the kernel. Fixes: f5c8cf2a4992 ("cpufreq: intel_pstate: hybrid: Use known scaling factor for P-cores") Cc: 6.1+ # 6.1+ Tested-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 55 +++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 2ca70b0b5fdc..ca94e60e705a 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -529,6 +529,30 @@ static int intel_pstate_cppc_get_scaling(int cpu) } #endif /* CONFIG_ACPI_CPPC_LIB */ +static int intel_pstate_freq_to_hwp_rel(struct cpudata *cpu, int freq, + unsigned int relation) +{ + if (freq == cpu->pstate.turbo_freq) + return cpu->pstate.turbo_pstate; + + if (freq == cpu->pstate.max_freq) + return cpu->pstate.max_pstate; + + switch (relation) { + case CPUFREQ_RELATION_H: + return freq / cpu->pstate.scaling; + case CPUFREQ_RELATION_C: + return DIV_ROUND_CLOSEST(freq, cpu->pstate.scaling); + } + + return DIV_ROUND_UP(freq, cpu->pstate.scaling); +} + +static int intel_pstate_freq_to_hwp(struct cpudata *cpu, int freq) +{ + return intel_pstate_freq_to_hwp_rel(cpu, freq, CPUFREQ_RELATION_L); +} + /** * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels. * @cpu: Target CPU. @@ -546,6 +570,7 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling; int perf_ctl_turbo = pstate_funcs.get_turbo(cpu->cpu); int scaling = cpu->pstate.scaling; + int freq; pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys); pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo); @@ -559,16 +584,16 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling, perf_ctl_scaling); - cpu->pstate.max_pstate_physical = - DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling, - scaling); + freq = perf_ctl_max_phys * perf_ctl_scaling; + cpu->pstate.max_pstate_physical = intel_pstate_freq_to_hwp(cpu, freq); - cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling; + freq = cpu->pstate.min_pstate * perf_ctl_scaling; + cpu->pstate.min_freq = freq; /* * Cast the min P-state value retrieved via pstate_funcs.get_min() to * the effective range of HWP performance levels. */ - cpu->pstate.min_pstate = DIV_ROUND_UP(cpu->pstate.min_freq, scaling); + cpu->pstate.min_pstate = intel_pstate_freq_to_hwp(cpu, freq); } static inline void update_turbo_state(void) @@ -2528,13 +2553,12 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu, * abstract values to represent performance rather than pure ratios. */ if (hwp_active && cpu->pstate.scaling != perf_ctl_scaling) { - int scaling = cpu->pstate.scaling; int freq; freq = max_policy_perf * perf_ctl_scaling; - max_policy_perf = DIV_ROUND_UP(freq, scaling); + max_policy_perf = intel_pstate_freq_to_hwp(cpu, freq); freq = min_policy_perf * perf_ctl_scaling; - min_policy_perf = DIV_ROUND_UP(freq, scaling); + min_policy_perf = intel_pstate_freq_to_hwp(cpu, freq); } pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n", @@ -2908,18 +2932,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, cpufreq_freq_transition_begin(policy, &freqs); - switch (relation) { - case CPUFREQ_RELATION_L: - target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling); - break; - case CPUFREQ_RELATION_H: - target_pstate = freqs.new / cpu->pstate.scaling; - break; - default: - target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling); - break; - } - + target_pstate = intel_pstate_freq_to_hwp_rel(cpu, freqs.new, relation); target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, false); freqs.new = target_pstate * cpu->pstate.scaling; @@ -2937,7 +2950,7 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, update_turbo_state(); - target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); + target_pstate = intel_pstate_freq_to_hwp(cpu, target_freq); target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true); From f7cfe7017b531e08c108ac6615b1ddedcc892428 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 9 Jan 2024 09:22:32 +0100 Subject: [PATCH 083/347] x86/paravirt: Make BUG_func() usable by non-GPL modules Several inlined functions subject to paravirt patching are referencing BUG_func() after the recent switch to the alternative patching mechanism. As those functions can legally be used by non-GPL modules, BUG_func() must be usable by those modules, too. So use EXPORT_SYMBOL() when exporting BUG_func(). Fixes: 9824b00c2b58 ("x86/paravirt: Move some functions and defines to alternative.c") Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240109082232.22657-1-jgross@suse.com --- arch/x86/kernel/alternative.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index cc130b57542a..1d85cb7071cb 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -403,7 +403,7 @@ noinstr void BUG_func(void) { BUG(); } -EXPORT_SYMBOL_GPL(BUG_func); +EXPORT_SYMBOL(BUG_func); #define CALL_RIP_REL_OPCODE 0xff #define CALL_RIP_REL_MODRM 0x15 From ed1a72fb0d646c983c85b62144fb1d134a8edb71 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jan 2024 21:55:22 +0300 Subject: [PATCH 084/347] kunit: Fix a NULL vs IS_ERR() bug The kunit_device_register() function doesn't return NULL, it returns error pointers. Change the KUNIT_ASSERT_NOT_NULL() to check for ERR_OR_NULL(). Fixes: d03c720e03bd ("kunit: Add APIs for managing devices") Signed-off-by: Dan Carpenter Reviewed-by: Rae Moar Reviewed-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/kunit-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index c4259d910356..f7980ef236a3 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -720,7 +720,7 @@ static void kunit_device_cleanup_test(struct kunit *test) long action_was_run = 0; test_device = kunit_device_register(test, "my_device"); - KUNIT_ASSERT_NOT_NULL(test, test_device); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, test_device); /* Add an action to verify cleanup. */ devm_add_action(test_device, test_dev_action, &action_was_run); From 083974ebb8fc65978d6cacd1bcfe9158d6234b98 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jan 2024 21:55:14 +0300 Subject: [PATCH 085/347] kunit: device: Fix a NULL vs IS_ERR() check in init() The root_device_register() function does not return NULL, it returns error pointers. Fix the check to match. Fixes: d03c720e03bd ("kunit: Add APIs for managing devices") Signed-off-by: Dan Carpenter Reviewed-by: Rae Moar Reviewed-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/kunit/device.c b/lib/kunit/device.c index f5371287b375..074c6dd2e36a 100644 --- a/lib/kunit/device.c +++ b/lib/kunit/device.c @@ -45,8 +45,8 @@ int kunit_bus_init(void) int error; kunit_bus_device = root_device_register("kunit"); - if (!kunit_bus_device) - return -ENOMEM; + if (IS_ERR(kunit_bus_device)) + return PTR_ERR(kunit_bus_device); error = bus_register(&kunit_bus_type); if (error) From 57e39086fb868a84f772cf097004f4715d8aaccb Mon Sep 17 00:00:00 2001 From: David Gow Date: Fri, 12 Jan 2024 07:49:47 +0800 Subject: [PATCH 086/347] MAINTAINERS: kunit: Add Rae Moar as a reviewer Rae has been shouldering a lot of the KUnit review burden for the last year, and will continue to do so in the future. Thanks! Signed-off-by: David Gow Reviewed-by: Rae Moar Signed-off-by: Shuah Khan --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..354d993bc2cf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11724,6 +11724,7 @@ F: fs/smb/server/ KERNEL UNIT TESTING FRAMEWORK (KUnit) M: Brendan Higgins M: David Gow +R: Rae Moar L: linux-kselftest@vger.kernel.org L: kunit-dev@googlegroups.com S: Maintained From a1af6a2bfa0cb46d70b7df5352993e750da6c79b Mon Sep 17 00:00:00 2001 From: Marco Pagani Date: Wed, 10 Jan 2024 16:59:47 +0100 Subject: [PATCH 087/347] kunit: run test suites only after module initialization completes Commit 2810c1e99867 ("kunit: Fix wild-memory-access bug in kunit_free_suite_set()") fixed a wild-memory-access bug that could have happened during the loading phase of test suites built and executed as loadable modules. However, it also introduced a problematic side effect that causes test suites modules to crash when they attempt to register fake devices. When a module is loaded, it traverses the MODULE_STATE_UNFORMED and MODULE_STATE_COMING states before reaching the normal operating state MODULE_STATE_LIVE. Finally, when the module is removed, it moves to MODULE_STATE_GOING before being released. However, if the loading function load_module() fails between complete_formation() and do_init_module(), the module goes directly from MODULE_STATE_COMING to MODULE_STATE_GOING without passing through MODULE_STATE_LIVE. This behavior was causing kunit_module_exit() to be called without having first executed kunit_module_init(). Since kunit_module_exit() is responsible for freeing the memory allocated by kunit_module_init() through kunit_filter_suites(), this behavior was resulting in a wild-memory-access bug. Commit 2810c1e99867 ("kunit: Fix wild-memory-access bug in kunit_free_suite_set()") fixed this issue by running the tests when the module is still in MODULE_STATE_COMING. However, modules in that state are not fully initialized, lacking sysfs kobjects. Therefore, if a test module attempts to register a fake device, it will inevitably crash. This patch proposes a different approach to fix the original wild-memory-access bug while restoring the normal module execution flow by making kunit_module_exit() able to detect if kunit_module_init() has previously initialized the tests suite set. In this way, test modules can once again register fake devices without crashing. This behavior is achieved by checking whether mod->kunit_suites is a virtual or direct mapping address. If it is a virtual address, then kunit_module_init() has allocated the suite_set in kunit_filter_suites() using kmalloc_array(). On the contrary, if mod->kunit_suites is still pointing to the original address that was set when looking up the .kunit_test_suites section of the module, then the loading phase has failed and there's no memory to be freed. v4: - rebased on 6.8 - noted that kunit_filter_suites() must return a virtual address v3: - add a comment to clarify why the start address is checked v2: - add include Fixes: 2810c1e99867 ("kunit: Fix wild-memory-access bug in kunit_free_suite_set()") Reviewed-by: David Gow Tested-by: Rae Moar Tested-by: Richard Fitzgerald Reviewed-by: Javier Martinez Canillas Signed-off-by: Marco Pagani Signed-off-by: Shuah Khan --- lib/kunit/executor.c | 4 ++++ lib/kunit/test.c | 14 +++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index 717b9599036b..689fff2b2b10 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -146,6 +146,10 @@ void kunit_free_suite_set(struct kunit_suite_set suite_set) kfree(suite_set.start); } +/* + * Filter and reallocate test suites. Must return the filtered test suites set + * allocated at a valid virtual address or NULL in case of error. + */ struct kunit_suite_set kunit_filter_suites(const struct kunit_suite_set *suite_set, const char *filter_glob, diff --git a/lib/kunit/test.c b/lib/kunit/test.c index f95d2093a0aa..31a5a992e646 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "debugfs.h" #include "device-impl.h" @@ -801,12 +802,19 @@ static void kunit_module_exit(struct module *mod) }; const char *action = kunit_action(); + /* + * Check if the start address is a valid virtual address to detect + * if the module load sequence has failed and the suite set has not + * been initialized and filtered. + */ + if (!suite_set.start || !virt_addr_valid(suite_set.start)) + return; + if (!action) __kunit_test_suites_exit(mod->kunit_suites, mod->num_kunit_suites); - if (suite_set.start) - kunit_free_suite_set(suite_set); + kunit_free_suite_set(suite_set); } static int kunit_module_notify(struct notifier_block *nb, unsigned long val, @@ -816,12 +824,12 @@ static int kunit_module_notify(struct notifier_block *nb, unsigned long val, switch (val) { case MODULE_STATE_LIVE: + kunit_module_init(mod); break; case MODULE_STATE_GOING: kunit_module_exit(mod); break; case MODULE_STATE_COMING: - kunit_module_init(mod); break; case MODULE_STATE_UNFORMED: break; From 1a9f2c776d1416c4ea6cb0d0b9917778c41a1a7d Mon Sep 17 00:00:00 2001 From: Arthur Grillo Date: Wed, 10 Jan 2024 14:39:28 -0300 Subject: [PATCH 088/347] Documentation: KUnit: Update the instructions on how to test static functions Now that we have the VISIBLE_IF_KUNIT and EXPORT_SYMBOL_IF_KUNIT macros, update the instructions to recommend this way of testing static functions. Signed-off-by: Arthur Grillo Reviewed-by: David Gow Signed-off-by: Shuah Khan --- Documentation/dev-tools/kunit/usage.rst | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst index a9efab50eed8..22955d56b379 100644 --- a/Documentation/dev-tools/kunit/usage.rst +++ b/Documentation/dev-tools/kunit/usage.rst @@ -671,8 +671,23 @@ Testing Static Functions ------------------------ If we do not want to expose functions or variables for testing, one option is to -conditionally ``#include`` the test file at the end of your .c file. For -example: +conditionally export the used symbol. For example: + +.. code-block:: c + + /* In my_file.c */ + + VISIBLE_IF_KUNIT int do_interesting_thing(); + EXPORT_SYMBOL_IF_KUNIT(do_interesting_thing); + + /* In my_file.h */ + + #if IS_ENABLED(CONFIG_KUNIT) + int do_interesting_thing(void); + #endif + +Alternatively, you could conditionally ``#include`` the test file at the end of +your .c file. For example: .. code-block:: c From c92688cac239794e4a1d976afa5203a4d3a2ac0e Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 13 Jan 2024 23:46:26 +0100 Subject: [PATCH 089/347] regulator: pwm-regulator: Add validity checks in continuous .get_voltage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continuous regulators can be configured to operate only in a certain duty cycle range (for example from 0..91%). Add a check to error out if the duty cycle translates to an unsupported (or out of range) voltage. Suggested-by: Uwe Kleine-König Signed-off-by: Martin Blumenstingl Link: https://msgid.link/r/20240113224628.377993-2-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- drivers/regulator/pwm-regulator.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index 698c420e0869..226ca4c62673 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -158,6 +158,9 @@ static int pwm_regulator_get_voltage(struct regulator_dev *rdev) pwm_get_state(drvdata->pwm, &pstate); voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit); + if (voltage < min(max_uV_duty, min_uV_duty) || + voltage > max(max_uV_duty, min_uV_duty)) + return -ENOTRECOVERABLE; /* * The dutycycle for min_uV might be greater than the one for max_uV. From 6a7d11efd6915d80a025f2a0be4ae09d797b91ec Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 13 Jan 2024 23:46:27 +0100 Subject: [PATCH 090/347] regulator: pwm-regulator: Calculate the output voltage for disabled PWMs If a PWM output is disabled then it's voltage has to be calculated based on a zero duty cycle (for normal polarity) or duty cycle being equal to the PWM period (for inverted polarity). Add support for this to pwm_regulator_get_voltage(). Signed-off-by: Martin Blumenstingl Link: https://msgid.link/r/20240113224628.377993-3-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- drivers/regulator/pwm-regulator.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index 226ca4c62673..d27b9a7a30c9 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -157,6 +157,13 @@ static int pwm_regulator_get_voltage(struct regulator_dev *rdev) pwm_get_state(drvdata->pwm, &pstate); + if (!pstate.enabled) { + if (pstate.polarity == PWM_POLARITY_INVERSED) + pstate.duty_cycle = pstate.period; + else + pstate.duty_cycle = 0; + } + voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit); if (voltage < min(max_uV_duty, min_uV_duty) || voltage > max(max_uV_duty, min_uV_duty)) From b3cbdcc191819b75c04178142e2d0d4c668f68c0 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 13 Jan 2024 23:46:28 +0100 Subject: [PATCH 091/347] regulator: pwm-regulator: Manage boot-on with disabled PWM channels Odroid-C1 uses a Monolithic Power Systems MP2161 controlled via PWM for the VDDEE voltage supply of the Meson8b SoC. Commit 6b9352f3f8a1 ("pwm: meson: modify and simplify calculation in meson_pwm_get_state") results in my Odroid-C1 crashing with memory corruption in many different places (seemingly at random). It turns out that this is due to a currently not supported corner case. The VDDEE regulator can generate between 860mV (duty cycle of ~91%) and 1140mV (duty cycle of 0%). We consider it to be enabled by the bootloader (which is why it has the regulator-boot-on flag in .dts) as well as being always-on (which is why it has the regulator-always-on flag in .dts) because the VDDEE voltage is generally required for the Meson8b SoC to work. The public S805 datasheet [0] states on page 17 (where "A5" refers to the Cortex-A5 CPU cores): [...] So if EE domains is shut off, A5 memory is also shut off. That does not matter. Before EE power domain is shut off, A5 should be shut off at first. It turns out that at least some bootloader versions are keeping the PWM output disabled. This is not a problem due to the specific design of the regulator: when the PWM output is disabled the output pin is pulled LOW, effectively achieving a 0% duty cycle (which in return means that VDDEE voltage is at 1140mV). The problem comes when the pwm-regulator driver tries to initialize the PWM output. To do so it reads the current state from the hardware, which is: period: 3666ns duty cycle: 3333ns (= ~91%) enabled: false Then those values are translated using the continuous voltage range to 860mV. Later, when the regulator is being enabled (either by the regulator core due to the always-on flag or first consumer - in this case the lima driver for the Mali-450 GPU) the pwm-regulator driver tries to keep the voltage (at 860mV) and just enable the PWM output. This is when things start to go wrong as the typical voltage used for VDDEE is 1100mV. Commit 6b9352f3f8a1 ("pwm: meson: modify and simplify calculation in meson_pwm_get_state") triggers above condition as before that change period and duty cycle were both at 0. Since the change to the pwm-meson driver is considered correct the solution is to be found in the pwm-regulator driver. Update the duty cycle during driver probe if the regulator is flagged as boot-on so that a call to pwm_regulator_enable() (by the regulator core during initialization of a regulator flagged with boot-on) without any preceding call to pwm_regulator_set_voltage() does not change the output voltage. [0] https://dn.odroid.com/S805/Datasheet/S805_Datasheet%20V0.8%2020150126.pdf Signed-off-by: Martin Blumenstingl Link: https://msgid.link/r/20240113224628.377993-4-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- drivers/regulator/pwm-regulator.c | 33 +++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index d27b9a7a30c9..60cfcd741c2a 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -323,6 +323,32 @@ static int pwm_regulator_init_continuous(struct platform_device *pdev, return 0; } +static int pwm_regulator_init_boot_on(struct platform_device *pdev, + struct pwm_regulator_data *drvdata, + const struct regulator_init_data *init_data) +{ + struct pwm_state pstate; + + if (!init_data->constraints.boot_on || drvdata->enb_gpio) + return 0; + + pwm_get_state(drvdata->pwm, &pstate); + if (pstate.enabled) + return 0; + + /* + * Update the duty cycle so the output does not change + * when the regulator core enables the regulator (and + * thus the PWM channel). + */ + if (pstate.polarity == PWM_POLARITY_INVERSED) + pstate.duty_cycle = pstate.period; + else + pstate.duty_cycle = 0; + + return pwm_apply_might_sleep(drvdata->pwm, &pstate); +} + static int pwm_regulator_probe(struct platform_device *pdev) { const struct regulator_init_data *init_data; @@ -382,6 +408,13 @@ static int pwm_regulator_probe(struct platform_device *pdev) if (ret) return ret; + ret = pwm_regulator_init_boot_on(pdev, drvdata, init_data); + if (ret) { + dev_err(&pdev->dev, "Failed to apply boot_on settings: %d\n", + ret); + return ret; + } + regulator = devm_regulator_register(&pdev->dev, &drvdata->desc, &config); if (IS_ERR(regulator)) { From 6c314425b9ef6b247cefd0903e287eb072580c3b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 22 Jan 2024 14:00:33 +0200 Subject: [PATCH 092/347] spi: intel-pci: Remove Meteor Lake-S SoC PCI ID from the list Turns out this "SoC" side controller does not support certain commands, such as reading chip JEDEC ID, so the controller is pretty much unusable in Linux. We should be using the "PCH" side controller instead. For this reason remove this PCI ID from the list. Fixes: c2912d42e86e ("spi: intel-pci: Add support for Meteor Lake-S SPI serial flash") Signed-off-by: Mika Westerberg Link: https://msgid.link/r/20240122120034.2664812-2-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel-pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index 57d767a68e7b..b9918dcc3802 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -84,7 +84,6 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0xa2a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa324), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa3a4), (unsigned long)&cnl_info }, - { PCI_VDEVICE(INTEL, 0xae23), (unsigned long)&cnl_info }, { }, }; MODULE_DEVICE_TABLE(pci, intel_spi_pci_ids); From 8afe3c7fcaf72fca1e7d3dab16a5b7f4201ece17 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 22 Jan 2024 14:00:34 +0200 Subject: [PATCH 093/347] spi: intel-pci: Add support for Arrow Lake SPI serial flash This adds the PCI ID of the Arrow Lake and Meteor Lake-S PCH SPI serial flash controller. This one supports all the necessary commands Linux SPI-NOR stack requires. Signed-off-by: Mika Westerberg Link: https://msgid.link/r/20240122120034.2664812-3-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index b9918dcc3802..07d20ca1164c 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -76,6 +76,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x7a24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7aa4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7e23), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0x7f24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x9d24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x9da4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa0a4), (unsigned long)&cnl_info }, From 7777f47f2ea64efd1016262e7b59fab34adfb869 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Thu, 18 Jan 2024 21:04:01 +0800 Subject: [PATCH 094/347] block: Move checking GENHD_FL_NO_PART to bdev_add_partition() Commit 1a721de8489f ("block: don't add or resize partition on the disk with GENHD_FL_NO_PART") prevented all operations about partitions on disks with GENHD_FL_NO_PART in blkpg_do_ioctl() since they are meaningless. However, it changed error code in some scenarios. So move checking GENHD_FL_NO_PART to bdev_add_partition() to eliminate impact. Fixes: 1a721de8489f ("block: don't add or resize partition on the disk with GENHD_FL_NO_PART") Reported-by: Allison Karlitskaya Closes: https://lore.kernel.org/all/CAOYeF9VsmqKMcQjo1k6YkGNujwN-nzfxY17N3F-CMikE1tYp+w@mail.gmail.com/ Signed-off-by: Li Lingfeng Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20240118130401.792757-1-lilingfeng@huaweicloud.com Signed-off-by: Jens Axboe --- block/ioctl.c | 2 -- block/partitions/core.c | 5 +++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/block/ioctl.c b/block/ioctl.c index 9c73a763ef88..438f79c564cf 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -20,8 +20,6 @@ static int blkpg_do_ioctl(struct block_device *bdev, struct blkpg_partition p; sector_t start, length; - if (disk->flags & GENHD_FL_NO_PART) - return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) diff --git a/block/partitions/core.c b/block/partitions/core.c index cab0d76a828e..5f5ed5c75f04 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -439,6 +439,11 @@ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start, goto out; } + if (disk->flags & GENHD_FL_NO_PART) { + ret = -EINVAL; + goto out; + } + if (partition_overlaps(disk, start, length, -1)) { ret = -EBUSY; goto out; From 4d5b7daa3c610af3f322ad1e91fc0c752ff32f0e Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Wed, 17 Jan 2024 17:58:14 -0800 Subject: [PATCH 095/347] drm/bridge: anx7625: Ensure bridge is suspended in disable() Similar to commit 26db46bc9c67 ("drm/bridge: parade-ps8640: Ensure bridge is suspended in .post_disable()"). Add a mutex to ensure that aux transfer won't race with atomic_disable by holding the PM reference and prevent the bridge from suspend. Also we need to use pm_runtime_put_sync_suspend() to suspend the bridge instead of idle with pm_runtime_put_sync(). Fixes: 3203e497eb76 ("drm/bridge: anx7625: Synchronously run runtime suspend.") Fixes: adca62ec370c ("drm/bridge: anx7625: Support reading edid through aux channel") Signed-off-by: Hsin-Yi Wang Tested-by: Xuxin Xiong Reviewed-by: Pin-yen Lin Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240118015916.2296741-1-hsinyi@chromium.org --- drivers/gpu/drm/bridge/analogix/anx7625.c | 7 ++++++- drivers/gpu/drm/bridge/analogix/anx7625.h | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index ef31033439bc..29d91493b101 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -1762,6 +1762,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux, u8 request = msg->request & ~DP_AUX_I2C_MOT; int ret = 0; + mutex_lock(&ctx->aux_lock); pm_runtime_get_sync(dev); msg->reply = 0; switch (request) { @@ -1778,6 +1779,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux, msg->size, msg->buffer); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); + mutex_unlock(&ctx->aux_lock); return ret; } @@ -2474,7 +2476,9 @@ static void anx7625_bridge_atomic_disable(struct drm_bridge *bridge, ctx->connector = NULL; anx7625_dp_stop(ctx); - pm_runtime_put_sync(dev); + mutex_lock(&ctx->aux_lock); + pm_runtime_put_sync_suspend(dev); + mutex_unlock(&ctx->aux_lock); } static enum drm_connector_status @@ -2668,6 +2672,7 @@ static int anx7625_i2c_probe(struct i2c_client *client) mutex_init(&platform->lock); mutex_init(&platform->hdcp_wq_lock); + mutex_init(&platform->aux_lock); INIT_DELAYED_WORK(&platform->hdcp_work, hdcp_check_work_func); platform->hdcp_workqueue = create_workqueue("hdcp workqueue"); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.h b/drivers/gpu/drm/bridge/analogix/anx7625.h index 66ebee7f3d83..39ed35d33836 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.h +++ b/drivers/gpu/drm/bridge/analogix/anx7625.h @@ -475,6 +475,8 @@ struct anx7625_data { struct workqueue_struct *hdcp_workqueue; /* Lock for hdcp work queue */ struct mutex hdcp_wq_lock; + /* Lock for aux transfer and disable */ + struct mutex aux_lock; char edid_block; struct display_timing dt; u8 display_timing_valid; From 1a84c213146a06aca1fd0e5b376ab7d36d15e1b3 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Tue, 14 Nov 2023 15:10:33 +0700 Subject: [PATCH 096/347] drm/dp_mst: Separate @failing_port list in drm_dp_mst_atomic_check_mgr() comment Stephen Rothwell reported htmldocs warnings when merging drm-intel tree: Documentation/gpu/drm-kms-helpers:296: drivers/gpu/drm/display/drm_dp_mst_topology.c:5484: ERROR: Unexpected indentation. Documentation/gpu/drm-kms-helpers:296: drivers/gpu/drm/display/drm_dp_mst_topology.c:5488: WARNING: Block quote ends without a blank line; unexpected unindent. Separate @failing_port return value list by surrounding it with a blank line to fix above warnings. Fixes: 1cd0a5ea427931 ("drm/dp_mst: Factor out a helper to check the atomic state of a topology manager") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/linux-next/20231114141715.6f435118@canb.auug.org.au/ Signed-off-by: Bagas Sanjaya Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231114081033.27343-1-bagasdotme@gmail.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index bd6c24d4213c..f7c6b60629c2 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -5491,6 +5491,7 @@ EXPORT_SYMBOL(drm_dp_mst_atomic_enable_dsc); * - 0 if the new state is valid * - %-ENOSPC, if the new state is invalid, because of BW limitation * @failing_port is set to: + * * - The non-root port where a BW limit check failed * with all the ports downstream of @failing_port passing * the BW limit check. @@ -5499,6 +5500,7 @@ EXPORT_SYMBOL(drm_dp_mst_atomic_enable_dsc); * - %NULL if the BW limit check failed at the root port * with all the ports downstream of the root port passing * the BW limit check. + * * - %-EINVAL, if the new state is invalid, because the root port has * too many payloads. */ From 612e1110d689387aab81b2727895cd307d3cbbfd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 22 Jan 2024 12:25:00 -0500 Subject: [PATCH 097/347] bcachefs: Add gfp flags param to bch2_prt_task_backtrace() Fixes: e6a2566f7a00 ("bcachefs: Better journal tracepoints") Signed-off-by: Kent Overstreet Reported-by: smatch --- fs/bcachefs/btree_locking.c | 4 ++-- fs/bcachefs/debug.c | 2 +- fs/bcachefs/journal.c | 2 +- fs/bcachefs/util.c | 10 +++++----- fs/bcachefs/util.h | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index bed75c93c069..684397442338 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -92,7 +92,7 @@ static noinline void print_cycle(struct printbuf *out, struct lock_graph *g) continue; bch2_btree_trans_to_text(out, i->trans); - bch2_prt_task_backtrace(out, task, i == g->g ? 5 : 1); + bch2_prt_task_backtrace(out, task, i == g->g ? 5 : 1, GFP_NOWAIT); } } @@ -227,7 +227,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle) prt_printf(&buf, "backtrace:"); prt_newline(&buf); printbuf_indent_add(&buf, 2); - bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2); + bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT); printbuf_indent_sub(&buf, 2); prt_newline(&buf); } diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index cadda9bbe4a4..7bdba8507fc9 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -627,7 +627,7 @@ restart: prt_printf(&i->buf, "backtrace:"); prt_newline(&i->buf); printbuf_indent_add(&i->buf, 2); - bch2_prt_task_backtrace(&i->buf, task, 0); + bch2_prt_task_backtrace(&i->buf, task, 0, GFP_KERNEL); printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index d71d26e39521..bc890776eb57 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -233,7 +233,7 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t prt_str(&pbuf, "entry size: "); prt_human_readable_u64(&pbuf, vstruct_bytes(buf->data)); prt_newline(&pbuf); - bch2_prt_task_backtrace(&pbuf, current, 1); + bch2_prt_task_backtrace(&pbuf, current, 1, GFP_NOWAIT); trace_journal_entry_close(c, pbuf.buf); printbuf_exit(&pbuf); } diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index a135136adeee..56b815fd9fc6 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -272,14 +272,14 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines) console_unlock(); } -int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr) +int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr, + gfp_t gfp) { #ifdef CONFIG_STACKTRACE unsigned nr_entries = 0; - int ret = 0; stack->nr = 0; - ret = darray_make_room(stack, 32); + int ret = darray_make_room_gfp(stack, 32, gfp); if (ret) return ret; @@ -308,10 +308,10 @@ void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack) } } -int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr) +int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr, gfp_t gfp) { bch_stacktrace stack = { 0 }; - int ret = bch2_save_backtrace(&stack, task, skipnr + 1); + int ret = bch2_save_backtrace(&stack, task, skipnr + 1, gfp); bch2_prt_backtrace(out, &stack); darray_exit(&stack); diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index df67bf55fe2b..b414736d59a5 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -348,9 +348,9 @@ void bch2_prt_u64_base2(struct printbuf *, u64); void bch2_print_string_as_lines(const char *prefix, const char *lines); typedef DARRAY(unsigned long) bch_stacktrace; -int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned); +int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t); void bch2_prt_backtrace(struct printbuf *, bch_stacktrace *); -int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned); +int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned, gfp_t); static inline void prt_bdevname(struct printbuf *out, struct block_device *bdev) { From 3e44f325f6f75078cdcd44cd337f517ba3650d05 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jan 2024 08:36:55 +0100 Subject: [PATCH 098/347] bcachefs: fix incorrect usage of REQ_OP_FLUSH REQ_OP_FLUSH is only for internal use in the blk-mq and request based drivers. File systems and other block layer consumers must use REQ_OP_WRITE | REQ_PREFLUSH as documented in Documentation/block/writeback_cache_control.rst. While REQ_OP_FLUSH appears to work for blk-mq drivers it does not get the proper flush state machine handling, and completely fails for any bio based drivers, including all the stacking drivers. The block layer will also get a check in 6.8 to reject this use case entirely. [Note: completely untested, but as this never got fixed since the original bug report in November: https://bugzilla.kernel.org/show_bug.cgi?id=218184 and the the discussion in December: https://lore.kernel.org/all/20231221053016.72cqcfg46vxwohcj@moria.home.lan/T/ this seems to be best way to force it] Signed-off-by: Christoph Hellwig Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 2 +- fs/bcachefs/journal_io.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index dc52918d06ef..8c70123b6a0c 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -79,7 +79,7 @@ void bch2_inode_flush_nocow_writes_async(struct bch_fs *c, continue; bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, 0, - REQ_OP_FLUSH, + REQ_OP_WRITE|REQ_PREFLUSH, GFP_KERNEL, &c->nocow_flush_bioset), struct nocow_flush, bio); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 04a1e79a5ed3..bfd6585e746d 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1988,7 +1988,8 @@ CLOSURE_CALLBACK(bch2_journal_write) percpu_ref_get(&ca->io_ref); bio = ca->journal.bio; - bio_reset(bio, ca->disk_sb.bdev, REQ_OP_FLUSH); + bio_reset(bio, ca->disk_sb.bdev, + REQ_OP_WRITE|REQ_PREFLUSH); bio->bi_end_io = journal_write_endio; bio->bi_private = ca; closure_bio_submit(bio, cl); From d53271c05965b4469c57a18c66585075df81c504 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 11 Jan 2024 10:49:22 -0500 Subject: [PATCH 099/347] selftests/rseq: Do not skip !allowed_cpus for mm_cid Indexing with mm_cid is incompatible with skipping disallowed cpumask, because concurrency IDs are based on a virtual ID allocation which is unrelated to the physical CPU mask. These issues can be reproduced by running the rseq selftests under a taskset which excludes CPU 0, e.g. taskset -c 10-20 ./run_param_test.sh Signed-off-by: Mathieu Desnoyers Cc: Shuah Khan Cc: Peter Zijlstra Cc: "Paul E. McKenney" Cc: Boqun Feng Signed-off-by: Shuah Khan --- .../selftests/rseq/basic_percpu_ops_test.c | 14 ++++++++++-- tools/testing/selftests/rseq/param_test.c | 22 ++++++++++++++----- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c index 887542961968..2348d2c20d0a 100644 --- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c +++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c @@ -24,6 +24,11 @@ bool rseq_validate_cpu_id(void) { return rseq_mm_cid_available(); } +static +bool rseq_use_cpu_index(void) +{ + return false; /* Use mm_cid */ +} #else # define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID static @@ -36,6 +41,11 @@ bool rseq_validate_cpu_id(void) { return rseq_current_cpu_raw() >= 0; } +static +bool rseq_use_cpu_index(void) +{ + return true; /* Use cpu_id as index. */ +} #endif struct percpu_lock_entry { @@ -274,7 +284,7 @@ void test_percpu_list(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; for (j = 1; j <= 100; j++) { struct percpu_list_node *node; @@ -299,7 +309,7 @@ void test_percpu_list(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_list_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while ((node = __percpu_list_pop(&list, i))) { diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 20403d58345c..2f37961240ca 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -288,6 +288,11 @@ bool rseq_validate_cpu_id(void) { return rseq_mm_cid_available(); } +static +bool rseq_use_cpu_index(void) +{ + return false; /* Use mm_cid */ +} # ifdef TEST_MEMBARRIER /* * Membarrier does not currently support targeting a mm_cid, so @@ -312,6 +317,11 @@ bool rseq_validate_cpu_id(void) { return rseq_current_cpu_raw() >= 0; } +static +bool rseq_use_cpu_index(void) +{ + return true; /* Use cpu_id as index. */ +} # ifdef TEST_MEMBARRIER static int rseq_membarrier_expedited(int cpu) @@ -715,7 +725,7 @@ void test_percpu_list(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; for (j = 1; j <= 100; j++) { struct percpu_list_node *node; @@ -752,7 +762,7 @@ void test_percpu_list(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_list_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while ((node = __percpu_list_pop(&list, i))) { @@ -902,7 +912,7 @@ void test_percpu_buffer(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; /* Worse-case is every item in same CPU. */ buffer.c[i].array = @@ -952,7 +962,7 @@ void test_percpu_buffer(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_buffer_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while ((node = __percpu_buffer_pop(&buffer, i))) { @@ -1113,7 +1123,7 @@ void test_percpu_memcpy_buffer(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; /* Worse-case is every item in same CPU. */ buffer.c[i].array = @@ -1160,7 +1170,7 @@ void test_percpu_memcpy_buffer(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_memcpy_buffer_node item; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) { From 68deb9972079c9904fe714c049a7f08bd997a9ee Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 16 Jan 2024 13:17:17 -0800 Subject: [PATCH 100/347] tools/testing/cxl: Disable "missing prototypes / declarations" warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent warnings of the form: tools/testing/cxl/test/mock.c:44:6: error: no previous prototype for ‘__wrap_is_acpi_device_node’ [-Werror=missing-prototypes] tools/testing/cxl/test/mock.c:63:5: error: no previous prototype for ‘__wrap_acpi_table_parse_cedt’ [-Werror=missing-prototypes] tools/testing/cxl/test/mock.c:81:13: error: no previous prototype for ‘__wrap_acpi_evaluate_integer’ [-Werror=missing-prototypes] ...by locally disabling some warnings. It turns out that: Commit 0fcb70851fbf ("Makefile.extrawarn: turn on missing-prototypes globally") ...in addition to expanding in-tree coverage, also impacts out-of-tree module builds like those in tools/testing/cxl/. Filter out the warning options on unit test code that does not effect mainline builds. Reviewed-by: Alison Schofield Link: https://lore.kernel.org/r/170543983780.460832.10920261849128601697.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- tools/testing/cxl/Kbuild | 2 ++ tools/testing/cxl/test/Kbuild | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 0b12c36902d8..caff3834671f 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -65,4 +65,6 @@ cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o cxl_core-y += cxl_core_exports.o +KBUILD_CFLAGS := $(filter-out -Wmissing-prototypes -Wmissing-declarations, $(KBUILD_CFLAGS)) + obj-m += test/ diff --git a/tools/testing/cxl/test/Kbuild b/tools/testing/cxl/test/Kbuild index 61d5f7bcddf9..6b1927897856 100644 --- a/tools/testing/cxl/test/Kbuild +++ b/tools/testing/cxl/test/Kbuild @@ -8,3 +8,5 @@ obj-m += cxl_mock_mem.o cxl_test-y := cxl.o cxl_mock-y := mock.o cxl_mock_mem-y := mem.o + +KBUILD_CFLAGS := $(filter-out -Wmissing-prototypes -Wmissing-declarations, $(KBUILD_CFLAGS)) From c97dac57c804b53eab492ca0230d86356729d633 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 16 Jan 2024 13:17:23 -0800 Subject: [PATCH 101/347] tools/testing/nvdimm: Disable "missing prototypes / declarations" warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent warnings of the form: tools/testing/nvdimm/config_check.c:4:6: error: no previous prototype for ‘check’ [-Werror=missing-prototypes] ...by locally disabling some warnings. It turns out that: Commit 0fcb70851fbf ("Makefile.extrawarn: turn on missing-prototypes globally") ...in addition to expanding in-tree coverage, also impacts out-of-tree module builds like those in tools/testing/nvdimm/. Filter out the warning options on unit test code that does not effect mainline builds. Reviewed-by: Alison Schofield Link: https://lore.kernel.org/r/170543984331.460832.1780246477583036191.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- tools/testing/nvdimm/Kbuild | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 8153251ea389..91a3627f301a 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -82,4 +82,6 @@ libnvdimm-$(CONFIG_NVDIMM_KEYS) += $(NVDIMM_SRC)/security.o libnvdimm-y += libnvdimm_test.o libnvdimm-y += config_check.o +KBUILD_CFLAGS := $(filter-out -Wmissing-prototypes -Wmissing-declarations, $(KBUILD_CFLAGS)) + obj-m += test/ From d72a4caf685989e353dff0a97d7376ee10edbf87 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 17 Jan 2024 17:24:01 -0800 Subject: [PATCH 102/347] cxl/pci: Skip irq features if MSI/MSI-X are not supported CXL 3.1 Section 3.1.1 states: "A Function on a CXL device must not generate INTx messages if that Function participates in CXL.cache protocol or CXL.mem protocols." The generic CXL memory driver only supports devices which use the CXL.mem protocol. The current driver attempts to allocate MSI/MSI-X vectors in anticipation of their need for mailbox interrupts or event processing. However, the above requirement does not require a device to support interrupts, only that they use MSI/MSI-X. For example, a device may disable mailbox interrupts and either be configured for firmware first or skip event processing and function. Dave Larsen reported that the following Intel / Agilex card does not support interrupts on function 0. CXL: Intel Corporation Device 0ddb (rev 01) (prog-if 10 [CXL Memory Device (CXL 2.x)]) Rather than fail device probe if interrupts are not supported; flag that irqs are not enabled and avoid features which require interrupts. Emit messages appropriate for the situation to aid in debugging should device behavior be unexpected due to a failure to allocate vectors. Note that it is possible for a device to have host based event processing through polling. However, the driver does not support polling and it is not anticipated to be generally required. Leave that functionality to a future patch if such a device comes along. Reported-by: Dave Larsen Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Signed-off-by: Ira Weiny Reviewed-and-tested-by: Davidlohr Bueso Link: https://lore.kernel.org/r/20240117-dont-fail-irq-v2-1-f33f26b0e365@intel.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 4fd1f207c84e..233e7c42c161 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -382,7 +382,7 @@ static int cxl_pci_mbox_send(struct cxl_memdev_state *mds, return rc; } -static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds) +static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail) { struct cxl_dev_state *cxlds = &mds->cxlds; const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); @@ -441,7 +441,7 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds) INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work); /* background command interrupts are optional */ - if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ)) + if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) || !irq_avail) return 0; msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap); @@ -588,7 +588,7 @@ static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds) return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf); } -static int cxl_alloc_irq_vectors(struct pci_dev *pdev) +static bool cxl_alloc_irq_vectors(struct pci_dev *pdev) { int nvecs; @@ -605,9 +605,9 @@ static int cxl_alloc_irq_vectors(struct pci_dev *pdev) PCI_IRQ_MSIX | PCI_IRQ_MSI); if (nvecs < 1) { dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs); - return -ENXIO; + return false; } - return 0; + return true; } static irqreturn_t cxl_event_thread(int irq, void *id) @@ -743,7 +743,7 @@ static bool cxl_event_int_is_fw(u8 setting) } static int cxl_event_config(struct pci_host_bridge *host_bridge, - struct cxl_memdev_state *mds) + struct cxl_memdev_state *mds, bool irq_avail) { struct cxl_event_interrupt_policy policy; int rc; @@ -755,6 +755,11 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, if (!host_bridge->native_cxl_error) return 0; + if (!irq_avail) { + dev_info(mds->cxlds.dev, "No interrupt support, disable event processing.\n"); + return 0; + } + rc = cxl_mem_alloc_event_buf(mds); if (rc) return rc; @@ -789,6 +794,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct cxl_register_map map; struct cxl_memdev *cxlmd; int i, rc, pmu_count; + bool irq_avail; /* * Double check the anonymous union trickery in struct cxl_regs @@ -846,11 +852,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) else dev_warn(&pdev->dev, "Media not active (%d)\n", rc); - rc = cxl_alloc_irq_vectors(pdev); - if (rc) - return rc; + irq_avail = cxl_alloc_irq_vectors(pdev); - rc = cxl_pci_setup_mailbox(mds); + rc = cxl_pci_setup_mailbox(mds, irq_avail); if (rc) return rc; @@ -909,7 +913,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) } } - rc = cxl_event_config(host_bridge, mds); + rc = cxl_event_config(host_bridge, mds, irq_avail); if (rc) return rc; From 22fb4f041999f5f16ecbda15a2859b4ef4cbf47e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 19 Jan 2024 05:33:19 -0600 Subject: [PATCH 103/347] cpufreq/amd-pstate: Fix setting scaling max/min freq values Scaling min/max freq values were being cached and lagging a setting each time. Fix the ordering of the clamp call to ensure they work. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217931 Fixes: febab20caeba ("cpufreq/amd-pstate: Fix scaling_min_freq and scaling_max_freq update") Signed-off-by: Mario Limonciello Reviewed-by: Wyes Karny Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 1f6186475715..1791d37fbc53 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1232,14 +1232,13 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); + WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); + max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, cpudata->max_limit_perf); min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, cpudata->max_limit_perf); - - WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); - WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); - value = READ_ONCE(cpudata->cppc_req_cached); if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) From afa6ac2690bb9904ff883c6e942281e1032a484d Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 22 Jan 2024 21:32:01 +0100 Subject: [PATCH 104/347] HID: logitech-hidpp: add support for Logitech G Pro X Superlight 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let logitech-hidpp driver claim Logitech G Pro X Superlight 2. Reported-by: Marcus Rückert Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index fd6d8f1d9b8f..6ef0c88e3e60 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4610,6 +4610,8 @@ static const struct hid_device_id hidpp_devices[] = { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC088) }, { /* Logitech G Pro X Superlight Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC094) }, + { /* Logitech G Pro X Superlight 2 Gaming Mouse over USB */ + HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC09b) }, { /* G935 Gaming Headset */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0x0a87), From 73ae7e1c7644a8c33ba526302a10267cdbc249f8 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 11 Jan 2024 17:57:44 +0100 Subject: [PATCH 105/347] ata: libata-sata: improve sysfs description for ATA_LPM_UNKNOWN Currently, both ATA_LPM_UNKNOWN (0) and ATA_LPM_MAX_POWER (1) displays as "max_performance" in sysfs. This is quite misleading as they are not the same. For ATA_LPM_UNKNOWN, ata_eh_set_lpm() will not be called at all, leaving the configuration in unknown state. For ATA_LPM_MAX_POWER, ata_eh_set_lpm() is called, and setting the policy to ATA_LPM_MAX_POWER. This also matches the description of the SATA_MOBILE_LPM_POLICY Kconfig: 0 => Keep firmware settings 1 => Maximum performance Thus, update the sysfs description for ATA_LPM_UNKNOWN to match reality. While at it, update libata.h to mention that the ascii descriptions are in libata-sata.c and not in libata-scsi.c. Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-sata.c | 2 +- include/linux/libata.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index b6656c287175..0fb1934875f2 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -784,7 +784,7 @@ bool sata_lpm_ignore_phy_events(struct ata_link *link) EXPORT_SYMBOL_GPL(sata_lpm_ignore_phy_events); static const char *ata_lpm_policy_names[] = { - [ATA_LPM_UNKNOWN] = "max_performance", + [ATA_LPM_UNKNOWN] = "keep_firmware_settings", [ATA_LPM_MAX_POWER] = "max_performance", [ATA_LPM_MED_POWER] = "medium_power", [ATA_LPM_MED_POWER_WITH_DIPM] = "med_power_with_dipm", diff --git a/include/linux/libata.h b/include/linux/libata.h index 1dbb14daccfa..26d68115afb8 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -471,7 +471,7 @@ enum ata_completion_errors { /* * Link power management policy: If you alter this, you also need to - * alter libata-scsi.c (for the ascii descriptions) + * alter libata-sata.c (for the ascii descriptions) */ enum ata_lpm_policy { ATA_LPM_UNKNOWN, From 3e4147f33f8b647775357bae0248b9a2aeebfcd2 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 4 Jan 2024 21:11:37 +0100 Subject: [PATCH 106/347] x86/CPU/AMD: Add X86_FEATURE_ZEN5 Add a synthetic feature flag for Zen5. Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240104201138.5072-1-bp@alien8.de --- arch/x86/include/asm/cpufeatures.h | 4 +--- arch/x86/kernel/cpu/amd.c | 25 +++++++++++++++++++++---- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 29cb275a219d..fdf723b6f6d0 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -81,10 +81,8 @@ #define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ #define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ - -/* CPU types for specific tunings: */ #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */ +#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */ #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9f42d1c59e09..bc49e3b3aae0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -538,7 +538,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) /* Figure out Zen generations: */ switch (c->x86) { - case 0x17: { + case 0x17: switch (c->x86_model) { case 0x00 ... 0x2f: case 0x50 ... 0x5f: @@ -554,8 +554,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) goto warn; } break; - } - case 0x19: { + + case 0x19: switch (c->x86_model) { case 0x00 ... 0x0f: case 0x20 ... 0x5f: @@ -569,7 +569,17 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) goto warn; } break; - } + + case 0x1a: + switch (c->x86_model) { + case 0x00 ... 0x0f: + setup_force_cpu_cap(X86_FEATURE_ZEN5); + break; + default: + goto warn; + } + break; + default: break; } @@ -1039,6 +1049,11 @@ static void init_amd_zen4(struct cpuinfo_x86 *c) msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT); } +static void init_amd_zen5(struct cpuinfo_x86 *c) +{ + init_amd_zen_common(); +} + static void init_amd(struct cpuinfo_x86 *c) { u64 vm_cr; @@ -1084,6 +1099,8 @@ static void init_amd(struct cpuinfo_x86 *c) init_amd_zen3(c); else if (boot_cpu_has(X86_FEATURE_ZEN4)) init_amd_zen4(c); + else if (boot_cpu_has(X86_FEATURE_ZEN5)) + init_amd_zen5(c); /* * Enable workaround for FXSAVE leak on CPUs From 090e3bec01763e415bccae445f5bfe3d0c61b629 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 17 Jan 2024 11:18:44 -0800 Subject: [PATCH 107/347] x86/cpu: Add model number for Intel Clearwater Forest processor Server product based on the Atom Darkmont core. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240117191844.56180-1-tony.luck@intel.com --- arch/x86/include/asm/intel-family.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 197316121f04..b65e9c46b922 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -162,6 +162,8 @@ #define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */ #define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */ +#define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */ + /* Xeon Phi */ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ From b6eda11c44dc89a681e1c105f0f4660e69b1e183 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 19 Jan 2024 14:07:14 +0800 Subject: [PATCH 108/347] HID: nvidia-shield: Add missing null pointer checks to LED initialization devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. [jkosina@suse.com: tweak changelog a bit] Signed-off-by: Kunwu Chan Reviewed-by: Rahul Rameshbabu Signed-off-by: Jiri Kosina --- drivers/hid/hid-nvidia-shield.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-nvidia-shield.c b/drivers/hid/hid-nvidia-shield.c index 82d0a77359c4..58b15750dbb0 100644 --- a/drivers/hid/hid-nvidia-shield.c +++ b/drivers/hid/hid-nvidia-shield.c @@ -800,6 +800,8 @@ static inline int thunderstrike_led_create(struct thunderstrike *ts) led->name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL, "thunderstrike%d:blue:led", ts->id); + if (!led->name) + return -ENOMEM; led->max_brightness = 1; led->flags = LED_CORE_SUSPENDRESUME | LED_RETAIN_AT_SHUTDOWN; led->brightness_get = &thunderstrike_led_get_brightness; @@ -831,6 +833,8 @@ static inline int thunderstrike_psy_create(struct shield_device *shield_dev) shield_dev->battery_dev.desc.name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL, "thunderstrike_%d", ts->id); + if (!shield_dev->battery_dev.desc.name) + return -ENOMEM; shield_dev->battery_dev.psy = power_supply_register( &hdev->dev, &shield_dev->battery_dev.desc, &psy_cfg); From 26dd6a5667f500c5d991f90a9ac5998a71afaf5c Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 15 Jan 2024 12:50:51 +0800 Subject: [PATCH 109/347] HID: i2c-hid: Skip SET_POWER SLEEP for Cirque touchpad on system suspend There's a Cirque touchpad that wakes system up without anything touched the touchpad. The input report is empty when this happens. The reason is stated in HID over I2C spec, 7.2.8.2: "If the DEVICE wishes to wake the HOST from its low power state, it can issue a wake by asserting the interrupt." This is fine if OS can put system back to suspend by identifying input wakeup count stays the same on resume, like Chrome OS Dark Resume [0]. But for regular distro such policy is lacking. Though the change doesn't bring any impact on power consumption for touchpad is minimal, other i2c-hid device may depends on SLEEP control power. So use a quirk to limit the change scope. [0] https://chromium.googlesource.com/chromiumos/platform2/+/HEAD/power_manager/docs/dark_resume.md Signed-off-by: Kai-Heng Feng Reviewed-by: Douglas Anderson Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/i2c-hid/i2c-hid-core.c | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index fb30e228d35f..828a5c022c64 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -298,6 +298,9 @@ #define USB_VENDOR_ID_CIDC 0x1677 +#define I2C_VENDOR_ID_CIRQUE 0x0488 +#define I2C_PRODUCT_ID_CIRQUE_1063 0x1063 + #define USB_VENDOR_ID_CJTOUCH 0x24b8 #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0020 0x0020 #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0040 0x0040 diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 90f316ae9819..2df1ab3c31cc 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -49,6 +49,7 @@ #define I2C_HID_QUIRK_RESET_ON_RESUME BIT(2) #define I2C_HID_QUIRK_BAD_INPUT_SIZE BIT(3) #define I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET BIT(4) +#define I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND BIT(5) /* Command opcodes */ #define I2C_HID_OPCODE_RESET 0x01 @@ -131,6 +132,8 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_RESET_ON_RESUME }, { USB_VENDOR_ID_ITE, I2C_DEVICE_ID_ITE_LENOVO_LEGION_Y720, I2C_HID_QUIRK_BAD_INPUT_SIZE }, + { I2C_VENDOR_ID_CIRQUE, I2C_PRODUCT_ID_CIRQUE_1063, + I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND }, /* * Sending the wakeup after reset actually break ELAN touchscreen controller */ @@ -956,7 +959,8 @@ static int i2c_hid_core_suspend(struct i2c_hid *ihid, bool force_poweroff) return ret; /* Save some power */ - i2c_hid_set_power(ihid, I2C_HID_PWR_SLEEP); + if (!(ihid->quirks & I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND)) + i2c_hid_set_power(ihid, I2C_HID_PWR_SLEEP); disable_irq(client->irq); From 574bf7bbe83794a902679846770f75a9b7f28176 Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Tue, 9 Jan 2024 16:00:32 -0500 Subject: [PATCH 110/347] spi: bcm-qspi: fix SFDP BFPT read by usig mspi read SFDP read shall use the mspi reads when using the bcm_qspi_exec_mem_op() call. This fixes SFDP parameter page read failures seen with parts that now use SFDP protocol to read the basic flash parameter table. Fixes: 5f195ee7d830 ("spi: bcm-qspi: Implement the spi_mem interface") Signed-off-by: Kamal Dasu Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://msgid.link/r/20240109210033.43249-1-kamal.dasu@broadcom.com Signed-off-by: Mark Brown --- drivers/spi/spi-bcm-qspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index d96222e6d7d2..cfdaa5eaec76 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include "spi-bcm-qspi.h" @@ -1221,7 +1221,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem, /* non-aligned and very short transfers are handled by MSPI */ if (!IS_ALIGNED((uintptr_t)addr, 4) || !IS_ALIGNED((uintptr_t)buf, 4) || - len < 4) + len < 4 || op->cmd.opcode == SPINOR_OP_RDSFDP) mspi_read = true; if (!has_bspi(qspi) || mspi_read) From e267a5b3ec59ce88d6be21078e2deb807ca3b436 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 10 Jan 2024 09:54:03 +0100 Subject: [PATCH 111/347] spi: spi-imx: Use dev_err_probe for failed DMA channel requests If dma_request_chan() fails, no error is shown nor any information is shown in /sys/kernel/debug/devices_deferred if -EPROBE_DEFER is returned. Use dev_err_probe to fix both problems. Signed-off-by: Alexander Stein Reviewed-by: Francesco Dolcini Link: https://msgid.link/r/20240110085403.457089-1-alexander.stein@ew.tq-group.com Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 272bc871a848..546cdce525fc 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1344,7 +1344,7 @@ static int spi_imx_sdma_init(struct device *dev, struct spi_imx_data *spi_imx, controller->dma_tx = dma_request_chan(dev, "tx"); if (IS_ERR(controller->dma_tx)) { ret = PTR_ERR(controller->dma_tx); - dev_dbg(dev, "can't get the TX DMA channel, error %d!\n", ret); + dev_err_probe(dev, ret, "can't get the TX DMA channel!\n"); controller->dma_tx = NULL; goto err; } @@ -1353,7 +1353,7 @@ static int spi_imx_sdma_init(struct device *dev, struct spi_imx_data *spi_imx, controller->dma_rx = dma_request_chan(dev, "rx"); if (IS_ERR(controller->dma_rx)) { ret = PTR_ERR(controller->dma_rx); - dev_dbg(dev, "can't get the RX DMA channel, error %d\n", ret); + dev_err_probe(dev, ret, "can't get the RX DMA channel!\n"); controller->dma_rx = NULL; goto err; } From 633cd6fe6e1993ba80e0954c2db127a0b1a3e66f Mon Sep 17 00:00:00 2001 From: Amit Kumar Mahapatra Date: Mon, 18 Dec 2023 14:36:52 +0530 Subject: [PATCH 112/347] spi: spi-cadence: Reverse the order of interleaved write and read operations In the existing implementation, when executing interleaved write and read operations in the ISR for a transfer length greater than the FIFO size, the TXFIFO write precedes the RXFIFO read. Consequently, the initially received data in the RXFIFO is pushed out and lost, leading to a failure in data integrity. To address this issue, reverse the order of interleaved operations and conduct the RXFIFO read followed by the TXFIFO write. Fixes: 6afe2ae8dc48 ("spi: spi-cadence: Interleave write of TX and read of RX FIFO") Signed-off-by: Amit Kumar Mahapatra Link: https://msgid.link/r/20231218090652.18403-1-amit.kumar-mahapatra@amd.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index a50eb4db79de..e5140532071d 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -317,6 +317,15 @@ static void cdns_spi_process_fifo(struct cdns_spi *xspi, int ntx, int nrx) xspi->rx_bytes -= nrx; while (ntx || nrx) { + if (nrx) { + u8 data = cdns_spi_read(xspi, CDNS_SPI_RXD); + + if (xspi->rxbuf) + *xspi->rxbuf++ = data; + + nrx--; + } + if (ntx) { if (xspi->txbuf) cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++); @@ -326,14 +335,6 @@ static void cdns_spi_process_fifo(struct cdns_spi *xspi, int ntx, int nrx) ntx--; } - if (nrx) { - u8 data = cdns_spi_read(xspi, CDNS_SPI_RXD); - - if (xspi->rxbuf) - *xspi->rxbuf++ = data; - - nrx--; - } } } From a67e1f0bd4564b485e0f0c3ed7f6bf17688be268 Mon Sep 17 00:00:00 2001 From: Romain Naour Date: Tue, 23 Jan 2024 12:14:56 +0100 Subject: [PATCH 113/347] regulator: ti-abb: don't use devm_platform_ioremap_resource_byname for shared interrupt register We can't use devm_platform_ioremap_resource_byname() to remap the interrupt register that can be shared between regulator-abb-{ivahd,dspeve,gpu} drivers instances. The combined helper introduce a call to devm_request_mem_region() that creates a new busy resource region on PRM_IRQSTATUS_MPU register (0x4ae06010). The first devm_request_mem_region() call succeeds for regulator-abb-ivahd but fails for the two other regulator-abb-dspeve and regulator-abb-gpu. # cat /proc/iomem | grep -i 4ae06 4ae06010-4ae06013 : 4ae07e34.regulator-abb-ivahd int-address 4ae06014-4ae06017 : 4ae07ddc.regulator-abb-mpu int-address regulator-abb-dspeve and regulator-abb-gpu are missing due to devm_request_mem_region() failure (EBUSY): [ 1.326660] ti_abb 4ae07e30.regulator-abb-dspeve: can't request region for resource [mem 0x4ae06010-0x4ae06013] [ 1.326660] ti_abb: probe of 4ae07e30.regulator-abb-dspeve failed with error -16 [ 1.327239] ti_abb 4ae07de4.regulator-abb-gpu: can't request region for resource [mem 0x4ae06010-0x4ae06013] [ 1.327270] ti_abb: probe of 4ae07de4.regulator-abb-gpu failed with error -16 >From arm/boot/dts/dra7.dtsi: The abb_mpu is the only instance using its own interrupt register: (0x4ae06014) PRM_IRQSTATUS_MPU_2, ABB_MPU_DONE_ST (bit 7) The other tree instances (abb_ivahd, abb_dspeve, abb_gpu) share PRM_IRQSTATUS_MPU register (0x4ae06010) but use different bits ABB_IVA_DONE_ST (bit 30), ABB_DSPEVE_DONE_ST( bit 29) and ABB_GPU_DONE_ST (but 28). The commit b36c6b1887ff ("regulator: ti-abb: Make use of the helper function devm_ioremap related") overlooked the following comment implicitly explaining why devm_ioremap() is used in this case: /* * We may have shared interrupt register offsets which are * write-1-to-clear between domains ensuring exclusivity. */ Fixes and partially reverts commit b36c6b1887ff ("regulator: ti-abb: Make use of the helper function devm_ioremap related"). Improve the existing comment to avoid further conversion to devm_platform_ioremap_resource_byname(). Fixes: b36c6b1887ff ("regulator: ti-abb: Make use of the helper function devm_ioremap related") Signed-off-by: Romain Naour Reviewed-by: Yoann Congal Link: https://msgid.link/r/20240123111456.739381-1-romain.naour@smile.fr Signed-off-by: Mark Brown --- drivers/regulator/ti-abb-regulator.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c index f48214e2c3b4..04133510e5af 100644 --- a/drivers/regulator/ti-abb-regulator.c +++ b/drivers/regulator/ti-abb-regulator.c @@ -726,9 +726,25 @@ static int ti_abb_probe(struct platform_device *pdev) return PTR_ERR(abb->setup_reg); } - abb->int_base = devm_platform_ioremap_resource_byname(pdev, "int-address"); - if (IS_ERR(abb->int_base)) - return PTR_ERR(abb->int_base); + pname = "int-address"; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, pname); + if (!res) { + dev_err(dev, "Missing '%s' IO resource\n", pname); + return -ENODEV; + } + /* + * The MPU interrupt status register (PRM_IRQSTATUS_MPU) is + * shared between regulator-abb-{ivahd,dspeve,gpu} driver + * instances. Therefore use devm_ioremap() rather than + * devm_platform_ioremap_resource_byname() to avoid busy + * resource region conflicts. + */ + abb->int_base = devm_ioremap(dev, res->start, + resource_size(res)); + if (!abb->int_base) { + dev_err(dev, "Unable to map '%s'\n", pname); + return -ENOMEM; + } /* Map Optional resources */ pname = "efuse-address"; From de8b6e1c231a95abf95ad097b993d34b31458ec9 Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Tue, 23 Jan 2024 15:11:49 +0800 Subject: [PATCH 114/347] spi: hisi-sfc-v3xx: Return IRQ_NONE if no interrupts were detected Return IRQ_NONE from the interrupt handler when no interrupt was detected. Because an empty interrupt will cause a null pointer error: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Call trace: complete+0x54/0x100 hisi_sfc_v3xx_isr+0x2c/0x40 [spi_hisi_sfc_v3xx] __handle_irq_event_percpu+0x64/0x1e0 handle_irq_event+0x7c/0x1cc Signed-off-by: Devyn Liu Link: https://msgid.link/r/20240123071149.917678-1-liudingyuan@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-hisi-sfc-v3xx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/spi/spi-hisi-sfc-v3xx.c b/drivers/spi/spi-hisi-sfc-v3xx.c index 9d22018f7985..1301d14483d4 100644 --- a/drivers/spi/spi-hisi-sfc-v3xx.c +++ b/drivers/spi/spi-hisi-sfc-v3xx.c @@ -377,6 +377,11 @@ static const struct spi_controller_mem_ops hisi_sfc_v3xx_mem_ops = { static irqreturn_t hisi_sfc_v3xx_isr(int irq, void *data) { struct hisi_sfc_v3xx_host *host = data; + u32 reg; + + reg = readl(host->regbase + HISI_SFC_V3XX_INT_STAT); + if (!reg) + return IRQ_NONE; hisi_sfc_v3xx_disable_int(host); From 13f3956eb5681a4045a8dfdef48df5dc4d9f58a6 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:26:34 +0100 Subject: [PATCH 115/347] block: Fix WARNING in _copy_from_iter Syzkaller reports a warning in _copy_from_iter because an iov_iter is supposedly used in the wrong direction. The reason is that syzcaller managed to generate a request with a transfer direction of SG_DXFER_TO_FROM_DEV. This instructs the kernel to copy user buffers into the kernel, read into the copied buffers and then copy the data back to user space. Thus the iovec is used in both directions. Detect this situation in the block layer and construct a new iterator with the correct direction for the copy-in. Reported-by: syzbot+a532b03fdfee2c137666@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/0000000000009b92c10604d7a5e9@google.com/t/ Reported-by: syzbot+63dec323ac56c28e644f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/0000000000003faaa105f6e7c658@google.com/T/ Signed-off-by: Christian A. Ehrhardt Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240121202634.275068-1-lk@c--e.de Signed-off-by: Jens Axboe --- block/blk-map.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/block/blk-map.c b/block/blk-map.c index 8584babf3ea0..71210cdb3442 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -205,12 +205,19 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, /* * success */ - if ((iov_iter_rw(iter) == WRITE && - (!map_data || !map_data->null_mapped)) || - (map_data && map_data->from_user)) { + if (iov_iter_rw(iter) == WRITE && + (!map_data || !map_data->null_mapped)) { ret = bio_copy_from_iter(bio, iter); if (ret) goto cleanup; + } else if (map_data && map_data->from_user) { + struct iov_iter iter2 = *iter; + + /* This is the copy-in part of SG_DXFER_TO_FROM_DEV. */ + iter2.data_source = ITER_SOURCE; + ret = bio_copy_from_iter(bio, &iter2); + if (ret) + goto cleanup; } else { if (bmd->is_our_pages) zero_fill_bio(bio); From 5d390df3bdd13d178eb2e02e60e9a480f7103f7b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 23 Jan 2024 13:40:00 +0300 Subject: [PATCH 116/347] smb: client: delete "true", "false" defines Kernel has its own official true/false definitions. The defines aren't even used in this file. Signed-off-by: Alexey Dobriyan Signed-off-by: Steve French --- fs/smb/client/smbencrypt.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/smb/client/smbencrypt.c b/fs/smb/client/smbencrypt.c index f0ce26414f17..1d1ee9f18f37 100644 --- a/fs/smb/client/smbencrypt.c +++ b/fs/smb/client/smbencrypt.c @@ -26,13 +26,6 @@ #include "cifsproto.h" #include "../common/md4.h" -#ifndef false -#define false 0 -#endif -#ifndef true -#define true 1 -#endif - /* following came from the other byteorder.h to avoid include conflicts */ #define CVAL(buf,pos) (((unsigned char *)(buf))[pos]) #define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8) From d2d0223441d3caad65f6978c07869321bce968e0 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Tue, 23 Jan 2024 13:21:58 -0300 Subject: [PATCH 117/347] docs/sphinx: Fix TOC scroll hack for the home page When on the documentation home page, there won't be any ".current" element since no entry from the TOC was selected yet. That results in a javascript error. Fix that by only trying to set the scrollTop if we have matches for current entries. Signed-off-by: Gustavo Sousa Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20240123162157.61819-2-gustavo.sousa@intel.com --- Documentation/sphinx/templates/kernel-toc.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/sphinx/templates/kernel-toc.html b/Documentation/sphinx/templates/kernel-toc.html index b58efa99df52..41f1efbe64bb 100644 --- a/Documentation/sphinx/templates/kernel-toc.html +++ b/Documentation/sphinx/templates/kernel-toc.html @@ -12,5 +12,7 @@ From 3f0e4df37a1b505307f61fbfa7b1f9a2fa2c40bc Mon Sep 17 00:00:00 2001 From: Hu Haowen <2023002089@link.tyut.edu.cn> Date: Thu, 18 Jan 2024 17:01:40 +0800 Subject: [PATCH 118/347] docs/accel: correct links to mailing list archives Since the mailing archive list lkml.org is obsolete, change the links into lore.kernel.org's ones. Signed-off-by: Hu Haowen <2023002089@link.tyut.edu.cn> Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20240118090140.4868-1-2023002089@link.tyut.edu.cn --- Documentation/accel/introduction.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/accel/introduction.rst b/Documentation/accel/introduction.rst index 89984dfececf..ae3030136637 100644 --- a/Documentation/accel/introduction.rst +++ b/Documentation/accel/introduction.rst @@ -101,8 +101,8 @@ External References email threads ------------- -* `Initial discussion on the New subsystem for acceleration devices `_ - Oded Gabbay (2022) -* `patch-set to add the new subsystem `_ - Oded Gabbay (2022) +* `Initial discussion on the New subsystem for acceleration devices `_ - Oded Gabbay (2022) +* `patch-set to add the new subsystem `_ - Oded Gabbay (2022) Conference talks ---------------- From ea7dcd8a48ea3b440a7070b1a0f70f757f8ed9a8 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 11 Jan 2024 09:52:20 +0100 Subject: [PATCH 119/347] doc: admin-guide/kernel-parameters: remove useless comment This comment about DRM drivers has been there since the first git commit. It simply doesn't belong in kernel-parameters; remove it. Signed-off-by: Vegard Nossum Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20240111085220.3693059-1-vegard.nossum@oracle.com --- Documentation/admin-guide/kernel-parameters.rst | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index 102937bc8443..4410384596a9 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -218,8 +218,3 @@ bytes respectively. Such letter suffixes can also be entirely omitted: .. include:: kernel-parameters.txt :literal: - -Todo ----- - - Add more DRM drivers. From d546978e0c07b6333fdbcbd81b2f2e058d4560b5 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 30 Nov 2023 10:55:15 +0100 Subject: [PATCH 120/347] docs: admin-guide: remove obsolete advice related to SLAB allocator Commit 1db9d06aaa55 ("mm/slab: remove CONFIG_SLAB from all Kconfig and Makefile") removes the config SLAB and makes the SLUB allocator the only default allocator in the kernel. Hence, the advice on reducing OS jitter due to kworker kernel threads to build with CONFIG_SLUB instead of CONFIG_SLAB is obsolete. Remove the obsolete advice to build with SLUB instead of SLAB. Signed-off-by: Lukas Bulwahn Acked-by: Vlastimil Babka Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20231130095515.21586-1-lukas.bulwahn@gmail.com --- .../admin-guide/kernel-per-CPU-kthreads.rst | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst index 993c2a05f5ee..b6aeae3327ce 100644 --- a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst +++ b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst @@ -243,13 +243,9 @@ To reduce its OS jitter, do any of the following: 3. Do any of the following needed to avoid jitter that your application cannot tolerate: - a. Build your kernel with CONFIG_SLUB=y rather than - CONFIG_SLAB=y, thus avoiding the slab allocator's periodic - use of each CPU's workqueues to run its cache_reap() - function. - b. Avoid using oprofile, thus avoiding OS jitter from + a. Avoid using oprofile, thus avoiding OS jitter from wq_sync_buffer(). - c. Limit your CPU frequency so that a CPU-frequency + b. Limit your CPU frequency so that a CPU-frequency governor is not required, possibly enlisting the aid of special heatsinks or other cooling technologies. If done correctly, and if you CPU architecture permits, you should @@ -259,7 +255,7 @@ To reduce its OS jitter, do any of the following: WARNING: Please check your CPU specifications to make sure that this is safe on your particular system. - d. As of v3.18, Christoph Lameter's on-demand vmstat workers + c. As of v3.18, Christoph Lameter's on-demand vmstat workers commit prevents OS jitter due to vmstat_update() on CONFIG_SMP=y systems. Before v3.18, is not possible to entirely get rid of the OS jitter, but you can @@ -274,7 +270,7 @@ To reduce its OS jitter, do any of the following: (based on an earlier one from Gilad Ben-Yossef) that reduces or even eliminates vmstat overhead for some workloads at https://lore.kernel.org/r/00000140e9dfd6bd-40db3d4f-c1be-434f-8132-7820f81bb586-000000@email.amazonses.com. - e. If running on high-end powerpc servers, build with + d. If running on high-end powerpc servers, build with CONFIG_PPC_RTAS_DAEMON=n. This prevents the RTAS daemon from running on each CPU every second or so. (This will require editing Kconfig files and will defeat @@ -282,12 +278,12 @@ To reduce its OS jitter, do any of the following: due to the rtas_event_scan() function. WARNING: Please check your CPU specifications to make sure that this is safe on your particular system. - f. If running on Cell Processor, build your kernel with + e. If running on Cell Processor, build your kernel with CBE_CPUFREQ_SPU_GOVERNOR=n to avoid OS jitter from spu_gov_work(). WARNING: Please check your CPU specifications to make sure that this is safe on your particular system. - g. If running on PowerMAC, build your kernel with + f. If running on PowerMAC, build your kernel with CONFIG_PMAC_RACKMETER=n to disable the CPU-meter, avoiding OS jitter from rackmeter_do_timer(). From 16bae3e1377846734ec6b87eee459c0f3551692c Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 23 Jan 2024 16:55:02 -0500 Subject: [PATCH 121/347] io_uring: enable audit and restrict cred override for IORING_OP_FIXED_FD_INSTALL We need to correct some aspects of the IORING_OP_FIXED_FD_INSTALL command to take into account the security implications of making an io_uring-private file descriptor generally accessible to a userspace task. The first change in this patch is to enable auditing of the FD_INSTALL operation as installing a file descriptor into a task's file descriptor table is a security relevant operation and something that admins/users may want to audit. The second change is to disable the io_uring credential override functionality, also known as io_uring "personalities", in the FD_INSTALL command. The credential override in FD_INSTALL is particularly problematic as it affects the credentials used in the security_file_receive() LSM hook. If a task were to request a credential override via REQ_F_CREDS on a FD_INSTALL operation, the LSM would incorrectly check to see if the overridden credentials of the io_uring were able to "receive" the file as opposed to the task's credentials. After discussions upstream, it's difficult to imagine a use case where we would want to allow a credential override on a FD_INSTALL operation so we are simply going to block REQ_F_CREDS on IORING_OP_FIXED_FD_INSTALL operations. Fixes: dc18b89ab113 ("io_uring/openclose: add support for IORING_OP_FIXED_FD_INSTALL") Signed-off-by: Paul Moore Link: https://lore.kernel.org/r/20240123215501.289566-2-paul@paul-moore.com Signed-off-by: Jens Axboe --- io_uring/opdef.c | 1 - io_uring/openclose.c | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 6705634e5f52..b1ee3a9c3807 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -471,7 +471,6 @@ const struct io_issue_def io_issue_defs[] = { }, [IORING_OP_FIXED_FD_INSTALL] = { .needs_file = 1, - .audit_skip = 1, .prep = io_install_fixed_fd_prep, .issue = io_install_fixed_fd, }, diff --git a/io_uring/openclose.c b/io_uring/openclose.c index 0fe0dd305546..e3357dfa14ca 100644 --- a/io_uring/openclose.c +++ b/io_uring/openclose.c @@ -277,6 +277,10 @@ int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sq if (flags & ~IORING_FIXED_FD_NO_CLOEXEC) return -EINVAL; + /* ensure the task's creds are used when installing/receiving fds */ + if (req->flags & REQ_F_CREDS) + return -EPERM; + /* default to O_CLOEXEC, disable if IORING_FIXED_FD_NO_CLOEXEC is set */ ifi = io_kiocb_to_cmd(req, struct io_fixed_install); ifi->o_flags = O_CLOEXEC; From 8deb05c84b63b4fdb8549e08942867a68924a5b8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 23 Jan 2024 15:47:34 -0800 Subject: [PATCH 122/347] smb: Work around Clang __bdos() type confusion Recent versions of Clang gets confused about the possible size of the "user" allocation, and CONFIG_FORTIFY_SOURCE ends up emitting a warning[1]: repro.c:126:4: warning: call to '__write_overflow_field' declared with 'warning' attribute: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] 126 | __write_overflow_field(p_size_field, size); | ^ for this memset(): int len; __le16 *user; ... len = ses->user_name ? strlen(ses->user_name) : 0; user = kmalloc(2 + (len * 2), GFP_KERNEL); ... if (len) { ... } else { memset(user, '\0', 2); } While Clang works on this bug[2], switch to using a direct assignment, which avoids memset() entirely which both simplifies the code and silences the false positive warning. (Making "len" size_t also silences the warning, but the direct assignment seems better.) Reported-by: Nathan Chancellor Closes: https://github.com/ClangBuiltLinux/linux/issues/1966 [1] Link: https://github.com/llvm/llvm-project/issues/77813 [2] Cc: Steve French Cc: Paulo Alcantara Cc: Ronnie Sahlberg Cc: Shyam Prasad N Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: llvm@lists.linux.dev Signed-off-by: Kees Cook Signed-off-by: Steve French --- fs/smb/client/cifsencrypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index ef4c2e3c9fa6..6322f0f68a17 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -572,7 +572,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp); UniStrupr(user); } else { - memset(user, '\0', 2); + *(u16 *)user = 0; } rc = crypto_shash_update(ses->server->secmech.hmacmd5, From 966cc171c8be4fbeae1bf166d264e0bfb09e141c Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 15 Feb 2022 19:22:18 +0000 Subject: [PATCH 123/347] cifs: Share server EOF pos with netfslib Use cifsi->netfs_ctx.remote_i_size instead of cifsi->server_eof so that netfslib can refer to it to. Signed-off-by: David Howells cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 17 ++++++++++++++--- fs/smb/client/cifsglob.h | 1 - fs/smb/client/file.c | 8 ++++---- fs/smb/client/inode.c | 8 +++++--- fs/smb/client/readdir.c | 2 +- fs/smb/client/smb2ops.c | 18 +++++++++++++----- 6 files changed, 37 insertions(+), 17 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index e902de4e475a..2a4a4e3a8751 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -396,7 +396,7 @@ cifs_alloc_inode(struct super_block *sb) spin_lock_init(&cifs_inode->writers_lock); cifs_inode->writers = 0; cifs_inode->netfs.inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ - cifs_inode->server_eof = 0; + cifs_inode->netfs.remote_i_size = 0; cifs_inode->uniqueid = 0; cifs_inode->createtime = 0; cifs_inode->epoch = 0; @@ -1380,6 +1380,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, struct inode *src_inode = file_inode(src_file); struct inode *target_inode = file_inode(dst_file); struct cifsInodeInfo *src_cifsi = CIFS_I(src_inode); + struct cifsInodeInfo *target_cifsi = CIFS_I(target_inode); struct cifsFileInfo *smb_file_src; struct cifsFileInfo *smb_file_target; struct cifs_tcon *src_tcon; @@ -1428,7 +1429,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, * Advance the EOF marker after the flush above to the end of the range * if it's short of that. */ - if (src_cifsi->server_eof < off + len) { + if (src_cifsi->netfs.remote_i_size < off + len) { rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len); if (rc < 0) goto unlock; @@ -1452,12 +1453,22 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, /* Discard all the folios that overlap the destination region. */ truncate_inode_pages_range(&target_inode->i_data, fstart, fend); + fscache_invalidate(cifs_inode_cookie(target_inode), NULL, + i_size_read(target_inode), 0); + rc = file_modified(dst_file); if (!rc) { rc = target_tcon->ses->server->ops->copychunk_range(xid, smb_file_src, smb_file_target, off, len, destoff); - if (rc > 0 && destoff + rc > i_size_read(target_inode)) + if (rc > 0 && destoff + rc > i_size_read(target_inode)) { truncate_setsize(target_inode, destoff + rc); + netfs_resize_file(&target_cifsi->netfs, + i_size_read(target_inode), true); + fscache_resize_cookie(cifs_inode_cookie(target_inode), + i_size_read(target_inode)); + } + if (rc > 0 && destoff + rc > target_cifsi->netfs.zero_point) + target_cifsi->netfs.zero_point = destoff + rc; } file_accessed(src_file); diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 20036fb16cec..dd12364ef372 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1561,7 +1561,6 @@ struct cifsInodeInfo { spinlock_t writers_lock; unsigned int writers; /* Number of writers on this inode */ unsigned long time; /* jiffies of last update of inode */ - u64 server_eof; /* current file size on server -- protected by i_lock */ u64 uniqueid; /* server inode number */ u64 createtime; /* creation time on server */ __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for this inode */ diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 3a213432775b..70f9e3f12a5c 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -2120,8 +2120,8 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, { loff_t end_of_write = offset + bytes_written; - if (end_of_write > cifsi->server_eof) - cifsi->server_eof = end_of_write; + if (end_of_write > cifsi->netfs.remote_i_size) + netfs_resize_file(&cifsi->netfs, end_of_write, true); } static ssize_t @@ -3247,8 +3247,8 @@ cifs_uncached_writev_complete(struct work_struct *work) spin_lock(&inode->i_lock); cifs_update_eof(cifsi, wdata->offset, wdata->bytes); - if (cifsi->server_eof > inode->i_size) - i_size_write(inode, cifsi->server_eof); + if (cifsi->netfs.remote_i_size > inode->i_size) + i_size_write(inode, cifsi->netfs.remote_i_size); spin_unlock(&inode->i_lock); complete(&wdata->done); diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index f0989484f2c6..d02f8ba29cb5 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -104,7 +104,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) fattr->cf_mtime = timestamp_truncate(fattr->cf_mtime, inode); mtime = inode_get_mtime(inode); if (timespec64_equal(&mtime, &fattr->cf_mtime) && - cifs_i->server_eof == fattr->cf_eof) { + cifs_i->netfs.remote_i_size == fattr->cf_eof) { cifs_dbg(FYI, "%s: inode %llu is unchanged\n", __func__, cifs_i->uniqueid); return; @@ -194,7 +194,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) else clear_bit(CIFS_INO_DELETE_PENDING, &cifs_i->flags); - cifs_i->server_eof = fattr->cf_eof; + cifs_i->netfs.remote_i_size = fattr->cf_eof; /* * Can't safely change the file size here if the client is writing to * it due to potential races. @@ -2858,7 +2858,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, set_size_out: if (rc == 0) { - cifsInode->server_eof = attrs->ia_size; + netfs_resize_file(&cifsInode->netfs, attrs->ia_size, true); cifs_setsize(inode, attrs->ia_size); /* * i_blocks is not related to (i_size / i_blksize), but instead @@ -3011,6 +3011,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) if ((attrs->ia_valid & ATTR_SIZE) && attrs->ia_size != i_size_read(inode)) { truncate_setsize(inode, attrs->ia_size); + netfs_resize_file(&cifsInode->netfs, attrs->ia_size, true); fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size); } @@ -3210,6 +3211,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) if ((attrs->ia_valid & ATTR_SIZE) && attrs->ia_size != i_size_read(inode)) { truncate_setsize(inode, attrs->ia_size); + netfs_resize_file(&cifsInode->netfs, attrs->ia_size, true); fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size); } diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 94255401b38d..3b1b01d10f7d 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -141,7 +141,7 @@ retry: if (likely(reparse_inode_match(inode, fattr))) { fattr->cf_mode = inode->i_mode; fattr->cf_rdev = inode->i_rdev; - fattr->cf_eof = CIFS_I(inode)->server_eof; + fattr->cf_eof = CIFS_I(inode)->netfs.remote_i_size; fattr->cf_symlink_target = NULL; } else { CIFS_I(inode)->time = 0; diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index d9553c2556a2..27f8caccff7f 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3213,6 +3213,9 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, cfile->fid.volatile_fid, cfile->pid, new_size); if (rc >= 0) { truncate_setsize(inode, new_size); + netfs_resize_file(&cifsi->netfs, new_size, true); + if (offset < cifsi->netfs.zero_point) + cifsi->netfs.zero_point = offset; fscache_resize_cookie(cifs_inode_cookie(inode), new_size); } } @@ -3436,7 +3439,7 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon, rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, new_eof); if (rc == 0) { - cifsi->server_eof = new_eof; + netfs_resize_file(&cifsi->netfs, new_eof, true); cifs_setsize(inode, new_eof); cifs_truncate_page(inode->i_mapping, inode->i_size); truncate_setsize(inode, new_eof); @@ -3528,8 +3531,9 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, int rc; unsigned int xid; struct inode *inode = file_inode(file); - struct cifsFileInfo *cfile = file->private_data; struct cifsInodeInfo *cifsi = CIFS_I(inode); + struct cifsFileInfo *cfile = file->private_data; + struct netfs_inode *ictx = &cifsi->netfs; loff_t old_eof, new_eof; xid = get_xid(); @@ -3549,6 +3553,7 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, goto out_2; truncate_pagecache_range(inode, off, old_eof); + ictx->zero_point = old_eof; rc = smb2_copychunk_range(xid, cfile, cfile, off + len, old_eof - off - len, off); @@ -3563,9 +3568,10 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, rc = 0; - cifsi->server_eof = i_size_read(inode) - len; - truncate_setsize(inode, cifsi->server_eof); - fscache_resize_cookie(cifs_inode_cookie(inode), cifsi->server_eof); + truncate_setsize(inode, new_eof); + netfs_resize_file(&cifsi->netfs, new_eof, true); + ictx->zero_point = new_eof; + fscache_resize_cookie(cifs_inode_cookie(inode), new_eof); out_2: filemap_invalidate_unlock(inode->i_mapping); out: @@ -3581,6 +3587,7 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, unsigned int xid; struct cifsFileInfo *cfile = file->private_data; struct inode *inode = file_inode(file); + struct cifsInodeInfo *cifsi = CIFS_I(inode); __u64 count, old_eof, new_eof; xid = get_xid(); @@ -3608,6 +3615,7 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, goto out_2; truncate_setsize(inode, new_eof); + netfs_resize_file(&cifsi->netfs, i_size_read(inode), true); fscache_resize_cookie(cifs_inode_cookie(inode), i_size_read(inode)); rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len); From fc43a8ac396d302ced1e991e4913827cf72c8eb9 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Sun, 21 Jan 2024 03:32:43 +0000 Subject: [PATCH 124/347] cifs: cifs_pick_channel should try selecting active channels cifs_pick_channel today just selects a channel based on the policy of least loaded channel. However, it does not take into account if the channel needs reconnect. As a result, we can have failures in send that can be completely avoided. This change doesn't make a channel a candidate for this selection if it needs reconnect. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/transport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 4f717ad7c21b..8695c9961f5a 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1026,6 +1026,9 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) if (!server || server->terminate) continue; + if (CIFS_CHAN_NEEDS_RECONNECT(ses, i)) + continue; + /* * strictly speaking, we should pick up req_lock to read * server->in_flight. But it shouldn't matter much here if we From 4373534a9850627a2695317944898eb1283a2db0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 12 Jan 2024 15:00:00 +0800 Subject: [PATCH 125/347] scsi: core: Move scsi_host_busy() out of host lock for waking up EH handler Inside scsi_eh_wakeup(), scsi_host_busy() is called & checked with host lock every time for deciding if error handler kthread needs to be waken up. This can be too heavy in case of recovery, such as: - N hardware queues - queue depth is M for each hardware queue - each scsi_host_busy() iterates over (N * M) tag/requests If recovery is triggered in case that all requests are in-flight, each scsi_eh_wakeup() is strictly serialized, when scsi_eh_wakeup() is called for the last in-flight request, scsi_host_busy() has been run for (N * M - 1) times, and request has been iterated for (N*M - 1) * (N * M) times. If both N and M are big enough, hard lockup can be triggered on acquiring host lock, and it is observed on mpi3mr(128 hw queues, queue depth 8169). Fix the issue by calling scsi_host_busy() outside the host lock. We don't need the host lock for getting busy count because host the lock never covers that. [mkp: Drop unnecessary 'busy' variables pointed out by Bart] Cc: Ewan Milne Fixes: 6eb045e092ef ("scsi: core: avoid host-wide host_busy counter for scsi_mq") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20240112070000.4161982-1-ming.lei@redhat.com Reviewed-by: Ewan D. Milne Reviewed-by: Sathya Prakash Veerichetty Tested-by: Sathya Prakash Veerichetty Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_error.c | 8 ++++---- drivers/scsi/scsi_lib.c | 2 +- drivers/scsi/scsi_priv.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 79da4b1c1df0..4f455884fdc4 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -61,11 +61,11 @@ static int scsi_eh_try_stu(struct scsi_cmnd *scmd); static enum scsi_disposition scsi_try_to_abort_cmd(const struct scsi_host_template *, struct scsi_cmnd *); -void scsi_eh_wakeup(struct Scsi_Host *shost) +void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy) { lockdep_assert_held(shost->host_lock); - if (scsi_host_busy(shost) == shost->host_failed) { + if (busy == shost->host_failed) { trace_scsi_eh_wakeup(shost); wake_up_process(shost->ehandler); SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost, @@ -88,7 +88,7 @@ void scsi_schedule_eh(struct Scsi_Host *shost) if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 || scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) { shost->host_eh_scheduled++; - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, scsi_host_busy(shost)); } spin_unlock_irqrestore(shost->host_lock, flags); @@ -286,7 +286,7 @@ static void scsi_eh_inc_host_failed(struct rcu_head *head) spin_lock_irqsave(shost->host_lock, flags); shost->host_failed++; - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, scsi_host_busy(shost)); spin_unlock_irqrestore(shost->host_lock, flags); } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index cf3864f72093..1fb80eae9a63 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -280,7 +280,7 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd) if (unlikely(scsi_host_in_recovery(shost))) { spin_lock_irqsave(shost->host_lock, flags); if (shost->host_failed || shost->host_eh_scheduled) - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, scsi_host_busy(shost)); spin_unlock_irqrestore(shost->host_lock, flags); } rcu_read_unlock(); diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 3f0dfb97db6b..1fbfe1b52c9f 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -92,7 +92,7 @@ extern void scmd_eh_abort_handler(struct work_struct *work); extern enum blk_eh_timer_return scsi_timeout(struct request *req); extern int scsi_error_handler(void *host); extern enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *cmd); -extern void scsi_eh_wakeup(struct Scsi_Host *shost); +extern void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy); extern void scsi_eh_scmd_add(struct scsi_cmnd *); void scsi_eh_ready_devs(struct Scsi_Host *shost, struct list_head *work_q, From a68106a6928e0a6680f12bcc7338c0dddcfe4d11 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Sun, 21 Jan 2024 03:32:45 +0000 Subject: [PATCH 126/347] cifs: translate network errors on send to -ECONNABORTED When the network stack returns various errors, we today bubble up the error to the user (in case of soft mounts). This change translates all network errors except -EINTR and -EAGAIN to -ECONNABORTED. A similar approach is taken when we receive network errors when reading from the socket. The change also forces the cifsd thread to reconnect during it's next activity. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/transport.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 8695c9961f5a..e00278fcfa4f 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -400,10 +400,17 @@ unmask: server->conn_id, server->hostname); } smbd_done: - if (rc < 0 && rc != -EINTR) + /* + * there's hardly any use for the layers above to know the + * actual error code here. All they should do at this point is + * to retry the connection and hope it goes away. + */ + if (rc < 0 && rc != -EINTR && rc != -EAGAIN) { cifs_server_dbg(VFS, "Error %d sending data on socket to server\n", rc); - else if (rc > 0) + rc = -ECONNABORTED; + cifs_signal_cifsd_for_reconnect(server, false); + } else if (rc > 0) rc = 0; out: cifs_in_send_dec(server); From 64cc377b7628b81ffdbdb1c6bacfba895dcac3f8 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Sun, 21 Jan 2024 03:32:46 +0000 Subject: [PATCH 127/347] cifs: helper function to check replayable error codes The code to check for replay is not just -EAGAIN. In some cases, the send request or receive response may result in network errors, which we're now mapping to -ECONNABORTED. This change introduces a helper function which checks if the error returned in one of the above two errors. And all checks for replays will now use this helper. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 1 + fs/smb/client/cifsglob.h | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 971892620504..5730c65ffb40 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -367,6 +367,7 @@ out: atomic_inc(&tcon->num_remote_opens); } kfree(utf16_path); + return rc; } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index dd12364ef372..4eb706e27c82 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1830,6 +1830,13 @@ static inline bool is_retryable_error(int error) return false; } +static inline bool is_replayable_error(int error) +{ + if (error == -EAGAIN || error == -ECONNABORTED) + return true; + return false; +} + /* cifs_get_writable_file() flags */ #define FIND_WR_ANY 0 From 4f1fffa2376922f3d1d506e49c0fd445b023a28e Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Sun, 21 Jan 2024 03:32:47 +0000 Subject: [PATCH 128/347] cifs: commands that are retried should have replay flag set MS-SMB2 states that the header flag SMB2_FLAGS_REPLAY_OPERATION needs to be set when a command needs to be retried, so that the server is aware that this is a replay for an operation that appeared before. This can be very important, for example, for state changing operations and opens which get retried following a reconnect; since the client maybe unaware of the status of the previous open. This is particularly important for multichannel scenario, since disconnection of one connection does not mean that the session is lost. The requests can be replayed on another channel. This change also makes use of exponential back-off before replays and also limits the number of retries to "retrans" mount option value. Also, this change does not modify the read/write codepath. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 23 +++- fs/smb/client/cifsglob.h | 5 + fs/smb/client/smb2inode.c | 33 ++++- fs/smb/client/smb2ops.c | 123 +++++++++++++++-- fs/smb/client/smb2pdu.c | 264 +++++++++++++++++++++++++++++++++---- fs/smb/client/smb2proto.h | 5 + 6 files changed, 406 insertions(+), 47 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 5730c65ffb40..1daeb5714faa 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -145,21 +145,27 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, struct cached_fid *cfid; struct cached_fids *cfids; const char *npath; + int retries = 0, cur_sleep = 1; if (tcon == NULL || tcon->cfids == NULL || tcon->nohandlecache || is_smb1_server(tcon->ses->server) || (dir_cache_timeout == 0)) return -EOPNOTSUPP; ses = tcon->ses; - server = cifs_pick_channel(ses); cfids = tcon->cfids; - if (!server->ops->new_lease_key) - return -EIO; - if (cifs_sb->root == NULL) return -ENOENT; +replay_again: + /* reinitialize for possible replay */ + flags = 0; + oplock = SMB2_OPLOCK_LEVEL_II; + server = cifs_pick_channel(ses); + + if (!server->ops->new_lease_key) + return -EIO; + utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) return -ENOMEM; @@ -268,6 +274,11 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, */ cfid->has_lease = true; + if (retries) { + smb2_set_replay(server, &rqst[0]); + smb2_set_replay(server, &rqst[1]); + } + rc = compound_send_recv(xid, ses, server, flags, 2, rqst, resp_buftype, rsp_iov); @@ -368,6 +379,10 @@ out: } kfree(utf16_path); + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 4eb706e27c82..ceaf9857885d 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -49,6 +49,11 @@ */ #define CIFS_DEF_ACTIMEO (1 * HZ) +/* + * max sleep time before retry to server + */ +#define CIFS_MAX_SLEEP 2000 + /* * max attribute cache timeout (jiffies) - 2^30 */ diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index a652200540c8..05818cd6d932 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -120,6 +120,14 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, unsigned int size[2]; void *data[2]; int len; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + oplock = SMB2_OPLOCK_LEVEL_NONE; + num_rqst = 0; + server = cifs_pick_channel(ses); vars = kzalloc(sizeof(*vars), GFP_ATOMIC); if (vars == NULL) @@ -127,8 +135,6 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, rqst = &vars->rqst[0]; rsp_iov = &vars->rsp_iov[0]; - server = cifs_pick_channel(ses); - if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -463,15 +469,24 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, num_rqst++; if (cfile) { + if (retries) + for (i = 1; i < num_rqst - 2; i++) + smb2_set_replay(server, &rqst[i]); + rc = compound_send_recv(xid, ses, server, flags, num_rqst - 2, &rqst[1], &resp_buftype[1], &rsp_iov[1]); - } else + } else { + if (retries) + for (i = 0; i < num_rqst; i++) + smb2_set_replay(server, &rqst[i]); + rc = compound_send_recv(xid, ses, server, flags, num_rqst, rqst, resp_buftype, rsp_iov); + } finished: num_rqst = 0; @@ -620,9 +635,6 @@ finished: } SMB2_close_free(&rqst[num_rqst]); - if (cfile) - cifsFileInfo_put(cfile); - num_cmds += 2; if (out_iov && out_buftype) { memcpy(out_iov, rsp_iov, num_cmds * sizeof(*out_iov)); @@ -632,7 +644,16 @@ finished: for (i = 0; i < num_cmds; i++) free_rsp_buf(resp_buftype[i], rsp_iov[i].iov_base); } + num_cmds -= 2; /* correct num_cmds as there could be a retry */ kfree(vars); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + + if (cfile) + cifsFileInfo_put(cfile); + return rc; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 27f8caccff7f..83c898afc835 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1108,7 +1108,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, { struct smb2_compound_vars *vars; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; struct smb_rqst *rqst; struct kvec *rsp_iov; __le16 *utf16_path = NULL; @@ -1124,6 +1124,13 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, struct smb2_file_full_ea_info *ea = NULL; struct smb2_query_info_rsp *rsp; int rc, used_len = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = CIFS_CP_CREATE_CLOSE_OP; + oplock = SMB2_OPLOCK_LEVEL_NONE; + server = cifs_pick_channel(ses); if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -1244,6 +1251,12 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, goto sea_exit; smb2_set_related(&rqst[2]); + if (retries) { + smb2_set_replay(server, &rqst[0]); + smb2_set_replay(server, &rqst[1]); + smb2_set_replay(server, &rqst[2]); + } + rc = compound_send_recv(xid, ses, server, flags, 3, rqst, resp_buftype, rsp_iov); @@ -1260,6 +1273,11 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, kfree(vars); out_free_path: kfree(utf16_path); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } #endif @@ -1484,7 +1502,7 @@ smb2_ioctl_query_info(const unsigned int xid, struct smb_rqst *rqst; struct kvec *rsp_iov; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; char __user *arg = (char __user *)p; struct smb_query_info qi; struct smb_query_info __user *pqi; @@ -1501,6 +1519,13 @@ smb2_ioctl_query_info(const unsigned int xid, void *data[2]; int create_options = is_dir ? CREATE_NOT_FILE : CREATE_NOT_DIR; void (*free_req1_func)(struct smb_rqst *r); + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = CIFS_CP_CREATE_CLOSE_OP; + oplock = SMB2_OPLOCK_LEVEL_NONE; + server = cifs_pick_channel(ses); vars = kzalloc(sizeof(*vars), GFP_ATOMIC); if (vars == NULL) @@ -1641,6 +1666,12 @@ smb2_ioctl_query_info(const unsigned int xid, goto free_req_1; smb2_set_related(&rqst[2]); + if (retries) { + smb2_set_replay(server, &rqst[0]); + smb2_set_replay(server, &rqst[1]); + smb2_set_replay(server, &rqst[2]); + } + rc = compound_send_recv(xid, ses, server, flags, 3, rqst, resp_buftype, rsp_iov); @@ -1701,6 +1732,11 @@ free_output_buffer: kfree(buffer); free_vars: kfree(vars); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -2227,8 +2263,14 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_open_parms oparms; struct smb2_query_directory_rsp *qd_rsp = NULL; struct smb2_create_rsp *op_rsp = NULL; - struct TCP_Server_Info *server = cifs_pick_channel(tcon->ses); - int retry_count = 0; + struct TCP_Server_Info *server; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + oplock = SMB2_OPLOCK_LEVEL_NONE; + server = cifs_pick_channel(tcon->ses); utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) @@ -2278,14 +2320,15 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, smb2_set_related(&rqst[1]); -again: + if (retries) { + smb2_set_replay(server, &rqst[0]); + smb2_set_replay(server, &rqst[1]); + } + rc = compound_send_recv(xid, tcon->ses, server, flags, 2, rqst, resp_buftype, rsp_iov); - if (rc == -EAGAIN && retry_count++ < 10) - goto again; - /* If the open failed there is nothing to do */ op_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base; if (op_rsp == NULL || op_rsp->hdr.Status != STATUS_SUCCESS) { @@ -2333,6 +2376,11 @@ again: SMB2_query_directory_free(&rqst[1]); free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base); free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -2457,6 +2505,22 @@ smb2_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid, CIFS_CACHE_READ(cinode) ? 1 : 0); } +void +smb2_set_replay(struct TCP_Server_Info *server, struct smb_rqst *rqst) +{ + struct smb2_hdr *shdr; + + if (server->dialect < SMB30_PROT_ID) + return; + + shdr = (struct smb2_hdr *)(rqst->rq_iov[0].iov_base); + if (shdr == NULL) { + cifs_dbg(FYI, "shdr NULL in smb2_set_related\n"); + return; + } + shdr->Flags |= SMB2_FLAGS_REPLAY_OPERATION; +} + void smb2_set_related(struct smb_rqst *rqst) { @@ -2529,6 +2593,27 @@ smb2_set_next_command(struct cifs_tcon *tcon, struct smb_rqst *rqst) shdr->NextCommand = cpu_to_le32(len); } +/* + * helper function for exponential backoff and check if replayable + */ +bool smb2_should_replay(struct cifs_tcon *tcon, + int *pretries, + int *pcur_sleep) +{ + if (!pretries || !pcur_sleep) + return false; + + if (tcon->retry || (*pretries)++ < tcon->ses->server->retrans) { + msleep(*pcur_sleep); + (*pcur_sleep) = ((*pcur_sleep) << 1); + if ((*pcur_sleep) > CIFS_MAX_SLEEP) + (*pcur_sleep) = CIFS_MAX_SLEEP; + return true; + } + + return false; +} + /* * Passes the query info response back to the caller on success. * Caller need to free this with free_rsp_buf(). @@ -2542,7 +2627,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, { struct smb2_compound_vars *vars; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; int flags = CIFS_CP_CREATE_CLOSE_OP; struct smb_rqst *rqst; int resp_buftype[3]; @@ -2553,6 +2638,13 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, int rc; __le16 *utf16_path; struct cached_fid *cfid = NULL; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = CIFS_CP_CREATE_CLOSE_OP; + oplock = SMB2_OPLOCK_LEVEL_NONE; + server = cifs_pick_channel(ses); if (!path) path = ""; @@ -2633,6 +2725,14 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, goto qic_exit; smb2_set_related(&rqst[2]); + if (retries) { + if (!cfid) { + smb2_set_replay(server, &rqst[0]); + smb2_set_replay(server, &rqst[2]); + } + smb2_set_replay(server, &rqst[1]); + } + if (cfid) { rc = compound_send_recv(xid, ses, server, flags, 1, &rqst[1], @@ -2665,6 +2765,11 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, kfree(vars); out_free_path: kfree(utf16_path); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 288199f0b987..f92d77099ab4 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2765,7 +2765,14 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, int flags = 0; unsigned int total_len; __le16 *utf16_path = NULL; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + n_iov = 2; + server = cifs_pick_channel(ses); cifs_dbg(FYI, "mkdir\n"); @@ -2869,6 +2876,10 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, /* no need to inc num_remote_opens because we close it just below */ trace_smb3_posix_mkdir_enter(xid, tcon->tid, ses->Suid, full_path, CREATE_NOT_FILE, FILE_WRITE_ATTRIBUTES); + + if (retries) + smb2_set_replay(server, &rqst); + /* resource #4: response buffer */ rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -2906,6 +2917,11 @@ err_free_req: cifs_small_buf_release(req); err_free_path: kfree(utf16_path); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -3101,12 +3117,18 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, struct smb2_create_rsp *rsp = NULL; struct cifs_tcon *tcon = oparms->tcon; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; struct kvec iov[SMB2_CREATE_IOV_SIZE]; struct kvec rsp_iov = {NULL, 0}; int resp_buftype = CIFS_NO_BUFFER; int rc = 0; int flags = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); cifs_dbg(FYI, "create/open\n"); if (!ses || !server) @@ -3128,6 +3150,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, trace_smb3_open_enter(xid, tcon->tid, tcon->ses->Suid, oparms->path, oparms->create_options, oparms->desired_access); + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -3181,6 +3206,11 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, creat_exit: SMB2_open_free(&rqst); free_rsp_buf(resp_buftype, rsp); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -3305,6 +3335,22 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, int resp_buftype = CIFS_NO_BUFFER; int rc = 0; int flags = 0; + int retries = 0, cur_sleep = 1; + + if (!tcon) + return -EIO; + + ses = tcon->ses; + if (!ses) + return -EIO; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); + + if (!server) + return -EIO; cifs_dbg(FYI, "SMB2 IOCTL\n"); @@ -3315,17 +3361,6 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, if (plen) *plen = 0; - if (!tcon) - return -EIO; - - ses = tcon->ses; - if (!ses) - return -EIO; - - server = cifs_pick_channel(ses); - if (!server) - return -EIO; - if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -3340,6 +3375,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, if (rc) goto ioctl_exit; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -3409,6 +3447,11 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, ioctl_exit: SMB2_ioctl_free(&rqst); free_rsp_buf(resp_buftype, rsp); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -3480,13 +3523,20 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, struct smb_rqst rqst; struct smb2_close_rsp *rsp = NULL; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; struct kvec iov[1]; struct kvec rsp_iov; int resp_buftype = CIFS_NO_BUFFER; int rc = 0; int flags = 0; bool query_attrs = false; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + query_attrs = false; + server = cifs_pick_channel(ses); cifs_dbg(FYI, "Close\n"); @@ -3512,6 +3562,9 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, if (rc) goto close_exit; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); rsp = (struct smb2_close_rsp *)rsp_iov.iov_base; @@ -3545,6 +3598,11 @@ close_exit: cifs_dbg(VFS, "handle cancelled close fid 0x%llx returned error %d\n", persistent_fid, tmp_rc); } + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -3675,12 +3733,19 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, struct TCP_Server_Info *server; int flags = 0; bool allocated = false; + int retries = 0, cur_sleep = 1; cifs_dbg(FYI, "Query Info\n"); if (!ses) return -EIO; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + allocated = false; server = cifs_pick_channel(ses); + if (!server) return -EIO; @@ -3702,6 +3767,9 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, trace_smb3_query_info_enter(xid, persistent_fid, tcon->tid, ses->Suid, info_class, (__u32)info_type); + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base; @@ -3744,6 +3812,11 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, qinf_exit: SMB2_query_info_free(&rqst); free_rsp_buf(resp_buftype, rsp); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -3844,7 +3917,7 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon, u32 *plen /* returned data len */) { struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; struct smb_rqst rqst; struct smb2_change_notify_rsp *smb_rsp; struct kvec iov[1]; @@ -3852,6 +3925,12 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon, int resp_buftype = CIFS_NO_BUFFER; int flags = 0; int rc = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); cifs_dbg(FYI, "change notify\n"); if (!ses || !server) @@ -3876,6 +3955,10 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon, trace_smb3_notify_enter(xid, persistent_fid, tcon->tid, ses->Suid, (u8)watch_tree, completion_filter); + + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -3910,6 +3993,11 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon, if (rqst.rq_iov) cifs_small_buf_release(rqst.rq_iov[0].iov_base); /* request */ free_rsp_buf(resp_buftype, rsp_iov.iov_base); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -4152,10 +4240,16 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, struct smb_rqst rqst; struct kvec iov[1]; struct kvec rsp_iov = {NULL, 0}; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; int resp_buftype = CIFS_NO_BUFFER; int flags = 0; int rc = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); cifs_dbg(FYI, "flush\n"); if (!ses || !(ses->server)) @@ -4175,6 +4269,10 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, goto flush_exit; trace_smb3_flush_enter(xid, persistent_fid, tcon->tid, ses->Suid); + + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -4189,6 +4287,11 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, flush_exit: SMB2_flush_free(&rqst); free_rsp_buf(resp_buftype, rsp_iov.iov_base); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -4826,18 +4929,21 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, int flags = 0; unsigned int total_len; struct TCP_Server_Info *server; + int retries = 0, cur_sleep = 1; +replay_again: + /* reinitialize for possible replay */ + flags = 0; *nbytes = 0; - - if (n_vec < 1) - return rc; - if (!io_parms->server) io_parms->server = cifs_pick_channel(io_parms->tcon->ses); server = io_parms->server; if (server == NULL) return -ECONNABORTED; + if (n_vec < 1) + return rc; + rc = smb2_plain_req_init(SMB2_WRITE, io_parms->tcon, server, (void **) &req, &total_len); if (rc) @@ -4871,6 +4977,9 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, rqst.rq_iov = iov; rqst.rq_nvec = n_vec + 1; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, io_parms->tcon->ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -4895,6 +5004,11 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, cifs_small_buf_release(req); free_rsp_buf(resp_buftype, rsp); + + if (is_replayable_error(rc) && + smb2_should_replay(io_parms->tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -5206,8 +5320,14 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, struct kvec rsp_iov; int rc = 0; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; int flags = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); if (!ses || !(ses->server)) return -EIO; @@ -5227,6 +5347,9 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, if (rc) goto qdir_exit; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); rsp = (struct smb2_query_directory_rsp *)rsp_iov.iov_base; @@ -5261,6 +5384,11 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, qdir_exit: SMB2_query_directory_free(&rqst); free_rsp_buf(resp_buftype, rsp); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -5327,8 +5455,14 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; int resp_buftype; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; int flags = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); if (!ses || !server) return -EIO; @@ -5356,6 +5490,8 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, return rc; } + if (retries) + smb2_set_replay(server, &rqst); rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, @@ -5371,6 +5507,11 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, free_rsp_buf(resp_buftype, rsp); kfree(iov); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -5423,12 +5564,18 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, int rc; struct smb2_oplock_break *req = NULL; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; int flags = CIFS_OBREAK_OP; unsigned int total_len; struct kvec iov[1]; struct kvec rsp_iov; int resp_buf_type; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = CIFS_OBREAK_OP; + server = cifs_pick_channel(ses); cifs_dbg(FYI, "SMB2_oplock_break\n"); rc = smb2_plain_req_init(SMB2_OPLOCK_BREAK, tcon, server, @@ -5453,15 +5600,21 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, rqst.rq_iov = iov; rqst.rq_nvec = 1; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buf_type, flags, &rsp_iov); cifs_small_buf_release(req); - if (rc) { cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); cifs_dbg(FYI, "Send error in Oplock Break = %d\n", rc); } + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -5547,9 +5700,15 @@ SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; int resp_buftype; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; FILE_SYSTEM_POSIX_INFO *info = NULL; int flags = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); rc = build_qfs_info_req(&iov, tcon, server, FS_POSIX_INFORMATION, @@ -5565,6 +5724,9 @@ SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon, rqst.rq_iov = &iov; rqst.rq_nvec = 1; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); free_qfs_info_req(&iov); @@ -5584,6 +5746,11 @@ SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon, posix_qfsinf_exit: free_rsp_buf(resp_buftype, rsp_iov.iov_base); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -5598,9 +5765,15 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; int resp_buftype; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; struct smb2_fs_full_size_info *info = NULL; int flags = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); rc = build_qfs_info_req(&iov, tcon, server, FS_FULL_SIZE_INFORMATION, @@ -5616,6 +5789,9 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, rqst.rq_iov = &iov; rqst.rq_nvec = 1; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); free_qfs_info_req(&iov); @@ -5635,6 +5811,11 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, qfsinf_exit: free_rsp_buf(resp_buftype, rsp_iov.iov_base); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -5649,9 +5830,15 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; int resp_buftype, max_len, min_len; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; unsigned int rsp_len, offset; int flags = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); if (level == FS_DEVICE_INFORMATION) { max_len = sizeof(FILE_SYSTEM_DEVICE_INFO); @@ -5683,6 +5870,9 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, rqst.rq_iov = &iov; rqst.rq_nvec = 1; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); free_qfs_info_req(&iov); @@ -5720,6 +5910,11 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, qfsattr_exit: free_rsp_buf(resp_buftype, rsp_iov.iov_base); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -5737,7 +5932,13 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, unsigned int count; int flags = CIFS_NO_RSP_BUF; unsigned int total_len; - struct TCP_Server_Info *server = cifs_pick_channel(tcon->ses); + struct TCP_Server_Info *server; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = CIFS_NO_RSP_BUF; + server = cifs_pick_channel(tcon->ses); cifs_dbg(FYI, "smb2_lockv num lock %d\n", num_lock); @@ -5768,6 +5969,9 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, rqst.rq_iov = iov; rqst.rq_nvec = 2; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, tcon->ses, server, &rqst, &resp_buf_type, flags, &rsp_iov); @@ -5779,6 +5983,10 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, tcon->ses->Suid, rc); } + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 0034b537b0b3..b3069911e9dd 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -122,6 +122,11 @@ extern unsigned long smb_rqst_len(struct TCP_Server_Info *server, extern void smb2_set_next_command(struct cifs_tcon *tcon, struct smb_rqst *rqst); extern void smb2_set_related(struct smb_rqst *rqst); +extern void smb2_set_replay(struct TCP_Server_Info *server, + struct smb_rqst *rqst); +extern bool smb2_should_replay(struct cifs_tcon *tcon, + int *pretries, + int *pcur_sleep); /* * SMB2 Worker functions - most of protocol specific implementation details From 4cdad80261862c8cdcbb5fd232aa713d0bdefe24 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 18 Jan 2024 09:14:10 +0000 Subject: [PATCH 129/347] cifs: set replay flag for retries of write command Similar to the rest of the commands, this is a change to add replay flags on retry. This one does not add a back-off, considering that we may want to flush a write ASAP to the server. Considering that this will be a flush of cached pages, the retrans value is also not honoured. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 1 + fs/smb/client/file.c | 1 + fs/smb/client/smb2pdu.c | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index ceaf9857885d..16befff4cbb4 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1506,6 +1506,7 @@ struct cifs_writedata { struct smbd_mr *mr; #endif struct cifs_credits credits; + bool replay; }; /* diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 70f9e3f12a5c..6f079976f9e7 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -3300,6 +3300,7 @@ cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, if (wdata->cfile->invalidHandle) rc = -EAGAIN; else { + wdata->replay = true; #ifdef CONFIG_CIFS_SMB_DIRECT if (wdata->mr) { wdata->mr->need_invalidate = true; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index f92d77099ab4..4f2cc8373b67 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4771,7 +4771,7 @@ smb2_async_writev(struct cifs_writedata *wdata, struct cifs_io_parms *io_parms = NULL; int credit_request; - if (!wdata->server) + if (!wdata->server || wdata->replay) server = wdata->server = cifs_pick_channel(tcon->ses); /* @@ -4856,6 +4856,8 @@ smb2_async_writev(struct cifs_writedata *wdata, rqst.rq_nvec = 1; rqst.rq_iter = wdata->iter; rqst.rq_iter_size = iov_iter_count(&rqst.rq_iter); + if (wdata->replay) + smb2_set_replay(server, &rqst); #ifdef CONFIG_CIFS_SMB_DIRECT if (wdata->mr) iov[0].iov_len += sizeof(struct smbd_buffer_descriptor_v1); From 993d1c346b1a51ac41b2193609a0d4e51e9748f4 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Tue, 23 Jan 2024 05:07:57 +0000 Subject: [PATCH 130/347] cifs: fix stray unlock in cifs_chan_skip_or_disable A recent change moved the code that decides to skip a channel or disable multichannel entirely, into a helper function. During this, a mutex_unlock of the session_mutex should have been removed. Doing that here. Fixes: f591062bdbf4 ("cifs: handle servers that still advertise multichannel after disabling") Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 4f2cc8373b67..86f6f35b7f32 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -195,7 +195,6 @@ cifs_chan_skip_or_disable(struct cifs_ses *ses, pserver = server->primary_server; cifs_signal_cifsd_for_reconnect(pserver, false); skip_terminate: - mutex_unlock(&ses->session_mutex); return -EHOSTDOWN; } From f4469f3858352ad1197434557150b1f7086762a0 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 22 Jan 2024 09:09:56 -0800 Subject: [PATCH 131/347] scsi: storvsc: Fix ring buffer size calculation Current code uses the specified ring buffer size (either the default of 128 Kbytes or a module parameter specified value) to encompass the one page ring buffer header plus the actual ring itself. When the page size is 4K, carving off one page for the header isn't significant. But when the page size is 64K on ARM64, only half of the default 128 Kbytes is left for the actual ring. While this doesn't break anything, the smaller ring size could be a performance bottleneck. Fix this by applying the VMBUS_RING_SIZE macro to the specified ring buffer size. This macro adds a page for the header, and rounds up the size to a page boundary, using the page size for which the kernel is built. Use this new size for subsequent ring buffer calculations. For example, on ARM64 with 64K page size and the default ring size, this results in the actual ring being 128 Kbytes, which is intended. Cc: stable@vger.kernel.org # 5.15.x Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20240122170956.496436-1-mhklinux@outlook.com Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index a95936b18f69..7ceb982040a5 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -330,6 +330,7 @@ enum storvsc_request_type { */ static int storvsc_ringbuffer_size = (128 * 1024); +static int aligned_ringbuffer_size; static u32 max_outstanding_req_per_channel; static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth); @@ -687,8 +688,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc) new_sc->next_request_id_callback = storvsc_next_request_id; ret = vmbus_open(new_sc, - storvsc_ringbuffer_size, - storvsc_ringbuffer_size, + aligned_ringbuffer_size, + aligned_ringbuffer_size, (void *)&props, sizeof(struct vmstorage_channel_properties), storvsc_on_channel_callback, new_sc); @@ -1973,7 +1974,7 @@ static int storvsc_probe(struct hv_device *device, dma_set_min_align_mask(&device->device, HV_HYP_PAGE_SIZE - 1); stor_device->port_number = host->host_no; - ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc); + ret = storvsc_connect_to_vsp(device, aligned_ringbuffer_size, is_fc); if (ret) goto err_out1; @@ -2164,7 +2165,7 @@ static int storvsc_resume(struct hv_device *hv_dev) { int ret; - ret = storvsc_connect_to_vsp(hv_dev, storvsc_ringbuffer_size, + ret = storvsc_connect_to_vsp(hv_dev, aligned_ringbuffer_size, hv_dev_is_fc(hv_dev)); return ret; } @@ -2198,8 +2199,9 @@ static int __init storvsc_drv_init(void) * the ring buffer indices) by the max request size (which is * vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64) */ + aligned_ringbuffer_size = VMBUS_RING_SIZE(storvsc_ringbuffer_size); max_outstanding_req_per_channel = - ((storvsc_ringbuffer_size - PAGE_SIZE) / + ((aligned_ringbuffer_size - PAGE_SIZE) / ALIGN(MAX_MULTIPAGE_BUFFER_PACKET + sizeof(struct vstor_packet) + sizeof(u64), sizeof(u64))); From 0077a504e1a4468669fd2e011108db49133db56e Mon Sep 17 00:00:00 2001 From: Conrad Kostecki Date: Tue, 23 Jan 2024 19:30:02 +0100 Subject: [PATCH 132/347] ahci: asm1166: correct count of reported ports The ASM1166 SATA host controller always reports wrongly, that it has 32 ports. But in reality, it only has six ports. This seems to be a hardware issue, as all tested ASM1166 SATA host controllers reports such high count of ports. Example output: ahci 0000:09:00.0: AHCI 0001.0301 32 slots 32 ports 6 Gbps 0xffffff3f impl SATA mode. By adjusting the port_map, the count is limited to six ports. New output: ahci 0000:09:00.0: AHCI 0001.0301 32 slots 32 ports 6 Gbps 0x3f impl SATA mode. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=211873 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218346 Signed-off-by: Conrad Kostecki Reviewed-by: Hans de Goede Signed-off-by: Niklas Cassel --- drivers/ata/ahci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 3a5f3255f51b..762c5d8b7c1a 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -663,6 +663,11 @@ MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets"); static void ahci_pci_save_initial_config(struct pci_dev *pdev, struct ahci_host_priv *hpriv) { + if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == 0x1166) { + dev_info(&pdev->dev, "ASM1166 has only six ports\n"); + hpriv->saved_port_map = 0x3f; + } + if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) { dev_info(&pdev->dev, "JMB361 has only one port\n"); hpriv->saved_port_map = 1; From 3213b8070ac69b32f05fa2328cbebe0eca75c1bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 17 Jan 2024 14:40:44 +0100 Subject: [PATCH 133/347] drm/xe/dmabuf: Make xe_dmabuf_ops static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is not referenced outside of the xe_dma_buf.c source file. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Rodrigo Vivi Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240117134048.165425-2-thomas.hellstrom@linux.intel.com (cherry picked from commit e2dc52f849f8694bdabb75127164c9df622af459) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_dma_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 64ed303728fd..da2627ed6ae7 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -175,7 +175,7 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, return 0; } -const struct dma_buf_ops xe_dmabuf_ops = { +static const struct dma_buf_ops xe_dmabuf_ops = { .attach = xe_dma_buf_attach, .detach = xe_dma_buf_detach, .pin = xe_dma_buf_pin, From 03b72dbbd4e96d0197aa8cf894a24a4db8623031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 17 Jan 2024 14:40:48 +0100 Subject: [PATCH 134/347] drm/xe: Use a NULL pointer instead of 0. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last argument of xe_pcode_read() is a pointer. Use NULL instead of 0. Fixes: 92d44a422d0d ("drm/xe/hwmon: Expose card reactive critical power") Cc: Rodrigo Vivi Signed-off-by: Thomas Hellström Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240117134048.165425-6-thomas.hellstrom@linux.intel.com (cherry picked from commit 79f8eacbdf9dad7ead39b3319e31e12d4dc6529e) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_hwmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 6ef2aa1eae8b..174ed2185481 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -419,7 +419,7 @@ static int xe_hwmon_pcode_read_i1(struct xe_gt *gt, u32 *uval) return xe_pcode_read(gt, PCODE_MBOX(PCODE_POWER_SETUP, POWER_SETUP_SUBCOMMAND_READ_I1, 0), - uval, 0); + uval, NULL); } static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval) From 32f6c3325703c98edee8f1005ad47b4d8431b758 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 18 Jan 2024 16:16:08 -0800 Subject: [PATCH 135/347] drm/xe: Use _ULL for u64 division MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use DIV_ROUND_UP_ULL() so it also works on 32bit build. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Reviewed-by: Matt Roper Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240119001612.2991381-2-lucas.demarchi@intel.com (cherry picked from commit 7b5bdb447b14930b9ef3e39bd301937889c60c96) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index b8d8da546670..1f0b4b9ce84f 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -613,7 +613,7 @@ void xe_device_wmb(struct xe_device *xe) u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? - DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0; + DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0; } bool xe_device_mem_access_ongoing(struct xe_device *xe) From 52e8948c6b6a41603371996b9bc0e43e17d690b4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 18 Jan 2024 16:16:09 -0800 Subject: [PATCH 136/347] drm/xe/mmio: Cast to u64 when printing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resource_size_t uses %pa format in printk since the size varies depending on build options. However to keep the io_size/physical_size addition in the same call we can't pass the address without adding yet another variable in these function. Simply cast it to u64 and keep using %llx. Fixes: 286089ce6929 ("drm/xe: Improve vram info debug printing") Cc: Oak Zeng Cc: Michael J. Ruhl Cc: Matthew Brost Cc: Rodrigo Vivi Reviewed-by: Matt Roper Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240119001612.2991381-3-lucas.demarchi@intel.com (cherry picked from commit 6d8d038364d8ec573e9dc0872e17bee1e5f12490) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_mmio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index c8c5d74b6e90..5f6b53ea5528 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -272,8 +272,8 @@ int xe_mmio_probe_vram(struct xe_device *xe) drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id, tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size); drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id, - &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + tile->mem.vram.actual_physical_size, - &tile->mem.vram.io_start, tile->mem.vram.io_start + tile->mem.vram.io_size); + &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + (u64)tile->mem.vram.actual_physical_size, + &tile->mem.vram.io_start, tile->mem.vram.io_start + (u64)tile->mem.vram.io_size); /* calculate total size using tile size to get the correct HW sizing */ total_size += tile_size; From 981460d8ee6042b14149fd8931ae27b91f2146b1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 18 Jan 2024 16:16:10 -0800 Subject: [PATCH 137/347] drm/xe/display: Avoid calling readq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit readq() is not available in 32bits and i915_gem_object_read_from_page() is supposed to allow reading arbitrary sizes determined by the `size` argument. Currently the only caller only passes a size == 8 so the second problem is not that big. Migrate to calling memcpy()/memcpy_fromio() to allow possible changes in the display side and to fix the build on 32b architectures. v2: Use memcpy/memcpy_fromio directly rather than using iosys-map with the same size == 8 bytes restriction (Matt Roper) Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240119001612.2991381-4-lucas.demarchi@intel.com (cherry picked from commit 406663f777bee53e9ad93dc080c333d4655ab7de) Signed-off-by: Thomas Hellström --- .../drm/xe/compat-i915-headers/gem/i915_gem_object.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h index 5f19550cc845..68d9f6116bdf 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h @@ -35,12 +35,10 @@ static inline int i915_gem_object_read_from_page(struct xe_bo *bo, u32 ofs, u64 *ptr, u32 size) { struct ttm_bo_kmap_obj map; - void *virtual; + void *src; bool is_iomem; int ret; - XE_WARN_ON(size != 8); - ret = xe_bo_lock(bo, true); if (ret) return ret; @@ -50,11 +48,12 @@ static inline int i915_gem_object_read_from_page(struct xe_bo *bo, goto out_unlock; ofs &= ~PAGE_MASK; - virtual = ttm_kmap_obj_virtual(&map, &is_iomem); + src = ttm_kmap_obj_virtual(&map, &is_iomem); + src += ofs; if (is_iomem) - *ptr = readq((void __iomem *)(virtual + ofs)); + memcpy_fromio(ptr, (void __iomem *)src, size); else - *ptr = *(u64 *)(virtual + ofs); + memcpy(ptr, src, size); ttm_bo_kunmap(&map); out_unlock: From c0e2508cb1004fdb153fbbcf0101404abfefdddd Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Fri, 19 Jan 2024 09:48:26 +0530 Subject: [PATCH 138/347] drm/xe/xe2: Use XE_CACHE_WB pat index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pat table entry associated with XE_CACHE_WB is coherent whereas XE_CACHE_NONE is non coherent. Migration expects the coherency with cpu therefore use the coherent entry XE_CACHE_WB for buffers not supporting compression. For read/write to flat ccs region the issue is not related to coherency with cpu. The hardware expects the pat index associated with GPUVA for indirect access to be compression enabled hence use XE_CACHE_NONE_COMPRESSION. v2 - Fix the argument to emit_pte, pass the bool directly. (Thomas) v3 - Rebase - Update commit message (Matt) v4 - Add a Fixes: tag. (Thomas) Cc: Matt Roper Cc: Thomas Hellström Fixes: 65ef8dbad1db ("drm/xe/xe2: Update emit_pte to use compression enabled PAT index") Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Thomas Hellström Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240119041826.1670496-1-himal.prasad.ghimiray@intel.com (cherry picked from commit 6a02867560f77328ae5637b70b06704b140aafa6) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_migrate.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index e05e9e7282b6..5c6c54624252 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -472,7 +472,7 @@ static void emit_pte(struct xe_migrate *m, /* Indirect access needs compression enabled uncached PAT index */ if (GRAPHICS_VERx100(xe) >= 2000) pat_index = is_comp_pte ? xe->pat.idx[XE_CACHE_NONE_COMPRESSION] : - xe->pat.idx[XE_CACHE_NONE]; + xe->pat.idx[XE_CACHE_WB]; else pat_index = xe->pat.idx[XE_CACHE_WB]; @@ -760,14 +760,14 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); else - emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0, - src); + emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, + &src_it, src_L0, src); if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) xe_res_next(&dst_it, src_L0); else - emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0, - dst); + emit_pte(m, bb, dst_L0_pt, dst_is_vram, copy_system_ccs, + &dst_it, src_L0, dst); if (copy_system_ccs) emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); @@ -1009,8 +1009,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) xe_res_next(&src_it, clear_L0); else - emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0, - dst); + emit_pte(m, bb, clear_L0_pt, clear_vram, clear_system_ccs, + &src_it, clear_L0, dst); bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; From d186e51b0ed05a0cd94c7c9756740a855325c557 Mon Sep 17 00:00:00 2001 From: Moti Haimovski Date: Mon, 22 Jan 2024 12:24:24 +0200 Subject: [PATCH 139/347] drm/xe/vm: bugfix in xe_vm_create_ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix xe_vm_create_ioctl routine not freeing the vm-id allocated to it when the function fails. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Moti Haimovski Reviewed-by: Matthew Brost Reviewed-by: Tomer Tayar Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240122102424.4008095-1-mhaimovski@habana.ai (cherry picked from commit f6bf0424cadc27d7cf6a049d2db960e4b52fa513) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 10b6995fbf29..53833ab81424 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1855,10 +1855,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, mutex_lock(&xef->vm.lock); err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); mutex_unlock(&xef->vm.lock); - if (err) { - xe_vm_close_and_put(vm); - return err; - } + if (err) + goto err_close_and_put; if (xe->info.has_asid) { mutex_lock(&xe->usm.lock); @@ -1866,11 +1864,9 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, XA_LIMIT(1, XE_MAX_ASID - 1), &xe->usm.next_asid, GFP_KERNEL); mutex_unlock(&xe->usm.lock); - if (err < 0) { - xe_vm_close_and_put(vm); - return err; - } - err = 0; + if (err < 0) + goto err_free_id; + vm->usm.asid = asid; } @@ -1888,6 +1884,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, #endif return 0; + +err_free_id: + mutex_lock(&xef->vm.lock); + xa_erase(&xef->vm.xa, id); + mutex_unlock(&xef->vm.lock); +err_close_and_put: + xe_vm_close_and_put(vm); + + return err; } int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, From 9e3a13f3eef6b14a26cc2660ca2f43f0e46b4318 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 22 Jan 2024 19:12:42 -0800 Subject: [PATCH 140/347] drm/xe: Remove PVC from xe_wa kunit tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the PCI IDs for PVC weren't added to the xe driver, the xe_wa tests should not try to create a fake PVC device since they can't find the right PCI ID. Fix bugs when running kunit: # xe_wa_gt: ASSERTION FAILED at drivers/gpu/drm/xe/tests/xe_wa_test.c:111 Expected ret == 0, but ret == -19 (0xffffffffffffffed) [FAILED] PVC (B0) # xe_wa_gt: ASSERTION FAILED at drivers/gpu/drm/xe/tests/xe_wa_test.c:111 Expected ret == 0, but ret == -19 (0xffffffffffffffed) [FAILED] PVC (B1) # xe_wa_gt: ASSERTION FAILED at drivers/gpu/drm/xe/tests/xe_wa_test.c:111 Expected ret == 0, but ret == -19 (0xffffffffffffffed) [FAILED] PVC (C0) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240123031242.3548724-1-lucas.demarchi@intel.com (cherry picked from commit ab5ae65fb25d06c38a6617a628b964828adb4786) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/tests/xe_wa_test.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index a53c22a19582..b4715b78ef3b 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -74,9 +74,6 @@ static const struct platform_test_case cases[] = { SUBPLATFORM_CASE(DG2, G11, B1), SUBPLATFORM_CASE(DG2, G12, A0), SUBPLATFORM_CASE(DG2, G12, A1), - PLATFORM_CASE(PVC, B0), - PLATFORM_CASE(PVC, B1), - PLATFORM_CASE(PVC, C0), GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), From 56062d60f117dccfb5281869e0ab61e090baf864 Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Wed, 10 Jan 2024 15:01:22 +0200 Subject: [PATCH 141/347] x86/entry/ia32: Ensure s32 is sign extended to s64 Presently ia32 registers stored in ptregs are unconditionally cast to unsigned int by the ia32 stub. They are then cast to long when passed to __se_sys*, but will not be sign extended. This takes the sign of the syscall argument into account in the ia32 stub. It still casts to unsigned int to avoid implementation specific behavior. However then casts to int or unsigned int as necessary. So that the following cast to long sign extends the value. This fixes the io_pgetevents02 LTP test when compiled with -m32. Presently the systemcall io_pgetevents_time64() unexpectedly accepts -1 for the maximum number of events. It doesn't appear other systemcalls with signed arguments are effected because they all have compat variants defined and wired up. Fixes: ebeb8c82ffaf ("syscalls/x86: Use 'struct pt_regs' based syscall calling for IA32_EMULATION and x32") Suggested-by: Arnd Bergmann Signed-off-by: Richard Palethorpe Signed-off-by: Nikolay Borisov Signed-off-by: Thomas Gleixner Reviewed-by: Arnd Bergmann Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240110130122.3836513-1-nik.borisov@suse.com Link: https://lore.kernel.org/ltp/20210921130127.24131-1-rpalethorpe@suse.com/ --- arch/x86/include/asm/syscall_wrapper.h | 25 +++++++++++++++++++++---- include/linux/syscalls.h | 1 + 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index 21f9407be5d3..7e88705e907f 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -58,12 +58,29 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); ,,regs->di,,regs->si,,regs->dx \ ,,regs->r10,,regs->r8,,regs->r9) \ + +/* SYSCALL_PT_ARGS is Adapted from s390x */ +#define SYSCALL_PT_ARG6(m, t1, t2, t3, t4, t5, t6) \ + SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5), m(t6, (regs->bp)) +#define SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5) \ + SYSCALL_PT_ARG4(m, t1, t2, t3, t4), m(t5, (regs->di)) +#define SYSCALL_PT_ARG4(m, t1, t2, t3, t4) \ + SYSCALL_PT_ARG3(m, t1, t2, t3), m(t4, (regs->si)) +#define SYSCALL_PT_ARG3(m, t1, t2, t3) \ + SYSCALL_PT_ARG2(m, t1, t2), m(t3, (regs->dx)) +#define SYSCALL_PT_ARG2(m, t1, t2) \ + SYSCALL_PT_ARG1(m, t1), m(t2, (regs->cx)) +#define SYSCALL_PT_ARG1(m, t1) m(t1, (regs->bx)) +#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__) + +#define __SC_COMPAT_CAST(t, a) \ + (__typeof(__builtin_choose_expr(__TYPE_IS_L(t), 0, 0U))) \ + (unsigned int)a + /* Mapping of registers to parameters for syscalls on i386 */ #define SC_IA32_REGS_TO_ARGS(x, ...) \ - __MAP(x,__SC_ARGS \ - ,,(unsigned int)regs->bx,,(unsigned int)regs->cx \ - ,,(unsigned int)regs->dx,,(unsigned int)regs->si \ - ,,(unsigned int)regs->di,,(unsigned int)regs->bp) + SYSCALL_PT_ARGS(x, __SC_COMPAT_CAST, \ + __MAP(x, __SC_TYPE, __VA_ARGS__)) \ #define __SYS_STUB0(abi, name) \ long __##abi##_##name(const struct pt_regs *regs); \ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index cdba4d0c6d4a..77eb9b0e7685 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -128,6 +128,7 @@ struct mnt_id_req; #define __TYPE_IS_LL(t) (__TYPE_AS(t, 0LL) || __TYPE_AS(t, 0ULL)) #define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a #define __SC_CAST(t, a) (__force t) a +#define __SC_TYPE(t, a) t #define __SC_ARGS(t, a) a #define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long)) From b184c8c2889ceef0a137c7d0567ef9fe3d92276e Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Mon, 22 Jan 2024 16:57:15 +0800 Subject: [PATCH 142/347] genirq: Initialize resend_node hlist for all interrupt descriptors For a CONFIG_SPARSE_IRQ=n kernel, early_irq_init() is supposed to initialize all interrupt descriptors. It does except for irq_desc::resend_node, which ia only initialized for the first descriptor. Use the indexed decriptor and not the base pointer to address that. Fixes: bc06a9e08742 ("genirq: Use hlist for managing resend handlers") Signed-off-by: Dawei Li Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240122085716.2999875-5-dawei.li@shingroup.cn --- kernel/irq/irqdesc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 27ca1c866f29..371eb1711d34 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -600,7 +600,7 @@ int __init early_irq_init(void) mutex_init(&desc[i].request_mutex); init_waitqueue_head(&desc[i].wait_for_threads); desc_set_defaults(i, &desc[i], node, NULL, NULL); - irq_resend_init(desc); + irq_resend_init(&desc[i]); } return arch_early_irq_init(); } From 2f8c7c3715f2c6fb51a4ecc0905c04dd78a3da29 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 24 Jan 2024 13:24:24 +0000 Subject: [PATCH 143/347] spi: Raise limit on number of chip selects As reported by Guenter the limit we've got on the number of chip selects is set too low for some systems, raise the limit. We should really remove the hard coded limit but this is needed as a fix so let's do the simple thing and raise the limit for now. Fixes: 4d8ff6b0991d ("spi: Add multi-cs memories support in SPI core") Reported-by: Guenter Roeck Suggested-by: Guenter Roeck Signed-off-by: Mark Brown Link: https://msgid.link/r/20240124-spi-multi-cs-max-v2-1-df6fc5ab1abc@kernel.org Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 471fe2ff9066..600fbd5daf68 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -21,7 +21,7 @@ #include /* Max no. of CS supported per spi device */ -#define SPI_CS_CNT_MAX 4 +#define SPI_CS_CNT_MAX 16 struct dma_chan; struct software_node; From e169bd4fb2b36c4b2bee63c35c740c85daeb2e86 Mon Sep 17 00:00:00 2001 From: Maksim Kiselev Date: Wed, 24 Jan 2024 10:24:36 +0300 Subject: [PATCH 144/347] aoe: avoid potential deadlock at set_capacity Move set_capacity() outside of the section procected by (&d->lock). To avoid possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- [1] lock(&bdev->bd_size_lock); local_irq_disable(); [2] lock(&d->lock); [3] lock(&bdev->bd_size_lock); [4] lock(&d->lock); *** DEADLOCK *** Where [1](&bdev->bd_size_lock) hold by zram_add()->set_capacity(). [2]lock(&d->lock) hold by aoeblk_gdalloc(). And aoeblk_gdalloc() is trying to acquire [3](&bdev->bd_size_lock) at set_capacity() call. In this situation an attempt to acquire [4]lock(&d->lock) from aoecmd_cfg_rsp() will lead to deadlock. So the simplest solution is breaking lock dependency [2](&d->lock) -> [3](&bdev->bd_size_lock) by moving set_capacity() outside. Signed-off-by: Maksim Kiselev Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240124072436.3745720-2-bigunclemax@gmail.com Signed-off-by: Jens Axboe --- drivers/block/aoe/aoeblk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index d2dbf8aaccb5..b1b47d88f5db 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -333,6 +333,7 @@ aoeblk_gdalloc(void *vp) struct gendisk *gd; mempool_t *mp; struct blk_mq_tag_set *set; + sector_t ssize; ulong flags; int late = 0; int err; @@ -396,7 +397,7 @@ aoeblk_gdalloc(void *vp) gd->minors = AOE_PARTITIONS; gd->fops = &aoe_bdops; gd->private_data = d; - set_capacity(gd, d->ssize); + ssize = d->ssize; snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", d->aoemajor, d->aoeminor); @@ -405,6 +406,8 @@ aoeblk_gdalloc(void *vp) spin_unlock_irqrestore(&d->lock, flags); + set_capacity(gd, ssize); + err = device_add_disk(NULL, gd, aoe_attr_groups); if (err) goto out_disk_cleanup; From d1b163aa0749706379055e40a52cf7a851abf9dc Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 23 Jan 2024 13:09:26 +0100 Subject: [PATCH 145/347] Revert "drivers/firmware: Move sysfb_init() from device_initcall to subsys_initcall_sync" This reverts commit 60aebc9559492cea6a9625f514a8041717e3a2e4. Commit 60aebc9559492cea ("drivers/firmware: Move sysfb_init() from device_initcall to subsys_initcall_sync") messes up initialization order of the graphics drivers and leads to blank displays on some systems. So revert the commit. To make the display drivers fully independent from initialization order requires to track framebuffer memory by device and independently from the loaded drivers. The kernel currently lacks the infrastructure to do so. Reported-by: Jaak Ristioja Closes: https://lore.kernel.org/dri-devel/ZUnNi3q3yB3zZfTl@P70.localdomain/T/#t Reported-by: Huacai Chen Closes: https://lore.kernel.org/dri-devel/20231108024613.2898921-1-chenhuacai@loongson.cn/ Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10133 Signed-off-by: Thomas Zimmermann Cc: Javier Martinez Canillas Cc: Thorsten Leemhuis Cc: Jani Nikula Cc: stable@vger.kernel.org # v6.5+ Reviewed-by: Javier Martinez Canillas Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240123120937.27736-1-tzimmermann@suse.de --- drivers/firmware/sysfb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 82fcfd29bc4d..3c197db42c9d 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -128,4 +128,4 @@ unlock_mutex: } /* must execute after PCI subsystem for EFI quirks */ -subsys_initcall_sync(sysfb_init); +device_initcall(sysfb_init); From 520d9708979349dcf6e044eac51efc43788b5cec Mon Sep 17 00:00:00 2001 From: Brandon Brnich Date: Mon, 11 Dec 2023 14:59:19 -0600 Subject: [PATCH 146/347] dt-bindings: media: Remove K3 Family Prefix from Compatible K3 family prefix is not included in other TI compatible strings. Remove this prefix to keep naming convention consistent. Fixes: de4b9f7e371a ("dt-bindings: media: wave5: add yaml devicetree bindings") Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/all/CAMuHMdUYOq=q1j=d+Eac28hthOUAaNUkuvxmRu-mUN1pLKq69g@mail.gmail.com/ Signed-off-by: Brandon Brnich Acked-by: Krzysztof Kozlowski Reviewed-by: Nishanth Menon Signed-off-by: Hans Verkuil --- Documentation/devicetree/bindings/media/cnm,wave521c.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/media/cnm,wave521c.yaml b/Documentation/devicetree/bindings/media/cnm,wave521c.yaml index 6d5569e77b7a..6a11c1d11fb5 100644 --- a/Documentation/devicetree/bindings/media/cnm,wave521c.yaml +++ b/Documentation/devicetree/bindings/media/cnm,wave521c.yaml @@ -17,7 +17,7 @@ properties: compatible: items: - enum: - - ti,k3-j721s2-wave521c + - ti,j721s2-wave521c - const: cnm,wave521c reg: @@ -53,7 +53,7 @@ additionalProperties: false examples: - | vpu: video-codec@12345678 { - compatible = "ti,k3-j721s2-wave521c", "cnm,wave521c"; + compatible = "ti,j721s2-wave521c", "cnm,wave521c"; reg = <0x12345678 0x1000>; clocks = <&clks 42>; interrupts = <42>; From c14d17a32568679385d32fe7a23994560c12772c Mon Sep 17 00:00:00 2001 From: Brandon Brnich Date: Mon, 11 Dec 2023 14:59:20 -0600 Subject: [PATCH 147/347] media: chips-media: wave5: Remove K3 References Change compatible string to match dt bindings for TI devices. K3 family prefix should not be included as it deviates from naming convention. Fixes: 9707a6254a8a ("media: chips-media: wave5: Add the v4l2 layer") Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/all/CAMuHMdUYOq=q1j=d+Eac28hthOUAaNUkuvxmRu-mUN1pLKq69g@mail.gmail.com/ Signed-off-by: Brandon Brnich Reviewed-by: Nishanth Menon Signed-off-by: Hans Verkuil --- drivers/media/platform/chips-media/wave5/wave5-vpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu.c b/drivers/media/platform/chips-media/wave5/wave5-vpu.c index bfe4caa79cc9..0d90b5820bef 100644 --- a/drivers/media/platform/chips-media/wave5/wave5-vpu.c +++ b/drivers/media/platform/chips-media/wave5/wave5-vpu.c @@ -272,7 +272,7 @@ static const struct wave5_match_data ti_wave521c_data = { }; static const struct of_device_id wave5_dt_ids[] = { - { .compatible = "ti,k3-j721s2-wave521c", .data = &ti_wave521c_data }, + { .compatible = "ti,j721s2-wave521c", .data = &ti_wave521c_data }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, wave5_dt_ids); From 78e23c3e914a5e678a20286fb09bc218017af89a Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Thu, 18 Jan 2024 13:14:52 +0100 Subject: [PATCH 148/347] media: media videobuf2: Stop direct calls to queue num_buffers field Use vb2_get_num_buffers() to avoid using queue num_buffers field directly. This allows us to change how the number of buffers is computed in the future. Fixes: d055a76c0065 ("media: core: Report the maximum possible number of buffers for the queue") Signed-off-by: Benjamin Gaignard Acked-by: Tomasz Figa Signed-off-by: Hans Verkuil --- drivers/media/common/videobuf2/videobuf2-core.c | 2 +- drivers/media/common/videobuf2/videobuf2-v4l2.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index 41a832dd1426..b6bf8f232f48 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -989,7 +989,7 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, bool no_previous_buffers = !q_num_bufs; int ret = 0; - if (q->num_buffers == q->max_num_buffers) { + if (q_num_bufs == q->max_num_buffers) { dprintk(q, 1, "maximum number of buffers already allocated\n"); return -ENOBUFS; } diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c index 54d572c3b515..6380155d8575 100644 --- a/drivers/media/common/videobuf2/videobuf2-v4l2.c +++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c @@ -1029,7 +1029,7 @@ int vb2_ioctl_create_bufs(struct file *file, void *priv, int res = vb2_verify_memory_type(vdev->queue, p->memory, p->format.type); - p->index = vdev->queue->num_buffers; + p->index = vb2_get_num_buffers(vdev->queue); fill_buf_caps(vdev->queue, &p->capabilities); validate_memory_flags(vdev->queue, p->memory, &p->flags); /* From b32431b753217d8d45b018443b1a7aac215921fb Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 19 Jan 2024 10:03:23 +0100 Subject: [PATCH 149/347] media: vb2: refactor setting flags and caps, fix missing cap Several functions implementing VIDIOC_REQBUFS and _CREATE_BUFS all use almost the same code to fill in the flags and capability fields. Refactor this into a new vb2_set_flags_and_caps() function that replaces the old fill_buf_caps() and validate_memory_flags() functions. This also fixes a bug where vb2_ioctl_create_bufs() would not set the V4L2_BUF_CAP_SUPPORTS_MAX_NUM_BUFFERS cap and also not fill in the max_num_buffers field. Fixes: d055a76c0065 ("media: core: Report the maximum possible number of buffers for the queue") Signed-off-by: Hans Verkuil Reviewed-by: Benjamin Gaignard Acked-by: Tomasz Figa --- .../media/common/videobuf2/videobuf2-v4l2.c | 53 +++++++++---------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c index 6380155d8575..c575198e8354 100644 --- a/drivers/media/common/videobuf2/videobuf2-v4l2.c +++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c @@ -671,8 +671,20 @@ int vb2_querybuf(struct vb2_queue *q, struct v4l2_buffer *b) } EXPORT_SYMBOL(vb2_querybuf); -static void fill_buf_caps(struct vb2_queue *q, u32 *caps) +static void vb2_set_flags_and_caps(struct vb2_queue *q, u32 memory, + u32 *flags, u32 *caps, u32 *max_num_bufs) { + if (!q->allow_cache_hints || memory != V4L2_MEMORY_MMAP) { + /* + * This needs to clear V4L2_MEMORY_FLAG_NON_COHERENT only, + * but in order to avoid bugs we zero out all bits. + */ + *flags = 0; + } else { + /* Clear all unknown flags. */ + *flags &= V4L2_MEMORY_FLAG_NON_COHERENT; + } + *caps = V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS; if (q->io_modes & VB2_MMAP) *caps |= V4L2_BUF_CAP_SUPPORTS_MMAP; @@ -686,21 +698,9 @@ static void fill_buf_caps(struct vb2_queue *q, u32 *caps) *caps |= V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS; if (q->supports_requests) *caps |= V4L2_BUF_CAP_SUPPORTS_REQUESTS; -} - -static void validate_memory_flags(struct vb2_queue *q, - int memory, - u32 *flags) -{ - if (!q->allow_cache_hints || memory != V4L2_MEMORY_MMAP) { - /* - * This needs to clear V4L2_MEMORY_FLAG_NON_COHERENT only, - * but in order to avoid bugs we zero out all bits. - */ - *flags = 0; - } else { - /* Clear all unknown flags. */ - *flags &= V4L2_MEMORY_FLAG_NON_COHERENT; + if (max_num_bufs) { + *max_num_bufs = q->max_num_buffers; + *caps |= V4L2_BUF_CAP_SUPPORTS_MAX_NUM_BUFFERS; } } @@ -709,8 +709,8 @@ int vb2_reqbufs(struct vb2_queue *q, struct v4l2_requestbuffers *req) int ret = vb2_verify_memory_type(q, req->memory, req->type); u32 flags = req->flags; - fill_buf_caps(q, &req->capabilities); - validate_memory_flags(q, req->memory, &flags); + vb2_set_flags_and_caps(q, req->memory, &flags, + &req->capabilities, NULL); req->flags = flags; return ret ? ret : vb2_core_reqbufs(q, req->memory, req->flags, &req->count); @@ -751,11 +751,9 @@ int vb2_create_bufs(struct vb2_queue *q, struct v4l2_create_buffers *create) int ret = vb2_verify_memory_type(q, create->memory, f->type); unsigned i; - fill_buf_caps(q, &create->capabilities); - validate_memory_flags(q, create->memory, &create->flags); create->index = vb2_get_num_buffers(q); - create->max_num_buffers = q->max_num_buffers; - create->capabilities |= V4L2_BUF_CAP_SUPPORTS_MAX_NUM_BUFFERS; + vb2_set_flags_and_caps(q, create->memory, &create->flags, + &create->capabilities, &create->max_num_buffers); if (create->count == 0) return ret != -EBUSY ? ret : 0; @@ -1006,8 +1004,8 @@ int vb2_ioctl_reqbufs(struct file *file, void *priv, int res = vb2_verify_memory_type(vdev->queue, p->memory, p->type); u32 flags = p->flags; - fill_buf_caps(vdev->queue, &p->capabilities); - validate_memory_flags(vdev->queue, p->memory, &flags); + vb2_set_flags_and_caps(vdev->queue, p->memory, &flags, + &p->capabilities, NULL); p->flags = flags; if (res) return res; @@ -1026,12 +1024,11 @@ int vb2_ioctl_create_bufs(struct file *file, void *priv, struct v4l2_create_buffers *p) { struct video_device *vdev = video_devdata(file); - int res = vb2_verify_memory_type(vdev->queue, p->memory, - p->format.type); + int res = vb2_verify_memory_type(vdev->queue, p->memory, p->format.type); p->index = vb2_get_num_buffers(vdev->queue); - fill_buf_caps(vdev->queue, &p->capabilities); - validate_memory_flags(vdev->queue, p->memory, &p->flags); + vb2_set_flags_and_caps(vdev->queue, p->memory, &p->flags, + &p->capabilities, &p->max_num_buffers); /* * If count == 0, then just check if memory and type are valid. * Any -EBUSY result from vb2_verify_memory_type can be mapped to 0. From f9f4b0c6425eb9ffd9bf62b8b8143e786b6ba695 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 24 Jan 2024 17:41:01 +0000 Subject: [PATCH 150/347] spi: cs42l43: Handle error from devm_pm_runtime_enable As it devm_pm_runtime_enable can fail due to memory allocations, it is best to handle the error. Suggested-by: Andy Shevchenko Signed-off-by: Charles Keepax Link: https://msgid.link/r/20240124174101.2270249-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi-cs42l43.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-cs42l43.c b/drivers/spi/spi-cs42l43.c index f13073e12593..b24190526ce9 100644 --- a/drivers/spi/spi-cs42l43.c +++ b/drivers/spi/spi-cs42l43.c @@ -244,7 +244,10 @@ static int cs42l43_spi_probe(struct platform_device *pdev) priv->ctlr->use_gpio_descriptors = true; priv->ctlr->auto_runtime_pm = true; - devm_pm_runtime_enable(priv->dev); + ret = devm_pm_runtime_enable(priv->dev); + if (ret) + return ret; + pm_runtime_idle(priv->dev); regmap_write(priv->regmap, CS42L43_TRAN_CONFIG6, CS42L43_FIFO_SIZE - 1); From 096386a5bcf02e4053dc8b6cacb09a8493eeee4f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 22 Jan 2024 18:08:51 -0500 Subject: [PATCH 151/347] bcachefs: discard path uses unlock_long() Some (bad) devices can have really terrible discard latency; we don't want them blocking memory reclaim and causing warnings. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 10704f2d3af5..fd3e175d8342 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1715,7 +1715,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, * This works without any other locks because this is the only * thread that removes items from the need_discard tree */ - bch2_trans_unlock(trans); + bch2_trans_unlock_long(trans); blkdev_issue_discard(ca->disk_sb.bdev, k.k->p.offset * ca->mi.bucket_size, ca->mi.bucket_size, From 97cf5d53b4812dcb52c13fda700dad5aa8d3446c Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Wed, 24 Jan 2024 11:19:45 +0800 Subject: [PATCH 152/347] erofs: get rid of unneeded GFP_NOFS Clean up some leftovers since there is no way for EROFS to be called again from a reclaim context. Signed-off-by: Jingbo Xu Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20240124031945.130782-1-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- fs/erofs/fscache.c | 2 +- fs/erofs/inode.c | 2 +- fs/erofs/utils.c | 2 +- fs/erofs/zdata.c | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index bc12030393b2..5ff90026fd43 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -459,7 +459,7 @@ static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &erofs_fscache_meta_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL); inode->i_blkbits = EROFS_SB(sb)->blkszbits; inode->i_private = ctx; diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 3d616dea55dc..36e638e8b53a 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -60,7 +60,7 @@ static void *erofs_read_inode(struct erofs_buf *buf, } else { const unsigned int gotten = sb->s_blocksize - *ofs; - copied = kmalloc(vi->inode_isize, GFP_NOFS); + copied = kmalloc(vi->inode_isize, GFP_KERNEL); if (!copied) { err = -ENOMEM; goto err_out; diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c index 5dea308764b4..e146d09151af 100644 --- a/fs/erofs/utils.c +++ b/fs/erofs/utils.c @@ -81,7 +81,7 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb, repeat: xa_lock(&sbi->managed_pslots); pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index, - NULL, grp, GFP_NOFS); + NULL, grp, GFP_KERNEL); if (pre) { if (xa_is_err(pre)) { pre = ERR_PTR(xa_err(pre)); diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 692c0c39be63..583c062cd0e4 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -230,7 +230,7 @@ static int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter, struct page *nextpage = *candidate_bvpage; if (!nextpage) { - nextpage = erofs_allocpage(pagepool, GFP_NOFS); + nextpage = erofs_allocpage(pagepool, GFP_KERNEL); if (!nextpage) return -ENOMEM; set_page_private(nextpage, Z_EROFS_SHORTLIVED_PAGE); @@ -302,7 +302,7 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int size) if (nrpages > pcs->maxpages) continue; - pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS); + pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL); if (!pcl) return ERR_PTR(-ENOMEM); pcl->pclustersize = size; @@ -694,7 +694,7 @@ static void z_erofs_cache_invalidate_folio(struct folio *folio, DBG_BUGON(stop > folio_size(folio) || stop < length); if (offset == 0 && stop == folio_size(folio)) - while (!z_erofs_cache_release_folio(folio, GFP_NOFS)) + while (!z_erofs_cache_release_folio(folio, 0)) cond_resched(); } @@ -713,7 +713,7 @@ int erofs_init_managed_cache(struct super_block *sb) set_nlink(inode, 1); inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &z_erofs_cache_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL); EROFS_SB(sb)->managed_cache = inode; return 0; } From d76779dd3681c01a4c6c3cae4d0627c9083e0ee6 Mon Sep 17 00:00:00 2001 From: Quanquan Cao Date: Wed, 24 Jan 2024 17:15:26 +0800 Subject: [PATCH 153/347] =?UTF-8?q?cxl/region=EF=BC=9AFix=20overflow=20iss?= =?UTF-8?q?ue=20in=20alloc=5Fhpa()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Creating a region with 16 memory devices caused a problem. The div_u64_rem function, used for dividing an unsigned 64-bit number by a 32-bit one, faced an issue when SZ_256M * p->interleave_ways. The result surpassed the maximum limit of the 32-bit divisor (4G), leading to an overflow and a remainder of 0. note: At this point, p->interleave_ways is 16, meaning 16 * 256M = 4G To fix this issue, I replaced the div_u64_rem function with div64_u64_rem and adjusted the type of the remainder. Signed-off-by: Quanquan Cao Reviewed-by: Dave Jiang Fixes: 23a22cd1c98b ("cxl/region: Allocate HPA capacity to regions") Cc: Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 0f05692bfec3..ce0e2d82bb2b 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -525,7 +525,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_region_params *p = &cxlr->params; struct resource *res; - u32 remainder = 0; + u64 remainder = 0; lockdep_assert_held_write(&cxl_region_rwsem); @@ -545,7 +545,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid))) return -ENXIO; - div_u64_rem(size, SZ_256M * p->interleave_ways, &remainder); + div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder); if (remainder) return -EINVAL; From ebeae8adf89d9a82359f6659b1663d09beec2faa Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Sun, 21 Jan 2024 15:35:06 +0800 Subject: [PATCH 154/347] ksmbd: fix global oob in ksmbd_nl_policy Similar to a reported issue (check the commit b33fb5b801c6 ("net: qualcomm: rmnet: fix global oob in rmnet_policy"), my local fuzzer finds another global out-of-bounds read for policy ksmbd_nl_policy. See bug trace below: ================================================================== BUG: KASAN: global-out-of-bounds in validate_nla lib/nlattr.c:386 [inline] BUG: KASAN: global-out-of-bounds in __nla_validate_parse+0x24af/0x2750 lib/nlattr.c:600 Read of size 1 at addr ffffffff8f24b100 by task syz-executor.1/62810 CPU: 0 PID: 62810 Comm: syz-executor.1 Tainted: G N 6.1.0 #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x8b/0xb3 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:284 [inline] print_report+0x172/0x475 mm/kasan/report.c:395 kasan_report+0xbb/0x1c0 mm/kasan/report.c:495 validate_nla lib/nlattr.c:386 [inline] __nla_validate_parse+0x24af/0x2750 lib/nlattr.c:600 __nla_parse+0x3e/0x50 lib/nlattr.c:697 __nlmsg_parse include/net/netlink.h:748 [inline] genl_family_rcv_msg_attrs_parse.constprop.0+0x1b0/0x290 net/netlink/genetlink.c:565 genl_family_rcv_msg_doit+0xda/0x330 net/netlink/genetlink.c:734 genl_family_rcv_msg net/netlink/genetlink.c:833 [inline] genl_rcv_msg+0x441/0x780 net/netlink/genetlink.c:850 netlink_rcv_skb+0x14f/0x410 net/netlink/af_netlink.c:2540 genl_rcv+0x24/0x40 net/netlink/genetlink.c:861 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x54e/0x800 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x930/0xe50 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0x154/0x190 net/socket.c:734 ____sys_sendmsg+0x6df/0x840 net/socket.c:2482 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 __sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fdd66a8f359 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fdd65e00168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fdd66bbcf80 RCX: 00007fdd66a8f359 RDX: 0000000000000000 RSI: 0000000020000500 RDI: 0000000000000003 RBP: 00007fdd66ada493 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffc84b81aff R14: 00007fdd65e00300 R15: 0000000000022000 The buggy address belongs to the variable: ksmbd_nl_policy+0x100/0xa80 The buggy address belongs to the physical page: page:0000000034f47940 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1ccc4b flags: 0x200000000001000(reserved|node=0|zone=2) raw: 0200000000001000 ffffea00073312c8 ffffea00073312c8 0000000000000000 raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffffffff8f24b000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffffffff8f24b080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffffffff8f24b100: f9 f9 f9 f9 00 00 f9 f9 f9 f9 f9 f9 00 00 07 f9 ^ ffffffff8f24b180: f9 f9 f9 f9 00 05 f9 f9 f9 f9 f9 f9 00 00 00 05 ffffffff8f24b200: f9 f9 f9 f9 00 00 03 f9 f9 f9 f9 f9 00 00 04 f9 ================================================================== To fix it, add a placeholder named __KSMBD_EVENT_MAX and let KSMBD_EVENT_MAX to be its original value - 1 according to what other netlink families do. Also change two sites that refer the KSMBD_EVENT_MAX to correct value. Cc: stable@vger.kernel.org Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Signed-off-by: Lin Ma Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/ksmbd_netlink.h | 3 ++- fs/smb/server/transport_ipc.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index b7521e41402e..0ebf91ffa236 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -304,7 +304,8 @@ enum ksmbd_event { KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST, KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE = 15, - KSMBD_EVENT_MAX + __KSMBD_EVENT_MAX, + KSMBD_EVENT_MAX = __KSMBD_EVENT_MAX - 1 }; /* diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index b49d47bdafc9..f29bb03f0dc4 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -74,7 +74,7 @@ static int handle_unsupported_event(struct sk_buff *skb, struct genl_info *info) static int handle_generic_event(struct sk_buff *skb, struct genl_info *info); static int ksmbd_ipc_heartbeat_request(void); -static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX] = { +static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX + 1] = { [KSMBD_EVENT_UNSPEC] = { .len = 0, }, @@ -403,7 +403,7 @@ static int handle_generic_event(struct sk_buff *skb, struct genl_info *info) return -EPERM; #endif - if (type >= KSMBD_EVENT_MAX) { + if (type > KSMBD_EVENT_MAX) { WARN_ON(1); return -EINVAL; } From 9f3fe29d77ef4e7f7cb5c4c8c59f6dc373e57e78 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 17 Jan 2024 19:22:36 +0100 Subject: [PATCH 155/347] md: fix a suspicious RCU usage warning RCU protection was removed in the commit 2d32777d60de ("raid1: remove rcu protection to access rdev from conf"). However, the code in fix_read_error does rcu_dereference outside rcu_read_lock - this triggers the following warning. The warning is triggered by a LVM2 test shell/integrity-caching.sh. This commit removes rcu_dereference. ============================= WARNING: suspicious RCU usage 6.7.0 #2 Not tainted ----------------------------- drivers/md/raid1.c:2265 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 no locks held by mdX_raid1/1859. stack backtrace: CPU: 2 PID: 1859 Comm: mdX_raid1 Not tainted 6.7.0 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Call Trace: dump_stack_lvl+0x60/0x70 lockdep_rcu_suspicious+0x153/0x1b0 raid1d+0x1732/0x1750 [raid1] ? lock_acquire+0x9f/0x270 ? finish_wait+0x3d/0x80 ? md_thread+0xf7/0x130 [md_mod] ? lock_release+0xaa/0x230 ? md_register_thread+0xd0/0xd0 [md_mod] md_thread+0xa0/0x130 [md_mod] ? housekeeping_test_cpu+0x30/0x30 kthread+0xdc/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x28/0x40 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork_asm+0x11/0x20 Signed-off-by: Mikulas Patocka Fixes: ca294b34aaf3 ("md/raid1: support read error check") Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/51539879-e1ca-fde3-b8b4-8934ddedcbc@redhat.com --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 24f0d799fd98..286f8b16c7bd 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2262,7 +2262,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio) int sectors = r1_bio->sectors; int read_disk = r1_bio->read_disk; struct mddev *mddev = conf->mddev; - struct md_rdev *rdev = rcu_dereference(conf->mirrors[read_disk].rdev); + struct md_rdev *rdev = conf->mirrors[read_disk].rdev; if (exceed_read_errors(mddev, rdev)) { r1_bio->bios[r1_bio->read_disk] = IO_BLOCKED; From a3bdcdd022c68942a774e8e63424cc11c85aab78 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 25 Jan 2024 14:32:26 +0800 Subject: [PATCH 156/347] HID: hidraw: fix a problem of memory leak in hidraw_release() 'struct hidraw_list' is a circular queue whose head can be smaller than tail. Using 'list->tail != list->head' to release all memory that should be released. Fixes: a5623a203cff ("HID: hidraw: fix memory leak in hidraw_release()") Signed-off-by: Su Hui Reviewed-by: Dan Carpenter Signed-off-by: Jiri Kosina --- drivers/hid/hidraw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 13c8dd8cd350..2bc762d31ac7 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -357,8 +357,11 @@ static int hidraw_release(struct inode * inode, struct file * file) down_write(&minors_rwsem); spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags); - for (int i = list->tail; i < list->head; i++) - kfree(list->buffer[i].value); + while (list->tail != list->head) { + kfree(list->buffer[list->tail].value); + list->buffer[list->tail].value = NULL; + list->tail = (list->tail + 1) & (HIDRAW_BUFFER_SIZE - 1); + } list_del(&list->node); spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags); kfree(list); From 644649553508b9bacf0fc7a5bdc4f9e0165576a5 Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Mon, 22 Jan 2024 18:23:50 +0100 Subject: [PATCH 157/347] clocksource: Skip watchdog check for large watchdog intervals There have been reports of the watchdog marking clocksources unstable on machines with 8 NUMA nodes: clocksource: timekeeping watchdog on CPU373: Marking clocksource 'tsc' as unstable because the skew is too large: clocksource: 'hpet' wd_nsec: 14523447520 clocksource: 'tsc' cs_nsec: 14524115132 The measured clocksource skew - the absolute difference between cs_nsec and wd_nsec - was 668 microseconds: cs_nsec - wd_nsec = 14524115132 - 14523447520 = 667612 The kernel used 200 microseconds for the uncertainty_margin of both the clocksource and watchdog, resulting in a threshold of 400 microseconds (the md variable). Both the cs_nsec and the wd_nsec value indicate that the readout interval was circa 14.5 seconds. The observed behaviour is that watchdog checks failed for large readout intervals on 8 NUMA node machines. This indicates that the size of the skew was directly proportinal to the length of the readout interval on those machines. The measured clocksource skew, 668 microseconds, was evaluated against a threshold (the md variable) that is suited for readout intervals of roughly WATCHDOG_INTERVAL, i.e. HZ >> 1, which is 0.5 second. The intention of 2e27e793e280 ("clocksource: Reduce clocksource-skew threshold") was to tighten the threshold for evaluating skew and set the lower bound for the uncertainty_margin of clocksources to twice WATCHDOG_MAX_SKEW. Later in c37e85c135ce ("clocksource: Loosen clocksource watchdog constraints"), the WATCHDOG_MAX_SKEW constant was increased to 125 microseconds to fit the limit of NTP, which is able to use a clocksource that suffers from up to 500 microseconds of skew per second. Both the TSC and the HPET use default uncertainty_margin. When the readout interval gets stretched the default uncertainty_margin is no longer a suitable lower bound for evaluating skew - it imposes a limit that is far stricter than the skew with which NTP can deal. The root causes of the skew being directly proportinal to the length of the readout interval are: * the inaccuracy of the shift/mult pairs of clocksources and the watchdog * the conversion to nanoseconds is imprecise for large readout intervals Prevent this by skipping the current watchdog check if the readout interval exceeds 2 * WATCHDOG_INTERVAL. Considering the maximum readout interval of 2 * WATCHDOG_INTERVAL, the current default uncertainty margin (of the TSC and HPET) corresponds to a limit on clocksource skew of 250 ppm (microseconds of skew per second). To keep the limit imposed by NTP (500 microseconds of skew per second) for all possible readout intervals, the margins would have to be scaled so that the threshold value is proportional to the length of the actual readout interval. As for why the readout interval may get stretched: Since the watchdog is executed in softirq context the expiration of the watchdog timer can get severely delayed on account of a ksoftirqd thread not getting to run in a timely manner. Surely, a system with such belated softirq execution is not working well and the scheduling issue should be looked into but the clocksource watchdog should be able to deal with it accordingly. Fixes: 2e27e793e280 ("clocksource: Reduce clocksource-skew threshold") Suggested-by: Feng Tang Signed-off-by: Jiri Wiesner Signed-off-by: Thomas Gleixner Tested-by: Paul E. McKenney Reviewed-by: Feng Tang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240122172350.GA740@incl --- kernel/time/clocksource.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c108ed8a9804..3052b1f1168e 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -99,6 +99,7 @@ static u64 suspend_start; * Interval: 0.5sec. */ #define WATCHDOG_INTERVAL (HZ >> 1) +#define WATCHDOG_INTERVAL_MAX_NS ((2 * WATCHDOG_INTERVAL) * (NSEC_PER_SEC / HZ)) /* * Threshold: 0.0312s, when doubled: 0.0625s. @@ -134,6 +135,7 @@ static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); static DEFINE_SPINLOCK(watchdog_lock); static int watchdog_running; static atomic_t watchdog_reset_pending; +static int64_t watchdog_max_interval; static inline void clocksource_watchdog_lock(unsigned long *flags) { @@ -399,8 +401,8 @@ static inline void clocksource_reset_watchdog(void) static void clocksource_watchdog(struct timer_list *unused) { u64 csnow, wdnow, cslast, wdlast, delta; + int64_t wd_nsec, cs_nsec, interval; int next_cpu, reset_pending; - int64_t wd_nsec, cs_nsec; struct clocksource *cs; enum wd_read_status read_ret; unsigned long extra_wait = 0; @@ -470,6 +472,27 @@ static void clocksource_watchdog(struct timer_list *unused) if (atomic_read(&watchdog_reset_pending)) continue; + /* + * The processing of timer softirqs can get delayed (usually + * on account of ksoftirqd not getting to run in a timely + * manner), which causes the watchdog interval to stretch. + * Skew detection may fail for longer watchdog intervals + * on account of fixed margins being used. + * Some clocksources, e.g. acpi_pm, cannot tolerate + * watchdog intervals longer than a few seconds. + */ + interval = max(cs_nsec, wd_nsec); + if (unlikely(interval > WATCHDOG_INTERVAL_MAX_NS)) { + if (system_state > SYSTEM_SCHEDULING && + interval > 2 * watchdog_max_interval) { + watchdog_max_interval = interval; + pr_warn("Long readout interval, skipping watchdog check: cs_nsec: %lld wd_nsec: %lld\n", + cs_nsec, wd_nsec); + } + watchdog_timer.expires = jiffies; + continue; + } + /* Check the deviation from the watchdog clocksource. */ md = cs->uncertainty_margin + watchdog->uncertainty_margin; if (abs(cs_nsec - wd_nsec) > md) { From f9f031dd21a7ce13a13862fa5281d32e1029c70f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Jan 2024 23:21:31 +0200 Subject: [PATCH 158/347] drm/i915/psr: Only allow PSR in LPSP mode on HSW non-ULT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On HSW non-ULT (or at least on Dell Latitude E6540) external displays start to flicker when we enable PSR on the eDP. We observe a much higher SR and PC6 residency than should be possible with an external display, and indeen much higher than what we observe with eDP disabled and only the external display enabled. Looks like the hardware is somehow ignoring the fact that the external display is active during PSR. I wasn't able to redproduce this on my HSW ULT machine, or BDW. So either there's something specific about this particular laptop (eg. some unknown firmware thing) or the issue is limited to just non-ULT HSW systems. All known registers that could affect this look perfectly reasonable on the affected machine. As a workaround let's unmask the LPSP event to prevent PSR entry except while in LPSP mode (only pipe A + eDP active). This will prevent PSR entry entirely when multiple pipes are active. The one slight downside is that we now also prevent PSR entry when driving eDP with pipe B or C, but I think that's a reasonable tradeoff to avoid having to implement a more complex workaround. Cc: stable@vger.kernel.org Fixes: 783d8b80871f ("drm/i915/psr: Re-enable PSR1 on hsw/bdw") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10092 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240118212131.31868-1-ville.syrjala@linux.intel.com Reviewed-by: Jouni Högander (cherry picked from commit 94501c3ca6400e463ff6cc0c9cf4a2feb6a9205d) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_psr.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 8f702c3fc62d..57bbf3e3af92 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1525,8 +1525,18 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, * can rely on frontbuffer tracking. */ mask = EDP_PSR_DEBUG_MASK_MEMUP | - EDP_PSR_DEBUG_MASK_HPD | - EDP_PSR_DEBUG_MASK_LPSP; + EDP_PSR_DEBUG_MASK_HPD; + + /* + * For some unknown reason on HSW non-ULT (or at least on + * Dell Latitude E6540) external displays start to flicker + * when PSR is enabled on the eDP. SR/PC6 residency is much + * higher than should be possible with an external display. + * As a workaround leave LPSP unmasked to prevent PSR entry + * when external displays are active. + */ + if (DISPLAY_VER(dev_priv) >= 8 || IS_HASWELL_ULT(dev_priv)) + mask |= EDP_PSR_DEBUG_MASK_LPSP; if (DISPLAY_VER(dev_priv) < 20) mask |= EDP_PSR_DEBUG_MASK_MAX_SLEEP; From 9a574ea9069be30b835a3da772c039993c43369b Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Mon, 22 Jan 2024 15:35:34 -0800 Subject: [PATCH 159/347] tick/sched: Preserve number of idle sleeps across CPU hotplug events Commit 71fee48f ("tick-sched: Fix idle and iowait sleeptime accounting vs CPU hotplug") preserved total idle sleep time and iowait sleeptime across CPU hotplug events. Similar reasoning applies to the number of idle calls and idle sleeps to get the proper average of sleep time per idle invocation. Preserve those fields too. Fixes: 71fee48f ("tick-sched: Fix idle and iowait sleeptime accounting vs CPU hotplug") Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240122233534.3094238-1-tim.c.chen@linux.intel.com --- kernel/time/tick-sched.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d2501673028d..01fb50c1b17e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1577,6 +1577,7 @@ void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); ktime_t idle_sleeptime, iowait_sleeptime; + unsigned long idle_calls, idle_sleeps; # ifdef CONFIG_HIGH_RES_TIMERS if (ts->sched_timer.base) @@ -1585,9 +1586,13 @@ void tick_cancel_sched_timer(int cpu) idle_sleeptime = ts->idle_sleeptime; iowait_sleeptime = ts->iowait_sleeptime; + idle_calls = ts->idle_calls; + idle_sleeps = ts->idle_sleeps; memset(ts, 0, sizeof(*ts)); ts->idle_sleeptime = idle_sleeptime; ts->iowait_sleeptime = iowait_sleeptime; + ts->idle_calls = idle_calls; + ts->idle_sleeps = idle_sleeps; } #endif From f1cc6aceecd04964304786530eaa4ad87d90b972 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 22 Jan 2024 13:09:43 +0100 Subject: [PATCH 160/347] accel/ivpu: Fix dev open/close races with unbind - Add context_list_lock to synchronize user context addition/removal - Use drm_dev_enter() to prevent unbinding the device during ivpu_open() and vpu address allocation Signed-off-by: Jacek Lawrynowicz Reviewed-by: Wachowski, Karol Link: https://patchwork.freedesktop.org/patch/msgid/20240122120945.1150728-2-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 107 +++++++++++++++++++++------------- drivers/accel/ivpu/ivpu_drv.h | 3 +- drivers/accel/ivpu/ivpu_gem.c | 18 +++--- drivers/accel/ivpu/ivpu_gem.h | 2 +- drivers/accel/ivpu/ivpu_job.c | 16 ++--- drivers/accel/ivpu/ivpu_job.h | 2 +- 6 files changed, 84 insertions(+), 64 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 546c0899bb9e..9418c73ee8ef 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -66,22 +67,20 @@ struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv) return file_priv; } -struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id) +static void file_priv_unbind(struct ivpu_device *vdev, struct ivpu_file_priv *file_priv) { - struct ivpu_file_priv *file_priv; + mutex_lock(&file_priv->lock); + if (file_priv->bound) { + ivpu_dbg(vdev, FILE, "file_priv unbind: ctx %u\n", file_priv->ctx.id); - xa_lock_irq(&vdev->context_xa); - file_priv = xa_load(&vdev->context_xa, id); - /* file_priv may still be in context_xa during file_priv_release() */ - if (file_priv && !kref_get_unless_zero(&file_priv->ref)) - file_priv = NULL; - xa_unlock_irq(&vdev->context_xa); - - if (file_priv) - ivpu_dbg(vdev, KREF, "file_priv get by id: ctx %u refcount %u\n", - file_priv->ctx.id, kref_read(&file_priv->ref)); - - return file_priv; + ivpu_cmdq_release_all_locked(file_priv); + ivpu_jsm_context_release(vdev, file_priv->ctx.id); + ivpu_bo_unbind_all_bos_from_context(vdev, &file_priv->ctx); + ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); + file_priv->bound = false; + drm_WARN_ON(&vdev->drm, !xa_erase_irq(&vdev->context_xa, file_priv->ctx.id)); + } + mutex_unlock(&file_priv->lock); } static void file_priv_release(struct kref *ref) @@ -89,13 +88,15 @@ static void file_priv_release(struct kref *ref) struct ivpu_file_priv *file_priv = container_of(ref, struct ivpu_file_priv, ref); struct ivpu_device *vdev = file_priv->vdev; - ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id); + ivpu_dbg(vdev, FILE, "file_priv release: ctx %u bound %d\n", + file_priv->ctx.id, (bool)file_priv->bound); + + pm_runtime_get_sync(vdev->drm.dev); + mutex_lock(&vdev->context_list_lock); + file_priv_unbind(vdev, file_priv); + mutex_unlock(&vdev->context_list_lock); + pm_runtime_put_autosuspend(vdev->drm.dev); - ivpu_cmdq_release_all(file_priv); - ivpu_jsm_context_release(vdev, file_priv->ctx.id); - ivpu_bo_remove_all_bos_from_context(vdev, &file_priv->ctx); - ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); - drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv); mutex_destroy(&file_priv->lock); kfree(file_priv); } @@ -232,49 +233,53 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) struct ivpu_device *vdev = to_ivpu_device(dev); struct ivpu_file_priv *file_priv; u32 ctx_id; - void *old; - int ret; + int idx, ret; - ret = xa_alloc_irq(&vdev->context_xa, &ctx_id, NULL, vdev->context_xa_limit, GFP_KERNEL); - if (ret) { - ivpu_err(vdev, "Failed to allocate context id: %d\n", ret); - return ret; - } + if (!drm_dev_enter(dev, &idx)) + return -ENODEV; file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); if (!file_priv) { ret = -ENOMEM; - goto err_xa_erase; + goto err_dev_exit; } file_priv->vdev = vdev; + file_priv->bound = true; kref_init(&file_priv->ref); mutex_init(&file_priv->lock); + mutex_lock(&vdev->context_list_lock); + + ret = xa_alloc_irq(&vdev->context_xa, &ctx_id, file_priv, + vdev->context_xa_limit, GFP_KERNEL); + if (ret) { + ivpu_err(vdev, "Failed to allocate context id: %d\n", ret); + goto err_unlock; + } + ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id); if (ret) - goto err_mutex_destroy; + goto err_xa_erase; - old = xa_store_irq(&vdev->context_xa, ctx_id, file_priv, GFP_KERNEL); - if (xa_is_err(old)) { - ret = xa_err(old); - ivpu_err(vdev, "Failed to store context %u: %d\n", ctx_id, ret); - goto err_ctx_fini; - } + mutex_unlock(&vdev->context_list_lock); + drm_dev_exit(idx); + + file->driver_priv = file_priv; ivpu_dbg(vdev, FILE, "file_priv create: ctx %u process %s pid %d\n", ctx_id, current->comm, task_pid_nr(current)); - file->driver_priv = file_priv; return 0; -err_ctx_fini: - ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); -err_mutex_destroy: - mutex_destroy(&file_priv->lock); - kfree(file_priv); err_xa_erase: xa_erase_irq(&vdev->context_xa, ctx_id); +err_unlock: + mutex_unlock(&vdev->context_list_lock); + mutex_destroy(&file_priv->lock); + kfree(file_priv); +err_dev_exit: + drm_dev_exit(idx); return ret; } @@ -531,6 +536,10 @@ static int ivpu_dev_init(struct ivpu_device *vdev) lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); INIT_LIST_HEAD(&vdev->bo_list); + ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock); + if (ret) + goto err_xa_destroy; + ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock); if (ret) goto err_xa_destroy; @@ -602,14 +611,30 @@ err_xa_destroy: return ret; } +static void ivpu_bo_unbind_all_user_contexts(struct ivpu_device *vdev) +{ + struct ivpu_file_priv *file_priv; + unsigned long ctx_id; + + mutex_lock(&vdev->context_list_lock); + + xa_for_each(&vdev->context_xa, ctx_id, file_priv) + file_priv_unbind(vdev, file_priv); + + mutex_unlock(&vdev->context_list_lock); +} + static void ivpu_dev_fini(struct ivpu_device *vdev) { ivpu_pm_disable(vdev); ivpu_shutdown(vdev); if (IVPU_WA(d3hot_after_power_off)) pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); + + ivpu_jobs_abort_all(vdev); ivpu_job_done_consumer_fini(vdev); ivpu_pm_cancel_recovery(vdev); + ivpu_bo_unbind_all_user_contexts(vdev); ivpu_ipc_fini(vdev); ivpu_fw_fini(vdev); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 7a6bc1918780..069ace4adb2d 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -115,6 +115,7 @@ struct ivpu_device { struct ivpu_mmu_context gctx; struct ivpu_mmu_context rctx; + struct mutex context_list_lock; /* Protects user context addition/removal */ struct xarray context_xa; struct xa_limit context_xa_limit; @@ -147,6 +148,7 @@ struct ivpu_file_priv { struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES]; struct ivpu_mmu_context ctx; bool has_mmu_faults; + bool bound; }; extern int ivpu_dbg_mask; @@ -162,7 +164,6 @@ extern bool ivpu_disable_mmu_cont_pages; extern int ivpu_test_mode; struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv); -struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id); void ivpu_file_priv_put(struct ivpu_file_priv **link); int ivpu_boot(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 16f3035b91c0..e9ddbe9f50eb 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -77,7 +77,10 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range) { struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - int ret; + int idx, ret; + + if (!drm_dev_enter(&vdev->drm, &idx)) + return -ENODEV; mutex_lock(&bo->lock); @@ -93,6 +96,8 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx, mutex_unlock(&bo->lock); + drm_dev_exit(idx); + return ret; } @@ -128,14 +133,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) dma_resv_unlock(bo->base.base.resv); } -static void ivpu_bo_unbind(struct ivpu_bo *bo) -{ - mutex_lock(&bo->lock); - ivpu_bo_unbind_locked(bo); - mutex_unlock(&bo->lock); -} - -void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) +void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) { struct ivpu_bo *bo; @@ -239,7 +237,7 @@ static void ivpu_bo_free(struct drm_gem_object *obj) drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ)); - ivpu_bo_unbind(bo); + ivpu_bo_unbind_locked(bo); mutex_destroy(&bo->lock); drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1); diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index 5cb1dda3e58e..a8559211c70d 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -25,7 +25,7 @@ struct ivpu_bo { }; int ivpu_bo_pin(struct ivpu_bo *bo); -void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); +void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size); struct ivpu_bo *ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags); diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 82e40bb4803c..4fed0c05e051 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -112,16 +112,14 @@ static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engin } } -void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv) +void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) { int i; - mutex_lock(&file_priv->lock); + lockdep_assert_held(&file_priv->lock); for (i = 0; i < IVPU_NUM_ENGINES; i++) ivpu_cmdq_release_locked(file_priv, i); - - mutex_unlock(&file_priv->lock); } /* @@ -161,15 +159,13 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev) struct ivpu_file_priv *file_priv; unsigned long ctx_id; - xa_for_each(&vdev->context_xa, ctx_id, file_priv) { - file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id); - if (!file_priv) - continue; + mutex_lock(&vdev->context_list_lock); + xa_for_each(&vdev->context_xa, ctx_id, file_priv) ivpu_cmdq_reset_all(file_priv); - ivpu_file_priv_put(&file_priv); - } + mutex_unlock(&vdev->context_list_lock); + } static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h index 45a2f2ec82e5..bd22cf8e39e7 100644 --- a/drivers/accel/ivpu/ivpu_job.h +++ b/drivers/accel/ivpu/ivpu_job.h @@ -56,7 +56,7 @@ struct ivpu_job { int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv); +void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv); void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev); void ivpu_job_done_consumer_init(struct ivpu_device *vdev); From 264b271d12d0af794f3d1dc3793e6589fae8c66c Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 22 Jan 2024 13:09:44 +0100 Subject: [PATCH 161/347] accel/ivpu: Improve stability of ivpu_submit_ioctl() - Wake up the device as late as possible - Remove job reference counting in order to simplify the code - Don't put jobs that are not fully submitted on submitted_jobs_xa in order to avoid potential races with reset/recovery Signed-off-by: Jacek Lawrynowicz Reviewed-by: Wachowski, Karol Link: https://patchwork.freedesktop.org/patch/msgid/20240122120945.1150728-3-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 139 +++++++++++++++------------------- drivers/accel/ivpu/ivpu_job.h | 1 - 2 files changed, 62 insertions(+), 78 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 4fed0c05e051..d9b47a04b35f 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -125,7 +125,7 @@ void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) /* * Mark the doorbell as unregistered and reset job queue pointers. * This function needs to be called when the VPU hardware is restarted - * and FW looses job queue state. The next time job queue is used it + * and FW loses job queue state. The next time job queue is used it * will be registered again. */ static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine) @@ -239,60 +239,32 @@ static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev) return &fence->base; } -static void job_get(struct ivpu_job *job, struct ivpu_job **link) +static void ivpu_job_destroy(struct ivpu_job *job) { struct ivpu_device *vdev = job->vdev; - - kref_get(&job->ref); - *link = job; - - ivpu_dbg(vdev, KREF, "Job get: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); -} - -static void job_release(struct kref *ref) -{ - struct ivpu_job *job = container_of(ref, struct ivpu_job, ref); - struct ivpu_device *vdev = job->vdev; u32 i; + ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d", + job->job_id, job->file_priv->ctx.id, job->engine_idx); + for (i = 0; i < job->bo_count; i++) if (job->bos[i]) drm_gem_object_put(&job->bos[i]->base.base); dma_fence_put(job->done_fence); ivpu_file_priv_put(&job->file_priv); - - ivpu_dbg(vdev, KREF, "Job released: id %u\n", job->job_id); kfree(job); - - /* Allow the VPU to get suspended, must be called after ivpu_file_priv_put() */ - ivpu_rpm_put(vdev); -} - -static void job_put(struct ivpu_job *job) -{ - struct ivpu_device *vdev = job->vdev; - - ivpu_dbg(vdev, KREF, "Job put: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); - kref_put(&job->ref, job_release); } static struct ivpu_job * -ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) +ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) { struct ivpu_device *vdev = file_priv->vdev; struct ivpu_job *job; - int ret; - - ret = ivpu_rpm_get(vdev); - if (ret < 0) - return NULL; job = kzalloc(struct_size(job, bos, bo_count), GFP_KERNEL); if (!job) - goto err_rpm_put; - - kref_init(&job->ref); + return NULL; job->vdev = vdev; job->engine_idx = engine_idx; @@ -306,17 +278,14 @@ ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) job->file_priv = ivpu_file_priv_get(file_priv); ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx); - return job; err_free_job: kfree(job); -err_rpm_put: - ivpu_rpm_put(vdev); return NULL; } -static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status) +static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status) { struct ivpu_job *job; @@ -333,9 +302,10 @@ static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status) ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); + ivpu_job_destroy(job); ivpu_stop_job_timeout_detection(vdev); - job_put(job); + ivpu_rpm_put(vdev); return 0; } @@ -345,10 +315,10 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev) unsigned long id; xa_for_each(&vdev->submitted_jobs_xa, id, job) - ivpu_job_done(vdev, id, VPU_JSM_STATUS_ABORTED); + ivpu_job_signal_and_destroy(vdev, id, VPU_JSM_STATUS_ABORTED); } -static int ivpu_direct_job_submission(struct ivpu_job *job) +static int ivpu_job_submit(struct ivpu_job *job) { struct ivpu_file_priv *file_priv = job->file_priv; struct ivpu_device *vdev = job->vdev; @@ -356,53 +326,65 @@ static int ivpu_direct_job_submission(struct ivpu_job *job) struct ivpu_cmdq *cmdq; int ret; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->lock); cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx); if (!cmdq) { - ivpu_warn(vdev, "Failed get job queue, ctx %d engine %d\n", - file_priv->ctx.id, job->engine_idx); + ivpu_warn_ratelimited(vdev, "Failed get job queue, ctx %d engine %d\n", + file_priv->ctx.id, job->engine_idx); ret = -EINVAL; - goto err_unlock; + goto err_unlock_file_priv; } job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK; - job_get(job, &job); - ret = xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL); + xa_lock(&vdev->submitted_jobs_xa); + ret = __xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL); if (ret) { - ivpu_warn_ratelimited(vdev, "Failed to allocate job id: %d\n", ret); - goto err_job_put; + ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n", + file_priv->ctx.id); + ret = -EBUSY; + goto err_unlock_submitted_jobs_xa; } ret = ivpu_cmdq_push_job(cmdq, job); if (ret) - goto err_xa_erase; + goto err_erase_xa; ivpu_start_job_timeout_detection(vdev); - ivpu_dbg(vdev, JOB, "Job submitted: id %3u addr 0x%llx ctx %2d engine %d next %d\n", - job->job_id, job->cmd_buf_vpu_addr, file_priv->ctx.id, - job->engine_idx, cmdq->jobq->header.tail); - - if (ivpu_test_mode & IVPU_TEST_MODE_NULL_HW) { - ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); + if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) { cmdq->jobq->header.head = cmdq->jobq->header.tail; wmb(); /* Flush WC buffer for jobq header */ } else { ivpu_cmdq_ring_db(vdev, cmdq); } + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d addr 0x%llx next %d\n", + job->job_id, file_priv->ctx.id, job->engine_idx, + job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); + + xa_unlock(&vdev->submitted_jobs_xa); + mutex_unlock(&file_priv->lock); + + if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) + ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); + return 0; -err_xa_erase: - xa_erase(&vdev->submitted_jobs_xa, job->job_id); -err_job_put: - job_put(job); -err_unlock: +err_erase_xa: + __xa_erase(&vdev->submitted_jobs_xa, job->job_id); +err_unlock_submitted_jobs_xa: + xa_unlock(&vdev->submitted_jobs_xa); +err_unlock_file_priv: mutex_unlock(&file_priv->lock); + ivpu_rpm_put(vdev); return ret; } @@ -508,44 +490,47 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) params->buffer_count * sizeof(u32)); if (ret) { ret = -EFAULT; - goto free_handles; + goto err_free_handles; } if (!drm_dev_enter(&vdev->drm, &idx)) { ret = -ENODEV; - goto free_handles; + goto err_free_handles; } ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", file_priv->ctx.id, params->buffer_count); - job = ivpu_create_job(file_priv, params->engine, params->buffer_count); + job = ivpu_job_create(file_priv, params->engine, params->buffer_count); if (!job) { ivpu_err(vdev, "Failed to create job\n"); ret = -ENOMEM; - goto dev_exit; + goto err_exit_dev; } ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count, params->commands_offset); if (ret) { - ivpu_err(vdev, "Failed to prepare job, ret %d\n", ret); - goto job_put; + ivpu_err(vdev, "Failed to prepare job: %d\n", ret); + goto err_destroy_job; } - ret = ivpu_direct_job_submission(job); - if (ret) { - dma_fence_signal(job->done_fence); - ivpu_err(vdev, "Failed to submit job to the HW, ret %d\n", ret); - } + ret = ivpu_job_submit(job); + if (ret) + goto err_signal_fence; -job_put: - job_put(job); -dev_exit: drm_dev_exit(idx); -free_handles: kfree(buf_handles); + return ret; +err_signal_fence: + dma_fence_signal(job->done_fence); +err_destroy_job: + ivpu_job_destroy(job); +err_exit_dev: + drm_dev_exit(idx); +err_free_handles: + kfree(buf_handles); return ret; } @@ -567,7 +552,7 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr, } payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload; - ret = ivpu_job_done(vdev, payload->job_id, payload->job_status); + ret = ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status); if (!ret && !xa_empty(&vdev->submitted_jobs_xa)) ivpu_start_job_timeout_detection(vdev); } diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h index bd22cf8e39e7..ca4984071cc7 100644 --- a/drivers/accel/ivpu/ivpu_job.h +++ b/drivers/accel/ivpu/ivpu_job.h @@ -43,7 +43,6 @@ struct ivpu_cmdq { will update the job status */ struct ivpu_job { - struct kref ref; struct ivpu_device *vdev; struct ivpu_file_priv *file_priv; struct dma_fence *done_fence; From 27d19268cf394f2c78db732be0cb31852eeadb0a Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 22 Jan 2024 13:09:45 +0100 Subject: [PATCH 162/347] accel/ivpu: Improve recovery and reset support - Synchronize job submission with reset/recovery using reset_lock - Always print recovery reason and call diagnose_failure() - Don't allow for autosupend during recovery - Prevent immediate autosuspend after reset/recovery - Prevent force_recovery for issuing TDR when device is suspended - Reset VPU instead triggering recovery after changing debugfs params Signed-off-by: Jacek Lawrynowicz Reviewed-by: Wachowski, Karol Link: https://patchwork.freedesktop.org/patch/msgid/20240122120945.1150728-4-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_debugfs.c | 20 ++++++++++--- drivers/accel/ivpu/ivpu_hw_37xx.c | 14 +++------ drivers/accel/ivpu/ivpu_hw_40xx.c | 8 +++--- drivers/accel/ivpu/ivpu_ipc.c | 6 ++-- drivers/accel/ivpu/ivpu_job.c | 2 ++ drivers/accel/ivpu/ivpu_mmu.c | 14 ++++----- drivers/accel/ivpu/ivpu_pm.c | 48 ++++++++++++++++++++----------- drivers/accel/ivpu/ivpu_pm.h | 6 ++-- 8 files changed, 70 insertions(+), 48 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index 19035230563d..7cb962e21453 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -102,7 +102,7 @@ static int reset_pending_show(struct seq_file *s, void *v) { struct ivpu_device *vdev = seq_to_ivpu(s); - seq_printf(s, "%d\n", atomic_read(&vdev->pm->in_reset)); + seq_printf(s, "%d\n", atomic_read(&vdev->pm->reset_pending)); return 0; } @@ -130,7 +130,9 @@ dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size fw->dvfs_mode = dvfs_mode; - ivpu_pm_schedule_recovery(vdev); + ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); + if (ret) + return ret; return size; } @@ -190,7 +192,10 @@ fw_profiling_freq_fops_write(struct file *file, const char __user *user_buf, return ret; ivpu_hw_profiling_freq_drive(vdev, enable); - ivpu_pm_schedule_recovery(vdev); + + ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); + if (ret) + return ret; return size; } @@ -301,11 +306,18 @@ static ssize_t ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) { struct ivpu_device *vdev = file->private_data; + int ret; if (!size) return -EINVAL; - ivpu_pm_schedule_recovery(vdev); + ret = ivpu_rpm_get(vdev); + if (ret) + return ret; + + ivpu_pm_trigger_recovery(vdev, "debugfs"); + flush_work(&vdev->pm->recovery_work); + ivpu_rpm_put(vdev); return size; } diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index 574cdeefb66b..f15a93d83057 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -875,24 +875,18 @@ static void ivpu_hw_37xx_irq_disable(struct ivpu_device *vdev) static void ivpu_hw_37xx_irq_wdt_nce_handler(struct ivpu_device *vdev) { - ivpu_err_ratelimited(vdev, "WDT NCE irq\n"); - - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ"); } static void ivpu_hw_37xx_irq_wdt_mss_handler(struct ivpu_device *vdev) { - ivpu_err_ratelimited(vdev, "WDT MSS irq\n"); - ivpu_hw_wdt_disable(vdev); - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ"); } static void ivpu_hw_37xx_irq_noc_firewall_handler(struct ivpu_device *vdev) { - ivpu_err_ratelimited(vdev, "NOC Firewall irq\n"); - - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ"); } /* Handler for IRQs from VPU core (irqV) */ @@ -970,7 +964,7 @@ static bool ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq) REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, status); if (schedule_recovery) - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "Buttress IRQ"); return true; } diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index c02061f299e2..704288084f37 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -1049,18 +1049,18 @@ static void ivpu_hw_40xx_irq_disable(struct ivpu_device *vdev) static void ivpu_hw_40xx_irq_wdt_nce_handler(struct ivpu_device *vdev) { /* TODO: For LNN hang consider engine reset instead of full recovery */ - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ"); } static void ivpu_hw_40xx_irq_wdt_mss_handler(struct ivpu_device *vdev) { ivpu_hw_wdt_disable(vdev); - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ"); } static void ivpu_hw_40xx_irq_noc_firewall_handler(struct ivpu_device *vdev) { - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ"); } /* Handler for IRQs from VPU core (irqV) */ @@ -1154,7 +1154,7 @@ static bool ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq) REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status); if (schedule_recovery) - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "Buttress IRQ"); return true; } diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index e86621f16f85..fa66c39b57ec 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -343,10 +343,8 @@ int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *r hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, &hb_resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); - if (hb_ret == -ETIMEDOUT) { - ivpu_hw_diagnose_failure(vdev); - ivpu_pm_schedule_recovery(vdev); - } + if (hb_ret == -ETIMEDOUT) + ivpu_pm_trigger_recovery(vdev, "IPC timeout"); return ret; } diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index d9b47a04b35f..0440bee3ecaf 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -515,7 +515,9 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto err_destroy_job; } + down_read(&vdev->pm->reset_lock); ret = ivpu_job_submit(job); + up_read(&vdev->pm->reset_lock); if (ret) goto err_signal_fence; diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index 1f813625aab3..9a3122ffce03 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -887,7 +887,6 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev) void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) { - bool schedule_recovery = false; u32 *event; u32 ssid; @@ -897,14 +896,13 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) ivpu_mmu_dump_event(vdev, event); ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]); - if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) - schedule_recovery = true; - else - ivpu_mmu_user_context_mark_invalid(vdev, ssid); - } + if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) { + ivpu_pm_trigger_recovery(vdev, "MMU event"); + return; + } - if (schedule_recovery) - ivpu_pm_schedule_recovery(vdev); + ivpu_mmu_user_context_mark_invalid(vdev, ssid); + } } void ivpu_mmu_evtq_dump(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 8407f1d8c99c..f501f27ebafd 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -112,6 +112,14 @@ static void ivpu_pm_recovery_work(struct work_struct *work) char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; int ret; + ivpu_err(vdev, "Recovering the VPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter)); + + ret = pm_runtime_resume_and_get(vdev->drm.dev); + if (ret) + ivpu_err(vdev, "Failed to resume VPU: %d\n", ret); + + ivpu_fw_log_dump(vdev); + retry: ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) { @@ -123,11 +131,13 @@ retry: ivpu_err(vdev, "Failed to reset VPU: %d\n", ret); kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); + pm_runtime_mark_last_busy(vdev->drm.dev); + pm_runtime_put_autosuspend(vdev->drm.dev); } -void ivpu_pm_schedule_recovery(struct ivpu_device *vdev) +void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason) { - struct ivpu_pm_info *pm = vdev->pm; + ivpu_err(vdev, "Recovery triggered by %s\n", reason); if (ivpu_disable_recovery) { ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n"); @@ -139,10 +149,11 @@ void ivpu_pm_schedule_recovery(struct ivpu_device *vdev) return; } - /* Schedule recovery if it's not in progress */ - if (atomic_cmpxchg(&pm->in_reset, 0, 1) == 0) { - ivpu_hw_irq_disable(vdev); - queue_work(system_long_wq, &pm->recovery_work); + /* Trigger recovery if it's not in progress */ + if (atomic_cmpxchg(&vdev->pm->reset_pending, 0, 1) == 0) { + ivpu_hw_diagnose_failure(vdev); + ivpu_hw_irq_disable(vdev); /* Disable IRQ early to protect from IRQ storm */ + queue_work(system_long_wq, &vdev->pm->recovery_work); } } @@ -150,12 +161,8 @@ static void ivpu_job_timeout_work(struct work_struct *work) { struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work); struct ivpu_device *vdev = pm->vdev; - unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr; - ivpu_err(vdev, "TDR detected, timeout %lu ms", timeout_ms); - ivpu_hw_diagnose_failure(vdev); - - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "TDR"); } void ivpu_start_job_timeout_detection(struct ivpu_device *vdev) @@ -228,6 +235,9 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev) bool hw_is_idle = true; int ret; + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); + drm_WARN_ON(&vdev->drm, work_pending(&vdev->pm->recovery_work)); + ivpu_dbg(vdev, PM, "Runtime suspend..\n"); if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) { @@ -310,11 +320,12 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) { struct ivpu_device *vdev = pci_get_drvdata(pdev); - pm_runtime_get_sync(vdev->drm.dev); - ivpu_dbg(vdev, PM, "Pre-reset..\n"); atomic_inc(&vdev->pm->reset_counter); - atomic_set(&vdev->pm->in_reset, 1); + atomic_set(&vdev->pm->reset_pending, 1); + + pm_runtime_get_sync(vdev->drm.dev); + down_write(&vdev->pm->reset_lock); ivpu_prepare_for_reset(vdev); ivpu_hw_reset(vdev); ivpu_pm_prepare_cold_boot(vdev); @@ -331,9 +342,11 @@ void ivpu_pm_reset_done_cb(struct pci_dev *pdev) ret = ivpu_resume(vdev); if (ret) ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); - atomic_set(&vdev->pm->in_reset, 0); + up_write(&vdev->pm->reset_lock); + atomic_set(&vdev->pm->reset_pending, 0); ivpu_dbg(vdev, PM, "Post-reset done.\n"); + pm_runtime_mark_last_busy(vdev->drm.dev); pm_runtime_put_autosuspend(vdev->drm.dev); } @@ -346,7 +359,10 @@ void ivpu_pm_init(struct ivpu_device *vdev) pm->vdev = vdev; pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; - atomic_set(&pm->in_reset, 0); + init_rwsem(&pm->reset_lock); + atomic_set(&pm->reset_pending, 0); + atomic_set(&pm->reset_counter, 0); + INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work); INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work); diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h index 97c6e0b0aa42..ec60fbeefefc 100644 --- a/drivers/accel/ivpu/ivpu_pm.h +++ b/drivers/accel/ivpu/ivpu_pm.h @@ -6,6 +6,7 @@ #ifndef __IVPU_PM_H__ #define __IVPU_PM_H__ +#include #include struct ivpu_device; @@ -14,8 +15,9 @@ struct ivpu_pm_info { struct ivpu_device *vdev; struct delayed_work job_timeout_work; struct work_struct recovery_work; - atomic_t in_reset; + struct rw_semaphore reset_lock; atomic_t reset_counter; + atomic_t reset_pending; bool is_warmboot; u32 suspend_reschedule_counter; }; @@ -37,7 +39,7 @@ int __must_check ivpu_rpm_get(struct ivpu_device *vdev); int __must_check ivpu_rpm_get_if_active(struct ivpu_device *vdev); void ivpu_rpm_put(struct ivpu_device *vdev); -void ivpu_pm_schedule_recovery(struct ivpu_device *vdev); +void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason); void ivpu_start_job_timeout_detection(struct ivpu_device *vdev); void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev); From b9328fd636bd50da89e792e135b234ba8e6fe59f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 24 Jan 2024 16:07:49 -0600 Subject: [PATCH 163/347] x86/CPU/AMD: Add more models to X86_FEATURE_ZEN5 Add model ranges starting at 0x20, 0x40 and 0x70 to the synthetic feature flag X86_FEATURE_ZEN5. Signed-off-by: Mario Limonciello Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240124220749.2983-1-mario.limonciello@amd.com --- arch/x86/kernel/cpu/amd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bc49e3b3aae0..f3abca334199 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -573,6 +573,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) case 0x1a: switch (c->x86_model) { case 0x00 ... 0x0f: + case 0x20 ... 0x2f: + case 0x40 ... 0x4f: + case 0x70 ... 0x7f: setup_force_cpu_cap(X86_FEATURE_ZEN5); break; default: From 20730e9b277873deeb6637339edcba64468f3da3 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 25 Jan 2024 17:04:01 +0200 Subject: [PATCH 164/347] ahci: add 43-bit DMA address quirk for ASMedia ASM1061 controllers With one of the on-board ASM1061 AHCI controllers (1b21:0612) on an ASUSTeK Pro WS WRX80E-SAGE SE WIFI mainboard, a controller hang was observed that was immediately preceded by the following kernel messages: ahci 0000:28:00.0: Using 64-bit DMA addresses ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00000 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00300 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00380 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00400 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00680 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00700 flags=0x0000] The first message is produced by code in drivers/iommu/dma-iommu.c which is accompanied by the following comment that seems to apply: /* * Try to use all the 32-bit PCI addresses first. The original SAC vs. * DAC reasoning loses relevance with PCIe, but enough hardware and * firmware bugs are still lurking out there that it's safest not to * venture into the 64-bit space until necessary. * * If your device goes wrong after seeing the notice then likely either * its driver is not setting DMA masks accurately, the hardware has * some inherent bug in handling >32-bit addresses, or not all the * expected address bits are wired up between the device and the IOMMU. */ Asking the ASM1061 on a discrete PCIe card to DMA from I/O virtual address 0xffffffff00000000 produces the following I/O page faults: vfio-pci 0000:07:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0x7ff00000000 flags=0x0010] vfio-pci 0000:07:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0x7ff00000500 flags=0x0010] Note that the upper 21 bits of the logged DMA address are zero. (When asking a different PCIe device in the same PCIe slot to DMA to the same I/O virtual address, we do see all the upper 32 bits of the DMA address as 1, so this is not an issue with the chipset or IOMMU configuration on the test system.) Also, hacking libahci to always set the upper 21 bits of all DMA addresses to 1 produces no discernible effect on the behavior of the ASM1061, and mkfs/mount/scrub/etc work as without this hack. This all strongly suggests that the ASM1061 has a 43 bit DMA address limit, and this commit therefore adds a quirk to deal with this limit. This issue probably applies to (some of) the other supported ASMedia parts as well, but we limit it to the PCI IDs known to refer to ASM1061 parts, as that's the only part we know for sure to be affected by this issue at this point. Link: https://lore.kernel.org/linux-ide/ZaZ2PIpEId-rl6jv@wantstofly.org/ Signed-off-by: Lennert Buytenhek [cassel: drop date from error messages in commit log] Signed-off-by: Niklas Cassel --- drivers/ata/ahci.c | 29 +++++++++++++++++++++++------ drivers/ata/ahci.h | 1 + 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 762c5d8b7c1a..d2460fa985b7 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -48,6 +48,7 @@ enum { enum board_ids { /* board IDs by feature in alphabetical order */ board_ahci, + board_ahci_43bit_dma, board_ahci_ign_iferr, board_ahci_low_power, board_ahci_no_debounce_delay, @@ -128,6 +129,13 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, }, + [board_ahci_43bit_dma] = { + AHCI_HFLAGS (AHCI_HFLAG_43BIT_ONLY), + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_ops, + }, [board_ahci_ign_iferr] = { AHCI_HFLAGS (AHCI_HFLAG_IGN_IRQ_IF_ERR), .flags = AHCI_FLAG_COMMON, @@ -597,11 +605,11 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(PROMISE, 0x3f20), board_ahci }, /* PDC42819 */ { PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */ - /* Asmedia */ + /* ASMedia */ { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */ { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci }, /* ASM1060 */ - { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci }, /* ASM1061 */ - { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci }, /* ASM1062 */ + { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci_43bit_dma }, /* ASM1061 */ + { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci_43bit_dma }, /* ASM1061/1062 */ { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */ { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */ { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */ @@ -954,11 +962,20 @@ static int ahci_pci_device_resume(struct device *dev) #endif /* CONFIG_PM */ -static int ahci_configure_dma_masks(struct pci_dev *pdev, int using_dac) +static int ahci_configure_dma_masks(struct pci_dev *pdev, + struct ahci_host_priv *hpriv) { - const int dma_bits = using_dac ? 64 : 32; + int dma_bits; int rc; + if (hpriv->cap & HOST_CAP_64) { + dma_bits = 64; + if (hpriv->flags & AHCI_HFLAG_43BIT_ONLY) + dma_bits = 43; + } else { + dma_bits = 32; + } + /* * If the device fixup already set the dma_mask to some non-standard * value, don't extend it here. This happens on STA2X11, for example. @@ -1931,7 +1948,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ahci_gtf_filter_workaround(host); /* initialize adapter */ - rc = ahci_configure_dma_masks(pdev, hpriv->cap & HOST_CAP_64); + rc = ahci_configure_dma_masks(pdev, hpriv); if (rc) return rc; diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 4bae95b06ae3..df8f8a1a3a34 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -247,6 +247,7 @@ enum { AHCI_HFLAG_SUSPEND_PHYS = BIT(26), /* handle PHYs during suspend/resume */ AHCI_HFLAG_NO_SXS = BIT(28), /* SXS not supported */ + AHCI_HFLAG_43BIT_ONLY = BIT(29), /* 43bit DMA addr limit */ /* ap->flags bits */ From 8412c47d68436b9f9a260039a4a773daa6824925 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 13 Jan 2024 10:03:51 +0100 Subject: [PATCH 165/347] ARM: dts: Fix TPM schema violations Since commit 26c9d152ebf3 ("dt-bindings: tpm: Consolidate TCG TIS bindings"), several issues are reported by "make dtbs_check" for ARM devicetrees: The nodename needs to be "tpm@0" rather than "tpmdev@0" and the compatible property needs to contain the chip's name in addition to the generic "tcg,tpm_tis-spi" or "tcg,tpm-tis-i2c": tpmdev@0: $nodename:0: 'tpmdev@0' does not match '^tpm(@[0-9a-f]+)?$' from schema $id: http://devicetree.org/schemas/tpm/tcg,tpm_tis-spi.yaml# tpm@2e: compatible: 'oneOf' conditional failed, one must be fixed: ['tcg,tpm-tis-i2c'] is too short from schema $id: http://devicetree.org/schemas/tpm/tcg,tpm-tis-i2c.yaml# Fix these schema violations. Aspeed Facebook BMCs use an Infineon SLB9670: https://lore.kernel.org/all/ZZSmMJ%2F%2Fl972Qbxu@fedora/ https://lore.kernel.org/all/ZZT4%2Fw2eVzMhtsPx@fedora/ https://lore.kernel.org/all/ZZTS0p1hdAchIbKp@heinlein.vulture-banana.ts.net/ Aspeed Tacoma uses a Nuvoton NPCT75X per commit 39d8a73c53a2 ("ARM: dts: aspeed: tacoma: Add TPM"). phyGATE-Tauri uses an Infineon SLB9670: https://lore.kernel.org/all/ab45c82485fa272f74adf560cbb58ee60cc42689.camel@phytec.de/ A single schema violation remains in am335x-moxa-uc-2100-common.dtsi because it is unknown which chip is used on the board. The devicetree's author has been asked for clarification but has not responded so far: https://lore.kernel.org/all/20231220090910.GA32182@wunner.de/ Signed-off-by: Lukas Wunner Reviewed-by: Patrick Williams Reviewed-by: Tao Ren Reviewed-by: Bruno Thomsen --- arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts | 4 ++-- arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts | 4 ++-- arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts | 2 +- arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi | 4 ++-- arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi | 2 +- arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts | 2 +- arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts index e899de681f47..5be0e8fd2633 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts +++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts @@ -45,8 +45,8 @@ num-chipselects = <1>; cs-gpios = <&gpio0 ASPEED_GPIO(Z, 0) GPIO_ACTIVE_LOW>; - tpmdev@0 { - compatible = "tcg,tpm_tis-spi"; + tpm@0 { + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; spi-max-frequency = <33000000>; reg = <0>; }; diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts index a677c827e758..5a8169bbda87 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts +++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts @@ -80,8 +80,8 @@ gpio-miso = <&gpio ASPEED_GPIO(R, 5) GPIO_ACTIVE_HIGH>; num-chipselects = <1>; - tpmdev@0 { - compatible = "tcg,tpm_tis-spi"; + tpm@0 { + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; spi-max-frequency = <33000000>; reg = <0>; }; diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts index 3f6010ef2b86..213023bc5aec 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts +++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts @@ -456,7 +456,7 @@ status = "okay"; tpm: tpm@2e { - compatible = "tcg,tpm-tis-i2c"; + compatible = "nuvoton,npct75x", "tcg,tpm-tis-i2c"; reg = <0x2e>; }; }; diff --git a/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi b/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi index 31590d3186a2..00e5887c926f 100644 --- a/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi +++ b/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi @@ -35,8 +35,8 @@ gpio-mosi = <&gpio0 ASPEED_GPIO(X, 4) GPIO_ACTIVE_HIGH>; gpio-miso = <&gpio0 ASPEED_GPIO(X, 5) GPIO_ACTIVE_HIGH>; - tpmdev@0 { - compatible = "tcg,tpm_tis-spi"; + tpm@0 { + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; spi-max-frequency = <33000000>; reg = <0>; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi index 44cc4ff1d0df..d12fb44aeb14 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi @@ -116,7 +116,7 @@ tpm_tis: tpm@1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_tpm>; - compatible = "tcg,tpm_tis-spi"; + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; reg = <1>; spi-max-frequency = <20000000>; interrupt-parent = <&gpio5>; diff --git a/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts b/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts index 3a723843d562..9984b343cdf0 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts +++ b/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts @@ -130,7 +130,7 @@ * TCG specification - Section 6.4.1 Clocking: * TPM shall support a SPI clock frequency range of 10-24 MHz. */ - st33htph: tpm-tis@0 { + st33htph: tpm@0 { compatible = "st,st33htpm-spi", "tcg,tpm_tis-spi"; reg = <0>; spi-max-frequency = <24000000>; diff --git a/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi b/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi index b8730aa52ce6..a59331aa58e5 100644 --- a/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi +++ b/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi @@ -217,7 +217,7 @@ pinctrl-names = "default"; pinctrl-0 = <&spi1_pins>; - tpm_spi_tis@0 { + tpm@0 { compatible = "tcg,tpm_tis-spi"; reg = <0>; spi-max-frequency = <500000>; From 5e2400f11d4deec444ac00b73e96267e057fbb37 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 13 Jan 2024 19:06:56 +0100 Subject: [PATCH 166/347] arm64: dts: Fix TPM schema violations Since commit 26c9d152ebf3 ("dt-bindings: tpm: Consolidate TCG TIS bindings"), several issues are reported by "make dtbs_check" for arm64 devicetrees: The compatible property needs to contain the chip's name in addition to the generic "tcg,tpm_tis-spi" and the nodename needs to be "tpm@0" rather than "cr50@0": tpm@1: compatible: ['tcg,tpm_tis-spi'] is too short from schema $id: http://devicetree.org/schemas/tpm/tcg,tpm_tis-spi.yaml# cr50@0: $nodename:0: 'cr50@0' does not match '^tpm(@[0-9a-f]+)?$' from schema $id: http://devicetree.org/schemas/tpm/google,cr50.yaml# Fix these schema violations. phyGATE-Tauri uses an Infineon SLB9670: https://lore.kernel.org/all/ab45c82485fa272f74adf560cbb58ee60cc42689.camel@phytec.de/ Gateworks Venice uses an Atmel ATTPM20P: https://trac.gateworks.com/wiki/tpm Signed-off-by: Lukas Wunner Acked-by: Heiko Stuebner Reviewed-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts | 2 +- arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi | 2 +- arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi | 2 +- arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts | 2 +- arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi | 2 +- arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi | 2 +- arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts | 2 +- arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts | 2 +- arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 2 +- arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi | 2 +- arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts | 2 +- arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts index 968f475b9a96..27a902569e2a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts @@ -120,7 +120,7 @@ }; tpm: tpm@1 { - compatible = "tcg,tpm_tis-spi"; + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; interrupts = <11 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&gpio2>; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi index 3f3f2a2c89cd..752caa38eb03 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi @@ -89,7 +89,7 @@ status = "okay"; tpm@1 { - compatible = "tcg,tpm_tis-spi"; + compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; spi-max-frequency = <36000000>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi index 06fed9376996..2aa6c1090fc7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi @@ -109,7 +109,7 @@ status = "okay"; tpm@1 { - compatible = "tcg,tpm_tis-spi"; + compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; spi-max-frequency = <36000000>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts b/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts index feae77e03835..a08057410bde 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts @@ -234,7 +234,7 @@ status = "okay"; tpm: tpm@0 { - compatible = "infineon,slb9670"; + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; reg = <0>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_tpm>; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi index c24587c895e1..41c79d2ebdd6 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi @@ -103,7 +103,7 @@ status = "okay"; tpm@1 { - compatible = "tcg,tpm_tis-spi"; + compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; spi-max-frequency = <36000000>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi index 628ffba69862..d5c400b355af 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi @@ -115,7 +115,7 @@ status = "okay"; tpm@1 { - compatible = "tcg,tpm_tis-spi"; + compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; spi-max-frequency = <36000000>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts index 9caf7ca25444..cae586cd45bd 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts @@ -196,7 +196,7 @@ status = "okay"; tpm@0 { - compatible = "tcg,tpm_tis-spi"; + compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x0>; spi-max-frequency = <36000000>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts b/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts index 6376417e918c..d8cf1f27c3ec 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts +++ b/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts @@ -65,7 +65,7 @@ status = "okay"; tpm@0 { - compatible = "infineon,slb9670"; + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; reg = <0>; spi-max-frequency = <43000000>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index 5506de83f61d..1b3396b1cee3 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -888,7 +888,7 @@ status = "okay"; cs-gpios = <&pio 86 GPIO_ACTIVE_LOW>; - cr50@0 { + tpm@0 { compatible = "google,cr50"; reg = <0>; spi-max-frequency = <1000000>; diff --git a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi index f2281250ac35..d87aab8d7a79 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi @@ -1402,7 +1402,7 @@ pinctrl-names = "default"; pinctrl-0 = <&spi5_pins>; - cr50@0 { + tpm@0 { compatible = "google,cr50"; reg = <0>; interrupts-extended = <&pio 171 IRQ_TYPE_EDGE_RISING>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts index 0f9cc042d9bf..1cba1d857c96 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts @@ -70,7 +70,7 @@ &spi0 { status = "okay"; - cr50@0 { + tpm@0 { compatible = "google,cr50"; reg = <0>; interrupt-parent = <&gpio0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index c5e7de60c121..5846a11f0e84 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -706,7 +706,7 @@ camera: &i2c7 { &spi2 { status = "okay"; - cr50@0 { + tpm@0 { compatible = "google,cr50"; reg = <0>; interrupt-parent = <&gpio1>; From fdaca31a7668cb17f70df5c32b6a9b90e82fc9b5 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 19 Jan 2024 11:32:41 +0800 Subject: [PATCH 167/347] drm/amd/pm: udpate smu v13.0.6 message permission update smu v13.0.6 message to allow guest driver set gfx clock. Signed-off-by: Yang Wang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 3c98a8a0386a..7e1941cf1796 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -160,8 +160,8 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0), MSG_MAP(GetMinGfxclkFrequency, PPSMC_MSG_GetMinGfxDpmFreq, 1), MSG_MAP(GetMaxGfxclkFrequency, PPSMC_MSG_GetMaxGfxDpmFreq, 1), - MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxClk, 0), - MSG_MAP(SetSoftMaxGfxClk, PPSMC_MSG_SetSoftMaxGfxClk, 0), + MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxClk, 1), + MSG_MAP(SetSoftMaxGfxClk, PPSMC_MSG_SetSoftMaxGfxClk, 1), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareForDriverUnload, 0), MSG_MAP(GetCTFLimit, PPSMC_MSG_GetCTFLimit, 0), MSG_MAP(GetThermalLimit, PPSMC_MSG_ReadThrottlerLimit, 0), From a58371d632ebab9ea63f10893a6b6731196b6f8d Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 17 Jan 2024 08:41:52 +0530 Subject: [PATCH 168/347] drm/amd/display: Fix uninitialized variable usage in core_link_ 'read_dpcd() & write_dpcd()' functions The 'status' variable in 'core_link_read_dpcd()' & 'core_link_write_dpcd()' was uninitialized. Thus, initializing 'status' variable to 'DC_ERROR_UNEXPECTED' by default. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dpcd.c:226 core_link_read_dpcd() error: uninitialized symbol 'status'. drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dpcd.c:248 core_link_write_dpcd() error: uninitialized symbol 'status'. Cc: stable@vger.kernel.org Cc: Jerry Zuo Cc: Jun Lei Cc: Wayne Lin Cc: Aurabindo Pillai Cc: Rodrigo Siqueira Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c index 5c9a30211c10..fc50931c2aec 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c @@ -205,7 +205,7 @@ enum dc_status core_link_read_dpcd( uint32_t extended_size; /* size of the remaining partitioned address space */ uint32_t size_left_to_read; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; /* size of the next partition to be read from */ uint32_t partition_size; uint32_t data_index = 0; @@ -234,7 +234,7 @@ enum dc_status core_link_write_dpcd( { uint32_t partition_size; uint32_t data_index = 0; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; while (size) { partition_size = dpcd_get_next_partition_size(address, size); From ca1ffb174f16b699c536734fc12a4162097c49f4 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Wed, 17 Jan 2024 14:35:29 +0800 Subject: [PATCH 169/347] drm/amdgpu/pm: Fix the power source flag error The power source flag should be updated when [1] System receives an interrupt indicating that the power source has changed. [2] System resumes from suspend or runtime suspend Signed-off-by: Ma Jun Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 13 +++---------- drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 2 ++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 2 ++ 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index c16703868e5c..a54663f2e2ab 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -24,6 +24,7 @@ #include #include +#include #include #include "amdgpu.h" @@ -817,16 +818,8 @@ static int smu_late_init(void *handle) * handle the switch automatically. Driver involvement * is unnecessary. */ - if (!smu->dc_controlled_by_gpio) { - ret = smu_set_power_source(smu, - adev->pm.ac_power ? SMU_POWER_SOURCE_AC : - SMU_POWER_SOURCE_DC); - if (ret) { - dev_err(adev->dev, "Failed to switch to %s mode!\n", - adev->pm.ac_power ? "AC" : "DC"); - return ret; - } - } + adev->pm.ac_power = power_supply_is_system_supplied() > 0; + smu_set_ac_dc(smu); if ((amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 1)) || (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 3))) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 5a314d0316c1..c7bfa68bf00f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1442,10 +1442,12 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev, case 0x3: dev_dbg(adev->dev, "Switched to AC mode!\n"); schedule_work(&smu->interrupt_work); + adev->pm.ac_power = true; break; case 0x4: dev_dbg(adev->dev, "Switched to DC mode!\n"); schedule_work(&smu->interrupt_work); + adev->pm.ac_power = false; break; case 0x7: /* diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 771a3d457c33..c486182ff275 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1379,10 +1379,12 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, case 0x3: dev_dbg(adev->dev, "Switched to AC mode!\n"); smu_v13_0_ack_ac_dc_interrupt(smu); + adev->pm.ac_power = true; break; case 0x4: dev_dbg(adev->dev, "Switched to DC mode!\n"); smu_v13_0_ack_ac_dc_interrupt(smu); + adev->pm.ac_power = false; break; case 0x7: /* From 90751bdeee4e3ac87ebf814bf282b0fa97edfeab Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Sat, 20 Jan 2024 13:32:51 +0530 Subject: [PATCH 170/347] drm/amdgpu: Avoid fetching vram vendor information For GFX 9.4.3 APUs, the current method of fetching vram vendor information is not reliable. Avoid fetching the information. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.7.x --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f9039d64ff2d..17b7a25121b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1950,7 +1950,8 @@ static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev) static const u32 regBIF_BIOS_SCRATCH_4 = 0x50; u32 vram_info; - if (!amdgpu_sriov_vf(adev)) { + /* Only for dGPU, vendor informaton is reliable */ + if (!amdgpu_sriov_vf(adev) && !(adev->flags & AMD_IS_APU)) { vram_info = RREG32(regBIF_BIOS_SCRATCH_4); adev->gmc.vram_vendor = vram_info & 0xF; } From 30269954745c6cac730352829ac9850918457440 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 19 Jan 2024 16:12:00 +0800 Subject: [PATCH 171/347] drm/amd/pm: update the power cap setting update the power cap setting for smu_v13.0.0/smu_v13.0.7 Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2356 Signed-off-by: Kenneth Feng Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 54 ++++++++++++++++++- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 54 ++++++++++++++++++- 2 files changed, 104 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a9b25faa63e4..4fdf34fffa9a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2357,6 +2357,7 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu, PPTable_t *pptable = table_context->driver_pptable; SkuTable_t *skutable = &pptable->SkuTable; uint32_t power_limit, od_percent_upper, od_percent_lower; + uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; if (smu_v13_0_get_current_power_limit(smu, &power_limit)) power_limit = smu->adev->pm.ac_power ? @@ -2380,7 +2381,7 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu, od_percent_upper, od_percent_lower, power_limit); if (max_power_limit) { - *max_power_limit = power_limit * (100 + od_percent_upper); + *max_power_limit = msg_limit * (100 + od_percent_upper); *max_power_limit /= 100; } @@ -2959,6 +2960,55 @@ static bool smu_v13_0_0_wbrf_support_check(struct smu_context *smu) } } +static int smu_v13_0_0_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + SkuTable_t *skutable = &pptable->SkuTable; + uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + int ret = 0; + + if (limit_type != SMU_DEFAULT_PPT_LIMIT) + return -EINVAL; + + if (limit <= msg_limit) { + if (smu->current_power_limit > msg_limit) { + od_table->OverDriveTable.Ppt = 0; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v13_0_0_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + } + return smu_v13_0_set_power_limit(smu, limit_type, limit); + } else if (smu->od_enabled) { + ret = smu_v13_0_set_power_limit(smu, limit_type, msg_limit); + if (ret) + return ret; + + od_table->OverDriveTable.Ppt = (limit * 100) / msg_limit - 100; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v13_0_0_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + + smu->current_power_limit = limit; + } else { + return -EINVAL; + } + + return 0; +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -3013,7 +3063,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .set_fan_control_mode = smu_v13_0_set_fan_control_mode, .enable_mgpu_fan_boost = smu_v13_0_0_enable_mgpu_fan_boost, .get_power_limit = smu_v13_0_0_get_power_limit, - .set_power_limit = smu_v13_0_set_power_limit, + .set_power_limit = smu_v13_0_0_set_power_limit, .set_power_source = smu_v13_0_set_power_source, .get_power_profile_mode = smu_v13_0_0_get_power_profile_mode, .set_power_profile_mode = smu_v13_0_0_set_power_profile_mode, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 59606a19e3d2..7c3e162e2d81 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2321,6 +2321,7 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu, PPTable_t *pptable = table_context->driver_pptable; SkuTable_t *skutable = &pptable->SkuTable; uint32_t power_limit, od_percent_upper, od_percent_lower; + uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; if (smu_v13_0_get_current_power_limit(smu, &power_limit)) power_limit = smu->adev->pm.ac_power ? @@ -2344,7 +2345,7 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu, od_percent_upper, od_percent_lower, power_limit); if (max_power_limit) { - *max_power_limit = power_limit * (100 + od_percent_upper); + *max_power_limit = msg_limit * (100 + od_percent_upper); *max_power_limit /= 100; } @@ -2545,6 +2546,55 @@ static bool smu_v13_0_7_wbrf_support_check(struct smu_context *smu) return smu->smc_fw_version > 0x00524600; } +static int smu_v13_0_7_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + SkuTable_t *skutable = &pptable->SkuTable; + uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + int ret = 0; + + if (limit_type != SMU_DEFAULT_PPT_LIMIT) + return -EINVAL; + + if (limit <= msg_limit) { + if (smu->current_power_limit > msg_limit) { + od_table->OverDriveTable.Ppt = 0; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v13_0_7_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + } + return smu_v13_0_set_power_limit(smu, limit_type, limit); + } else if (smu->od_enabled) { + ret = smu_v13_0_set_power_limit(smu, limit_type, msg_limit); + if (ret) + return ret; + + od_table->OverDriveTable.Ppt = (limit * 100) / msg_limit - 100; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v13_0_7_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + + smu->current_power_limit = limit; + } else { + return -EINVAL; + } + + return 0; +} + static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_7_set_default_dpm_table, @@ -2596,7 +2646,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .set_fan_control_mode = smu_v13_0_set_fan_control_mode, .enable_mgpu_fan_boost = smu_v13_0_7_enable_mgpu_fan_boost, .get_power_limit = smu_v13_0_7_get_power_limit, - .set_power_limit = smu_v13_0_set_power_limit, + .set_power_limit = smu_v13_0_7_set_power_limit, .set_power_source = smu_v13_0_set_power_source, .get_power_profile_mode = smu_v13_0_7_get_power_profile_mode, .set_power_profile_mode = smu_v13_0_7_set_power_profile_mode, From 89a7c0bd74918f723c94c10452265e25063cba9b Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Sat, 20 Jan 2024 13:48:09 +0530 Subject: [PATCH 172/347] drm/amdgpu: Show vram vendor only if available Ony if vram vendor info is available, show in sysfs. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.7.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 08916538a615..8db880244324 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -221,8 +221,23 @@ static struct attribute *amdgpu_vram_mgr_attributes[] = { NULL }; +static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj, + struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + if (attr == &dev_attr_mem_info_vram_vendor.attr && + !adev->gmc.vram_vendor) + return 0; + + return attr->mode; +} + const struct attribute_group amdgpu_vram_mgr_attr_group = { - .attrs = amdgpu_vram_mgr_attributes + .attrs = amdgpu_vram_mgr_attributes, + .is_visible = amdgpu_vram_attrs_is_visible }; /** From bc8f6d42b1334f486980d57c8d12f3128d30c2e3 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 22 Jan 2024 17:38:23 +0800 Subject: [PATCH 173/347] drm/amdgpu: Fix null pointer dereference amdgpu_reg_state_sysfs_fini could be invoked at the time when asic_func is even not initialized, i.e., amdgpu_discovery_init fails for some reason. Signed-off-by: Hawking Zhang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/include/amdgpu_reg_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h index be519c8edf49..335980e2afbf 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h +++ b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h @@ -138,7 +138,7 @@ static inline size_t amdgpu_reginst_size(uint16_t num_inst, size_t inst_size, } #define amdgpu_asic_get_reg_state_supported(adev) \ - ((adev)->asic_funcs->get_reg_state ? 1 : 0) + (((adev)->asic_funcs && (adev)->asic_funcs->get_reg_state) ? 1 : 0) #define amdgpu_asic_get_reg_state(adev, state, buf, size) \ ((adev)->asic_funcs->get_reg_state ? \ From f1807682de0edbff6c1e46b19642a517d2e15c57 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 18 Jan 2024 14:25:35 +0530 Subject: [PATCH 174/347] drm/amd/pm: Fetch current power limit from FW Power limit of SMUv13.0.6 SOCs can be updated by out-of-band ways. Fetch the limit from firmware instead of using cached values. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.7.x --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index a54663f2e2ab..7ffad3eb0a01 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2703,6 +2703,7 @@ int smu_get_power_limit(void *handle, case SMU_PPT_LIMIT_CURRENT: switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 2): + case IP_VERSION(13, 0, 6): case IP_VERSION(11, 0, 7): case IP_VERSION(11, 0, 11): case IP_VERSION(11, 0, 12): From e7a8594cc2af920a905db15653c19c362d4ebd3f Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Wed, 17 Jan 2024 12:47:37 -0500 Subject: [PATCH 175/347] drm/amd/amdgpu: Assign GART pages to AMD device mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows kernel mapped pages like the PDB and PTB to be read via the iomem debugfs when there is no vram in the system. Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.7.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 73b8cca35bab..c623e23049d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -121,6 +121,7 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev) struct amdgpu_bo_param bp; dma_addr_t dma_addr; struct page *p; + unsigned long x; int ret; if (adev->gart.bo != NULL) @@ -130,6 +131,10 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev) if (!p) return -ENOMEM; + /* assign pages to this device */ + for (x = 0; x < (1UL << order); x++) + p[x].mapping = adev->mman.bdev.dev_mapping; + /* If the hardware does not support UTCL2 snooping of the CPU caches * then set_memory_wc() could be used as a workaround to mark the pages * as write combine memory. @@ -223,6 +228,7 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) unsigned int order = get_order(adev->gart.table_size); struct sg_table *sg = adev->gart.bo->tbo.sg; struct page *p; + unsigned long x; int ret; ret = amdgpu_bo_reserve(adev->gart.bo, false); @@ -234,6 +240,8 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) sg_free_table(sg); kfree(sg); p = virt_to_page(adev->gart.ptr); + for (x = 0; x < (1UL << order); x++) + p[x].mapping = NULL; __free_pages(p, order); adev->gart.ptr = NULL; From 03ff6d7238b77e5fb2b85dc5fe01d2db9eb893bd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 19 Jan 2024 12:23:55 -0500 Subject: [PATCH 176/347] drm/amdgpu/gfx10: set UNORD_DISPATCH in compute MQDs This needs to be set to 1 to avoid a potential deadlock in the GC 10.x and newer. On GC 9.x and older, this needs to be set to 0. This can lead to hangs in some mixed graphics and compute workloads. Updated firmware is also required for AQL. Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d63cab294883..ecb622b7f970 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6589,7 +6589,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 8b7fed913526..22cbfa1bdadd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -170,6 +170,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); From 3380fcad2c906872110d31ddf7aa1fdea57f9df6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 19 Jan 2024 12:32:59 -0500 Subject: [PATCH 177/347] drm/amdgpu/gfx11: set UNORD_DISPATCH in compute MQDs This needs to be set to 1 to avoid a potential deadlock in the GC 10.x and newer. On GC 9.x and older, this needs to be set to 0. This can lead to hangs in some mixed graphics and compute workloads. Updated firmware is also required for AQL. Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 0ea0866c261f..d9cf9fd03d30 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -3846,7 +3846,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 15277f1d5cf0..d722cbd31783 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -224,6 +224,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); From ff8caade7429f28217c293672ab64323031f350e Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 7 Dec 2023 14:12:03 -0500 Subject: [PATCH 178/347] drm/amd/display: Allow IPS2 during Replay [Why & How] Add regkey to block video playback in IPS2 by default Allow idle optimizations in the same spot we allow Replay for video playback usecases. Avoid sending it when there's an external display connected by modifying the allow idle checks to check for active non-eDP screens. Reviewed-by: Charlene Liu Acked-by: Alex Hung Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 9 ++++++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d4f525b66a09..5552d30a6d97 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1717,6 +1717,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL; + init_data.flags.disable_ips_in_vpb = 1; + /* Enable DWB for tested platforms only */ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) init_data.num_virtual_links = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 5d7aa882416b..c9317ea0258e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -434,6 +434,7 @@ struct dc_config { bool EnableMinDispClkODM; bool enable_auto_dpm_test_logs; unsigned int disable_ips; + unsigned int disable_ips_in_vpb; }; enum visual_confirm { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 9c806385ecbd..8b6c49622f3b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -680,7 +680,7 @@ void dcn35_power_down_on_boot(struct dc *dc) bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable) { struct dc_link *edp_links[MAX_NUM_EDP]; - int edp_num; + int i, edp_num; if (dc->debug.dmcub_emulation) return true; @@ -688,6 +688,13 @@ bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable) dc_get_edp_links(dc, edp_links, &edp_num); if (edp_num == 0 || edp_num > 1) return false; + + for (i = 0; i < dc->current_state->stream_count; ++i) { + struct dc_stream_state *stream = dc->current_state->streams[i]; + + if (!stream->dpms_off && !dc_is_embedded_signal(stream->signal)) + return false; + } } // TODO: review other cases when idle optimization is allowed From 955406e6fd241b2936e7f033a03b2956922c8f32 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 2 Jan 2024 12:02:39 -0500 Subject: [PATCH 179/347] drm/amd/display: Add Replay IPS register for DMUB command table - Introduce a new Replay mode for DMUB version 0.0.199.0 Reviewed-by: Martin Leung Acked-by: Alex Hung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index c64b6c848ef7..bcd3c361cca5 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -2832,6 +2832,7 @@ struct dmub_rb_cmd_psr_set_power_opt { #define REPLAY_RESIDENCY_MODE_MASK (0x1 << REPLAY_RESIDENCY_MODE_SHIFT) # define REPLAY_RESIDENCY_MODE_PHY (0x0 << REPLAY_RESIDENCY_MODE_SHIFT) # define REPLAY_RESIDENCY_MODE_ALPM (0x1 << REPLAY_RESIDENCY_MODE_SHIFT) +# define REPLAY_RESIDENCY_MODE_IPS 0x10 #define REPLAY_RESIDENCY_ENABLE_MASK (0x1 << REPLAY_RESIDENCY_ENABLE_SHIFT) # define REPLAY_RESIDENCY_DISABLE (0x0 << REPLAY_RESIDENCY_ENABLE_SHIFT) From 196107eb1e1557df25e1425bbfb53e0f7588b80a Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 9 Jan 2024 17:31:33 -0500 Subject: [PATCH 180/347] drm/amd/display: Add IPS checks before dcn register access [Why] With IPS enabled a system hangs once PSR is active. PSR active triggers transition to IPS2 state. While in IPS2 an access to dcn registers results in hard hang. Existing check doesn't cover for PSR sequence. [How] Safeguard register access by disabling idle optimization in atomic commit and crtc scanout. It will be re-enabled on next vblank. Reviewed-by: Nicholas Kazlauskas Acked-by: Roman Li Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5552d30a6d97..d8f85df2bc66 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -272,6 +272,7 @@ static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, { u32 v_blank_start, v_blank_end, h_position, v_position; struct amdgpu_crtc *acrtc = NULL; + struct dc *dc = adev->dm.dc; if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc)) return -EINVAL; @@ -284,6 +285,9 @@ static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, return 0; } + if (dc && dc->caps.ips_support && dc->idle_optimizations_allowed) + dc_allow_idle_optimizations(dc, false); + /* * TODO rework base driver to use values directly. * for now parse it back into reg-format @@ -8978,16 +8982,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) trace_amdgpu_dm_atomic_commit_tail_begin(state); - if (dm->dc->caps.ips_support) { - for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { - if (new_con_state->crtc && - new_con_state->crtc->state->active && - drm_atomic_crtc_needs_modeset(new_con_state->crtc->state)) { - dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false); - break; - } - } - } + if (dm->dc->caps.ips_support && dm->dc->idle_optimizations_allowed) + dc_allow_idle_optimizations(dm->dc, false); drm_atomic_helper_update_legacy_modeset_state(dev, state); drm_dp_mst_atomic_wait_for_dependencies(state); From f37f7979202d45489d84469838f5352cda3557bc Mon Sep 17 00:00:00 2001 From: ChunTao Tso Date: Mon, 8 Jan 2024 13:46:59 +0800 Subject: [PATCH 181/347] drm/amd/display: Replay + IPS + ABM in Full Screen VPB [Why] Because ABM will wait VStart to start getting histogram data, it will cause we can't enter IPS while full screnn video playing. [How] Modify the panel refresh rate to the maximun multiple of current refresh rate. Reviewed-by: Dennis Chan Acked-by: Roman Li Signed-off-by: ChunTao Tso Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 5 ++ .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 46 +++++++++++++++++++ .../amd/display/modules/power/power_helpers.c | 5 ++ .../amd/display/modules/power/power_helpers.h | 1 + 4 files changed, 57 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index b08ccb8c68bc..9900dda2eef5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1034,6 +1034,7 @@ enum replay_FW_Message_type { Replay_Msg_Not_Support = -1, Replay_Set_Timing_Sync_Supported, Replay_Set_Residency_Frameupdate_Timer, + Replay_Set_Pseudo_VTotal, }; union replay_error_status { @@ -1089,6 +1090,10 @@ struct replay_settings { uint16_t coasting_vtotal_table[PR_COASTING_TYPE_NUM]; /* Maximum link off frame count */ enum replay_link_off_frame_count_level link_off_frame_count_level; + /* Replay pseudo vtotal for abm + ips on full screen video which can improve ips residency */ + uint16_t abm_with_ips_on_full_screen_video_pseudo_vtotal; + /* Replay last pseudo vtotal set to DMUB */ + uint16_t last_pseudo_vtotal; }; /* To split out "global" and "per-panel" config settings. diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index bcd3c361cca5..e699731ee68e 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -2895,6 +2895,10 @@ enum dmub_cmd_replay_type { * Set Residency Frameupdate Timer. */ DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER = 6, + /** + * Set pseudo vtotal + */ + DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL = 7, }; /** @@ -3077,6 +3081,26 @@ struct dmub_cmd_replay_set_timing_sync_data { uint8_t pad[2]; }; +/** + * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command. + */ +struct dmub_cmd_replay_set_pseudo_vtotal { + /** + * Panel Instance. + * Panel isntance to identify which replay_state to use + * Currently the support is only for 0 or 1 + */ + uint8_t panel_inst; + /** + * Source Vtotal that Replay + IPS + ABM full screen video src vtotal + */ + uint16_t vtotal; + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad; +}; + /** * Definition of a DMUB_CMD__SET_REPLAY_POWER_OPT command. */ @@ -3157,6 +3181,20 @@ struct dmub_rb_cmd_replay_set_timing_sync { struct dmub_cmd_replay_set_timing_sync_data replay_set_timing_sync_data; }; +/** + * Definition of a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command. + */ +struct dmub_rb_cmd_replay_set_pseudo_vtotal { + /** + * Command header. + */ + struct dmub_cmd_header header; + /** + * Definition of DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command. + */ + struct dmub_cmd_replay_set_pseudo_vtotal data; +}; + /** * Data passed from driver to FW in DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command. */ @@ -3208,6 +3246,10 @@ union dmub_replay_cmd_set { * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command data. */ struct dmub_cmd_replay_frameupdate_timer_data timer_data; + /** + * Definition of DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command data. + */ + struct dmub_cmd_replay_set_pseudo_vtotal pseudo_vtotal_data; }; /** @@ -4359,6 +4401,10 @@ union dmub_rb_cmd { * Definition of a DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command. */ struct dmub_rb_cmd_replay_set_frameupdate_timer replay_set_frameupdate_timer; + /** + * Definition of a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command. + */ + struct dmub_rb_cmd_replay_set_pseudo_vtotal replay_set_pseudo_vtotal; }; /** diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index ad98e504c00d..e304e8435fb8 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -980,6 +980,11 @@ void set_replay_coasting_vtotal(struct dc_link *link, link->replay_settings.coasting_vtotal_table[type] = vtotal; } +void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal) +{ + link->replay_settings.abm_with_ips_on_full_screen_video_pseudo_vtotal = vtotal; +} + void calculate_replay_link_off_frame_count(struct dc_link *link, uint16_t vtotal, uint16_t htotal) { diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h index c17bbc6fb38c..bef4815e1703 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -57,6 +57,7 @@ void init_replay_config(struct dc_link *link, struct replay_config *pr_config); void set_replay_coasting_vtotal(struct dc_link *link, enum replay_coasting_vtotal_type type, uint16_t vtotal); +void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal); void calculate_replay_link_off_frame_count(struct dc_link *link, uint16_t vtotal, uint16_t htotal); From 8894b9283afd35b8d22ae07a0c118eb5f7d2e78b Mon Sep 17 00:00:00 2001 From: Roman Li Date: Mon, 22 Jan 2024 17:45:41 -0500 Subject: [PATCH 182/347] drm/amd/display: Disable ips before dc interrupt setting [Why] While in IPS2 an access to dcn registers is not allowed. If interrupt results in dc call, we should disable IPS. [How] Safeguard register access in IPS2 by disabling idle optimization before calling dc interrupt setting api. Signed-off-by: Roman Li Tested-by: Mark Broadworth Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 58b880acb087..3390f0d8420a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -711,7 +711,7 @@ static inline int dm_irq_state(struct amdgpu_device *adev, { bool st; enum dc_irq_source irq_source; - + struct dc *dc = adev->dm.dc; struct amdgpu_crtc *acrtc = adev->mode_info.crtcs[crtc_id]; if (!acrtc) { @@ -729,6 +729,9 @@ static inline int dm_irq_state(struct amdgpu_device *adev, st = (state == AMDGPU_IRQ_STATE_ENABLE); + if (dc && dc->caps.ips_support && dc->idle_optimizations_allowed) + dc_allow_idle_optimizations(dc, false); + dc_interrupt_set(adev->dm.dc, irq_source, st); return 0; } From d45669eb5e68c052d0d890cd88c33a65c115d9f3 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 23 Jan 2024 15:14:28 -0500 Subject: [PATCH 183/347] drm/amd: Add a DC debug mask for IPS For debugging IPS-related issues, expose a new debug mask that allows to disable IPS. Usage: amdgpu.dcdebugmask=0x800 Signed-off-by: Roman Li Tested-by: Mark Broadworth Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/amd_shared.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 1dc5dd9b7bf7..df2c7ffe190f 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -258,6 +258,7 @@ enum DC_DEBUG_MASK { DC_ENABLE_DML2 = 0x100, DC_DISABLE_PSR_SU = 0x200, DC_DISABLE_REPLAY = 0x400, + DC_DISABLE_IPS = 0x800, }; enum amd_dpm_forced_level; From c82eb25c5f005b33aebb1415a8472fc2eeea0deb Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 23 Jan 2024 15:18:24 -0500 Subject: [PATCH 184/347] drm/amd/display: "Enable IPS by default" [Why] IPS was temporary disabled due to instability. It was fixed in dmub firmware and with: - "drm/amd/display: Add IPS checks before dcn register access" - "drm/amd/display: Disable ips before dc interrupt setting" [How] Enable IPS by default. Disable IPS if 0x800 bit set in amdgpu.dcdebugmask module params Signed-off-by: Roman Li Tested-by: Mark Broadworth Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d8f85df2bc66..6cda5b536362 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1719,7 +1719,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.nbio_reg_offsets = adev->reg_offset[NBIO_HWIP][0]; init_data.clk_reg_offsets = adev->reg_offset[CLK_HWIP][0]; - init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL; + if (amdgpu_dc_debug_mask & DC_DISABLE_IPS) + init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL; init_data.flags.disable_ips_in_vpb = 1; From 8c2ae772fe08e33f3d7a83849e85539320701abd Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 25 Jan 2024 14:53:09 -0600 Subject: [PATCH 185/347] spi: fix finalize message on error return In __spi_pump_transfer_message(), the message was not finalized in the first error return as it is in the other error return paths. Not finalizing the message could cause anything waiting on the message to complete to hang forever. This adds the missing call to spi_finalize_current_message(). Fixes: ae7d2346dc89 ("spi: Don't use the message queue if possible in spi_sync") Signed-off-by: David Lechner Link: https://msgid.link/r/20240125205312.3458541-2-dlechner@baylibre.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 7477a11e12be..f2170f4b5077 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1717,6 +1717,10 @@ static int __spi_pump_transfer_message(struct spi_controller *ctlr, pm_runtime_put_noidle(ctlr->dev.parent); dev_err(&ctlr->dev, "Failed to power device: %d\n", ret); + + msg->status = ret; + spi_finalize_current_message(ctlr); + return ret; } } From b64787840080bdbd048bb9c68222ad17236cbd7e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 24 Jan 2024 11:25:50 -0800 Subject: [PATCH 186/347] selftests: tcp_ao: add a config file Still a bit unclear whether each directory should have its own config file, but assuming they should lets add one for tcp_ao. The following tests still fail with this config in place: - rst_ipv4, - rst_ipv6, - bench-lookups_ipv6. other 21 pass. Fixes: d11301f65977 ("selftests/net: Add TCP-AO ICMPs accept test") Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/20240124192550.1865743-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tcp_ao/config | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/config diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config new file mode 100644 index 000000000000..d3277a9de987 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/config @@ -0,0 +1,10 @@ +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_RMD160=y +CONFIG_CRYPTO_SHA1=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=y +CONFIG_TCP_AO=y +CONFIG_TCP_MD5SIG=y +CONFIG_VETH=m From 98cb12eb52a780e682bea8372fdb2912c08132dd Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 24 Jan 2024 22:33:20 +0100 Subject: [PATCH 187/347] selftests: net: remove dependency on ebpf tests Several net tests requires an XDP program build under the ebpf directory, and error out if such program is not available. That makes running successful net test hard, let's duplicate into the net dir the [very small] program, re-using the existing rules to build it, and finally dropping the bogus dependency. Signed-off-by: Paolo Abeni Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/28e7af7c031557f691dc8045ee41dd549dd5e74c.1706131762.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 5 +++-- tools/testing/selftests/net/udpgro.sh | 4 ++-- tools/testing/selftests/net/udpgro_bench.sh | 4 ++-- tools/testing/selftests/net/udpgro_frglist.sh | 6 +++--- tools/testing/selftests/net/udpgro_fwd.sh | 2 +- tools/testing/selftests/net/veth.sh | 4 ++-- tools/testing/selftests/net/xdp_dummy.c | 13 +++++++++++++ 7 files changed, 26 insertions(+), 12 deletions(-) create mode 100644 tools/testing/selftests/net/xdp_dummy.c diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 50818075e566..304d8b852ef0 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -84,6 +84,7 @@ TEST_PROGS += sctp_vrf.sh TEST_GEN_FILES += sctp_hello TEST_GEN_FILES += csum TEST_GEN_FILES += nat6to4.o +TEST_GEN_FILES += xdp_dummy.o TEST_GEN_FILES += ip_local_port_range TEST_GEN_FILES += bind_wildcard TEST_PROGS += test_vxlan_mdb.sh @@ -104,7 +105,7 @@ $(OUTPUT)/tcp_inq: LDLIBS += -lpthread $(OUTPUT)/bind_bhash: LDLIBS += -lpthread $(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/ -# Rules to generate bpf obj nat6to4.o +# Rules to generate bpf objs CLANG ?= clang SCRATCH_DIR := $(OUTPUT)/tools BUILD_DIR := $(SCRATCH_DIR)/build @@ -139,7 +140,7 @@ endif CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) -$(OUTPUT)/nat6to4.o: nat6to4.c $(BPFOBJ) | $(MAKE_DIRS) +$(OUTPUT)/nat6to4.o $(OUTPUT)/xdp_dummy.o: $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS) $(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index af5dc57c8ce9..8802604148dd 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" # set global exit status, but never reset nonzero one. check_err() @@ -197,7 +197,7 @@ run_all() { } if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index cb664679b434..7080eae5312b 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" cleanup() { local -r jobs="$(jobs -p)" @@ -84,7 +84,7 @@ run_all() { } if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index dd47fa96f6b3..e1ff645bd3d1 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" cleanup() { local -r jobs="$(jobs -p)" @@ -85,12 +85,12 @@ run_all() { } if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit -1 fi if [ ! -f nat6to4.o ]; then - echo "Missing nat6to4 helper. Build bpf nat6to4.o selftest first" + echo "Missing nat6to4 helper. Run 'make' first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index c079565add39..5fa8659ab13d 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 readonly DST=1 diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 2d073595c620..27574bbf2d63 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" readonly BASE=`basename $STATS` readonly SRC=2 @@ -218,7 +218,7 @@ while getopts "hs:" option; do done if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit 1 fi diff --git a/tools/testing/selftests/net/xdp_dummy.c b/tools/testing/selftests/net/xdp_dummy.c new file mode 100644 index 000000000000..d988b2e0cee8 --- /dev/null +++ b/tools/testing/selftests/net/xdp_dummy.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define KBUILD_MODNAME "xdp_dummy" +#include +#include + +SEC("xdp") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; From f5173fe3e13b2cbd25d0d73f40acd923d75add55 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 24 Jan 2024 22:33:21 +0100 Subject: [PATCH 188/347] selftests: net: included needed helper in the install targets The blamed commit below introduce a dependency in some net self-tests towards a newly introduce helper script. Such script is currently not included into the TEST_PROGS_EXTENDED list and thus is not installed, causing failure for the relevant tests when executed from the install dir. Fix the issue updating the install targets. Fixes: 3bdd9fd29cb0 ("selftests/net: synchronize udpgro tests' tx and rx connection") Signed-off-by: Paolo Abeni Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/076e8758e21ff2061cc9f81640e7858df775f0a9.1706131762.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 304d8b852ef0..48c6f93b8149 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -55,6 +55,7 @@ TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh lib.sh +TEST_PROGS_EXTENDED += net_helper.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite From 4acffb66630a0e4800880baa61a54ef18047ccd3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 24 Jan 2024 22:33:22 +0100 Subject: [PATCH 189/347] selftests: net: explicitly wait for listener ready The UDP GRO forwarding test still hard-code an arbitrary pause to wait for the UDP listener becoming ready in background. That causes sporadic failures depending on the host load. Replace the sleep with the existing helper waiting for the desired port being exposed. Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") Signed-off-by: Paolo Abeni Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/4d58900fb09cef42749cfcf2ad7f4b91a97d225c.1706131762.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgro_fwd.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 5fa8659ab13d..d6b9c759043c 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source net_helper.sh + BPF_FILE="xdp_dummy.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 @@ -119,7 +121,7 @@ run_test() { ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000 ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args & local spid=$! - sleep 0.1 + wait_local_port_listen "$NS_DST" 8000 udp ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst local retc=$? wait $spid @@ -168,7 +170,7 @@ run_bench() { ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus" ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10 & local spid=$! - sleep 0.1 + wait_local_port_listen "$NS_DST" 8000 udp ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst local retc=$? wait $spid From 39b383d77961a544d896242051168b8129cf5be7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 24 Jan 2024 15:36:30 -0800 Subject: [PATCH 190/347] selftests: tcp_ao: set the timeout to 2 minutes The default timeout for tests is 45sec, bench-lookups_ipv6 seems to take around 50sec when running in a VM without HW acceleration. Give it a 2x margin and set the timeout to 120sec. Fixes: d1066c9c58d4 ("selftests/net: Add test/benchmark for removing MKTs") Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/20240124233630.1977708-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tcp_ao/settings | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/net/tcp_ao/settings diff --git a/tools/testing/selftests/net/tcp_ao/settings b/tools/testing/selftests/net/tcp_ao/settings new file mode 100644 index 000000000000..6091b45d226b --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/settings @@ -0,0 +1 @@ +timeout=120 From d2fda304bb739b97c1a3e46e39700eb49f07a62c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 24 Jan 2024 17:26:33 -0500 Subject: [PATCH 191/347] bcachefs: __lookup_dirent() works in snapshot, not subvol Add a new helper, bch2_hash_lookup_in_snapshot(), for when we're not operating in a subvolume and already have a snapshot ID, and then use it in lookup_lostfound() -> __lookup_dirent(). This is a bugfix - lookup_lostfound() doesn't take a subvolume ID, we were passing a nonsense subvolume ID before, and don't have one to pass since we may be operating in an interior snapshot node that doesn't have a subvolume ID. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 23 ++++++++++++----------- fs/bcachefs/str_hash.h | 22 +++++++++++++++------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 4f0ecd605675..6a760777bafb 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -119,22 +119,19 @@ static int lookup_inode(struct btree_trans *trans, u64 inode_nr, if (!ret) *snapshot = iter.pos.snapshot; err: - bch_err_msg(trans->c, ret, "fetching inode %llu:%u", inode_nr, *snapshot); bch2_trans_iter_exit(trans, &iter); return ret; } -static int __lookup_dirent(struct btree_trans *trans, +static int lookup_dirent_in_snapshot(struct btree_trans *trans, struct bch_hash_info hash_info, subvol_inum dir, struct qstr *name, - u64 *target, unsigned *type) + u64 *target, unsigned *type, u32 snapshot) { struct btree_iter iter; struct bkey_s_c_dirent d; - int ret; - - ret = bch2_hash_lookup(trans, &iter, bch2_dirent_hash_desc, - &hash_info, dir, name, 0); + int ret = bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc, + &hash_info, dir, name, 0, snapshot); if (ret) return ret; @@ -225,15 +222,16 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, struct bch_inode_unpacked root_inode; struct bch_hash_info root_hash_info; - ret = lookup_inode(trans, root_inum.inum, &root_inode, &snapshot); + u32 root_inode_snapshot = snapshot; + ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot); bch_err_msg(c, ret, "looking up root inode"); if (ret) return ret; root_hash_info = bch2_hash_info_init(c, &root_inode); - ret = __lookup_dirent(trans, root_hash_info, root_inum, - &lostfound_str, &inum, &d_type); + ret = lookup_dirent_in_snapshot(trans, root_hash_info, root_inum, + &lostfound_str, &inum, &d_type, snapshot); if (bch2_err_matches(ret, ENOENT)) goto create_lostfound; @@ -250,7 +248,10 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, * The bch2_check_dirents pass has already run, dangling dirents * shouldn't exist here: */ - return lookup_inode(trans, inum, lostfound, &snapshot); + ret = lookup_inode(trans, inum, lostfound, &snapshot); + bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)", + inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot)); + return ret; create_lostfound: /* diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 89fdb7c21134..fcaa5a888744 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -160,21 +160,16 @@ static inline bool is_visible_key(struct bch_hash_desc desc, subvol_inum inum, s } static __always_inline int -bch2_hash_lookup(struct btree_trans *trans, +bch2_hash_lookup_in_snapshot(struct btree_trans *trans, struct btree_iter *iter, const struct bch_hash_desc desc, const struct bch_hash_info *info, subvol_inum inum, const void *key, - unsigned flags) + unsigned flags, u32 snapshot) { struct bkey_s_c k; - u32 snapshot; int ret; - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); - if (ret) - return ret; - for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, SPOS(inum.inum, desc.hash_key(info, key), snapshot), POS(inum.inum, U64_MAX), @@ -194,6 +189,19 @@ bch2_hash_lookup(struct btree_trans *trans, return ret ?: -BCH_ERR_ENOENT_str_hash_lookup; } +static __always_inline int +bch2_hash_lookup(struct btree_trans *trans, + struct btree_iter *iter, + const struct bch_hash_desc desc, + const struct bch_hash_info *info, + subvol_inum inum, const void *key, + unsigned flags) +{ + u32 snapshot; + return bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot) ?: + bch2_hash_lookup_in_snapshot(trans, iter, desc, info, inum, key, flags, snapshot); +} + static __always_inline int bch2_hash_hole(struct btree_trans *trans, struct btree_iter *iter, From fc836129f708407502632107e58d48f54b1caf75 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 24 Jan 2024 14:13:44 +0800 Subject: [PATCH 192/347] selftests/net/lib: update busywait timeout value The busywait timeout value is a millisecond, not a second. So the current setting 2 is too small. On slow/busy host (or VMs) the current timeout can expire even on "correct" execution, causing random failures. Let's copy the WAIT_TIMEOUT from forwarding/lib.sh and set BUSYWAIT_TIMEOUT here. Fixes: 25ae948b4478 ("selftests/net: add lib.sh") Signed-off-by: Hangbin Liu Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240124061344.1864484-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index dca549443801..f9fe182dfbd4 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -4,6 +4,9 @@ ############################################################################## # Defines +WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} +BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 # namespace list created by setup_ns @@ -48,7 +51,7 @@ cleanup_ns() for ns in "$@"; do ip netns delete "${ns}" &> /dev/null - if ! busywait 2 ip netns list \| grep -vq "^$ns$" &> /dev/null; then + if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then echo "Warn: Failed to remove namespace $ns" ret=1 fi From 534326711000c318fe1523c77308450522baa499 Mon Sep 17 00:00:00 2001 From: Praveen Kaligineedi Date: Wed, 24 Jan 2024 08:10:25 -0800 Subject: [PATCH 193/347] gve: Fix skb truesize underestimation For a skb frag with a newly allocated copy page, the true size is incorrectly set to packet buffer size. It should be set to PAGE_SIZE instead. Fixes: 82fd151d38d9 ("gve: Reduce alloc and copy costs in the GQ rx path") Signed-off-by: Praveen Kaligineedi Link: https://lore.kernel.org/r/20240124161025.1819836-1-pkaligineedi@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_rx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 7a8dc5386fff..76615d47e055 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -356,7 +356,7 @@ static enum pkt_hash_types gve_rss_type(__be16 pkt_flags) static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi, struct gve_rx_slot_page_info *page_info, - u16 packet_buffer_size, u16 len, + unsigned int truesize, u16 len, struct gve_rx_ctx *ctx) { u32 offset = page_info->page_offset + page_info->pad; @@ -389,10 +389,10 @@ static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi, if (skb != ctx->skb_head) { ctx->skb_head->len += len; ctx->skb_head->data_len += len; - ctx->skb_head->truesize += packet_buffer_size; + ctx->skb_head->truesize += truesize; } skb_add_rx_frag(skb, num_frags, page_info->page, - offset, len, packet_buffer_size); + offset, len, truesize); return ctx->skb_head; } @@ -486,7 +486,7 @@ static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx, memcpy(alloc_page_info.page_address, src, page_info->pad + len); skb = gve_rx_add_frags(napi, &alloc_page_info, - rx->packet_buffer_size, + PAGE_SIZE, len, ctx); u64_stats_update_begin(&rx->statss); From cefa98e806fd4e2a5e2047457a11ae5f17b8f621 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 24 Jan 2024 17:19:08 +0200 Subject: [PATCH 194/347] nfp: flower: add hardware offload check for post ct entry The nfp offload flow pay will not allocate a mask id when the out port is openvswitch internal port. This is because these flows are used to configure the pre_tun table and are never actually send to the firmware as an add-flow message. When a tc rule which action contains ct and the post ct entry's out port is openvswitch internal port, the merge offload flow pay with the wrong mask id of 0 will be send to the firmware. Actually, the nfp can not support hardware offload for this situation, so return EOPNOTSUPP. Fixes: bd0fe7f96a3c ("nfp: flower-ct: add zone table entry when handling pre/post_ct flows") CC: stable@vger.kernel.org # 5.14+ Signed-off-by: Hui Zhou Signed-off-by: Louis Peens Link: https://lore.kernel.org/r/20240124151909.31603-2-louis.peens@corigine.com Signed-off-by: Jakub Kicinski --- .../ethernet/netronome/nfp/flower/conntrack.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 2967bab72505..726d8cdf0b9c 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1864,10 +1864,30 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, { struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct nfp_fl_ct_flow_entry *ct_entry; + struct flow_action_entry *ct_goto; struct nfp_fl_ct_zone_entry *zt; + struct flow_action_entry *act; bool wildcarded = false; struct flow_match_ct ct; - struct flow_action_entry *ct_goto; + int i; + + flow_action_for_each(i, act, &rule->action) { + switch (act->id) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_REDIRECT_INGRESS: + case FLOW_ACTION_MIRRED: + case FLOW_ACTION_MIRRED_INGRESS: + if (act->dev->rtnl_link_ops && + !strcmp(act->dev->rtnl_link_ops->kind, "openvswitch")) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: out port is openvswitch internal port"); + return -EOPNOTSUPP; + } + break; + default: + break; + } + } flow_rule_match_ct(rule, &ct); if (!ct.mask->ct_zone) { From 3a007b8009b5f8af021021b7a590a6da0dc4c6e0 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 24 Jan 2024 17:19:09 +0200 Subject: [PATCH 195/347] nfp: flower: fix hardware offload for the transfer layer port The nfp driver will merge the tp source port and tp destination port into one dword which the offset must be zero to do hardware offload. However, the mangle action for the tp source port and tp destination port is separated for tc ct action. Modify the mangle action for the FLOW_ACT_MANGLE_HDR_TYPE_TCP and FLOW_ACT_MANGLE_HDR_TYPE_UDP to satisfy the nfp driver offload check for the tp port. The mangle action provides a 4B value for source, and a 4B value for the destination, but only 2B of each contains the useful information. For offload the 2B of each is combined into a single 4B word. Since the incoming mask for the source is '0xFFFF' the shift-left will throw away the 0xFFFF part. When this gets combined together in the offload it will clear the destination field. Fix this by setting the lower bits back to 0xFFFF, effectively doing a rotate-left operation on the mask. Fixes: 5cee92c6f57a ("nfp: flower: support hw offload for ct nat action") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Hui Zhou Signed-off-by: Louis Peens Link: https://lore.kernel.org/r/20240124151909.31603-3-louis.peens@corigine.com Signed-off-by: Jakub Kicinski --- .../ethernet/netronome/nfp/flower/conntrack.c | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 726d8cdf0b9c..15180538b80a 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1424,10 +1424,30 @@ static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_ mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask); return; + /* Both struct tcphdr and struct udphdr start with + * __be16 source; + * __be16 dest; + * so we can use the same code for both. + */ case FLOW_ACT_MANGLE_HDR_TYPE_TCP: case FLOW_ACT_MANGLE_HDR_TYPE_UDP: - mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val); - mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask); + if (mangle_action->mangle.offset == offsetof(struct tcphdr, source)) { + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val << 16); + /* The mask of mangle action is inverse mask, + * so clear the dest tp port with 0xFFFF to + * instead of rotate-left operation. + */ + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask << 16 | 0xFFFF); + } + if (mangle_action->mangle.offset == offsetof(struct tcphdr, dest)) { + mangle_action->mangle.offset = 0; + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val); + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask); + } return; default: From cae1f1c36661f28c92a1db9113961a9ebd61dbaa Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 24 Jan 2024 16:22:09 +0000 Subject: [PATCH 196/347] net: ethernet: mtk_eth_soc: set DMA coherent mask to get PPE working Set DMA coherent mask to 32-bit which makes PPE offloading engine start working on BPi-R4 which got 4 GiB of RAM. Fixes: 2d75891ebc09 ("net: ethernet: mtk_eth_soc: support 36-bit DMA addressing on MT7988") Suggested-by: Elad Yifee Signed-off-by: Daniel Golle Link: https://lore.kernel.org/r/97e90925368b405f0974b9b15f1b7377c4a329ad.1706113251.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index a6e91573f8da..de123350bd46 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4761,7 +4761,10 @@ static int mtk_probe(struct platform_device *pdev) } if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA)) { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(36)); + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(36)); + if (!err) + err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { dev_err(&pdev->dev, "Wrong DMA config\n"); return -EINVAL; From ff63cc2e95065bea978d2db01f7e7356cca3d021 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 24 Jan 2024 05:18:23 +0000 Subject: [PATCH 197/347] net: phy: mediatek-ge-soc: sync driver with MediaTek SDK Sync initialization and calibration routines with MediaTek's reference driver. Improves compliance and resolves link stability issues with CH340 IoT devices connected to MT798x built-in PHYs. Fixes: 98c485eaf509 ("net: phy: add driver for MediaTek SoC built-in GE PHYs") Signed-off-by: Daniel Golle Link: https://lore.kernel.org/r/f2195279c234c0f618946424b8236026126bc595.1706071311.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/mediatek-ge-soc.c | 147 ++++++++++++++++-------------- 1 file changed, 81 insertions(+), 66 deletions(-) diff --git a/drivers/net/phy/mediatek-ge-soc.c b/drivers/net/phy/mediatek-ge-soc.c index 8a20d9889f10..0f3a1538a8b8 100644 --- a/drivers/net/phy/mediatek-ge-soc.c +++ b/drivers/net/phy/mediatek-ge-soc.c @@ -489,7 +489,7 @@ static int tx_r50_fill_result(struct phy_device *phydev, u16 tx_r50_cal_val, u16 reg, val; if (phydev->drv->phy_id == MTK_GPHY_ID_MT7988) - bias = -2; + bias = -1; val = clamp_val(bias + tx_r50_cal_val, 0, 63); @@ -705,6 +705,11 @@ restore: static void mt798x_phy_common_finetune(struct phy_device *phydev) { phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); + /* SlvDSPreadyTime = 24, MasDSPreadyTime = 24 */ + __phy_write(phydev, 0x11, 0xc71); + __phy_write(phydev, 0x12, 0xc); + __phy_write(phydev, 0x10, 0x8fae); + /* EnabRandUpdTrig = 1 */ __phy_write(phydev, 0x11, 0x2f00); __phy_write(phydev, 0x12, 0xe); @@ -715,15 +720,56 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev) __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x83aa); - /* TrFreeze = 0 */ + /* FfeUpdGainForce = 1(Enable), FfeUpdGainForceVal = 4 */ + __phy_write(phydev, 0x11, 0x240); + __phy_write(phydev, 0x12, 0x0); + __phy_write(phydev, 0x10, 0x9680); + + /* TrFreeze = 0 (mt7988 default) */ __phy_write(phydev, 0x11, 0x0); __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x9686); + /* SSTrKp100 = 5 */ + /* SSTrKf100 = 6 */ + /* SSTrKp1000Mas = 5 */ + /* SSTrKf1000Mas = 6 */ /* SSTrKp1000Slv = 5 */ + /* SSTrKf1000Slv = 6 */ __phy_write(phydev, 0x11, 0xbaef); __phy_write(phydev, 0x12, 0x2e); __phy_write(phydev, 0x10, 0x968c); + phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); +} + +static void mt7981_phy_finetune(struct phy_device *phydev) +{ + u16 val[8] = { 0x01ce, 0x01c1, + 0x020f, 0x0202, + 0x03d0, 0x03c0, + 0x0013, 0x0005 }; + int i, k; + + /* 100M eye finetune: + * Keep middle level of TX MLT3 shapper as default. + * Only change TX MLT3 overshoot level here. + */ + for (k = 0, i = 1; i < 12; i++) { + if (i % 3 == 0) + continue; + phy_write_mmd(phydev, MDIO_MMD_VEND1, i, val[k++]); + } + + phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); + /* ResetSyncOffset = 6 */ + __phy_write(phydev, 0x11, 0x600); + __phy_write(phydev, 0x12, 0x0); + __phy_write(phydev, 0x10, 0x8fc0); + + /* VgaDecRate = 1 */ + __phy_write(phydev, 0x11, 0x4c2a); + __phy_write(phydev, 0x12, 0x3e); + __phy_write(phydev, 0x10, 0x8fa4); /* MrvlTrFix100Kp = 3, MrvlTrFix100Kf = 2, * MrvlTrFix1000Kp = 3, MrvlTrFix1000Kf = 2 @@ -738,7 +784,7 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev) __phy_write(phydev, 0x10, 0x8ec0); phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); - /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 9*/ + /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 9 */ phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG234, MTK_PHY_TR_OPEN_LOOP_EN_MASK | MTK_PHY_LPF_X_AVERAGE_MASK, BIT(0) | FIELD_PREP(MTK_PHY_LPF_X_AVERAGE_MASK, 0x9)); @@ -771,48 +817,6 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev) phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_LDO_OUTPUT_V, 0x2222); } -static void mt7981_phy_finetune(struct phy_device *phydev) -{ - u16 val[8] = { 0x01ce, 0x01c1, - 0x020f, 0x0202, - 0x03d0, 0x03c0, - 0x0013, 0x0005 }; - int i, k; - - /* 100M eye finetune: - * Keep middle level of TX MLT3 shapper as default. - * Only change TX MLT3 overshoot level here. - */ - for (k = 0, i = 1; i < 12; i++) { - if (i % 3 == 0) - continue; - phy_write_mmd(phydev, MDIO_MMD_VEND1, i, val[k++]); - } - - phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - /* SlvDSPreadyTime = 24, MasDSPreadyTime = 24 */ - __phy_write(phydev, 0x11, 0xc71); - __phy_write(phydev, 0x12, 0xc); - __phy_write(phydev, 0x10, 0x8fae); - - /* ResetSyncOffset = 6 */ - __phy_write(phydev, 0x11, 0x600); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x8fc0); - - /* VgaDecRate = 1 */ - __phy_write(phydev, 0x11, 0x4c2a); - __phy_write(phydev, 0x12, 0x3e); - __phy_write(phydev, 0x10, 0x8fa4); - - /* FfeUpdGainForce = 4 */ - __phy_write(phydev, 0x11, 0x240); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x9680); - - phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); -} - static void mt7988_phy_finetune(struct phy_device *phydev) { u16 val[12] = { 0x0187, 0x01cd, 0x01c8, 0x0182, @@ -827,17 +831,7 @@ static void mt7988_phy_finetune(struct phy_device *phydev) /* TCT finetune */ phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_TX_FILTER, 0x5); - /* Disable TX power saving */ - phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RXADC_CTRL_RG7, - MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, 0x3 << 8); - phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - - /* SlvDSPreadyTime = 24, MasDSPreadyTime = 12 */ - __phy_write(phydev, 0x11, 0x671); - __phy_write(phydev, 0x12, 0xc); - __phy_write(phydev, 0x10, 0x8fae); - /* ResetSyncOffset = 5 */ __phy_write(phydev, 0x11, 0x500); __phy_write(phydev, 0x12, 0x0); @@ -845,13 +839,27 @@ static void mt7988_phy_finetune(struct phy_device *phydev) /* VgaDecRate is 1 at default on mt7988 */ + /* MrvlTrFix100Kp = 6, MrvlTrFix100Kf = 7, + * MrvlTrFix1000Kp = 6, MrvlTrFix1000Kf = 7 + */ + __phy_write(phydev, 0x11, 0xb90a); + __phy_write(phydev, 0x12, 0x6f); + __phy_write(phydev, 0x10, 0x8f82); + + /* RemAckCntLimitCtrl = 1 */ + __phy_write(phydev, 0x11, 0xfbba); + __phy_write(phydev, 0x12, 0xc3); + __phy_write(phydev, 0x10, 0x87f8); + phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); - phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_2A30); - /* TxClkOffset = 2 */ - __phy_modify(phydev, MTK_PHY_ANARG_RG, MTK_PHY_TCLKOFFSET_MASK, - FIELD_PREP(MTK_PHY_TCLKOFFSET_MASK, 0x2)); - phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); + /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 10 */ + phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG234, + MTK_PHY_TR_OPEN_LOOP_EN_MASK | MTK_PHY_LPF_X_AVERAGE_MASK, + BIT(0) | FIELD_PREP(MTK_PHY_LPF_X_AVERAGE_MASK, 0xa)); + + /* rg_tr_lpf_cnt_val = 1023 */ + phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_LPF_CNT_VAL, 0x3ff); } static void mt798x_phy_eee(struct phy_device *phydev) @@ -884,11 +892,11 @@ static void mt798x_phy_eee(struct phy_device *phydev) MTK_PHY_LPI_SLV_SEND_TX_EN, FIELD_PREP(MTK_PHY_LPI_SLV_SEND_TX_TIMER_MASK, 0x120)); - phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG239, - MTK_PHY_LPI_SEND_LOC_TIMER_MASK | - MTK_PHY_LPI_TXPCS_LOC_RCV, - FIELD_PREP(MTK_PHY_LPI_SEND_LOC_TIMER_MASK, 0x117)); + /* Keep MTK_PHY_LPI_SEND_LOC_TIMER as 375 */ + phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG239, + MTK_PHY_LPI_TXPCS_LOC_RCV); + /* This also fixes some IoT issues, such as CH340 */ phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG2C7, MTK_PHY_MAX_GAIN_MASK | MTK_PHY_MIN_GAIN_MASK, FIELD_PREP(MTK_PHY_MAX_GAIN_MASK, 0x8) | @@ -922,7 +930,7 @@ static void mt798x_phy_eee(struct phy_device *phydev) __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x9690); - /* REG_EEE_st2TrKf1000 = 3 */ + /* REG_EEE_st2TrKf1000 = 2 */ __phy_write(phydev, 0x11, 0x114f); __phy_write(phydev, 0x12, 0x2); __phy_write(phydev, 0x10, 0x969a); @@ -947,7 +955,7 @@ static void mt798x_phy_eee(struct phy_device *phydev) __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x96b8); - /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 1 */ + /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 0 */ __phy_write(phydev, 0x11, 0x1463); __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x96ca); @@ -1459,6 +1467,13 @@ static int mt7988_phy_probe(struct phy_device *phydev) if (err) return err; + /* Disable TX power saving at probing to: + * 1. Meet common mode compliance test criteria + * 2. Make sure that TX-VCM calibration works fine + */ + phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RXADC_CTRL_RG7, + MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, 0x3 << 8); + return mt798x_phy_calibration(phydev); } From 66dbd9004a55073c5931f5f65f5fe2bbd414bdaa Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Jan 2024 13:08:11 -0800 Subject: [PATCH 198/347] drm/sched: Drain all entities in DRM sched run job worker All entities must be drained in the DRM scheduler run job worker to avoid the following case. An entity found that is ready, no job found ready on entity, and run job worker goes idle with other entities + jobs ready. Draining all ready entities (i.e. loop over all ready entities) in the run job worker ensures all job that are ready will be scheduled. Cc: Thorsten Leemhuis Reported-by: Mikhail Gavrilov Closes: https://lore.kernel.org/all/CABXGCsM2VLs489CH-vF-1539-s3in37=bwuOWtoeeE+q26zE+Q@mail.gmail.com/ Reported-and-tested-by: Mario Limonciello Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3124 Link: https://lore.kernel.org/all/20240123021155.2775-1-mario.limonciello@amd.com/ Reported-and-tested-by: Vlastimil Babka Closes: https://lore.kernel.org/dri-devel/05ddb2da-b182-4791-8ef7-82179fd159a8@amd.com/T/#m0c31d4d1b9ae9995bb880974c4f1dbaddc33a48a Signed-off-by: Matthew Brost Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20240124210811.1639040-1-matthew.brost@intel.com --- drivers/gpu/drm/scheduler/sched_main.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 550492a7a031..85f082396d42 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1178,21 +1178,20 @@ static void drm_sched_run_job_work(struct work_struct *w) struct drm_sched_entity *entity; struct dma_fence *fence; struct drm_sched_fence *s_fence; - struct drm_sched_job *sched_job; + struct drm_sched_job *sched_job = NULL; int r; if (READ_ONCE(sched->pause_submit)) return; - entity = drm_sched_select_entity(sched); - if (!entity) - return; - - sched_job = drm_sched_entity_pop_job(entity); - if (!sched_job) { - complete_all(&entity->entity_idle); - return; /* No more work */ + /* Find entity with a ready job */ + while (!sched_job && (entity = drm_sched_select_entity(sched))) { + sched_job = drm_sched_entity_pop_job(entity); + if (!sched_job) + complete_all(&entity->entity_idle); } + if (!entity) + return; /* No more work */ s_fence = sched_job->s_fence; From ab4443fe3ca6298663a55c4a70efc6c3ce913ca6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 4 Jan 2024 09:58:39 +0100 Subject: [PATCH 199/347] readahead: avoid multiple marked readahead pages ra_alloc_folio() marks a page that should trigger next round of async readahead. However it rounds up computed index to the order of page being allocated. This can however lead to multiple consecutive pages being marked with readahead flag. Consider situation with index == 1, mark == 1, order == 0. We insert order 0 page at index 1 and mark it. Then we bump order to 1, index to 2, mark (still == 1) is rounded up to 2 so page at index 2 is marked as well. Then we bump order to 2, index is incremented to 4, mark gets rounded to 4 so page at index 4 is marked as well. The fact that multiple pages get marked within a single readahead window confuses the readahead logic and results in readahead window being trimmed back to 1. This situation is triggered in particular when maximum readahead window size is not a power of two (in the observed case it was 768 KB) and as a result sequential read throughput suffers. Fix the problem by rounding 'mark' down instead of up. Because the index is naturally aligned to 'order', we are guaranteed 'rounded mark' == index iff 'mark' is within the page we are allocating at 'index' and thus exactly one page is marked with readahead flag as required by the readahead code and sequential read performance is restored. This effectively reverts part of commit b9ff43dd2743 ("mm/readahead: Fix readahead with large folios"). The commit changed the rounding with the rationale: "... we were setting the readahead flag on the folio which contains the last byte read from the block. This is wrong because we will trigger readahead at the end of the read without waiting to see if a subsequent read is going to use the pages we just read." Although this is true, the fact is this was always the case with read sizes not aligned to folio boundaries and large folios in the page cache just make the situation more obvious (and frequent). Also for sequential read workloads it is better to trigger the readahead earlier rather than later. It is true that the difference in the rounding and thus earlier triggering of the readahead can result in reading more for semi-random workloads. However workloads really suffering from this seem to be rare. In particular I have verified that the workload described in commit b9ff43dd2743 ("mm/readahead: Fix readahead with large folios") of reading random 100k blocks from a file like: [reader] bs=100k rw=randread numjobs=1 size=64g runtime=60s is not impacted by the rounding change and achieves ~70MB/s in both cases. [jack@suse.cz: fix one more place where mark rounding was done as well] Link: https://lkml.kernel.org/r/20240123153254.5206-1-jack@suse.cz Link: https://lkml.kernel.org/r/20240104085839.21029-1-jack@suse.cz Fixes: b9ff43dd2743 ("mm/readahead: Fix readahead with large folios") Signed-off-by: Jan Kara Cc: Matthew Wilcox Cc: Guo Xuenan Cc: Signed-off-by: Andrew Morton --- mm/readahead.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/readahead.c b/mm/readahead.c index 23620c57c122..2648ec4f0494 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -469,7 +469,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, if (!folio) return -ENOMEM; - mark = round_up(mark, 1UL << order); + mark = round_down(mark, 1UL << order); if (index == mark) folio_set_readahead(folio); err = filemap_add_folio(ractl->mapping, folio, index, gfp); @@ -575,7 +575,7 @@ static void ondemand_readahead(struct readahead_control *ractl, * It's the expected callback index, assume sequential access. * Ramp up sizes, and push forward the readahead window. */ - expected = round_up(ra->start + ra->size - ra->async_size, + expected = round_down(ra->start + ra->size - ra->async_size, 1UL << order); if (index == expected || index == (ra->start + ra->size)) { ra->start += ra->size; From 19d3e221807772f8443e565234a6fdc5a2b09d26 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 12 Jan 2024 10:08:40 -0800 Subject: [PATCH 200/347] fs/hugetlbfs/inode.c: mm/memory-failure.c: fix hugetlbfs hwpoison handling has_extra_refcount() makes the assumption that the page cache adds a ref count of 1 and subtracts this in the extra_pins case. Commit a08c7193e4f1 (mm/filemap: remove hugetlb special casing in filemap.c) modifies __filemap_add_folio() by calling folio_ref_add(folio, nr); for all cases (including hugtetlb) where nr is the number of pages in the folio. We should adjust the number of references coming from the page cache by subtracing the number of pages rather than 1. In hugetlbfs_read_iter(), folio_test_has_hwpoisoned() is testing the wrong flag as, in the hugetlb case, memory-failure code calls folio_test_set_hwpoison() to indicate poison. folio_test_hwpoison() is the correct function to test for that flag. After these fixes, the hugetlb hwpoison read selftest passes all cases. Link: https://lkml.kernel.org/r/20240112180840.367006-1-sidhartha.kumar@oracle.com Fixes: a08c7193e4f1 ("mm/filemap: remove hugetlb special casing in filemap.c") Signed-off-by: Sidhartha Kumar Closes: https://lore.kernel.org/linux-mm/20230713001833.3778937-1-jiaqiyan@google.com/T/#m8e1469119e5b831bbd05d495f96b842e4a1c5519 Reported-by: Muhammad Usama Anjum Tested-by: Muhammad Usama Anjum Acked-by: Miaohe Lin Acked-by: Muchun Song Cc: James Houghton Cc: Jiaqi Yan Cc: Matthew Wilcox (Oracle) Cc: Naoya Horiguchi Cc: [6.7+] Signed-off-by: Andrew Morton --- fs/hugetlbfs/inode.c | 2 +- mm/memory-failure.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index ea5b8e57d904..671664fed307 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -340,7 +340,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) } else { folio_unlock(folio); - if (!folio_test_has_hwpoisoned(folio)) + if (!folio_test_hwpoison(folio)) want = nr; else { /* diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 4f9b61f4a668..636280d04008 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -982,7 +982,7 @@ static bool has_extra_refcount(struct page_state *ps, struct page *p, int count = page_count(p) - 1; if (extra_pins) - count -= 1; + count -= folio_nr_pages(page_folio(p)); if (count > 0) { pr_err("%#lx: %s still referenced by %d users\n", From f8ee4361b7be3f0c5bd21ee47561473ddf3aa17b Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Fri, 12 Jan 2024 12:18:50 +0500 Subject: [PATCH 201/347] selftests/mm: mremap_test: fix build warning Use 2 separate variables of types int and unsigned long long instead of confusing them. This corrects the correct print format for each of them and removes the build warning: warning: format `%d' expects argument of type `int', but argument 2 has type `long long unsigned int' Link: https://lkml.kernel.org/r/20240112071851.612930-1-usama.anjum@collabora.com Fixes: a4cb3b243343 ("selftests: mm: add a test for remapping to area immediately after existing mapping") Signed-off-by: Muhammad Usama Anjum Cc: Joel Fernandes (Google) Cc: Lorenzo Stoakes Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mremap_test.c | 27 ++++++++++++------------ 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 1d4c1589c305..2f8b991f78cb 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -360,7 +360,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb, char pattern_seed) { void *addr, *src_addr, *dest_addr, *dest_preamble_addr; - unsigned long long i; + int d; + unsigned long long t; struct timespec t_start = {0, 0}, t_end = {0, 0}; long long start_ns, end_ns, align_mask, ret, offset; unsigned long long threshold; @@ -378,8 +379,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Set byte pattern for source block. */ srand(pattern_seed); - for (i = 0; i < threshold; i++) - memset((char *) src_addr + i, (char) rand(), 1); + for (t = 0; t < threshold; t++) + memset((char *) src_addr + t, (char) rand(), 1); /* Mask to zero out lower bits of address for alignment */ align_mask = ~(c.dest_alignment - 1); @@ -420,8 +421,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Set byte pattern for the dest preamble block. */ srand(pattern_seed); - for (i = 0; i < c.dest_preamble_size; i++) - memset((char *) dest_preamble_addr + i, (char) rand(), 1); + for (d = 0; d < c.dest_preamble_size; d++) + memset((char *) dest_preamble_addr + d, (char) rand(), 1); } clock_gettime(CLOCK_MONOTONIC, &t_start); @@ -437,14 +438,14 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Verify byte pattern after remapping */ srand(pattern_seed); - for (i = 0; i < threshold; i++) { + for (t = 0; t < threshold; t++) { char c = (char) rand(); - if (((char *) dest_addr)[i] != c) { + if (((char *) dest_addr)[t] != c) { ksft_print_msg("Data after remap doesn't match at offset %llu\n", - i); + t); ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, - ((char *) dest_addr)[i] & 0xff); + ((char *) dest_addr)[t] & 0xff); ret = -1; goto clean_up_dest; } @@ -453,14 +454,14 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Verify the dest preamble byte pattern after remapping */ if (c.dest_preamble_size) { srand(pattern_seed); - for (i = 0; i < c.dest_preamble_size; i++) { + for (d = 0; d < c.dest_preamble_size; d++) { char c = (char) rand(); - if (((char *) dest_preamble_addr)[i] != c) { + if (((char *) dest_preamble_addr)[d] != c) { ksft_print_msg("Preamble data after remap doesn't match at offset %d\n", - i); + d); ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, - ((char *) dest_preamble_addr)[i] & 0xff); + ((char *) dest_preamble_addr)[d] & 0xff); ret = -1; goto clean_up_dest; } From 4dca82d14174fe53f656a6bc32398db1bdd8f481 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 15 Jan 2024 11:07:31 +0100 Subject: [PATCH 202/347] uprobes: use pagesize-aligned virtual address when replacing pages uprobes passes an unaligned page mapping address to folio_add_new_anon_rmap(), which ends up triggering a VM_BUG_ON() we recently extended in commit 372cbd4d5a066 ("mm: non-pmd-mappable, large folios for folio_add_new_anon_rmap()"). Arguably, this is uprobes code doing something wrong; however, for the time being it would have likely worked in rmap code because __folio_set_anon() would set folio->index to the same value. Looking at __replace_page(), we'd also pass slightly wrong values to mmu_notifier_range_init(), page_vma_mapped_walk(), flush_cache_page(), ptep_clear_flush() and set_pte_at_notify(). I suspect most of them are fine, but let's just mark the introducing commit as the one needed fixing. I don't think CC stable is warranted. We'll add more sanity checks in rmap code separately, to make sure that we always get properly aligned addresses. Link: https://lkml.kernel.org/r/20240115100731.91007-1-david@redhat.com Fixes: c517ee744b96 ("uprobes: __replace_page() should not use page_address_in_vma()") Signed-off-by: David Hildenbrand Reported-by: Jiri Olsa Closes: https://lkml.kernel.org/r/ZaMR2EWN-HvlCfUl@krava Tested-by: Jiri Olsa Reviewed-by: Ryan Roberts Acked-by: Oleg Nesterov Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Ian Rogers Cc: Adrian Hunter Signed-off-by: Andrew Morton --- kernel/events/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 485bb0389b48..929e98c62965 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -537,7 +537,7 @@ retry: } } - ret = __replace_page(vma, vaddr, old_page, new_page); + ret = __replace_page(vma, vaddr & PAGE_MASK, old_page, new_page); if (new_page) put_page(new_page); put_old: From c4608d1bf7c6536d1a3d233eb21e50678681564e Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Wed, 20 Dec 2023 22:59:43 -0800 Subject: [PATCH 203/347] mm: mmap: map MAP_STACK to VM_NOHUGEPAGE commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") incured regression for stress-ng pthread benchmark [1]. It is because THP get allocated to pthread's stack area much more possible than before. Pthread's stack area is allocated by mmap without VM_GROWSDOWN or VM_GROWSUP flag, so kernel can't tell whether it is a stack area or not. The MAP_STACK flag is used to mark the stack area, but it is a no-op on Linux. Mapping MAP_STACK to VM_NOHUGEPAGE to prevent from allocating THP for such stack area. With this change the stack area looks like: fffd18e10000-fffd19610000 rw-p 00000000 00:00 0 Size: 8192 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 12 kB Pss: 12 kB Pss_Dirty: 12 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 12 kB Referenced: 12 kB Anonymous: 12 kB KSM: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd wr mr mw me ac nh The "nh" flag is set. [1] https://lore.kernel.org/linux-mm/202312192310.56367035-oliver.sang@intel.com/ Link: https://lkml.kernel.org/r/20231221065943.2803551-2-shy828301@gmail.com Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") Signed-off-by: Yang Shi Reported-by: kernel test robot Tested-by: Oliver Sang Reviewed-by: Yin Fengwei Cc: Rik van Riel Cc: Matthew Wilcox Cc: Christopher Lameter Cc: Huang, Ying Cc: Signed-off-by: Andrew Morton --- include/linux/mman.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mman.h b/include/linux/mman.h index 40d94411d492..dc7048824be8 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -156,6 +156,7 @@ calc_vm_flag_bits(unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | + _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | arch_calc_vm_flag_bits(flags); } From 63fd327016fdfca6f2fa27eba3496bd079eb8ed3 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 11 Jan 2024 08:29:02 -0500 Subject: [PATCH 204/347] mm: memcontrol: don't throttle dying tasks on memory.high While investigating hosts with high cgroup memory pressures, Tejun found culprit zombie tasks that had were holding on to a lot of memory, had SIGKILL pending, but were stuck in memory.high reclaim. In the past, we used to always force-charge allocations from tasks that were exiting in order to accelerate them dying and freeing up their rss. This changed for memory.max in a4ebf1b6ca1e ("memcg: prohibit unconditional exceeding the limit of dying tasks"); it noted that this can cause (userspace inducable) containment failures, so it added a mandatory reclaim and OOM kill cycle before forcing charges. At the time, memory.high enforcement was handled in the userspace return path, which isn't reached by dying tasks, and so memory.high was still never enforced by dying tasks. When c9afe31ec443 ("memcg: synchronously enforce memory.high for large overcharges") added synchronous reclaim for memory.high, it added unconditional memory.high enforcement for dying tasks as well. The callstack shows that this path is where the zombie is stuck in. We need to accelerate dying tasks getting past memory.high, but we cannot do it quite the same way as we do for memory.max: memory.max is enforced strictly, and tasks aren't allowed to move past it without FIRST reclaiming and OOM killing if necessary. This ensures very small levels of excess. With memory.high, though, enforcement happens lazily after the charge, and OOM killing is never triggered. A lot of concurrent threads could have pushed, or could actively be pushing, the cgroup into excess. The dying task will enter reclaim on every allocation attempt, with little hope of restoring balance. To fix this, skip synchronous memory.high enforcement on dying tasks altogether again. Update memory.high path documentation while at it. [hannes@cmpxchg.org: also handle tasks are being killed during the reclaim] Link: https://lkml.kernel.org/r/20240111192807.GA424308@cmpxchg.org Link: https://lkml.kernel.org/r/20240111132902.389862-1-hannes@cmpxchg.org Fixes: c9afe31ec443 ("memcg: synchronously enforce memory.high for large overcharges") Signed-off-by: Johannes Weiner Reported-by: Tejun Heo Reviewed-by: Yosry Ahmed Acked-by: Shakeel Butt Acked-by: Roman Gushchin Cc: Dan Schatzberg Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/memcontrol.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e4c8735e7c85..46d8d02114cf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2623,8 +2623,9 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg, } /* - * Scheduled by try_charge() to be executed from the userland return path - * and reclaims memory over the high limit. + * Reclaims memory over the high limit. Called directly from + * try_charge() (context permitting), as well as from the userland + * return path where reclaim is always able to block. */ void mem_cgroup_handle_over_high(gfp_t gfp_mask) { @@ -2643,6 +2644,17 @@ void mem_cgroup_handle_over_high(gfp_t gfp_mask) current->memcg_nr_pages_over_high = 0; retry_reclaim: + /* + * Bail if the task is already exiting. Unlike memory.max, + * memory.high enforcement isn't as strict, and there is no + * OOM killer involved, which means the excess could already + * be much bigger (and still growing) than it could for + * memory.max; the dying task could get stuck in fruitless + * reclaim for a long time, which isn't desirable. + */ + if (task_is_dying()) + goto out; + /* * The allocating task should reclaim at least the batch size, but for * subsequent retries we only want to do what's necessary to prevent oom @@ -2693,6 +2705,9 @@ retry_reclaim: } /* + * Reclaim didn't manage to push usage below the limit, slow + * this allocating task down. + * * If we exit early, we're guaranteed to die (since * schedule_timeout_killable sets TASK_KILLABLE). This means we don't * need to account for any ill-begotten jiffies to pay them off later. @@ -2887,11 +2902,17 @@ done_restock: } } while ((memcg = parent_mem_cgroup(memcg))); + /* + * Reclaim is set up above to be called from the userland + * return path. But also attempt synchronous reclaim to avoid + * excessive overrun while the task is still inside the + * kernel. If this is successful, the return path will see it + * when it rechecks the overage and simply bail out. + */ if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH && !(current->flags & PF_MEMALLOC) && - gfpflags_allow_blocking(gfp_mask)) { + gfpflags_allow_blocking(gfp_mask)) mem_cgroup_handle_over_high(gfp_mask); - } return 0; } From 0d7e68c0271853c30655b05934d66cec81dc6afc Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Wed, 17 Jan 2024 13:22:57 +0100 Subject: [PATCH 205/347] MAINTAINERS: add man-pages git trees The maintainer uses both. Link: https://lkml.kernel.org/r/20240117122257.2707637-1-pvorel@suse.cz Signed-off-by: Petr Vorel Reviewed-by: Alejandro Colomar Signed-off-by: Andrew Morton --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..8c756737491e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12902,6 +12902,8 @@ M: Alejandro Colomar L: linux-man@vger.kernel.org S: Maintained W: http://www.kernel.org/doc/man-pages +T: git git://git.kernel.org/pub/scm/docs/man-pages/man-pages.git +T: git git://www.alejandro-colomar.es/src/alx/linux/man-pages/man-pages.git MANAGEMENT COMPONENT TRANSPORT PROTOCOL (MCTP) M: Jeremy Kerr From bc29036e1da1cf66e5f8312649aeec2d51ea3d86 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Tue, 16 Jan 2024 14:04:54 +0500 Subject: [PATCH 206/347] selftests/mm: switch to bash from sh Running charge_reserved_hugetlb.sh generates errors if sh is set to dash: ./charge_reserved_hugetlb.sh: 9: [[: not found ./charge_reserved_hugetlb.sh: 19: [[: not found ./charge_reserved_hugetlb.sh: 27: [[: not found ./charge_reserved_hugetlb.sh: 37: [[: not found ./charge_reserved_hugetlb.sh: 45: Syntax error: "(" unexpected Switch to using /bin/bash instead of /bin/sh. Make the switch for write_hugetlb_memory.sh as well which is called from charge_reserved_hugetlb.sh. Link: https://lkml.kernel.org/r/20240116090455.3407378-1-usama.anjum@collabora.com Signed-off-by: Muhammad Usama Anjum Cc: Muhammad Usama Anjum Cc: Shuah Khan Cc: David Laight Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/charge_reserved_hugetlb.sh | 2 +- tools/testing/selftests/mm/write_hugetlb_memory.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index 0899019a7fcb..e14bdd4455f2 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Kselftest framework requirement - SKIP code is 4. diff --git a/tools/testing/selftests/mm/write_hugetlb_memory.sh b/tools/testing/selftests/mm/write_hugetlb_memory.sh index 70a02301f4c2..3d2d2eb9d6ff 100755 --- a/tools/testing/selftests/mm/write_hugetlb_memory.sh +++ b/tools/testing/selftests/mm/write_hugetlb_memory.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 set -e From 9319b647902cbd5cc884ac08a8a6d54ce111fc78 Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Thu, 18 Jan 2024 10:19:53 -0800 Subject: [PATCH 207/347] mm/writeback: fix possible divide-by-zero in wb_dirty_limits(), again (struct dirty_throttle_control *)->thresh is an unsigned long, but is passed as the u32 divisor argument to div_u64(). On architectures where unsigned long is 64 bytes, the argument will be implicitly truncated. Use div64_u64() instead of div_u64() so that the value used in the "is this a safe division" check is the same as the divisor. Also, remove redundant cast of the numerator to u64, as that should happen implicitly. This would be difficult to exploit in memcg domain, given the ratio-based arithmetic domain_drity_limits() uses, but is much easier in global writeback domain with a BDI_CAP_STRICTLIMIT-backing device, using e.g. vm.dirty_bytes=(1<<32)*PAGE_SIZE so that dtc->thresh == (1<<32) Link: https://lkml.kernel.org/r/20240118181954.1415197-1-zokeefe@google.com Fixes: f6789593d5ce ("mm/page-writeback.c: fix divide by zero in bdi_dirty_limits()") Signed-off-by: Zach O'Keefe Cc: Maxim Patlasov Cc: Signed-off-by: Andrew Morton --- mm/page-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index cd4e4ae77c40..02147b61712b 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1638,7 +1638,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) */ dtc->wb_thresh = __wb_calc_thresh(dtc); dtc->wb_bg_thresh = dtc->thresh ? - div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; + div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; /* * In order to avoid the stacked BDI deadlock we need From f6564fce256a3944aa1bc76cb3c40e792d97c1eb Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 18 Jan 2024 11:59:14 +0100 Subject: [PATCH 208/347] mm, kmsan: fix infinite recursion due to RCU critical section Alexander Potapenko writes in [1]: "For every memory access in the code instrumented by KMSAN we call kmsan_get_metadata() to obtain the metadata for the memory being accessed. For virtual memory the metadata pointers are stored in the corresponding `struct page`, therefore we need to call virt_to_page() to get them. According to the comment in arch/x86/include/asm/page.h, virt_to_page(kaddr) returns a valid pointer iff virt_addr_valid(kaddr) is true, so KMSAN needs to call virt_addr_valid() as well. To avoid recursion, kmsan_get_metadata() must not call instrumented code, therefore ./arch/x86/include/asm/kmsan.h forks parts of arch/x86/mm/physaddr.c to check whether a virtual address is valid or not. But the introduction of rcu_read_lock() to pfn_valid() added instrumented RCU API calls to virt_to_page_or_null(), which is called by kmsan_get_metadata(), so there is an infinite recursion now. I do not think it is correct to stop that recursion by doing kmsan_enter_runtime()/kmsan_exit_runtime() in kmsan_get_metadata(): that would prevent instrumented functions called from within the runtime from tracking the shadow values, which might introduce false positives." Fix the issue by switching pfn_valid() to the _sched() variant of rcu_read_lock/unlock(), which does not require calling into RCU. Given the critical section in pfn_valid() is very small, this is a reasonable trade-off (with preemptible RCU). KMSAN further needs to be careful to suppress calls into the scheduler, which would be another source of recursion. This can be done by wrapping the call to pfn_valid() into preempt_disable/enable_no_resched(). The downside is that this sacrifices breaking scheduling guarantees; however, a kernel compiled with KMSAN has already given up any performance guarantees due to being heavily instrumented. Note, KMSAN code already disables tracing via Makefile, and since mmzone.h is included, it is not necessary to use the notrace variant, which is generally preferred in all other cases. Link: https://lkml.kernel.org/r/20240115184430.2710652-1-glider@google.com [1] Link: https://lkml.kernel.org/r/20240118110022.2538350-1-elver@google.com Fixes: 5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing memory_section->usage") Signed-off-by: Marco Elver Reported-by: Alexander Potapenko Reported-by: syzbot+93a9e8a3dea8d6085e12@syzkaller.appspotmail.com Reviewed-by: Alexander Potapenko Tested-by: Alexander Potapenko Cc: Charan Teja Kalla Cc: Borislav Petkov (AMD) Cc: Dave Hansen Cc: Dmitry Vyukov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/x86/include/asm/kmsan.h | 17 ++++++++++++++++- include/linux/mmzone.h | 6 +++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h index 8fa6ac0e2d76..d91b37f5b4bb 100644 --- a/arch/x86/include/asm/kmsan.h +++ b/arch/x86/include/asm/kmsan.h @@ -64,6 +64,7 @@ static inline bool kmsan_virt_addr_valid(void *addr) { unsigned long x = (unsigned long)addr; unsigned long y = x - __START_KERNEL_map; + bool ret; /* use the carry flag to determine if x was < __START_KERNEL_map */ if (unlikely(x > y)) { @@ -79,7 +80,21 @@ static inline bool kmsan_virt_addr_valid(void *addr) return false; } - return pfn_valid(x >> PAGE_SHIFT); + /* + * pfn_valid() relies on RCU, and may call into the scheduler on exiting + * the critical section. However, this would result in recursion with + * KMSAN. Therefore, disable preemption here, and re-enable preemption + * below while suppressing reschedules to avoid recursion. + * + * Note, this sacrifices occasionally breaking scheduling guarantees. + * Although, a kernel compiled with KMSAN has already given up on any + * performance guarantees due to being heavily instrumented. + */ + preempt_disable(); + ret = pfn_valid(x >> PAGE_SHIFT); + preempt_enable_no_resched(); + + return ret; } #endif /* !MODULE */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4ed33b127821..a497f189d988 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -2013,9 +2013,9 @@ static inline int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; ms = __pfn_to_section(pfn); - rcu_read_lock(); + rcu_read_lock_sched(); if (!valid_section(ms)) { - rcu_read_unlock(); + rcu_read_unlock_sched(); return 0; } /* @@ -2023,7 +2023,7 @@ static inline int pfn_valid(unsigned long pfn) * the entire section-sized span. */ ret = early_section(ms) || pfn_section_valid(ms, pfn); - rcu_read_unlock(); + rcu_read_unlock_sched(); return ret; } From c2a292545cd43dcab86d5290ed95b006f1cefe90 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 18 Jan 2024 12:01:29 +0100 Subject: [PATCH 209/347] stackdepot: add stats counters exported via debugfs Add a few basic stats counters for stack depot that can be used to derive if stack depot is working as intended. This is a snapshot of the new stats after booting a system with a KASAN-enabled kernel: $ cat /sys/kernel/debug/stackdepot/stats pools: 838 allocations: 29861 frees: 6561 in_use: 23300 freelist_size: 1840 Generally, "pools" should be well below the max; once the system is booted, "in_use" should remain relatively steady. Link: https://lkml.kernel.org/r/20240118110216.2539519-1-elver@google.com Signed-off-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: Andi Kleen Cc: Dmitry Vyukov Cc: Vlastimil Babka Cc: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index a0be5d05c7f0..80a8ca24ccc8 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) "stackdepot: " fmt +#include #include #include #include @@ -115,6 +116,23 @@ static bool new_pool_required = true; /* Lock that protects the variables above. */ static DEFINE_RWLOCK(pool_rwlock); +/* Statistics counters for debugfs. */ +enum depot_counter_id { + DEPOT_COUNTER_ALLOCS, + DEPOT_COUNTER_FREES, + DEPOT_COUNTER_INUSE, + DEPOT_COUNTER_FREELIST_SIZE, + DEPOT_COUNTER_COUNT, +}; +static long counters[DEPOT_COUNTER_COUNT]; +static const char *const counter_names[] = { + [DEPOT_COUNTER_ALLOCS] = "allocations", + [DEPOT_COUNTER_FREES] = "frees", + [DEPOT_COUNTER_INUSE] = "in_use", + [DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size", +}; +static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT); + static int __init disable_stack_depot(char *str) { return kstrtobool(str, &stack_depot_disabled); @@ -277,6 +295,7 @@ static void depot_init_pool(void *pool) stack->handle.extra = 0; list_add(&stack->list, &free_stacks); + counters[DEPOT_COUNTER_FREELIST_SIZE]++; } /* Save reference to the pool to be used by depot_fetch_stack(). */ @@ -376,6 +395,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) /* Get and unlink the first entry from the freelist. */ stack = list_first_entry(&free_stacks, struct stack_record, list); list_del(&stack->list); + counters[DEPOT_COUNTER_FREELIST_SIZE]--; /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ if (size > CONFIG_STACKDEPOT_MAX_FRAMES) @@ -394,6 +414,8 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) */ kmsan_unpoison_memory(stack, DEPOT_STACK_RECORD_SIZE); + counters[DEPOT_COUNTER_ALLOCS]++; + counters[DEPOT_COUNTER_INUSE]++; return stack; } @@ -426,6 +448,10 @@ static void depot_free_stack(struct stack_record *stack) lockdep_assert_held_write(&pool_rwlock); list_add(&stack->list, &free_stacks); + + counters[DEPOT_COUNTER_FREELIST_SIZE]++; + counters[DEPOT_COUNTER_FREES]++; + counters[DEPOT_COUNTER_INUSE]--; } /* Calculates the hash for a stack. */ @@ -690,3 +716,30 @@ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) return parts.extra; } EXPORT_SYMBOL(stack_depot_get_extra_bits); + +static int stats_show(struct seq_file *seq, void *v) +{ + /* + * data race ok: These are just statistics counters, and approximate + * statistics are ok for debugging. + */ + seq_printf(seq, "pools: %d\n", data_race(pools_num)); + for (int i = 0; i < DEPOT_COUNTER_COUNT; i++) + seq_printf(seq, "%s: %ld\n", counter_names[i], data_race(counters[i])); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(stats); + +static int depot_debugfs_init(void) +{ + struct dentry *dir; + + if (stack_depot_disabled) + return 0; + + dir = debugfs_create_dir("stackdepot", NULL); + debugfs_create_file("stats", 0444, dir, NULL, &stats_fops); + return 0; +} +late_initcall(depot_debugfs_init); From 4434a56ec20925333d6cf4d4093641d063abd35b Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 18 Jan 2024 12:01:30 +0100 Subject: [PATCH 210/347] stackdepot: make fast paths lock-less again With the introduction of the pool_rwlock (reader-writer lock), several fast paths end up taking the pool_rwlock as readers. Furthermore, stack_depot_put() unconditionally takes the pool_rwlock as a writer. Despite allowing readers to make forward-progress concurrently, reader-writer locks have inherent cache contention issues, which does not scale well on systems with large CPU counts. Rework the synchronization story of stack depot to again avoid taking any locks in the fast paths. This is done by relying on RCU-protected list traversal, and the NMI-safe subset of RCU to delay reuse of freed stack records. See code comments for more details. Along with the performance issues, this also fixes incorrect nesting of rwlock within a raw_spinlock, given that stack depot should still be usable from anywhere: | [ BUG: Invalid wait context ] | ----------------------------- | swapper/0/1 is trying to lock: | ffffffff89869be8 (pool_rwlock){..--}-{3:3}, at: stack_depot_save_flags | other info that might help us debug this: | context-{5:5} | 2 locks held by swapper/0/1: | #0: ffffffff89632440 (rcu_read_lock){....}-{1:3}, at: __queue_work | #1: ffff888100092018 (&pool->lock){-.-.}-{2:2}, at: __queue_work <-- raw_spin_lock Stack depot usage stats are similar to the previous version after a KASAN kernel boot: $ cat /sys/kernel/debug/stackdepot/stats pools: 838 allocations: 29865 frees: 6604 in_use: 23261 freelist_size: 1879 The number of pools is the same as previously. The freelist size is minimally larger, but this may also be due to variance across system boots. This shows that even though we do not eagerly wait for the next RCU grace period (such as with synchronize_rcu() or call_rcu()) after freeing a stack record - requiring depot_pop_free() to "poll" if an entry may be used - new allocations are very likely to happen in later RCU grace periods. Link: https://lkml.kernel.org/r/20240118110216.2539519-2-elver@google.com Fixes: 108be8def46e ("lib/stackdepot: allow users to evict stack traces") Reported-by: Andi Kleen Signed-off-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 322 +++++++++++++++++++++++++++++++---------------- 1 file changed, 211 insertions(+), 111 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 80a8ca24ccc8..5caa1f566553 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -22,8 +22,9 @@ #include #include #include -#include #include +#include +#include #include #include #include @@ -68,12 +69,28 @@ union handle_parts { }; struct stack_record { - struct list_head list; /* Links in hash table or freelist */ + struct list_head hash_list; /* Links in the hash table */ u32 hash; /* Hash in hash table */ u32 size; /* Number of stored frames */ - union handle_parts handle; + union handle_parts handle; /* Constant after initialization */ refcount_t count; - unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ + union { + unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ + struct { + /* + * An important invariant of the implementation is to + * only place a stack record onto the freelist iff its + * refcount is zero. Because stack records with a zero + * refcount are never considered as valid, it is safe to + * union @entries and freelist management state below. + * Conversely, as soon as an entry is off the freelist + * and its refcount becomes non-zero, the below must not + * be accessed until being placed back on the freelist. + */ + struct list_head free_list; /* Links in the freelist */ + unsigned long rcu_state; /* RCU cookie */ + }; + }; }; #define DEPOT_STACK_RECORD_SIZE \ @@ -113,8 +130,8 @@ static LIST_HEAD(free_stacks); * yet allocated or if the limit on the number of pools is reached. */ static bool new_pool_required = true; -/* Lock that protects the variables above. */ -static DEFINE_RWLOCK(pool_rwlock); +/* The lock must be held when performing pool or freelist modifications. */ +static DEFINE_RAW_SPINLOCK(pool_lock); /* Statistics counters for debugfs. */ enum depot_counter_id { @@ -276,14 +293,15 @@ out_unlock: } EXPORT_SYMBOL_GPL(stack_depot_init); -/* Initializes a stack depol pool. */ +/* + * Initializes new stack depot @pool, release all its entries to the freelist, + * and update the list of pools. + */ static void depot_init_pool(void *pool) { int offset; - lockdep_assert_held_write(&pool_rwlock); - - WARN_ON(!list_empty(&free_stacks)); + lockdep_assert_held(&pool_lock); /* Initialize handles and link stack records into the freelist. */ for (offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE; @@ -294,19 +312,36 @@ static void depot_init_pool(void *pool) stack->handle.offset = offset >> DEPOT_STACK_ALIGN; stack->handle.extra = 0; - list_add(&stack->list, &free_stacks); + /* + * Stack traces of size 0 are never saved, and we can simply use + * the size field as an indicator if this is a new unused stack + * record in the freelist. + */ + stack->size = 0; + + INIT_LIST_HEAD(&stack->hash_list); + /* + * Add to the freelist front to prioritize never-used entries: + * required in case there are entries in the freelist, but their + * RCU cookie still belongs to the current RCU grace period + * (there can still be concurrent readers). + */ + list_add(&stack->free_list, &free_stacks); counters[DEPOT_COUNTER_FREELIST_SIZE]++; } /* Save reference to the pool to be used by depot_fetch_stack(). */ stack_pools[pools_num] = pool; - pools_num++; + + /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */ + WRITE_ONCE(pools_num, pools_num + 1); + ASSERT_EXCLUSIVE_WRITER(pools_num); } /* Keeps the preallocated memory to be used for a new stack depot pool. */ static void depot_keep_new_pool(void **prealloc) { - lockdep_assert_held_write(&pool_rwlock); + lockdep_assert_held(&pool_lock); /* * If a new pool is already saved or the maximum number of @@ -329,17 +364,16 @@ static void depot_keep_new_pool(void **prealloc) * number of pools is reached. In either case, take note that * keeping another pool is not required. */ - new_pool_required = false; + WRITE_ONCE(new_pool_required, false); } -/* Updates references to the current and the next stack depot pools. */ -static bool depot_update_pools(void **prealloc) +/* + * Try to initialize a new stack depot pool from either a previous or the + * current pre-allocation, and release all its entries to the freelist. + */ +static bool depot_try_init_pool(void **prealloc) { - lockdep_assert_held_write(&pool_rwlock); - - /* Check if we still have objects in the freelist. */ - if (!list_empty(&free_stacks)) - goto out_keep_prealloc; + lockdep_assert_held(&pool_lock); /* Check if we have a new pool saved and use it. */ if (new_pool) { @@ -348,10 +382,9 @@ static bool depot_update_pools(void **prealloc) /* Take note that we might need a new new_pool. */ if (pools_num < DEPOT_MAX_POOLS) - new_pool_required = true; + WRITE_ONCE(new_pool_required, true); - /* Try keeping the preallocated memory for new_pool. */ - goto out_keep_prealloc; + return true; } /* Bail out if we reached the pool limit. */ @@ -368,12 +401,32 @@ static bool depot_update_pools(void **prealloc) } return false; +} -out_keep_prealloc: - /* Keep the preallocated memory for a new pool if required. */ - if (*prealloc) - depot_keep_new_pool(prealloc); - return true; +/* Try to find next free usable entry. */ +static struct stack_record *depot_pop_free(void) +{ + struct stack_record *stack; + + lockdep_assert_held(&pool_lock); + + if (list_empty(&free_stacks)) + return NULL; + + /* + * We maintain the invariant that the elements in front are least + * recently used, and are therefore more likely to be associated with an + * RCU grace period in the past. Consequently it is sufficient to only + * check the first entry. + */ + stack = list_first_entry(&free_stacks, struct stack_record, free_list); + if (stack->size && !poll_state_synchronize_rcu(stack->rcu_state)) + return NULL; + + list_del(&stack->free_list); + counters[DEPOT_COUNTER_FREELIST_SIZE]--; + + return stack; } /* Allocates a new stack in a stack depot pool. */ @@ -382,20 +435,22 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) { struct stack_record *stack; - lockdep_assert_held_write(&pool_rwlock); + lockdep_assert_held(&pool_lock); - /* Update current and new pools if required and possible. */ - if (!depot_update_pools(prealloc)) + /* This should already be checked by public API entry points. */ + if (WARN_ON_ONCE(!size)) return NULL; /* Check if we have a stack record to save the stack trace. */ - if (list_empty(&free_stacks)) - return NULL; - - /* Get and unlink the first entry from the freelist. */ - stack = list_first_entry(&free_stacks, struct stack_record, list); - list_del(&stack->list); - counters[DEPOT_COUNTER_FREELIST_SIZE]--; + stack = depot_pop_free(); + if (!stack) { + /* No usable entries on the freelist - try to refill the freelist. */ + if (!depot_try_init_pool(prealloc)) + return NULL; + stack = depot_pop_free(); + if (WARN_ON(!stack)) + return NULL; + } /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ if (size > CONFIG_STACKDEPOT_MAX_FRAMES) @@ -421,37 +476,73 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) { + const int pools_num_cached = READ_ONCE(pools_num); union handle_parts parts = { .handle = handle }; void *pool; size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; - lockdep_assert_held(&pool_rwlock); + lockdep_assert_not_held(&pool_lock); - if (parts.pool_index > pools_num) { + if (parts.pool_index > pools_num_cached) { WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", - parts.pool_index, pools_num, handle); + parts.pool_index, pools_num_cached, handle); return NULL; } pool = stack_pools[parts.pool_index]; - if (!pool) + if (WARN_ON(!pool)) return NULL; stack = pool + offset; + if (WARN_ON(!refcount_read(&stack->count))) + return NULL; + return stack; } /* Links stack into the freelist. */ static void depot_free_stack(struct stack_record *stack) { - lockdep_assert_held_write(&pool_rwlock); + unsigned long flags; - list_add(&stack->list, &free_stacks); + lockdep_assert_not_held(&pool_lock); + + raw_spin_lock_irqsave(&pool_lock, flags); + printk_deferred_enter(); + + /* + * Remove the entry from the hash list. Concurrent list traversal may + * still observe the entry, but since the refcount is zero, this entry + * will no longer be considered as valid. + */ + list_del_rcu(&stack->hash_list); + + /* + * Due to being used from constrained contexts such as the allocators, + * NMI, or even RCU itself, stack depot cannot rely on primitives that + * would sleep (such as synchronize_rcu()) or recursively call into + * stack depot again (such as call_rcu()). + * + * Instead, get an RCU cookie, so that we can ensure this entry isn't + * moved onto another list until the next grace period, and concurrent + * RCU list traversal remains safe. + */ + stack->rcu_state = get_state_synchronize_rcu(); + + /* + * Add the entry to the freelist tail, so that older entries are + * considered first - their RCU cookie is more likely to no longer be + * associated with the current grace period. + */ + list_add_tail(&stack->free_list, &free_stacks); counters[DEPOT_COUNTER_FREELIST_SIZE]++; counters[DEPOT_COUNTER_FREES]++; counters[DEPOT_COUNTER_INUSE]--; + + printk_deferred_exit(); + raw_spin_unlock_irqrestore(&pool_lock, flags); } /* Calculates the hash for a stack. */ @@ -479,22 +570,52 @@ int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, /* Finds a stack in a bucket of the hash table. */ static inline struct stack_record *find_stack(struct list_head *bucket, - unsigned long *entries, int size, - u32 hash) + unsigned long *entries, int size, + u32 hash, depot_flags_t flags) { - struct list_head *pos; - struct stack_record *found; + struct stack_record *stack, *ret = NULL; - lockdep_assert_held(&pool_rwlock); + /* + * Stack depot may be used from instrumentation that instruments RCU or + * tracing itself; use variant that does not call into RCU and cannot be + * traced. + * + * Note: Such use cases must take care when using refcounting to evict + * unused entries, because the stack record free-then-reuse code paths + * do call into RCU. + */ + rcu_read_lock_sched_notrace(); - list_for_each(pos, bucket) { - found = list_entry(pos, struct stack_record, list); - if (found->hash == hash && - found->size == size && - !stackdepot_memcmp(entries, found->entries, size)) - return found; + list_for_each_entry_rcu(stack, bucket, hash_list) { + if (stack->hash != hash || stack->size != size) + continue; + + /* + * This may race with depot_free_stack() accessing the freelist + * management state unioned with @entries. The refcount is zero + * in that case and the below refcount_inc_not_zero() will fail. + */ + if (data_race(stackdepot_memcmp(entries, stack->entries, size))) + continue; + + /* + * Try to increment refcount. If this succeeds, the stack record + * is valid and has not yet been freed. + * + * If STACK_DEPOT_FLAG_GET is not used, it is undefined behavior + * to then call stack_depot_put() later, and we can assume that + * a stack record is never placed back on the freelist. + */ + if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count)) + continue; + + ret = stack; + break; } - return NULL; + + rcu_read_unlock_sched_notrace(); + + return ret; } depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, @@ -508,7 +629,6 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, struct page *page = NULL; void *prealloc = NULL; bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC; - bool need_alloc = false; unsigned long flags; u32 hash; @@ -531,31 +651,16 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, hash = hash_stack(entries, nr_entries); bucket = &stack_table[hash & stack_hash_mask]; - read_lock_irqsave(&pool_rwlock, flags); - printk_deferred_enter(); - - /* Fast path: look the stack trace up without full locking. */ - found = find_stack(bucket, entries, nr_entries, hash); - if (found) { - if (depot_flags & STACK_DEPOT_FLAG_GET) - refcount_inc(&found->count); - printk_deferred_exit(); - read_unlock_irqrestore(&pool_rwlock, flags); + /* Fast path: look the stack trace up without locking. */ + found = find_stack(bucket, entries, nr_entries, hash, depot_flags); + if (found) goto exit; - } - - /* Take note if another stack pool needs to be allocated. */ - if (new_pool_required) - need_alloc = true; - - printk_deferred_exit(); - read_unlock_irqrestore(&pool_rwlock, flags); /* * Allocate memory for a new pool if required now: * we won't be able to do that under the lock. */ - if (unlikely(can_alloc && need_alloc)) { + if (unlikely(can_alloc && READ_ONCE(new_pool_required))) { /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic @@ -569,31 +674,36 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, prealloc = page_address(page); } - write_lock_irqsave(&pool_rwlock, flags); + raw_spin_lock_irqsave(&pool_lock, flags); printk_deferred_enter(); - found = find_stack(bucket, entries, nr_entries, hash); + /* Try to find again, to avoid concurrently inserting duplicates. */ + found = find_stack(bucket, entries, nr_entries, hash, depot_flags); if (!found) { struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc); if (new) { - list_add(&new->list, bucket); + /* + * This releases the stack record into the bucket and + * makes it visible to readers in find_stack(). + */ + list_add_rcu(&new->hash_list, bucket); found = new; } - } else { - if (depot_flags & STACK_DEPOT_FLAG_GET) - refcount_inc(&found->count); + } + + if (prealloc) { /* - * Stack depot already contains this stack trace, but let's - * keep the preallocated memory for future. + * Either stack depot already contains this stack trace, or + * depot_alloc_stack() did not consume the preallocated memory. + * Try to keep the preallocated memory for future. */ - if (prealloc) - depot_keep_new_pool(&prealloc); + depot_keep_new_pool(&prealloc); } printk_deferred_exit(); - write_unlock_irqrestore(&pool_rwlock, flags); + raw_spin_unlock_irqrestore(&pool_lock, flags); exit: if (prealloc) { /* Stack depot didn't use this memory, free it. */ @@ -618,7 +728,6 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries) { struct stack_record *stack; - unsigned long flags; *entries = NULL; /* @@ -630,13 +739,13 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, if (!handle || stack_depot_disabled) return 0; - read_lock_irqsave(&pool_rwlock, flags); - printk_deferred_enter(); - stack = depot_fetch_stack(handle); - - printk_deferred_exit(); - read_unlock_irqrestore(&pool_rwlock, flags); + /* + * Should never be NULL, otherwise this is a use-after-put (or just a + * corrupt handle). + */ + if (WARN(!stack, "corrupt handle or use after stack_depot_put()")) + return 0; *entries = stack->entries; return stack->size; @@ -646,29 +755,20 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch); void stack_depot_put(depot_stack_handle_t handle) { struct stack_record *stack; - unsigned long flags; if (!handle || stack_depot_disabled) return; - write_lock_irqsave(&pool_rwlock, flags); - printk_deferred_enter(); - stack = depot_fetch_stack(handle); - if (WARN_ON(!stack)) - goto out; + /* + * Should always be able to find the stack record, otherwise this is an + * unbalanced put attempt (or corrupt handle). + */ + if (WARN(!stack, "corrupt handle or unbalanced stack_depot_put()")) + return; - if (refcount_dec_and_test(&stack->count)) { - /* Unlink stack from the hash table. */ - list_del(&stack->list); - - /* Free stack. */ + if (refcount_dec_and_test(&stack->count)) depot_free_stack(stack); - } - -out: - printk_deferred_exit(); - write_unlock_irqrestore(&pool_rwlock, flags); } EXPORT_SYMBOL_GPL(stack_depot_put); From 0fe8ff51efb3fe5cb25da13229f95aa709613cb3 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 17 Jan 2024 18:21:52 +0000 Subject: [PATCH 211/347] MAINTAINERS: supplement of zswap maintainers update As discussed on the mailing list [1], merge the zpool maintainers entry into the zswap one. Also, add CREDITS entries for previous zswap/zpool maintainers. [1] https://lore.kernel.org/linux-mm/CAJD7tkYx4YWhGoVwnSeGc8dY_1aRRxxg8PzWBV==A6iqG_OgFw@mail.gmail.com/ Link: https://lkml.kernel.org/r/20240117182152.1439822-1-yosryahmed@google.com Signed-off-by: Yosry Ahmed Acked-by: Nhat Pham Acked-by: Dan Streetman Acked-by: Seth Jennings Acked-by: Johannes Weiner Cc: Vitaly Wool Signed-off-by: Andrew Morton --- CREDITS | 13 +++++++++++++ MAINTAINERS | 9 ++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/CREDITS b/CREDITS index 5797e8f7e92b..df8d6946739f 100644 --- a/CREDITS +++ b/CREDITS @@ -2161,6 +2161,19 @@ N: Mike Kravetz E: mike.kravetz@oracle.com D: Maintenance and development of the hugetlb subsystem +N: Seth Jennings +E: sjenning@redhat.com +D: Creation and maintenance of zswap + +N: Dan Streetman +E: ddstreet@ieee.org +D: Maintenance and development of zswap +D: Creation and maintenance of the zpool API + +N: Vitaly Wool +E: vitaly.wool@konsulko.com +D: Maintenance and development of zswap + N: Andreas S. Krebs E: akrebs@altavista.net D: CYPRESS CY82C693 chipset IDE, Digital's PC-Alpha 164SX boards diff --git a/MAINTAINERS b/MAINTAINERS index 8c756737491e..fecebfc4c0dc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24341,13 +24341,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs.git F: Documentation/filesystems/zonefs.rst F: fs/zonefs/ -ZPOOL COMPRESSED PAGE STORAGE API -M: Dan Streetman -L: linux-mm@kvack.org -S: Maintained -F: include/linux/zpool.h -F: mm/zpool.c - ZR36067 VIDEO FOR LINUX DRIVER M: Corentin Labbe L: mjpeg-users@lists.sourceforge.net @@ -24399,7 +24392,9 @@ M: Nhat Pham L: linux-mm@kvack.org S: Maintained F: Documentation/admin-guide/mm/zswap.rst +F: include/linux/zpool.h F: include/linux/zswap.h +F: mm/zpool.c F: mm/zswap.c THE REST From 91b80cc5b39f00399e8e2d17527cad2c7fa535e2 Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Fri, 19 Jan 2024 06:14:29 -0700 Subject: [PATCH 212/347] selftests: mm: fix map_hugetlb failure on 64K page size systems On systems with 64k page size and 512M huge page sizes, the allocation and test succeeds but errors out at the munmap. As the comment states, munmap will failure if its not HUGEPAGE aligned. This is due to the length of the mapping being 1/2 the size of the hugepage causing the munmap to not be hugepage aligned. Fix this by making the mapping length the full hugepage if the hugepage is larger than the length of the mapping. Link: https://lkml.kernel.org/r/20240119131429.172448-1-npache@redhat.com Signed-off-by: Nico Pache Cc: Donet Tom Cc: Shuah Khan Cc: Christophe Leroy Cc: Michael Ellerman Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/map_hugetlb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c index 193281560b61..86e8f2048a40 100644 --- a/tools/testing/selftests/mm/map_hugetlb.c +++ b/tools/testing/selftests/mm/map_hugetlb.c @@ -15,6 +15,7 @@ #include #include #include +#include "vm_util.h" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -58,10 +59,16 @@ int main(int argc, char **argv) { void *addr; int ret; + size_t hugepage_size; size_t length = LENGTH; int flags = FLAGS; int shift = 0; + hugepage_size = default_huge_page_size(); + /* munmap with fail if the length is not page aligned */ + if (hugepage_size > length) + length = hugepage_size; + if (argc > 1) length = atol(argv[1]) << 20; if (argc > 2) { From 52e63d67b5bb423b33d7a262ac7f8bd375a90145 Mon Sep 17 00:00:00 2001 From: Audra Mitchell Date: Fri, 19 Jan 2024 15:58:01 -0500 Subject: [PATCH 213/347] selftests/mm: Update va_high_addr_switch.sh to check CPU for la57 flag In order for the page table level 5 to be in use, the CPU must have the setting enabled in addition to the CONFIG option. Check for the flag to be set to avoid false test failures on systems that do not have this cpu flag set. The test does a series of mmap calls including three using the MAP_FIXED flag and specifying an address that is 1<<47 or 1<<48. These addresses are only available if you are using level 5 page tables, which requires both the CPU to have the capabiltiy (la57 flag) and the kernel to be configured. Currently the test only checks for the kernel configuration option, so this test can still report a false positive. Here are the three failing lines: $ ./va_high_addr_switch | grep FAILED mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED): 0xffffffffffffffff - FAILED mmap(HIGH_ADDR, MAP_FIXED): 0xffffffffffffffff - FAILED mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED): 0xffffffffffffffff - FAILED I thought (for about a second) refactoring the test so that these three mmap calls will only be run on systems with the level 5 page tables available, but the whole point of the test is to check the level 5 feature... Link: https://lkml.kernel.org/r/20240119205801.62769-1-audra@redhat.com Fixes: 4f2930c6718a ("selftests/vm: only run 128TBswitch with 5-level paging") Signed-off-by: Audra Mitchell Cc: Rafael Aquini Cc: Shuah Khan Cc: Adam Sindelar Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/va_high_addr_switch.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index 45cae7cab27e..a0a75f302904 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -29,9 +29,15 @@ check_supported_x86_64() # See man 1 gzip under '-f'. local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) + local cpu_supports_pl5=$(awk '/^flags/ {if (/la57/) {print 0;} + else {print 1}; exit}' /proc/cpuinfo 2>/dev/null) + if [[ "${pg_table_levels}" -lt 5 ]]; then echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" exit $ksft_skip + elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then + echo "$0: CPU does not have the necessary la57 flag to support page table level 5" + exit $ksft_skip fi } From db44c658f798ad907219f15e033229b8d1aadb93 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 22 Jan 2024 18:54:07 +0100 Subject: [PATCH 214/347] mm/huge_memory: fix folio_set_dirty() vs. folio_mark_dirty() The correct folio replacement for "set_page_dirty()" is "folio_mark_dirty()", not "folio_set_dirty()". Using the latter won't properly inform the FS using the dirty_folio() callback. This has been found by code inspection, but likely this can result in some real trouble. Link: https://lkml.kernel.org/r/20240122175407.307992-1-david@redhat.com Fixes: a8e61d584eda0 ("mm/huge_memory: page_remove_rmap() -> folio_remove_rmap_pmd()") Signed-off-by: David Hildenbrand Cc: Ryan Roberts Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/huge_memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 94ef5c02b459..5f31e51f2235 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2437,7 +2437,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, page = pmd_page(old_pmd); folio = page_folio(page); if (!folio_test_dirty(folio) && pmd_dirty(old_pmd)) - folio_set_dirty(folio); + folio_mark_dirty(folio); if (!folio_test_referenced(folio) && pmd_young(old_pmd)) folio_set_referenced(folio); folio_remove_rmap_pmd(folio, page, vma); @@ -3563,7 +3563,7 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, } if (pmd_dirty(pmdval)) - folio_set_dirty(folio); + folio_mark_dirty(folio); if (pmd_write(pmdval)) entry = make_writable_migration_entry(page_to_pfn(page)); else if (anon_exclusive) From e4e3df290f65da6cb27dac1309389c916f27db1a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 22 Jan 2024 18:17:51 +0100 Subject: [PATCH 215/347] mm/memory: fix folio_set_dirty() vs. folio_mark_dirty() in zap_pte_range() The correct folio replacement for "set_page_dirty()" is "folio_mark_dirty()", not "folio_set_dirty()". Using the latter won't properly inform the FS using the dirty_folio() callback. This has been found by code inspection, but likely this can result in some real trouble when zapping dirty PTEs that point at clean pagecache folios. Yuezhang Mo said: "Without this fix, testing the latest exfat with xfstests, test cases generic/029 and generic/030 will fail." Link: https://lkml.kernel.org/r/20240122171751.272074-1-david@redhat.com Fixes: c46265030b0f ("mm/memory: page_remove_rmap() -> folio_remove_rmap_pte()") Signed-off-by: David Hildenbrand Reported-by: Ryan Roberts Closes: https://lkml.kernel.org/r/2445cedb-61fb-422c-8bfb-caf0a2beed62@arm.com Reviewed-by: Ryan Roberts Cc: Matthew Wilcox (Oracle) Reviewed-by: Yuezhang Mo Signed-off-by: Andrew Morton --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 7e1f4849463a..89bcae0b224d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1464,7 +1464,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, delay_rmap = 0; if (!folio_test_anon(folio)) { if (pte_dirty(ptent)) { - folio_set_dirty(folio); + folio_mark_dirty(folio); if (tlb_delay_rmap(tlb)) { delay_rmap = 1; force_flush = 1; From 6f9dc684cae638dda0570154509884ee78d0f75c Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 22 Jan 2024 09:52:01 -0800 Subject: [PATCH 216/347] scs: add CONFIG_MMU dependency for vfree_atomic() The shadow call stack implementation fails to build without CONFIG_MMU: ld.lld: error: undefined symbol: vfree_atomic >>> referenced by scs.c >>> kernel/scs.o:(scs_free) in archive vmlinux.a Link: https://lkml.kernel.org/r/20240122175204.2371009-1-samuel.holland@sifive.com Fixes: a2abe7cbd8fe ("scs: switch to vmapped shadow stacks") Signed-off-by: Samuel Holland Reviewed-by: Sami Tolvanen Cc: Will Deacon Cc: Signed-off-by: Andrew Morton --- arch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/Kconfig b/arch/Kconfig index c91917b50873..a5af0edd3eb8 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -673,6 +673,7 @@ config SHADOW_CALL_STACK bool "Shadow Call Stack" depends on ARCH_SUPPORTS_SHADOW_CALL_STACK depends on DYNAMIC_FTRACE_WITH_ARGS || DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER + depends on MMU help This option enables the compiler's Shadow Call Stack, which uses a shadow stack to protect function return addresses from From d021b442cf312664811783e92b3d5e4548e92a53 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 22 Jan 2024 12:05:54 +0000 Subject: [PATCH 217/347] selftests/mm: ksm_tests should only MADV_HUGEPAGE valid memory ksm_tests was previously mmapping a region of memory, aligning the returned pointer to a PMD boundary, then setting MADV_HUGEPAGE, but was setting it past the end of the mmapped area due to not taking the pointer alignment into consideration. Fix this behaviour. Up until commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries"), this buggy behavior was (usually) masked because the alignment difference was always less than PMD-size. But since the mentioned commit, `ksm_tests -H -s 100` started failing. Link: https://lkml.kernel.org/r/20240122120554.3108022-1-ryan.roberts@arm.com Fixes: 325254899684 ("selftests: vm: add KSM huge pages merging time test") Signed-off-by: Ryan Roberts Cc: Pedro Demarchi Gomes Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/ksm_tests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index 380b691d3eb9..b748c48908d9 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -566,7 +566,7 @@ static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot, if (map_ptr_orig == MAP_FAILED) err(2, "initial mmap"); - if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE)) + if (madvise(map_ptr, len, MADV_HUGEPAGE)) err(2, "MADV_HUGEPAGE"); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); From 67695f18d55924b2013534ef3bdc363bc9e14605 Mon Sep 17 00:00:00 2001 From: Lokesh Gidra Date: Wed, 17 Jan 2024 14:37:29 -0800 Subject: [PATCH 218/347] userfaultfd: fix mmap_changing checking in mfill_atomic_hugetlb In mfill_atomic_hugetlb(), mmap_changing isn't being checked again if we drop mmap_lock and reacquire it. When the lock is not held, mmap_changing could have been incremented. This is also inconsistent with the behavior in mfill_atomic(). Link: https://lkml.kernel.org/r/20240117223729.1444522-1-lokeshgidra@google.com Fixes: df2cc96e77011 ("userfaultfd: prevent non-cooperative events vs mcopy_atomic races") Signed-off-by: Lokesh Gidra Cc: Andrea Arcangeli Cc: Mike Rapoport Cc: Axel Rasmussen Cc: Brian Geffon Cc: David Hildenbrand Cc: Jann Horn Cc: Kalesh Singh Cc: Matthew Wilcox (Oracle) Cc: Nicolas Geoffray Cc: Peter Xu Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 20e3b0d9cf7e..75fcf1f783bc 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -357,6 +357,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb( unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, uffd_flags_t flags) { struct mm_struct *dst_mm = dst_vma->vm_mm; @@ -472,6 +473,15 @@ retry: goto out; } mmap_read_lock(dst_mm); + /* + * If memory mappings are changing because of non-cooperative + * operation (e.g. mremap) running in parallel, bail out and + * request the user to retry later + */ + if (mmap_changing && atomic_read(mmap_changing)) { + err = -EAGAIN; + break; + } dst_vma = NULL; goto retry; @@ -506,6 +516,7 @@ extern ssize_t mfill_atomic_hugetlb(struct vm_area_struct *dst_vma, unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, uffd_flags_t flags); #endif /* CONFIG_HUGETLB_PAGE */ @@ -622,8 +633,8 @@ retry: * If this is a HUGETLB vma, pass off to appropriate routine */ if (is_vm_hugetlb_page(dst_vma)) - return mfill_atomic_hugetlb(dst_vma, dst_start, - src_start, len, flags); + return mfill_atomic_hugetlb(dst_vma, dst_start, src_start, + len, mmap_changing, flags); if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) goto out_unlock; From 4ef9ad19e17676b9ef071309bc62020e2373705d Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 18 Jan 2024 10:05:05 -0800 Subject: [PATCH 219/347] mm: huge_memory: don't force huge page alignment on 32 bit commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") caused two issues [1] [2] reported on 32 bit system or compat userspace. It doesn't make too much sense to force huge page alignment on 32 bit system due to the constrained virtual address space. [1] https://lore.kernel.org/linux-mm/d0a136a0-4a31-46bc-adf4-2db109a61672@kernel.org/ [2] https://lore.kernel.org/linux-mm/CAJuCfpHXLdQy1a2B6xN2d7quTYwg2OoZseYPZTRpU0eHHKD-sQ@mail.gmail.com/ Link: https://lkml.kernel.org/r/20240118180505.2914778-1-shy828301@gmail.com Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") Signed-off-by: Yang Shi Reported-by: Jiri Slaby Reported-by: Suren Baghdasaryan Tested-by: Jiri Slaby Tested-by: Suren Baghdasaryan Reviewed-by: Matthew Wilcox (Oracle) Cc: Rik van Riel Cc: Christopher Lameter Signed-off-by: Andrew Morton --- mm/huge_memory.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5f31e51f2235..172b85208276 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -811,6 +812,9 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, loff_t off_align = round_up(off, size); unsigned long len_pad, ret; + if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) + return 0; + if (off_end <= off_align || (off_end - off_align) < size) return 0; From 5056c596c3d1848021a4eaa76ee42f4c05c50346 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 26 Jan 2024 16:22:07 +0800 Subject: [PATCH 220/347] LoongArch/smp: Call rcutree_report_cpu_starting() at tlb_init() Machines which have more than 8 nodes fail to boot SMP after commit a2ccf46333d7b2cf96 ("LoongArch/smp: Call rcutree_report_cpu_starting() earlier"). Because such machines use tlb-based per-cpu base address rather than dmw-based per-cpu base address, resulting per-cpu variables can only be accessed after tlb_init(). But rcutree_report_cpu_starting() is now called before tlb_init() and accesses per-cpu variables indeed. Since the original patch want to avoid the lockdep warning caused by page allocation in tlb_init(), we can move rcutree_report_cpu_starting() to tlb_init() where after tlb exception configuration but before page allocation. Fixes: a2ccf46333d7b2cf96 ("LoongArch/smp: Call rcutree_report_cpu_starting() earlier") Signed-off-by: Huacai Chen --- arch/loongarch/kernel/smp.c | 1 - arch/loongarch/mm/tlb.c | 16 ++++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index a16e3dbe9f09..2b49d30eb7c0 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -509,7 +509,6 @@ asmlinkage void start_secondary(void) sync_counter(); cpu = raw_smp_processor_id(); set_my_cpu_offset(per_cpu_offset(cpu)); - rcutree_report_cpu_starting(cpu); cpu_probe(); constant_clockevent_init(); diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 2c0a411f23aa..0b95d32b30c9 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -284,12 +284,16 @@ static void setup_tlb_handler(int cpu) set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE); - } + } else { + int vec_sz __maybe_unused; + void *addr __maybe_unused; + struct page *page __maybe_unused; + + /* Avoid lockdep warning */ + rcutree_report_cpu_starting(cpu); + #ifdef CONFIG_NUMA - else { - void *addr; - struct page *page; - const int vec_sz = sizeof(exception_handlers); + vec_sz = sizeof(exception_handlers); if (pcpu_handlers[cpu]) return; @@ -305,8 +309,8 @@ static void setup_tlb_handler(int cpu) csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY); csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY); csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY); - } #endif + } } void tlb_init(int cpu) From 614f362918c782d1cfa4ee50f96072a95eac264e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 26 Jan 2024 16:22:07 +0800 Subject: [PATCH 221/347] LoongArch: KVM: Fix build due to API changes Commit 8569992d64b8f750e34b7858eac ("KVM: Use gfn instead of hva for mmu_notifier_retry") replaces mmu_invalidate_retry_hva() usage with mmu_invalidate_retry_gfn() for X86, LoongArch also need similar changes to fix build. Signed-off-by: Huacai Chen --- arch/loongarch/kvm/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 915f17527893..50a6acd7ffe4 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -675,7 +675,7 @@ static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot, * * There are several ways to safely use this helper: * - * - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before + * - Check mmu_invalidate_retry_gfn() after grabbing the mapping level, before * consuming it. In this case, mmu_lock doesn't need to be held during the * lookup, but it does need to be held while checking the MMU notifier. * @@ -855,7 +855,7 @@ retry: /* Check if an invalidation has taken place since we got pfn */ spin_lock(&kvm->mmu_lock); - if (mmu_invalidate_retry_hva(kvm, mmu_seq, hva)) { + if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn)) { /* * This can happen when mappings are changed asynchronously, but * also synchronously if a COW is triggered by From 48ef9e87b407f89f230f804815af7ac2031ec17a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 26 Jan 2024 16:22:07 +0800 Subject: [PATCH 222/347] LoongArch: KVM: Add returns to SIMD stubs The stubs for kvm_own/lsx()/kvm_own_lasx() when CONFIG_CPU_HAS_LSX or CONFIG_CPU_HAS_LASX is not defined should have a return value since they return an int, so add "return -EINVAL;" to the stubs. Fixes the build error: In file included from ../arch/loongarch/include/asm/kvm_csr.h:12, from ../arch/loongarch/kvm/interrupt.c:8: ../arch/loongarch/include/asm/kvm_vcpu.h: In function 'kvm_own_lasx': ../arch/loongarch/include/asm/kvm_vcpu.h:73:39: error: no return statement in function returning non-void [-Werror=return-type] 73 | static inline int kvm_own_lasx(struct kvm_vcpu *vcpu) { } Fixes: db1ecca22edf ("LoongArch: KVM: Add LSX (128bit SIMD) support") Fixes: 118e10cd893d ("LoongArch: KVM: Add LASX (256bit SIMD) support") Signed-off-by: Randy Dunlap Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_vcpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index e71ceb88f29e..0cb4fdb8a9b5 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -60,7 +60,7 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu); void kvm_save_lsx(struct loongarch_fpu *fpu); void kvm_restore_lsx(struct loongarch_fpu *fpu); #else -static inline int kvm_own_lsx(struct kvm_vcpu *vcpu) { } +static inline int kvm_own_lsx(struct kvm_vcpu *vcpu) { return -EINVAL; } static inline void kvm_save_lsx(struct loongarch_fpu *fpu) { } static inline void kvm_restore_lsx(struct loongarch_fpu *fpu) { } #endif @@ -70,7 +70,7 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu); void kvm_save_lasx(struct loongarch_fpu *fpu); void kvm_restore_lasx(struct loongarch_fpu *fpu); #else -static inline int kvm_own_lasx(struct kvm_vcpu *vcpu) { } +static inline int kvm_own_lasx(struct kvm_vcpu *vcpu) { return -EINVAL; } static inline void kvm_save_lasx(struct loongarch_fpu *fpu) { } static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { } #endif From e1d54d153fc3e697b841999df7cbad51492def8e Mon Sep 17 00:00:00 2001 From: Damian Muszynski Date: Fri, 19 Jan 2024 17:12:38 +0100 Subject: [PATCH 223/347] crypto: qat - fix arbiter mapping generation algorithm for QAT 402xx The commit "crypto: qat - generate dynamically arbiter mappings" introduced a regression on qat_402xx devices. This is reported when the driver probes the device, as indicated by the following error messages: 4xxx 0000:0b:00.0: enabling device (0140 -> 0142) 4xxx 0000:0b:00.0: Generate of the thread to arbiter map failed 4xxx 0000:0b:00.0: Direct firmware load for qat_402xx_mmp.bin failed with error -2 The root cause of this issue was the omission of a necessary function pointer required by the mapping algorithm during the implementation. Fix it by adding the missing function pointer. Fixes: 5da6a2d5353e ("crypto: qat - generate dynamically arbiter mappings") Signed-off-by: Damian Muszynski Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index 479062aa5e6b..94a0ebb03d8c 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -463,6 +463,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) hw_data->fw_name = ADF_402XX_FW; hw_data->fw_mmp_name = ADF_402XX_MMP; hw_data->uof_get_name = uof_get_name_402xx; + hw_data->get_ena_thd_mask = get_ena_thd_mask; break; case ADF_401XX_PCI_DEVICE_ID: hw_data->fw_name = ADF_4XXX_FW; From c5a2f74db71a849f3a60bc153d684d6d28a0c665 Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Thu, 18 Jan 2024 14:55:57 +0530 Subject: [PATCH 224/347] crypto: caam - fix asynchronous hash ahash_alg->setkey is updated to ahash_nosetkey in ahash.c so checking setkey() function to determine hmac algorithm is not valid. to fix this added is_hmac variable in structure caam_hash_alg to determine whether the algorithm is hmac or not. Fixes: 2f1f34c1bf7b ("crypto: ahash - optimize performance when wrapping shash") Signed-off-by: Gaurav Jain Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg_qi2.c | 7 +++++-- drivers/crypto/caam/caamhash.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index a148ff1f0872..a4f6884416a0 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -4545,6 +4545,7 @@ struct caam_hash_alg { struct list_head entry; struct device *dev; int alg_type; + bool is_hmac; struct ahash_alg ahash_alg; }; @@ -4571,7 +4572,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) ctx->dev = caam_hash->dev; - if (alg->setkey) { + if (caam_hash->is_hmac) { ctx->adata.key_dma = dma_map_single_attrs(ctx->dev, ctx->key, ARRAY_SIZE(ctx->key), DMA_TO_DEVICE, @@ -4611,7 +4612,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) * For keyed hash algorithms shared descriptors * will be created later in setkey() callback */ - return alg->setkey ? 0 : ahash_set_sh_desc(ahash); + return caam_hash->is_hmac ? 0 : ahash_set_sh_desc(ahash); } static void caam_hash_cra_exit(struct crypto_tfm *tfm) @@ -4646,12 +4647,14 @@ static struct caam_hash_alg *caam_hash_alloc(struct device *dev, template->hmac_name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->hmac_driver_name); + t_alg->is_hmac = true; } else { snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->driver_name); t_alg->ahash_alg.setkey = NULL; + t_alg->is_hmac = false; } alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 290c8500c247..fdd724228c2f 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -1753,6 +1753,7 @@ static struct caam_hash_template driver_hash[] = { struct caam_hash_alg { struct list_head entry; int alg_type; + bool is_hmac; struct ahash_engine_alg ahash_alg; }; @@ -1804,7 +1805,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) } else { if (priv->era >= 6) { ctx->dir = DMA_BIDIRECTIONAL; - ctx->key_dir = alg->setkey ? DMA_TO_DEVICE : DMA_NONE; + ctx->key_dir = caam_hash->is_hmac ? DMA_TO_DEVICE : DMA_NONE; } else { ctx->dir = DMA_TO_DEVICE; ctx->key_dir = DMA_NONE; @@ -1862,7 +1863,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) * For keyed hash algorithms shared descriptors * will be created later in setkey() callback */ - return alg->setkey ? 0 : ahash_set_sh_desc(ahash); + return caam_hash->is_hmac ? 0 : ahash_set_sh_desc(ahash); } static void caam_hash_cra_exit(struct crypto_tfm *tfm) @@ -1915,12 +1916,14 @@ caam_hash_alloc(struct caam_hash_template *template, template->hmac_name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->hmac_driver_name); + t_alg->is_hmac = true; } else { snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->driver_name); halg->setkey = NULL; + t_alg->is_hmac = false; } alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; From 96204e15310c218fd9355bdcacd02fed1d18070e Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Tue, 23 Jan 2024 17:14:20 +0000 Subject: [PATCH 225/347] mm: thp_get_unmapped_area must honour topdown preference The addition of commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") caused the "virtual_address_range" mm selftest to start failing on arm64. Let's fix that regression. There were 2 visible problems when running the test; 1) it takes much longer to execute, and 2) the test fails. Both are related: The (first part of the) test allocates as many 1GB anonymous blocks as it can in the low 256TB of address space, passing NULL as the addr hint to mmap. Before the faulty patch, all allocations were abutted and contained in a single, merged VMA. However, after this patch, each allocation is in its own VMA, and there is a 2M gap between each VMA. This causes the 2 problems in the test: 1) mmap becomes MUCH slower because there are so many VMAs to check to find a new 1G gap. 2) mmap fails once it hits the VMA limit (/proc/sys/vm/max_map_count). Hitting this limit then causes a subsequent calloc() to fail, which causes the test to fail. The problem is that arm64 (unlike x86) selects ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT. But __thp_get_unmapped_area() allocates len+2M then always aligns to the bottom of the discovered gap. That causes the 2M hole. Fix this by detecting cases where we can still achive the alignment goal when moved to the top of the allocated area, if configured to prefer top-down allocation. While we are at it, fix thp_get_unmapped_area's use of pgoff, which should always be zero for anonymous mappings. Prior to the faulty change, while it was possible for user space to pass in pgoff!=0, the old mm->get_unmapped_area() handler would not use it. thp_get_unmapped_area() does use it, so let's explicitly zero it before calling the handler. This should also be the correct behavior for arches that define their own get_unmapped_area() handler. Link: https://lkml.kernel.org/r/20240123171420.3970220-1-ryan.roberts@arm.com Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") Closes: https://lore.kernel.org/linux-mm/1e8f5ac7-54ce-433a-ae53-81522b2320e1@arm.com/ Signed-off-by: Ryan Roberts Reviewed-by: Yang Shi Cc: Matthew Wilcox (Oracle) Cc: Rik van Riel Cc: Signed-off-by: Andrew Morton --- mm/huge_memory.c | 10 ++++++++-- mm/mmap.c | 6 ++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 172b85208276..94c958f7ebb5 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -810,7 +810,7 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, { loff_t off_end = off + len; loff_t off_align = round_up(off, size); - unsigned long len_pad, ret; + unsigned long len_pad, ret, off_sub; if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) return 0; @@ -839,7 +839,13 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, if (ret == addr) return addr; - ret += (off - ret) & (size - 1); + off_sub = (off - ret) & (size - 1); + + if (current->mm->get_unmapped_area == arch_get_unmapped_area_topdown && + !off_sub) + return ret + size; + + ret += off_sub; return ret; } diff --git a/mm/mmap.c b/mm/mmap.c index b78e83d351d2..d89770eaab6b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1825,15 +1825,17 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, /* * mmap_region() will call shmem_zero_setup() to create a file, * so use shmem's get_unmapped_area in case it can be huge. - * do_mmap() will clear pgoff, so match alignment. */ - pgoff = 0; get_area = shmem_get_unmapped_area; } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { /* Ensures that larger anonymous mappings are THP aligned. */ get_area = thp_get_unmapped_area; } + /* Always treat pgoff as zero for anonymous memory. */ + if (!file) + pgoff = 0; + addr = get_area(file, addr, len, pgoff, flags); if (IS_ERR_VALUE(addr)) return addr; From dd3c33ccbb8f0dc6a256dc55e7607569aea69721 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 23 Jan 2024 09:46:54 -0800 Subject: [PATCH 226/347] MIPS: BCM63XX: Fix missing prototypes Most of the symbols for which we do not have a prototype can actually be made static and for the few that cannot, there is already a declaration in a header for it. Signed-off-by: Florian Fainelli Signed-off-by: Thomas Bogendoerfer --- arch/mips/bcm63xx/boards/board_bcm963xx.c | 2 +- arch/mips/bcm63xx/dev-rng.c | 2 +- arch/mips/bcm63xx/dev-uart.c | 1 + arch/mips/bcm63xx/dev-wdt.c | 2 +- arch/mips/bcm63xx/irq.c | 2 +- arch/mips/bcm63xx/setup.c | 2 +- arch/mips/bcm63xx/timer.c | 2 +- 7 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c index 01aff80a5967..99f321b6e417 100644 --- a/arch/mips/bcm63xx/boards/board_bcm963xx.c +++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c @@ -702,7 +702,7 @@ static struct ssb_sprom bcm63xx_sprom = { .boardflags_hi = 0x0000, }; -int bcm63xx_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out) +static int bcm63xx_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out) { if (bus->bustype == SSB_BUSTYPE_PCI) { memcpy(out, &bcm63xx_sprom, sizeof(struct ssb_sprom)); diff --git a/arch/mips/bcm63xx/dev-rng.c b/arch/mips/bcm63xx/dev-rng.c index d277b4dc6c68..f94151f7c96f 100644 --- a/arch/mips/bcm63xx/dev-rng.c +++ b/arch/mips/bcm63xx/dev-rng.c @@ -26,7 +26,7 @@ static struct platform_device bcm63xx_rng_device = { .resource = rng_resources, }; -int __init bcm63xx_rng_register(void) +static int __init bcm63xx_rng_register(void) { if (!BCMCPU_IS_6368()) return -ENODEV; diff --git a/arch/mips/bcm63xx/dev-uart.c b/arch/mips/bcm63xx/dev-uart.c index 3bc7f3bfc9ad..5d6bf0445b29 100644 --- a/arch/mips/bcm63xx/dev-uart.c +++ b/arch/mips/bcm63xx/dev-uart.c @@ -10,6 +10,7 @@ #include #include #include +#include static struct resource uart0_resources[] = { { diff --git a/arch/mips/bcm63xx/dev-wdt.c b/arch/mips/bcm63xx/dev-wdt.c index 42130914a3c2..302bf7ed5ad5 100644 --- a/arch/mips/bcm63xx/dev-wdt.c +++ b/arch/mips/bcm63xx/dev-wdt.c @@ -34,7 +34,7 @@ static struct platform_device bcm63xx_wdt_device = { }, }; -int __init bcm63xx_wdt_register(void) +static int __init bcm63xx_wdt_register(void) { wdt_resources[0].start = bcm63xx_regset_address(RSET_WDT); wdt_resources[0].end = wdt_resources[0].start; diff --git a/arch/mips/bcm63xx/irq.c b/arch/mips/bcm63xx/irq.c index 2548013442f6..6240a8f88ea3 100644 --- a/arch/mips/bcm63xx/irq.c +++ b/arch/mips/bcm63xx/irq.c @@ -72,7 +72,7 @@ static inline int enable_irq_for_cpu(int cpu, struct irq_data *d, */ #define BUILD_IPIC_INTERNAL(width) \ -void __dispatch_internal_##width(int cpu) \ +static void __dispatch_internal_##width(int cpu) \ { \ u32 pending[width / 32]; \ unsigned int src, tgt; \ diff --git a/arch/mips/bcm63xx/setup.c b/arch/mips/bcm63xx/setup.c index d811e3e03f81..c13ddb544a23 100644 --- a/arch/mips/bcm63xx/setup.c +++ b/arch/mips/bcm63xx/setup.c @@ -159,7 +159,7 @@ void __init plat_mem_setup(void) board_setup(); } -int __init bcm63xx_register_devices(void) +static int __init bcm63xx_register_devices(void) { /* register gpiochip */ bcm63xx_gpio_init(); diff --git a/arch/mips/bcm63xx/timer.c b/arch/mips/bcm63xx/timer.c index a86065854c0c..74b83807df30 100644 --- a/arch/mips/bcm63xx/timer.c +++ b/arch/mips/bcm63xx/timer.c @@ -178,7 +178,7 @@ int bcm63xx_timer_set(int id, int monotonic, unsigned int countdown_us) EXPORT_SYMBOL(bcm63xx_timer_set); -int bcm63xx_timer_init(void) +static int bcm63xx_timer_init(void) { int ret, irq; u32 reg; From abcabb9e30a1f9a69c76776f8abffc31c377b542 Mon Sep 17 00:00:00 2001 From: Huang Pei Date: Tue, 23 Jan 2024 09:47:57 +0800 Subject: [PATCH 227/347] MIPS: reserve exception vector space ONLY ONCE "cpu_probe" is called both by BP and APs, but reserving exception vector (like 0x0-0x1000) called by "cpu_probe" need once and calling on APs is too late since memblock is unavailable at that time. So, reserve exception vector ONLY by BP. Suggested-by: Thomas Bogendoerfer Signed-off-by: Huang Pei Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/traps.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index dec6878b35f6..a1c1cb5de913 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2007,7 +2007,13 @@ unsigned long vi_handlers[64]; void reserve_exception_space(phys_addr_t addr, unsigned long size) { - memblock_reserve(addr, size); + /* + * reserve exception space on CPUs other than CPU0 + * is too late, since memblock is unavailable when APs + * up + */ + if (smp_processor_id() == 0) + memblock_reserve(addr, size); } void __init *set_except_vector(int n, void *addr) From ce7b1b97776ec0b068c4dd6b6dbb48ae09a23519 Mon Sep 17 00:00:00 2001 From: Huang Pei Date: Tue, 23 Jan 2024 09:47:58 +0800 Subject: [PATCH 228/347] MIPS: loongson64: set nid for reserved memblock region Commit 61167ad5fecd("mm: pass nid to reserve_bootmem_region()") reveals that reserved memblock regions have no valid node id set, just set it right since loongson64 firmware makes it clear in memory layout info. This works around booting failure on 3A1000+ since commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") under CONFIG_DEFERRED_STRUCT_PAGE_INIT. Signed-off-by: Huang Pei Signed-off-by: Thomas Bogendoerfer --- arch/mips/loongson64/init.c | 2 ++ arch/mips/loongson64/numa.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index f25caa6aa9d3..000ba91c0886 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -103,6 +103,8 @@ void __init szmem(unsigned int node) if (loongson_sysconf.vgabios_addr) memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr), SZ_256K); + /* set nid for reserved memory */ + memblock_set_node((u64)node << 44, (u64)(node+1) << 44, &memblock.reserved, node); } #ifndef CONFIG_NUMA diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index 8f61e93c0c5b..6345e096c532 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -132,6 +132,8 @@ static void __init node_mem_init(unsigned int node) /* Reserve pfn range 0~node[0]->node_start_pfn */ memblock_reserve(0, PAGE_SIZE * start_pfn); + /* set nid for reserved memory on node 0 */ + memblock_set_node(0, (u64)1 << 44, &memblock.reserved, 1); } } From 4bf2a626dc4bb46f0754d8ac02ec8584ff114ad5 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Mon, 22 Jan 2024 19:47:09 +0100 Subject: [PATCH 229/347] MIPS: lantiq: register smp_ops on non-smp platforms Lantiq uses a common kernel config for devices with 24Kc and 34Kc cores. The changes made previously to add support for interrupts on all cores work on 24Kc platforms with SMP disabled and 34Kc platforms with SMP enabled. This patch fixes boot issues on Danube (single core 24Kc) with SMP enabled. Fixes: 730320fd770d ("MIPS: lantiq: enable all hardware interrupts on second VPE") Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: Thomas Bogendoerfer --- arch/mips/lantiq/prom.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c index a3cf29365858..0c45767eacf6 100644 --- a/arch/mips/lantiq/prom.c +++ b/arch/mips/lantiq/prom.c @@ -108,10 +108,9 @@ void __init prom_init(void) prom_init_cmdline(); #if defined(CONFIG_MIPS_MT_SMP) - if (cpu_has_mipsmt) { - lantiq_smp_ops = vsmp_smp_ops; + lantiq_smp_ops = vsmp_smp_ops; + if (cpu_has_mipsmt) lantiq_smp_ops.init_secondary = lantiq_init_secondary; - register_smp_ops(&lantiq_smp_ops); - } + register_smp_ops(&lantiq_smp_ops); #endif } From cc4b2dd95f0d1eba8c691b36e8f4d1795582f1ff Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Thu, 25 Jan 2024 20:00:39 +0800 Subject: [PATCH 230/347] erofs: fix infinite loop due to a race of filling compressed_bvecs I encountered a race issue after lengthy (~594647 secs) stress tests on a 64k-page arm64 VM with several 4k-block EROFS images. The timing is like below: z_erofs_try_inplace_io z_erofs_fill_bio_vec cmpxchg(&compressed_bvecs[].page, NULL, ..) [access bufvec] compressed_bvecs[] = *bvec; Previously, z_erofs_submit_queue() just accessed bufvec->page only, so other fields in bufvec didn't matter. After the subpage block support is landed, .offset and .end can be used too, but filling bufvec isn't an atomic operation which can cause inconsistency. Let's use a spinlock to keep the atomicity of each bufvec. More specifically, just reuse the existing spinlock `pcl->obj.lockref.lock` since it's rarely used (also it takes a short time if even used) as long as the pcluster has a reference. Fixes: 192351616a9d ("erofs: support I/O submission for sub-page compressed blocks") Signed-off-by: Gao Xiang Reviewed-by: Yue Hu Reviewed-by: Sandeep Dhavale Link: https://lore.kernel.org/r/20240125120039.3228103-1-hsiangkao@linux.alibaba.com --- fs/erofs/zdata.c | 74 +++++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 583c062cd0e4..c1c77166b30f 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -563,21 +563,19 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; unsigned int i; - if (i_blocksize(fe->inode) != PAGE_SIZE) - return; - if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED) + if (i_blocksize(fe->inode) != PAGE_SIZE || + fe->mode < Z_EROFS_PCLUSTER_FOLLOWED) return; for (i = 0; i < pclusterpages; ++i) { struct page *page, *newpage; void *t; /* mark pages just found for debugging */ - /* the compressed page was loaded before */ + /* Inaccurate check w/o locking to avoid unneeded lookups */ if (READ_ONCE(pcl->compressed_bvecs[i].page)) continue; page = find_get_page(mc, pcl->obj.index + i); - if (page) { t = (void *)((unsigned long)page | 1); newpage = NULL; @@ -597,9 +595,13 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE); t = (void *)((unsigned long)newpage | 1); } - - if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL, t)) + spin_lock(&pcl->obj.lockref.lock); + if (!pcl->compressed_bvecs[i].page) { + pcl->compressed_bvecs[i].page = t; + spin_unlock(&pcl->obj.lockref.lock); continue; + } + spin_unlock(&pcl->obj.lockref.lock); if (page) put_page(page); @@ -718,31 +720,25 @@ int erofs_init_managed_cache(struct super_block *sb) return 0; } -static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe, - struct z_erofs_bvec *bvec) -{ - struct z_erofs_pcluster *const pcl = fe->pcl; - - while (fe->icur > 0) { - if (!cmpxchg(&pcl->compressed_bvecs[--fe->icur].page, - NULL, bvec->page)) { - pcl->compressed_bvecs[fe->icur] = *bvec; - return true; - } - } - return false; -} - /* callers must be with pcluster lock held */ static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe, struct z_erofs_bvec *bvec, bool exclusive) { + struct z_erofs_pcluster *pcl = fe->pcl; int ret; if (exclusive) { /* give priority for inplaceio to use file pages first */ - if (z_erofs_try_inplace_io(fe, bvec)) + spin_lock(&pcl->obj.lockref.lock); + while (fe->icur > 0) { + if (pcl->compressed_bvecs[--fe->icur].page) + continue; + pcl->compressed_bvecs[fe->icur] = *bvec; + spin_unlock(&pcl->obj.lockref.lock); return 0; + } + spin_unlock(&pcl->obj.lockref.lock); + /* otherwise, check if it can be used as a bvpage */ if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED && !fe->candidate_bvpage) @@ -1423,23 +1419,26 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec, { gfp_t gfp = mapping_gfp_mask(mc); bool tocache = false; - struct z_erofs_bvec *zbv = pcl->compressed_bvecs + nr; + struct z_erofs_bvec zbv; struct address_space *mapping; - struct page *page, *oldpage; + struct page *page; int justfound, bs = i_blocksize(f->inode); /* Except for inplace pages, the entire page can be used for I/Os */ bvec->bv_offset = 0; bvec->bv_len = PAGE_SIZE; repeat: - oldpage = READ_ONCE(zbv->page); - if (!oldpage) + spin_lock(&pcl->obj.lockref.lock); + zbv = pcl->compressed_bvecs[nr]; + page = zbv.page; + justfound = (unsigned long)page & 1UL; + page = (struct page *)((unsigned long)page & ~1UL); + pcl->compressed_bvecs[nr].page = page; + spin_unlock(&pcl->obj.lockref.lock); + if (!page) goto out_allocpage; - justfound = (unsigned long)oldpage & 1UL; - page = (struct page *)((unsigned long)oldpage & ~1UL); bvec->bv_page = page; - DBG_BUGON(z_erofs_is_shortlived_page(page)); /* * Handle preallocated cached pages. We tried to allocate such pages @@ -1448,7 +1447,6 @@ repeat: */ if (page->private == Z_EROFS_PREALLOCATED_PAGE) { set_page_private(page, 0); - WRITE_ONCE(zbv->page, page); tocache = true; goto out_tocache; } @@ -1459,9 +1457,9 @@ repeat: * therefore it is impossible for `mapping` to be NULL. */ if (mapping && mapping != mc) { - if (zbv->offset < 0) - bvec->bv_offset = round_up(-zbv->offset, bs); - bvec->bv_len = round_up(zbv->end, bs) - bvec->bv_offset; + if (zbv.offset < 0) + bvec->bv_offset = round_up(-zbv.offset, bs); + bvec->bv_len = round_up(zbv.end, bs) - bvec->bv_offset; return; } @@ -1471,7 +1469,6 @@ repeat: /* the cached page is still in managed cache */ if (page->mapping == mc) { - WRITE_ONCE(zbv->page, page); /* * The cached page is still available but without a valid * `->private` pcluster hint. Let's reconnect them. @@ -1503,11 +1500,15 @@ repeat: put_page(page); out_allocpage: page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL); - if (oldpage != cmpxchg(&zbv->page, oldpage, page)) { + spin_lock(&pcl->obj.lockref.lock); + if (pcl->compressed_bvecs[nr].page) { erofs_pagepool_add(&f->pagepool, page); + spin_unlock(&pcl->obj.lockref.lock); cond_resched(); goto repeat; } + pcl->compressed_bvecs[nr].page = page; + spin_unlock(&pcl->obj.lockref.lock); bvec->bv_page = page; out_tocache: if (!tocache || bs != PAGE_SIZE || @@ -1685,6 +1686,7 @@ submit_bio_retry: if (cur + bvec.bv_len > end) bvec.bv_len = end - cur; + DBG_BUGON(bvec.bv_len < sb->s_blocksize); if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len, bvec.bv_offset)) goto submit_bio_retry; From 1f4a994be2c3d13852fd5c1054f292bd303352cc Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Fri, 26 Jan 2024 17:20:00 +0800 Subject: [PATCH 231/347] riscv: dts: sophgo: separate sg2042 mtime and mtimecmp to fit aclint format Change the timer layout in the dtb to fit the format that needed by the SBI. Fixes: 967a94a92aaa ("riscv: dts: add initial Sophgo SG2042 SoC device tree") Reviewed-by: Chen Wang Reviewed-by: Guo Ren Signed-off-by: Inochi Amaoto Signed-off-by: Chen Wang Signed-off-by: Arnd Bergmann --- arch/riscv/boot/dts/sophgo/sg2042.dtsi | 80 +++++++++++++++----------- 1 file changed, 48 insertions(+), 32 deletions(-) diff --git a/arch/riscv/boot/dts/sophgo/sg2042.dtsi b/arch/riscv/boot/dts/sophgo/sg2042.dtsi index 93256540d078..ead1cc35d88b 100644 --- a/arch/riscv/boot/dts/sophgo/sg2042.dtsi +++ b/arch/riscv/boot/dts/sophgo/sg2042.dtsi @@ -93,144 +93,160 @@ <&cpu63_intc 3>; }; - clint_mtimer0: timer@70ac000000 { + clint_mtimer0: timer@70ac004000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac000000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac004000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu0_intc 7>, <&cpu1_intc 7>, <&cpu2_intc 7>, <&cpu3_intc 7>; }; - clint_mtimer1: timer@70ac010000 { + clint_mtimer1: timer@70ac014000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac010000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac014000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu4_intc 7>, <&cpu5_intc 7>, <&cpu6_intc 7>, <&cpu7_intc 7>; }; - clint_mtimer2: timer@70ac020000 { + clint_mtimer2: timer@70ac024000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac020000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac024000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu8_intc 7>, <&cpu9_intc 7>, <&cpu10_intc 7>, <&cpu11_intc 7>; }; - clint_mtimer3: timer@70ac030000 { + clint_mtimer3: timer@70ac034000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac030000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac034000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu12_intc 7>, <&cpu13_intc 7>, <&cpu14_intc 7>, <&cpu15_intc 7>; }; - clint_mtimer4: timer@70ac040000 { + clint_mtimer4: timer@70ac044000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac040000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac044000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu16_intc 7>, <&cpu17_intc 7>, <&cpu18_intc 7>, <&cpu19_intc 7>; }; - clint_mtimer5: timer@70ac050000 { + clint_mtimer5: timer@70ac054000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac050000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac054000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu20_intc 7>, <&cpu21_intc 7>, <&cpu22_intc 7>, <&cpu23_intc 7>; }; - clint_mtimer6: timer@70ac060000 { + clint_mtimer6: timer@70ac064000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac060000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac064000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu24_intc 7>, <&cpu25_intc 7>, <&cpu26_intc 7>, <&cpu27_intc 7>; }; - clint_mtimer7: timer@70ac070000 { + clint_mtimer7: timer@70ac074000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac070000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac074000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu28_intc 7>, <&cpu29_intc 7>, <&cpu30_intc 7>, <&cpu31_intc 7>; }; - clint_mtimer8: timer@70ac080000 { + clint_mtimer8: timer@70ac084000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac080000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac084000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu32_intc 7>, <&cpu33_intc 7>, <&cpu34_intc 7>, <&cpu35_intc 7>; }; - clint_mtimer9: timer@70ac090000 { + clint_mtimer9: timer@70ac094000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac090000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac094000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu36_intc 7>, <&cpu37_intc 7>, <&cpu38_intc 7>, <&cpu39_intc 7>; }; - clint_mtimer10: timer@70ac0a0000 { + clint_mtimer10: timer@70ac0a4000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac0a0000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac0a4000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu40_intc 7>, <&cpu41_intc 7>, <&cpu42_intc 7>, <&cpu43_intc 7>; }; - clint_mtimer11: timer@70ac0b0000 { + clint_mtimer11: timer@70ac0b4000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac0b0000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac0b4000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu44_intc 7>, <&cpu45_intc 7>, <&cpu46_intc 7>, <&cpu47_intc 7>; }; - clint_mtimer12: timer@70ac0c0000 { + clint_mtimer12: timer@70ac0c4000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac0c0000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac0c4000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu48_intc 7>, <&cpu49_intc 7>, <&cpu50_intc 7>, <&cpu51_intc 7>; }; - clint_mtimer13: timer@70ac0d0000 { + clint_mtimer13: timer@70ac0d4000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac0d0000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac0d4000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu52_intc 7>, <&cpu53_intc 7>, <&cpu54_intc 7>, <&cpu55_intc 7>; }; - clint_mtimer14: timer@70ac0e0000 { + clint_mtimer14: timer@70ac0e4000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac0e0000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac0e4000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu56_intc 7>, <&cpu57_intc 7>, <&cpu58_intc 7>, <&cpu59_intc 7>; }; - clint_mtimer15: timer@70ac0f0000 { + clint_mtimer15: timer@70ac0f4000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac0f0000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac0f4000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu60_intc 7>, <&cpu61_intc 7>, <&cpu62_intc 7>, From ff3d5d04db07e5374758baa7e877fde8d683ebab Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 13 Nov 2023 17:43:44 +0100 Subject: [PATCH 232/347] drm: bridge: samsung-dsim: Don't use FORCE_STOP_STATE The FORCE_STOP_STATE bit is unsuitable to force the DSI link into LP-11 mode. It seems the bridge internally queues DSI packets and when the FORCE_STOP_STATE bit is cleared, they are sent in close succession without any useful timing (this also means that the DSI lanes won't go into LP-11 mode). The length of this gibberish varies between 1ms and 5ms. This sometimes breaks an attached bridge (TI SN65DSI84 in this case). In our case, the bridge will fail in about 1 per 500 reboots. The FORCE_STOP_STATE handling was introduced to have the DSI lanes in LP-11 state during the .pre_enable phase. But as it turns out, none of this is needed at all. Between samsung_dsim_init() and samsung_dsim_set_display_enable() the lanes are already in LP-11 mode. The code as it was before commit 20c827683de0 ("drm: bridge: samsung-dsim: Fix init during host transfer") and 0c14d3130654 ("drm: bridge: samsung-dsim: Fix i.MX8M enable flow to meet spec") was correct in this regard. This patch basically reverts both commits. It was tested on an i.MX8M SoC with an SN65DSI84 bridge. The signals were probed and the DSI packets were decoded during initialization and link start-up. After this patch the first DSI packet on the link is a VSYNC packet and the timing is correct. Command mode between .pre_enable and .enable was also briefly tested by a quick hack. There was no DSI link partner which would have responded, but it was made sure the DSI packet was send on the link. As a side note, the command mode seems to just work in HS mode. I couldn't find that the bridge will handle commands in LP mode. Fixes: 20c827683de0 ("drm: bridge: samsung-dsim: Fix init during host transfer") Fixes: 0c14d3130654 ("drm: bridge: samsung-dsim: Fix i.MX8M enable flow to meet spec") Signed-off-by: Michael Walle Signed-off-by: Inki Dae Link: https://patchwork.freedesktop.org/patch/msgid/20231113164344.1612602-1-mwalle@kernel.org --- drivers/gpu/drm/bridge/samsung-dsim.c | 32 ++------------------------- 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index be5914caa17d..63a1a0c88be4 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -969,10 +969,6 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi) reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG); reg &= ~DSIM_STOP_STATE_CNT_MASK; reg |= DSIM_STOP_STATE_CNT(driver_data->reg_values[STOP_STATE_CNT]); - - if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) - reg |= DSIM_FORCE_STOP_STATE; - samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg); reg = DSIM_BTA_TIMEOUT(0xff) | DSIM_LPDR_TIMEOUT(0xffff); @@ -1431,18 +1427,6 @@ static void samsung_dsim_disable_irq(struct samsung_dsim *dsi) disable_irq(dsi->irq); } -static void samsung_dsim_set_stop_state(struct samsung_dsim *dsi, bool enable) -{ - u32 reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG); - - if (enable) - reg |= DSIM_FORCE_STOP_STATE; - else - reg &= ~DSIM_FORCE_STOP_STATE; - - samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg); -} - static int samsung_dsim_init(struct samsung_dsim *dsi) { const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; @@ -1492,9 +1476,6 @@ static void samsung_dsim_atomic_pre_enable(struct drm_bridge *bridge, ret = samsung_dsim_init(dsi); if (ret) return; - - samsung_dsim_set_display_mode(dsi); - samsung_dsim_set_display_enable(dsi, true); } } @@ -1503,12 +1484,8 @@ static void samsung_dsim_atomic_enable(struct drm_bridge *bridge, { struct samsung_dsim *dsi = bridge_to_dsi(bridge); - if (samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) { - samsung_dsim_set_display_mode(dsi); - samsung_dsim_set_display_enable(dsi, true); - } else { - samsung_dsim_set_stop_state(dsi, false); - } + samsung_dsim_set_display_mode(dsi); + samsung_dsim_set_display_enable(dsi, true); dsi->state |= DSIM_STATE_VIDOUT_AVAILABLE; } @@ -1521,9 +1498,6 @@ static void samsung_dsim_atomic_disable(struct drm_bridge *bridge, if (!(dsi->state & DSIM_STATE_ENABLED)) return; - if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) - samsung_dsim_set_stop_state(dsi, true); - dsi->state &= ~DSIM_STATE_VIDOUT_AVAILABLE; } @@ -1828,8 +1802,6 @@ static ssize_t samsung_dsim_host_transfer(struct mipi_dsi_host *host, if (ret) return ret; - samsung_dsim_set_stop_state(dsi, false); - ret = mipi_dsi_create_packet(&xfer.packet, msg); if (ret < 0) return ret; From 61f61c89fa5d9925e0b874854fb62e51948a6de1 Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Mon, 15 Jan 2024 16:02:00 +0100 Subject: [PATCH 233/347] MAINTAINERS: Add Andreas Larsson as co-maintainer for arch/sparc Dave has not been very active on arch/sparc for the past two years. I have been contributing to the SPARC32 port as well as maintaining out-of-tree SPARC32 patches for LEON3/4/5 (SPARCv8 with CAS support) since 2012. I am willing to step up as an arch/sparc (co-)maintainer. For recent discussions on the matter, see [1] and [2]. [1] https://lore.kernel.org/r/20230713075235.2164609-1-u.kleine-koenig@pengutronix.de [2] https://lore.kernel.org/r/20231209105816.GA1085691@ravnborg.org/ Signed-off-by: Andreas Larsson Suggested-by: Arnd Bergmann Acked-by: Sam Ravnborg Acked-by: John Paul Adrian Glaubitz Acked-by: Jose E. Marchesi Signed-off-by: Arnd Bergmann --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..542ab762be7d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20549,6 +20549,7 @@ F: Documentation/translations/sp_SP/ SPARC + UltraSPARC (sparc/sparc64) M: "David S. Miller" +M: Andreas Larsson L: sparclinux@vger.kernel.org S: Maintained Q: http://patchwork.ozlabs.org/project/sparclinux/list/ From 00aab7dcb2267f2aef59447602f34501efe1a07f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 26 Jan 2024 18:09:01 +0100 Subject: [PATCH 234/347] HID: i2c-hid-of: fix NULL-deref on failed power up A while back the I2C HID implementation was split in an ACPI and OF part, but the new OF driver never initialises the client pointer which is dereferenced on power-up failures. Fixes: b33752c30023 ("HID: i2c-hid: Reorganize so ACPI and OF are separate modules") Cc: stable@vger.kernel.org # 5.12 Cc: Douglas Anderson Signed-off-by: Johan Hovold Reviewed-by: Douglas Anderson Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid-of.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-of.c b/drivers/hid/i2c-hid/i2c-hid-of.c index c4e1fa0273c8..8be4d576da77 100644 --- a/drivers/hid/i2c-hid/i2c-hid-of.c +++ b/drivers/hid/i2c-hid/i2c-hid-of.c @@ -87,6 +87,7 @@ static int i2c_hid_of_probe(struct i2c_client *client) if (!ihid_of) return -ENOMEM; + ihid_of->client = client; ihid_of->ops.power_up = i2c_hid_of_power_up; ihid_of->ops.power_down = i2c_hid_of_power_down; From 4d7acc8f48bcf27d0dc068f02e55c77e840b9110 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 27 Jan 2024 04:04:34 +1000 Subject: [PATCH 235/347] Revert "nouveau: push event block/allowing out of the fence context" This reverts commit eacabb5462717a52fccbbbba458365a4f5e61f35. This commit causes some regressions in desktop usage, this will reintroduce the original deadlock in DRI_PRIME situations, I've got an idea to fix it by offloading to a workqueue in a different spot, however this code has a race condition where we sometimes miss interrupts so I'd like to fix that as well. Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nouveau_fence.c | 28 +++++-------------------- drivers/gpu/drm/nouveau/nouveau_fence.h | 5 +---- 2 files changed, 6 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 5057d976fa57..ca762ea55413 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -62,7 +62,7 @@ nouveau_fence_signal(struct nouveau_fence *fence) if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) { struct nouveau_fence_chan *fctx = nouveau_fctx(fence); - if (atomic_dec_and_test(&fctx->notify_ref)) + if (!--fctx->notify_ref) drop = 1; } @@ -103,7 +103,6 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) void nouveau_fence_context_del(struct nouveau_fence_chan *fctx) { - cancel_work_sync(&fctx->allow_block_work); nouveau_fence_context_kill(fctx, 0); nvif_event_dtor(&fctx->event); fctx->dead = 1; @@ -168,18 +167,6 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc return ret; } -static void -nouveau_fence_work_allow_block(struct work_struct *work) -{ - struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan, - allow_block_work); - - if (atomic_read(&fctx->notify_ref) == 0) - nvif_event_block(&fctx->event); - else - nvif_event_allow(&fctx->event); -} - void nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx) { @@ -191,7 +178,6 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha } args; int ret; - INIT_WORK(&fctx->allow_block_work, nouveau_fence_work_allow_block); INIT_LIST_HEAD(&fctx->flip); INIT_LIST_HEAD(&fctx->pending); spin_lock_init(&fctx->lock); @@ -535,19 +521,15 @@ static bool nouveau_fence_enable_signaling(struct dma_fence *f) struct nouveau_fence *fence = from_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); bool ret; - bool do_work; - if (atomic_inc_return(&fctx->notify_ref) == 0) - do_work = true; + if (!fctx->notify_ref++) + nvif_event_allow(&fctx->event); ret = nouveau_fence_no_signaling(f); if (ret) set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags); - else if (atomic_dec_and_test(&fctx->notify_ref)) - do_work = true; - - if (do_work) - schedule_work(&fctx->allow_block_work); + else if (!--fctx->notify_ref) + nvif_event_block(&fctx->event); return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 28f5cf013b89..64d33ae7f356 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -3,7 +3,6 @@ #define __NOUVEAU_FENCE_H__ #include -#include #include struct nouveau_drm; @@ -46,9 +45,7 @@ struct nouveau_fence_chan { char name[32]; struct nvif_event event; - struct work_struct allow_block_work; - atomic_t notify_ref; - int dead, killed; + int notify_ref, dead, killed; }; struct nouveau_fence_priv { From 118063f3803324411be0e601d560ed7d1c9824b0 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 23 Jan 2024 19:44:57 +0530 Subject: [PATCH 236/347] platform/x86/amd/pmf: Get Human presence information from AMD SFH driver AMD SFH driver has APIs defined to export the human presence information; use this within the PMF driver to send inputs to the PMF TA, so that PMF driver can enact to the actions coming from the TA. Signed-off-by: Shyam Sundar S K Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20240123141458.3715211-1-Shyam-sundar.S-k@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/Kconfig | 1 + drivers/platform/x86/amd/pmf/spc.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/Kconfig b/drivers/platform/x86/amd/pmf/Kconfig index f246252bddd8..f4fa8bd8bda8 100644 --- a/drivers/platform/x86/amd/pmf/Kconfig +++ b/drivers/platform/x86/amd/pmf/Kconfig @@ -10,6 +10,7 @@ config AMD_PMF depends on AMD_NB select ACPI_PLATFORM_PROFILE depends on TEE && AMDTEE + depends on AMD_SFH_HID help This driver provides support for the AMD Platform Management Framework. The goal is to enhance end user experience by making AMD PCs smarter, diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c index a0423942f771..87ae7c41c9f8 100644 --- a/drivers/platform/x86/amd/pmf/spc.c +++ b/drivers/platform/x86/amd/pmf/spc.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include "pmf.h" @@ -44,6 +45,7 @@ void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table * dev_dbg(dev->dev, "Max C0 Residency: %u\n", in->ev_info.max_c0residency); dev_dbg(dev->dev, "GFX Busy: %u\n", in->ev_info.gfx_busy); dev_dbg(dev->dev, "LID State: %s\n", in->ev_info.lid_state ? "close" : "open"); + dev_dbg(dev->dev, "User Presence: %s\n", in->ev_info.user_present ? "Present" : "Away"); dev_dbg(dev->dev, "==== TA inputs END ====\n"); } #else @@ -147,6 +149,31 @@ static int amd_pmf_get_slider_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_ return 0; } +static int amd_pmf_get_sensor_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) +{ + struct amd_sfh_info sfh_info; + int ret; + + /* get HPD data */ + ret = amd_get_sfh_info(&sfh_info, MT_HPD); + if (ret) + return ret; + + switch (sfh_info.user_present) { + case SFH_NOT_DETECTED: + in->ev_info.user_present = 0xff; /* assume no sensors connected */ + break; + case SFH_USER_PRESENT: + in->ev_info.user_present = 1; + break; + case SFH_USER_AWAY: + in->ev_info.user_present = 0; + break; + } + + return 0; +} + void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) { /* TA side lid open is 1 and close is 0, hence the ! here */ @@ -155,4 +182,5 @@ void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_tab amd_pmf_get_smu_info(dev, in); amd_pmf_get_battery_info(dev, in); amd_pmf_get_slider_info(dev, in); + amd_pmf_get_sensor_info(dev, in); } From cedecdba60f4a42a8562574119f317ed0c674b5a Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 23 Jan 2024 19:44:58 +0530 Subject: [PATCH 237/347] platform/x86/amd/pmf: Get ambient light information from AMD SFH driver AMD SFH driver has APIs defined to export the ambient light information; use this within the PMF driver to send inputs to the PMF TA, so that PMF driver can enact to the actions coming from the TA. Signed-off-by: Shyam Sundar S K Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20240123141458.3715211-2-Shyam-sundar.S-k@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/spc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c index 87ae7c41c9f8..a3dec14c3004 100644 --- a/drivers/platform/x86/amd/pmf/spc.c +++ b/drivers/platform/x86/amd/pmf/spc.c @@ -46,6 +46,7 @@ void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table * dev_dbg(dev->dev, "GFX Busy: %u\n", in->ev_info.gfx_busy); dev_dbg(dev->dev, "LID State: %s\n", in->ev_info.lid_state ? "close" : "open"); dev_dbg(dev->dev, "User Presence: %s\n", in->ev_info.user_present ? "Present" : "Away"); + dev_dbg(dev->dev, "Ambient Light: %d\n", in->ev_info.ambient_light); dev_dbg(dev->dev, "==== TA inputs END ====\n"); } #else @@ -154,6 +155,13 @@ static int amd_pmf_get_sensor_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_ struct amd_sfh_info sfh_info; int ret; + /* Get ALS data */ + ret = amd_get_sfh_info(&sfh_info, MT_ALS); + if (!ret) + in->ev_info.ambient_light = sfh_info.ambient_light; + else + return ret; + /* get HPD data */ ret = amd_get_sfh_info(&sfh_info, MT_HPD); if (ret) From a692a86efe97fe0aba7cb15f38cbce866c080689 Mon Sep 17 00:00:00 2001 From: Cong Liu Date: Wed, 24 Jan 2024 09:29:38 +0800 Subject: [PATCH 238/347] platform/x86/amd/pmf: Fix memory leak in amd_pmf_get_pb_data() amd_pmf_get_pb_data() will allocate memory for the policy buffer, but does not free it if copy_from_user() fails. This leads to a memory leak. Fixes: 10817f28e533 ("platform/x86/amd/pmf: Add capability to sideload of policy binary") Reviewed-by: Shyam Sundar S K Signed-off-by: Cong Liu Link: https://lore.kernel.org/r/20240124012939.6550-1-liucong2@kylinos.cn Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/tee-if.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index 502ce93d5cdd..f8c0177afb0d 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -298,8 +298,10 @@ static ssize_t amd_pmf_get_pb_data(struct file *filp, const char __user *buf, if (!new_policy_buf) return -ENOMEM; - if (copy_from_user(new_policy_buf, buf, length)) + if (copy_from_user(new_policy_buf, buf, length)) { + kfree(new_policy_buf); return -EFAULT; + } kfree(dev->policy_buf); dev->policy_buf = new_policy_buf; From 8c898ec07a2fc1d4694e81097a48e94a3816308d Mon Sep 17 00:00:00 2001 From: Jithu Joseph Date: Thu, 25 Jan 2024 00:22:50 -0800 Subject: [PATCH 239/347] platform/x86/intel/ifs: Call release_firmware() when handling errors. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Missing release_firmware() due to error handling blocked any future image loading. Fix the return code and release_fiwmare() to release the bad image. Fixes: 25a76dbb36dd ("platform/x86/intel/ifs: Validate image size") Reported-by: Pengfei Xu Signed-off-by: Jithu Joseph Signed-off-by: Ashok Raj Tested-by: Pengfei Xu Reviewed-by: Tony Luck Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240125082254.424859-2-ashok.raj@intel.com Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/ifs/load.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/ifs/load.c b/drivers/platform/x86/intel/ifs/load.c index a1ee1a74fc3c..2cf3b4a8813f 100644 --- a/drivers/platform/x86/intel/ifs/load.c +++ b/drivers/platform/x86/intel/ifs/load.c @@ -399,7 +399,8 @@ int ifs_load_firmware(struct device *dev) if (fw->size != expected_size) { dev_err(dev, "File size mismatch (expected %u, actual %zu). Corrupted IFS image.\n", expected_size, fw->size); - return -EINVAL; + ret = -EINVAL; + goto release; } ret = image_sanity_check(dev, (struct microcode_header_intel *)fw->data); From 1abdf288b0ef5606f76b6e191fa6df05330e3d7e Mon Sep 17 00:00:00 2001 From: Phoenix Chen Date: Fri, 26 Jan 2024 17:53:08 +0800 Subject: [PATCH 240/347] platform/x86: touchscreen_dmi: Add info for the TECLAST X16 Plus tablet Add touch screen info for TECLAST X16 Plus tablet. Signed-off-by: Phoenix Chen Link: https://lore.kernel.org/r/20240126095308.5042-1-asbeltogf@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/touchscreen_dmi.c | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 0c6733772698..7aee5e9ff2b8 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -944,6 +944,32 @@ static const struct ts_dmi_data teclast_tbook11_data = { .properties = teclast_tbook11_props, }; +static const struct property_entry teclast_x16_plus_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 8), + PROPERTY_ENTRY_U32("touchscreen-min-y", 14), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1916), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1264), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-teclast-x16-plus.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + PROPERTY_ENTRY_BOOL("silead,home-button"), + { } +}; + +static const struct ts_dmi_data teclast_x16_plus_data = { + .embedded_fw = { + .name = "silead/gsl3692-teclast-x16-plus.fw", + .prefix = { 0xf0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 }, + .length = 43560, + .sha256 = { 0x9d, 0xb0, 0x3d, 0xf1, 0x00, 0x3c, 0xb5, 0x25, + 0x62, 0x8a, 0xa0, 0x93, 0x4b, 0xe0, 0x4e, 0x75, + 0xd1, 0x27, 0xb1, 0x65, 0x3c, 0xba, 0xa5, 0x0f, + 0xcd, 0xb4, 0xbe, 0x00, 0xbb, 0xf6, 0x43, 0x29 }, + }, + .acpi_name = "MSSL1680:00", + .properties = teclast_x16_plus_props, +}; + static const struct property_entry teclast_x3_plus_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1980), PROPERTY_ENTRY_U32("touchscreen-size-y", 1500), @@ -1612,6 +1638,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_SKU, "E5A6_A1"), }, }, + { + /* Teclast X16 Plus */ + .driver_data = (void *)&teclast_x16_plus_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TECLAST"), + DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), + DMI_MATCH(DMI_PRODUCT_SKU, "D3A5_A1"), + }, + }, { /* Teclast X3 Plus */ .driver_data = (void *)&teclast_x3_plus_data, From fcf67d82b8b878bdd95145382be43927bce07ec6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 26 Jan 2024 16:32:36 +0100 Subject: [PATCH 241/347] selftests: net: add missing config for big tcp tests The big_tcp test-case requires a few kernel knobs currently not specified in the net selftests config, causing the following failure: # selftests: net: big_tcp.sh # Error: Failed to load TC action module. # We have an error talking to the kernel ... # Testing for BIG TCP: # CLI GSO | GW GRO | GW GSO | SER GRO # ./big_tcp.sh: line 107: test: !=: unary operator expected ... # on on on on : [FAIL_on_link1] Add the missing configs Fixes: 6bb382bcf742 ("selftests: add a selftest for big tcp") Signed-off-by: Paolo Abeni Acked-by: Aaron Conole Acked-by: Xin Long Link: https://lore.kernel.org/all/21630ecea872fea13f071342ac64ef52a991a9b5.1706282943.git.pabeni@redhat.com/ Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 19ff75051660..413ab9abcf1b 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -29,7 +29,9 @@ CONFIG_NF_NAT=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_RAW=m CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_RAW=m CONFIG_IPV6_GRE=m CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_L2TP_ETH=m @@ -45,6 +47,8 @@ CONFIG_NF_TABLES=m CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_NAT=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_U32=m @@ -55,6 +59,7 @@ CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y +CONFIG_NF_FLOW_TABLE=m CONFIG_PSAMPLE=m CONFIG_TCP_MD5SIG=y CONFIG_TEST_BLACKHOLE_DEV=m From 0958b33ef5a04ed91f61cef4760ac412080c4e08 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 26 Jan 2024 09:42:58 +0900 Subject: [PATCH 242/347] tracing/trigger: Fix to return error if failed to alloc snapshot Fix register_snapshot_trigger() to return error code if it failed to allocate a snapshot instead of 0 (success). Unless that, it will register snapshot trigger without an error. Link: https://lore.kernel.org/linux-trace-kernel/170622977792.270660.2789298642759362200.stgit@devnote2 Fixes: 0bbe7f719985 ("tracing: Fix the race between registering 'snapshot' event trigger and triggering 'snapshot' operation") Cc: stable@vger.kernel.org Cc: Vincent Donnefort Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_trigger.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 46439e3bcec4..b33c3861fbbb 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1470,8 +1470,10 @@ register_snapshot_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { - if (tracing_alloc_snapshot_instance(file->tr) != 0) - return 0; + int ret = tracing_alloc_snapshot_instance(file->tr); + + if (ret < 0) + return ret; return register_trigger(glob, data, file); } From c3dfcdb65ec1a4813ec1e0871c52c671ba9c71ac Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 25 Jan 2024 20:39:16 +0800 Subject: [PATCH 243/347] net/smc: fix incorrect SMC-D link group matching logic The logic to determine if SMC-D link group matches is incorrect. The correct logic should be that it only returns true when the GID is the same, and the SMC-D device is the same and the extended GID is the same (in the case of virtual ISM). It can be fixed by adding brackets around the conditional (or ternary) operator expression. But for better readability and maintainability, it has been changed to an if-else statement. Reported-by: Matthew Rosato Closes: https://lore.kernel.org/r/13579588-eb9d-4626-a063-c0b77ed80f11@linux.ibm.com Fixes: b40584d14570 ("net/smc: compatible with 128-bits extended GID of virtual ISM device") Link: https://lore.kernel.org/r/13579588-eb9d-4626-a063-c0b77ed80f11@linux.ibm.com Signed-off-by: Wen Gu Reviewed-by: Alexandra Winter Link: https://lore.kernel.org/r/20240125123916.77928-1-guwen@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 95cc95458e2d..e4c858411207 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1877,9 +1877,15 @@ static bool smcd_lgr_match(struct smc_link_group *lgr, struct smcd_dev *smcismdev, struct smcd_gid *peer_gid) { - return lgr->peer_gid.gid == peer_gid->gid && lgr->smcd == smcismdev && - smc_ism_is_virtual(smcismdev) ? - (lgr->peer_gid.gid_ext == peer_gid->gid_ext) : 1; + if (lgr->peer_gid.gid != peer_gid->gid || + lgr->smcd != smcismdev) + return false; + + if (smc_ism_is_virtual(smcismdev) && + lgr->peer_gid.gid_ext != peer_gid->gid_ext) + return false; + + return true; } /* create a new SMC connection (and a new link group if necessary) */ From 281cb9d65a95c00bb844f332cd187491d2d55496 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 25 Jan 2024 05:41:03 -0800 Subject: [PATCH 244/347] bnxt_en: Make PTP timestamp HWRM more silent commit 056bce63c469 ("bnxt_en: Make PTP TX timestamp HWRM query silent") changed a netdev_err() to netdev_WARN_ONCE(). netdev_WARN_ONCE() is it generates a kernel WARNING, which is bad, for the following reasons: * You do not a kernel warning if the firmware queries are late * In busy networks, timestamp query failures fairly regularly * A WARNING message doesn't bring much value, since the code path is clear. (This was discussed in-depth in [1]) Transform the netdev_WARN_ONCE() into a netdev_warn_once(), and print a more well-behaved message, instead of a full WARN(). bnxt_en 0000:67:00.0 eth0: TS query for TX timer failed rc = fffffff5 [1] Link: https://lore.kernel.org/all/ZbDj%2FFI4EJezcfd1@gmail.com/ Signed-off-by: Breno Leitao Reviewed-by: Pavan Chebbi Reviewed-by: Michael Chan Fixes: 056bce63c469 ("bnxt_en: Make PTP TX timestamp HWRM query silent") Link: https://lore.kernel.org/r/20240125134104.2045573-1-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index adad188e38b8..cc07660330f5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -684,7 +684,7 @@ static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb) timestamp.hwtstamp = ns_to_ktime(ns); skb_tstamp_tx(ptp->tx_skb, ×tamp); } else { - netdev_WARN_ONCE(bp->dev, + netdev_warn_once(bp->dev, "TS query for TX timer failed rc = %x\n", rc); } From d3cb3b0088ca92082e2bebc40cc6894a632173e2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 25 Jan 2024 09:22:50 +0100 Subject: [PATCH 245/347] selftests: net: add missing required classifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the udpgro_fraglist self-test uses the BPF classifiers, but the current net self-test configuration does not include it, causing CI failures: # selftests: net: udpgro_frglist.sh # ipv6 # tcp - over veth touching data # -l 4 -6 -D 2001:db8::1 -t rx -4 -t # Error: TC classifier not found. # We have an error talking to the kernel # Error: TC classifier not found. # We have an error talking to the kernel Add the missing knob. Fixes: edae34a3ed92 ("selftests net: add UDP GRO fraglist + bpf self-tests") Signed-off-by: Paolo Abeni Reviewed-by: Maciej Żenczykowski Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/7c3643763b331e9a400e1874fe089193c99a1c3f.1706170897.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 413ab9abcf1b..56da5d52674c 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -51,6 +51,7 @@ CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_BPF=m CONFIG_NET_CLS_U32=m CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m From 89abe628375301fedb68770644df845d49018d8b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 25 Jan 2024 19:09:06 +0100 Subject: [PATCH 246/347] selftests: net: give more time for GRO aggregation The gro.sh test-case relay on the gro_flush_timeout to ensure that all the segments belonging to any given batch are properly aggregated. The other end, the sender is a user-space program transmitting each packet with a separate write syscall. A busy host and/or stracing the sender program can make the relevant segments reach the GRO engine after the flush timeout triggers. Give the GRO flush timeout more slack, to avoid sporadic self-tests failures. Fixes: 9af771d2ec04 ("selftests/net: allow GRO coalesce test on veth") Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Tested-by: Eric Dumazet Link: https://lore.kernel.org/r/bffec2beab3a5672dd13ecabe4fad81d2155b367.1706206101.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/setup_veth.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh index a9a1759e035c..1f78a87f6f37 100644 --- a/tools/testing/selftests/net/setup_veth.sh +++ b/tools/testing/selftests/net/setup_veth.sh @@ -11,7 +11,7 @@ setup_veth_ns() { local -r ns_mac="$4" [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" + echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535 ip -netns "${ns_name}" link set dev "${ns_dev}" up From 8d975c15c0cd744000ca386247432d57b21f9df0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 Jan 2024 17:05:57 +0000 Subject: [PATCH 247/347] ip6_tunnel: make sure to pull inner header in __ip6_tnl_rcv() syzbot found __ip6_tnl_rcv() could access unitiliazed data [1]. Call pskb_inet_may_pull() to fix this, and initialize ipv6h variable after this call as it can change skb->head. [1] BUG: KMSAN: uninit-value in __INET_ECN_decapsulate include/net/inet_ecn.h:253 [inline] BUG: KMSAN: uninit-value in INET_ECN_decapsulate include/net/inet_ecn.h:275 [inline] BUG: KMSAN: uninit-value in IP6_ECN_decapsulate+0x7df/0x1e50 include/net/inet_ecn.h:321 __INET_ECN_decapsulate include/net/inet_ecn.h:253 [inline] INET_ECN_decapsulate include/net/inet_ecn.h:275 [inline] IP6_ECN_decapsulate+0x7df/0x1e50 include/net/inet_ecn.h:321 ip6ip6_dscp_ecn_decapsulate+0x178/0x1b0 net/ipv6/ip6_tunnel.c:727 __ip6_tnl_rcv+0xd4e/0x1590 net/ipv6/ip6_tunnel.c:845 ip6_tnl_rcv+0xce/0x100 net/ipv6/ip6_tunnel.c:888 gre_rcv+0x143f/0x1870 ip6_protocol_deliver_rcu+0xda6/0x2a60 net/ipv6/ip6_input.c:438 ip6_input_finish net/ipv6/ip6_input.c:483 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip6_input+0x15d/0x430 net/ipv6/ip6_input.c:492 ip6_mc_input+0xa7e/0xc80 net/ipv6/ip6_input.c:586 dst_input include/net/dst.h:461 [inline] ip6_rcv_finish+0x5db/0x870 net/ipv6/ip6_input.c:79 NF_HOOK include/linux/netfilter.h:314 [inline] ipv6_rcv+0xda/0x390 net/ipv6/ip6_input.c:310 __netif_receive_skb_one_core net/core/dev.c:5532 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5646 netif_receive_skb_internal net/core/dev.c:5732 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5791 tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1555 tun_get_user+0x53af/0x66d0 drivers/net/tun.c:2002 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:2084 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0x786/0x1200 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x6d/0x140 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook+0x129/0xa70 mm/slab.h:768 slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x5e9/0xb10 mm/slub.c:3523 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:560 __alloc_skb+0x318/0x740 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1286 [inline] alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6334 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2787 tun_alloc_skb drivers/net/tun.c:1531 [inline] tun_get_user+0x1e8a/0x66d0 drivers/net/tun.c:1846 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:2084 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0x786/0x1200 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x6d/0x140 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b CPU: 0 PID: 5034 Comm: syz-executor331 Not tainted 6.7.0-syzkaller-00562-g9f8413c4a66f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 Fixes: 0d3c703a9d17 ("ipv6: Cleanup IPv6 tunnel receive path") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240125170557.2663942-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_tunnel.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 46c19bd48990..9bbabf750a21 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -796,8 +796,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, struct sk_buff *skb), bool log_ecn_err) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); - int err; + const struct ipv6hdr *ipv6h; + int nh, err; if ((!(tpi->flags & TUNNEL_CSUM) && (tunnel->parms.i_flags & TUNNEL_CSUM)) || @@ -829,7 +829,6 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, goto drop; } - ipv6h = ipv6_hdr(skb); skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { @@ -837,7 +836,23 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, skb_reset_mac_header(skb); } + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(tunnel->dev, rx_length_errors); + DEV_STATS_INC(tunnel->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + ipv6h = (struct ipv6hdr *)(skb->head + nh); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); From dfa988b4c7c3a48bde7c2713308920c7741fff29 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 24 Jan 2024 05:17:25 +0000 Subject: [PATCH 248/347] net: dsa: mt7530: fix 10M/100M speed on MT7988 switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setup PMCR port register for actual speed and duplex on internally connected PHYs of the MT7988 built-in switch. This fixes links with speeds other than 1000M. Fixes: 110c18bfed41 ("net: dsa: mt7530: introduce driver for MT7988 built-in switch") Signed-off-by: Daniel Golle Reviewed-by: Vladimir Oltean Acked-by: Arınç ÜNAL Link: https://lore.kernel.org/r/a5b04dfa8256d8302f402545a51ac4c626fdba25.1706071272.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 391c4dbdff42..3c1f657593a8 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2838,8 +2838,7 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port, /* MT753x MAC works in 1G full duplex mode for all up-clocked * variants. */ - if (interface == PHY_INTERFACE_MODE_INTERNAL || - interface == PHY_INTERFACE_MODE_TRGMII || + if (interface == PHY_INTERFACE_MODE_TRGMII || (phy_interface_mode_is_8023z(interface))) { speed = SPEED_1000; duplex = DUPLEX_FULL; From 99b817c173cd213671daecd25ca27f56b0c7c4ec Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 26 Jan 2024 11:44:03 +0100 Subject: [PATCH 249/347] lsm: fix the logic in security_inode_getsecctx() The inode_getsecctx LSM hook has previously been corrected to have -EOPNOTSUPP instead of 0 as the default return value to fix BPF LSM behavior. However, the call_int_hook()-generated loop in security_inode_getsecctx() was left treating 0 as the neutral value, so after an LSM returns 0, the loop continues to try other LSMs, and if one of them returns a non-zero value, the function immediately returns with said value. So in a situation where SELinux and the BPF LSMs registered this hook, -EOPNOTSUPP would be incorrectly returned whenever SELinux returned 0. Fix this by open-coding the call_int_hook() loop and making it use the correct LSM_RET_DEFAULT() value as the neutral one, similar to what other hooks do. Cc: stable@vger.kernel.org Reported-by: Stephen Smalley Link: https://lore.kernel.org/selinux/CAEjxPJ4ev-pasUwGx48fDhnmjBnq_Wh90jYPwRQRAqXxmOKD4Q@mail.gmail.com/ Link: https://bugzilla.redhat.com/show_bug.cgi?id=2257983 Fixes: b36995b8609a ("lsm: fix default return value for inode_getsecctx") Signed-off-by: Ondrej Mosnacek Reviewed-by: Casey Schaufler [PM: subject line tweak] Signed-off-by: Paul Moore --- security/security.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/security/security.c b/security/security.c index 0144a98d3712..6196ccaba433 100644 --- a/security/security.c +++ b/security/security.c @@ -4255,7 +4255,19 @@ EXPORT_SYMBOL(security_inode_setsecctx); */ int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) { - return call_int_hook(inode_getsecctx, -EOPNOTSUPP, inode, ctx, ctxlen); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.inode_getsecctx, list) { + rc = hp->hook.inode_getsecctx(inode, ctx, ctxlen); + if (rc != LSM_RET_DEFAULT(inode_getsecctx)) + return rc; + } + + return LSM_RET_DEFAULT(inode_getsecctx); } EXPORT_SYMBOL(security_inode_getsecctx); From d9281660ff3ffb4a05302b485cc59a87e709aefc Mon Sep 17 00:00:00 2001 From: Chunhai Guo Date: Fri, 26 Jan 2024 22:01:42 +0800 Subject: [PATCH 250/347] erofs: relaxed temporary buffers allocation on readahead Even with inplace decompression, sometimes very few temporary buffers may be still needed for a single decompression shot (e.g. 16 pages for 64k sliding window or 4 pages for 16k sliding window). In low-memory scenarios, it would be better to try to allocate with GFP_NOWAIT on readahead first. That can help reduce the time spent on page allocation under durative memory pressure. Here are detailed performance numbers under multi-app launch benchmark workload [1] on ARM64 Android devices (8-core CPU and 8GB of memory) running a 5.15 LTS kernel with EROFS of 4k pclusters: +----------------------------------------------+ | LZ4 | vanilla | patched | diff | |----------------+---------+---------+---------| | Average (ms) | 3364 | 2684 | -20.21% | [64k sliding window] |----------------+---------+---------+---------| | Average (ms) | 2079 | 1610 | -22.56% | [16k sliding window] +----------------------------------------------+ The total size of system images for 4k pclusters is almost unchanged: (64k sliding window) 9,117,044 KB (16k sliding window) 9,113,096 KB Therefore, in addition to switch the sliding window from 64k to 16k, after applying this patch, it can eventually save 52.14% (3364 -> 1610) on average with no memory reservation. That is particularly useful for embedded devices with limited resources. [1] https://lore.kernel.org/r/20240109074143.4138783-1-guochunhai@vivo.com Suggested-by: Gao Xiang Signed-off-by: Chunhai Guo Signed-off-by: Gao Xiang Reviewed-by: Yue Hu Link: https://lore.kernel.org/r/20240126140142.201718-1-hsiangkao@linux.alibaba.com --- fs/erofs/compress.h | 5 ++--- fs/erofs/decompressor.c | 5 +++-- fs/erofs/decompressor_deflate.c | 19 +++++++++++++------ fs/erofs/decompressor_lzma.c | 17 ++++++++++++----- fs/erofs/zdata.c | 16 ++++++++++++---- 5 files changed, 42 insertions(+), 20 deletions(-) diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 279933e007d2..7cc5841577b2 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -11,13 +11,12 @@ struct z_erofs_decompress_req { struct super_block *sb; struct page **in, **out; - unsigned short pageofs_in, pageofs_out; unsigned int inputsize, outputsize; - /* indicate the algorithm will be used for decompression */ - unsigned int alg; + unsigned int alg; /* the algorithm for decompression */ bool inplace_io, partial_decoding, fillgaps; + gfp_t gfp; /* allocation flags for extra temporary buffers */ }; struct z_erofs_decompressor { diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 072ef6a66823..d4cee95af14c 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -111,8 +111,9 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, victim = availables[--top]; get_page(victim); } else { - victim = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + victim = erofs_allocpage(pagepool, rq->gfp); + if (!victim) + return -ENOMEM; set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE); } rq->out[i] = victim; diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c index 4a64a9c91dd3..b98872058abe 100644 --- a/fs/erofs/decompressor_deflate.c +++ b/fs/erofs/decompressor_deflate.c @@ -95,7 +95,7 @@ int z_erofs_load_deflate_config(struct super_block *sb, } int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, - struct page **pagepool) + struct page **pgpl) { const unsigned int nrpages_out = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; @@ -158,8 +158,12 @@ again: strm->z.avail_out = min_t(u32, outsz, PAGE_SIZE - pofs); outsz -= strm->z.avail_out; if (!rq->out[no]) { - rq->out[no] = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + rq->out[no] = erofs_allocpage(pgpl, rq->gfp); + if (!rq->out[no]) { + kout = NULL; + err = -ENOMEM; + break; + } set_page_private(rq->out[no], Z_EROFS_SHORTLIVED_PAGE); } @@ -211,8 +215,11 @@ again: DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb), rq->in[j])); - tmppage = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + tmppage = erofs_allocpage(pgpl, rq->gfp); + if (!tmppage) { + err = -ENOMEM; + goto failed; + } set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE); copy_highpage(tmppage, rq->in[j]); rq->in[j] = tmppage; @@ -230,7 +237,7 @@ again: break; } } - +failed: if (zlib_inflateEnd(&strm->z) != Z_OK && !err) err = -EIO; if (kout) diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 2dd14f99c1dc..6ca357d83cfa 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -148,7 +148,7 @@ again: } int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, - struct page **pagepool) + struct page **pgpl) { const unsigned int nrpages_out = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; @@ -215,8 +215,11 @@ again: PAGE_SIZE - pageofs); outlen -= strm->buf.out_size; if (!rq->out[no] && rq->fillgaps) { /* deduped */ - rq->out[no] = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + rq->out[no] = erofs_allocpage(pgpl, rq->gfp); + if (!rq->out[no]) { + err = -ENOMEM; + break; + } set_page_private(rq->out[no], Z_EROFS_SHORTLIVED_PAGE); } @@ -258,8 +261,11 @@ again: DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb), rq->in[j])); - tmppage = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + tmppage = erofs_allocpage(pgpl, rq->gfp); + if (!tmppage) { + err = -ENOMEM; + goto failed; + } set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE); copy_highpage(tmppage, rq->in[j]); rq->in[j] = tmppage; @@ -277,6 +283,7 @@ again: break; } } +failed: if (no < nrpages_out && strm->buf.out) kunmap(rq->out[no]); if (ni < nrpages_in) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index c1c77166b30f..ff0aa72b0db3 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -82,6 +82,9 @@ struct z_erofs_pcluster { /* L: indicate several pageofs_outs or not */ bool multibases; + /* L: whether extra buffer allocations are best-effort */ + bool besteffort; + /* A: compressed bvecs (can be cached or inplaced pages) */ struct z_erofs_bvec compressed_bvecs[]; }; @@ -960,7 +963,7 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page, } static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, - struct page *page) + struct page *page, bool ra) { struct inode *const inode = fe->inode; struct erofs_map_blocks *const map = &fe->map; @@ -1010,6 +1013,7 @@ repeat: err = z_erofs_pcluster_begin(fe); if (err) goto out; + fe->pcl->besteffort |= !ra; } /* @@ -1276,6 +1280,9 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, .inplace_io = overlapped, .partial_decoding = pcl->partial, .fillgaps = pcl->multibases, + .gfp = pcl->besteffort ? + GFP_KERNEL | __GFP_NOFAIL : + GFP_NOWAIT | __GFP_NORETRY }, be->pagepool); /* must handle all compressed pages before actual file pages */ @@ -1318,6 +1325,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, pcl->length = 0; pcl->partial = true; pcl->multibases = false; + pcl->besteffort = false; pcl->bvset.nextpage = NULL; pcl->vcnt = 0; @@ -1787,7 +1795,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, if (PageUptodate(page)) unlock_page(page); else - (void)z_erofs_do_read_page(f, page); + (void)z_erofs_do_read_page(f, page, !!rac); put_page(page); } @@ -1808,7 +1816,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio) f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT; z_erofs_pcluster_readmore(&f, NULL, true); - err = z_erofs_do_read_page(&f, &folio->page); + err = z_erofs_do_read_page(&f, &folio->page, false); z_erofs_pcluster_readmore(&f, NULL, false); z_erofs_pcluster_end(&f); @@ -1849,7 +1857,7 @@ static void z_erofs_readahead(struct readahead_control *rac) folio = head; head = folio_get_private(folio); - err = z_erofs_do_read_page(&f, &folio->page); + err = z_erofs_do_read_page(&f, &folio->page, true); if (err && err != -EINTR) erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu", folio->index, EROFS_I(inode)->nid); From e622502c310f1069fd9f41cd38210553115f610a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Jan 2024 15:18:47 +0100 Subject: [PATCH 251/347] ipmr: fix kernel panic when forwarding mcast packets The stacktrace was: [ 86.305548] BUG: kernel NULL pointer dereference, address: 0000000000000092 [ 86.306815] #PF: supervisor read access in kernel mode [ 86.307717] #PF: error_code(0x0000) - not-present page [ 86.308624] PGD 0 P4D 0 [ 86.309091] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 86.309883] CPU: 2 PID: 3139 Comm: pimd Tainted: G U 6.8.0-6wind-knet #1 [ 86.311027] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.1-0-g0551a4be2c-prebuilt.qemu-project.org 04/01/2014 [ 86.312728] RIP: 0010:ip_mr_forward (/build/work/knet/net/ipv4/ipmr.c:1985) [ 86.313399] Code: f9 1f 0f 87 85 03 00 00 48 8d 04 5b 48 8d 04 83 49 8d 44 c5 00 48 8b 40 70 48 39 c2 0f 84 d9 00 00 00 49 8b 46 58 48 83 e0 fe <80> b8 92 00 00 00 00 0f 84 55 ff ff ff 49 83 47 38 01 45 85 e4 0f [ 86.316565] RSP: 0018:ffffad21c0583ae0 EFLAGS: 00010246 [ 86.317497] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 86.318596] RDX: ffff9559cb46c000 RSI: 0000000000000000 RDI: 0000000000000000 [ 86.319627] RBP: ffffad21c0583b30 R08: 0000000000000000 R09: 0000000000000000 [ 86.320650] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001 [ 86.321672] R13: ffff9559c093a000 R14: ffff9559cc00b800 R15: ffff9559c09c1d80 [ 86.322873] FS: 00007f85db661980(0000) GS:ffff955a79d00000(0000) knlGS:0000000000000000 [ 86.324291] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 86.325314] CR2: 0000000000000092 CR3: 000000002f13a000 CR4: 0000000000350ef0 [ 86.326589] Call Trace: [ 86.327036] [ 86.327434] ? show_regs (/build/work/knet/arch/x86/kernel/dumpstack.c:479) [ 86.328049] ? __die (/build/work/knet/arch/x86/kernel/dumpstack.c:421 /build/work/knet/arch/x86/kernel/dumpstack.c:434) [ 86.328508] ? page_fault_oops (/build/work/knet/arch/x86/mm/fault.c:707) [ 86.329107] ? do_user_addr_fault (/build/work/knet/arch/x86/mm/fault.c:1264) [ 86.329756] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.330350] ? __irq_work_queue_local (/build/work/knet/kernel/irq_work.c:111 (discriminator 1)) [ 86.331013] ? exc_page_fault (/build/work/knet/./arch/x86/include/asm/paravirt.h:693 /build/work/knet/arch/x86/mm/fault.c:1515 /build/work/knet/arch/x86/mm/fault.c:1563) [ 86.331702] ? asm_exc_page_fault (/build/work/knet/./arch/x86/include/asm/idtentry.h:570) [ 86.332468] ? ip_mr_forward (/build/work/knet/net/ipv4/ipmr.c:1985) [ 86.333183] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.333920] ipmr_mfc_add (/build/work/knet/./include/linux/rcupdate.h:782 /build/work/knet/net/ipv4/ipmr.c:1009 /build/work/knet/net/ipv4/ipmr.c:1273) [ 86.334583] ? __pfx_ipmr_hash_cmp (/build/work/knet/net/ipv4/ipmr.c:363) [ 86.335357] ip_mroute_setsockopt (/build/work/knet/net/ipv4/ipmr.c:1470) [ 86.336135] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.336854] ? ip_mroute_setsockopt (/build/work/knet/net/ipv4/ipmr.c:1470) [ 86.337679] do_ip_setsockopt (/build/work/knet/net/ipv4/ip_sockglue.c:944) [ 86.338408] ? __pfx_unix_stream_read_actor (/build/work/knet/net/unix/af_unix.c:2862) [ 86.339232] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.339809] ? aa_sk_perm (/build/work/knet/security/apparmor/include/cred.h:153 /build/work/knet/security/apparmor/net.c:181) [ 86.340342] ip_setsockopt (/build/work/knet/net/ipv4/ip_sockglue.c:1415) [ 86.340859] raw_setsockopt (/build/work/knet/net/ipv4/raw.c:836) [ 86.341408] ? security_socket_setsockopt (/build/work/knet/security/security.c:4561 (discriminator 13)) [ 86.342116] sock_common_setsockopt (/build/work/knet/net/core/sock.c:3716) [ 86.342747] do_sock_setsockopt (/build/work/knet/net/socket.c:2313) [ 86.343363] __sys_setsockopt (/build/work/knet/./include/linux/file.h:32 /build/work/knet/net/socket.c:2336) [ 86.344020] __x64_sys_setsockopt (/build/work/knet/net/socket.c:2340) [ 86.344766] do_syscall_64 (/build/work/knet/arch/x86/entry/common.c:52 /build/work/knet/arch/x86/entry/common.c:83) [ 86.345433] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.346161] ? syscall_exit_work (/build/work/knet/./include/linux/audit.h:357 /build/work/knet/kernel/entry/common.c:160) [ 86.346938] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.347657] ? syscall_exit_to_user_mode (/build/work/knet/kernel/entry/common.c:215) [ 86.348538] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.349262] ? do_syscall_64 (/build/work/knet/./arch/x86/include/asm/cpufeature.h:171 /build/work/knet/arch/x86/entry/common.c:98) [ 86.349971] entry_SYSCALL_64_after_hwframe (/build/work/knet/arch/x86/entry/entry_64.S:129) The original packet in ipmr_cache_report() may be queued and then forwarded with ip_mr_forward(). This last function has the assumption that the skb dst is set. After the below commit, the skb dst is dropped by ipv4_pktinfo_prepare(), which causes the oops. Fixes: bb7403655b3c ("ipmr: support IP_PKTINFO on cache report IGMP msg") Signed-off-by: Nicolas Dichtel Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240125141847.1931933-1-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 2 +- net/ipv4/ip_sockglue.c | 6 ++++-- net/ipv4/ipmr.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index de0c69c57e3c..25cb688bdc62 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -767,7 +767,7 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); * Functions provided by ip_sockglue.c */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst); void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 7aa9dc0e6760..21d2ffa919e9 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1363,12 +1363,13 @@ e_inval: * ipv4_pktinfo_prepare - transfer some info from rtable to skb * @sk: socket * @skb: buffer + * @drop_dst: if true, drops skb dst * * To support IP_CMSG_PKTINFO option, we store rt_iif and specific * destination in skb->cb[] before dst drop. * This way, receiver doesn't make cache line misses to read rtable. */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst) { struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); bool prepare = inet_test_bit(PKTINFO, sk) || @@ -1397,7 +1398,8 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + if (drop_dst) + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d6f59531b3a..362229836510 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1073,7 +1073,7 @@ static int ipmr_cache_report(const struct mr_table *mrt, msg = (struct igmpmsg *)skb_network_header(skb); msg->im_vif = vifi; msg->im_vif_hi = vifi >> 8; - ipv4_pktinfo_prepare(mroute_sk, pkt); + ipv4_pktinfo_prepare(mroute_sk, pkt, false); memcpy(skb->cb, pkt->cb, sizeof(skb->cb)); /* Add our header */ igmp = skb_put(skb, sizeof(struct igmphdr)); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 27da9d7294c0..aea89326c697 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -292,7 +292,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) /* Charge it to the socket. */ - ipv4_pktinfo_prepare(sk, skb); + ipv4_pktinfo_prepare(sk, skb, true); if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { kfree_skb_reason(skb, reason); return NET_RX_DROP; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 148ffb007969..f631b0a21af4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2169,7 +2169,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_csum_pull_header(skb); - ipv4_pktinfo_prepare(sk, skb); + ipv4_pktinfo_prepare(sk, skb, true); return __udp_queue_rcv_skb(sk, skb); csum_error: From 59f7ea703c38abc3f239068d49cc8897740e4c54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 9 Jan 2024 22:58:59 +0100 Subject: [PATCH 252/347] batman-adv: mcast: fix mcast packet type counter on timeouted nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a node which does not have the new batman-adv multicast packet type capability vanishes then the according, global counter erroneously would not be reduced in response on other nodes. Which in turn leads to the mesh never switching back to sending with the new multicast packet type. Fix this by reducing the according counter when such a node times out. Fixes: 90039133221e ("batman-adv: mcast: implement multicast packet generation") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index d982daea8329..b4f8b4af1722 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -2198,6 +2198,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) BATADV_MCAST_WANT_NO_RTR4); batadv_mcast_want_rtr6_update(bat_priv, orig, BATADV_MCAST_WANT_NO_RTR6); + batadv_mcast_have_mc_ptype_update(bat_priv, orig, + BATADV_MCAST_HAVE_MC_PTYPE_CAPA); spin_unlock_bh(&orig->mcast_handler_lock); } From 0a186b49bba596b81de5a686ce5bfc9cd48ab3ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Wed, 17 Jan 2024 06:22:20 +0100 Subject: [PATCH 253/347] batman-adv: mcast: fix memory leak on deleting a batman-adv interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The batman-adv multicast tracker TVLV handler is registered for the new batman-adv multicast packet type upon creating a batman-adv interface, but not unregistered again upon the interface's deletion, leading to a memory leak. Fix this memory leak by calling the according TVLV handler unregister routine for the multicast tracker TVLV upon batman-adv interface deletion. Fixes: 07afe1ba288c ("batman-adv: mcast: implement multicast packet reception and forwarding") Reported-and-tested-by: syzbot+ebe64cc5950868e77358@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000beadc4060f0cbc23@google.com/ Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index b4f8b4af1722..14088c4ff2f6 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -2175,6 +2175,7 @@ void batadv_mcast_free(struct batadv_priv *bat_priv) cancel_delayed_work_sync(&bat_priv->mcast.work); batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2); + batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST_TRACKER, 1); batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2); /* safely calling outside of worker, as worker was canceled above */ From 586e40aa883cab255ab8ddfe332c4092a5349cb7 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 24 Jan 2024 09:16:22 +0000 Subject: [PATCH 254/347] MAINTAINERS: Add connector headers to NETWORKING DRIVERS Commit 46cf789b68b2 ("connector: Move maintainence under networking drivers umbrella.") moved the connector maintenance but did not include the connector header files. It seems that it has always been implied that these headers were maintained along with the rest of the connector code, both before and after the cited commit. Make this explicit. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 92152ac346c8..9435ea0f8cda 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15179,6 +15179,7 @@ F: Documentation/networking/net_cachelines/net_device.rst F: drivers/connector/ F: drivers/net/ F: include/dt-bindings/net/ +F: include/linux/cn_proc.h F: include/linux/etherdevice.h F: include/linux/fcdevice.h F: include/linux/fddidevice.h @@ -15186,6 +15187,7 @@ F: include/linux/hippidevice.h F: include/linux/if_* F: include/linux/inetdevice.h F: include/linux/netdevice.h +F: include/uapi/linux/cn_proc.h F: include/uapi/linux/if_* F: include/uapi/linux/netdevice.h X: drivers/net/wireless/ From 62b4248105353e7d1debd30ca5c57ec5e5f28e35 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 24 Jan 2024 11:17:58 +0100 Subject: [PATCH 255/347] net: lan966x: Fix port configuration when using SGMII interface In case the interface between the MAC and the PHY is SGMII, then the bit GIGA_MODE on the MAC side needs to be set regardless of the speed at which it is running. Fixes: d28d6d2e37d1 ("net: lan966x: add port module support") Signed-off-by: Horatiu Vultur Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan966x/lan966x_port.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index 92108d354051..2e83bbb9477e 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -168,9 +168,10 @@ static void lan966x_port_link_up(struct lan966x_port *port) lan966x_taprio_speed_set(port, config->speed); /* Also the GIGA_MODE_ENA(1) needs to be set regardless of the - * port speed for QSGMII ports. + * port speed for QSGMII or SGMII ports. */ - if (phy_interface_num_ports(config->portmode) == 4) + if (phy_interface_num_ports(config->portmode) == 4 || + config->portmode == PHY_INTERFACE_MODE_SGMII) mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA_SET(1); lan_wr(config->duplex | mode, From c91c6b2f08afb7be111678ceade563158c9a31ba Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Sat, 27 Jan 2024 11:07:49 +0100 Subject: [PATCH 256/347] Revert "MIPS: loongson64: set nid for reserved memblock region" This reverts commit ce7b1b97776ec0b068c4dd6b6dbb48ae09a23519. Signed-off-by: Thomas Bogendoerfer --- arch/mips/loongson64/init.c | 2 -- arch/mips/loongson64/numa.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index 000ba91c0886..f25caa6aa9d3 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -103,8 +103,6 @@ void __init szmem(unsigned int node) if (loongson_sysconf.vgabios_addr) memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr), SZ_256K); - /* set nid for reserved memory */ - memblock_set_node((u64)node << 44, (u64)(node+1) << 44, &memblock.reserved, node); } #ifndef CONFIG_NUMA diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index 6345e096c532..8f61e93c0c5b 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -132,8 +132,6 @@ static void __init node_mem_init(unsigned int node) /* Reserve pfn range 0~node[0]->node_start_pfn */ memblock_reserve(0, PAGE_SIZE * start_pfn); - /* set nid for reserved memory on node 0 */ - memblock_set_node(0, (u64)1 << 44, &memblock.reserved, 1); } } From 822df315cc7c85c3c10afcc6408b254a6fa0f166 Mon Sep 17 00:00:00 2001 From: Huang Pei Date: Sat, 27 Jan 2024 17:12:21 +0800 Subject: [PATCH 257/347] MIPS: loongson64: set nid for reserved memblock region Commit 61167ad5fecd("mm: pass nid to reserve_bootmem_region()") reveals that reserved memblock regions have no valid node id set, just set it right since loongson64 firmware makes it clear in memory layout info. This works around booting failure on 3A1000+ since commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") under CONFIG_DEFERRED_STRUCT_PAGE_INIT. Signed-off-by: Huang Pei Signed-off-by: Thomas Bogendoerfer --- arch/mips/loongson64/init.c | 3 +++ arch/mips/loongson64/numa.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index f25caa6aa9d3..553142c1f14f 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -103,6 +103,9 @@ void __init szmem(unsigned int node) if (loongson_sysconf.vgabios_addr) memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr), SZ_256K); + /* set nid for reserved memory */ + memblock_set_node((u64)node << 44, (u64)(node + 1) << 44, + &memblock.reserved, node); } #ifndef CONFIG_NUMA diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index 8f61e93c0c5b..68dafd6d3e25 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -132,6 +132,8 @@ static void __init node_mem_init(unsigned int node) /* Reserve pfn range 0~node[0]->node_start_pfn */ memblock_reserve(0, PAGE_SIZE * start_pfn); + /* set nid for reserved memory on node 0 */ + memblock_set_node(0, 1ULL << 44, &memblock.reserved, 0); } } From 59be5c35850171e307ca5d3d703ee9ff4096b948 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sat, 27 Jan 2024 05:05:57 +0800 Subject: [PATCH 258/347] mips: Call lose_fpu(0) before initializing fcr31 in mips_set_personality_nan If we still own the FPU after initializing fcr31, when we are preempted the dirty value in the FPU will be read out and stored into fcr31, clobbering our setting. This can cause an improper floating-point environment after execve(). For example: zsh% cat measure.c #include int main() { return fetestexcept(FE_INEXACT); } zsh% cc measure.c -o measure -lm zsh% echo $((1.0/3)) # raising FE_INEXACT 0.33333333333333331 zsh% while ./measure; do ; done (stopped in seconds) Call lose_fpu(0) before setting fcr31 to prevent this. Closes: https://lore.kernel.org/linux-mips/7a6aa1bbdbbe2e63ae96ff163fab0349f58f1b9e.camel@xry111.site/ Fixes: 9b26616c8d9d ("MIPS: Respect the ISA level in FCSR handling") Cc: stable@vger.kernel.org Signed-off-by: Xi Ruoyao Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/elf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c index 5582a4ca1e9e..7aa2c2360ff6 100644 --- a/arch/mips/kernel/elf.c +++ b/arch/mips/kernel/elf.c @@ -11,6 +11,7 @@ #include #include +#include #ifdef CONFIG_MIPS_FP_SUPPORT @@ -309,6 +310,11 @@ void mips_set_personality_nan(struct arch_elf_state *state) struct cpuinfo_mips *c = &boot_cpu_data; struct task_struct *t = current; + /* Do this early so t->thread.fpu.fcr31 won't be clobbered in case + * we are preempted before the lose_fpu(0) in start_thread. + */ + lose_fpu(0); + t->thread.fpu.fcr31 = c->fpu_csr31; switch (state->nan_2008) { case 0: From 6c20faec8ffc97af21acb43382adda011eb39359 Mon Sep 17 00:00:00 2001 From: Zhang Bingwu Date: Sun, 14 Jan 2024 16:13:59 +0800 Subject: [PATCH 259/347] kbuild: defconf: use SRCARCH to find merged configs For some ARCH values, SRCARCH, which should be used for finding arch/ subdirectory, is different from ARCH. Signed-off-by: Zhang Bingwu Signed-off-by: Masahiro Yamada --- scripts/Makefile.defconf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/Makefile.defconf b/scripts/Makefile.defconf index ab271b2051a2..226ea3df3b4b 100644 --- a/scripts/Makefile.defconf +++ b/scripts/Makefile.defconf @@ -9,8 +9,8 @@ # Input config fragments without '.config' suffix define merge_into_defconfig $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ - -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \ - $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config) + -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \ + $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config) +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig endef @@ -23,7 +23,7 @@ endef # Input config fragments without '.config' suffix define merge_into_defconfig_override $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ - -Q -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \ - $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config) + -Q -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \ + $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config) +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig endef From 846cfbeed09b45d985079a9173cf390cc053715b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 23 Jan 2024 15:59:54 -0700 Subject: [PATCH 260/347] um: Fix adding '-no-pie' for clang The kernel builds with -fno-PIE, so commit 883354afbc10 ("um: link vmlinux with -no-pie") added the compiler linker flag '-no-pie' via cc-option because '-no-pie' was only supported in GCC 6.1.0 and newer. While this works for GCC, this does not work for clang because cc-option uses '-c', which stops the pipeline right before linking, so '-no-pie' is unconsumed and clang warns, causing cc-option to fail just as it would if the option was entirely unsupported: $ clang -Werror -no-pie -c -o /dev/null -x c /dev/null clang-16: error: argument unused during compilation: '-no-pie' [-Werror,-Wunused-command-line-argument] A recent version of clang exposes this because it generates a relocation under '-mcmodel=large' that is not supported in PIE mode: /usr/sbin/ld: init/main.o: relocation R_X86_64_32 against symbol `saved_command_line' can not be used when making a PIE object; recompile with -fPIE /usr/sbin/ld: failed to set dynamic section sizes: bad value clang: error: linker command failed with exit code 1 (use -v to see invocation) Remove the cc-option check altogether. It is wasteful to invoke the compiler to check for '-no-pie' because only one supported compiler version does not support it, GCC 5.x (as it is supported with the minimum version of clang and GCC 6.1.0+). Use a combination of the gcc-min-version macro and CONFIG_CC_IS_CLANG to unconditionally add '-no-pie' with CONFIG_LD_SCRIPT_DYN=y, so that it is enabled with all compilers that support this. Furthermore, using gcc-min-version can help turn this back into LINK-$(CONFIG_LD_SCRIPT_DYN) += -no-pie when the minimum version of GCC is bumped past 6.1.0. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/1982 Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- arch/um/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/um/Makefile b/arch/um/Makefile index 82f05f250634..34957dcb88b9 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -115,7 +115,9 @@ archprepare: $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static -LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie) +ifdef CONFIG_LD_SCRIPT_DYN +LINK-$(call gcc-min-version, 60100)$(CONFIG_CC_IS_CLANG) += -no-pie +endif LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ From 397586506c3da005b9333ce5947ad01e8018a3be Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 23 Jan 2024 15:59:55 -0700 Subject: [PATCH 261/347] modpost: Add '.ltext' and '.ltext.*' to TEXT_SECTIONS After the linked LLVM change, building ARCH=um defconfig results in a segmentation fault in modpost. Prior to commit a23e7584ecf3 ("modpost: unify 'sym' and 'to' in default_mismatch_handler()"), there was a warning: WARNING: modpost: vmlinux.o(__ex_table+0x88): Section mismatch in reference to the .ltext:(unknown) WARNING: modpost: The relocation at __ex_table+0x88 references section ".ltext" which is not in the list of authorized sections. If you're adding a new section and/or if this reference is valid, add ".ltext" to the list of authorized sections to jump to on fault. This can be achieved by adding ".ltext" to OTHER_TEXT_SECTIONS in scripts/mod/modpost.c. The linked LLVM change moves global objects to the '.ltext' (and '.ltext.*' with '-ffunction-sections') sections with '-mcmodel=large', which ARCH=um uses. These sections should be handled just as '.text' and '.text.*' are, so add them to TEXT_SECTIONS. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/1981 Link: https://github.com/llvm/llvm-project/commit/4bf8a688956a759b7b6b8d94f42d25c13c7af130 Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 795b21154446..acf72112acd8 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -806,7 +806,8 @@ static void check_section(const char *modname, struct elf_info *elf, #define DATA_SECTIONS ".data", ".data.rel" #define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \ - ".kprobes.text", ".cpuidle.text", ".noinstr.text" + ".kprobes.text", ".cpuidle.text", ".noinstr.text", \ + ".ltext", ".ltext.*" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ ".fixup", ".entry.text", ".exception.text", \ ".coldtext", ".softirqentry.text" From 2751153b9945c31eb905deb9fbe2d7f127b4b34c Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 1 Dec 2023 21:20:50 +0100 Subject: [PATCH 262/347] parisc: Make RO_DATA page aligned in vmlinux.lds.S The rodata_test program for CONFIG_DEBUG_RODATA_TEST=y complains if read-only data does not start at page boundary. Signed-off-by: Helge Deller --- arch/parisc/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 548051b0b4af..b445e47903cf 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -127,7 +127,7 @@ SECTIONS } #endif - RO_DATA(8) + RO_DATA(PAGE_SIZE) /* unwind info */ . = ALIGN(4); From b9402e3b97289ca9e0f0f79f4df64bd6c9176a86 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 17 Jan 2024 17:46:43 +0100 Subject: [PATCH 263/347] parisc: Check for valid stride size for cache flushes Report if the calculated cache stride size is zero, otherwise the cache flushing routine will never finish and hang the machine. This can be reproduced with a testcase in qemu, where the firmware reports wrong cache values. Signed-off-by: Helge Deller --- arch/parisc/kernel/cache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 268d90a9325b..0c015487e5db 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -264,6 +264,10 @@ parisc_cache_init(void) icache_stride = CAFL_STRIDE(cache_info.ic_conf); #undef CAFL_STRIDE + /* stride needs to be non-zero, otherwise cache flushes will not work */ + WARN_ON(cache_info.dc_size && dcache_stride == 0); + WARN_ON(cache_info.ic_size && icache_stride == 0); + if ((boot_cpu_data.pdc.capabilities & PDC_MODEL_NVA_MASK) == PDC_MODEL_NVA_UNSUPPORTED) { printk(KERN_WARNING "parisc_cache_init: Only equivalent aliasing supported!\n"); From c8708d758e715c3824a73bf0cda97292b52be44d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 19 Jan 2024 21:16:39 +0100 Subject: [PATCH 264/347] parisc: Prevent hung tasks when printing inventory on serial console Printing the inventory on a serial console can be quite slow and thus may trigger the hung task detector (CONFIG_DETECT_HUNG_TASK=y) and possibly reboot the machine. Adding a cond_resched() prevents this. Signed-off-by: Helge Deller Cc: # v6.0+ --- arch/parisc/kernel/drivers.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 25f9b9e9d6df..404ea37705ce 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -1004,6 +1004,9 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data) pr_info("\n"); + /* Prevent hung task messages when printing on serial console */ + cond_resched(); + pr_info("#define HPA_%08lx_DESCRIPTION \"%s\"\n", hpa, parisc_hardware_description(&dev->id)); From 20e08a720cc526dacc0678067ffc81613aa738ca Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 22 Jan 2024 17:51:15 +0100 Subject: [PATCH 265/347] parisc: Drop unneeded semicolon in parse_tree_node() Reported-by: kernel test robot Signed-off-by: Helge Deller Closes: https://lore.kernel.org/oe-kbuild-all/202401222059.Wli6OGT0-lkp@intel.com/ --- arch/parisc/kernel/drivers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 404ea37705ce..c7ff339732ba 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -742,7 +742,7 @@ parse_tree_node(struct device *parent, int index, struct hardware_path *modpath) }; if (device_for_each_child(parent, &recurse_data, descend_children)) - { /* nothing */ }; + { /* nothing */ } return d.dev; } From 29142dc92c37d3259a33aef15b03e6ee25b0d188 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 27 Jan 2024 13:21:14 -0800 Subject: [PATCH 266/347] tracefs: remove stale 'update_gid' code The 'eventfs_update_gid()' function is no longer called, so remove it (and the helper function it uses). Link: https://lore.kernel.org/all/CAHk-=wj+DsZZ=2iTUkJ-Nojs9fjYMvPs1NuoM3yK7aTDtJfPYQ@mail.gmail.com/ Fixes: 8186fff7ab64 ("tracefs/eventfs: Use root and instance inodes as default ownership") Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 38 -------------------------------------- fs/tracefs/internal.h | 1 - 2 files changed, 39 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 6b211522a13e..1c3dd0ad4660 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -281,44 +281,6 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode, inode->i_gid = attr->gid; } -static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level) -{ - struct eventfs_inode *ei_child; - - /* at most we have events/system/event */ - if (WARN_ON_ONCE(level > 3)) - return; - - ei->attr.gid = gid; - - if (ei->entry_attrs) { - for (int i = 0; i < ei->nr_entries; i++) { - ei->entry_attrs[i].gid = gid; - } - } - - /* - * Only eventfs_inode with dentries are updated, make sure - * all eventfs_inodes are updated. If one of the children - * do not have a dentry, this function must traverse it. - */ - list_for_each_entry_srcu(ei_child, &ei->children, list, - srcu_read_lock_held(&eventfs_srcu)) { - if (!ei_child->dentry) - update_gid(ei_child, gid, level + 1); - } -} - -void eventfs_update_gid(struct dentry *dentry, kgid_t gid) -{ - struct eventfs_inode *ei = dentry->d_fsdata; - int idx; - - idx = srcu_read_lock(&eventfs_srcu); - update_gid(ei, gid, 0); - srcu_read_unlock(&eventfs_srcu, idx); -} - /** * create_file - create a file in the tracefs filesystem * @name: the name of the file to create. diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 45397df9bb65..91c2bf0b91d9 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -82,7 +82,6 @@ struct inode *tracefs_get_inode(struct super_block *sb); struct dentry *eventfs_start_creating(const char *name, struct dentry *parent); struct dentry *eventfs_failed_creating(struct dentry *dentry); struct dentry *eventfs_end_creating(struct dentry *dentry); -void eventfs_update_gid(struct dentry *dentry, kgid_t gid); void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry); #endif /* _TRACEFS_INTERNAL_H */ From 41bccc98fb7931d63d03f326a746ac4d429c1dd3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Jan 2024 17:01:12 -0800 Subject: [PATCH 267/347] Linux 6.8-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9f9b76d3a4b7..6c0a4d294444 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From 37e8c97e539015637cb920d3e6f1e404f707a06e Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Wed, 24 Jan 2024 02:21:47 -0800 Subject: [PATCH 268/347] net: hsr: remove WARN_ONCE() in send_hsr_supervision_frame() Syzkaller reported [1] hitting a warning after failing to allocate resources for skb in hsr_init_skb(). Since a WARN_ONCE() call will not help much in this case, it might be prudent to switch to netdev_warn_once(). At the very least it will suppress syzkaller reports such as [1]. Just in case, use netdev_warn_once() in send_prp_supervision_frame() for similar reasons. [1] HSR: Could not send supervision frame WARNING: CPU: 1 PID: 85 at net/hsr/hsr_device.c:294 send_hsr_supervision_frame+0x60a/0x810 net/hsr/hsr_device.c:294 RIP: 0010:send_hsr_supervision_frame+0x60a/0x810 net/hsr/hsr_device.c:294 ... Call Trace: hsr_announce+0x114/0x370 net/hsr/hsr_device.c:382 call_timer_fn+0x193/0x590 kernel/time/timer.c:1700 expire_timers kernel/time/timer.c:1751 [inline] __run_timers+0x764/0xb20 kernel/time/timer.c:2022 run_timer_softirq+0x58/0xd0 kernel/time/timer.c:2035 __do_softirq+0x21a/0x8de kernel/softirq.c:553 invoke_softirq kernel/softirq.c:427 [inline] __irq_exit_rcu kernel/softirq.c:632 [inline] irq_exit_rcu+0xb7/0x120 kernel/softirq.c:644 sysvec_apic_timer_interrupt+0x95/0xb0 arch/x86/kernel/apic/apic.c:1076 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:649 ... This issue is also found in older kernels (at least up to 5.10). Cc: stable@vger.kernel.org Reported-by: syzbot+3ae0a3f42c84074b7c8e@syzkaller.appspotmail.com Fixes: 121c33b07b31 ("net: hsr: introduce common code for skb initialization") Signed-off-by: Nikita Zhandarovich Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 7ceb9ac6e730..9d71b66183da 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -308,7 +308,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "HSR: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n"); return; } @@ -355,7 +355,7 @@ static void send_prp_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "PRP: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n"); return; } From bfb007aebe6bff451f7f3a4be19f4f286d0d5d9c Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Thu, 25 Jan 2024 12:53:09 +0300 Subject: [PATCH 269/347] nfc: nci: free rx_data_reassembly skb on NCI device cleanup rx_data_reassembly skb is stored during NCI data exchange for processing fragmented packets. It is dropped only when the last fragment is processed or when an NTF packet with NCI_OP_RF_DEACTIVATE_NTF opcode is received. However, the NCI device may be deallocated before that which leads to skb leak. As by design the rx_data_reassembly skb is bound to the NCI device and nothing prevents the device to be freed before the skb is processed in some way and cleaned, free it on the NCI device cleanup. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 6a2968aaf50c ("NFC: basic NCI protocol implementation") Cc: stable@vger.kernel.org Reported-by: syzbot+6b7c68d9c21e4ee4251b@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/000000000000f43987060043da7b@google.com/ Signed-off-by: Fedor Pchelkin Signed-off-by: David S. Miller --- net/nfc/nci/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 97348cedb16b..cdad47b140fa 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1208,6 +1208,10 @@ void nci_free_device(struct nci_dev *ndev) { nfc_free_device(ndev->nfc_dev); nci_hci_deallocate(ndev); + + /* drop partial rx data packet if present */ + if (ndev->rx_data_reassembly) + kfree_skb(ndev->rx_data_reassembly); kfree(ndev); } EXPORT_SYMBOL(nci_free_device); From 577e4432f3ac810049cb7e6b71f4d96ec7c6e894 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 Jan 2024 10:33:17 +0000 Subject: [PATCH 270/347] tcp: add sanity checks to rx zerocopy TCP rx zerocopy intent is to map pages initially allocated from NIC drivers, not pages owned by a fs. This patch adds to can_map_frag() these additional checks: - Page must not be a compound one. - page->mapping must be NULL. This fixes the panic reported by ZhangPeng. syzbot was able to loopback packets built with sendfile(), mapping pages owned by an ext4 file to TCP rx zerocopy. r3 = socket$inet_tcp(0x2, 0x1, 0x0) mmap(&(0x7f0000ff9000/0x4000)=nil, 0x4000, 0x0, 0x12, r3, 0x0) r4 = socket$inet_tcp(0x2, 0x1, 0x0) bind$inet(r4, &(0x7f0000000000)={0x2, 0x4e24, @multicast1}, 0x10) connect$inet(r4, &(0x7f00000006c0)={0x2, 0x4e24, @empty}, 0x10) r5 = openat$dir(0xffffffffffffff9c, &(0x7f00000000c0)='./file0\x00', 0x181e42, 0x0) fallocate(r5, 0x0, 0x0, 0x85b8) sendfile(r4, r5, 0x0, 0x8ba0) getsockopt$inet_tcp_TCP_ZEROCOPY_RECEIVE(r4, 0x6, 0x23, &(0x7f00000001c0)={&(0x7f0000ffb000/0x3000)=nil, 0x3000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, &(0x7f0000000440)=0x40) r6 = openat$dir(0xffffffffffffff9c, &(0x7f00000000c0)='./file0\x00', 0x181e42, 0x0) Fixes: 93ab6cc69162 ("tcp: implement mmap() for zero copy receive") Link: https://lore.kernel.org/netdev/5106a58e-04da-372a-b836-9d3d0bd2507b@huawei.com/T/ Reported-and-bisected-by: ZhangPeng Signed-off-by: Eric Dumazet Cc: Arjun Roy Cc: Matthew Wilcox Cc: linux-mm@vger.kernel.org Cc: Andrew Morton Cc: linux-fsdevel@vger.kernel.org Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a1c6de385cce..7e2481b9eae1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1786,7 +1786,17 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, static bool can_map_frag(const skb_frag_t *frag) { - return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag); + struct page *page; + + if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag)) + return false; + + page = skb_frag_page(frag); + + if (PageCompound(page) || page->mapping) + return false; + + return true; } static int find_next_mappable_frag(const skb_frag_t *frag, From e42e29cc442395d62f1a8963ec2dfb700ba6a5d7 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 29 Jan 2024 08:40:23 -0600 Subject: [PATCH 271/347] Revert "jfs: fix shift-out-of-bounds in dbJoin" This reverts commit cca974daeb6c43ea971f8ceff5a7080d7d49ee30. The added sanity check is incorrect. BUDMIN is not the wrong value and is too small. Signed-off-by: Dave Kleikamp --- fs/jfs/jfs_dmap.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 8eec84c651bf..cb3cda1390ad 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -2763,9 +2763,7 @@ static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl) * leafno - the number of the leaf to be updated. * newval - the new value for the leaf. * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * RETURN VALUES: none */ static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl) { @@ -2792,10 +2790,6 @@ static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl) * get the buddy size (number of words covered) of * the new value. */ - - if ((newval - tp->dmt_budmin) > BUDMIN) - return -EIO; - budsz = BUDSIZE(newval, tp->dmt_budmin); /* try to join. From f1f6a6b1830a8f1dc92ee26fae76333f446b0663 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 11 Dec 2023 18:05:52 -0800 Subject: [PATCH 272/347] e1000e: correct maximum frequency adjustment values The e1000e driver supports hardware with a variety of different clock speeds, and thus a variety of different increment values used for programming its PTP hardware clock. The values currently programmed in e1000e_ptp_init are incorrect. In particular, only two maximum adjustments are used: 24000000 - 1, and 600000000 - 1. These were originally intended to be used with the 96 MHz clock and the 25 MHz clock. Both of these values are actually slightly too high. For the 96 MHz clock, the actual maximum value that can safely be programmed is 23,999,938. For the 25 MHz clock, the maximum value is 599,999,904. Worse, several devices use a 24 MHz clock or a 38.4 MHz clock. These parts are incorrectly assigned one of either the 24million or 600million values. For the 24 MHz clock, this is not a significant issue: its current increment value can support an adjustment up to 7billion in the positive direction. However, the 38.4 KHz clock uses an increment value which can only support up to 230,769,157 before it starts overflowing. To understand where these values come from, consider that frequency adjustments have the form of: new_incval = base_incval + (base_incval * adjustment) / (unit of adjustment) The maximum adjustment is reported in terms of parts per billion: new_incval = base_incval + (base_incval * adjustment) / 1 billion The largest possible adjustment is thus given by the following: max_incval = base_incval + (base_incval * max_adj) / 1 billion Re-arranging to solve for max_adj: max_adj = (max_incval - base_incval) * 1 billion / base_incval We also need to ensure that negative adjustments cannot underflow. This can be achieved simply by ensuring max_adj is always less than 1 billion. Introduce new macros in e1000.h codifying the maximum adjustment in PPB for each frequency given its associated increment values. Also clarify where these values come from by commenting about the above equations. Replace the switch statement in e1000e_ptp_init with one which mirrors the increment value switch statement from e1000e_get_base_timinica. For each device, assign the appropriate maximum adjustment based on its frequency. Some parts can have one of two frequency modes as determined by E1000_TSYNCRXCTL_SYSCFI. Since the new flow directly matches the assignments in e1000e_get_base_timinca, and uses well defined macro names, it is much easier to verify that the resulting maximum adjustments are correct. It also avoids difficult to parse construction such as the "hw->mac.type < e1000_phc_lpt", and the use of fallthrough which was especially confusing when combined with a conditional block. Note that I believe the current increment value configuration used for 24MHz clocks is sub-par, as it leaves at least 3 extra bits available in the INCVALUE register. However, fixing that requires more careful review of the clock rate and associated values. Reported-by: Trey Harrison Fixes: 68fe1d5da548 ("e1000e: Add Support for 38.4MHZ frequency") Fixes: d89777bf0e42 ("e1000e: add support for IEEE-1588 PTP") Signed-off-by: Jacob Keller Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/e1000.h | 20 ++++++++++++++++++++ drivers/net/ethernet/intel/e1000e/ptp.c | 22 +++++++++++++++------- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index a187582d2299..ba9c19e6994c 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -360,23 +360,43 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca); * As a result, a shift of INCVALUE_SHIFT_n is used to fit a value of * INCVALUE_n into the TIMINCA register allowing 32+8+(24-INCVALUE_SHIFT_n) * bits to count nanoseconds leaving the rest for fractional nonseconds. + * + * Any given INCVALUE also has an associated maximum adjustment value. This + * maximum adjustment value is the largest increase (or decrease) which can be + * safely applied without overflowing the INCVALUE. Since INCVALUE has + * a maximum range of 24 bits, its largest value is 0xFFFFFF. + * + * To understand where the maximum value comes from, consider the following + * equation: + * + * new_incval = base_incval + (base_incval * adjustment) / 1billion + * + * To avoid overflow that means: + * max_incval = base_incval + (base_incval * max_adj) / billion + * + * Re-arranging: + * max_adj = floor(((max_incval - base_incval) * 1billion) / 1billion) */ #define INCVALUE_96MHZ 125 #define INCVALUE_SHIFT_96MHZ 17 #define INCPERIOD_SHIFT_96MHZ 2 #define INCPERIOD_96MHZ (12 >> INCPERIOD_SHIFT_96MHZ) +#define MAX_PPB_96MHZ 23999900 /* 23,999,900 ppb */ #define INCVALUE_25MHZ 40 #define INCVALUE_SHIFT_25MHZ 18 #define INCPERIOD_25MHZ 1 +#define MAX_PPB_25MHZ 599999900 /* 599,999,900 ppb */ #define INCVALUE_24MHZ 125 #define INCVALUE_SHIFT_24MHZ 14 #define INCPERIOD_24MHZ 3 +#define MAX_PPB_24MHZ 999999999 /* 999,999,999 ppb */ #define INCVALUE_38400KHZ 26 #define INCVALUE_SHIFT_38400KHZ 19 #define INCPERIOD_38400KHZ 1 +#define MAX_PPB_38400KHZ 230769100 /* 230,769,100 ppb */ /* Another drawback of scaling the incvalue by a large factor is the * 64-bit SYSTIM register overflows more quickly. This is dealt with diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 02d871bc112a..bbcfd529399b 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -280,8 +280,17 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) switch (hw->mac.type) { case e1000_pch2lan: + adapter->ptp_clock_info.max_adj = MAX_PPB_96MHZ; + break; case e1000_pch_lpt: + if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) + adapter->ptp_clock_info.max_adj = MAX_PPB_96MHZ; + else + adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ; + break; case e1000_pch_spt: + adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ; + break; case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: @@ -289,15 +298,14 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) case e1000_pch_lnp: case e1000_pch_ptp: case e1000_pch_nvp: - if ((hw->mac.type < e1000_pch_lpt) || - (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) { - adapter->ptp_clock_info.max_adj = 24000000 - 1; - break; - } - fallthrough; + if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) + adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ; + else + adapter->ptp_clock_info.max_adj = MAX_PPB_38400KHZ; + break; case e1000_82574: case e1000_82583: - adapter->ptp_clock_info.max_adj = 600000000 - 1; + adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ; break; default: break; From bbc404d20d1b46d89b461918bc44587620eda200 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 20 Jan 2024 18:25:36 +0100 Subject: [PATCH 273/347] ixgbe: Fix an error handling path in ixgbe_read_iosf_sb_reg_x550() All error handling paths, except this one, go to 'out' where release_swfw_sync() is called. This call balances the acquire_swfw_sync() call done at the beginning of the function. Branch to the error handling path in order to correctly release some resources in case of error. Fixes: ae14a1d8e104 ("ixgbe: Fix IOSF SB access issues") Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 6208923e29a2..c1adc94a5a65 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -716,7 +716,8 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) { error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command); hw_dbg(hw, "Failed to read, error %x\n", error); - return -EIO; + ret = -EIO; + goto out; } if (!ret) From ccbca118ef1a71d5faa012b9bb1ecd784e9e2b42 Mon Sep 17 00:00:00 2001 From: Samasth Norway Ananda Date: Mon, 22 Jan 2024 21:35:09 -0800 Subject: [PATCH 274/347] NFSv4.1: Assign the right value for initval and retries for rpc timeout Make sure the rpc timeout was assigned with the correct value for initial timeout and max number of retries. Fixes: 57331a59ac0d ("NFSv4.1: Use the nfs_client's rpc timeouts for backchannel") Signed-off-by: Samasth Norway Ananda Reviewed-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/svc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index f60c93e5a25d..b969e505c7b7 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1598,10 +1598,10 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp) /* Finally, send the reply synchronously */ if (rqstp->bc_to_initval > 0) { timeout.to_initval = rqstp->bc_to_initval; - timeout.to_retries = rqstp->bc_to_initval; + timeout.to_retries = rqstp->bc_to_retries; } else { timeout.to_initval = req->rq_xprt->timeout->to_initval; - timeout.to_initval = req->rq_xprt->timeout->to_retries; + timeout.to_retries = req->rq_xprt->timeout->to_retries; } memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); task = rpc_run_bc_task(req, &timeout); From c44fc98f0a8ffd94fa0bd291928e7e312ffc7ca4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Fri, 26 Jan 2024 11:49:35 +0100 Subject: [PATCH 275/347] net: dsa: qca8k: fix illegal usage of GPIO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When working with GPIO, its direction must be set either when the GPIO is requested by gpiod_get*() or later on by one of the gpiod_direction_*() functions. Neither of this is done here which results in undefined behavior on some systems. As the reset GPIO is used right after it is requested here, it makes sense to configure it as GPIOD_OUT_HIGH right away. With that, the following gpiod_set_value_cansleep(1) becomes redundant and can be safely removed. Fixes: a653f2f538f9 ("net: dsa: qca8k: introduce reset via gpio feature") Signed-off-by: Michal Vokáč Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/1706266175-3408-1-git-send-email-michal.vokac@ysoft.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/qca/qca8k-8xxx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index c51f40960961..7a864329cb72 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -2051,12 +2051,11 @@ qca8k_sw_probe(struct mdio_device *mdiodev) priv->info = of_device_get_match_data(priv->dev); priv->reset_gpio = devm_gpiod_get_optional(priv->dev, "reset", - GPIOD_ASIS); + GPIOD_OUT_HIGH); if (IS_ERR(priv->reset_gpio)) return PTR_ERR(priv->reset_gpio); if (priv->reset_gpio) { - gpiod_set_value_cansleep(priv->reset_gpio, 1); /* The active low duration must be greater than 10 ms * and checkpatch.pl wants 20 ms. */ From 59c93583491ab15db109f9902524d241c4fa4c0b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 26 Jan 2024 12:13:08 -0800 Subject: [PATCH 276/347] selftests: net: add missing config for nftables-backed iptables Modern OSes use iptables implementation with nf_tables as a backend, e.g.: $ iptables -V iptables v1.8.8 (nf_tables) Pablo points out that we need CONFIG_NFT_COMPAT to make that work, otherwise we see a lot of: Warning: Extension DNAT revision 0 not supported, missing kernel module? with DNAT being just an example here, other modules we need include udp, TTL, length etc. Link: https://lore.kernel.org/r/20240126201308.2903602-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 56da5d52674c..d4b38177ed04 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -60,6 +60,7 @@ CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y +CONFIG_NFT_COMPAT=m CONFIG_NF_FLOW_TABLE=m CONFIG_PSAMPLE=m CONFIG_TCP_MD5SIG=y From 60365049ccbacd101654a66ddcb299abfabd4fc5 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 26 Jan 2024 09:32:20 +0100 Subject: [PATCH 277/347] ipv6: Ensure natural alignment of const ipv6 loopback and router addresses On a parisc64 kernel I sometimes notice this kernel warning: Kernel unaligned access to 0x40ff8814 at ndisc_send_skb+0xc0/0x4d8 The address 0x40ff8814 points to the in6addr_linklocal_allrouters variable and the warning simply means that some ipv6 function tries to read a 64-bit word directly from the not-64-bit aligned in6addr_linklocal_allrouters variable. Unaligned accesses are non-critical as the architecture or exception handlers usually will fix it up at runtime. Nevertheless it may trigger a performance penality for some architectures. For details read the "unaligned-memory-access" kernel documentation. The patch below ensures that the ipv6 loopback and router addresses will always be naturally aligned. This prevents the unaligned accesses for all architectures. Signed-off-by: Helge Deller Fixes: 034dfc5df99eb ("ipv6: export in6addr_loopback to modules") Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/ZbNuFM1bFqoH-UoY@p100 Signed-off-by: Paolo Abeni --- net/ipv6/addrconf_core.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 507a8353a6bd..c008d21925d7 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -220,19 +220,26 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { EXPORT_SYMBOL_GPL(ipv6_stub); /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ -const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; +const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8) + = IN6ADDR_LOOPBACK_INIT; EXPORT_SYMBOL(in6addr_loopback); -const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; +const struct in6_addr in6addr_any __aligned(BITS_PER_LONG/8) + = IN6ADDR_ANY_INIT; EXPORT_SYMBOL(in6addr_any); -const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_linklocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_linklocal_allnodes); -const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_linklocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_linklocal_allrouters); -const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_interfacelocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allnodes); -const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_interfacelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allrouters); -const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_sitelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_sitelocal_allrouters); static void snmp6_free_dev(struct inet6_dev *idev) From 4896bb7c0b31a0a3379b290ea7729900c59e0c69 Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 26 Jan 2024 10:10:41 +0100 Subject: [PATCH 278/347] net: stmmac: do not clear TBS enable bit on link up/down With the dma conf being reallocated on each call to stmmac_open(), any information in there is lost, unless we specifically handle it. The STMMAC_TBS_EN bit is set when adding an etf qdisc, and the etf qdisc therefore would stop working when link was set down and then back up. Fixes: ba39b344e924 ("net: ethernet: stmicro: stmmac: generate stmmac dma conf before open") Cc: stable@vger.kernel.org Signed-off-by: Esben Haabendal Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b334eb16da23..25519952f754 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3932,6 +3932,9 @@ static int __stmmac_open(struct net_device *dev, priv->rx_copybreak = STMMAC_RX_COPYBREAK; buf_sz = dma_conf->dma_buf_sz; + for (int i = 0; i < MTL_MAX_TX_QUEUES; i++) + if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN) + dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs; memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf)); stmmac_reset_queues_param(priv); From 3b12ec8f618ebaccfe43ea4621a6f5fb586edef8 Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 26 Jan 2024 10:10:42 +0100 Subject: [PATCH 279/347] net: stmmac: dwmac-imx: set TSO/TBS TX queues default settings TSO and TBS cannot coexist. For now we set i.MX Ethernet QOS controller to use the first TX queue with TSO and the rest for TBS. TX queues with TBS can support etf qdisc hw offload. Signed-off-by: Esben Haabendal Reviewed-by: Kurt Kanzenbach Reviewed-by: Vadim Fedorenko Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 8f730ada71f9..6b65420e11b5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -353,6 +353,10 @@ static int imx_dwmac_probe(struct platform_device *pdev) if (data->flags & STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY) plat_dat->flags |= STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY; + /* Default TX Q0 to use TSO and rest TXQ for TBS */ + for (int i = 1; i < plat_dat->tx_queues_to_use; i++) + plat_dat->tx_queues_cfg[i].tbs_en = 1; + plat_dat->host_dma_width = dwmac->ops->addr_width; plat_dat->init = imx_dwmac_init; plat_dat->exit = imx_dwmac_exit; From aa2b2eb3934859904c287bf5434647ba72e14c1c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Jan 2024 16:55:32 +0000 Subject: [PATCH 280/347] llc: call sock_orphan() at release time syzbot reported an interesting trace [1] caused by a stale sk->sk_wq pointer in a closed llc socket. In commit ff7b11aa481f ("net: socket: set sock->sk to NULL after calling proto_ops::release()") Eric Biggers hinted that some protocols are missing a sock_orphan(), we need to perform a full audit. In net-next, I plan to clear sock->sk from sock_orphan() and amend Eric patch to add a warning. [1] BUG: KASAN: slab-use-after-free in list_empty include/linux/list.h:373 [inline] BUG: KASAN: slab-use-after-free in waitqueue_active include/linux/wait.h:127 [inline] BUG: KASAN: slab-use-after-free in sock_def_write_space_wfree net/core/sock.c:3384 [inline] BUG: KASAN: slab-use-after-free in sock_wfree+0x9a8/0x9d0 net/core/sock.c:2468 Read of size 8 at addr ffff88802f4fc880 by task ksoftirqd/1/27 CPU: 1 PID: 27 Comm: ksoftirqd/1 Not tainted 6.8.0-rc1-syzkaller-00049-g6098d87eaf31 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:377 [inline] print_report+0xc4/0x620 mm/kasan/report.c:488 kasan_report+0xda/0x110 mm/kasan/report.c:601 list_empty include/linux/list.h:373 [inline] waitqueue_active include/linux/wait.h:127 [inline] sock_def_write_space_wfree net/core/sock.c:3384 [inline] sock_wfree+0x9a8/0x9d0 net/core/sock.c:2468 skb_release_head_state+0xa3/0x2b0 net/core/skbuff.c:1080 skb_release_all net/core/skbuff.c:1092 [inline] napi_consume_skb+0x119/0x2b0 net/core/skbuff.c:1404 e1000_unmap_and_free_tx_resource+0x144/0x200 drivers/net/ethernet/intel/e1000/e1000_main.c:1970 e1000_clean_tx_irq drivers/net/ethernet/intel/e1000/e1000_main.c:3860 [inline] e1000_clean+0x4a1/0x26e0 drivers/net/ethernet/intel/e1000/e1000_main.c:3801 __napi_poll.constprop.0+0xb4/0x540 net/core/dev.c:6576 napi_poll net/core/dev.c:6645 [inline] net_rx_action+0x956/0xe90 net/core/dev.c:6778 __do_softirq+0x21a/0x8de kernel/softirq.c:553 run_ksoftirqd kernel/softirq.c:921 [inline] run_ksoftirqd+0x31/0x60 kernel/softirq.c:913 smpboot_thread_fn+0x660/0xa10 kernel/smpboot.c:164 kthread+0x2c6/0x3a0 kernel/kthread.c:388 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 Allocated by task 5167: kasan_save_stack+0x33/0x50 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 unpoison_slab_object mm/kasan/common.c:314 [inline] __kasan_slab_alloc+0x81/0x90 mm/kasan/common.c:340 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slub.c:3813 [inline] slab_alloc_node mm/slub.c:3860 [inline] kmem_cache_alloc_lru+0x142/0x6f0 mm/slub.c:3879 alloc_inode_sb include/linux/fs.h:3019 [inline] sock_alloc_inode+0x25/0x1c0 net/socket.c:308 alloc_inode+0x5d/0x220 fs/inode.c:260 new_inode_pseudo+0x16/0x80 fs/inode.c:1005 sock_alloc+0x40/0x270 net/socket.c:634 __sock_create+0xbc/0x800 net/socket.c:1535 sock_create net/socket.c:1622 [inline] __sys_socket_create net/socket.c:1659 [inline] __sys_socket+0x14c/0x260 net/socket.c:1706 __do_sys_socket net/socket.c:1720 [inline] __se_sys_socket net/socket.c:1718 [inline] __x64_sys_socket+0x72/0xb0 net/socket.c:1718 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd3/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Freed by task 0: kasan_save_stack+0x33/0x50 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 kasan_save_free_info+0x3f/0x60 mm/kasan/generic.c:640 poison_slab_object mm/kasan/common.c:241 [inline] __kasan_slab_free+0x121/0x1b0 mm/kasan/common.c:257 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2121 [inline] slab_free mm/slub.c:4299 [inline] kmem_cache_free+0x129/0x350 mm/slub.c:4363 i_callback+0x43/0x70 fs/inode.c:249 rcu_do_batch kernel/rcu/tree.c:2158 [inline] rcu_core+0x819/0x1680 kernel/rcu/tree.c:2433 __do_softirq+0x21a/0x8de kernel/softirq.c:553 Last potentially related work creation: kasan_save_stack+0x33/0x50 mm/kasan/common.c:47 __kasan_record_aux_stack+0xba/0x100 mm/kasan/generic.c:586 __call_rcu_common.constprop.0+0x9a/0x7b0 kernel/rcu/tree.c:2683 destroy_inode+0x129/0x1b0 fs/inode.c:315 iput_final fs/inode.c:1739 [inline] iput.part.0+0x560/0x7b0 fs/inode.c:1765 iput+0x5c/0x80 fs/inode.c:1755 dentry_unlink_inode+0x292/0x430 fs/dcache.c:400 __dentry_kill+0x1ca/0x5f0 fs/dcache.c:603 dput.part.0+0x4ac/0x9a0 fs/dcache.c:845 dput+0x1f/0x30 fs/dcache.c:835 __fput+0x3b9/0xb70 fs/file_table.c:384 task_work_run+0x14d/0x240 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xa8a/0x2ad0 kernel/exit.c:871 do_group_exit+0xd4/0x2a0 kernel/exit.c:1020 __do_sys_exit_group kernel/exit.c:1031 [inline] __se_sys_exit_group kernel/exit.c:1029 [inline] __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:1029 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd3/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b The buggy address belongs to the object at ffff88802f4fc800 which belongs to the cache sock_inode_cache of size 1408 The buggy address is located 128 bytes inside of freed 1408-byte region [ffff88802f4fc800, ffff88802f4fcd80) The buggy address belongs to the physical page: page:ffffea0000bd3e00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x2f4f8 head:ffffea0000bd3e00 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 anon flags: 0xfff00000000840(slab|head|node=0|zone=1|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 00fff00000000840 ffff888013b06b40 0000000000000000 0000000000000001 raw: 0000000000000000 0000000080150015 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Reclaimable, gfp_mask 0xd20d0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_RECLAIMABLE), pid 4956, tgid 4956 (sshd), ts 31423924727, free_ts 0 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x2d0/0x350 mm/page_alloc.c:1533 prep_new_page mm/page_alloc.c:1540 [inline] get_page_from_freelist+0xa28/0x3780 mm/page_alloc.c:3311 __alloc_pages+0x22f/0x2440 mm/page_alloc.c:4567 __alloc_pages_node include/linux/gfp.h:238 [inline] alloc_pages_node include/linux/gfp.h:261 [inline] alloc_slab_page mm/slub.c:2190 [inline] allocate_slab mm/slub.c:2354 [inline] new_slab+0xcc/0x3a0 mm/slub.c:2407 ___slab_alloc+0x4af/0x19a0 mm/slub.c:3540 __slab_alloc.constprop.0+0x56/0xa0 mm/slub.c:3625 __slab_alloc_node mm/slub.c:3678 [inline] slab_alloc_node mm/slub.c:3850 [inline] kmem_cache_alloc_lru+0x379/0x6f0 mm/slub.c:3879 alloc_inode_sb include/linux/fs.h:3019 [inline] sock_alloc_inode+0x25/0x1c0 net/socket.c:308 alloc_inode+0x5d/0x220 fs/inode.c:260 new_inode_pseudo+0x16/0x80 fs/inode.c:1005 sock_alloc+0x40/0x270 net/socket.c:634 __sock_create+0xbc/0x800 net/socket.c:1535 sock_create net/socket.c:1622 [inline] __sys_socket_create net/socket.c:1659 [inline] __sys_socket+0x14c/0x260 net/socket.c:1706 __do_sys_socket net/socket.c:1720 [inline] __se_sys_socket net/socket.c:1718 [inline] __x64_sys_socket+0x72/0xb0 net/socket.c:1718 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd3/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b page_owner free stack trace missing Memory state around the buggy address: ffff88802f4fc780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88802f4fc800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88802f4fc880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88802f4fc900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88802f4fc980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 43815482370c ("net: sock_def_readable() and friends RCU conversion") Reported-and-tested-by: syzbot+32b89eaa102b372ff76d@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Eric Biggers Cc: Kuniyuki Iwashima Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240126165532.3396702-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/llc/af_llc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 20551cfb7da6..fde1140d899e 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -226,6 +226,8 @@ static int llc_ui_release(struct socket *sock) } netdev_put(llc->dev, &llc->dev_tracker); sock_put(sk); + sock_orphan(sk); + sock->sk = NULL; llc_sk_free(sk); out: return 0; From a3fa9838e8140584a6f338e8516f2b05d3bea812 Mon Sep 17 00:00:00 2001 From: Patrick Rudolph Date: Tue, 30 Jan 2024 20:32:56 +0530 Subject: [PATCH 281/347] regulator (max5970): Fix IRQ handler The max5970 datasheet gives the impression that IRQ status bits must be cleared by writing a one to set bits, as those are marked with 'R/C', however tests showed that a zero must be written. Fixes an IRQ storm as the interrupt handler actually clears the IRQ status bits. Signed-off-by: Patrick Rudolph Signed-off-by: Naresh Solanki Link: https://msgid.link/r/20240130150257.3643657-1-naresh.solanki@9elements.com Signed-off-by: Mark Brown --- drivers/regulator/max5970-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/max5970-regulator.c b/drivers/regulator/max5970-regulator.c index bc88a40a88d4..830a1c4cd705 100644 --- a/drivers/regulator/max5970-regulator.c +++ b/drivers/regulator/max5970-regulator.c @@ -392,7 +392,7 @@ static int max597x_regmap_read_clear(struct regmap *map, unsigned int reg, return ret; if (*val) - return regmap_write(map, reg, *val); + return regmap_write(map, reg, 0); return 0; } From 6500ad28fd5d67d5ca0fee9da73c463090842440 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 30 Jan 2024 10:40:53 +0100 Subject: [PATCH 282/347] spi: sh-msiof: avoid integer overflow in constants cppcheck rightfully warned: drivers/spi/spi-sh-msiof.c:792:28: warning: Signed integer overflow for expression '7<<29'. [integerOverflow] sh_msiof_write(p, SIFCTR, SIFCTR_TFWM_1 | SIFCTR_RFWM_1); Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://msgid.link/r/20240130094053.10672-1-wsa+renesas@sang-engineering.com Signed-off-by: Mark Brown --- drivers/spi/spi-sh-msiof.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index cfc3b1ddbd22..6f12e4fb2e2e 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -136,14 +136,14 @@ struct sh_msiof_spi_priv { /* SIFCTR */ #define SIFCTR_TFWM_MASK GENMASK(31, 29) /* Transmit FIFO Watermark */ -#define SIFCTR_TFWM_64 (0 << 29) /* Transfer Request when 64 empty stages */ -#define SIFCTR_TFWM_32 (1 << 29) /* Transfer Request when 32 empty stages */ -#define SIFCTR_TFWM_24 (2 << 29) /* Transfer Request when 24 empty stages */ -#define SIFCTR_TFWM_16 (3 << 29) /* Transfer Request when 16 empty stages */ -#define SIFCTR_TFWM_12 (4 << 29) /* Transfer Request when 12 empty stages */ -#define SIFCTR_TFWM_8 (5 << 29) /* Transfer Request when 8 empty stages */ -#define SIFCTR_TFWM_4 (6 << 29) /* Transfer Request when 4 empty stages */ -#define SIFCTR_TFWM_1 (7 << 29) /* Transfer Request when 1 empty stage */ +#define SIFCTR_TFWM_64 (0UL << 29) /* Transfer Request when 64 empty stages */ +#define SIFCTR_TFWM_32 (1UL << 29) /* Transfer Request when 32 empty stages */ +#define SIFCTR_TFWM_24 (2UL << 29) /* Transfer Request when 24 empty stages */ +#define SIFCTR_TFWM_16 (3UL << 29) /* Transfer Request when 16 empty stages */ +#define SIFCTR_TFWM_12 (4UL << 29) /* Transfer Request when 12 empty stages */ +#define SIFCTR_TFWM_8 (5UL << 29) /* Transfer Request when 8 empty stages */ +#define SIFCTR_TFWM_4 (6UL << 29) /* Transfer Request when 4 empty stages */ +#define SIFCTR_TFWM_1 (7UL << 29) /* Transfer Request when 1 empty stage */ #define SIFCTR_TFUA_MASK GENMASK(26, 20) /* Transmit FIFO Usable Area */ #define SIFCTR_TFUA_SHIFT 20 #define SIFCTR_TFUA(i) ((i) << SIFCTR_TFUA_SHIFT) From f1fea725cc93fcc3c5af9a2af63ffdc40dd2259e Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Wed, 20 Dec 2023 10:11:51 -0500 Subject: [PATCH 283/347] selftests/livepatch: fix and refactor new dmesg message code The livepatching kselftests rely on comparing expected vs. observed dmesg output. After each test, new dmesg entries are determined by the 'comm' utility comparing a saved, pre-test copy of dmesg to post-test dmesg output. Alexander reports that the 'comm --nocheck-order -13' invocation used by the tests can be confused when dmesg entry timestamps vary in magnitude (ie, "[ 98.820331]" vs. "[ 100.031067]"), in which case, additional messages are reported as new. The unexpected entries then spoil the test results. Instead of relying on 'comm' or 'diff' to determine new testing dmesg entries, refactor the code: - pre-test : log a unique canary dmesg entry - test : run tests, log messages - post-test : filter dmesg starting from pre-test message Reported-by: Alexander Gordeev Closes: https://lore.kernel.org/live-patching/ZYAimyPYhxVA9wKg@li-008a6a4c-3549-11b2-a85c-c5cc2836eea2.ibm.com/ Signed-off-by: Joe Lawrence Acked-by: Alexander Gordeev Tested-by: Marcos Paulo de Souza Reviewed-by: Marcos Paulo de Souza Acked-by: Miroslav Benes Signed-off-by: Shuah Khan --- .../testing/selftests/livepatch/functions.sh | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index c8416c54b463..b1fd7362c2fe 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -42,17 +42,6 @@ function die() { exit 1 } -# save existing dmesg so we can detect new content -function save_dmesg() { - SAVED_DMESG=$(mktemp --tmpdir -t klp-dmesg-XXXXXX) - dmesg > "$SAVED_DMESG" -} - -# cleanup temporary dmesg file from save_dmesg() -function cleanup_dmesg_file() { - rm -f "$SAVED_DMESG" -} - function push_config() { DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \ awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}') @@ -99,7 +88,6 @@ function set_ftrace_enabled() { function cleanup() { pop_config - cleanup_dmesg_file } # setup_config - save the current config and set a script exit trap that @@ -280,7 +268,15 @@ function set_pre_patch_ret { function start_test { local test="$1" - save_dmesg + # Dump something unique into the dmesg log, then stash the entry + # in LAST_DMESG. The check_result() function will use it to + # find new kernel messages since the test started. + local last_dmesg_msg="livepatch kselftest timestamp: $(date --rfc-3339=ns)" + log "$last_dmesg_msg" + loop_until 'dmesg | grep -q "$last_dmesg_msg"' || + die "buffer busy? can't find canary dmesg message: $last_dmesg_msg" + LAST_DMESG=$(dmesg | grep "$last_dmesg_msg") + echo -n "TEST: $test ... " log "===== TEST: $test =====" } @@ -291,23 +287,24 @@ function check_result { local expect="$*" local result - # Note: when comparing dmesg output, the kernel log timestamps - # help differentiate repeated testing runs. Remove them with a - # post-comparison sed filter. - - result=$(dmesg | comm --nocheck-order -13 "$SAVED_DMESG" - | \ + # Test results include any new dmesg entry since LAST_DMESG, then: + # - include lines matching keywords + # - exclude lines matching keywords + # - filter out dmesg timestamp prefixes + result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \ grep -e 'livepatch:' -e 'test_klp' | \ grep -v '\(tainting\|taints\) kernel' | \ sed 's/^\[[ 0-9.]*\] //') if [[ "$expect" == "$result" ]] ; then echo "ok" + elif [[ "$result" == "" ]] ; then + echo -e "not ok\n\nbuffer overrun? can't find canary dmesg entry: $LAST_DMESG\n" + die "livepatch kselftest(s) failed" else echo -e "not ok\n\n$(diff -upr --label expected --label result <(echo "$expect") <(echo "$result"))\n" die "livepatch kselftest(s) failed" fi - - cleanup_dmesg_file } # check_sysfs_rights(modname, rel_path, expected_rights) - check sysfs From 5820cfee443f8a90ea3eb9f99f57f2049a4a93c3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 24 Jan 2024 13:00:18 +0000 Subject: [PATCH 284/347] kselftest/seccomp: Use kselftest output functions for benchmark In preparation for trying to output the test results themselves in TAP format rework all the prints in the benchmark to use the kselftest output functions. The uses of system() all produce single line output so we can avoid having to deal with fully managing the child process and continue to use system() by simply printing an empty message before we invoke system(). We also leave one printf() used to complete a line of output in place. Tested-by: Anders Roxell Acked-by: Kees Cook Signed-off-by: Mark Brown Signed-off-by: Shuah Khan --- .../selftests/seccomp/seccomp_benchmark.c | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index 5b5c9d558dee..93168dd2c1e3 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -38,10 +38,10 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples) i *= 1000000000ULL; i += finish.tv_nsec - start.tv_nsec; - printf("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n", - finish.tv_sec, finish.tv_nsec, - start.tv_sec, start.tv_nsec, - i, (double)i / 1000000000.0); + ksft_print_msg("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n", + finish.tv_sec, finish.tv_nsec, + start.tv_sec, start.tv_nsec, + i, (double)i / 1000000000.0); return i; } @@ -53,7 +53,7 @@ unsigned long long calibrate(void) pid_t pid, ret; int seconds = 15; - printf("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds); + ksft_print_msg("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds); samples = 0; pid = getpid(); @@ -102,14 +102,14 @@ long compare(const char *name_one, const char *name_eval, const char *name_two, { bool good; - printf("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, - (long long)one, name_eval, (long long)two); + ksft_print_msg("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, + (long long)one, name_eval, (long long)two); if (one > INT_MAX) { - printf("Miscalculation! Measurement went negative: %lld\n", (long long)one); + ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)one); return 1; } if (two > INT_MAX) { - printf("Miscalculation! Measurement went negative: %lld\n", (long long)two); + ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)two); return 1; } @@ -145,12 +145,15 @@ int main(int argc, char *argv[]) setbuf(stdout, NULL); - printf("Running on:\n"); + ksft_print_msg("Running on:\n"); + ksft_print_msg(""); system("uname -a"); - printf("Current BPF sysctl settings:\n"); + ksft_print_msg("Current BPF sysctl settings:\n"); /* Avoid using "sysctl" which may not be installed. */ + ksft_print_msg(""); system("grep -H . /proc/sys/net/core/bpf_jit_enable"); + ksft_print_msg(""); system("grep -H . /proc/sys/net/core/bpf_jit_harden"); if (argc > 1) @@ -158,11 +161,11 @@ int main(int argc, char *argv[]) else samples = calibrate(); - printf("Benchmarking %llu syscalls...\n", samples); + ksft_print_msg("Benchmarking %llu syscalls...\n", samples); /* Native call */ native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid native: %llu ns\n", native); + ksft_print_msg("getpid native: %llu ns\n", native); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); assert(ret == 0); @@ -172,33 +175,33 @@ int main(int argc, char *argv[]) assert(ret == 0); bitmap1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1); + ksft_print_msg("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1); /* Second filter resulting in a bitmap */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); assert(ret == 0); bitmap2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2); + ksft_print_msg("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2); /* Third filter, can no longer be converted to bitmap */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); assert(ret == 0); filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1); + ksft_print_msg("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1); /* Fourth filter, can not be converted to bitmap because of filter 3 */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); assert(ret == 0); filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2); + ksft_print_msg("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2); /* Estimations */ #define ESTIMATE(fmt, var, what) do { \ var = (what); \ - printf("Estimated " fmt ": %llu ns\n", var); \ + ksft_print_msg("Estimated " fmt ": %llu ns\n", var); \ if (var > INT_MAX) \ goto more_samples; \ } while (0) @@ -218,7 +221,7 @@ int main(int argc, char *argv[]) ESTIMATE("seccomp per-filter overhead (filters / 4)", per_filter2, (filter2 - native - entry) / 4); - printf("Expectations:\n"); + ksft_print_msg("Expectations:\n"); ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1); bits = compare("native", "≤", "1 filter", native, le, filter1); if (bits) @@ -230,7 +233,7 @@ int main(int argc, char *argv[]) bits = compare("1 bitmapped", "≈", "2 bitmapped", bitmap1 - native, approx, bitmap2 - native); if (bits) { - printf("Skipping constant action bitmap expectations: they appear unsupported.\n"); + ksft_print_msg("Skipping constant action bitmap expectations: they appear unsupported.\n"); goto out; } @@ -242,7 +245,7 @@ int main(int argc, char *argv[]) goto out; more_samples: - printf("Saw unexpected benchmark result. Try running again with more samples?\n"); + ksft_print_msg("Saw unexpected benchmark result. Try running again with more samples?\n"); out: return 0; } From b54761f6e9773350c0d1fb8e1e5aacaba7769d0f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 24 Jan 2024 13:00:19 +0000 Subject: [PATCH 285/347] kselftest/seccomp: Report each expectation we assert as a KTAP test The seccomp benchmark test makes a number of checks on the performance it measures and logs them to the output but does so in a custom format which none of the automated test runners understand meaning that the chances that anyone is paying attention are slim. Let's additionally log each result in KTAP format so that automated systems parsing the test output will see each comparison as a test case. The original logs are left in place since they provide the actual numbers for analysis. As part of this rework the flow for the main program so that when we skip tests we still log all the tests we skip, this is because the standard KTAP headers and footers include counts of the number of expected and run tests. Tested-by: Anders Roxell Acked-by: Kees Cook Signed-off-by: Mark Brown Signed-off-by: Shuah Khan --- .../selftests/seccomp/seccomp_benchmark.c | 61 +++++++++++++------ 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index 93168dd2c1e3..97b86980b768 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -98,24 +98,36 @@ bool le(int i_one, int i_two) } long compare(const char *name_one, const char *name_eval, const char *name_two, - unsigned long long one, bool (*eval)(int, int), unsigned long long two) + unsigned long long one, bool (*eval)(int, int), unsigned long long two, + bool skip) { bool good; + if (skip) { + ksft_test_result_skip("%s %s %s\n", name_one, name_eval, + name_two); + return 0; + } + ksft_print_msg("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, (long long)one, name_eval, (long long)two); if (one > INT_MAX) { ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)one); - return 1; + good = false; + goto out; } if (two > INT_MAX) { ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)two); - return 1; + good = false; + goto out; } good = eval(one, two); printf("%s\n", good ? "✔️" : "❌"); +out: + ksft_test_result(good, "%s %s %s\n", name_one, name_eval, name_two); + return good ? 0 : 1; } @@ -142,9 +154,13 @@ int main(int argc, char *argv[]) unsigned long long samples, calc; unsigned long long native, filter1, filter2, bitmap1, bitmap2; unsigned long long entry, per_filter1, per_filter2; + bool skip = false; setbuf(stdout, NULL); + ksft_print_header(); + ksft_set_plan(7); + ksft_print_msg("Running on:\n"); ksft_print_msg(""); system("uname -a"); @@ -202,8 +218,10 @@ int main(int argc, char *argv[]) #define ESTIMATE(fmt, var, what) do { \ var = (what); \ ksft_print_msg("Estimated " fmt ": %llu ns\n", var); \ - if (var > INT_MAX) \ - goto more_samples; \ + if (var > INT_MAX) { \ + skip = true; \ + ret |= 1; \ + } \ } while (0) ESTIMATE("total seccomp overhead for 1 bitmapped filter", calc, @@ -222,30 +240,33 @@ int main(int argc, char *argv[]) (filter2 - native - entry) / 4); ksft_print_msg("Expectations:\n"); - ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1); - bits = compare("native", "≤", "1 filter", native, le, filter1); + ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1, + skip); + bits = compare("native", "≤", "1 filter", native, le, filter1, + skip); if (bits) - goto more_samples; + skip = true; ret |= compare("per-filter (last 2 diff)", "≈", "per-filter (filters / 4)", - per_filter1, approx, per_filter2); + per_filter1, approx, per_filter2, skip); bits = compare("1 bitmapped", "≈", "2 bitmapped", - bitmap1 - native, approx, bitmap2 - native); + bitmap1 - native, approx, bitmap2 - native, skip); if (bits) { ksft_print_msg("Skipping constant action bitmap expectations: they appear unsupported.\n"); - goto out; + skip = true; } - ret |= compare("entry", "≈", "1 bitmapped", entry, approx, bitmap1 - native); - ret |= compare("entry", "≈", "2 bitmapped", entry, approx, bitmap2 - native); + ret |= compare("entry", "≈", "1 bitmapped", entry, approx, + bitmap1 - native, skip); + ret |= compare("entry", "≈", "2 bitmapped", entry, approx, + bitmap2 - native, skip); ret |= compare("native + entry + (per filter * 4)", "≈", "4 filters total", - entry + (per_filter1 * 4) + native, approx, filter2); - if (ret == 0) - goto out; + entry + (per_filter1 * 4) + native, approx, filter2, + skip); -more_samples: - ksft_print_msg("Saw unexpected benchmark result. Try running again with more samples?\n"); -out: - return 0; + if (ret) + ksft_print_msg("Saw unexpected benchmark result. Try running again with more samples?\n"); + + ksft_finished(); } From 8b1d72395635af45410b66cc4c4ab37a12c4a831 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 20 Jan 2024 15:29:27 +0100 Subject: [PATCH 286/347] parisc: Fix random data corruption from exception handler The current exception handler implementation, which assists when accessing user space memory, may exhibit random data corruption if the compiler decides to use a different register than the specified register %r29 (defined in ASM_EXCEPTIONTABLE_REG) for the error code. If the compiler choose another register, the fault handler will nevertheless store -EFAULT into %r29 and thus trash whatever this register is used for. Looking at the assembly I found that this happens sometimes in emulate_ldd(). To solve the issue, the easiest solution would be if it somehow is possible to tell the fault handler which register is used to hold the error code. Using %0 or %1 in the inline assembly is not posssible as it will show up as e.g. %r29 (with the "%r" prefix), which the GNU assembler can not convert to an integer. This patch takes another, better and more flexible approach: We extend the __ex_table (which is out of the execution path) by one 32-word. In this word we tell the compiler to insert the assembler instruction "or %r0,%r0,%reg", where %reg references the register which the compiler choosed for the error return code. In case of an access failure, the fault handler finds the __ex_table entry and can examine the opcode. The used register is encoded in the lowest 5 bits, and the fault handler can then store -EFAULT into this register. Since we extend the __ex_table to 3 words we can't use the BUILDTIME_TABLE_SORT config option any longer. Signed-off-by: Helge Deller Cc: # v6.0+ --- arch/parisc/Kconfig | 1 - arch/parisc/include/asm/assembly.h | 1 + arch/parisc/include/asm/extable.h | 64 +++++++++++++++++++++++++ arch/parisc/include/asm/special_insns.h | 6 ++- arch/parisc/include/asm/uaccess.h | 48 +++---------------- arch/parisc/kernel/cache.c | 4 +- arch/parisc/kernel/unaligned.c | 44 ++++++++--------- arch/parisc/mm/fault.c | 11 +++-- 8 files changed, 108 insertions(+), 71 deletions(-) create mode 100644 arch/parisc/include/asm/extable.h diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index d14ccc948a29..5c845e8d59d9 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -25,7 +25,6 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select BUILDTIME_TABLE_SORT select HAVE_KERNEL_UNCOMPRESSED select HAVE_PCI select HAVE_PERF_EVENTS diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 74d17d7e759d..5937d5edaba1 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -576,6 +576,7 @@ .section __ex_table,"aw" ! \ .align 4 ! \ .word (fault_addr - .), (except_addr - .) ! \ + or %r0,%r0,%r0 ! \ .previous diff --git a/arch/parisc/include/asm/extable.h b/arch/parisc/include/asm/extable.h new file mode 100644 index 000000000000..4ea23e3d79dc --- /dev/null +++ b/arch/parisc/include/asm/extable.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PARISC_EXTABLE_H +#define __PARISC_EXTABLE_H + +#include +#include + +/* + * The exception table consists of three addresses: + * + * - A relative address to the instruction that is allowed to fault. + * - A relative address at which the program should continue (fixup routine) + * - An asm statement which specifies which CPU register will + * receive -EFAULT when an exception happens if the lowest bit in + * the fixup address is set. + * + * Note: The register specified in the err_opcode instruction will be + * modified at runtime if a fault happens. Register %r0 will be ignored. + * + * Since relative addresses are used, 32bit values are sufficient even on + * 64bit kernel. + */ + +struct pt_regs; +int fixup_exception(struct pt_regs *regs); + +#define ARCH_HAS_RELATIVE_EXTABLE +struct exception_table_entry { + int insn; /* relative address of insn that is allowed to fault. */ + int fixup; /* relative address of fixup routine */ + int err_opcode; /* sample opcode with register which holds error code */ +}; + +#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr, opcode )\ + ".section __ex_table,\"aw\"\n" \ + ".align 4\n" \ + ".word (" #fault_addr " - .), (" #except_addr " - .)\n" \ + opcode "\n" \ + ".previous\n" + +/* + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry + * (with lowest bit set) for which the fault handler in fixup_exception() will + * load -EFAULT on fault into the register specified by the err_opcode instruction, + * and zeroes the target register in case of a read fault in get_user(). + */ +#define ASM_EXCEPTIONTABLE_VAR(__err_var) \ + int __err_var = 0 +#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr, register )\ + ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1, "or %%r0,%%r0," register) + +static inline void swap_ex_entry_fixup(struct exception_table_entry *a, + struct exception_table_entry *b, + struct exception_table_entry tmp, + int delta) +{ + a->fixup = b->fixup + delta; + b->fixup = tmp.fixup - delta; + a->err_opcode = b->err_opcode; + b->err_opcode = tmp.err_opcode; +} +#define swap_ex_entry_fixup swap_ex_entry_fixup + +#endif diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h index c822bd0c0e3c..51f40eaf7780 100644 --- a/arch/parisc/include/asm/special_insns.h +++ b/arch/parisc/include/asm/special_insns.h @@ -8,7 +8,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ @@ -22,7 +23,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%%sr3,%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 4165079898d9..88d0ae5769dd 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -7,6 +7,7 @@ */ #include #include +#include #include #include @@ -26,37 +27,6 @@ #define STD_USER(sr, x, ptr) __put_user_asm(sr, "std", x, ptr) #endif -/* - * The exception table contains two values: the first is the relative offset to - * the address of the instruction that is allowed to fault, and the second is - * the relative offset to the address of the fixup routine. Since relative - * addresses are used, 32bit values are sufficient even on 64bit kernel. - */ - -#define ARCH_HAS_RELATIVE_EXTABLE -struct exception_table_entry { - int insn; /* relative address of insn that is allowed to fault. */ - int fixup; /* relative address of fixup routine */ -}; - -#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ - ".section __ex_table,\"aw\"\n" \ - ".align 4\n" \ - ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ - ".previous\n" - -/* - * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry - * (with lowest bit set) for which the fault handler in fixup_exception() will - * load -EFAULT into %r29 for a read or write fault, and zeroes the target - * register in case of a read fault in get_user(). - */ -#define ASM_EXCEPTIONTABLE_REG 29 -#define ASM_EXCEPTIONTABLE_VAR(__variable) \ - register long __variable __asm__ ("r29") = 0 -#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ - ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) - #define __get_user_internal(sr, val, ptr) \ ({ \ ASM_EXCEPTIONTABLE_VAR(__gu_err); \ @@ -83,7 +53,7 @@ struct exception_table_entry { \ __asm__("1: " ldx " 0(%%sr%2,%3),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ : "=r"(__gu_val), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -115,8 +85,8 @@ struct exception_table_entry { "1: ldw 0(%%sr%2,%3),%0\n" \ "2: ldw 4(%%sr%2,%3),%R0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%1") \ : "=&r"(__gu_tmp.l), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -174,7 +144,7 @@ struct exception_table_entry { __asm__ __volatile__ ( \ "1: " stx " %1,0(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(x), "i"(sr), "r"(ptr)) @@ -186,15 +156,14 @@ struct exception_table_entry { "1: stw %1,0(%%sr%2,%3)\n" \ "2: stw %R1,4(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(__val), "i"(sr), "r"(ptr)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ - /* * Complex access routines -- external declarations */ @@ -216,7 +185,4 @@ unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src, #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER -struct pt_regs; -int fixup_exception(struct pt_regs *regs); - #endif /* __PARISC_UACCESS_H */ diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 0c015487e5db..5552602fcaef 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -854,7 +854,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, #endif " fic,m %3(%4,%0)\n" "2: sync\n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1") : "+r" (start), "+r" (error) : "r" (end), "r" (dcache_stride), "i" (SR_USER)); } @@ -869,7 +869,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, #endif " fdc,m %3(%4,%0)\n" "2: sync\n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1") : "+r" (start), "+r" (error) : "r" (end), "r" (icache_stride), "i" (SR_USER)); } diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index ce25acfe4889..c520e551a165 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -120,8 +120,8 @@ static int emulate_ldh(struct pt_regs *regs, int toreg) "2: ldbs 1(%%sr1,%3), %0\n" " depw %2, 23, 24, %0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1) : "r" (saddr), "r" (regs->isr) ); @@ -152,8 +152,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) " mtctl %2,11\n" " vshd %0,%3,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2) : "r" (saddr), "r" (regs->isr) ); @@ -189,8 +189,8 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " mtsar %%r19\n" " shrpd %0,%%r20,%%sar,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "=r" (val), "+r" (ret) : "0" (val), "r" (saddr), "r" (regs->isr) : "r19", "r20" ); @@ -209,9 +209,9 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " vshd %0,%R0,%0\n" " vshd %R0,%4,%R0\n" "4: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1") : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) : "r" (regs->isr) ); } @@ -244,8 +244,8 @@ static int emulate_sth(struct pt_regs *regs, int frreg) "1: stb %1, 0(%%sr1, %3)\n" "2: stb %2, 1(%%sr1, %3)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret), "=&r" (temp1) : "r" (val), "r" (regs->ior), "r" (regs->isr) ); @@ -285,8 +285,8 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) " stw %%r20,0(%%sr1,%2)\n" " stw %%r21,4(%%sr1,%2)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -329,10 +329,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "3: std %%r20,0(%%sr1,%2)\n" "4: std %%r21,8(%%sr1,%2)\n" "5: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -357,11 +357,11 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "4: stw %%r1,4(%%sr1,%2)\n" "5: stw %R1,8(%%sr1,%2)\n" "6: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r1" ); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 2fe5b44986e0..c39de84e98b0 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -150,11 +150,16 @@ int fixup_exception(struct pt_regs *regs) * Fix up get_user() and put_user(). * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant * bit in the relative address of the fixup routine to indicate - * that gr[ASM_EXCEPTIONTABLE_REG] should be loaded with - * -EFAULT to report a userspace access error. + * that the register encoded in the "or %r0,%r0,register" + * opcode should be loaded with -EFAULT to report a userspace + * access error. */ if (fix->fixup & 1) { - regs->gr[ASM_EXCEPTIONTABLE_REG] = -EFAULT; + int fault_error_reg = fix->err_opcode & 0x1f; + if (!WARN_ON(!fault_error_reg)) + regs->gr[fault_error_reg] = -EFAULT; + pr_debug("Unalignment fixup of register %d at %pS\n", + fault_error_reg, (void*)regs->iaoq[0]); /* zero target register for get_user() */ if (parisc_acctyp(0, regs->iir) == VM_READ) { From 53ed2ac8fc1de6658aadae5714627ac99b9dddb0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 30 Jan 2024 11:34:49 -0800 Subject: [PATCH 287/347] soc: apple: mailbox: error pointers are negative integers In an entirely unrelated discussion where I pointed out a stupid thinko of mine, Rasmus piped up and noted that that obvious mistake already existed elsewhere in the kernel tree. An "error pointer" is the negative error value encoded as a pointer, making the whole "return error or valid pointer" use-case simple and straightforward. We use it all over the kernel. But the key here is that errors are _negative_ error numbers, not the horrid UNIX user-level model of "-1 and the value of 'errno'". The Apple mailbox driver used the positive error values, and thus just returned invalid normal pointers instead of actual errors. Of course, the reason nobody ever noticed is that the errors presumably never actually happen, so this is fixing a conceptual bug rather than an actual one. Reported-by: Rasmus Villemoes Link: https://lore.kernel.org/all/5c30afe0-f9fb-45d5-9333-dd914a1ea93a@prevas.dk/ Signed-off-by: Linus Torvalds --- drivers/soc/apple/mailbox.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/soc/apple/mailbox.c b/drivers/soc/apple/mailbox.c index 780199bf351e..49a0955e82d6 100644 --- a/drivers/soc/apple/mailbox.c +++ b/drivers/soc/apple/mailbox.c @@ -296,14 +296,14 @@ struct apple_mbox *apple_mbox_get(struct device *dev, int index) of_node_put(args.np); if (!pdev) - return ERR_PTR(EPROBE_DEFER); + return ERR_PTR(-EPROBE_DEFER); mbox = platform_get_drvdata(pdev); if (!mbox) - return ERR_PTR(EPROBE_DEFER); + return ERR_PTR(-EPROBE_DEFER); if (!device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_CONSUMER)) - return ERR_PTR(ENODEV); + return ERR_PTR(-ENODEV); return mbox; } From 5a287d3d2b9de2b3e747132c615599907ba5c3c1 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 26 Jan 2024 19:45:31 +0100 Subject: [PATCH 288/347] lsm: fix default return value of the socket_getpeersec_*() hooks For these hooks the true "neutral" value is -EOPNOTSUPP, which is currently what is returned when no LSM provides this hook and what LSMs return when there is no security context set on the socket. Correct the value in and adjust the dispatch functions in security/security.c to avoid issues when the BPF LSM is enabled. Cc: stable@vger.kernel.org Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Signed-off-by: Ondrej Mosnacek [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 4 ++-- security/security.c | 31 +++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 185924c56378..76458b6d53da 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -315,9 +315,9 @@ LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how) LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb) -LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) -LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock, struct sk_buff *skb, u32 *secid) LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority) LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk) diff --git a/security/security.c b/security/security.c index 6196ccaba433..3aaad75c9ce8 100644 --- a/security/security.c +++ b/security/security.c @@ -4624,8 +4624,20 @@ EXPORT_SYMBOL(security_sock_rcv_skb); int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) { - return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock, - optval, optlen, len); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_stream, + list) { + rc = hp->hook.socket_getpeersec_stream(sock, optval, optlen, + len); + if (rc != LSM_RET_DEFAULT(socket_getpeersec_stream)) + return rc; + } + return LSM_RET_DEFAULT(socket_getpeersec_stream); } /** @@ -4645,8 +4657,19 @@ int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) { - return call_int_hook(socket_getpeersec_dgram, -ENOPROTOOPT, sock, - skb, secid); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_dgram, + list) { + rc = hp->hook.socket_getpeersec_dgram(sock, skb, secid); + if (rc != LSM_RET_DEFAULT(socket_getpeersec_dgram)) + return rc; + } + return LSM_RET_DEFAULT(socket_getpeersec_dgram); } EXPORT_SYMBOL(security_socket_getpeersec_dgram); From b40f873a7c80dbafbb6f4a7a569f2dcaf969d283 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 29 Jan 2024 14:37:03 +0200 Subject: [PATCH 289/347] selftests: net: Add missing matchall classifier One of the test cases in the test_bridge_backup_port.sh selftest relies on a matchall classifier to drop unrelated traffic so that the Tx drop counter on the VXLAN device will only be incremented as a result of traffic generated by the test. However, the configuration option for the matchall classifier is missing from the configuration file which might explain the failures we see in the netdev CI [1]. Fix by adding CONFIG_NET_CLS_MATCHALL to the configuration file. [1] # Backup nexthop ID - invalid IDs # ------------------------------- [...] # TEST: Forwarding out of vx0 [ OK ] # TEST: No forwarding using backup nexthop ID [ OK ] # TEST: Tx drop increased [FAIL] # TEST: IPv6 address family nexthop as backup nexthop [ OK ] # TEST: No forwarding out of swp1 [ OK ] # TEST: Forwarding out of vx0 [ OK ] # TEST: No forwarding using backup nexthop ID [ OK ] # TEST: Tx drop increased [FAIL] [...] Fixes: b408453053fb ("selftests: net: Add bridge backup port and backup nexthop ID test") Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240129123703.1857843-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index d4b38177ed04..3d908b52f22f 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -52,6 +52,7 @@ CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_U32=m CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m From f5c3eb4b7251baba5cd72c9e93920e710ac8194a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Sat, 27 Jan 2024 18:50:32 +0100 Subject: [PATCH 290/347] bridge: mcast: fix disabled snooping after long uptime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original idea of the delay_time check was to not apply multicast snooping too early when an MLD querier appears. And to instead wait at least for MLD reports to arrive before switching from flooding to group based, MLD snooped forwarding, to avoid temporary packet loss. However in a batman-adv mesh network it was noticed that after 248 days of uptime 32bit MIPS based devices would start to signal that they had stopped applying multicast snooping due to missing queriers - even though they were the elected querier and still sending MLD queries themselves. While time_is_before_jiffies() generally is safe against jiffies wrap-arounds, like the code comments in jiffies.h explain, it won't be able to track a difference larger than ULONG_MAX/2. With a 32bit large jiffies and one jiffies tick every 10ms (CONFIG_HZ=100) on these MIPS devices running OpenWrt this would result in a difference larger than ULONG_MAX/2 after 248 (= 2^32/100/60/60/24/2) days and time_is_before_jiffies() would then start to return false instead of true. Leading to multicast snooping not being applied to multicast packets anymore. Fix this issue by using a proper timer_list object which won't have this ULONG_MAX/2 difference limitation. Fixes: b00589af3b04 ("bridge: disable snooping if there is no querier") Signed-off-by: Linus Lüssing Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240127175033.9640-1-linus.luessing@c0d3.blue Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 20 +++++++++++++++----- net/bridge/br_private.h | 4 ++-- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d7d021af1029..2d7b73242958 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1762,6 +1762,10 @@ static void br_ip6_multicast_querier_expired(struct timer_list *t) } #endif +static void br_multicast_query_delay_expired(struct timer_list *t) +{ +} + static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx, struct br_ip *ip, struct sk_buff *skb) @@ -3198,7 +3202,7 @@ br_multicast_update_query_timer(struct net_bridge_mcast *brmctx, unsigned long max_delay) { if (!timer_pending(&query->timer)) - query->delay_time = jiffies + max_delay; + mod_timer(&query->delay_timer, jiffies + max_delay); mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval); } @@ -4041,13 +4045,11 @@ void br_multicast_ctx_init(struct net_bridge *br, brmctx->multicast_querier_interval = 255 * HZ; brmctx->multicast_membership_interval = 260 * HZ; - brmctx->ip4_other_query.delay_time = 0; brmctx->ip4_querier.port_ifidx = 0; seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock); brmctx->multicast_igmp_version = 2; #if IS_ENABLED(CONFIG_IPV6) brmctx->multicast_mld_version = 1; - brmctx->ip6_other_query.delay_time = 0; brmctx->ip6_querier.port_ifidx = 0; seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock); #endif @@ -4056,6 +4058,8 @@ void br_multicast_ctx_init(struct net_bridge *br, br_ip4_multicast_local_router_expired, 0); timer_setup(&brmctx->ip4_other_query.timer, br_ip4_multicast_querier_expired, 0); + timer_setup(&brmctx->ip4_other_query.delay_timer, + br_multicast_query_delay_expired, 0); timer_setup(&brmctx->ip4_own_query.timer, br_ip4_multicast_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) @@ -4063,6 +4067,8 @@ void br_multicast_ctx_init(struct net_bridge *br, br_ip6_multicast_local_router_expired, 0); timer_setup(&brmctx->ip6_other_query.timer, br_ip6_multicast_querier_expired, 0); + timer_setup(&brmctx->ip6_other_query.delay_timer, + br_multicast_query_delay_expired, 0); timer_setup(&brmctx->ip6_own_query.timer, br_ip6_multicast_query_expired, 0); #endif @@ -4197,10 +4203,12 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx) { del_timer_sync(&brmctx->ip4_mc_router_timer); del_timer_sync(&brmctx->ip4_other_query.timer); + del_timer_sync(&brmctx->ip4_other_query.delay_timer); del_timer_sync(&brmctx->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) del_timer_sync(&brmctx->ip6_mc_router_timer); del_timer_sync(&brmctx->ip6_other_query.timer); + del_timer_sync(&brmctx->ip6_other_query.delay_timer); del_timer_sync(&brmctx->ip6_own_query.timer); #endif } @@ -4643,13 +4651,15 @@ int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val) max_delay = brmctx->multicast_query_response_interval; if (!timer_pending(&brmctx->ip4_other_query.timer)) - brmctx->ip4_other_query.delay_time = jiffies + max_delay; + mod_timer(&brmctx->ip4_other_query.delay_timer, + jiffies + max_delay); br_multicast_start_querier(brmctx, &brmctx->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) if (!timer_pending(&brmctx->ip6_other_query.timer)) - brmctx->ip6_other_query.delay_time = jiffies + max_delay; + mod_timer(&brmctx->ip6_other_query.delay_timer, + jiffies + max_delay); br_multicast_start_querier(brmctx, &brmctx->ip6_own_query); #endif diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index b0a92c344722..86ea5e6689b5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -78,7 +78,7 @@ struct bridge_mcast_own_query { /* other querier */ struct bridge_mcast_other_query { struct timer_list timer; - unsigned long delay_time; + struct timer_list delay_timer; }; /* selected querier */ @@ -1159,7 +1159,7 @@ __br_multicast_querier_exists(struct net_bridge_mcast *brmctx, own_querier_enabled = false; } - return time_is_before_jiffies(querier->delay_time) && + return !timer_pending(&querier->delay_timer) && (own_querier_enabled || timer_pending(&querier->timer)); } From 1a89e24f8bfd3e3562d69709c9d9cd185ded869b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 29 Jan 2024 21:10:59 +0200 Subject: [PATCH 291/347] devlink: Fix referring to hw_addr attribute during state validation When port function state change is requested, and when the driver does not support it, it refers to the hw address attribute instead of state attribute. Seems like a copy paste error. Fix it by referring to the port function state attribute. Fixes: c0bea69d1ca7 ("devlink: Validate port function request") Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240129191059.129030-1-parav@nvidia.com Signed-off-by: Jakub Kicinski --- net/devlink/port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/devlink/port.c b/net/devlink/port.c index 62e54e152ecf..78592912f657 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -674,7 +674,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port, return -EOPNOTSUPP; } if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) { - NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR], + NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_STATE], "Function does not support state setting"); return -EOPNOTSUPP; } From 7cdd2108903a4e369eb37579830afc12a6877ec2 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 24 Jan 2024 12:26:57 +0100 Subject: [PATCH 292/347] HID: bpf: remove double fdget() When the kfunc hid_bpf_attach_prog() is called, we called twice fdget(): one for fetching the type of the bpf program, and one for actually attaching the program to the device. The problem is that between those two calls, we have no guarantees that the prog_fd is still the same file descriptor for the given program. Solve this by calling bpf_prog_get() earlier, and use this to fetch the program type. Reported-by: Dan Carpenter Link: https://lore.kernel.org/bpf/CAO-hwJJ8vh8JD3-P43L-_CLNmPx0hWj44aom0O838vfP4=_1CA@mail.gmail.com/T/#t Cc: Fixes: f5c27da4e3c8 ("HID: initial BPF implementation") Link: https://lore.kernel.org/r/20240124-b4-hid-bpf-fixes-v2-1-052520b1e5e6@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/hid_bpf_dispatch.c | 66 +++++++++++++++++++---------- drivers/hid/bpf/hid_bpf_dispatch.h | 4 +- drivers/hid/bpf/hid_bpf_jmp_table.c | 20 ++------- 3 files changed, 49 insertions(+), 41 deletions(-) diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index d9ef45fcaeab..5111d1fef0d3 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -241,6 +241,39 @@ int hid_bpf_reconnect(struct hid_device *hdev) return 0; } +static int do_hid_bpf_attach_prog(struct hid_device *hdev, int prog_fd, struct bpf_prog *prog, + __u32 flags) +{ + int fd, err, prog_type; + + prog_type = hid_bpf_get_prog_attach_type(prog); + if (prog_type < 0) + return prog_type; + + if (prog_type >= HID_BPF_PROG_TYPE_MAX) + return -EINVAL; + + if (prog_type == HID_BPF_PROG_TYPE_DEVICE_EVENT) { + err = hid_bpf_allocate_event_data(hdev); + if (err) + return err; + } + + fd = __hid_bpf_attach_prog(hdev, prog_type, prog_fd, prog, flags); + if (fd < 0) + return fd; + + if (prog_type == HID_BPF_PROG_TYPE_RDESC_FIXUP) { + err = hid_bpf_reconnect(hdev); + if (err) { + close_fd(fd); + return err; + } + } + + return fd; +} + /** * hid_bpf_attach_prog - Attach the given @prog_fd to the given HID device * @@ -257,18 +290,13 @@ noinline int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) { struct hid_device *hdev; + struct bpf_prog *prog; struct device *dev; - int fd, err, prog_type = hid_bpf_get_prog_attach_type(prog_fd); + int fd; if (!hid_bpf_ops) return -EINVAL; - if (prog_type < 0) - return prog_type; - - if (prog_type >= HID_BPF_PROG_TYPE_MAX) - return -EINVAL; - if ((flags & ~HID_BPF_FLAG_MASK)) return -EINVAL; @@ -278,23 +306,17 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) hdev = to_hid_device(dev); - if (prog_type == HID_BPF_PROG_TYPE_DEVICE_EVENT) { - err = hid_bpf_allocate_event_data(hdev); - if (err) - return err; - } + /* + * take a ref on the prog itself, it will be released + * on errors or when it'll be detached + */ + prog = bpf_prog_get(prog_fd); + if (IS_ERR(prog)) + return PTR_ERR(prog); - fd = __hid_bpf_attach_prog(hdev, prog_type, prog_fd, flags); + fd = do_hid_bpf_attach_prog(hdev, prog_fd, prog, flags); if (fd < 0) - return fd; - - if (prog_type == HID_BPF_PROG_TYPE_RDESC_FIXUP) { - err = hid_bpf_reconnect(hdev); - if (err) { - close_fd(fd); - return err; - } - } + bpf_prog_put(prog); return fd; } diff --git a/drivers/hid/bpf/hid_bpf_dispatch.h b/drivers/hid/bpf/hid_bpf_dispatch.h index 63dfc8605cd2..fbe0639d09f2 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.h +++ b/drivers/hid/bpf/hid_bpf_dispatch.h @@ -12,9 +12,9 @@ struct hid_bpf_ctx_kern { int hid_bpf_preload_skel(void); void hid_bpf_free_links_and_skel(void); -int hid_bpf_get_prog_attach_type(int prog_fd); +int hid_bpf_get_prog_attach_type(struct bpf_prog *prog); int __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, int prog_fd, - __u32 flags); + struct bpf_prog *prog, __u32 flags); void __hid_bpf_destroy_device(struct hid_device *hdev); int hid_bpf_prog_run(struct hid_device *hdev, enum hid_bpf_prog_type type, struct hid_bpf_ctx_kern *ctx_kern); diff --git a/drivers/hid/bpf/hid_bpf_jmp_table.c b/drivers/hid/bpf/hid_bpf_jmp_table.c index eca34b7372f9..12f7cebddd73 100644 --- a/drivers/hid/bpf/hid_bpf_jmp_table.c +++ b/drivers/hid/bpf/hid_bpf_jmp_table.c @@ -333,15 +333,10 @@ static int hid_bpf_insert_prog(int prog_fd, struct bpf_prog *prog) return err; } -int hid_bpf_get_prog_attach_type(int prog_fd) +int hid_bpf_get_prog_attach_type(struct bpf_prog *prog) { - struct bpf_prog *prog = NULL; - int i; int prog_type = HID_BPF_PROG_TYPE_UNDEF; - - prog = bpf_prog_get(prog_fd); - if (IS_ERR(prog)) - return PTR_ERR(prog); + int i; for (i = 0; i < HID_BPF_PROG_TYPE_MAX; i++) { if (hid_bpf_btf_ids[i] == prog->aux->attach_btf_id) { @@ -350,8 +345,6 @@ int hid_bpf_get_prog_attach_type(int prog_fd) } } - bpf_prog_put(prog); - return prog_type; } @@ -388,19 +381,13 @@ static const struct bpf_link_ops hid_bpf_link_lops = { /* called from syscall */ noinline int __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, - int prog_fd, __u32 flags) + int prog_fd, struct bpf_prog *prog, __u32 flags) { struct bpf_link_primer link_primer; struct hid_bpf_link *link; - struct bpf_prog *prog = NULL; struct hid_bpf_prog_entry *prog_entry; int cnt, err = -EINVAL, prog_table_idx = -1; - /* take a ref on the prog itself */ - prog = bpf_prog_get(prog_fd); - if (IS_ERR(prog)) - return PTR_ERR(prog); - mutex_lock(&hid_bpf_attach_lock); link = kzalloc(sizeof(*link), GFP_USER); @@ -467,7 +454,6 @@ __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, err_unlock: mutex_unlock(&hid_bpf_attach_lock); - bpf_prog_put(prog); kfree(link); return err; From 89be8aa5b0ecb3b729c7bcff64bb2af7921fec63 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 24 Jan 2024 12:26:58 +0100 Subject: [PATCH 293/347] HID: bpf: actually free hdev memory after attaching a HID-BPF program Turns out that I got my reference counts wrong and each successful bus_find_device() actually calls get_device(), and we need to manually call put_device(). Ensure each bus_find_device() gets a matching put_device() when releasing the bpf programs and fix all the error paths. Cc: Fixes: f5c27da4e3c8 ("HID: initial BPF implementation") Link: https://lore.kernel.org/r/20240124-b4-hid-bpf-fixes-v2-2-052520b1e5e6@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/hid_bpf_dispatch.c | 29 +++++++++++++++++++++++------ drivers/hid/bpf/hid_bpf_jmp_table.c | 20 +++++++++++++++++--- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 5111d1fef0d3..7903c8638e81 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -292,7 +292,7 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) struct hid_device *hdev; struct bpf_prog *prog; struct device *dev; - int fd; + int err, fd; if (!hid_bpf_ops) return -EINVAL; @@ -311,14 +311,24 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) * on errors or when it'll be detached */ prog = bpf_prog_get(prog_fd); - if (IS_ERR(prog)) - return PTR_ERR(prog); + if (IS_ERR(prog)) { + err = PTR_ERR(prog); + goto out_dev_put; + } fd = do_hid_bpf_attach_prog(hdev, prog_fd, prog, flags); - if (fd < 0) - bpf_prog_put(prog); + if (fd < 0) { + err = fd; + goto out_prog_put; + } return fd; + + out_prog_put: + bpf_prog_put(prog); + out_dev_put: + put_device(dev); + return err; } /** @@ -345,8 +355,10 @@ hid_bpf_allocate_context(unsigned int hid_id) hdev = to_hid_device(dev); ctx_kern = kzalloc(sizeof(*ctx_kern), GFP_KERNEL); - if (!ctx_kern) + if (!ctx_kern) { + put_device(dev); return NULL; + } ctx_kern->ctx.hid = hdev; @@ -363,10 +375,15 @@ noinline void hid_bpf_release_context(struct hid_bpf_ctx *ctx) { struct hid_bpf_ctx_kern *ctx_kern; + struct hid_device *hid; ctx_kern = container_of(ctx, struct hid_bpf_ctx_kern, ctx); + hid = (struct hid_device *)ctx_kern->ctx.hid; /* ignore const */ kfree(ctx_kern); + + /* get_device() is called by bus_find_device() */ + put_device(&hid->dev); } /** diff --git a/drivers/hid/bpf/hid_bpf_jmp_table.c b/drivers/hid/bpf/hid_bpf_jmp_table.c index 12f7cebddd73..aa8e1c79cdf5 100644 --- a/drivers/hid/bpf/hid_bpf_jmp_table.c +++ b/drivers/hid/bpf/hid_bpf_jmp_table.c @@ -196,6 +196,7 @@ static void __hid_bpf_do_release_prog(int map_fd, unsigned int idx) static void hid_bpf_release_progs(struct work_struct *work) { int i, j, n, map_fd = -1; + bool hdev_destroyed; if (!jmp_table.map) return; @@ -220,6 +221,12 @@ static void hid_bpf_release_progs(struct work_struct *work) if (entry->hdev) { hdev = entry->hdev; type = entry->type; + /* + * hdev is still valid, even if we are called after hid_destroy_device(): + * when hid_bpf_attach() gets called, it takes a ref on the dev through + * bus_find_device() + */ + hdev_destroyed = hdev->bpf.destroyed; hid_bpf_populate_hdev(hdev, type); @@ -232,12 +239,19 @@ static void hid_bpf_release_progs(struct work_struct *work) if (test_bit(next->idx, jmp_table.enabled)) continue; - if (next->hdev == hdev && next->type == type) + if (next->hdev == hdev && next->type == type) { + /* + * clear the hdev reference and decrement the device ref + * that was taken during bus_find_device() while calling + * hid_bpf_attach() + */ next->hdev = NULL; + put_device(&hdev->dev); + } } - /* if type was rdesc fixup, reconnect device */ - if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP) + /* if type was rdesc fixup and the device is not gone, reconnect device */ + if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP && !hdev_destroyed) hid_bpf_reconnect(hdev); } } From 764ad6b02777d77dca3659ca490f0898aa593670 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 24 Jan 2024 12:26:59 +0100 Subject: [PATCH 294/347] HID: bpf: use __bpf_kfunc instead of noinline Follow the docs at Documentation/bpf/kfuncs.rst: - declare the function with `__bpf_kfunc` - disables missing prototype warnings, which allows to remove them from include/linux/hid-bpf.h Removing the prototypes is not an issue because we currently have to redeclare them when writing the BPF program. They will eventually be generated by bpftool directly AFAIU. Link: https://lore.kernel.org/r/20240124-b4-hid-bpf-fixes-v2-3-052520b1e5e6@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/hid_bpf_dispatch.c | 18 +++++++++++++----- include/linux/hid_bpf.h | 11 ----------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 7903c8638e81..470ae2c29c94 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -143,6 +143,9 @@ u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *s } EXPORT_SYMBOL_GPL(call_hid_bpf_rdesc_fixup); +/* Disables missing prototype warnings */ +__bpf_kfunc_start_defs(); + /** * hid_bpf_get_data - Get the kernel memory pointer associated with the context @ctx * @@ -152,7 +155,7 @@ EXPORT_SYMBOL_GPL(call_hid_bpf_rdesc_fixup); * * @returns %NULL on error, an %__u8 memory pointer on success */ -noinline __u8 * +__bpf_kfunc __u8 * hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr_buf_size) { struct hid_bpf_ctx_kern *ctx_kern; @@ -167,6 +170,7 @@ hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr return ctx_kern->data + offset; } +__bpf_kfunc_end_defs(); /* * The following set contains all functions we agree BPF programs @@ -274,6 +278,9 @@ static int do_hid_bpf_attach_prog(struct hid_device *hdev, int prog_fd, struct b return fd; } +/* Disables missing prototype warnings */ +__bpf_kfunc_start_defs(); + /** * hid_bpf_attach_prog - Attach the given @prog_fd to the given HID device * @@ -286,7 +293,7 @@ static int do_hid_bpf_attach_prog(struct hid_device *hdev, int prog_fd, struct b * is pinned to the BPF file system). */ /* called from syscall */ -noinline int +__bpf_kfunc int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) { struct hid_device *hdev; @@ -338,7 +345,7 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) * * @returns A pointer to &struct hid_bpf_ctx on success, %NULL on error. */ -noinline struct hid_bpf_ctx * +__bpf_kfunc struct hid_bpf_ctx * hid_bpf_allocate_context(unsigned int hid_id) { struct hid_device *hdev; @@ -371,7 +378,7 @@ hid_bpf_allocate_context(unsigned int hid_id) * @ctx: the HID-BPF context to release * */ -noinline void +__bpf_kfunc void hid_bpf_release_context(struct hid_bpf_ctx *ctx) { struct hid_bpf_ctx_kern *ctx_kern; @@ -397,7 +404,7 @@ hid_bpf_release_context(struct hid_bpf_ctx *ctx) * * @returns %0 on success, a negative error code otherwise. */ -noinline int +__bpf_kfunc int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, enum hid_report_type rtype, enum hid_class_request reqtype) { @@ -465,6 +472,7 @@ hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, kfree(dma_data); return ret; } +__bpf_kfunc_end_defs(); /* our HID-BPF entrypoints */ BTF_SET8_START(hid_bpf_fmodret_ids) diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 840cd254172d..7118ac28d468 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -77,17 +77,6 @@ enum hid_bpf_attach_flags { int hid_bpf_device_event(struct hid_bpf_ctx *ctx); int hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx); -/* Following functions are kfunc that we export to BPF programs */ -/* available everywhere in HID-BPF */ -__u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t __sz); - -/* only available in syscall */ -int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags); -int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, - enum hid_report_type rtype, enum hid_class_request reqtype); -struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id); -void hid_bpf_release_context(struct hid_bpf_ctx *ctx); - /* * Below is HID internal */ From 913b9d443a0180cf0de3548f1ab3149378998486 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 31 Jan 2024 13:37:25 +0100 Subject: [PATCH 295/347] parisc: BTLB: Fix crash when setting up BTLB at CPU bringup When using hotplug and bringing up a 32-bit CPU, ask the firmware about the BTLB information to set up the static (block) TLB entries. For that write access to the static btlb_info struct is needed, but since it is marked __ro_after_init the kernel segfaults with missing write permissions. Fix the crash by dropping the __ro_after_init annotation. Fixes: e5ef93d02d6c ("parisc: BTLB: Initialize BTLB tables at CPU startup") Signed-off-by: Helge Deller Cc: # v6.6+ --- arch/parisc/kernel/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 5552602fcaef..422f3e1e6d9c 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -58,7 +58,7 @@ int pa_serialize_tlb_flushes __ro_after_init; struct pdc_cache_info cache_info __ro_after_init; #ifndef CONFIG_PA20 -struct pdc_btlb_info btlb_info __ro_after_init; +struct pdc_btlb_info btlb_info; #endif DEFINE_STATIC_KEY_TRUE(parisc_has_cache); From 89876175c8c83c35cf0cc8e21b7460dfed7b118a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 20 Jan 2024 17:32:55 +0900 Subject: [PATCH 296/347] kbuild: fix W= flags in the help message W=c and W=e are supported. Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9869f57c3fb3..02e3500c2c17 100644 --- a/Makefile +++ b/Makefile @@ -1662,7 +1662,7 @@ help: @echo ' (sparse by default)' @echo ' make C=2 [targets] Force check of all c source with $$CHECK' @echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections' - @echo ' make W=n [targets] Enable extra build checks, n=1,2,3 where' + @echo ' make W=n [targets] Enable extra build checks, n=1,2,3,c,e where' @echo ' 1: warnings which may be relevant and do not occur too often' @echo ' 2: warnings which occur quite often but may still be relevant' @echo ' 3: more obscure warnings, can most likely be ignored' From cda5f94e88b45c9209599bac15fc44add5a59f60 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 27 Jan 2024 22:28:11 +0900 Subject: [PATCH 297/347] modpost: avoid using the alias attribute Aiden Leong reported modpost fails to build on macOS since commit 16a473f60edc ("modpost: inform compilers that fatal() never returns"): scripts/mod/modpost.c:93:21: error: aliases are not supported on darwin Nathan's research indicates that Darwin seems to support weak aliases at least [1]. Although the situation might be improved in future Clang versions, we can achieve a similar outcome without relying on it. This commit makes fatal() a macro of error() + exit(1) in modpost.h, as compilers recognize that exit() never returns. [1]: https://github.com/llvm/llvm-project/issues/71001 Fixes: 16a473f60edc ("modpost: inform compilers that fatal() never returns") Reported-by: Aiden Leong Closes: https://lore.kernel.org/all/d9ac2960-6644-4a87-b5e4-4bfb6e0364a8@aibsd.com/ Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 12 +----------- scripts/mod/modpost.h | 6 +----- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index acf72112acd8..267b9a0a3abc 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -70,9 +70,7 @@ void modpost_log(enum loglevel loglevel, const char *fmt, ...) break; case LOG_ERROR: fprintf(stderr, "ERROR: "); - break; - case LOG_FATAL: - fprintf(stderr, "FATAL: "); + error_occurred = true; break; default: /* invalid loglevel, ignore */ break; @@ -83,16 +81,8 @@ void modpost_log(enum loglevel loglevel, const char *fmt, ...) va_start(arglist, fmt); vfprintf(stderr, fmt, arglist); va_end(arglist); - - if (loglevel == LOG_FATAL) - exit(1); - if (loglevel == LOG_ERROR) - error_occurred = true; } -void __attribute__((alias("modpost_log"))) -modpost_log_noret(enum loglevel loglevel, const char *fmt, ...); - static inline bool strends(const char *str, const char *postfix) { if (strlen(str) < strlen(postfix)) diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index 835cababf1b0..ee43c7950636 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -194,15 +194,11 @@ void *sym_get_data(const struct elf_info *info, const Elf_Sym *sym); enum loglevel { LOG_WARN, LOG_ERROR, - LOG_FATAL }; void __attribute__((format(printf, 2, 3))) modpost_log(enum loglevel loglevel, const char *fmt, ...); -void __attribute__((format(printf, 2, 3), noreturn)) -modpost_log_noret(enum loglevel loglevel, const char *fmt, ...); - /* * warn - show the given message, then let modpost continue running, still * allowing modpost to exit successfully. This should be used when @@ -218,4 +214,4 @@ modpost_log_noret(enum loglevel loglevel, const char *fmt, ...); */ #define warn(fmt, args...) modpost_log(LOG_WARN, fmt, ##args) #define error(fmt, args...) modpost_log(LOG_ERROR, fmt, ##args) -#define fatal(fmt, args...) modpost_log_noret(LOG_FATAL, fmt, ##args) +#define fatal(fmt, args...) do { error(fmt, ##args); exit(1); } while (1) From 82175d1f9430d5a026e2231782d13da0bf57155c Mon Sep 17 00:00:00 2001 From: Dmitry Goncharov Date: Sun, 28 Jan 2024 10:10:39 -0500 Subject: [PATCH 298/347] kbuild: Replace tabs with spaces when followed by conditionals This is needed for the future (post make-4.4.1) versions of gnu make. Starting from https://git.savannah.gnu.org/cgit/make.git/commit/?id=07fcee35f058a876447c8a021f9eb1943f902534 gnu make won't allow conditionals to follow recipe prefix. For example there is a tab followed by ifeq on line 324 in the root Makefile. With the new make this conditional causes the following $ make cpu.o /home/dgoncharov/src/linux-kbuild/Makefile:2063: *** missing 'endif'. Stop. make: *** [Makefile:240: __sub-make] Error 2 This patch replaces tabs followed by conditionals with 8 spaces. See https://savannah.gnu.org/bugs/?64185 and https://savannah.gnu.org/bugs/?64259 for details. Signed-off-by: Dmitry Goncharov Reported-by: Martin Dorey Reviewed-by: Miguel Ojeda Signed-off-by: Masahiro Yamada --- Makefile | 12 ++++++------ arch/m68k/Makefile | 4 ++-- arch/parisc/Makefile | 4 ++-- arch/x86/Makefile | 10 +++++----- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 02e3500c2c17..087c9c64f611 100644 --- a/Makefile +++ b/Makefile @@ -294,15 +294,15 @@ may-sync-config := 1 single-build := ifneq ($(filter $(no-dot-config-targets), $(MAKECMDGOALS)),) - ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),) + ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),) need-config := - endif + endif endif ifneq ($(filter $(no-sync-config-targets), $(MAKECMDGOALS)),) - ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),) + ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),) may-sync-config := - endif + endif endif need-compiler := $(may-sync-config) @@ -323,9 +323,9 @@ endif # We cannot build single targets and the others at the same time ifneq ($(filter $(single-targets), $(MAKECMDGOALS)),) single-build := 1 - ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),) + ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),) mixed-build := 1 - endif + endif endif # For "make -j clean all", "make -j mrproper defconfig all", etc. diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index 43e39040d3ac..76ef1a67c361 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -15,10 +15,10 @@ KBUILD_DEFCONFIG := multi_defconfig ifdef cross_compiling - ifeq ($(CROSS_COMPILE),) + ifeq ($(CROSS_COMPILE),) CROSS_COMPILE := $(call cc-cross-prefix, \ m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-) - endif + endif endif # diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 920db57b6b4c..7486b3b30594 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -50,12 +50,12 @@ export CROSS32CC # Set default cross compiler for kernel build ifdef cross_compiling - ifeq ($(CROSS_COMPILE),) + ifeq ($(CROSS_COMPILE),) CC_SUFFIXES = linux linux-gnu unknown-linux-gnu suse-linux CROSS_COMPILE := $(call cc-cross-prefix, \ $(foreach a,$(CC_ARCHES), \ $(foreach s,$(CC_SUFFIXES),$(a)-$(s)-))) - endif + endif endif ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1a068de12a56..2264db14a25d 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -112,13 +112,13 @@ ifeq ($(CONFIG_X86_32),y) # temporary until string.h is fixed KBUILD_CFLAGS += -ffreestanding - ifeq ($(CONFIG_STACKPROTECTOR),y) - ifeq ($(CONFIG_SMP),y) + ifeq ($(CONFIG_STACKPROTECTOR),y) + ifeq ($(CONFIG_SMP),y) KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard - else + else KBUILD_CFLAGS += -mstack-protector-guard=global - endif - endif + endif + endif else BITS := 64 UTS_MACHINE := x86_64 From 358de8b4f201bc05712484b15f0109b1ae3516a8 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Mon, 29 Jan 2024 10:28:19 +0100 Subject: [PATCH 299/347] kbuild: rpm-pkg: simplify installkernel %post The new installkernel application that is now included in systemd-udev package allows installation although destination files are already present in the boot directory of the kernel package, but is failing with the implemented workaround for the old installkernel application from grubby package. For the new installkernel application, as Davide says: <> But we need to keep the old behavior as well, because the old installkernel application from grubby package, does not allow this simplification and we need to be backward compatible to avoid issues with the different packages. Mimic Fedora shipping process and store vmlinuz, config amd System.map in the module directory instead of the boot directory. In this way, we will avoid the commented problem for all the cases, because the new destination files are not going to exist in the boot directory of the kernel package. Replace installkernel tool with kernel-install tool, because the latter is more complete. Besides, after installkernel tool execution, check to complete if the correct package files vmlinuz, System.map and config files are present in /boot directory, and if necessary, copy manually for install operation. In this way, take into account if files were not previously copied from /usr/lib/kernel/install.d/* scripts and if the suitable files for the requested package are present (it could be others if the rpm files were replace with a new pacakge with the same release and a different build). Tested with Fedora 38, Fedora 39, RHEL 9, Oracle Linux 9.3, openSUSE Tumbleweed and openMandrive ROME, using dnf/zypper and rpm tools. cc: stable@vger.kernel.org Co-Developed-by: Davide Cavalca Signed-off-by: Jose Ignacio Tornos Martinez Signed-off-by: Masahiro Yamada --- scripts/package/kernel.spec | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec index 89298983a169..f58726671fb3 100644 --- a/scripts/package/kernel.spec +++ b/scripts/package/kernel.spec @@ -55,12 +55,12 @@ patch -p1 < %{SOURCE2} %{make} %{makeflags} KERNELRELEASE=%{KERNELRELEASE} KBUILD_BUILD_VERSION=%{release} %install -mkdir -p %{buildroot}/boot -cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/vmlinuz-%{KERNELRELEASE} +mkdir -p %{buildroot}/lib/modules/%{KERNELRELEASE} +cp $(%{make} %{makeflags} -s image_name) %{buildroot}/lib/modules/%{KERNELRELEASE}/vmlinuz %{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} modules_install %{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install -cp System.map %{buildroot}/boot/System.map-%{KERNELRELEASE} -cp .config %{buildroot}/boot/config-%{KERNELRELEASE} +cp System.map %{buildroot}/lib/modules/%{KERNELRELEASE} +cp .config %{buildroot}/lib/modules/%{KERNELRELEASE}/config ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEASE}/build %if %{with_devel} %{make} %{makeflags} run-command KBUILD_RUN_COMMAND='${srctree}/scripts/package/install-extmod-build %{buildroot}/usr/src/kernels/%{KERNELRELEASE}' @@ -70,13 +70,14 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA rm -rf %{buildroot} %post -if [ -x /sbin/installkernel -a -r /boot/vmlinuz-%{KERNELRELEASE} -a -r /boot/System.map-%{KERNELRELEASE} ]; then -cp /boot/vmlinuz-%{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm -cp /boot/System.map-%{KERNELRELEASE} /boot/.System.map-%{KERNELRELEASE}-rpm -rm -f /boot/vmlinuz-%{KERNELRELEASE} /boot/System.map-%{KERNELRELEASE} -/sbin/installkernel %{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm -rm -f /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm +if [ -x /usr/bin/kernel-install ]; then + /usr/bin/kernel-install add %{KERNELRELEASE} /lib/modules/%{KERNELRELEASE}/vmlinuz fi +for file in vmlinuz System.map config; do + if ! cmp --silent "/lib/modules/%{KERNELRELEASE}/${file}" "/boot/${file}-%{KERNELRELEASE}"; then + cp "/lib/modules/%{KERNELRELEASE}/${file}" "/boot/${file}-%{KERNELRELEASE}" + fi +done %preun if [ -x /sbin/new-kernel-pkg ]; then @@ -94,7 +95,6 @@ fi %defattr (-, root, root) /lib/modules/%{KERNELRELEASE} %exclude /lib/modules/%{KERNELRELEASE}/build -/boot/* %files headers %defattr (-, root, root) From bfef491df67022c56aab3b831044f8d259f9441f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 26 Jan 2024 22:30:10 +0900 Subject: [PATCH 300/347] kconfig: initialize sym->curr.tri to 'no' for all symbol types again Geert Uytterhoeven reported that commit 4e244c10eab3 ("kconfig: remove unneeded symbol_empty variable") changed the default value of CONFIG_LOG_CPU_MAX_BUF_SHIFT from 12 to 0. As it turned out, this is an undefined behavior because sym_calc_value() stopped setting the sym->curr.tri field for 'int', 'hex', and 'string' symbols. This commit restores the original behavior, where 'int', 'hex', 'string' symbols are interpreted as false if used in boolean contexts. CONFIG_LOG_CPU_MAX_BUF_SHIFT will default to 12 again, irrespective of CONFIG_BASE_SMALL. Presumably, this is not the intended behavior, as already reported [1], but this is another issue that should be addressed by a separate patch. [1]: https://lore.kernel.org/all/f6856be8-54b7-0fa0-1d17-39632bf29ada@oracle.com/ Fixes: 4e244c10eab3 ("kconfig: remove unneeded symbol_empty variable") Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/all/CAMuHMdWm6u1wX7efZQf=2XUAHascps76YQac6rdnQGhc8nop_Q@mail.gmail.com/ Tested-by: Geert Uytterhoeven Signed-off-by: Masahiro Yamada --- scripts/kconfig/symbol.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 3e808528aaea..e9e9fb8d8674 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -345,6 +345,8 @@ void sym_calc_value(struct symbol *sym) oldval = sym->curr; + newval.tri = no; + switch (sym->type) { case S_INT: newval.val = "0"; @@ -357,7 +359,7 @@ void sym_calc_value(struct symbol *sym) break; case S_BOOLEAN: case S_TRISTATE: - newval = symbol_no.curr; + newval.val = "n"; break; default: sym->curr.val = sym->name; From c9ec85153fea6873c52ed4f5055c87263f1b54f9 Mon Sep 17 00:00:00 2001 From: Matthias May Date: Tue, 30 Jan 2024 10:12:18 +0000 Subject: [PATCH 301/347] selftests: net: add missing config for GENEVE l2_tos_ttl_inherit.sh verifies the inheritance of tos and ttl for GRETAP, VXLAN and GENEVE. Before testing it checks if the required module is available and if not skips the tests accordingly. Currently only GRETAP and VXLAN are tested because the GENEVE module is missing. Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting") Signed-off-by: Matthias May Link: https://lore.kernel.org/r/20240130101157.196006-1-matthias.may@westermo.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 3d908b52f22f..77a173635a29 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -19,6 +19,7 @@ CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_BRIDGE=y CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_VLAN_8021Q=y +CONFIG_GENEVE=m CONFIG_IFB=y CONFIG_INET_DIAG=y CONFIG_IP_GRE=m From fb366fc7541a1de521ab3df58471746aa793b833 Mon Sep 17 00:00:00 2001 From: Ryan Schaefer Date: Sun, 21 Jan 2024 21:51:44 +0000 Subject: [PATCH 302/347] netfilter: conntrack: correct window scaling with retransmitted SYN commit c7aab4f17021 ("netfilter: nf_conntrack_tcp: re-init for syn packets only") introduces a bug where SYNs in ORIGINAL direction on reused 5-tuple result in incorrect window scale negotiation. This commit merged the SYN re-initialization and simultaneous open or SYN retransmits cases. Merging this block added the logic in tcp_init_sender() that performed window scale negotiation to the retransmitted syn case. Previously. this would only result in updating the sender's scale and flags. After the merge the additional logic results in improperly clearing the scale in ORIGINAL direction before any packets in the REPLY direction are received. This results in packets incorrectly being marked invalid for being out-of-window. This can be reproduced with the following trace: Packet Sequence: > Flags [S], seq 1687765604, win 62727, options [.. wscale 7], length 0 > Flags [S], seq 1944817196, win 62727, options [.. wscale 7], length 0 In order to fix the issue, only evaluate window negotiation for packets in the REPLY direction. This was tested with simultaneous open, fast open, and the above reproduction. Fixes: c7aab4f17021 ("netfilter: nf_conntrack_tcp: re-init for syn packets only") Signed-off-by: Ryan Schaefer Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index e573be5afde7..ae493599a3ef 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -457,7 +457,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, - u32 end, u32 win) + u32 end, u32 win, + enum ip_conntrack_dir dir) { /* SYN-ACK in reply to a SYN * or SYN from reply direction in simultaneous open. @@ -471,7 +472,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, * Both sides must send the Window Scale option * to enable window scaling in either direction. */ - if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && + if (dir == IP_CT_DIR_REPLY && + !(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) { sender->td_scale = 0; receiver->td_scale = 0; @@ -542,7 +544,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, if (tcph->syn) { tcp_init_sender(sender, receiver, skb, dataoff, tcph, - end, win); + end, win, dir); if (!tcph->ack) /* Simultaneous open */ return NFCT_TCP_ACCEPT; @@ -585,7 +587,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, */ tcp_init_sender(sender, receiver, skb, dataoff, tcph, - end, win); + end, win, dir); if (dir == IP_CT_DIR_REPLY && !tcph->ack) return NFCT_TCP_ACCEPT; From 776d451648443f9884be4a1b4e38e8faf1c621f9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 23 Jan 2024 23:45:32 +0100 Subject: [PATCH 303/347] netfilter: nf_tables: restrict tunnel object to NFPROTO_NETDEV Bail out on using the tunnel dst template from other than netdev family. Add the infrastructure to check for the family in objects. Fixes: af308b94a2a4 ("netfilter: nf_tables: add tunnel support") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 14 +++++++++----- net/netfilter/nft_tunnel.c | 1 + 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4e1ea18eb5f0..001226c34621 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1351,6 +1351,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, * @type: stateful object numeric type * @owner: module owner * @maxattr: maximum netlink attribute + * @family: address family for AF-specific object types * @policy: netlink attribute policy */ struct nft_object_type { @@ -1360,6 +1361,7 @@ struct nft_object_type { struct list_head list; u32 type; unsigned int maxattr; + u8 family; struct module *owner; const struct nla_policy *policy; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c537104411e7..fc016befb46f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7551,11 +7551,15 @@ nla_put_failure: return -1; } -static const struct nft_object_type *__nft_obj_type_get(u32 objtype) +static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family) { const struct nft_object_type *type; list_for_each_entry(type, &nf_tables_objects, list) { + if (type->family != NFPROTO_UNSPEC && + type->family != family) + continue; + if (objtype == type->type) return type; } @@ -7563,11 +7567,11 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype) } static const struct nft_object_type * -nft_obj_type_get(struct net *net, u32 objtype) +nft_obj_type_get(struct net *net, u32 objtype, u8 family) { const struct nft_object_type *type; - type = __nft_obj_type_get(objtype); + type = __nft_obj_type_get(objtype, family); if (type != NULL && try_module_get(type->owner)) return type; @@ -7660,7 +7664,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - type = __nft_obj_type_get(objtype); + type = __nft_obj_type_get(objtype, family); if (WARN_ON_ONCE(!type)) return -ENOENT; @@ -7674,7 +7678,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (!nft_use_inc(&table->use)) return -EMFILE; - type = nft_obj_type_get(net, objtype); + type = nft_obj_type_get(net, objtype, family); if (IS_ERR(type)) { err = PTR_ERR(type); goto err_type; diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 9f21953c7433..f735d79d8be5 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -713,6 +713,7 @@ static const struct nft_object_ops nft_tunnel_obj_ops = { static struct nft_object_type nft_tunnel_obj_type __read_mostly = { .type = NFT_OBJECT_TUNNEL, + .family = NFPROTO_NETDEV, .ops = &nft_tunnel_obj_ops, .maxattr = NFTA_TUNNEL_KEY_MAX, .policy = nft_tunnel_key_policy, From 6e348067ee4bc5905e35faa3a8fafa91c9124bc7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 25 Jan 2024 17:29:46 -0500 Subject: [PATCH 304/347] netfilter: conntrack: check SCTP_CID_SHUTDOWN_ACK for vtag setting in sctp_new The annotation says in sctp_new(): "If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8)". However, it does not check SCTP_CID_SHUTDOWN_ACK before setting vtag[REPLY] in the conntrack entry(ct). Because of that, if the ct in Router disappears for some reason in [1] with the packet sequence like below: Client > Server: sctp (1) [INIT] [init tag: 3201533963] Server > Client: sctp (1) [INIT ACK] [init tag: 972498433] Client > Server: sctp (1) [COOKIE ECHO] Server > Client: sctp (1) [COOKIE ACK] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057809] Server > Client: sctp (1) [SACK] [cum ack 3075057809] Server > Client: sctp (1) [HB REQ] (the ct in Router disappears somehow) <-------- [1] Client > Server: sctp (1) [HB ACK] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [HB REQ] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [HB REQ] Client > Server: sctp (1) [ABORT] when processing HB ACK packet in Router it calls sctp_new() to initialize the new ct with vtag[REPLY] set to HB_ACK packet's vtag. Later when sending DATA from Client, all the SACKs from Server will get dropped in Router, as the SACK packet's vtag does not match vtag[REPLY] in the ct. The worst thing is the vtag in this ct will never get fixed by the upcoming packets from Server. This patch fixes it by checking SCTP_CID_SHUTDOWN_ACK before setting vtag[REPLY] in the ct in sctp_new() as the annotation says. With this fix, it will leave vtag[REPLY] in ct to 0 in the case above, and the next HB REQ/ACK from Server is able to fix the vtag as its value is 0 in nf_conntrack_sctp_packet(). Signed-off-by: Xin Long Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_sctp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c6bd533983c1..4cc97f971264 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -283,7 +283,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; - } else { + } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) { /* If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ pr_debug("Setting vtag %x for new conn OOTB\n", From 97f7cf1cd80eeed3b7c808b7c12463295c751001 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 29 Jan 2024 10:57:01 +0100 Subject: [PATCH 305/347] netfilter: ipset: fix performance regression in swap operation The patch "netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test", commit 28628fa9 fixes a race condition. But the synchronize_rcu() added to the swap function unnecessarily slows it down: it can safely be moved to destroy and use call_rcu() instead. Eric Dumazet pointed out that simply calling the destroy functions as rcu callback does not work: sets with timeout use garbage collectors which need cancelling at destroy which can wait. Therefore the destroy functions are split into two: cancelling garbage collectors safely at executing the command received by netlink and moving the remaining part only into the rcu callback. Link: https://lore.kernel.org/lkml/C0829B10-EAA6-4809-874E-E1E9C05A8D84@automattic.com/ Fixes: 28628fa952fe ("netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test") Reported-by: Ale Crismani Reported-by: David Wang <00107082@163.com> Tested-by: David Wang <00107082@163.com> Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 4 +++ net/netfilter/ipset/ip_set_bitmap_gen.h | 14 ++++++++-- net/netfilter/ipset/ip_set_core.c | 37 +++++++++++++++++++------ net/netfilter/ipset/ip_set_hash_gen.h | 15 ++++++++-- net/netfilter/ipset/ip_set_list_set.c | 13 +++++++-- 5 files changed, 65 insertions(+), 18 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index e8c350a3ade1..e9f4f845d760 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -186,6 +186,8 @@ struct ip_set_type_variant { /* Return true if "b" set is the same as "a" * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); + /* Cancel ongoing garbage collectors before destroying the set*/ + void (*cancel_gc)(struct ip_set *set); /* Region-locking is used */ bool region_lock; }; @@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type); /* A generic IP set */ struct ip_set { + /* For call_cru in destroy */ + struct rcu_head rcu; /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 21f7860e8fa1..cb48a2b9cb9f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -30,6 +30,7 @@ #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_list IPSET_TOKEN(MTYPE, _list) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype MTYPE #define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) @@ -59,9 +60,6 @@ mtype_destroy(struct ip_set *set) { struct mtype *map = set->data; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - if (set->dsize && set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); ip_set_free(map->members); @@ -290,6 +288,15 @@ mtype_gc(struct timer_list *t) add_timer(&map->gc); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct mtype *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant mtype = { .kadt = mtype_kadt, .uadt = mtype_uadt, @@ -303,6 +310,7 @@ static const struct ip_set_type_variant mtype = { .head = mtype_head, .list = mtype_list, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, }; #endif /* __IP_SET_BITMAP_IP_GEN_H */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 4c133e06be1d..bcaad9c009fe 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1182,6 +1182,14 @@ ip_set_destroy_set(struct ip_set *set) kfree(set); } +static void +ip_set_destroy_set_rcu(struct rcu_head *head) +{ + struct ip_set *set = container_of(head, struct ip_set, rcu); + + ip_set_destroy_set(set); +} + static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { @@ -1193,8 +1201,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); /* Commands are serialized and references are * protected by the ip_set_ref_lock. @@ -1206,8 +1212,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, * counter, so if it's already zero, we can proceed * without holding the lock. */ - read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { + /* Must wait for flush to be really finished in list:set */ + rcu_barrier(); + read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s && (s->ref || s->ref_netlink)) { @@ -1221,6 +1229,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, s = ip_set(inst, i); if (s) { ip_set(inst, i) = NULL; + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); ip_set_destroy_set(s); } } @@ -1228,6 +1238,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, inst->is_destroyed = false; } else { u32 flags = flag_exist(info->nlh); + u16 features = 0; + + read_lock_bh(&ip_set_ref_lock); s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); if (!s) { @@ -1238,10 +1251,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, ret = -IPSET_ERR_BUSY; goto out; } + features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); - - ip_set_destroy_set(s); + if (features & IPSET_TYPE_NAME) { + /* Must wait for flush to be really finished */ + rcu_barrier(); + } + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); + call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; out: @@ -1394,9 +1413,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); - /* Make sure all readers of the old set pointers are completed. */ - synchronize_rcu(); - return 0; } @@ -2409,8 +2425,11 @@ ip_set_fini(void) { nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - unregister_pernet_subsys(&ip_set_net_ops); + + /* Wait for call_rcu() in destroy */ + rcu_barrier(); + pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index cbf80da9a01c..1136510521a8 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -222,6 +222,7 @@ static const union nf_inet_addr zeromask = {}; #undef mtype_gc_do #undef mtype_gc #undef mtype_gc_init +#undef mtype_cancel_gc #undef mtype_variant #undef mtype_data_match @@ -266,6 +267,7 @@ static const union nf_inet_addr zeromask = {}; #define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) @@ -450,9 +452,6 @@ mtype_destroy(struct ip_set *set) struct htype *h = set->data; struct list_head *l, *lt; - if (SET_WITH_TIMEOUT(set)) - cancel_delayed_work_sync(&h->gc.dwork); - mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); list_for_each_safe(l, lt, &h->ad) { list_del(l); @@ -599,6 +598,15 @@ mtype_gc_init(struct htable_gc *gc) queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct htype *h = set->data; + + if (SET_WITH_TIMEOUT(set)) + cancel_delayed_work_sync(&h->gc.dwork); +} + static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags); @@ -1441,6 +1449,7 @@ static const struct ip_set_type_variant mtype_variant = { .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, .region_lock = true, }; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index e162636525cf..6c3f28bc59b3 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set) struct list_set *map = set->data; struct set_elem *e, *n; - if (SET_WITH_TIMEOUT(set)) - timer_shutdown_sync(&map->gc); - list_for_each_entry_safe(e, n, &map->members, list) { list_del(&e->list); ip_set_put_byindex(map->net, e->id); @@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } +static void +list_set_cancel_gc(struct ip_set *set) +{ + struct list_set *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + timer_shutdown_sync(&map->gc); +} + static const struct ip_set_type_variant set_variant = { .kadt = list_set_kadt, .uadt = list_set_uadt, @@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = { .head = list_set_head, .list = list_set_list, .same_set = list_set_same_set, + .cancel_gc = list_set_cancel_gc, }; static void From 259eb32971e9eb24d1777a28d82730659f50fdcb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 29 Jan 2024 11:09:43 +0100 Subject: [PATCH 306/347] netfilter: nf_log: replace BUG_ON by WARN_ON_ONCE when putting logger Module reference is bumped for each user, this should not ever happen. But BUG_ON check should use rcu_access_pointer() instead. If this ever happens, do WARN_ON_ONCE() instead of BUG_ON() and consolidate pointer check under the rcu read side lock section. Fixes: fab4085f4e24 ("netfilter: log: nf_log_packet() as real unified interface") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_log.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8cc52d2bd31b..e16f158388bb 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -193,11 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type) return; } - BUG_ON(loggers[pf][type] == NULL); - rcu_read_lock(); logger = rcu_dereference(loggers[pf][type]); - module_put(logger->me); + if (!logger) + WARN_ON_ONCE(1); + else + module_put(logger->me); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_logger_put); From 8059918a1377f2f1fff06af4f5a4ed3d5acd6bc4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 29 Jan 2024 13:12:33 +0100 Subject: [PATCH 307/347] netfilter: nft_ct: sanitize layer 3 and 4 protocol number in custom expectations - Disallow families other than NFPROTO_{IPV4,IPV6,INET}. - Disallow layer 4 protocol with no ports, since destination port is a mandatory attribute for this object. Fixes: 857b46027d6f ("netfilter: nft_ct: add ct expectations support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 86bb9d7797d9..aac98a3c966e 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1250,7 +1250,31 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx, if (tb[NFTA_CT_EXPECT_L3PROTO]) priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO])); + switch (priv->l3num) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + if (priv->l3num != ctx->family) + return -EINVAL; + + fallthrough; + case NFPROTO_INET: + break; + default: + return -EOPNOTSUPP; + } + priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]); + switch (priv->l4proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_DCCP: + case IPPROTO_SCTP: + break; + default: + return -EOPNOTSUPP; + } + priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]); priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]); priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]); From e028243003ad6e1417cc8ac19af9ded672b9ec59 Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Mon, 29 Jan 2024 11:12:11 -0600 Subject: [PATCH 308/347] MAINTAINERS: Drop unreachable reviewer for Qualcomm ETHQOS ethernet driver Bhupesh's email responds indicating they've changed employers and with no new contact information. Let's drop the line from MAINTAINERS to avoid getting the same response over and over. Signed-off-by: Andrew Halaney Link: https://lore.kernel.org/r/20240129-remove-dwmac-qcom-ethqos-reviewer-v1-1-2645eab61451@redhat.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9435ea0f8cda..6ae30c50e79f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18085,7 +18085,6 @@ F: drivers/net/ethernet/qualcomm/emac/ QUALCOMM ETHQOS ETHERNET DRIVER M: Vinod Koul -R: Bhupesh Sharma L: netdev@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Maintained From 5dee6d6923458e26966717f2a3eae7d09fc10bf6 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Mon, 29 Jan 2024 17:10:17 +0800 Subject: [PATCH 309/347] net: ipv4: fix a memleak in ip_setup_cork When inetdev_valid_mtu fails, cork->opt should be freed if it is allocated in ip_setup_cork. Otherwise there could be a memleak. Fixes: 501a90c94510 ("inet: protect against too small mtu values.") Signed-off-by: Zhipeng Lu Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240129091017.2938835-1-alexious@zju.edu.cn Signed-off-by: Jakub Kicinski --- net/ipv4/ip_output.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b06f678b03a1..41537d18eecf 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1287,6 +1287,12 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, if (unlikely(!rt)) return -EFAULT; + cork->fragsize = ip_sk_use_pmtu(sk) ? + dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); + + if (!inetdev_valid_mtu(cork->fragsize)) + return -ENETUNREACH; + /* * setup for corking. */ @@ -1303,12 +1309,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->addr = ipc->addr; } - cork->fragsize = ip_sk_use_pmtu(sk) ? - dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); - - if (!inetdev_valid_mtu(cork->fragsize)) - return -ENETUNREACH; - cork->gso_size = ipc->gso_size; cork->dst = &rt->dst; From 585b40e25dc9ff3d2b03d1495150540849009e5b Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 29 Jan 2024 23:49:48 +0100 Subject: [PATCH 310/347] net: dsa: mv88e6xxx: Fix failed probe due to unsupported C45 reads Not all mv88e6xxx device support C45 read/write operations. Those which do not return -EOPNOTSUPP. However, when phylib scans the bus, it considers this fatal, and the probe of the MDIO bus fails, which in term causes the mv88e6xxx probe as a whole to fail. When there is no device on the bus for a given address, the pull up resistor on the data line results in the read returning 0xffff. The phylib core code understands this when scanning for devices on the bus. C45 allows multiple devices to be supported at one address, so phylib will perform a few reads at each address, so although thought not the most efficient solution, it is a way to avoid fatal errors. Make use of this as a minimal fix for stable to fix the probing problems. Follow up patches will rework how C45 operates to make it similar to C22 which considers -ENODEV as a none-fatal, and swap mv88e6xxx to using this. Cc: stable@vger.kernel.org Fixes: 743a19e38d02 ("net: dsa: mv88e6xxx: Separate C22 and C45 transactions") Reported-by: Tim Menninger Signed-off-by: Andrew Lunn Link: https://lore.kernel.org/r/20240129224948.1531452-1-andrew@lunn.ch Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 383b3c4d6f59..614cabb5c1b0 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3659,7 +3659,7 @@ static int mv88e6xxx_mdio_read_c45(struct mii_bus *bus, int phy, int devad, int err; if (!chip->info->ops->phy_read_c45) - return -EOPNOTSUPP; + return 0xffff; mv88e6xxx_reg_lock(chip); err = chip->info->ops->phy_read_c45(chip, bus, phy, devad, reg, &val); From d8f5df1fcea54923b74558035b8de8fb2da3e816 Mon Sep 17 00:00:00 2001 From: Mohammad Nassiri Date: Tue, 30 Jan 2024 03:51:52 +0000 Subject: [PATCH 311/347] selftests/net: Argument value mismatch when calling verify_counters() The end_server() function only operates in the server thread and always takes an accept socket instead of a listen socket as its input argument. To align with this, invert the boolean values used when calling verify_counters() within the end_server() function. As a result of this typo, the test didn't correctly check for the non-symmetrical scenario, where i.e. peer-A uses a key <100:200> to send data, but peer-B uses another key <105:205> to send its data. So, in simple words, different keys for TX and RX. Fixes: 3c3ead555648 ("selftests/net: Add TCP-AO key-management test") Signed-off-by: Mohammad Nassiri Link: https://lore.kernel.org/all/934627c5-eebb-4626-be23-cfb134c01d1a@arista.com/ [amended 'Fixes' tag, added the issue description and carried-over to lkml] Signed-off-by: Dmitry Safonov Link: https://lore.kernel.org/r/20240130-tcp-ao-test-key-mgmt-v2-1-d190430a6c60@arista.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tcp_ao/key-management.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c index c48b4970ca17..f6a9395e3cd7 100644 --- a/tools/testing/selftests/net/tcp_ao/key-management.c +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -843,7 +843,7 @@ static void end_server(const char *tst_name, int sk, synchronize_threads(); /* 4: verified => closed */ close(sk); - verify_counters(tst_name, true, false, begin, &end); + verify_counters(tst_name, false, true, begin, &end); synchronize_threads(); /* 5: counters */ } From 384aa16d3776a9ca5c0c1f1e7af4030fe176a993 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 30 Jan 2024 03:51:53 +0000 Subject: [PATCH 312/347] selftests/net: Rectify key counters checks As the names of (struct test_key) members didn't reflect whether the key was used for TX or RX, the verification for the counters was done incorrectly for asymmetrical selftests. Rename these with _tx appendix and fix checks in verify_counters(). While at it, as the checks are now correct, introduce skip_counters_checks, which is intended for tests where it's expected that a key that was set with setsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, ...) might had no chance of getting used on the wire. Fixes the following failures, exposed by the previous commit: > not ok 51 server: Check current != rnext keys set before connect(): Counter pkt_good was expected to increase 0 => 0 for key 132:5 > not ok 52 server: Check current != rnext keys set before connect(): Counter pkt_good was not expected to increase 0 => 21 for key 137:10 > > not ok 63 server: Check current flapping back on peer's RnextKey request: Counter pkt_good was expected to increase 0 => 0 for key 132:5 > not ok 64 server: Check current flapping back on peer's RnextKey request: Counter pkt_good was not expected to increase 0 => 40 for key 137:10 Cc: Mohammad Nassiri Fixes: 3c3ead555648 ("selftests/net: Add TCP-AO key-management test") Signed-off-by: Dmitry Safonov Link: https://lore.kernel.org/r/20240130-tcp-ao-test-key-mgmt-v2-2-d190430a6c60@arista.com Signed-off-by: Jakub Kicinski --- .../selftests/net/tcp_ao/key-management.c | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c index f6a9395e3cd7..24e62120b792 100644 --- a/tools/testing/selftests/net/tcp_ao/key-management.c +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -417,9 +417,9 @@ struct test_key { matches_vrf : 1, is_current : 1, is_rnext : 1, - used_on_handshake : 1, - used_after_accept : 1, - used_on_client : 1; + used_on_server_tx : 1, + used_on_client_tx : 1, + skip_counters_checks : 1; }; struct key_collection { @@ -609,16 +609,14 @@ static int key_collection_socket(bool server, unsigned int port) addr = &this_ip_dest; sndid = key->client_keyid; rcvid = key->server_keyid; - set_current = key->is_current; - set_rnext = key->is_rnext; + key->used_on_client_tx = set_current = key->is_current; + key->used_on_server_tx = set_rnext = key->is_rnext; } if (test_add_key_cr(sk, key->password, key->len, *addr, vrf, sndid, rcvid, key->maclen, key->alg, set_current, set_rnext)) test_key_error("setsockopt(TCP_AO_ADD_KEY)", key); - if (set_current || set_rnext) - key->used_on_handshake = 1; #ifdef DEBUG test_print("%s [%u/%u] key: { %s, %u:%u, %u, %u:%u:%u:%u (%u)}", server ? "server" : "client", i, collection.nr_keys, @@ -640,22 +638,22 @@ static void verify_counters(const char *tst_name, bool is_listen_sk, bool server for (i = 0; i < collection.nr_keys; i++) { struct test_key *key = &collection.keys[i]; uint8_t sndid, rcvid; - bool was_used; + bool rx_cnt_expected; + if (key->skip_counters_checks) + continue; if (server) { sndid = key->server_keyid; rcvid = key->client_keyid; - if (is_listen_sk) - was_used = key->used_on_handshake; - else - was_used = key->used_after_accept; + rx_cnt_expected = key->used_on_client_tx; } else { sndid = key->client_keyid; rcvid = key->server_keyid; - was_used = key->used_on_client; + rx_cnt_expected = key->used_on_server_tx; } - test_tcp_ao_key_counters_cmp(tst_name, a, b, was_used, + test_tcp_ao_key_counters_cmp(tst_name, a, b, + rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0, sndid, rcvid); } test_tcp_ao_counters_free(a); @@ -916,9 +914,8 @@ static int run_client(const char *tst_name, unsigned int port, current_index = nr_keys - 1; if (rnext_index < 0) rnext_index = nr_keys - 1; - collection.keys[current_index].used_on_handshake = 1; - collection.keys[rnext_index].used_after_accept = 1; - collection.keys[rnext_index].used_on_client = 1; + collection.keys[current_index].used_on_client_tx = 1; + collection.keys[rnext_index].used_on_server_tx = 1; synchronize_threads(); /* 3: accepted => send data */ if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) { @@ -1059,7 +1056,16 @@ static void check_current_back(const char *tst_name, unsigned int port, test_error("Can't change the current key"); if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) test_fail("verify failed"); - collection.keys[rotate_to_index].used_after_accept = 1; + /* There is a race here: between setting the current_key with + * setsockopt(TCP_AO_INFO) and starting to send some data - there + * might have been a segment received with the desired + * RNext_key set. In turn that would mean that the first outgoing + * segment will have the desired current_key (flipped back). + * Which is what the user/test wants. As it's racy, skip checking + * the counters, yet check what are the resulting current/rnext + * keys on both sides. + */ + collection.keys[rotate_to_index].skip_counters_checks = 1; end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); } @@ -1089,7 +1095,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port, } verify_current_rnext(tst_name, sk, -1, collection.keys[i].server_keyid); - collection.keys[i].used_on_client = 1; + collection.keys[i].used_on_server_tx = 1; synchronize_threads(); /* verify current/rnext */ } end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); From 6caf3adcc877b3470e98133d52b360ebe5f7a6a3 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 30 Jan 2024 03:51:54 +0000 Subject: [PATCH 313/347] selftests/net: Repair RST passive reset selftest Currently, the test is racy and seems to not pass anymore. In order to rectify it, aim on TCP_TW_RST. Doesn't seem way too good with this sleep() part, but it seems as a reasonable compromise for the test. There is a plan in-line comment on how-to improve it, going to do it on the top, at this moment I want it to run on netdev/patchwork selftests dashboard. It also slightly changes tcp_ao-lib in order to get SO_ERROR propagated to test_client_verify() return value. Fixes: c6df7b2361d7 ("selftests/net: Add TCP-AO RST test") Signed-off-by: Dmitry Safonov Link: https://lore.kernel.org/r/20240130-tcp-ao-test-key-mgmt-v2-3-d190430a6c60@arista.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tcp_ao/lib/sock.c | 12 +- tools/testing/selftests/net/tcp_ao/rst.c | 138 ++++++++++++------ 2 files changed, 98 insertions(+), 52 deletions(-) diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c index c75d82885a2e..15aeb0963058 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/sock.c +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -62,7 +62,9 @@ int test_wait_fd(int sk, time_t sec, bool write) return -ETIMEDOUT; } - if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen) || ret) + if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen)) + return -errno; + if (ret) return -ret; return 0; } @@ -584,9 +586,11 @@ int test_client_verify(int sk, const size_t msg_len, const size_t nr, { size_t buf_sz = msg_len * nr; char *buf = alloca(buf_sz); + ssize_t ret; randomize_buffer(buf, buf_sz); - if (test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec) != buf_sz) - return -1; - return 0; + ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec); + if (ret < 0) + return (int)ret; + return ret != buf_sz ? -1 : 0; } diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c index ac06009a7f5f..7df8b8700e39 100644 --- a/tools/testing/selftests/net/tcp_ao/rst.c +++ b/tools/testing/selftests/net/tcp_ao/rst.c @@ -1,10 +1,33 @@ // SPDX-License-Identifier: GPL-2.0 -/* Author: Dmitry Safonov */ +/* + * The test checks that both active and passive reset have correct TCP-AO + * signature. An "active" reset (abort) here is procured from closing + * listen() socket with non-accepted connections in the queue: + * inet_csk_listen_stop() => inet_child_forget() => + * => tcp_disconnect() => tcp_send_active_reset() + * + * The passive reset is quite hard to get on established TCP connections. + * It could be procured from non-established states, but the synchronization + * part from userspace in order to reliably get RST seems uneasy. + * So, instead it's procured by corrupting SEQ number on TIMED-WAIT state. + * + * It's important to test both passive and active RST as they go through + * different code-paths: + * - tcp_send_active_reset() makes no-data skb, sends it with tcp_transmit_skb() + * - tcp_v*_send_reset() create their reply skbs and send them with + * ip_send_unicast_reply() + * + * In both cases TCP-AO signatures have to be correct, which is verified by + * (1) checking that the TCP-AO connection was reset and (2) TCP-AO counters. + * + * Author: Dmitry Safonov + */ #include #include "../../../../include/linux/kernel.h" #include "aolib.h" const size_t quota = 1000; +const size_t packet_sz = 100; /* * Backlog == 0 means 1 connection in queue, see: * commit 64a146513f8f ("[NET]: Revert incorrect accept queue...") @@ -59,26 +82,6 @@ static void close_forced(int sk) close(sk); } -static int test_wait_for_exception(int sk, time_t sec) -{ - struct timeval tv = { .tv_sec = sec }; - struct timeval *ptv = NULL; - fd_set efds; - int ret; - - FD_ZERO(&efds); - FD_SET(sk, &efds); - - if (sec) - ptv = &tv; - - errno = 0; - ret = select(sk + 1, NULL, NULL, &efds, ptv); - if (ret < 0) - return -errno; - return ret ? sk : 0; -} - static void test_server_active_rst(unsigned int port) { struct tcp_ao_counters cnt1, cnt2; @@ -155,17 +158,16 @@ static void test_server_passive_rst(unsigned int port) test_fail("server returned %zd", bytes); } - synchronize_threads(); /* 3: chekpoint/restore the connection */ + synchronize_threads(); /* 3: checkpoint the client */ + synchronize_threads(); /* 4: close the server, creating twsk */ if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); - - synchronize_threads(); /* 4: terminate server + send more on client */ - bytes = test_server_run(sk, quota, TEST_RETRANSMIT_SEC); close(sk); + + synchronize_threads(); /* 5: restore the socket, send more data */ test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD); - synchronize_threads(); /* 5: verified => closed */ - close(sk); + synchronize_threads(); /* 6: server exits */ } static void *server_fn(void *arg) @@ -284,7 +286,7 @@ static void test_client_active_rst(unsigned int port) test_error("test_wait_fds(): %d", err); synchronize_threads(); /* 3: close listen socket */ - if (test_client_verify(sk[0], 100, quota / 100, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); @@ -323,7 +325,6 @@ static void test_client_passive_rst(unsigned int port) struct tcp_sock_state img; sockaddr_af saddr; int sk, err; - socklen_t slen = sizeof(err); sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); if (sk < 0) @@ -337,18 +338,51 @@ static void test_client_passive_rst(unsigned int port) test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, 100, quota / 100, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); - synchronize_threads(); /* 3: chekpoint/restore the connection */ + synchronize_threads(); /* 3: checkpoint the client */ test_enable_repair(sk); test_sock_checkpoint(sk, &img, &saddr); test_ao_checkpoint(sk, &ao_img); - test_kill_sk(sk); + test_disable_repair(sk); - img.out.seq += quota; + synchronize_threads(); /* 4: close the server, creating twsk */ + + /* + * The "corruption" in SEQ has to be small enough to fit into TCP + * window, see tcp_timewait_state_process() for out-of-window + * segments. + */ + img.out.seq += 5; /* 5 is more noticeable in tcpdump than 1 */ + + /* + * FIXME: This is kind-of ugly and dirty, but it works. + * + * At this moment, the server has close'ed(sk). + * The passive RST that is being targeted here is new data after + * half-duplex close, see tcp_timewait_state_process() => TCP_TW_RST + * + * What is needed here is: + * (1) wait for FIN from the server + * (2) make sure that the ACK from the client went out + * (3) make sure that the ACK was received and processed by the server + * + * Otherwise, the data that will be sent from "repaired" socket + * post SEQ corruption may get to the server before it's in + * TCP_FIN_WAIT2. + * + * (1) is easy with select()/poll() + * (2) is possible by polling tcpi_state from TCP_INFO + * (3) is quite complex: as server's socket was already closed, + * probably the way to do it would be tcp-diag. + */ + sleep(TEST_RETRANSMIT_SEC); + + synchronize_threads(); /* 5: restore the socket, send more data */ + test_kill_sk(sk); sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); if (sk < 0) @@ -366,25 +400,33 @@ static void test_client_passive_rst(unsigned int port) test_disable_repair(sk); test_sock_state_free(&img); - synchronize_threads(); /* 4: terminate server + send more on client */ - if (test_client_verify(sk, 100, quota / 100, 2 * TEST_TIMEOUT_SEC)) - test_ok("client connection broken post-seq-adjust"); + /* + * This is how "passive reset" is acquired in this test from TCP_TW_RST: + * + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [P.], seq 901:1001, ack 1001, win 249, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x10217d6c36a22379086ef3b1], length 100 + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [F.], seq 1001, ack 1001, win 249, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x104ffc99b98c10a5298cc268], length 0 + * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [.], ack 1002, win 251, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0xe496dd4f7f5a8a66873c6f93,nop,nop,sack 1 {1001:1002}], length 0 + * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [P.], seq 1006:1106, ack 1001, win 251, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x1b5f3330fb23fbcd0c77d0ca], length 100 + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0 + */ + err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC); + /* Make sure that the connection was reset, not timeouted */ + if (err && err == -ECONNRESET) + test_ok("client sock was passively reset post-seq-adjust"); + else if (err) + test_fail("client sock was not reset post-seq-adjust: %d", err); else - test_fail("client connection still works post-seq-adjust"); - - test_wait_for_exception(sk, TEST_TIMEOUT_SEC); - - if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &err, &slen)) - test_error("getsockopt()"); - if (err != ECONNRESET && err != EPIPE) - test_fail("client connection was not reset: %d", err); - else - test_ok("client connection was reset"); + test_fail("client sock is yet connected post-seq-adjust"); if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); - synchronize_threads(); /* 5: verified => closed */ + synchronize_threads(); /* 6: server exits */ close(sk); test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD); } @@ -410,6 +452,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(15, server_fn, client_fn); + test_init(14, server_fn, client_fn); return 0; } From 4d322dce82a1d44f8c83f0f54f95dd1b8dcf46c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Jan 2024 18:42:35 +0000 Subject: [PATCH 314/347] af_unix: fix lockdep positive in sk_diag_dump_icons() syzbot reported a lockdep splat [1]. Blamed commit hinted about the possible lockdep violation, and code used unix_state_lock_nested() in an attempt to silence lockdep. It is not sufficient, because unix_state_lock_nested() is already used from unix_state_double_lock(). We need to use a separate subclass. This patch adds a distinct enumeration to make things more explicit. Also use swap() in unix_state_double_lock() as a clean up. v2: add a missing inline keyword to unix_state_lock_nested() [1] WARNING: possible circular locking dependency detected 6.8.0-rc1-syzkaller-00356-g8a696a29c690 #0 Not tainted syz-executor.1/2542 is trying to acquire lock: ffff88808b5df9e8 (rlock-AF_UNIX){+.+.}-{2:2}, at: skb_queue_tail+0x36/0x120 net/core/skbuff.c:3863 but task is already holding lock: ffff88808b5dfe70 (&u->lock/1){+.+.}-{2:2}, at: unix_dgram_sendmsg+0xfc7/0x2200 net/unix/af_unix.c:2089 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&u->lock/1){+.+.}-{2:2}: lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 _raw_spin_lock_nested+0x31/0x40 kernel/locking/spinlock.c:378 sk_diag_dump_icons net/unix/diag.c:87 [inline] sk_diag_fill+0x6ea/0xfe0 net/unix/diag.c:157 sk_diag_dump net/unix/diag.c:196 [inline] unix_diag_dump+0x3e9/0x630 net/unix/diag.c:220 netlink_dump+0x5c1/0xcd0 net/netlink/af_netlink.c:2264 __netlink_dump_start+0x5d7/0x780 net/netlink/af_netlink.c:2370 netlink_dump_start include/linux/netlink.h:338 [inline] unix_diag_handler_dump+0x1c3/0x8f0 net/unix/diag.c:319 sock_diag_rcv_msg+0xe3/0x400 netlink_rcv_skb+0x1df/0x430 net/netlink/af_netlink.c:2543 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:280 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7e6/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa37/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] sock_write_iter+0x39a/0x520 net/socket.c:1160 call_write_iter include/linux/fs.h:2085 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xa74/0xca0 fs/read_write.c:590 ksys_write+0x1a0/0x2c0 fs/read_write.c:643 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b -> #0 (rlock-AF_UNIX){+.+.}-{2:2}: check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x1909/0x5ab0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0xd5/0x120 kernel/locking/spinlock.c:162 skb_queue_tail+0x36/0x120 net/core/skbuff.c:3863 unix_dgram_sendmsg+0x15d9/0x2200 net/unix/af_unix.c:2112 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x592/0x890 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmmsg+0x3b2/0x730 net/socket.c:2724 __do_sys_sendmmsg net/socket.c:2753 [inline] __se_sys_sendmmsg net/socket.c:2750 [inline] __x64_sys_sendmmsg+0xa0/0xb0 net/socket.c:2750 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&u->lock/1); lock(rlock-AF_UNIX); lock(&u->lock/1); lock(rlock-AF_UNIX); *** DEADLOCK *** 1 lock held by syz-executor.1/2542: #0: ffff88808b5dfe70 (&u->lock/1){+.+.}-{2:2}, at: unix_dgram_sendmsg+0xfc7/0x2200 net/unix/af_unix.c:2089 stack backtrace: CPU: 1 PID: 2542 Comm: syz-executor.1 Not tainted 6.8.0-rc1-syzkaller-00356-g8a696a29c690 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106 check_noncircular+0x366/0x490 kernel/locking/lockdep.c:2187 check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x1909/0x5ab0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0xd5/0x120 kernel/locking/spinlock.c:162 skb_queue_tail+0x36/0x120 net/core/skbuff.c:3863 unix_dgram_sendmsg+0x15d9/0x2200 net/unix/af_unix.c:2112 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x592/0x890 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmmsg+0x3b2/0x730 net/socket.c:2724 __do_sys_sendmmsg net/socket.c:2753 [inline] __se_sys_sendmmsg net/socket.c:2750 [inline] __x64_sys_sendmmsg+0xa0/0xb0 net/socket.c:2750 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7f26d887cda9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f26d95a60c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00007f26d89abf80 RCX: 00007f26d887cda9 RDX: 000000000000003e RSI: 00000000200bd000 RDI: 0000000000000004 RBP: 00007f26d88c947a R08: 0000000000000000 R09: 0000000000000000 R10: 00000000000008c0 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007f26d89abf80 R15: 00007ffcfe081a68 Fixes: 2aac7a2cb0d9 ("unix_diag: Pending connections IDs NLA") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240130184235.1620738-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 20 ++++++++++++++------ net/unix/af_unix.c | 14 ++++++-------- net/unix/diag.c | 2 +- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 49c4640027d8..afd40dce40f3 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -46,12 +46,6 @@ struct scm_stat { #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) -#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) -#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) -#define unix_state_lock_nested(s) \ - spin_lock_nested(&unix_sk(s)->lock, \ - SINGLE_DEPTH_NESTING) - /* The AF_UNIX socket */ struct unix_sock { /* WARNING: sk has to be the first member */ @@ -77,6 +71,20 @@ struct unix_sock { #define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk) #define unix_peer(sk) (unix_sk(sk)->peer) +#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) +#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) +enum unix_socket_lock_class { + U_LOCK_NORMAL, + U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ + U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ +}; + +static inline void unix_state_lock_nested(struct sock *sk, + enum unix_socket_lock_class subclass) +{ + spin_lock_nested(&unix_sk(sk)->lock, subclass); +} + #define peer_wait peer_wq.wait long unix_inq_len(struct sock *sk); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ac1f2bc18fc9..30b178ebba60 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1344,13 +1344,11 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) unix_state_lock(sk1); return; } - if (sk1 < sk2) { - unix_state_lock(sk1); - unix_state_lock_nested(sk2); - } else { - unix_state_lock(sk2); - unix_state_lock_nested(sk1); - } + if (sk1 > sk2) + swap(sk1, sk2); + + unix_state_lock(sk1); + unix_state_lock_nested(sk2, U_LOCK_SECOND); } static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) @@ -1591,7 +1589,7 @@ restart: goto out_unlock; } - unix_state_lock_nested(sk); + unix_state_lock_nested(sk, U_LOCK_SECOND); if (sk->sk_state != st) { unix_state_unlock(sk); diff --git a/net/unix/diag.c b/net/unix/diag.c index bec09a3a1d44..be19827eca36 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) * queue lock. With the other's queue locked it's * OK to lock the state. */ - unix_state_lock_nested(req); + unix_state_lock_nested(req, U_LOCK_DIAG); peer = unix_sk(req)->peer; buf[i++] = (peer ? sock_i_ino(peer) : 0); unix_state_unlock(req); From d9407ff11809c6812bb84fe7be9c1367d758e5c8 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:30 -0800 Subject: [PATCH 315/347] pds_core: Prevent health thread from running during reset/remove The PCIe reset handlers can run at the same time as the health thread. This can cause the health thread to stomp on the PCIe reset. Fix this by preventing the health thread from running while a PCIe reset is happening. As part of this use timer_shutdown_sync() during reset and remove to make sure the timer doesn't ever get rearmed. Fixes: ffa55858330f ("pds_core: implement pci reset handlers") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240129234035.69802-2-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/main.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 3080898d7b95..5172a5ad8ec6 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -293,7 +293,7 @@ err_out_stop: err_out_teardown: pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING); err_out_unmap_bars: - del_timer_sync(&pdsc->wdtimer); + timer_shutdown_sync(&pdsc->wdtimer); if (pdsc->wq) destroy_workqueue(pdsc->wq); mutex_destroy(&pdsc->config_lock); @@ -420,7 +420,7 @@ static void pdsc_remove(struct pci_dev *pdev) */ pdsc_sriov_configure(pdev, 0); - del_timer_sync(&pdsc->wdtimer); + timer_shutdown_sync(&pdsc->wdtimer); if (pdsc->wq) destroy_workqueue(pdsc->wq); @@ -445,10 +445,24 @@ static void pdsc_remove(struct pci_dev *pdev) devlink_free(dl); } +static void pdsc_stop_health_thread(struct pdsc *pdsc) +{ + timer_shutdown_sync(&pdsc->wdtimer); + if (pdsc->health_work.func) + cancel_work_sync(&pdsc->health_work); +} + +static void pdsc_restart_health_thread(struct pdsc *pdsc) +{ + timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0); + mod_timer(&pdsc->wdtimer, jiffies + 1); +} + void pdsc_reset_prepare(struct pci_dev *pdev) { struct pdsc *pdsc = pci_get_drvdata(pdev); + pdsc_stop_health_thread(pdsc); pdsc_fw_down(pdsc); pci_free_irq_vectors(pdev); @@ -486,6 +500,7 @@ void pdsc_reset_done(struct pci_dev *pdev) } pdsc_fw_up(pdsc); + pdsc_restart_health_thread(pdsc); } static const struct pci_error_handlers pdsc_err_handler = { From d321067e2cfa4d5e45401a00912ca9da8d1af631 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:31 -0800 Subject: [PATCH 316/347] pds_core: Cancel AQ work on teardown There is a small window where pdsc_work_thread() calls pdsc_process_adminq() and pdsc_process_adminq() passes the PDSC_S_STOPPING_DRIVER check and starts to process adminq/notifyq work and then the driver starts a fw_down cycle. This could cause some undefined behavior if the notifyqcq/adminqcq are free'd while pdsc_process_adminq() is running. Use cancel_work_sync() on the adminqcq's work struct to make sure any pending work items are cancelled and any in progress work items are completed. Also, make sure to not call cancel_work_sync() if the work item has not be initialized. Without this, traces will happen in cases where a reset fails and teardown is called again or if reset fails and the driver is removed. Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240129234035.69802-3-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 0d2091e9eb28..b582729331eb 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -464,6 +464,8 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) if (!pdsc->pdev->is_virtfn) pdsc_devcmd_reset(pdsc); + if (pdsc->adminqcq.work.func) + cancel_work_sync(&pdsc->adminqcq.work); pdsc_qcq_free(pdsc, &pdsc->notifyqcq); pdsc_qcq_free(pdsc, &pdsc->adminqcq); From 951705151e50f9022bc96ec8b3fd5697380b1df6 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:32 -0800 Subject: [PATCH 317/347] pds_core: Use struct pdsc for the pdsc_adminq_isr private data The initial design for the adminq interrupt was done based on client drivers having their own adminq and adminq interrupt. So, each client driver's adminq isr would use their specific adminqcq for the private data struct. For the time being the design has changed to only use a single adminq for all clients. So, instead use the struct pdsc for the private data to simplify things a bit. This also has the benefit of not dereferencing the adminqcq to access the pdsc struct when the PDSC_S_STOPPING_DRIVER bit is set and the adminqcq has actually been cleared/freed. Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240129234035.69802-4-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/adminq.c | 5 +++-- drivers/net/ethernet/amd/pds_core/core.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index 5beadabc2136..68be5ea251fc 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -135,8 +135,8 @@ void pdsc_work_thread(struct work_struct *work) irqreturn_t pdsc_adminq_isr(int irq, void *data) { - struct pdsc_qcq *qcq = data; - struct pdsc *pdsc = qcq->pdsc; + struct pdsc *pdsc = data; + struct pdsc_qcq *qcq; /* Don't process AdminQ when shutting down */ if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) { @@ -145,6 +145,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) return IRQ_HANDLED; } + qcq = &pdsc->adminqcq; queue_work(pdsc->wq, &qcq->work); pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR); diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index b582729331eb..0356e56a6e99 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -125,7 +125,7 @@ static int pdsc_qcq_intr_alloc(struct pdsc *pdsc, struct pdsc_qcq *qcq) snprintf(name, sizeof(name), "%s-%d-%s", PDS_CORE_DRV_NAME, pdsc->pdev->bus->number, qcq->q.name); - index = pdsc_intr_alloc(pdsc, name, pdsc_adminq_isr, qcq); + index = pdsc_intr_alloc(pdsc, name, pdsc_adminq_isr, pdsc); if (index < 0) return index; qcq->intx = index; From 7e82a8745b951b1e794cc780d46f3fbee5e93447 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:33 -0800 Subject: [PATCH 318/347] pds_core: Prevent race issues involving the adminq There are multiple paths that can result in using the pdsc's adminq. [1] pdsc_adminq_isr and the resulting work from queue_work(), i.e. pdsc_work_thread()->pdsc_process_adminq() [2] pdsc_adminq_post() When the device goes through reset via PCIe reset and/or a fw_down/fw_up cycle due to bad PCIe state or bad device state the adminq is destroyed and recreated. A NULL pointer dereference can happen if [1] or [2] happens after the adminq is already destroyed. In order to fix this, add some further state checks and implement reference counting for adminq uses. Reference counting was used because multiple threads can attempt to access the adminq at the same time via [1] or [2]. Additionally, multiple clients (i.e. pds-vfio-pci) can be using [2] at the same time. The adminq_refcnt is initialized to 1 when the adminq has been allocated and is ready to use. Users/clients of the adminq (i.e. [1] and [2]) will increment the refcnt when they are using the adminq. When the driver goes into a fw_down cycle it will set the PDSC_S_FW_DEAD bit and then wait for the adminq_refcnt to hit 1. Setting the PDSC_S_FW_DEAD before waiting will prevent any further adminq_refcnt increments. Waiting for the adminq_refcnt to hit 1 allows for any current users of the adminq to finish before the driver frees the adminq. Once the adminq_refcnt hits 1 the driver clears the refcnt to signify that the adminq is deleted and cannot be used. On the fw_up cycle the driver will once again initialize the adminq_refcnt to 1 allowing the adminq to be used again. Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240129234035.69802-5-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/adminq.c | 31 +++++++++++++++++----- drivers/net/ethernet/amd/pds_core/core.c | 21 +++++++++++++++ drivers/net/ethernet/amd/pds_core/core.h | 1 + 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index 68be5ea251fc..5edff33d56f3 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -63,6 +63,15 @@ static int pdsc_process_notifyq(struct pdsc_qcq *qcq) return nq_work; } +static bool pdsc_adminq_inc_if_up(struct pdsc *pdsc) +{ + if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER) || + pdsc->state & BIT_ULL(PDSC_S_FW_DEAD)) + return false; + + return refcount_inc_not_zero(&pdsc->adminq_refcnt); +} + void pdsc_process_adminq(struct pdsc_qcq *qcq) { union pds_core_adminq_comp *comp; @@ -75,9 +84,9 @@ void pdsc_process_adminq(struct pdsc_qcq *qcq) int aq_work = 0; int credits; - /* Don't process AdminQ when shutting down */ - if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) { - dev_err(pdsc->dev, "%s: called while PDSC_S_STOPPING_DRIVER\n", + /* Don't process AdminQ when it's not up */ + if (!pdsc_adminq_inc_if_up(pdsc)) { + dev_err(pdsc->dev, "%s: called while adminq is unavailable\n", __func__); return; } @@ -124,6 +133,7 @@ credits: pds_core_intr_credits(&pdsc->intr_ctrl[qcq->intx], credits, PDS_CORE_INTR_CRED_REARM); + refcount_dec(&pdsc->adminq_refcnt); } void pdsc_work_thread(struct work_struct *work) @@ -138,9 +148,9 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) struct pdsc *pdsc = data; struct pdsc_qcq *qcq; - /* Don't process AdminQ when shutting down */ - if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) { - dev_err(pdsc->dev, "%s: called while PDSC_S_STOPPING_DRIVER\n", + /* Don't process AdminQ when it's not up */ + if (!pdsc_adminq_inc_if_up(pdsc)) { + dev_err(pdsc->dev, "%s: called while adminq is unavailable\n", __func__); return IRQ_HANDLED; } @@ -148,6 +158,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) qcq = &pdsc->adminqcq; queue_work(pdsc->wq, &qcq->work); pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR); + refcount_dec(&pdsc->adminq_refcnt); return IRQ_HANDLED; } @@ -231,6 +242,12 @@ int pdsc_adminq_post(struct pdsc *pdsc, int err = 0; int index; + if (!pdsc_adminq_inc_if_up(pdsc)) { + dev_dbg(pdsc->dev, "%s: preventing adminq cmd %u\n", + __func__, cmd->opcode); + return -ENXIO; + } + wc.qcq = &pdsc->adminqcq; index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp, &wc); if (index < 0) { @@ -286,6 +303,8 @@ err_out: queue_work(pdsc->wq, &pdsc->health_work); } + refcount_dec(&pdsc->adminq_refcnt); + return err; } EXPORT_SYMBOL_GPL(pdsc_adminq_post); diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 0356e56a6e99..f44333bd1256 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -450,6 +450,7 @@ int pdsc_setup(struct pdsc *pdsc, bool init) pdsc_debugfs_add_viftype(pdsc); } + refcount_set(&pdsc->adminq_refcnt, 1); clear_bit(PDSC_S_FW_DEAD, &pdsc->state); return 0; @@ -514,6 +515,24 @@ void pdsc_stop(struct pdsc *pdsc) PDS_CORE_INTR_MASK_SET); } +static void pdsc_adminq_wait_and_dec_once_unused(struct pdsc *pdsc) +{ + /* The driver initializes the adminq_refcnt to 1 when the adminq is + * allocated and ready for use. Other users/requesters will increment + * the refcnt while in use. If the refcnt is down to 1 then the adminq + * is not in use and the refcnt can be cleared and adminq freed. Before + * calling this function the driver will set PDSC_S_FW_DEAD, which + * prevent subsequent attempts to use the adminq and increment the + * refcnt to fail. This guarantees that this function will eventually + * exit. + */ + while (!refcount_dec_if_one(&pdsc->adminq_refcnt)) { + dev_dbg_ratelimited(pdsc->dev, "%s: adminq in use\n", + __func__); + cpu_relax(); + } +} + void pdsc_fw_down(struct pdsc *pdsc) { union pds_core_notifyq_comp reset_event = { @@ -529,6 +548,8 @@ void pdsc_fw_down(struct pdsc *pdsc) if (pdsc->pdev->is_virtfn) return; + pdsc_adminq_wait_and_dec_once_unused(pdsc); + /* Notify clients of fw_down */ if (pdsc->fw_reporter) devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc); diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index e35d3e7006bf..cbd5716f46e6 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -184,6 +184,7 @@ struct pdsc { struct mutex devcmd_lock; /* lock for dev_cmd operations */ struct mutex config_lock; /* lock for configuration operations */ spinlock_t adminq_lock; /* lock for adminq operations */ + refcount_t adminq_refcnt; struct pds_core_dev_info_regs __iomem *info_regs; struct pds_core_dev_cmd_regs __iomem *cmd_regs; struct pds_core_intr __iomem *intr_ctrl; From e96094c1d11cce4deb5da3c0500d49041ab845b8 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:34 -0800 Subject: [PATCH 319/347] pds_core: Clear BARs on reset During reset the BARs might be accessed when they are unmapped. This can cause unexpected issues, so fix it by clearing the cached BAR values so they are not accessed until they are re-mapped. Also, make sure any places that can access the BARs when they are NULL are prevented. Fixes: 49ce92fbee0b ("pds_core: add FW update feature to devlink") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240129234035.69802-6-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/adminq.c | 24 +++++++++++++++------ drivers/net/ethernet/amd/pds_core/core.c | 8 ++++++- drivers/net/ethernet/amd/pds_core/dev.c | 9 +++++++- drivers/net/ethernet/amd/pds_core/devlink.c | 3 ++- drivers/net/ethernet/amd/pds_core/fw.c | 3 +++ drivers/net/ethernet/amd/pds_core/main.c | 5 +++++ 6 files changed, 43 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index 5edff33d56f3..ea773cfa0af6 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -191,10 +191,16 @@ static int __pdsc_adminq_post(struct pdsc *pdsc, /* Check that the FW is running */ if (!pdsc_is_fw_running(pdsc)) { - u8 fw_status = ioread8(&pdsc->info_regs->fw_status); + if (pdsc->info_regs) { + u8 fw_status = + ioread8(&pdsc->info_regs->fw_status); - dev_info(pdsc->dev, "%s: post failed - fw not running %#02x:\n", - __func__, fw_status); + dev_info(pdsc->dev, "%s: post failed - fw not running %#02x:\n", + __func__, fw_status); + } else { + dev_info(pdsc->dev, "%s: post failed - BARs not setup\n", + __func__); + } ret = -ENXIO; goto err_out_unlock; @@ -266,10 +272,16 @@ int pdsc_adminq_post(struct pdsc *pdsc, break; if (!pdsc_is_fw_running(pdsc)) { - u8 fw_status = ioread8(&pdsc->info_regs->fw_status); + if (pdsc->info_regs) { + u8 fw_status = + ioread8(&pdsc->info_regs->fw_status); - dev_dbg(pdsc->dev, "%s: post wait failed - fw not running %#02x:\n", - __func__, fw_status); + dev_dbg(pdsc->dev, "%s: post wait failed - fw not running %#02x:\n", + __func__, fw_status); + } else { + dev_dbg(pdsc->dev, "%s: post wait failed - BARs not setup\n", + __func__); + } err = -ENXIO; break; } diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index f44333bd1256..65c8a7072e35 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -600,7 +600,13 @@ err_out: static void pdsc_check_pci_health(struct pdsc *pdsc) { - u8 fw_status = ioread8(&pdsc->info_regs->fw_status); + u8 fw_status; + + /* some sort of teardown already in progress */ + if (!pdsc->info_regs) + return; + + fw_status = ioread8(&pdsc->info_regs->fw_status); /* is PCI broken? */ if (fw_status != PDS_RC_BAD_PCI) diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index 31940b857e0e..62a38e0a8454 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -57,6 +57,9 @@ int pdsc_err_to_errno(enum pds_core_status_code code) bool pdsc_is_fw_running(struct pdsc *pdsc) { + if (!pdsc->info_regs) + return false; + pdsc->fw_status = ioread8(&pdsc->info_regs->fw_status); pdsc->last_fw_time = jiffies; pdsc->last_hb = ioread32(&pdsc->info_regs->fw_heartbeat); @@ -182,13 +185,17 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd, { int err; + if (!pdsc->cmd_regs) + return -ENXIO; + memcpy_toio(&pdsc->cmd_regs->cmd, cmd, sizeof(*cmd)); pdsc_devcmd_dbell(pdsc); err = pdsc_devcmd_wait(pdsc, cmd->opcode, max_seconds); - memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp)); if ((err == -ENXIO || err == -ETIMEDOUT) && pdsc->wq) queue_work(pdsc->wq, &pdsc->health_work); + else + memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp)); return err; } diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index e9948ea5bbcd..54864f27c87a 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -111,7 +111,8 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, mutex_lock(&pdsc->devcmd_lock); err = pdsc_devcmd_locked(pdsc, &cmd, &comp, pdsc->devcmd_timeout * 2); - memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list)); + if (!err) + memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list)); mutex_unlock(&pdsc->devcmd_lock); if (err && err != -EIO) return err; diff --git a/drivers/net/ethernet/amd/pds_core/fw.c b/drivers/net/ethernet/amd/pds_core/fw.c index 90a811f3878a..fa626719e68d 100644 --- a/drivers/net/ethernet/amd/pds_core/fw.c +++ b/drivers/net/ethernet/amd/pds_core/fw.c @@ -107,6 +107,9 @@ int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw, dev_info(pdsc->dev, "Installing firmware\n"); + if (!pdsc->cmd_regs) + return -ENXIO; + dl = priv_to_devlink(pdsc); devlink_flash_update_status_notify(dl, "Preparing to flash", NULL, 0, 0); diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 5172a5ad8ec6..05fdeb235e5f 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -37,6 +37,11 @@ static void pdsc_unmap_bars(struct pdsc *pdsc) struct pdsc_dev_bar *bars = pdsc->bars; unsigned int i; + pdsc->info_regs = NULL; + pdsc->cmd_regs = NULL; + pdsc->intr_status = NULL; + pdsc->intr_ctrl = NULL; + for (i = 0; i < PDS_CORE_BARS_MAX; i++) { if (bars[i].vaddr) pci_iounmap(pdsc->pdev, bars[i].vaddr); From bc90fbe0c3182157d2be100a2f6c2edbb1820677 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:35 -0800 Subject: [PATCH 320/347] pds_core: Rework teardown/setup flow to be more common Currently the teardown/setup flow for driver probe/remove is quite a bit different from the reset flows in pdsc_fw_down()/pdsc_fw_up(). One key piece that's missing are the calls to pci_alloc_irq_vectors() and pci_free_irq_vectors(). The pcie reset case is calling pci_free_irq_vectors() on reset_prepare, but not calling the corresponding pci_alloc_irq_vectors() on reset_done. This is causing unexpected/unwanted interrupt behavior due to the adminq interrupt being accidentally put into legacy interrupt mode. Also, the pci_alloc_irq_vectors()/pci_free_irq_vectors() functions are being called directly in probe/remove respectively. Fix this inconsistency by making the following changes: 1. Always call pdsc_dev_init() in pdsc_setup(), which calls pci_alloc_irq_vectors() and get rid of the now unused pds_dev_reinit(). 2. Always free/clear the pdsc->intr_info in pdsc_teardown() since this structure will get re-alloced in pdsc_setup(). 3. Move the calls of pci_free_irq_vectors() to pdsc_teardown() since pci_alloc_irq_vectors() will always be called in pdsc_setup()->pdsc_dev_init() for both the probe/remove and reset flows. 4. Make sure to only create the debugfs "identity" entry when it doesn't already exist, which it will in the reset case because it's already been created in the initial call to pdsc_dev_init(). Fixes: ffa55858330f ("pds_core: implement pci reset handlers") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Link: https://lore.kernel.org/r/20240129234035.69802-7-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/core.c | 13 +++++-------- drivers/net/ethernet/amd/pds_core/core.h | 1 - drivers/net/ethernet/amd/pds_core/debugfs.c | 4 ++++ drivers/net/ethernet/amd/pds_core/dev.c | 7 ------- drivers/net/ethernet/amd/pds_core/main.c | 2 -- 5 files changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 65c8a7072e35..7658a7286767 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -404,10 +404,7 @@ int pdsc_setup(struct pdsc *pdsc, bool init) int numdescs; int err; - if (init) - err = pdsc_dev_init(pdsc); - else - err = pdsc_dev_reinit(pdsc); + err = pdsc_dev_init(pdsc); if (err) return err; @@ -479,10 +476,9 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) for (i = 0; i < pdsc->nintrs; i++) pdsc_intr_free(pdsc, i); - if (removing) { - kfree(pdsc->intr_info); - pdsc->intr_info = NULL; - } + kfree(pdsc->intr_info); + pdsc->intr_info = NULL; + pdsc->nintrs = 0; } if (pdsc->kern_dbpage) { @@ -490,6 +486,7 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) pdsc->kern_dbpage = NULL; } + pci_free_irq_vectors(pdsc->pdev); set_bit(PDSC_S_FW_DEAD, &pdsc->state); } diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index cbd5716f46e6..110c4b826b22 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -281,7 +281,6 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd, union pds_core_dev_comp *comp, int max_seconds); int pdsc_devcmd_init(struct pdsc *pdsc); int pdsc_devcmd_reset(struct pdsc *pdsc); -int pdsc_dev_reinit(struct pdsc *pdsc); int pdsc_dev_init(struct pdsc *pdsc); void pdsc_reset_prepare(struct pci_dev *pdev); diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c index 8ec392299b7d..4e8579ca1c8c 100644 --- a/drivers/net/ethernet/amd/pds_core/debugfs.c +++ b/drivers/net/ethernet/amd/pds_core/debugfs.c @@ -64,6 +64,10 @@ DEFINE_SHOW_ATTRIBUTE(identity); void pdsc_debugfs_add_ident(struct pdsc *pdsc) { + /* This file will already exist in the reset flow */ + if (debugfs_lookup("identity", pdsc->dentry)) + return; + debugfs_create_file("identity", 0400, pdsc->dentry, pdsc, &identity_fops); } diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index 62a38e0a8454..e65a1632df50 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -316,13 +316,6 @@ static int pdsc_identify(struct pdsc *pdsc) return 0; } -int pdsc_dev_reinit(struct pdsc *pdsc) -{ - pdsc_init_devinfo(pdsc); - - return pdsc_identify(pdsc); -} - int pdsc_dev_init(struct pdsc *pdsc) { unsigned int nintrs; diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 05fdeb235e5f..cdbf053b5376 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -438,7 +438,6 @@ static void pdsc_remove(struct pci_dev *pdev) mutex_destroy(&pdsc->config_lock); mutex_destroy(&pdsc->devcmd_lock); - pci_free_irq_vectors(pdev); pdsc_unmap_bars(pdsc); pci_release_regions(pdev); } @@ -470,7 +469,6 @@ void pdsc_reset_prepare(struct pci_dev *pdev) pdsc_stop_health_thread(pdsc); pdsc_fw_down(pdsc); - pci_free_irq_vectors(pdev); pdsc_unmap_bars(pdsc); pci_release_regions(pdev); pci_disable_device(pdev); From f7c25d8e17dd759d97ca093faf92eeb7da7b3890 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Jan 2024 18:47:16 +0100 Subject: [PATCH 321/347] selftests: net: add missing config for pmtu.sh tests The mentioned test uses a few Kconfig still missing the net config, add them. Before: # Error: Specified qdisc kind is unknown. # Error: Specified qdisc kind is unknown. # Error: Qdisc not classful. # We have an error talking to the kernel # Error: Qdisc not classful. # We have an error talking to the kernel # policy_routing not supported # TEST: ICMPv4 with DSCP and ECN: PMTU exceptions [SKIP] After: # TEST: ICMPv4 with DSCP and ECN: PMTU exceptions [ OK ] Fixes: ec730c3e1f0e ("selftest: net: Test IPv4 PMTU exceptions with DSCP and ECN") Signed-off-by: Paolo Abeni Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://lore.kernel.org/r/8d27bf6762a5c7b3acc457d6e6872c533040f9c1.1706635101.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 77a173635a29..98c6bd2228c6 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -49,8 +49,10 @@ CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_NAT=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m +CONFIG_NET_ACT_PEDIT=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_BPF=m CONFIG_NET_CLS_MATCHALL=m @@ -62,6 +64,7 @@ CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y +CONFIG_NET_SCH_PRIO=m CONFIG_NFT_COMPAT=m CONFIG_NF_FLOW_TABLE=m CONFIG_PSAMPLE=m From e4e4b6d568d2549583cbda3f8ce567e586cb05da Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Jan 2024 18:47:17 +0100 Subject: [PATCH 322/347] selftests: net: fix available tunnels detection The pmtu.sh test tries to detect the tunnel protocols available in the running kernel and properly skip the unsupported cases. In a few more complex setup, such detection is unsuccessful, as the script currently ignores some intermediate error code at setup time. Before: # which: no nettest in (/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin) # TEST: vti6: PMTU exceptions (ESP-in-UDP) [FAIL] # PMTU exception wasn't created after creating tunnel exceeding link layer MTU # ./pmtu.sh: line 931: kill: (7543) - No such process # ./pmtu.sh: line 931: kill: (7544) - No such process After: # xfrm4 not supported # TEST: vti4: PMTU exceptions [SKIP] Fixes: ece1278a9b81 ("selftests: net: add ESP-in-UDP PMTU test") Signed-off-by: Paolo Abeni Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://lore.kernel.org/r/cab10e75fda618e6fff8c595b632f47db58b9309.1706635101.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/pmtu.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index f10879788f61..31892b366913 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -707,23 +707,23 @@ setup_xfrm6() { } setup_xfrm4udp() { - setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 4 4500 + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 } setup_xfrm6udp() { - setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 6 4500 + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 } setup_xfrm4udprouted() { - setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 4 4500 + setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 } setup_xfrm6udprouted() { - setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 6 4500 + setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 } setup_routing_old() { From bc0970d5ac1d1317e212bdf55533935ecb6ae95c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Jan 2024 18:47:18 +0100 Subject: [PATCH 323/347] selftests: net: don't access /dev/stdout in pmtu.sh When running the pmtu.sh via the kselftest infra, accessing /dev/stdout gives unexpected results: # dd: failed to open '/dev/stdout': Device or resource busy # TEST: IPv4, bridged vxlan4: PMTU exceptions [FAIL] Let dd use directly the standard output to fix the above: # TEST: IPv4, bridged vxlan4: PMTU exceptions - nexthop objects [ OK ] Fixes: 136a1b434bbb ("selftests: net: test vxlan pmtu exceptions with tcp") Signed-off-by: Paolo Abeni Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://lore.kernel.org/r/23d7592c5d77d75cff9b34f15c227f92e911c2ae.1706635101.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/pmtu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 31892b366913..3f118e3f1c66 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -1339,7 +1339,7 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { sleep 1 - dd if=/dev/zero of=/dev/stdout status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3 + dd if=/dev/zero status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3 size=$(du -sb $tmpoutfile) size=${size%%/tmp/*} From 5f9ab17394f831cb7986ec50900fa37507a127f1 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 1 Feb 2024 20:53:18 +0900 Subject: [PATCH 324/347] firewire: core: correct documentation of fw_csr_string() kernel API Against its current description, the kernel API can accepts all types of directory entries. This commit corrects the documentation. Cc: stable@vger.kernel.org Fixes: 3c2c58cb33b3 ("firewire: core: fw_csr_string addendum") Link: https://lore.kernel.org/r/20240130100409.30128-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-device.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 0547253d16fe..e4cb5106fb7d 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -118,10 +118,9 @@ static int textual_leaf_to_string(const u32 *block, char *buf, size_t size) * @buf: where to put the string * @size: size of @buf, in bytes * - * The string is taken from a minimal ASCII text descriptor leaf after - * the immediate entry with @key. The string is zero-terminated. - * An overlong string is silently truncated such that it and the - * zero byte fit into @size. + * The string is taken from a minimal ASCII text descriptor leaf just after the entry with the + * @key. The string is zero-terminated. An overlong string is silently truncated such that it + * and the zero byte fit into @size. * * Returns strlen(buf) or a negative error code. */ From 47dc55181dcbee69fc84c902e7fb060213b9b8a5 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 1 Feb 2024 20:53:18 +0900 Subject: [PATCH 325/347] firewire: core: search descriptor leaf just after vendor directory entry in root directory It appears that Sony DVMC-DA1 has a quirk that the descriptor leaf entry locates just after the vendor directory entry in root directory. This is not conformant to the legacy layout of configuration ROM described in Configuration ROM for AV/C Devices 1.0 (1394 Trading Association, Dec 2000, TA Document 1999027). This commit changes current implementation to parse configuration ROM for device attributes so that the descriptor leaf entry can be detected for the vendor name. $ config-rom-pretty-printer < Sony-DVMC-DA1.img ROM header and bus information block ----------------------------------------------------------------- 1024 041ee7fb bus_info_length 4, crc_length 30, crc 59387 1028 31333934 bus_name "1394" 1032 e0644000 irmc 1, cmc 1, isc 1, bmc 0, cyc_clk_acc 100, max_rec 4 (32) 1036 08004603 company_id 080046 | 1040 0014193c device_id 12886219068 | EUI-64 576537731003586876 root directory ----------------------------------------------------------------- 1044 0006b681 directory_length 6, crc 46721 1048 03080046 vendor 1052 0c0083c0 node capabilities: per IEEE 1394 1056 8d00000a --> eui-64 leaf at 1096 1060 d1000003 --> unit directory at 1072 1064 c3000005 --> vendor directory at 1084 1068 8100000a --> descriptor leaf at 1108 unit directory at 1072 ----------------------------------------------------------------- 1072 0002cdbf directory_length 2, crc 52671 1076 1200a02d specifier id 1080 13010000 version vendor directory at 1084 ----------------------------------------------------------------- 1084 00020cfe directory_length 2, crc 3326 1088 17fa0000 model 1092 81000008 --> descriptor leaf at 1124 eui-64 leaf at 1096 ----------------------------------------------------------------- 1096 0002c66e leaf_length 2, crc 50798 1100 08004603 company_id 080046 | 1104 0014193c device_id 12886219068 | EUI-64 576537731003586876 descriptor leaf at 1108 ----------------------------------------------------------------- 1108 00039e26 leaf_length 3, crc 40486 1112 00000000 textual descriptor 1116 00000000 minimal ASCII 1120 536f6e79 "Sony" descriptor leaf at 1124 ----------------------------------------------------------------- 1124 0005001d leaf_length 5, crc 29 1128 00000000 textual descriptor 1132 00000000 minimal ASCII 1136 44564d43 "DVMC" 1140 2d444131 "-DA1" 1144 00000000 Suggested-by: Adam Goldman Tested-by: Adam Goldman Link: https://lore.kernel.org/r/20240130100409.30128-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-device.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index e4cb5106fb7d..7d3346b3a2bf 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -367,8 +367,17 @@ static ssize_t show_text_leaf(struct device *dev, for (i = 0; i < ARRAY_SIZE(directories) && !!directories[i]; ++i) { int result = fw_csr_string(directories[i], attr->key, buf, bufsize); // Detected. - if (result >= 0) + if (result >= 0) { ret = result; + } else if (i == 0 && attr->key == CSR_VENDOR) { + // Sony DVMC-DA1 has configuration ROM such that the descriptor leaf entry + // in the root directory follows to the directory entry for vendor ID + // instead of the immediate value for vendor ID. + result = fw_csr_string(directories[i], CSR_DIRECTORY | attr->key, buf, + bufsize); + if (result >= 0) + ret = result; + } } if (ret >= 0) { From 04f647c8e456fcfabe9c252a4dcaee03b586fa4f Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 30 Jan 2024 17:36:10 +0530 Subject: [PATCH 326/347] octeontx2-pf: Remove xdp queues on program detach XDP queues are created/destroyed when a XDP program is attached/detached. In current driver xdp_queues are not getting destroyed on program exit due to incorrect xdp_queue and tot_tx_queue count values. This patch fixes the issue by setting tot_tx_queue and xdp_queue count to correct values. It also fixes xdp.data_hard_start address. Fixes: 06059a1a9a4a ("octeontx2-pf: Add XDP support to netdev PF") Signed-off-by: Geetha sowjanya Link: https://lore.kernel.org/r/20240130120610.16673-1-gakula@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 1 - drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 3 +-- drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 7 +++---- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 2928898c7f8d..7f786de61014 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -314,7 +314,6 @@ static int otx2_set_channels(struct net_device *dev, pfvf->hw.tx_queues = channel->tx_count; if (pfvf->xdp_prog) pfvf->hw.xdp_queues = channel->rx_count; - pfvf->hw.non_qos_queues = pfvf->hw.tx_queues + pfvf->hw.xdp_queues; if (if_up) err = dev->netdev_ops->ndo_open(dev); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index a57455aebff6..e5fe67e73865 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1744,6 +1744,7 @@ int otx2_open(struct net_device *netdev) /* RQ and SQs are mapped to different CQs, * so find out max CQ IRQs (i.e CINTs) needed. */ + pf->hw.non_qos_queues = pf->hw.tx_queues + pf->hw.xdp_queues; pf->hw.cint_cnt = max3(pf->hw.rx_queues, pf->hw.tx_queues, pf->hw.tc_tx_queues); @@ -2643,8 +2644,6 @@ static int otx2_xdp_setup(struct otx2_nic *pf, struct bpf_prog *prog) xdp_features_clear_redirect_target(dev); } - pf->hw.non_qos_queues += pf->hw.xdp_queues; - if (if_up) otx2_open(pf->netdev); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 4d519ea833b2..f828d32737af 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -1403,7 +1403,7 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, bool *need_xdp_flush) { - unsigned char *hard_start, *data; + unsigned char *hard_start; int qidx = cq->cq_idx; struct xdp_buff xdp; struct page *page; @@ -1417,9 +1417,8 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, xdp_init_buff(&xdp, pfvf->rbsize, &cq->xdp_rxq); - data = (unsigned char *)phys_to_virt(pa); - hard_start = page_address(page); - xdp_prepare_buff(&xdp, hard_start, data - hard_start, + hard_start = (unsigned char *)phys_to_virt(pa); + xdp_prepare_buff(&xdp, hard_start, OTX2_HEAD_ROOM, cqe->sg.seg_size, false); act = bpf_prog_run_xdp(prog, &xdp); From ae3f4b44641dfff969604735a0dcbf931f383285 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 31 Jan 2024 02:21:49 -0800 Subject: [PATCH 327/347] net: sysfs: Fix /sys/class/net/ path The documentation is pointing to the wrong path for the interface. Documentation is pointing to /sys/class/, instead of /sys/class/net/. Fix it by adding the `net/` directory before the interface. Fixes: 1a02ef76acfa ("net: sysfs: add documentation entries for /sys/class//queues") Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240131102150.728960-2-leitao@debian.org Signed-off-by: Jakub Kicinski --- .../ABI/testing/sysfs-class-net-queues | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues index 906ff3ca928a..5bff64d256c2 100644 --- a/Documentation/ABI/testing/sysfs-class-net-queues +++ b/Documentation/ABI/testing/sysfs-class-net-queues @@ -1,4 +1,4 @@ -What: /sys/class//queues/rx-/rps_cpus +What: /sys/class/net//queues/rx-/rps_cpus Date: March 2010 KernelVersion: 2.6.35 Contact: netdev@vger.kernel.org @@ -8,7 +8,7 @@ Description: network device queue. Possible values depend on the number of available CPU(s) in the system. -What: /sys/class//queues/rx-/rps_flow_cnt +What: /sys/class/net//queues/rx-/rps_flow_cnt Date: April 2010 KernelVersion: 2.6.35 Contact: netdev@vger.kernel.org @@ -16,7 +16,7 @@ Description: Number of Receive Packet Steering flows being currently processed by this particular network device receive queue. -What: /sys/class//queues/tx-/tx_timeout +What: /sys/class/net//queues/tx-/tx_timeout Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -24,7 +24,7 @@ Description: Indicates the number of transmit timeout events seen by this network interface transmit queue. -What: /sys/class//queues/tx-/tx_maxrate +What: /sys/class/net//queues/tx-/tx_maxrate Date: March 2015 KernelVersion: 4.1 Contact: netdev@vger.kernel.org @@ -32,7 +32,7 @@ Description: A Mbps max-rate set for the queue, a value of zero means disabled, default is disabled. -What: /sys/class//queues/tx-/xps_cpus +What: /sys/class/net//queues/tx-/xps_cpus Date: November 2010 KernelVersion: 2.6.38 Contact: netdev@vger.kernel.org @@ -42,7 +42,7 @@ Description: network device transmit queue. Possible values depend on the number of available CPU(s) in the system. -What: /sys/class//queues/tx-/xps_rxqs +What: /sys/class/net//queues/tx-/xps_rxqs Date: June 2018 KernelVersion: 4.18.0 Contact: netdev@vger.kernel.org @@ -53,7 +53,7 @@ Description: number of available receive queue(s) in the network device. Default is disabled. -What: /sys/class//queues/tx-/byte_queue_limits/hold_time +What: /sys/class/net//queues/tx-/byte_queue_limits/hold_time Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -62,7 +62,7 @@ Description: of this particular network device transmit queue. Default value is 1000. -What: /sys/class//queues/tx-/byte_queue_limits/inflight +What: /sys/class/net//queues/tx-/byte_queue_limits/inflight Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -70,7 +70,7 @@ Description: Indicates the number of bytes (objects) in flight on this network device transmit queue. -What: /sys/class//queues/tx-/byte_queue_limits/limit +What: /sys/class/net//queues/tx-/byte_queue_limits/limit Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -79,7 +79,7 @@ Description: on this network device transmit queue. This value is clamped to be within the bounds defined by limit_max and limit_min. -What: /sys/class//queues/tx-/byte_queue_limits/limit_max +What: /sys/class/net//queues/tx-/byte_queue_limits/limit_max Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -88,7 +88,7 @@ Description: queued on this network device transmit queue. See include/linux/dynamic_queue_limits.h for the default value. -What: /sys/class//queues/tx-/byte_queue_limits/limit_min +What: /sys/class/net//queues/tx-/byte_queue_limits/limit_min Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org From 7b55984c96ffe9e236eb9c82a2196e0b1f84990d Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 29 Jan 2024 14:03:08 +0100 Subject: [PATCH 328/347] xen-netback: properly sync TX responses Invoking the make_tx_response() / push_tx_responses() pair with no lock held would be acceptable only if all such invocations happened from the same context (NAPI instance or dealloc thread). Since this isn't the case, and since the interface "spec" also doesn't demand that multicast operations may only be performed with no in-flight transmits, MCAST_{ADD,DEL} processing also needs to acquire the response lock around the invocations. To prevent similar mistakes going forward, "downgrade" the present functions to private helpers of just the two remaining ones using them directly, with no forward declarations anymore. This involves renaming what so far was make_tx_response(), for the new function of that name to serve the new (wrapper) purpose. While there, - constify the txp parameters, - correct xenvif_idx_release()'s status parameter's type, - rename {,_}make_tx_response()'s status parameters for consistency with xenvif_idx_release()'s. Fixes: 210c34dcd8d9 ("xen-netback: add support for multicast control") Cc: stable@vger.kernel.org Signed-off-by: Jan Beulich Reviewed-by: Paul Durrant Link: https://lore.kernel.org/r/980c6c3d-e10e-4459-8565-e8fbde122f00@suse.com Signed-off-by: Jakub Kicinski --- drivers/net/xen-netback/netback.c | 100 ++++++++++++++---------------- 1 file changed, 48 insertions(+), 52 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index d7503aef599f..fab361a250d6 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -104,13 +104,12 @@ bool provides_xdp_headroom = true; module_param(provides_xdp_headroom, bool, 0644); static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status); + s8 status); static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st); -static void push_tx_responses(struct xenvif_queue *queue); + s8 status); static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); @@ -208,13 +207,9 @@ static void xenvif_tx_err(struct xenvif_queue *queue, unsigned int extra_count, RING_IDX end) { RING_IDX cons = queue->tx.req_cons; - unsigned long flags; do { - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); if (cons == end) break; RING_COPY_REQUEST(&queue->tx, cons++, txp); @@ -465,12 +460,7 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS; nr_slots--) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); ++txp; continue; } @@ -496,14 +486,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, - flags); continue; } @@ -995,7 +979,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, (ret == 0) ? XEN_NETIF_RSP_OKAY : XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); continue; } @@ -1007,7 +990,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, make_tx_response(queue, &txreq, extra_count, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); continue; } @@ -1433,44 +1415,17 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget) return work_done; } -static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status) -{ - struct pending_tx_info *pending_tx_info; - pending_ring_idx_t index; - unsigned long flags; - - pending_tx_info = &queue->pending_tx_info[pending_idx]; - - spin_lock_irqsave(&queue->response_lock, flags); - - make_tx_response(queue, &pending_tx_info->req, - pending_tx_info->extra_count, status); - - /* Release the pending index before pusing the Tx response so - * its available before a new Tx request is pushed by the - * frontend. - */ - index = pending_index(queue->pending_prod++); - queue->pending_ring[index] = pending_idx; - - push_tx_responses(queue); - - spin_unlock_irqrestore(&queue->response_lock, flags); -} - - -static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, +static void _make_tx_response(struct xenvif_queue *queue, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st) + s8 status) { RING_IDX i = queue->tx.rsp_prod_pvt; struct xen_netif_tx_response *resp; resp = RING_GET_RESPONSE(&queue->tx, i); resp->id = txp->id; - resp->status = st; + resp->status = status; while (extra_count-- != 0) RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; @@ -1487,6 +1442,47 @@ static void push_tx_responses(struct xenvif_queue *queue) notify_remote_via_irq(queue->tx_irq); } +static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, + s8 status) +{ + struct pending_tx_info *pending_tx_info; + pending_ring_idx_t index; + unsigned long flags; + + pending_tx_info = &queue->pending_tx_info[pending_idx]; + + spin_lock_irqsave(&queue->response_lock, flags); + + _make_tx_response(queue, &pending_tx_info->req, + pending_tx_info->extra_count, status); + + /* Release the pending index before pusing the Tx response so + * its available before a new Tx request is pushed by the + * frontend. + */ + index = pending_index(queue->pending_prod++); + queue->pending_ring[index] = pending_idx; + + push_tx_responses(queue); + + spin_unlock_irqrestore(&queue->response_lock, flags); +} + +static void make_tx_response(struct xenvif_queue *queue, + const struct xen_netif_tx_request *txp, + unsigned int extra_count, + s8 status) +{ + unsigned long flags; + + spin_lock_irqsave(&queue->response_lock, flags); + + _make_tx_response(queue, txp, extra_count, status); + push_tx_responses(queue); + + spin_unlock_irqrestore(&queue->response_lock, flags); +} + static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) { int ret; From e0526ec5360a48ad3ab2e26e802b0532302a7e11 Mon Sep 17 00:00:00 2001 From: Souradeep Chakrabarti Date: Tue, 30 Jan 2024 23:35:51 -0800 Subject: [PATCH 329/347] hv_netvsc: Fix race condition between netvsc_probe and netvsc_remove In commit ac5047671758 ("hv_netvsc: Disable NAPI before closing the VMBus channel"), napi_disable was getting called for all channels, including all subchannels without confirming if they are enabled or not. This caused hv_netvsc getting hung at napi_disable, when netvsc_probe() has finished running but nvdev->subchan_work has not started yet. netvsc_subchan_work() -> rndis_set_subchannel() has not created the sub-channels and because of that netvsc_sc_open() is not running. netvsc_remove() calls cancel_work_sync(&nvdev->subchan_work), for which netvsc_subchan_work did not run. netif_napi_add() sets the bit NAPI_STATE_SCHED because it ensures NAPI cannot be scheduled. Then netvsc_sc_open() -> napi_enable will clear the NAPIF_STATE_SCHED bit, so it can be scheduled. napi_disable() does the opposite. Now during netvsc_device_remove(), when napi_disable is called for those subchannels, napi_disable gets stuck on infinite msleep. This fix addresses this problem by ensuring that napi_disable() is not getting called for non-enabled NAPI struct. But netif_napi_del() is still necessary for these non-enabled NAPI struct for cleanup purpose. Call trace: [ 654.559417] task:modprobe state:D stack: 0 pid: 2321 ppid: 1091 flags:0x00004002 [ 654.568030] Call Trace: [ 654.571221] [ 654.573790] __schedule+0x2d6/0x960 [ 654.577733] schedule+0x69/0xf0 [ 654.581214] schedule_timeout+0x87/0x140 [ 654.585463] ? __bpf_trace_tick_stop+0x20/0x20 [ 654.590291] msleep+0x2d/0x40 [ 654.593625] napi_disable+0x2b/0x80 [ 654.597437] netvsc_device_remove+0x8a/0x1f0 [hv_netvsc] [ 654.603935] rndis_filter_device_remove+0x194/0x1c0 [hv_netvsc] [ 654.611101] ? do_wait_intr+0xb0/0xb0 [ 654.615753] netvsc_remove+0x7c/0x120 [hv_netvsc] [ 654.621675] vmbus_remove+0x27/0x40 [hv_vmbus] Cc: stable@vger.kernel.org Fixes: ac5047671758 ("hv_netvsc: Disable NAPI before closing the VMBus channel") Signed-off-by: Souradeep Chakrabarti Reviewed-by: Dexuan Cui Reviewed-by: Haiyang Zhang Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1706686551-28510-1-git-send-email-schakrabarti@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 1dafa44155d0..a6fcbda64ecc 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -708,7 +708,10 @@ void netvsc_device_remove(struct hv_device *device) /* Disable NAPI and disassociate its context from the device. */ for (i = 0; i < net_device->num_chn; i++) { /* See also vmbus_reset_channel_cb(). */ - napi_disable(&net_device->chan_table[i].napi); + /* only disable enabled NAPI channel */ + if (i < ndev->real_num_rx_queues) + napi_disable(&net_device->chan_table[i].napi); + netif_napi_del(&net_device->chan_table[i].napi); } From 7b6fb3050d8f5e2b6858eef344e47ac1f5442827 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2024 09:08:44 -0500 Subject: [PATCH 330/347] selftests: team: Add missing config options Similar to commit dd2d40acdbb2 ("selftests: bonding: Add more missing config options"), add more networking-specific config options which are needed for team device tests. For testing, I used the minimal config generated by virtme-ng and I added the options in the config file. Afterwards, the team device test passed. Fixes: bbb774d921e2 ("net: Add tests for bonding and team address list management") Reviewed-by: Petr Machata Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20240131140848.360618-2-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/team/config | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index 265b6882cc21..b5e3a3aad4bf 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config @@ -1,3 +1,5 @@ +CONFIG_DUMMY=y +CONFIG_IPV6=y +CONFIG_MACVLAN=y CONFIG_NET_TEAM=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y -CONFIG_MACVLAN=y From 8cc063ae1b3dbe416ce62a15d49af4c2314b45fe Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2024 09:08:45 -0500 Subject: [PATCH 331/347] selftests: bonding: Check initial state The purpose of the test_LAG_cleanup() function is to check that some hardware addresses are removed from underlying devices after they have been unenslaved. The test function simply checks that those addresses are not present at the end. However, if the addresses were never added to begin with due to some error in device setup, the test function currently passes. This is a false positive since in that situation the test did not actually exercise the intended functionality. Add a check that the expected addresses are indeed present after device setup. This makes the test function more robust. I noticed this problem when running the team/dev_addr_lists.sh test on a system without support for dummy and ipv6: tools/testing/selftests/drivers/net/team# ./dev_addr_lists.sh Error: Unknown device type. Error: Unknown device type. This program is not intended to be run as root. RTNETLINK answers: Operation not supported TEST: team cleanup mode lacp [ OK ] Fixes: bbb774d921e2 ("net: Add tests for bonding and team address list management") Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20240131140848.360618-3-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/bonding/lag_lib.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh index 2a268b17b61f..dbdd736a41d3 100644 --- a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh +++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh @@ -48,6 +48,17 @@ test_LAG_cleanup() ip link add mv0 link "$name" up address "$ucaddr" type macvlan # Used to test dev->mc handling ip address add "$addr6" dev "$name" + + # Check that addresses were added as expected + (grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy1 || + grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy2) >/dev/null + check_err $? "macvlan unicast address not found on a slave" + + # mcaddr is added asynchronously by addrconf_dad_work(), use busywait + (busywait 10000 grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy1 || + grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy2) >/dev/null + check_err $? "IPv6 solicited-node multicast mac address not found on a slave" + ip link set dev "$name" down ip link del "$name" From 9d851dd4dab63e95c1911a2fa847796d1ec5d58d Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2024 09:08:46 -0500 Subject: [PATCH 332/347] selftests: net: Remove executable bits from library scripts setup_loopback.sh and net_helper.sh are meant to be sourced from other scripts, not executed directly. Therefore, remove the executable bits from those files' permissions. This change is similar to commit 49078c1b80b6 ("selftests: forwarding: Remove executable bits from lib.sh") Fixes: 7d1575014a63 ("selftests/net: GRO coalesce test") Fixes: 3bdd9fd29cb0 ("selftests/net: synchronize udpgro tests' tx and rx connection") Suggested-by: Paolo Abeni Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20240131140848.360618-4-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/net_helper.sh | 0 tools/testing/selftests/net/setup_loopback.sh | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tools/testing/selftests/net/net_helper.sh mode change 100755 => 100644 tools/testing/selftests/net/setup_loopback.sh diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh old mode 100755 new mode 100644 diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh old mode 100755 new mode 100644 From 06efafd8608dac0c3a480539acc66ee41d2fb430 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2024 09:08:47 -0500 Subject: [PATCH 333/347] selftests: net: List helper scripts in TEST_FILES Makefile variable Some scripts are not tests themselves; they contain utility functions used by other tests. According to Documentation/dev-tools/kselftest.rst, such files should be listed in TEST_FILES. Move those utility scripts to TEST_FILES. Fixes: 1751eb42ddb5 ("selftests: net: use TEST_PROGS_EXTENDED") Fixes: 25ae948b4478 ("selftests/net: add lib.sh") Fixes: b99ac1841147 ("kselftests/net: add missed setup_loopback.sh/setup_veth.sh to Makefile") Fixes: f5173fe3e13b ("selftests: net: included needed helper in the install targets") Suggested-by: Petr Machata Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20240131140848.360618-5-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 48c6f93b8149..211753756bde 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -53,9 +53,7 @@ TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh -TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh -TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh lib.sh -TEST_PROGS_EXTENDED += net_helper.sh +TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite @@ -97,6 +95,7 @@ TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh TEST_FILES := settings +TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh include ../lib.mk From 96cd5ac4c0e6b91b74c8fbfcaa7e5c943dfa4835 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2024 09:08:48 -0500 Subject: [PATCH 334/347] selftests: forwarding: List helper scripts in TEST_FILES Makefile variable Some scripts are not tests themselves; they contain utility functions used by other tests. According to Documentation/dev-tools/kselftest.rst, such files should be listed in TEST_FILES. Currently they are incorrectly listed in TEST_PROGS_EXTENDED so rename the variable. Fixes: c085dbfb1cfc ("selftests/net/forwarding: define libs as TEST_PROGS_EXTENDED") Suggested-by: Petr Machata Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20240131140848.360618-6-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 452693514be4..4de92632f483 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -112,7 +112,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ vxlan_symmetric_ipv6.sh \ vxlan_symmetric.sh -TEST_PROGS_EXTENDED := devlink_lib.sh \ +TEST_FILES := devlink_lib.sh \ ethtool_lib.sh \ fib_offload_lib.sh \ forwarding.config.sample \ From 1939f738c73dfdb8389839bdc9624c765e3326e6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 31 Jan 2024 08:56:05 -0800 Subject: [PATCH 335/347] selftests: net: add missing config for NF_TARGET_TTL amt test uses the TTL iptables module: ip netns exec "${RELAY}" iptables -t mangle -I PREROUTING \ -d 239.0.0.1 -j TTL --ttl-set 2 Fixes: c08e8baea78e ("selftests: add amt interface selftest script") Link: https://lore.kernel.org/r/20240131165605.4051645-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 98c6bd2228c6..24a7c7bcbbc1 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -33,6 +33,7 @@ CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_RAW=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_TARGET_TTL=m CONFIG_IPV6_GRE=m CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_L2TP_ETH=m From c15a729c9d45aa142fb01a3afee822ab1f0e62a8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 Jan 2024 18:52:29 +0100 Subject: [PATCH 336/347] selftests: net: enable some more knobs The rtnetlink tests require additional options currently off by default. Fixes: 2766a11161cc ("selftests: rtnetlink: add ipsec offload API test") Fixes: 5e596ee171ba ("selftests: add xfrm state-policy-monitor to rtnetlink.sh") Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/9048ca58e49b962f35dba1dfb2beaf3dab3e0411.1706723341.git.pabeni@redhat.com/ Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 24a7c7bcbbc1..3b749addd364 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -22,6 +22,8 @@ CONFIG_VLAN_8021Q=y CONFIG_GENEVE=m CONFIG_IFB=y CONFIG_INET_DIAG=y +CONFIG_INET_ESP=y +CONFIG_INET_ESP_OFFLOAD=y CONFIG_IP_GRE=m CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y @@ -93,3 +95,4 @@ CONFIG_IP_SCTP=m CONFIG_NETFILTER_XT_MATCH_POLICY=m CONFIG_CRYPTO_ARIA=y CONFIG_XFRM_INTERFACE=m +CONFIG_XFRM_USER=m From b6c620dc43ccb4e802894e54b651cf81495e9598 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 Jan 2024 22:49:46 +0100 Subject: [PATCH 337/347] mptcp: fix data re-injection from stale subflow When the MPTCP PM detects that a subflow is stale, all the packet scheduler must re-inject all the mptcp-level unacked data. To avoid acquiring unneeded locks, it first try to check if any unacked data is present at all in the RTX queue, but such check is currently broken, as it uses TCP-specific helper on an MPTCP socket. Funnily enough fuzzers and static checkers are happy, as the accessed memory still belongs to the mptcp_sock struct, and even from a functional perspective the recovery completed successfully, as the short-cut test always failed. A recent unrelated TCP change - commit d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables") - exposed the issue, as the tcp field reorganization makes the mptcp code always skip the re-inection. Fix the issue dropping the bogus call: we are on a slow path, the early optimization proved once again to be evil. Fixes: 1e1d9d6f119c ("mptcp: handle pending data on closed subflow") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/468 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-1-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3ed4709a7509..028e8b473626 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2314,9 +2314,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk) if (__mptcp_check_fallback(msk)) return false; - if (tcp_rtx_and_write_queues_empty(sk)) - return false; - /* the closing socket has some data untransmitted and/or unacked: * some data in the mptcp rtx queue has not really xmitted yet. * keep it simple and re-inject the whole mptcp level rtx queue From 3645c844902bd4e173d6704fc2a37e8746904d67 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:47 +0100 Subject: [PATCH 338/347] selftests: mptcp: add missing kconfig for NF Filter Since the commit mentioned below, 'mptcp_join' selftests is using IPTables to add rules to the Filter table. It is then required to have IP_NF_FILTER KConfig. This KConfig is usually enabled by default in many defconfig, but we recently noticed that some CI were running our selftests without them enabled. Fixes: 8d014eaa9254 ("selftests: mptcp: add ADD_ADDR timeout test case") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index e317c2e44dae..2a00bf4acdfa 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -22,6 +22,7 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SOCKET=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_ACT_CSUM=m From 8c86fad2cecdc6bf7283ecd298b4d0555bd8b8aa Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:48 +0100 Subject: [PATCH 339/347] selftests: mptcp: add missing kconfig for NF Filter in v6 Since the commit mentioned below, 'mptcp_join' selftests is using IPTables to add rules to the Filter table for IPv6. It is then required to have IP6_NF_FILTER KConfig. This KConfig is usually enabled by default in many defconfig, but we recently noticed that some CI were running our selftests without them enabled. Fixes: 523514ed0a99 ("selftests: mptcp: add ADD_ADDR IPv6 test cases") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-3-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 2a00bf4acdfa..26fe466f803d 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -25,6 +25,7 @@ CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IP6_NF_FILTER=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_CLS_ACT=y From 2d41f10fa497182df9012d3e95d9cea24eb42e61 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:49 +0100 Subject: [PATCH 340/347] selftests: mptcp: add missing kconfig for NF Mangle Since the commit mentioned below, 'mptcp_join' selftests is using IPTables to add rules to the Mangle table, only in IPv4. This KConfig is usually enabled by default in many defconfig, but we recently noticed that some CI were running our selftests without them enabled. Fixes: b6e074e171bc ("selftests: mptcp: add infinite map testcase") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-4-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 26fe466f803d..4f80014cae49 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -23,6 +23,7 @@ CONFIG_NFT_SOCKET=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IP6_NF_FILTER=m From 4d4dfb2019d7010efb65926d9d1c1793f9a367c6 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:50 +0100 Subject: [PATCH 341/347] selftests: mptcp: increase timeout to 30 min On very slow environments -- e.g. when QEmu is used without KVM --, mptcp_join.sh selftest can take a bit more than 20 minutes. Bump the default timeout by 50% as it seems normal to take that long on some environments. When a debug kernel config is used, this selftest will take even longer, but that's certainly not a common test env to consider for the timeout. The Fixes tag that has been picked here is there simply to help having this patch backported to older stable versions. It is difficult to point to the exact commit that made some env reaching the timeout from time to time. Fixes: d17b968b9876 ("selftests: mptcp: increase timeout to 20 minutes") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-5-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/settings | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings index 79b65bdf05db..abc5648b59ab 100644 --- a/tools/testing/selftests/net/mptcp/settings +++ b/tools/testing/selftests/net/mptcp/settings @@ -1 +1 @@ -timeout=1200 +timeout=1800 From 5e2f3c65af47e527ccac54060cf909e3306652ff Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:51 +0100 Subject: [PATCH 342/347] selftests: mptcp: decrease BW in simult flows When running the simult_flow selftest in slow environments -- e.g. QEmu without KVM support --, the results can be unstable. This selftest checks if the aggregated bandwidth is (almost) fully used as expected. To help improving the stability while still keeping the same validation in place, the BW and the delay are reduced to lower the pressure on the CPU. Fixes: 1a418cb8e888 ("mptcp: simult flow self-tests") Fixes: 219d04992b68 ("mptcp: push pending frames when subflow has free space") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-6-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/simult_flows.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index ae8ad5d6fb9d..0cc964e6f2c1 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -284,12 +284,12 @@ done setup run_test 10 10 0 0 "balanced bwidth" -run_test 10 10 1 50 "balanced bwidth with unbalanced delay" +run_test 10 10 1 25 "balanced bwidth with unbalanced delay" # we still need some additional infrastructure to pass the following test-cases -run_test 30 10 0 0 "unbalanced bwidth" -run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" -run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" +run_test 10 3 0 0 "unbalanced bwidth" +run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay" +run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay" mptcp_lib_result_print_all_tap exit $ret From de46d138e7735eded9756906747fd3a8c3a42225 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:52 +0100 Subject: [PATCH 343/347] selftests: mptcp: allow changing subtests prefix If a CI executes the same selftest multiple times with different options, all results from the same subtests will have the same title, which confuse the CI. With the same title printed in TAP, the tests are considered as the same ones. Now, it is possible to override this prefix by using MPTCP_LIB_KSFT_TEST env var, and have a different title. While at it, use 'basename' to remove the suffix as well instead of using an extra 'sed'. Fixes: c4192967e62f ("selftests: mptcp: lib: format subtests results in TAP") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-7-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 022262a2cfe0..3a2abae5993e 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -6,7 +6,7 @@ readonly KSFT_FAIL=1 readonly KSFT_SKIP=4 # shellcheck disable=SC2155 # declare and assign separately -readonly KSFT_TEST=$(basename "${0}" | sed 's/\.sh$//g') +readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}" MPTCP_LIB_SUBTESTS=() From 31ee4ad86afd6ed6f4bb1b38c43011216080c42a Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:53 +0100 Subject: [PATCH 344/347] selftests: mptcp: join: stop transfer when check is done (part 1) Since the "Fixes" commit mentioned below, "userspace pm" subtests of mptcp_join selftests introduced in v6.5 are launching the whole transfer in the background, do the required checks, then wait for the end of transfer. There is no need to wait longer, especially because the checks at the end of the transfer are ignored (which is fine). This saves quite a few seconds in slow environments. Note that old versions will need commit bdbef0a6ff10 ("selftests: mptcp: add mptcp_lib_kill_wait") as well to get 'mptcp_lib_kill_wait()' helper. Fixes: 4369c198e599 ("selftests: mptcp: test userspace pm out of transfer") Cc: stable@vger.kernel.org # 6.5.x: bdbef0a6ff10: selftests: mptcp: add mptcp_lib_kill_wait Cc: stable@vger.kernel.org # 6.5.x Reviewed-and-tested-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-8-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 3a5b63026191..85bcc95f4ede 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3453,7 +3453,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm create destroy subflow @@ -3475,7 +3475,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm create id 0 subflow From 04b57c9e096a9479fe0ad31e3956e336fa589cb2 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:54 +0100 Subject: [PATCH 345/347] selftests: mptcp: join: stop transfer when check is done (part 2) Since the "Fixes" commits mentioned below, the newly added "userspace pm" subtests of mptcp_join selftests are launching the whole transfer in the background, do the required checks, then wait for the end of transfer. There is no need to wait longer, especially because the checks at the end of the transfer are ignored (which is fine). This saves quite a few seconds on slow environments. While at it, use 'mptcp_lib_kill_wait()' helper everywhere, instead of on a specific one with 'kill_tests_wait()'. Fixes: b2e2248f365a ("selftests: mptcp: userspace pm create id 0 subflow") Fixes: e3b47e460b4b ("selftests: mptcp: userspace pm remove initial subflow") Fixes: b9fb176081fb ("selftests: mptcp: userspace pm send RM_ADDR for ID 0") Cc: stable@vger.kernel.org Reviewed-and-tested-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-9-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 85bcc95f4ede..c07386e21e0a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -643,13 +643,6 @@ kill_events_pids() mptcp_lib_kill_wait $evts_ns2_pid } -kill_tests_wait() -{ - #shellcheck disable=SC2046 - kill -SIGUSR1 $(ip netns pids $ns2) $(ip netns pids $ns1) - wait -} - pm_nl_set_limits() { local ns=$1 @@ -3494,7 +3487,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm remove initial subflow @@ -3518,7 +3511,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm send RM_ADDR for ID 0 @@ -3544,7 +3537,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi } @@ -3558,7 +3551,8 @@ endpoint_tests() pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal speed=slow \ - run_tests $ns1 $ns2 10.0.1.1 2>/dev/null & + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! wait_mpj $ns1 pm_nl_check_endpoint "creation" \ @@ -3573,7 +3567,7 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags signal pm_nl_check_endpoint "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal - kill_tests_wait + mptcp_lib_kill_wait $tests_pid fi if reset "delete and re-add" && @@ -3582,7 +3576,8 @@ endpoint_tests() pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow test_linkfail=4 speed=20 \ - run_tests $ns1 $ns2 10.0.1.1 2>/dev/null & + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! wait_mpj $ns2 chk_subflow_nr "before delete" 2 @@ -3597,7 +3592,7 @@ endpoint_tests() wait_mpj $ns2 chk_subflow_nr "after re-add" 2 chk_mptcp_info subflows 1 subflows 1 - kill_tests_wait + mptcp_lib_kill_wait $tests_pid fi } From f0588b157f48b9c6277a75c9f14650e86d969e03 Mon Sep 17 00:00:00 2001 From: Pavan Kumar Linga Date: Wed, 31 Jan 2024 14:22:40 -0800 Subject: [PATCH 346/347] idpf: avoid compiler padding in virtchnl2_ptype struct In the arm random config file, kconfig option 'CONFIG_AEABI' is disabled which results in adding the compiler flag '-mabi=apcs-gnu'. This causes the compiler to add padding in virtchnl2_ptype structure to align it to 8 bytes, resulting in the following size check failure: include/linux/build_bug.h:78:41: error: static assertion failed: "(6) == sizeof(struct virtchnl2_ptype)" 78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) | ^~~~~~~~~~~~~~ include/linux/build_bug.h:77:34: note: in expansion of macro '__static_assert' 77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) | ^~~~~~~~~~~~~~~ drivers/net/ethernet/intel/idpf/virtchnl2.h:26:9: note: in expansion of macro 'static_assert' 26 | static_assert((n) == sizeof(struct X)) | ^~~~~~~~~~~~~ drivers/net/ethernet/intel/idpf/virtchnl2.h:982:1: note: in expansion of macro 'VIRTCHNL2_CHECK_STRUCT_LEN' 982 | VIRTCHNL2_CHECK_STRUCT_LEN(6, virtchnl2_ptype); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ Avoid the compiler padding by using "__packed" structure attribute for the virtchnl2_ptype struct. Also align the structure by using "__aligned(2)" for better code optimization. Fixes: 0d7502a9b4a7 ("virtchnl: add virtchnl version 2 ops") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312220250.ufEm8doQ-lkp@intel.com Reviewed-by: Przemek Kitszel Reviewed-by: Paul Menzel Reviewed-by: Simon Horman Signed-off-by: Pavan Kumar Linga Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240131222241.2087516-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/virtchnl2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2.h b/drivers/net/ethernet/intel/idpf/virtchnl2.h index 8dc837889723..4a3c4454d25a 100644 --- a/drivers/net/ethernet/intel/idpf/virtchnl2.h +++ b/drivers/net/ethernet/intel/idpf/virtchnl2.h @@ -978,7 +978,7 @@ struct virtchnl2_ptype { u8 proto_id_count; __le16 pad; __le16 proto_id[]; -}; +} __packed __aligned(2); VIRTCHNL2_CHECK_STRUCT_LEN(6, virtchnl2_ptype); /** From 069a6ed2992df8eaae90d69d7770de8d545327d9 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Thu, 1 Feb 2024 11:38:53 +0000 Subject: [PATCH 347/347] doc/netlink/specs: Add missing attr in rt_link spec IFLA_DPLL_PIN was added to rt_link messages but not to the spec, which breaks ynl. Add the missing definitions to the rt_link ynl spec. Fixes: 5f1842692880 ("netdev: expose DPLL pin handle for netdevice") Signed-off-by: Donald Hunter Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240201113853.37432-1-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_link.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 1ad01d52a863..8e4d19adee8c 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -942,6 +942,10 @@ attribute-sets: - name: gro-ipv4-max-size type: u32 + - + name: dpll-pin + type: nest + nested-attributes: link-dpll-pin-attrs - name: af-spec-attrs attributes: @@ -1627,6 +1631,12 @@ attribute-sets: - name: used type: u8 + - + name: link-dpll-pin-attrs + attributes: + - + name: id + type: u32 sub-messages: -