From 8e98b87f515d8c4bae521048a037b2cc431c3fd5 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Wed, 17 Jan 2024 14:10:49 +0100 Subject: iio: imu: adis: ensure proper DMA alignment Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for the sigma_delta ADCs. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: ccd2b52f4ac6 ("staging:iio: Add common ADIS library") Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20240117-adis-improv-v1-1-7f90e9fad200@analog.com Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index dc9ea299e088..8898966bc0f0 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -11,6 +11,7 @@ #include #include +#include #include #define ADIS_WRITE_REG(reg) ((0x80 | (reg))) @@ -131,7 +132,7 @@ struct adis { unsigned long irq_flag; void *buffer; - u8 tx[10] ____cacheline_aligned; + u8 tx[10] __aligned(IIO_DMA_MINALIGN); u8 rx[4]; }; -- cgit From 59598510be1d49e1cff7fd7593293bb8e1b2398b Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Wed, 17 Jan 2024 13:41:03 +0100 Subject: iio: adc: ad_sigma_delta: ensure proper DMA alignment Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for the sigma_delta ADCs. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: 0fb6ee8d0b5e ("iio: ad_sigma_delta: Don't put SPI transfer buffer on the stack") Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20240117-dev_sigma_delta_no_irq_flags-v1-1-db39261592cf@analog.com Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/adc/ad_sigma_delta.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 7852f6c9a714..719cf9cc6e1a 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -8,6 +8,8 @@ #ifndef __AD_SIGMA_DELTA_H__ #define __AD_SIGMA_DELTA_H__ +#include + enum ad_sigma_delta_mode { AD_SD_MODE_CONTINUOUS = 0, AD_SD_MODE_SINGLE = 1, @@ -99,7 +101,7 @@ struct ad_sigma_delta { * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp, * rounded to 16 bytes to take into account padding. */ - uint8_t tx_buf[4] ____cacheline_aligned; + uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN); uint8_t rx_buf[16] __aligned(8); }; -- cgit From 4d5e86a56615cc387d21c629f9af8fb0e958d350 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 28 Jan 2024 11:29:11 +0200 Subject: RDMA/mlx5: Fix fortify source warning while accessing Eth segment ------------[ cut here ]------------ memcpy: detected field-spanning write (size 56) of single field "eseg->inline_hdr.start" at /var/lib/dkms/mlnx-ofed-kernel/5.8/build/drivers/infiniband/hw/mlx5/wr.c:131 (size 2) WARNING: CPU: 0 PID: 293779 at /var/lib/dkms/mlnx-ofed-kernel/5.8/build/drivers/infiniband/hw/mlx5/wr.c:131 mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] Modules linked in: 8021q garp mrp stp llc rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) ib_uverbs(OE) ib_core(OE) mlx5_core(OE) pci_hyperv_intf mlxdevm(OE) mlx_compat(OE) tls mlxfw(OE) psample nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables libcrc32c nfnetlink mst_pciconf(OE) knem(OE) vfio_pci vfio_pci_core vfio_iommu_type1 vfio iommufd irqbypass cuse nfsv3 nfs fscache netfs xfrm_user xfrm_algo ipmi_devintf ipmi_msghandler binfmt_misc crct10dif_pclmul crc32_pclmul polyval_clmulni polyval_generic ghash_clmulni_intel sha512_ssse3 snd_pcsp aesni_intel crypto_simd cryptd snd_pcm snd_timer joydev snd soundcore input_leds serio_raw evbug nfsd auth_rpcgss nfs_acl lockd grace sch_fq_codel sunrpc drm efi_pstore ip_tables x_tables autofs4 psmouse virtio_net net_failover failover floppy [last unloaded: mlx_compat(OE)] CPU: 0 PID: 293779 Comm: ssh Tainted: G OE 6.2.0-32-generic #32~22.04.1-Ubuntu Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] Code: 0c 01 00 a8 01 75 25 48 8b 75 a0 b9 02 00 00 00 48 c7 c2 10 5b fd c0 48 c7 c7 80 5b fd c0 c6 05 57 0c 03 00 01 e8 95 4d 93 da <0f> 0b 44 8b 4d b0 4c 8b 45 c8 48 8b 4d c0 e9 49 fb ff ff 41 0f b7 RSP: 0018:ffffb5b48478b570 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffb5b48478b628 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffb5b48478b5e8 R13: ffff963a3c609b5e R14: ffff9639c3fbd800 R15: ffffb5b480475a80 FS: 00007fc03b444c80(0000) GS:ffff963a3dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000556f46bdf000 CR3: 0000000006ac6003 CR4: 00000000003706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? show_regs+0x72/0x90 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] ? __warn+0x8d/0x160 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] ? report_bug+0x1bb/0x1d0 ? handle_bug+0x46/0x90 ? exc_invalid_op+0x19/0x80 ? asm_exc_invalid_op+0x1b/0x20 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] mlx5_ib_post_send_nodrain+0xb/0x20 [mlx5_ib] ipoib_send+0x2ec/0x770 [ib_ipoib] ipoib_start_xmit+0x5a0/0x770 [ib_ipoib] dev_hard_start_xmit+0x8e/0x1e0 ? validate_xmit_skb_list+0x4d/0x80 sch_direct_xmit+0x116/0x3a0 __dev_xmit_skb+0x1fd/0x580 __dev_queue_xmit+0x284/0x6b0 ? _raw_spin_unlock_irq+0xe/0x50 ? __flush_work.isra.0+0x20d/0x370 ? push_pseudo_header+0x17/0x40 [ib_ipoib] neigh_connected_output+0xcd/0x110 ip_finish_output2+0x179/0x480 ? __smp_call_single_queue+0x61/0xa0 __ip_finish_output+0xc3/0x190 ip_finish_output+0x2e/0xf0 ip_output+0x78/0x110 ? __pfx_ip_finish_output+0x10/0x10 ip_local_out+0x64/0x70 __ip_queue_xmit+0x18a/0x460 ip_queue_xmit+0x15/0x30 __tcp_transmit_skb+0x914/0x9c0 tcp_write_xmit+0x334/0x8d0 tcp_push_one+0x3c/0x60 tcp_sendmsg_locked+0x2e1/0xac0 tcp_sendmsg+0x2d/0x50 inet_sendmsg+0x43/0x90 sock_sendmsg+0x68/0x80 sock_write_iter+0x93/0x100 vfs_write+0x326/0x3c0 ksys_write+0xbd/0xf0 ? do_syscall_64+0x69/0x90 __x64_sys_write+0x19/0x30 do_syscall_64+0x59/0x90 ? do_user_addr_fault+0x1d0/0x640 ? exit_to_user_mode_prepare+0x3b/0xd0 ? irqentry_exit_to_user_mode+0x9/0x20 ? irqentry_exit+0x43/0x50 ? exc_page_fault+0x92/0x1b0 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7fc03ad14a37 Code: 10 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 RSP: 002b:00007ffdf8697fe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000008024 RCX: 00007fc03ad14a37 RDX: 0000000000008024 RSI: 0000556f46bd8270 RDI: 0000000000000003 RBP: 0000556f46bb1800 R08: 0000000000007fe3 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000002 R13: 0000556f46bc66b0 R14: 000000000000000a R15: 0000556f46bb2f50 ---[ end trace 0000000000000000 ]--- Link: https://lore.kernel.org/r/8228ad34bd1a25047586270f7b1fb4ddcd046282.1706433934.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/wr.c | 2 +- include/linux/mlx5/qp.h | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index df1d1b0a3ef7..9947feb7fb8a 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -78,7 +78,7 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, */ copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, left); - memcpy(eseg->inline_hdr.start, pdata, copysz); + memcpy(eseg->inline_hdr.data, pdata, copysz); stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) - sizeof(eseg->inline_hdr.start) + copysz, 16); *size += stride / 16; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index bd53cf4be7bd..f0e55bf3ec8b 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -269,7 +269,10 @@ struct mlx5_wqe_eth_seg { union { struct { __be16 sz; - u8 start[2]; + union { + u8 start[2]; + DECLARE_FLEX_ARRAY(u8, data); + }; } inline_hdr; struct { __be16 type; -- cgit From 43fdbd140238d44e7e847232719fef7d20f9d326 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 28 Jan 2024 11:29:12 +0200 Subject: IB/mlx5: Don't expose debugfs entries for RRoCE general parameters if not supported debugfs entries for RRoCE general CC parameters must be exposed only when they are supported, otherwise when accessing them there may be a syndrome error in kernel log, for example: $ cat /sys/kernel/debug/mlx5/0000:08:00.1/cc_params/rtt_resp_dscp cat: '/sys/kernel/debug/mlx5/0000:08:00.1/cc_params/rtt_resp_dscp': Invalid argument $ dmesg mlx5_core 0000:08:00.1: mlx5_cmd_out_err:805:(pid 1253): QUERY_CONG_PARAMS(0x824) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x325a82), err(-22) Fixes: 66fb1d5df6ac ("IB/mlx5: Extend debug control for CC parameters") Reviewed-by: Edward Srouji Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/e7ade70bad52b7468bdb1de4d41d5fad70c8b71c.1706433934.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/cong.c | 6 ++++++ include/linux/mlx5/mlx5_ifc.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index f87531318feb..a78a067e3ce7 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -458,6 +458,12 @@ void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num) dbg_cc_params->root = debugfs_create_dir("cc_params", mlx5_debugfs_get_dev_root(mdev)); for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) { + if ((i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID || + i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP)) + if (!MLX5_CAP_GEN(mdev, roce) || + !MLX5_CAP_ROCE(mdev, roce_cc_general)) + continue; + dbg_cc_params->params[i].offset = i; dbg_cc_params->params[i].dev = dev; dbg_cc_params->params[i].port_num = port_num; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index bf5320b28b8b..2c10350bd422 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1103,7 +1103,7 @@ struct mlx5_ifc_roce_cap_bits { u8 sw_r_roce_src_udp_port[0x1]; u8 fl_rc_qp_when_roce_disabled[0x1]; u8 fl_rc_qp_when_roce_enabled[0x1]; - u8 reserved_at_7[0x1]; + u8 roce_cc_general[0x1]; u8 qp_ooo_transmit_default[0x1]; u8 reserved_at_9[0x15]; u8 qp_ts_format[0x2]; -- cgit From 54ce1927eb787f7bbb7ee664841c8f5932703f39 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 31 Jan 2024 15:55:38 -0800 Subject: cxl/cper: Fix errant CPER prints for CXL events Jonathan reports that CXL CPER events dump an extra generic error message. {1}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1 {1}[Hardware Error]: event severity: recoverable {1}[Hardware Error]: Error 0, type: recoverable {1}[Hardware Error]: section type: unknown, fbcd0a77-c260-417f-85a9-088b1621eba6 {1}[Hardware Error]: section length: 0x90 {1}[Hardware Error]: 00000000: 00000090 00000007 00000000 0d938086 ................ {1}[Hardware Error]: 00000010: 00100000 00000000 00040000 00000000 ................ ... CXL events were rerouted though the CXL subsystem for additional processing. However, when that work was done it was missed that cper_estatus_print_section() continued with a generic error message which is confusing. Teach CPER print code to ignore printing details of some section types. Assign the CXL event GUIDs to this set to prevent confusing unknown prints. Reported-by: Jonathan Cameron Suggested-by: Jonathan Cameron Signed-off-by: Ira Weiny Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Signed-off-by: Ard Biesheuvel --- drivers/acpi/apei/ghes.c | 26 -------------------------- drivers/firmware/efi/cper.c | 19 +++++++++++++++++++ include/linux/cper.h | 23 +++++++++++++++++++++++ 3 files changed, 42 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 7b7c605166e0..fe825a432c5b 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -680,32 +680,6 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, static DECLARE_RWSEM(cxl_cper_rw_sem); static cxl_cper_callback cper_callback; -/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */ - -/* - * General Media Event Record - * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 - */ -#define CPER_SEC_CXL_GEN_MEDIA_GUID \ - GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \ - 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6) - -/* - * DRAM Event Record - * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 - */ -#define CPER_SEC_CXL_DRAM_GUID \ - GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \ - 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24) - -/* - * Memory Module Event Record - * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 - */ -#define CPER_SEC_CXL_MEM_MODULE_GUID \ - GUID_INIT(0xfe927475, 0xdd59, 0x4339, \ - 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74) - static void cxl_cper_post_event(enum cxl_event_type event_type, struct cxl_cper_event_rec *rec) { diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 35c37f667781..9b3884ff81e6 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -523,6 +523,17 @@ static void cper_print_tstamp(const char *pfx, } } +struct ignore_section { + guid_t guid; + const char *name; +}; + +static const struct ignore_section ignore_sections[] = { + { .guid = CPER_SEC_CXL_GEN_MEDIA_GUID, .name = "CXL General Media Event" }, + { .guid = CPER_SEC_CXL_DRAM_GUID, .name = "CXL DRAM Event" }, + { .guid = CPER_SEC_CXL_MEM_MODULE_GUID, .name = "CXL Memory Module Event" }, +}; + static void cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata, int sec_no) @@ -543,6 +554,14 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text); snprintf(newpfx, sizeof(newpfx), "%s ", pfx); + + for (int i = 0; i < ARRAY_SIZE(ignore_sections); i++) { + if (guid_equal(sec_type, &ignore_sections[i].guid)) { + printk("%ssection_type: %s\n", newpfx, ignore_sections[i].name); + return; + } + } + if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) { struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata); diff --git a/include/linux/cper.h b/include/linux/cper.h index c1a7dc325121..265b0f8fc0b3 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -90,6 +90,29 @@ enum { GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ 0x72, 0x2D, 0xEB, 0x41) +/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */ +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CPER_SEC_CXL_GEN_MEDIA_GUID \ + GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \ + 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6) +/* + * DRAM Event Record + * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + */ +#define CPER_SEC_CXL_DRAM_GUID \ + GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \ + 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24) +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +#define CPER_SEC_CXL_MEM_MODULE_GUID \ + GUID_INIT(0xfe927475, 0xdd59, 0x4339, \ + 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74) + /* * Flags bits definitions for flags in struct cper_record_header * If set, the error has been recovered -- cgit From 862cf85fef85becc55a173387527adb4f076fab0 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Wed, 31 Jan 2024 10:16:47 +0100 Subject: iio: commom: st_sensors: ensure proper DMA alignment Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for st_sensors common buffer. While at it, moved the odr_lock before buffer_data as we definitely don't want any other data to share a cacheline with the buffer. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: e031d5f558f1 ("iio:st_sensors: remove buffer allocation at each buffer enable") Signed-off-by: Nuno Sa Cc: Link: https://lore.kernel.org/r/20240131-dev_dma_safety_stm-v2-1-580c07fae51b@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/common/st_sensors.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 607c3a89a647..f9ae5cdd884f 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -258,9 +258,9 @@ struct st_sensor_data { bool hw_irq_trigger; s64 hw_timestamp; - char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; - struct mutex odr_lock; + + char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN); }; #ifdef CONFIG_IIO_BUFFER -- cgit From dad6a09f3148257ac1773cd90934d721d68ab595 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 29 Jan 2024 15:56:36 -0800 Subject: hrtimer: Report offline hrtimer enqueue The hrtimers migration on CPU-down hotplug process has been moved earlier, before the CPU actually goes to die. This leaves a small window of opportunity to queue an hrtimer in a blind spot, leaving it ignored. For example a practical case has been reported with RCU waking up a SCHED_FIFO task right before the CPUHP_AP_IDLE_DEAD stage, queuing that way a sched/rt timer to the local offline CPU. Make sure such situations never go unnoticed and warn when that happens. Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier") Reported-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240129235646.3171983-4-boqun.feng@gmail.com --- include/linux/hrtimer.h | 4 +++- kernel/time/hrtimer.c | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 87e3bedf8eb0..641c4567cfa7 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -157,6 +157,7 @@ enum hrtimer_base_type { * @max_hang_time: Maximum time spent in hrtimer_interrupt * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are * expired + * @online: CPU is online from an hrtimers point of view * @timer_waiters: A hrtimer_cancel() invocation waits for the timer * callback to finish. * @expires_next: absolute time of the next event, is required for remote @@ -179,7 +180,8 @@ struct hrtimer_cpu_base { unsigned int hres_active : 1, in_hrtirq : 1, hang_detected : 1, - softirq_activated : 1; + softirq_activated : 1, + online : 1; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int nr_events; unsigned short nr_retries; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 760793998cdd..edb0f821dcea 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1085,6 +1085,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, enum hrtimer_mode mode) { debug_activate(timer, mode); + WARN_ON_ONCE(!base->cpu_base->online); base->cpu_base->active_bases |= 1 << base->index; @@ -2183,6 +2184,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) cpu_base->softirq_next_timer = NULL; cpu_base->expires_next = KTIME_MAX; cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->online = 1; hrtimer_cpu_base_init_expiry_lock(cpu_base); return 0; } @@ -2250,6 +2252,7 @@ int hrtimers_cpu_dying(unsigned int dying_cpu) smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); raw_spin_unlock(&new_base->lock); + old_base->online = 0; raw_spin_unlock(&old_base->lock); return 0; -- cgit From 3ee07964d407411fd578a3bc998de44fd64d266a Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 1 Feb 2024 11:55:56 +0100 Subject: serial: core: introduce uart_port_tx_flags() And an enum with a flag: UART_TX_NOSTOP. To NOT call __port->ops->stop_tx() when the circular buffer is empty. mxs-uart needs this (see the next patch). Signed-off-by: "Jiri Slaby (SUSE)" Cc: stable Tested-by: Emil Kronborg Link: https://lore.kernel.org/r/20240201105557.28043-1-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 536b2581d3e2..55b1f3ba48ac 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -748,8 +748,17 @@ struct uart_driver { void uart_write_wakeup(struct uart_port *port); -#define __uart_port_tx(uport, ch, tx_ready, put_char, tx_done, for_test, \ - for_post) \ +/** + * enum UART_TX_FLAGS -- flags for uart_port_tx_flags() + * + * @UART_TX_NOSTOP: don't call port->ops->stop_tx() on empty buffer + */ +enum UART_TX_FLAGS { + UART_TX_NOSTOP = BIT(0), +}; + +#define __uart_port_tx(uport, ch, flags, tx_ready, put_char, tx_done, \ + for_test, for_post) \ ({ \ struct uart_port *__port = (uport); \ struct circ_buf *xmit = &__port->state->xmit; \ @@ -777,7 +786,7 @@ void uart_write_wakeup(struct uart_port *port); if (pending < WAKEUP_CHARS) { \ uart_write_wakeup(__port); \ \ - if (pending == 0) \ + if (!((flags) & UART_TX_NOSTOP) && pending == 0) \ __port->ops->stop_tx(__port); \ } \ \ @@ -812,7 +821,7 @@ void uart_write_wakeup(struct uart_port *port); */ #define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \ unsigned int __count = (count); \ - __uart_port_tx(port, ch, tx_ready, put_char, tx_done, __count, \ + __uart_port_tx(port, ch, 0, tx_ready, put_char, tx_done, __count, \ __count--); \ }) @@ -826,8 +835,21 @@ void uart_write_wakeup(struct uart_port *port); * See uart_port_tx_limited() for more details. */ #define uart_port_tx(port, ch, tx_ready, put_char) \ - __uart_port_tx(port, ch, tx_ready, put_char, ({}), true, ({})) + __uart_port_tx(port, ch, 0, tx_ready, put_char, ({}), true, ({})) + +/** + * uart_port_tx_flags -- transmit helper for uart_port with flags + * @port: uart port + * @ch: variable to store a character to be written to the HW + * @flags: %UART_TX_NOSTOP or similar + * @tx_ready: can HW accept more data function + * @put_char: function to write a character + * + * See uart_port_tx_limited() for more details. + */ +#define uart_port_tx_flags(port, ch, flags, tx_ready, put_char) \ + __uart_port_tx(port, ch, flags, tx_ready, put_char, ({}), true, ({})) /* * Baud rate helpers. */ -- cgit From 853b8d7597eea4ccaaefbcf0942cd42fc86d542a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 2 Feb 2024 12:22:58 +0200 Subject: remap_range: merge do_clone_file_range() into vfs_clone_file_range() commit dfad37051ade ("remap_range: move permission hooks out of do_clone_file_range()") moved the permission hooks from do_clone_file_range() out to its caller vfs_clone_file_range(), but left all the fast sanity checks in do_clone_file_range(). This makes the expensive security hooks be called in situations that they would not have been called before (e.g. fs does not support clone). The only reason for the do_clone_file_range() helper was that overlayfs did not use to be able to call vfs_clone_file_range() from copy up context with sb_writers lock held. However, since commit c63e56a4a652 ("ovl: do not open/llseek lower file with upper sb_writers held"), overlayfs just uses an open coded version of vfs_clone_file_range(). Merge_clone_file_range() into vfs_clone_file_range(), restoring the original order of checks as it was before the regressing commit and adapt the overlayfs code to call vfs_clone_file_range() before the permission hooks that were added by commit ca7ab482401c ("ovl: add permission hooks outside of do_splice_direct()"). Note that in the merge of do_clone_file_range(), the file_start_write() context was reduced to cover ->remap_file_range() without holding it over the permission hooks, which was the reason for doing the regressing commit in the first place. Reported-and-tested-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202401312229.eddeb9a6-oliver.sang@intel.com Fixes: dfad37051ade ("remap_range: move permission hooks out of do_clone_file_range()") Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20240202102258.1582671-1-amir73il@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/overlayfs/copy_up.c | 14 ++++++-------- fs/remap_range.c | 31 +++++++++---------------------- include/linux/fs.h | 3 --- 3 files changed, 15 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index b8e25ca51016..8586e2f5d243 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -265,20 +265,18 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, if (IS_ERR(old_file)) return PTR_ERR(old_file); + /* Try to use clone_file_range to clone up within the same fs */ + cloned = vfs_clone_file_range(old_file, 0, new_file, 0, len, 0); + if (cloned == len) + goto out_fput; + + /* Couldn't clone, so now we try to copy the data */ error = rw_verify_area(READ, old_file, &old_pos, len); if (!error) error = rw_verify_area(WRITE, new_file, &new_pos, len); if (error) goto out_fput; - /* Try to use clone_file_range to clone up within the same fs */ - ovl_start_write(dentry); - cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0); - ovl_end_write(dentry); - if (cloned == len) - goto out_fput; - /* Couldn't clone, so now we try to copy the data */ - /* Check if lower fs supports seek operation */ if (old_file->f_mode & FMODE_LSEEK) skip_hole = true; diff --git a/fs/remap_range.c b/fs/remap_range.c index f8c1120b8311..de07f978ce3e 100644 --- a/fs/remap_range.c +++ b/fs/remap_range.c @@ -373,9 +373,9 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, } EXPORT_SYMBOL(generic_remap_file_range_prep); -loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t len, unsigned int remap_flags) +loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags) { loff_t ret; @@ -391,23 +391,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, if (!file_in->f_op->remap_file_range) return -EOPNOTSUPP; - ret = file_in->f_op->remap_file_range(file_in, pos_in, - file_out, pos_out, len, remap_flags); - if (ret < 0) - return ret; - - fsnotify_access(file_in); - fsnotify_modify(file_out); - return ret; -} -EXPORT_SYMBOL(do_clone_file_range); - -loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t len, unsigned int remap_flags) -{ - loff_t ret; - ret = remap_verify_area(file_in, pos_in, len, false); if (ret) return ret; @@ -417,10 +400,14 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, return ret; file_start_write(file_out); - ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len, - remap_flags); + ret = file_in->f_op->remap_file_range(file_in, pos_in, + file_out, pos_out, len, remap_flags); file_end_write(file_out); + if (ret < 0) + return ret; + fsnotify_access(file_in); + fsnotify_modify(file_out); return ret; } EXPORT_SYMBOL(vfs_clone_file_range); diff --git a/include/linux/fs.h b/include/linux/fs.h index ed5966a70495..023f37c60709 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2101,9 +2101,6 @@ int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); -extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t len, unsigned int remap_flags); extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); -- cgit From f814bdda774c183b0cc15ec8f3b6e7c6f4527ba5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 23 Jan 2024 18:58:26 +0100 Subject: blk-wbt: Fix detection of dirty-throttled tasks The detection of dirty-throttled tasks in blk-wbt has been subtly broken since its beginning in 2016. Namely if we are doing cgroup writeback and the throttled task is not in the root cgroup, balance_dirty_pages() will set dirty_sleep for the non-root bdi_writeback structure. However blk-wbt checks dirty_sleep only in the root cgroup bdi_writeback structure. Thus detection of recently throttled tasks is not working in this case (we noticed this when we switched to cgroup v2 and suddently writeback was slow). Since blk-wbt has no easy way to get to proper bdi_writeback and furthermore its intention has always been to work on the whole device rather than on individual cgroups, just move the dirty_sleep timestamp from bdi_writeback to backing_dev_info. That fixes the checking for recently throttled task and saves memory for everybody as a bonus. CC: stable@vger.kernel.org Fixes: b57d74aff9ab ("writeback: track if we're sleeping on progress in balance_dirty_pages()") Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20240123175826.21452-1-jack@suse.cz [axboe: fixup indentation errors] Signed-off-by: Jens Axboe --- block/blk-wbt.c | 4 ++-- include/linux/backing-dev-defs.h | 7 +++++-- mm/backing-dev.c | 2 +- mm/page-writeback.c | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 5ba3cd574eac..0c0e270a8265 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -163,9 +163,9 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) */ static bool wb_recent_wait(struct rq_wb *rwb) { - struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb; + struct backing_dev_info *bdi = rwb->rqos.disk->bdi; - return time_before(jiffies, wb->dirty_sleep + HZ); + return time_before(jiffies, bdi->last_bdp_sleep + HZ); } static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index ae12696ec492..2ad261082bba 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -141,8 +141,6 @@ struct bdi_writeback { struct delayed_work dwork; /* work item used for writeback */ struct delayed_work bw_dwork; /* work item used for bandwidth estimate */ - unsigned long dirty_sleep; /* last wait */ - struct list_head bdi_node; /* anchored at bdi->wb_list */ #ifdef CONFIG_CGROUP_WRITEBACK @@ -179,6 +177,11 @@ struct backing_dev_info { * any dirty wbs, which is depended upon by bdi_has_dirty(). */ atomic_long_t tot_write_bandwidth; + /* + * Jiffies when last process was dirty throttled on this bdi. Used by + * blk-wbt. + */ + unsigned long last_bdp_sleep; struct bdi_writeback wb; /* the root writeback info for this bdi */ struct list_head wb_list; /* list of all wbs */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 1e3447bccdb1..e039d05304dd 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -436,7 +436,6 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, INIT_LIST_HEAD(&wb->work_list); INIT_DELAYED_WORK(&wb->dwork, wb_workfn); INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn); - wb->dirty_sleep = jiffies; err = fprop_local_init_percpu(&wb->completions, gfp); if (err) @@ -921,6 +920,7 @@ int bdi_init(struct backing_dev_info *bdi) INIT_LIST_HEAD(&bdi->bdi_list); INIT_LIST_HEAD(&bdi->wb_list); init_waitqueue_head(&bdi->wb_waitq); + bdi->last_bdp_sleep = jiffies; return cgwb_bdi_init(bdi); } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index cd4e4ae77c40..cc37fa7f3364 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1921,7 +1921,7 @@ pause: break; } __set_current_state(TASK_KILLABLE); - wb->dirty_sleep = now; + bdi->last_bdp_sleep = jiffies; io_schedule_timeout(pause); current->dirty_paused_when = now + pause; -- cgit From cd7d469c25704d414d71bf3644f163fb74e7996b Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 13 Oct 2023 13:55:44 +0800 Subject: libceph: fail sparse-read if the data length doesn't match Once this happens that means there have bugs. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 3 ++- net/ceph/osd_client.c | 18 +++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index fa018d5864e7..f66f6aac74f6 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -45,6 +45,7 @@ enum ceph_sparse_read_state { CEPH_SPARSE_READ_HDR = 0, CEPH_SPARSE_READ_EXTENTS, CEPH_SPARSE_READ_DATA_LEN, + CEPH_SPARSE_READ_DATA_PRE, CEPH_SPARSE_READ_DATA, }; @@ -64,7 +65,7 @@ struct ceph_sparse_read { u64 sr_req_len; /* orig request length */ u64 sr_pos; /* current pos in buffer */ int sr_index; /* current extent index */ - __le32 sr_datalen; /* length of actual data */ + u32 sr_datalen; /* length of actual data */ u32 sr_count; /* extent count in reply */ int sr_ext_len; /* length of extent array */ struct ceph_sparse_extent *sr_extent; /* extent array */ diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 625622016f57..2cea35e4ff8e 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -5857,8 +5857,8 @@ static int osd_sparse_read(struct ceph_connection *con, struct ceph_osd *o = con->private; struct ceph_sparse_read *sr = &o->o_sparse_read; u32 count = sr->sr_count; - u64 eoff, elen; - int ret; + u64 eoff, elen, len = 0; + int i, ret; switch (sr->sr_state) { case CEPH_SPARSE_READ_HDR: @@ -5903,8 +5903,20 @@ next_op: convert_extent_map(sr); ret = sizeof(sr->sr_datalen); *pbuf = (char *)&sr->sr_datalen; - sr->sr_state = CEPH_SPARSE_READ_DATA; + sr->sr_state = CEPH_SPARSE_READ_DATA_PRE; break; + case CEPH_SPARSE_READ_DATA_PRE: + /* Convert sr_datalen to host-endian */ + sr->sr_datalen = le32_to_cpu((__force __le32)sr->sr_datalen); + for (i = 0; i < count; i++) + len += sr->sr_extent[i].len; + if (sr->sr_datalen != len) { + pr_warn_ratelimited("data len %u != extent len %llu\n", + sr->sr_datalen, len); + return -EREMOTEIO; + } + sr->sr_state = CEPH_SPARSE_READ_DATA; + fallthrough; case CEPH_SPARSE_READ_DATA: if (sr->sr_index >= count) { sr->sr_state = CEPH_SPARSE_READ_HDR; -- cgit From 8e46a2d068c92a905d01cbb018b00d66991585ab Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 14 Dec 2023 16:01:03 +0800 Subject: libceph: just wait for more data to be available on the socket A short read may occur while reading the message footer from the socket. Later, when the socket is ready for another read, the messenger invokes all read_partial_*() handlers, including read_partial_sparse_msg_data(). The expectation is that read_partial_sparse_msg_data() would bail, allowing the messenger to invoke read_partial() for the footer and pick up where it left off. However read_partial_sparse_msg_data() violates that and ends up calling into the state machine in the OSD client. The sparse-read state machine assumes that it's a new op and interprets some piece of the footer as the sparse-read header and returns bogus extents/data length, etc. To determine whether read_partial_sparse_msg_data() should bail, let's reuse cursor->total_resid. Because once it reaches to zero that means all the extents and data have been successfully received in last read, else it could break out when partially reading any of the extents and data. And then osd_sparse_read() could continue where it left off. [ idryomov: changelog ] Link: https://tracker.ceph.com/issues/63586 Fixes: d396f89db39a ("libceph: add sparse read support to msgr1") Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 2 +- net/ceph/messenger_v1.c | 25 +++++++++++++------------ net/ceph/messenger_v2.c | 4 ++-- net/ceph/osd_client.c | 9 +++------ 4 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 2eaaabbe98cb..1717cc57cdac 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -283,7 +283,7 @@ struct ceph_msg { struct kref kref; bool more_to_follow; bool needs_out_seq; - bool sparse_read; + u64 sparse_read_total; int front_alloc_len; struct ceph_msgpool *pool; diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index 4cb60bacf5f5..0cb61c76b9b8 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -160,8 +160,9 @@ static size_t sizeof_footer(struct ceph_connection *con) static void prepare_message_data(struct ceph_msg *msg, u32 data_len) { /* Initialize data cursor if it's not a sparse read */ - if (!msg->sparse_read) - ceph_msg_data_cursor_init(&msg->cursor, msg, data_len); + u64 len = msg->sparse_read_total ? : data_len; + + ceph_msg_data_cursor_init(&msg->cursor, msg, len); } /* @@ -1036,7 +1037,7 @@ static int read_partial_sparse_msg_data(struct ceph_connection *con) if (do_datacrc) crc = con->in_data_crc; - do { + while (cursor->total_resid) { if (con->v1.in_sr_kvec.iov_base) ret = read_partial_message_chunk(con, &con->v1.in_sr_kvec, @@ -1044,23 +1045,23 @@ static int read_partial_sparse_msg_data(struct ceph_connection *con) &crc); else if (cursor->sr_resid > 0) ret = read_partial_sparse_msg_extent(con, &crc); - - if (ret <= 0) { - if (do_datacrc) - con->in_data_crc = crc; - return ret; - } + if (ret <= 0) + break; memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec)); ret = con->ops->sparse_read(con, cursor, (char **)&con->v1.in_sr_kvec.iov_base); + if (ret <= 0) { + ret = ret ? ret : 1; /* must return > 0 to indicate success */ + break; + } con->v1.in_sr_len = ret; - } while (ret > 0); + } if (do_datacrc) con->in_data_crc = crc; - return ret < 0 ? ret : 1; /* must return > 0 to indicate success */ + return ret; } static int read_partial_msg_data(struct ceph_connection *con) @@ -1253,7 +1254,7 @@ static int read_partial_message(struct ceph_connection *con) if (!m->num_data_items) return -EIO; - if (m->sparse_read) + if (m->sparse_read_total) ret = read_partial_sparse_msg_data(con); else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) ret = read_partial_msg_data_bounce(con); diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index f8ec60e1aba3..a0ca5414b333 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -1128,7 +1128,7 @@ static int decrypt_tail(struct ceph_connection *con) struct sg_table enc_sgt = {}; struct sg_table sgt = {}; struct page **pages = NULL; - bool sparse = con->in_msg->sparse_read; + bool sparse = !!con->in_msg->sparse_read_total; int dpos = 0; int tail_len; int ret; @@ -2060,7 +2060,7 @@ static int prepare_read_tail_plain(struct ceph_connection *con) } if (data_len(msg)) { - if (msg->sparse_read) + if (msg->sparse_read_total) con->v2.in_state = IN_S_PREPARE_SPARSE_DATA; else con->v2.in_state = IN_S_PREPARE_READ_DATA; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 2cea35e4ff8e..9d078b37fe0b 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -5510,7 +5510,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, } m = ceph_msg_get(req->r_reply); - m->sparse_read = (bool)srlen; + m->sparse_read_total = srlen; dout("get_reply tid %lld %p\n", tid, m); @@ -5777,11 +5777,8 @@ static int prep_next_sparse_read(struct ceph_connection *con, } if (o->o_sparse_op_idx < 0) { - u64 srlen = sparse_data_requested(req); - - dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n", - __func__, o->o_osd, srlen); - ceph_msg_data_cursor_init(cursor, con->in_msg, srlen); + dout("%s: [%d] starting new sparse read req\n", + __func__, o->o_osd); } else { u64 end; -- cgit From 7d708c145b2631941b8b0b4a740dc2990818c39c Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 9 Feb 2024 01:24:54 +0000 Subject: Revert "usb: dwc3: Support EBC feature of DWC_usb31" This reverts commit 398aa9a7e77cf23c2a6f882ddd3dcd96f21771dc. The update to the gadget API to support EBC feature is incomplete. It's missing at least the following: * New usage documentation * Gadget capability check * Condition for the user to check how many and which endpoints can be used as "fifo_mode" * Description of how it can affect completed request (e.g. dwc3 won't update TRB on completion -- ie. how it can affect request's actual length report) Let's revert this until it's ready. Fixes: 398aa9a7e77c ("usb: dwc3: Support EBC feature of DWC_usb31") Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/3042f847ff904b4dd4e4cf66a1b9df470e63439e.1707441690.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 1 - drivers/usb/dwc3/gadget.c | 6 ------ drivers/usb/dwc3/gadget.h | 2 -- include/linux/usb/gadget.h | 1 - 4 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index e3eea965e57b..e120611a5174 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -376,7 +376,6 @@ /* Global HWPARAMS4 Register */ #define DWC3_GHWPARAMS4_HIBER_SCRATCHBUFS(n) (((n) & (0x0f << 13)) >> 13) #define DWC3_MAX_HIBER_SCRATCHBUFS 15 -#define DWC3_EXT_BUFF_CONTROL BIT(21) /* Global HWPARAMS6 Register */ #define DWC3_GHWPARAMS6_BCSUPPORT BIT(14) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 564976b3e2b9..4c8dd6724678 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -673,12 +673,6 @@ static int dwc3_gadget_set_ep_config(struct dwc3_ep *dep, unsigned int action) params.param1 |= DWC3_DEPCFG_BINTERVAL_M1(bInterval_m1); } - if (dep->endpoint.fifo_mode) { - if (!(dwc->hwparams.hwparams4 & DWC3_EXT_BUFF_CONTROL)) - return -EINVAL; - params.param1 |= DWC3_DEPCFG_EBC_HWO_NOWB | DWC3_DEPCFG_USE_EBC; - } - return dwc3_send_gadget_ep_cmd(dep, DWC3_DEPCMD_SETEPCONFIG, ¶ms); } diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h index fd7a4e94397e..55a56cf67d73 100644 --- a/drivers/usb/dwc3/gadget.h +++ b/drivers/usb/dwc3/gadget.h @@ -26,8 +26,6 @@ struct dwc3; #define DWC3_DEPCFG_XFER_NOT_READY_EN BIT(10) #define DWC3_DEPCFG_FIFO_ERROR_EN BIT(11) #define DWC3_DEPCFG_STREAM_EVENT_EN BIT(13) -#define DWC3_DEPCFG_EBC_HWO_NOWB BIT(14) -#define DWC3_DEPCFG_USE_EBC BIT(15) #define DWC3_DEPCFG_BINTERVAL_M1(n) (((n) & 0xff) << 16) #define DWC3_DEPCFG_STREAM_CAPABLE BIT(24) #define DWC3_DEPCFG_EP_NUMBER(n) (((n) & 0x1f) << 25) diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index a771ccc038ac..6532beb587b1 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -236,7 +236,6 @@ struct usb_ep { unsigned max_streams:16; unsigned mult:2; unsigned maxburst:5; - unsigned fifo_mode:1; u8 address; const struct usb_endpoint_descriptor *desc; const struct usb_ss_ep_comp_descriptor *comp_desc; -- cgit From 4356e9f841f7fbb945521cef3577ba394c65f3fc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 9 Feb 2024 12:39:31 -0800 Subject: work around gcc bugs with 'asm goto' with outputs We've had issues with gcc and 'asm goto' before, and we created a 'asm_volatile_goto()' macro for that in the past: see commits 3f0116c3238a ("compiler/gcc4: Add quirk for 'asm goto' miscompilation bug") and a9f180345f53 ("compiler/gcc4: Make quirk for asm_volatile_goto() unconditional"). Then, much later, we ended up removing the workaround in commit 43c249ea0b1e ("compiler-gcc.h: remove ancient workaround for gcc PR 58670") because we no longer supported building the kernel with the affected gcc versions, but we left the macro uses around. Now, Sean Christopherson reports a new version of a very similar problem, which is fixed by re-applying that ancient workaround. But the problem in question is limited to only the 'asm goto with outputs' cases, so instead of re-introducing the old workaround as-is, let's rename and limit the workaround to just that much less common case. It looks like there are at least two separate issues that all hit in this area: (a) some versions of gcc don't mark the asm goto as 'volatile' when it has outputs: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420 which is easy to work around by just adding the 'volatile' by hand. (b) Internal compiler errors: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422 which are worked around by adding the extra empty 'asm' as a barrier, as in the original workaround. but the problem Sean sees may be a third thing since it involves bad code generation (not an ICE) even with the manually added 'volatile'. but the same old workaround works for this case, even if this feels a bit like voodoo programming and may only be hiding the issue. Reported-and-tested-by: Sean Christopherson Link: https://lore.kernel.org/all/20240208220604.140859-1-seanjc@google.com/ Cc: Nick Desaulniers Cc: Uros Bizjak Cc: Jakub Jelinek Cc: Andrew Pinski Signed-off-by: Linus Torvalds --- arch/arc/include/asm/jump_label.h | 4 ++-- arch/arm/include/asm/jump_label.h | 4 ++-- arch/arm64/include/asm/alternative-macros.h | 4 ++-- arch/arm64/include/asm/jump_label.h | 4 ++-- arch/csky/include/asm/jump_label.h | 4 ++-- arch/loongarch/include/asm/jump_label.h | 4 ++-- arch/mips/include/asm/jump_label.h | 4 ++-- arch/parisc/include/asm/jump_label.h | 4 ++-- arch/powerpc/include/asm/jump_label.h | 4 ++-- arch/powerpc/include/asm/uaccess.h | 12 ++++++------ arch/powerpc/kernel/irq_64.c | 2 +- arch/riscv/include/asm/arch_hweight.h | 4 ++-- arch/riscv/include/asm/bitops.h | 8 ++++---- arch/riscv/include/asm/checksum.h | 2 +- arch/riscv/include/asm/cpufeature.h | 4 ++-- arch/riscv/include/asm/jump_label.h | 4 ++-- arch/riscv/lib/csum.c | 10 +++++----- arch/s390/include/asm/jump_label.h | 4 ++-- arch/sparc/include/asm/jump_label.h | 4 ++-- arch/um/include/asm/cpufeature.h | 2 +- arch/x86/include/asm/cpufeature.h | 2 +- arch/x86/include/asm/jump_label.h | 6 +++--- arch/x86/include/asm/rmwcc.h | 2 +- arch/x86/include/asm/special_insns.h | 2 +- arch/x86/include/asm/uaccess.h | 10 +++++----- arch/x86/kvm/svm/svm_ops.h | 6 +++--- arch/x86/kvm/vmx/vmx.c | 4 ++-- arch/x86/kvm/vmx/vmx_ops.h | 6 +++--- arch/xtensa/include/asm/jump_label.h | 4 ++-- include/linux/compiler-gcc.h | 19 +++++++++++++++++++ include/linux/compiler_types.h | 4 ++-- net/netfilter/nft_set_pipapo_avx2.c | 2 +- samples/bpf/asm_goto_workaround.h | 8 ++++---- tools/arch/x86/include/asm/rmwcc.h | 2 +- tools/include/linux/compiler_types.h | 4 ++-- 35 files changed, 96 insertions(+), 77 deletions(-) (limited to 'include/linux') diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h index 9d9618079739..a339223d9e05 100644 --- a/arch/arc/include/asm/jump_label.h +++ b/arch/arc/include/asm/jump_label.h @@ -31,7 +31,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" "1: \n" "nop \n" ".pushsection __jump_table, \"aw\" \n" @@ -47,7 +47,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" "1: \n" "b %l[l_yes] \n" ".pushsection __jump_table, \"aw\" \n" diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index e12d7d096fc0..e4eb54f6cd9f 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -11,7 +11,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" WASM(nop) "\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" @@ -25,7 +25,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" WASM(b) " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 210bb43cff2c..d328f549b1a6 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -229,7 +229,7 @@ alternative_has_cap_likely(const unsigned long cpucap) if (!cpucap_is_possible(cpucap)) return false; - asm_volatile_goto( + asm goto( ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops) : : [cpucap] "i" (cpucap) @@ -247,7 +247,7 @@ alternative_has_cap_unlikely(const unsigned long cpucap) if (!cpucap_is_possible(cpucap)) return false; - asm_volatile_goto( + asm goto( ALTERNATIVE("nop", "b %l[l_yes]", %[cpucap]) : : [cpucap] "i" (cpucap) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 48ddc0f45d22..6aafbb789991 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -18,7 +18,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( "1: nop \n\t" " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" @@ -35,7 +35,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( "1: b %l[l_yes] \n\t" " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" diff --git a/arch/csky/include/asm/jump_label.h b/arch/csky/include/asm/jump_label.h index 98a3f4b168bd..ef2e37a10a0f 100644 --- a/arch/csky/include/asm/jump_label.h +++ b/arch/csky/include/asm/jump_label.h @@ -12,7 +12,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: nop32 \n" " .pushsection __jump_table, \"aw\" \n" " .align 2 \n" @@ -29,7 +29,7 @@ label: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: bsr32 %l[label] \n" " .pushsection __jump_table, \"aw\" \n" " .align 2 \n" diff --git a/arch/loongarch/include/asm/jump_label.h b/arch/loongarch/include/asm/jump_label.h index 3cea299a5ef5..29acfe3de3fa 100644 --- a/arch/loongarch/include/asm/jump_label.h +++ b/arch/loongarch/include/asm/jump_label.h @@ -22,7 +22,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( "1: nop \n\t" JUMP_TABLE_ENTRY : : "i"(&((char *)key)[branch]) : : l_yes); @@ -35,7 +35,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( "1: b %l[l_yes] \n\t" JUMP_TABLE_ENTRY : : "i"(&((char *)key)[branch]) : : l_yes); diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 081be98c71ef..ff5d388502d4 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -39,7 +39,7 @@ extern void jump_label_apply_nops(struct module *mod); static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\t" B_INSN " 2f\n\t" + asm goto("1:\t" B_INSN " 2f\n\t" "2:\t.insn\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" @@ -53,7 +53,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\t" J_INSN " %l[l_yes]\n\t" + asm goto("1:\t" J_INSN " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" ".popsection\n\t" diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h index 94428798b6aa..317ebc5edc9f 100644 --- a/arch/parisc/include/asm/jump_label.h +++ b/arch/parisc/include/asm/jump_label.h @@ -12,7 +12,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align %1\n\t" @@ -29,7 +29,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b,n %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align %1\n\t" diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 93ce3ec25387..2f2a86ed2280 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -17,7 +17,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop # arch_static_branch\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".long 1b - ., %l[l_yes] - .\n\t" @@ -32,7 +32,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b %l[l_yes] # arch_static_branch_jump\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".long 1b - ., %l[l_yes] - .\n\t" diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index f1f9890f50d3..de10437fd206 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -74,7 +74,7 @@ __pu_failed: \ /* -mprefixed can generate offsets beyond range, fall back hack */ #ifdef CONFIG_PPC_KERNEL_PREFIXED #define __put_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm goto( \ "1: " op " %0,0(%1) # put_user\n" \ EX_TABLE(1b, %l2) \ : \ @@ -83,7 +83,7 @@ __pu_failed: \ : label) #else #define __put_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm goto( \ "1: " op "%U1%X1 %0,%1 # put_user\n" \ EX_TABLE(1b, %l2) \ : \ @@ -97,7 +97,7 @@ __pu_failed: \ __put_user_asm_goto(x, ptr, label, "std") #else /* __powerpc64__ */ #define __put_user_asm2_goto(x, addr, label) \ - asm_volatile_goto( \ + asm goto( \ "1: stw%X1 %0, %1\n" \ "2: stw%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ @@ -146,7 +146,7 @@ do { \ /* -mprefixed can generate offsets beyond range, fall back hack */ #ifdef CONFIG_PPC_KERNEL_PREFIXED #define __get_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm_goto_output( \ "1: "op" %0,0(%1) # get_user\n" \ EX_TABLE(1b, %l2) \ : "=r" (x) \ @@ -155,7 +155,7 @@ do { \ : label) #else #define __get_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm_goto_output( \ "1: "op"%U1%X1 %0, %1 # get_user\n" \ EX_TABLE(1b, %l2) \ : "=r" (x) \ @@ -169,7 +169,7 @@ do { \ __get_user_asm_goto(x, addr, label, "ld") #else /* __powerpc64__ */ #define __get_user_asm2_goto(x, addr, label) \ - asm_volatile_goto( \ + asm_goto_output( \ "1: lwz%X1 %0, %1\n" \ "2: lwz%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c index 938e66829eae..d5c48d1b0a31 100644 --- a/arch/powerpc/kernel/irq_64.c +++ b/arch/powerpc/kernel/irq_64.c @@ -230,7 +230,7 @@ again: * This allows interrupts to be unmasked without hard disabling, and * also without new hard interrupts coming in ahead of pending ones. */ - asm_volatile_goto( + asm goto( "1: \n" " lbz 9,%0(13) \n" " cmpwi 9,0 \n" diff --git a/arch/riscv/include/asm/arch_hweight.h b/arch/riscv/include/asm/arch_hweight.h index c20236a0725b..85b2c443823e 100644 --- a/arch/riscv/include/asm/arch_hweight.h +++ b/arch/riscv/include/asm/arch_hweight.h @@ -20,7 +20,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w) { #ifdef CONFIG_RISCV_ISA_ZBB - asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -51,7 +51,7 @@ static inline unsigned int __arch_hweight8(unsigned int w) static __always_inline unsigned long __arch_hweight64(__u64 w) { # ifdef CONFIG_RISCV_ISA_ZBB - asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 9ffc35537024..329d8244a9b3 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -39,7 +39,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word) { int num; - asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -95,7 +95,7 @@ static __always_inline unsigned long variable__fls(unsigned long word) { int num; - asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -154,7 +154,7 @@ static __always_inline int variable_ffs(int x) if (!x) return 0; - asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -209,7 +209,7 @@ static __always_inline int variable_fls(unsigned int x) if (!x) return 0; - asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h index a5b60b54b101..88e6f1499e88 100644 --- a/arch/riscv/include/asm/checksum.h +++ b/arch/riscv/include/asm/checksum.h @@ -53,7 +53,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { unsigned long fold_temp; - asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 5a626ed2c47a..0bd11862b760 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -80,7 +80,7 @@ riscv_has_extension_likely(const unsigned long ext) "ext must be < RISCV_ISA_EXT_MAX"); if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm_volatile_goto( + asm goto( ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) : : [ext] "i" (ext) @@ -103,7 +103,7 @@ riscv_has_extension_unlikely(const unsigned long ext) "ext must be < RISCV_ISA_EXT_MAX"); if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm_volatile_goto( + asm goto( ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) : : [ext] "i" (ext) diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h index 14a5ea8d8ef0..4a35d787c019 100644 --- a/arch/riscv/include/asm/jump_label.h +++ b/arch/riscv/include/asm/jump_label.h @@ -17,7 +17,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( " .align 2 \n\t" " .option push \n\t" " .option norelax \n\t" @@ -39,7 +39,7 @@ label: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( " .align 2 \n\t" " .option push \n\t" " .option norelax \n\t" diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c index af3df5274ccb..74af3ab520b6 100644 --- a/arch/riscv/lib/csum.c +++ b/arch/riscv/lib/csum.c @@ -53,7 +53,7 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, * support, so nop when Zbb is available and jump when Zbb is * not available. */ - asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : @@ -170,7 +170,7 @@ do_csum_with_alignment(const unsigned char *buff, int len) * support, so nop when Zbb is available and jump when Zbb is * not available. */ - asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : @@ -178,7 +178,7 @@ do_csum_with_alignment(const unsigned char *buff, int len) : no_zbb); #ifdef CONFIG_32BIT - asm_volatile_goto(".option push \n\ + asm_goto_output(".option push \n\ .option arch,+zbb \n\ rori %[fold_temp], %[csum], 16 \n\ andi %[offset], %[offset], 1 \n\ @@ -193,7 +193,7 @@ do_csum_with_alignment(const unsigned char *buff, int len) return (unsigned short)csum; #else /* !CONFIG_32BIT */ - asm_volatile_goto(".option push \n\ + asm_goto_output(".option push \n\ .option arch,+zbb \n\ rori %[fold_temp], %[csum], 32 \n\ add %[csum], %[fold_temp], %[csum] \n\ @@ -257,7 +257,7 @@ do_csum_no_alignment(const unsigned char *buff, int len) * support, so nop when Zbb is available and jump when Zbb is * not available. */ - asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 895f774bbcc5..bf78cf381dfc 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -25,7 +25,7 @@ */ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("0: brcl 0,%l[label]\n" + asm goto("0: brcl 0,%l[label]\n" ".pushsection __jump_table,\"aw\"\n" ".balign 8\n" ".long 0b-.,%l[label]-.\n" @@ -39,7 +39,7 @@ label: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("0: brcl 15,%l[label]\n" + asm goto("0: brcl 15,%l[label]\n" ".pushsection __jump_table,\"aw\"\n" ".balign 8\n" ".long 0b-.,%l[label]-.\n" diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 94eb529dcb77..2718cbea826a 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -10,7 +10,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" @@ -26,7 +26,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b %l[l_yes]\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h index 4b6d1b526bc1..66fe06db872f 100644 --- a/arch/um/include/asm/cpufeature.h +++ b/arch/um/include/asm/cpufeature.h @@ -75,7 +75,7 @@ extern void setup_clear_cpu_cap(unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto("1: jmp 6f\n" + asm goto("1: jmp 6f\n" "2:\n" ".skip -(((5f-4f) - (2b-1b)) > 0) * " "((5f-4f) - (2b-1b)),0x90\n" diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index a26bebbdff87..a1273698fc43 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -168,7 +168,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto( + asm goto( ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") ".pushsection .altinstr_aux,\"ax\"\n" "6:\n" diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 071572e23d3a..cbbef32517f0 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -24,7 +24,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:" + asm goto("1:" "jmp %l[l_yes] # objtool NOPs this \n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (2 | branch) : : l_yes); @@ -38,7 +38,7 @@ l_yes: static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" + asm goto("1:" ".byte " __stringify(BYTES_NOP5) "\n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); @@ -52,7 +52,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" + asm goto("1:" "jmp %l[l_yes]\n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 4b081e0d3306..363266cbcada 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -13,7 +13,7 @@ #define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ ({ \ bool c = false; \ - asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ + asm goto (fullop "; j" #cc " %l[cc_label]" \ : : [var] "m" (_var), ## __VA_ARGS__ \ : clobbers : cc_label); \ if (0) { \ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index d6cd9344f6c7..48f8dd47cf68 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -205,7 +205,7 @@ static inline void clwb(volatile void *__p) #ifdef CONFIG_X86_USER_SHADOW_STACK static inline int write_user_shstk_64(u64 __user *addr, u64 val) { - asm_volatile_goto("1: wrussq %[val], (%[addr])\n" + asm goto("1: wrussq %[val], (%[addr])\n" _ASM_EXTABLE(1b, %l[fail]) :: [addr] "r" (addr), [val] "r" (val) :: fail); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 5c367c1290c3..237dc8cdd12b 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -133,7 +133,7 @@ extern int __get_user_bad(void); #ifdef CONFIG_X86_32 #define __put_user_goto_u64(x, addr, label) \ - asm_volatile_goto("\n" \ + asm goto("\n" \ "1: movl %%eax,0(%1)\n" \ "2: movl %%edx,4(%1)\n" \ _ASM_EXTABLE_UA(1b, %l2) \ @@ -295,7 +295,7 @@ do { \ } while (0) #define __get_user_asm(x, addr, itype, ltype, label) \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: mov"itype" %[umem],%[output]\n" \ _ASM_EXTABLE_UA(1b, %l2) \ : [output] ltype(x) \ @@ -375,7 +375,7 @@ do { \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ _ASM_EXTABLE_UA(1b, %l[label]) \ : CC_OUT(z) (success), \ @@ -394,7 +394,7 @@ do { \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ _ASM_EXTABLE_UA(1b, %l[label]) \ : CC_OUT(z) (success), \ @@ -477,7 +477,7 @@ struct __large_struct { unsigned long buf[100]; }; * aliasing issues. */ #define __put_user_goto(x, addr, itype, ltype, label) \ - asm_volatile_goto("\n" \ + asm goto("\n" \ "1: mov"itype" %0,%1\n" \ _ASM_EXTABLE_UA(1b, %l2) \ : : ltype(x), "m" (__m(addr)) \ diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h index 36c8af87a707..4e725854c63a 100644 --- a/arch/x86/kvm/svm/svm_ops.h +++ b/arch/x86/kvm/svm/svm_ops.h @@ -8,7 +8,7 @@ #define svm_asm(insn, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) "\n\t" \ + asm goto("1: " __stringify(insn) "\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ ::: clobber : fault); \ return; \ @@ -18,7 +18,7 @@ fault: \ #define svm_asm1(insn, op1, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + asm goto("1: " __stringify(insn) " %0\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ :: op1 : clobber : fault); \ return; \ @@ -28,7 +28,7 @@ fault: \ #define svm_asm2(insn, op1, op2, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + asm goto("1: " __stringify(insn) " %1, %0\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ :: op1, op2 : clobber : fault); \ return; \ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e262bc2ba4e5..1111d9d08903 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -738,7 +738,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx, */ static int kvm_cpu_vmxoff(void) { - asm_volatile_goto("1: vmxoff\n\t" + asm goto("1: vmxoff\n\t" _ASM_EXTABLE(1b, %l[fault]) ::: "cc", "memory" : fault); @@ -2784,7 +2784,7 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer) cr4_set_bits(X86_CR4_VMXE); - asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t" + asm goto("1: vmxon %[vmxon_pointer]\n\t" _ASM_EXTABLE(1b, %l[fault]) : : [vmxon_pointer] "m"(vmxon_pointer) : : fault); diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index f41ce3c24123..8060e5fc6dbd 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -94,7 +94,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT - asm_volatile_goto("1: vmread %[field], %[output]\n\t" + asm_goto_output("1: vmread %[field], %[output]\n\t" "jna %l[do_fail]\n\t" _ASM_EXTABLE(1b, %l[do_exception]) @@ -188,7 +188,7 @@ static __always_inline unsigned long vmcs_readl(unsigned long field) #define vmx_asm1(insn, op1, error_args...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + asm goto("1: " __stringify(insn) " %0\n\t" \ ".byte 0x2e\n\t" /* branch not taken hint */ \ "jna %l[error]\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ @@ -205,7 +205,7 @@ fault: \ #define vmx_asm2(insn, op1, op2, error_args...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + asm goto("1: " __stringify(insn) " %1, %0\n\t" \ ".byte 0x2e\n\t" /* branch not taken hint */ \ "jna %l[error]\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ diff --git a/arch/xtensa/include/asm/jump_label.h b/arch/xtensa/include/asm/jump_label.h index c812bf85021c..46c8596259d2 100644 --- a/arch/xtensa/include/asm/jump_label.h +++ b/arch/xtensa/include/asm/jump_label.h @@ -13,7 +13,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "_nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" @@ -38,7 +38,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, * make it reachable and wrap both into a no-transform block * to avoid any assembler interference with this. */ - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" ".begin no-transform\n\t" "_j %l[l_yes]\n\t" "2:\n\t" diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index aebb65bf95a7..c1a963be7d28 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -64,6 +64,25 @@ __builtin_unreachable(); \ } while (0) +/* + * GCC 'asm goto' with outputs miscompiles certain code sequences: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420 + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422 + * + * Work it around via the same compiler barrier quirk that we used + * to use for the old 'asm goto' workaround. + * + * Also, always mark such 'asm goto' statements as volatile: all + * asm goto statements are supposed to be volatile as per the + * documentation, but some versions of gcc didn't actually do + * that for asms with outputs: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 + */ +#define asm_goto_output(x...) \ + do { asm volatile goto(x); asm (""); } while (0) + #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6f1ca49306d2..663d8791c871 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -362,8 +362,8 @@ struct ftrace_likely_data { #define __member_size(p) __builtin_object_size(p, 1) #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +#ifndef asm_goto_output +#define asm_goto_output(x...) asm goto(x) #endif #ifdef CONFIG_CC_HAS_ASM_INLINE diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 90e275bb3e5d..a3a8ddca9918 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -57,7 +57,7 @@ /* Jump to label if @reg is zero */ #define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \ - asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ + asm goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ "je %l[" #label "]" : : : : label) /* Store 256 bits from YMM register into memory. Contrary to bucket load diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h index 7048bb3594d6..634e81d83efd 100644 --- a/samples/bpf/asm_goto_workaround.h +++ b/samples/bpf/asm_goto_workaround.h @@ -4,14 +4,14 @@ #define __ASM_GOTO_WORKAROUND_H /* - * This will bring in asm_volatile_goto and asm_inline macro definitions + * This will bring in asm_goto_output and asm_inline macro definitions * if enabled by compiler and config options. */ #include -#ifdef asm_volatile_goto -#undef asm_volatile_goto -#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto") +#ifdef asm_goto_output +#undef asm_goto_output +#define asm_goto_output(x...) asm volatile("invalid use of asm_goto_output") #endif /* diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h index 11ff975242ca..e2ff22b379a4 100644 --- a/tools/arch/x86/include/asm/rmwcc.h +++ b/tools/arch/x86/include/asm/rmwcc.h @@ -4,7 +4,7 @@ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ + asm goto (fullop "; j" cc " %l[cc_label]" \ : : "m" (var), ## __VA_ARGS__ \ : "memory" : cc_label); \ return 0; \ diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h index 1bdd834bdd57..d09f9dc172a4 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -36,8 +36,8 @@ #include #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +#ifndef asm_goto_output +#define asm_goto_output(x...) asm goto(x) #endif #endif /* __LINUX_COMPILER_TYPES_H */ -- cgit From 7e4a205fe56b9092f0143dad6aa5fee081139b09 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 3 Feb 2024 23:53:05 -0500 Subject: Revert "get rid of DCACHE_GENOCIDE" This reverts commit 57851607326a2beef21e67f83f4f53a90df8445a. Unfortunately, while we only call that thing once, the callback *can* be called more than once for the same dentry - all it takes is rename_lock being touched while we are in d_walk(). For now let's revert it. Signed-off-by: Al Viro --- fs/dcache.c | 5 ++++- include/linux/dcache.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index b813528fb147..6ebccba33336 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3061,7 +3061,10 @@ static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) if (d_unhashed(dentry) || !dentry->d_inode) return D_WALK_SKIP; - dentry->d_lockref.count--; + if (!(dentry->d_flags & DCACHE_GENOCIDE)) { + dentry->d_flags |= DCACHE_GENOCIDE; + dentry->d_lockref.count--; + } } return D_WALK_CONTINUE; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1666c387861f..d07cf2f1bb7d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -173,6 +173,7 @@ struct dentry_operations { #define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */ #define DCACHE_CANT_MOUNT BIT(8) +#define DCACHE_GENOCIDE BIT(9) #define DCACHE_SHRINK_LIST BIT(10) #define DCACHE_OP_WEAK_REVALIDATE BIT(11) -- cgit From 119ff04864a24470b1e531bb53e5c141aa8fefb0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Feb 2024 14:43:21 +0000 Subject: tcp: move tp->scaling_ratio to tcp_sock_read_txrx group tp->scaling_ratio is a read mostly field, used in rx and tx fast paths. Fixes: d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables") Signed-off-by: Eric Dumazet Cc: Coco Li Cc: Wei Wang Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/networking/net_cachelines/tcp_sock.rst | 2 +- include/linux/tcp.h | 2 +- net/ipv4/tcp.c | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst index 97d7a5c8e01c..803912291479 100644 --- a/Documentation/networking/net_cachelines/tcp_sock.rst +++ b/Documentation/networking/net_cachelines/tcp_sock.rst @@ -38,7 +38,7 @@ u32 max_window read_mostly - u32 mss_cache read_mostly read_mostly tcp_rate_check_app_limited,tcp_current_mss,tcp_sync_mss,tcp_sndbuf_expand,tcp_tso_should_defer(tx);tcp_update_pacing_rate,tcp_clean_rtx_queue(rx) u32 window_clamp read_mostly read_write tcp_rcv_space_adjust,__tcp_select_window u32 rcv_ssthresh read_mostly - __tcp_select_window -u82 scaling_ratio +u8 scaling_ratio read_mostly read_mostly tcp_win_from_space struct tcp_rack u16 advmss - read_mostly tcp_rcv_space_adjust u8 compressed_ack diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 89b290d8c8dc..168f5dca6609 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -221,6 +221,7 @@ struct tcp_sock { u32 lost_out; /* Lost packets */ u32 sacked_out; /* SACK'd packets */ u16 tcp_header_len; /* Bytes of tcp header to send */ + u8 scaling_ratio; /* see tcp_win_from_space() */ u8 chrono_type : 2, /* current chronograph type */ repair : 1, is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ @@ -352,7 +353,6 @@ struct tcp_sock { u32 compressed_ack_rcv_nxt; struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - u8 scaling_ratio; /* see tcp_win_from_space() */ /* Information of the most recently (s)acked skb */ struct tcp_rack { u64 mstamp; /* (Re)sent time of the skb */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7e2481b9eae1..c82dc42f57c6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4615,7 +4615,8 @@ static void __init tcp_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, prr_out); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out); - CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 31); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, scaling_ratio); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 32); /* RX read-mostly hotpath cache lines */ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq); -- cgit From 666a877deab2bcf8fd11c962d69e687e18168a6f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Feb 2024 14:43:22 +0000 Subject: tcp: move tp->tcp_usec_ts to tcp_sock_read_txrx group tp->tcp_usec_ts is a read mostly field, used in rx and tx fast paths. Fixes: d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables") Signed-off-by: Eric Dumazet Cc: Coco Li Cc: Wei Wang Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/networking/net_cachelines/tcp_sock.rst | 2 +- include/linux/tcp.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst index 803912291479..1c154cbd1848 100644 --- a/Documentation/networking/net_cachelines/tcp_sock.rst +++ b/Documentation/networking/net_cachelines/tcp_sock.rst @@ -44,7 +44,7 @@ u16 advmss - read_m u8 compressed_ack u8:2 dup_ack_counter u8:1 tlp_retrans -u8:1 tcp_usec_ts +u8:1 tcp_usec_ts read_mostly read_mostly u32 chrono_start read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data) u32[3] chrono_stat read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data) u8:2 chrono_type read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 168f5dca6609..a1c47a6d69b0 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -224,6 +224,7 @@ struct tcp_sock { u8 scaling_ratio; /* see tcp_win_from_space() */ u8 chrono_type : 2, /* current chronograph type */ repair : 1, + tcp_usec_ts : 1, /* TSval values in usec */ is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ __cacheline_group_end(tcp_sock_read_txrx); @@ -368,8 +369,7 @@ struct tcp_sock { u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ - tcp_usec_ts:1, /* TSval values in usec */ - unused:4; + unused:5; u8 thin_lto : 1,/* Use linear timeouts for thin streams */ recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ -- cgit From c353c7b7ffb7ae6ed8f3339906fe33c8be6cf344 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Feb 2024 14:43:23 +0000 Subject: net-device: move lstats in net_device_read_txrx dev->lstats is notably used from loopback ndo_start_xmit() and other virtual drivers. Per cpu stats updates are dirtying per-cpu data, but the pointer itself is read-only. Fixes: 43a71cd66b9c ("net-device: reorganize net_device fast path variables") Signed-off-by: Eric Dumazet Cc: Coco Li Cc: Simon Horman Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/networking/net_cachelines/net_device.rst | 4 ++-- include/linux/netdevice.h | 10 +++++----- net/core/dev.c | 3 ++- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index e75a53593bb9..dceb49d56a91 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -136,8 +136,8 @@ struct_netpoll_info* npinfo - possible_net_t nd_net - read_mostly (dev_net)napi_busy_loop,tcp_v(4/6)_rcv,ip(v6)_rcv,ip(6)_input,ip(6)_input_finish void* ml_priv enum_netdev_ml_priv_type ml_priv_type -struct_pcpu_lstats__percpu* lstats -struct_pcpu_sw_netstats__percpu* tstats +struct_pcpu_lstats__percpu* lstats read_mostly dev_lstats_add() +struct_pcpu_sw_netstats__percpu* tstats read_mostly dev_sw_netstats_tx_add() struct_pcpu_dstats__percpu* dstats struct_garp_port* garp_port struct_mrp_port* mrp_port diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 118c40258d07..ef7bfbb98497 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2141,6 +2141,11 @@ struct net_device { /* TXRX read-mostly hotpath */ __cacheline_group_begin(net_device_read_txrx); + union { + struct pcpu_lstats __percpu *lstats; + struct pcpu_sw_netstats __percpu *tstats; + struct pcpu_dstats __percpu *dstats; + }; unsigned int flags; unsigned short hard_header_len; netdev_features_t features; @@ -2395,11 +2400,6 @@ struct net_device { enum netdev_ml_priv_type ml_priv_type; enum netdev_stat_type pcpu_stat_type:8; - union { - struct pcpu_lstats __percpu *lstats; - struct pcpu_sw_netstats __percpu *tstats; - struct pcpu_dstats __percpu *dstats; - }; #if IS_ENABLED(CONFIG_GARP) struct garp_port __rcu *garp_port; diff --git a/net/core/dev.c b/net/core/dev.c index cb2dab0feee0..9bb792cecc16 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11652,11 +11652,12 @@ static void __init net_dev_struct_check(void) CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 160); /* TXRX read-mostly hotpath */ + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, lstats); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr); - CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 30); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 38); /* RX read-mostly hotpath */ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific); -- cgit From 11ba1728be3edb6928791f4c622f154ebe228ae6 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 2 Feb 2024 12:30:26 +0000 Subject: ptrace: Introduce exception_ip arch hook On architectures with delay slot, architecture level instruction pointer (or program counter) in pt_regs may differ from where exception was triggered. Introduce exception_ip hook to invoke architecture code and determine actual instruction pointer to the exception. Link: https://lore.kernel.org/lkml/00d1b813-c55f-4365-8d81-d70258e10b16@app.fastmail.com/ Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/ptrace.h | 2 ++ arch/mips/kernel/ptrace.c | 7 +++++++ include/linux/ptrace.h | 4 ++++ 3 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index daf3cf244ea9..701a233583c2 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -154,6 +154,8 @@ static inline long regs_return_value(struct pt_regs *regs) } #define instruction_pointer(regs) ((regs)->cp0_epc) +extern unsigned long exception_ip(struct pt_regs *regs); +#define exception_ip(regs) exception_ip(regs) #define profile_pc(regs) instruction_pointer(regs) extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index d9df543f7e2c..59288c13b581 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -48,6 +49,12 @@ #define CREATE_TRACE_POINTS #include +unsigned long exception_ip(struct pt_regs *regs) +{ + return exception_epc(regs); +} +EXPORT_SYMBOL(exception_ip); + /* * Called by kernel/ptrace.c when detaching.. * diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index eaaef3ffec22..90507d4afcd6 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -393,6 +393,10 @@ static inline void user_single_step_report(struct pt_regs *regs) #define current_user_stack_pointer() user_stack_pointer(current_pt_regs()) #endif +#ifndef exception_ip +#define exception_ip(x) instruction_pointer(x) +#endif + extern int task_current_syscall(struct task_struct *target, struct syscall_info *info); extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact); -- cgit From 6ac86372102b477083db99a9af8246fb916271b5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 25 Jan 2024 09:15:59 +0100 Subject: gpiolib: add gpiod_to_gpio_device() stub for !GPIOLIB Add empty stub of gpiod_to_gpio_device() when GPIOLIB is not enabled. Cc: Fixes: 370232d096e3 ("gpiolib: provide gpiod_to_gpio_device()") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 9a5c6c76e653..012797e7106d 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -819,6 +819,12 @@ static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) return ERR_PTR(-ENODEV); } +static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) +{ + WARN_ON(1); + return ERR_PTR(-ENODEV); +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { -- cgit From ebe0c15b135b1e4092c25b95d89e9a5899467499 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 25 Jan 2024 09:16:00 +0100 Subject: gpiolib: add gpio_device_get_base() stub for !GPIOLIB Add empty stub of gpio_device_get_base() when GPIOLIB is not enabled. Cc: Fixes: 8c85a102fc4e ("gpiolib: provide gpio_device_get_base()") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 012797e7106d..c1df7698edb0 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -825,6 +825,12 @@ static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) return ERR_PTR(-ENODEV); } +static inline int gpio_device_get_base(struct gpio_device *gdev) +{ + WARN_ON(1); + return -ENODEV; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { -- cgit From 2df8aa3cad407044f2febdbbdf220c6dae839c79 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 25 Jan 2024 09:16:01 +0100 Subject: gpiolib: add gpio_device_get_label() stub for !GPIOLIB Add empty stub of gpio_device_get_label() when GPIOLIB is not enabled. Cc: Fixes: d1f7728259ef ("gpiolib: provide gpio_device_get_label()") Suggested-by: kernel test robot Signed-off-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index c1df7698edb0..7f75c9a51874 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -831,6 +831,12 @@ static inline int gpio_device_get_base(struct gpio_device *gdev) return -ENODEV; } +static inline const char *gpio_device_get_label(struct gpio_device *gdev) +{ + WARN_ON(1); + return NULL; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { -- cgit From 29f6975332479f92233594901c649ff4d71f8cb6 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 5 Feb 2024 11:10:25 -0800 Subject: nvme: implement support for relaxed effects NVM Express TP4167 provides a way for controllers to report a relaxed execution constraint. Specifically, it notifies of exclusivity for IO vs. admin commands instead of grouping these together. If set, then we don't need to freeze IO in order to execute that admin command. The freezing distrupts IO processes, so it's nice to avoid that if the controller tells us it's not necessary. Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 4 ++++ include/linux/nvme.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 60537c9224bf..0a96362912ce 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1153,6 +1153,10 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) effects &= ~NVME_CMD_EFFECTS_CSE_MASK; } else { effects = le32_to_cpu(ctrl->effects->acs[opcode]); + + /* Ignore execution restrictions if any relaxation bits are set */ + if (effects & NVME_CMD_EFFECTS_CSER_MASK) + effects &= ~NVME_CMD_EFFECTS_CSE_MASK; } return effects; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index bc605ec4a3fd..3ef4053ea950 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -646,6 +646,7 @@ enum { NVME_CMD_EFFECTS_NCC = 1 << 2, NVME_CMD_EFFECTS_NIC = 1 << 3, NVME_CMD_EFFECTS_CCC = 1 << 4, + NVME_CMD_EFFECTS_CSER_MASK = GENMASK(15, 14), NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16), NVME_CMD_EFFECTS_UUID_SEL = 1 << 19, NVME_CMD_EFFECTS_SCOPE_MASK = GENMASK(31, 20), -- cgit From 8a566f94104df87a067458351675129bb4e1ece2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 15 Feb 2024 16:22:55 +0200 Subject: seq_buf: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Link: https://lkml.kernel.org/r/20240215142255.400264-1-andriy.shevchenko@linux.intel.com Cc: "Matthew Wilcox (Oracle)" Cc: Andrew Morton Signed-off-by: Andy Shevchenko Signed-off-by: Steven Rostedt (Google) --- include/linux/seq_buf.h | 5 ++++- lib/seq_buf.c | 14 ++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index c44f4b47b945..07b26e751060 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -2,7 +2,10 @@ #ifndef _LINUX_SEQ_BUF_H #define _LINUX_SEQ_BUF_H -#include +#include +#include +#include +#include /* * Trace sequences are used to allow a function to call several other functions diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 010c730ca7fc..dfbfdc497d85 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -13,9 +13,19 @@ * seq_buf_init() more than once to reset the seq_buf to start * from scratch. */ -#include -#include + +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include /** * seq_buf_can_fit - can the new data fit in the current buffer? -- cgit From 6efe4d18796934b8ada66c1c446510e7f2d9b972 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 15 Feb 2024 17:25:06 +0200 Subject: seq_buf: Fix kernel documentation There are plenty of issues with the kernel documentation here: - misspelled word "sequence" - different style of returned value descriptions - missed Return sections - unaligned style of ASCII / NUL-terminated / etc - wrong function references Fix all these. Link: https://lkml.kernel.org/r/20240215152506.598340-1-andriy.shevchenko@linux.intel.com Cc: Andrew Morton Signed-off-by: Andy Shevchenko Reviewed-by: Randy Dunlap Signed-off-by: Steven Rostedt (Google) --- include/linux/seq_buf.h | 12 ++++++------ lib/seq_buf.c | 35 ++++++++++++++++++----------------- 2 files changed, 24 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 07b26e751060..fe41da005970 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -13,7 +13,7 @@ */ /** - * seq_buf - seq buffer structure + * struct seq_buf - seq buffer structure * @buffer: pointer to the buffer * @size: size of the buffer * @len: the amount of data inside the buffer @@ -80,10 +80,10 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) } /** - * seq_buf_str - get %NUL-terminated C string from seq_buf + * seq_buf_str - get NUL-terminated C string from seq_buf * @s: the seq_buf handle * - * This makes sure that the buffer in @s is nul terminated and + * This makes sure that the buffer in @s is NUL-terminated and * safe to read as a string. * * Note, if this is called when the buffer has overflowed, then @@ -93,7 +93,7 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) * After this function is called, s->buffer is safe to use * in string operations. * - * Returns @s->buf after making sure it is terminated. + * Returns: @s->buf after making sure it is terminated. */ static inline const char *seq_buf_str(struct seq_buf *s) { @@ -113,7 +113,7 @@ static inline const char *seq_buf_str(struct seq_buf *s) * @s: the seq_buf handle * @bufp: the beginning of the buffer is stored here * - * Return the number of bytes available in the buffer, or zero if + * Returns: the number of bytes available in the buffer, or zero if * there's no space. */ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) @@ -135,7 +135,7 @@ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) * @num: the number of bytes to commit * * Commit @num bytes of data written to a buffer previously acquired - * by seq_buf_get. To signal an error condition, or that the data + * by seq_buf_get_buf(). To signal an error condition, or that the data * didn't fit in the available space, pass a negative @num value. */ static inline void seq_buf_commit(struct seq_buf *s, int num) diff --git a/lib/seq_buf.c b/lib/seq_buf.c index dfbfdc497d85..f3f3436d60a9 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -32,7 +32,7 @@ * @s: the seq_buf descriptor * @len: The length to see if it can fit in the current buffer * - * Returns true if there's enough unused space in the seq_buf buffer + * Returns: true if there's enough unused space in the seq_buf buffer * to fit the amount of new data according to @len. */ static bool seq_buf_can_fit(struct seq_buf *s, size_t len) @@ -45,7 +45,7 @@ static bool seq_buf_can_fit(struct seq_buf *s, size_t len) * @m: the seq_file descriptor that is the destination * @s: the seq_buf descriptor that is the source. * - * Returns zero on success, non zero otherwise + * Returns: zero on success, non-zero otherwise. */ int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s) { @@ -60,9 +60,9 @@ int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s) * @fmt: printf format string * @args: va_list of arguments from a printf() type function * - * Writes a vnprintf() format into the sequencce buffer. + * Writes a vnprintf() format into the sequence buffer. * - * Returns zero on success, -1 on overflow. + * Returns: zero on success, -1 on overflow. */ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args) { @@ -88,7 +88,7 @@ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args) * * Writes a printf() format into the sequence buffer. * - * Returns zero on success, -1 on overflow. + * Returns: zero on success, -1 on overflow. */ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) { @@ -104,12 +104,12 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) EXPORT_SYMBOL_GPL(seq_buf_printf); /** - * seq_buf_do_printk - printk seq_buf line by line + * seq_buf_do_printk - printk() seq_buf line by line * @s: seq_buf descriptor * @lvl: printk level * * printk()-s a multi-line sequential buffer line by line. The function - * makes sure that the buffer in @s is nul terminated and safe to read + * makes sure that the buffer in @s is NUL-terminated and safe to read * as a string. */ void seq_buf_do_printk(struct seq_buf *s, const char *lvl) @@ -149,7 +149,7 @@ EXPORT_SYMBOL_GPL(seq_buf_do_printk); * This function will take the format and the binary array and finish * the conversion into the ASCII string within the buffer. * - * Returns zero on success, -1 on overflow. + * Returns: zero on success, -1 on overflow. */ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary) { @@ -177,7 +177,7 @@ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary) * * Copy a simple string into the sequence buffer. * - * Returns zero on success, -1 on overflow + * Returns: zero on success, -1 on overflow. */ int seq_buf_puts(struct seq_buf *s, const char *str) { @@ -206,7 +206,7 @@ EXPORT_SYMBOL_GPL(seq_buf_puts); * * Copy a single character into the sequence buffer. * - * Returns zero on success, -1 on overflow + * Returns: zero on success, -1 on overflow. */ int seq_buf_putc(struct seq_buf *s, unsigned char c) { @@ -222,7 +222,7 @@ int seq_buf_putc(struct seq_buf *s, unsigned char c) EXPORT_SYMBOL_GPL(seq_buf_putc); /** - * seq_buf_putmem - write raw data into the sequenc buffer + * seq_buf_putmem - write raw data into the sequence buffer * @s: seq_buf descriptor * @mem: The raw memory to copy into the buffer * @len: The length of the raw memory to copy (in bytes) @@ -231,7 +231,7 @@ EXPORT_SYMBOL_GPL(seq_buf_putc); * buffer and a strcpy() would not work. Using this function allows * for such cases. * - * Returns zero on success, -1 on overflow + * Returns: zero on success, -1 on overflow. */ int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len) { @@ -259,7 +259,7 @@ int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len) * raw memory into the buffer it writes its ASCII representation of it * in hex characters. * - * Returns zero on success, -1 on overflow + * Returns: zero on success, -1 on overflow. */ int seq_buf_putmem_hex(struct seq_buf *s, const void *mem, unsigned int len) @@ -307,7 +307,7 @@ int seq_buf_putmem_hex(struct seq_buf *s, const void *mem, * * Write a path name into the sequence buffer. * - * Returns the number of written bytes on success, -1 on overflow + * Returns: the number of written bytes on success, -1 on overflow. */ int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc) { @@ -342,6 +342,7 @@ int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc) * or until it reaches the end of the content in the buffer (@s->len), * whichever comes first. * + * Returns: * On success, it returns a positive number of the number of bytes * it copied. * @@ -392,11 +393,11 @@ int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, size_t start, int cnt) * linebuf size is maximal length for one line. * 32 * 3 - maximum bytes per line, each printed into 2 chars + 1 for * separating space - * 2 - spaces separating hex dump and ascii representation - * 32 - ascii representation + * 2 - spaces separating hex dump and ASCII representation + * 32 - ASCII representation * 1 - terminating '\0' * - * Returns zero on success, -1 on overflow + * Returns: zero on success, -1 on overflow. */ int seq_buf_hex_dump(struct seq_buf *s, const char *prefix_str, int prefix_type, int rowsize, int groupsize, -- cgit From 68fb3ca0e408e00db1c3f8fccdfa19e274c033be Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 15 Feb 2024 11:14:33 -0800 Subject: update workarounds for gcc "asm goto" issue In commit 4356e9f841f7 ("work around gcc bugs with 'asm goto' with outputs") I did the gcc workaround unconditionally, because the cause of the bad code generation wasn't entirely clear. In the meantime, Jakub Jelinek debugged the issue, and has come up with a fix in gcc [2], which also got backported to the still maintained branches of gcc-11, gcc-12 and gcc-13. Note that while the fix technically wasn't in the original gcc-14 branch, Jakub says: "while it is true that no GCC 14 snapshots until today (or whenever the fix will be committed) have the fix, for GCC trunk it is up to the distros to use the latest snapshot if they use it at all and would allow better testing of the kernel code without the workaround, so that if there are other issues they won't be discovered years later. Most userland code doesn't actually use asm goto with outputs..." so we will consider gcc-14 to be fixed - if somebody is using gcc snapshots of the gcc-14 before the fix, they should upgrade. Note that while the bug goes back to gcc-11, in practice other gcc changes seem to have effectively hidden it since gcc-12.1 as per a bisect by Jakub. So even a gcc-14 snapshot without the fix likely doesn't show actual problems. Also, make the default 'asm_goto_output()' macro mark the asm as volatile by hand, because of an unrelated gcc issue [1] where it doesn't match the documented behavior ("asm goto is always volatile"). Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 [1] Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 [2] Link: https://lore.kernel.org/all/20240208220604.140859-1-seanjc@google.com/ Requested-by: Jakub Jelinek Cc: Uros Bizjak Cc: Nick Desaulniers Cc: Sean Christopherson Cc: Andrew Pinski Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 7 ++++--- include/linux/compiler_types.h | 9 ++++++++- init/Kconfig | 9 +++++++++ 3 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index c1a963be7d28..75bd1692d2e3 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -67,10 +67,9 @@ /* * GCC 'asm goto' with outputs miscompiles certain code sequences: * - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420 - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422 + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 * - * Work it around via the same compiler barrier quirk that we used + * Work around it via the same compiler barrier quirk that we used * to use for the old 'asm goto' workaround. * * Also, always mark such 'asm goto' statements as volatile: all @@ -80,8 +79,10 @@ * * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 */ +#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND #define asm_goto_output(x...) \ do { asm volatile goto(x); asm (""); } while (0) +#endif #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 663d8791c871..0caf354cb94b 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -362,8 +362,15 @@ struct ftrace_likely_data { #define __member_size(p) __builtin_object_size(p, 1) #endif +/* + * Some versions of gcc do not mark 'asm goto' volatile: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 + * + * We do it here by hand, because it doesn't hurt. + */ #ifndef asm_goto_output -#define asm_goto_output(x...) asm goto(x) +#define asm_goto_output(x...) asm volatile goto(x) #endif #ifdef CONFIG_CC_HAS_ASM_INLINE diff --git a/init/Kconfig b/init/Kconfig index deda3d14135b..8426d59cc634 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -89,6 +89,15 @@ config CC_HAS_ASM_GOTO_TIED_OUTPUT # Detect buggy gcc and clang, fixed in gcc-11 clang-14. def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) +config GCC_ASM_GOTO_OUTPUT_WORKAROUND + bool + depends on CC_IS_GCC && CC_HAS_ASM_GOTO_OUTPUT + # Fixed in GCC 14, 13.3, 12.4 and 11.5 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 + default y if GCC_VERSION < 110500 + default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400 + default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300 + config TOOLS_SUPPORT_RELR def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) -- cgit From 9b99c17f7510bed2adbe17751fb8abddba5620bc Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Fri, 12 Jan 2024 12:09:50 -0800 Subject: x86/numa: Fix the address overlap check in numa_fill_memblks() numa_fill_memblks() fills in the gaps in numa_meminfo memblks over a physical address range. To do so, it first creates a list of existing memblks that overlap that address range. The issue is that it is off by one when comparing to the end of the address range, so memblks that do not overlap are selected. The impact of selecting a memblk that does not actually overlap is that an existing memblk may be filled when the expected action is to do nothing and return NUMA_NO_MEMBLK to the caller. The caller can then add a new NUMA node and memblk. Replace the broken open-coded search for address overlap with the memblock helper memblock_addrs_overlap(). Update the kernel doc and in code comments. Suggested by: "Huang, Ying" Fixes: 8f012db27c95 ("x86/numa: Introduce numa_fill_memblks()") Signed-off-by: Alison Schofield Acked-by: Mike Rapoport (IBM) Acked-by: Dave Hansen Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/10a3e6109c34c21a8dd4c513cf63df63481a2b07.1705085543.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- arch/x86/mm/numa.c | 19 +++++++------------ include/linux/memblock.h | 2 ++ mm/memblock.c | 5 +++-- 3 files changed, 12 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index adc497b93f03..8ada9bbfad58 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -944,14 +944,12 @@ static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; * @start: address to begin fill * @end: address to end fill * - * Find and extend numa_meminfo memblks to cover the @start-@end - * physical address range, such that the first memblk includes - * @start, the last memblk includes @end, and any gaps in between - * are filled. + * Find and extend numa_meminfo memblks to cover the physical + * address range @start-@end * * RETURNS: * 0 : Success - * NUMA_NO_MEMBLK : No memblk exists in @start-@end range + * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end */ int __init numa_fill_memblks(u64 start, u64 end) @@ -963,17 +961,14 @@ int __init numa_fill_memblks(u64 start, u64 end) /* * Create a list of pointers to numa_meminfo memblks that - * overlap start, end. Exclude (start == bi->end) since - * end addresses in both a CFMWS range and a memblk range - * are exclusive. - * - * This list of pointers is used to make in-place changes - * that fill out the numa_meminfo memblks. + * overlap start, end. The list is used to make in-place + * changes that fill out the numa_meminfo memblks. */ for (int i = 0; i < mi->nr_blks; i++) { struct numa_memblk *bi = &mi->blk[i]; - if (start < bi->end && end >= bi->start) { + if (memblock_addrs_overlap(start, end - start, bi->start, + bi->end - bi->start)) { blk[count] = &mi->blk[i]; count++; } diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b695f9e946da..e2082240586d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -121,6 +121,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); int memblock_physmem_add(phys_addr_t base, phys_addr_t size); #endif void memblock_trim_memory(phys_addr_t align); +unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2); bool memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); bool memblock_validate_numa_coverage(unsigned long threshold_bytes); diff --git a/mm/memblock.c b/mm/memblock.c index 4dcb2ee35eca..964eb72db539 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -180,8 +180,9 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) /* * Address comparison utilities */ -static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, - phys_addr_t base2, phys_addr_t size2) +unsigned long __init_memblock +memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, + phys_addr_t size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } -- cgit From 13ddaf26be324a7f951891ecd9ccd04466d27458 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 7 Feb 2024 02:25:59 +0800 Subject: mm/swap: fix race when skipping swapcache When skipping swapcache for SWP_SYNCHRONOUS_IO, if two or more threads swapin the same entry at the same time, they get different pages (A, B). Before one thread (T0) finishes the swapin and installs page (A) to the PTE, another thread (T1) could finish swapin of page (B), swap_free the entry, then swap out the possibly modified page reusing the same entry. It breaks the pte_same check in (T0) because PTE value is unchanged, causing ABA problem. Thread (T0) will install a stalled page (A) into the PTE and cause data corruption. One possible callstack is like this: CPU0 CPU1 ---- ---- do_swap_page() do_swap_page() with same entry swap_read_folio() <- read to page A swap_read_folio() <- read to page B ... set_pte_at() swap_free() <- entry is free pte_same() <- Check pass, PTE seems unchanged, but page A is stalled! swap_free() <- page B content lost! set_pte_at() <- staled page A installed! And besides, for ZRAM, swap_free() allows the swap device to discard the entry content, so even if page (B) is not modified, if swap_read_folio() on CPU0 happens later than swap_free() on CPU1, it may also cause data loss. To fix this, reuse swapcache_prepare which will pin the swap entry using the cache flag, and allow only one thread to swap it in, also prevent any parallel code from putting the entry in the cache. Release the pin after PT unlocked. Racers just loop and wait since it's a rare and very short event. A schedule_timeout_uninterruptible(1) call is added to avoid repeated page faults wasting too much CPU, causing livelock or adding too much noise to perf statistics. A similar livelock issue was described in commit 029c4628b2eb ("mm: swap: get rid of livelock in swapin readahead") Reproducer: This race issue can be triggered easily using a well constructed reproducer and patched brd (with a delay in read path) [1]: With latest 6.8 mainline, race caused data loss can be observed easily: $ gcc -g -lpthread test-thread-swap-race.c && ./a.out Polulating 32MB of memory region... Keep swapping out... Starting round 0... Spawning 65536 workers... 32746 workers spawned, wait for done... Round 0: Error on 0x5aa00, expected 32746, got 32743, 3 data loss! Round 0: Error on 0x395200, expected 32746, got 32743, 3 data loss! Round 0: Error on 0x3fd000, expected 32746, got 32737, 9 data loss! Round 0 Failed, 15 data loss! This reproducer spawns multiple threads sharing the same memory region using a small swap device. Every two threads updates mapped pages one by one in opposite direction trying to create a race, with one dedicated thread keep swapping out the data out using madvise. The reproducer created a reproduce rate of about once every 5 minutes, so the race should be totally possible in production. After this patch, I ran the reproducer for over a few hundred rounds and no data loss observed. Performance overhead is minimal, microbenchmark swapin 10G from 32G zram: Before: 10934698 us After: 11157121 us Cached: 13155355 us (Dropping SWP_SYNCHRONOUS_IO flag) [kasong@tencent.com: v4] Link: https://lkml.kernel.org/r/20240219082040.7495-1-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20240206182559.32264-1-ryncsn@gmail.com Fixes: 0bcac06f27d7 ("mm, swap: skip swapcache for swapin of synchronous device") Reported-by: "Huang, Ying" Closes: https://lore.kernel.org/lkml/87bk92gqpx.fsf_-_@yhuang6-desk2.ccr.corp.intel.com/ Link: https://github.com/ryncsn/emm-test-project/tree/master/swap-stress-race [1] Signed-off-by: Kairui Song Reviewed-by: "Huang, Ying" Acked-by: Yu Zhao Acked-by: David Hildenbrand Acked-by: Chris Li Cc: Hugh Dickins Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Yosry Ahmed Cc: Yu Zhao Cc: Barry Song <21cnbao@gmail.com> Cc: SeongJae Park Cc: Signed-off-by: Andrew Morton --- include/linux/swap.h | 5 +++++ mm/memory.c | 20 ++++++++++++++++++++ mm/swap.h | 5 +++++ mm/swapfile.c | 13 +++++++++++++ 4 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 4db00ddad261..8d28f6091a32 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -549,6 +549,11 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } +static inline int swapcache_prepare(swp_entry_t swp) +{ + return 0; +} + static inline void swap_free(swp_entry_t swp) { } diff --git a/mm/memory.c b/mm/memory.c index 15f8b10ea17c..0bfc8b007c01 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3799,6 +3799,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) struct page *page; struct swap_info_struct *si = NULL; rmap_t rmap_flags = RMAP_NONE; + bool need_clear_cache = false; bool exclusive = false; swp_entry_t entry; pte_t pte; @@ -3867,6 +3868,20 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!folio) { if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { + /* + * Prevent parallel swapin from proceeding with + * the cache flag. Otherwise, another thread may + * finish swapin first, free the entry, and swapout + * reusing the same entry. It's undetectable as + * pte_same() returns true due to entry reuse. + */ + if (swapcache_prepare(entry)) { + /* Relax a bit to prevent rapid repeated page faults */ + schedule_timeout_uninterruptible(1); + goto out; + } + need_clear_cache = true; + /* skip swapcache */ folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address, false); @@ -4117,6 +4132,9 @@ unlock: if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); out: + /* Clear the swap cache pin for direct swapin after PTL unlock */ + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; @@ -4131,6 +4149,8 @@ out_release: folio_unlock(swapcache); folio_put(swapcache); } + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; diff --git a/mm/swap.h b/mm/swap.h index 758c46ca671e..fc2f6ade7f80 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -41,6 +41,7 @@ void __delete_from_swap_cache(struct folio *folio, void delete_from_swap_cache(struct folio *folio); void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end); +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry); struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr); struct folio *filemap_get_incore_folio(struct address_space *mapping, @@ -97,6 +98,10 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc) return 0; } +static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ +} + static inline struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr) { diff --git a/mm/swapfile.c b/mm/swapfile.c index 556ff7347d5f..746aa9da5302 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3365,6 +3365,19 @@ int swapcache_prepare(swp_entry_t entry) return __swap_duplicate(entry, SWAP_HAS_CACHE); } +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ + struct swap_cluster_info *ci; + unsigned long offset = swp_offset(entry); + unsigned char usage; + + ci = lock_cluster_or_swap_info(si, offset); + usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE); + unlock_cluster_or_swap_info(si, ci); + if (!usage) + free_swap_slot(entry); +} + struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry)); -- cgit From f3e6b3ae9cfc128af11b665c6ef4022ba2683778 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 18 Feb 2024 14:28:57 -0800 Subject: acpi/ghes: Remove CXL CPER notifications Initial tests with the CXL CPER implementation identified that error reports were being duplicated in the log and the trace event [1]. Then it was discovered that the notification handler took sleeping locks while the GHES event handling runs in spin_lock_irqsave() context [2] While the duplicate reporting was fixed in v6.8-rc4, the fix for the sleeping-lock-vs-atomic collision would enjoy more time to settle and gain some test cycles. Given how late it is in the development cycle, remove the CXL hookup for now and try again during the next merge window. Note that end result is that v6.8 does not emit CXL CPER payloads to the kernel log, but this is in line with the CXL trend to move error reporting to trace events instead of the kernel log. Cc: Ard Biesheuvel Cc: Rafael J. Wysocki Cc: Jonathan Cameron Reviewed-by: Ira Weiny Link: http://lore.kernel.org/r/20240108165855.00002f5a@Huawei.com [1] Closes: http://lore.kernel.org/r/b963c490-2c13-4b79-bbe7-34c6568423c7@moroto.mountain [2] Signed-off-by: Dan Williams --- drivers/acpi/apei/ghes.c | 63 ----------------------------------------------- drivers/cxl/pci.c | 57 +----------------------------------------- include/linux/cxl-event.h | 18 -------------- 3 files changed, 1 insertion(+), 137 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index fe825a432c5b..ab2a82cb1b0b 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -674,52 +673,6 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, schedule_work(&entry->work); } -/* - * Only a single callback can be registered for CXL CPER events. - */ -static DECLARE_RWSEM(cxl_cper_rw_sem); -static cxl_cper_callback cper_callback; - -static void cxl_cper_post_event(enum cxl_event_type event_type, - struct cxl_cper_event_rec *rec) -{ - if (rec->hdr.length <= sizeof(rec->hdr) || - rec->hdr.length > sizeof(*rec)) { - pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n", - rec->hdr.length); - return; - } - - if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) { - pr_err(FW_WARN "CXL CPER invalid event\n"); - return; - } - - guard(rwsem_read)(&cxl_cper_rw_sem); - if (cper_callback) - cper_callback(event_type, rec); -} - -int cxl_cper_register_callback(cxl_cper_callback callback) -{ - guard(rwsem_write)(&cxl_cper_rw_sem); - if (cper_callback) - return -EINVAL; - cper_callback = callback; - return 0; -} -EXPORT_SYMBOL_NS_GPL(cxl_cper_register_callback, CXL); - -int cxl_cper_unregister_callback(cxl_cper_callback callback) -{ - guard(rwsem_write)(&cxl_cper_rw_sem); - if (callback != cper_callback) - return -EINVAL; - cper_callback = NULL; - return 0; -} -EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_callback, CXL); - static bool ghes_do_proc(struct ghes *ghes, const struct acpi_hest_generic_status *estatus) { @@ -754,22 +707,6 @@ static bool ghes_do_proc(struct ghes *ghes, } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { queued = ghes_handle_arm_hw_error(gdata, sev, sync); - } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) { - struct cxl_cper_event_rec *rec = - acpi_hest_get_payload(gdata); - - cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec); - } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) { - struct cxl_cper_event_rec *rec = - acpi_hest_get_payload(gdata); - - cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec); - } else if (guid_equal(sec_type, - &CPER_SEC_CXL_MEM_MODULE_GUID)) { - struct cxl_cper_event_rec *rec = - acpi_hest_get_payload(gdata); - - cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec); } else { void *err = acpi_hest_get_payload(gdata); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 233e7c42c161..2ff361e756d6 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -974,61 +974,6 @@ static struct pci_driver cxl_pci_driver = { }, }; -#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0) -static void cxl_cper_event_call(enum cxl_event_type ev_type, - struct cxl_cper_event_rec *rec) -{ - struct cper_cxl_event_devid *device_id = &rec->hdr.device_id; - struct pci_dev *pdev __free(pci_dev_put) = NULL; - enum cxl_event_log_type log_type; - struct cxl_dev_state *cxlds; - unsigned int devfn; - u32 hdr_flags; - - devfn = PCI_DEVFN(device_id->device_num, device_id->func_num); - pdev = pci_get_domain_bus_and_slot(device_id->segment_num, - device_id->bus_num, devfn); - if (!pdev) - return; - - guard(pci_dev)(pdev); - if (pdev->driver != &cxl_pci_driver) - return; - - cxlds = pci_get_drvdata(pdev); - if (!cxlds) - return; - - /* Fabricate a log type */ - hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags); - log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags); - - cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type, - &uuid_null, &rec->event); -} - -static int __init cxl_pci_driver_init(void) -{ - int rc; - - rc = cxl_cper_register_callback(cxl_cper_event_call); - if (rc) - return rc; - - rc = pci_register_driver(&cxl_pci_driver); - if (rc) - cxl_cper_unregister_callback(cxl_cper_event_call); - - return rc; -} - -static void __exit cxl_pci_driver_exit(void) -{ - pci_unregister_driver(&cxl_pci_driver); - cxl_cper_unregister_callback(cxl_cper_event_call); -} - -module_init(cxl_pci_driver_init); -module_exit(cxl_pci_driver_exit); +module_pci_driver(cxl_pci_driver); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 91125eca4c8a..03fa6d50d46f 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -140,22 +140,4 @@ struct cxl_cper_event_rec { union cxl_event event; } __packed; -typedef void (*cxl_cper_callback)(enum cxl_event_type type, - struct cxl_cper_event_rec *rec); - -#ifdef CONFIG_ACPI_APEI_GHES -int cxl_cper_register_callback(cxl_cper_callback callback); -int cxl_cper_unregister_callback(cxl_cper_callback callback); -#else -static inline int cxl_cper_register_callback(cxl_cper_callback callback) -{ - return 0; -} - -static inline int cxl_cper_unregister_callback(cxl_cper_callback callback) -{ - return 0; -} -#endif - #endif /* _LINUX_CXL_EVENT_H */ -- cgit From b820de741ae48ccf50dd95e297889c286ff4f760 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 15 Feb 2024 12:47:38 -0800 Subject: fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio If kiocb_set_cancel_fn() is called for I/O submitted via io_uring, the following kernel warning appears: WARNING: CPU: 3 PID: 368 at fs/aio.c:598 kiocb_set_cancel_fn+0x9c/0xa8 Call trace: kiocb_set_cancel_fn+0x9c/0xa8 ffs_epfile_read_iter+0x144/0x1d0 io_read+0x19c/0x498 io_issue_sqe+0x118/0x27c io_submit_sqes+0x25c/0x5fc __arm64_sys_io_uring_enter+0x104/0xab0 invoke_syscall+0x58/0x11c el0_svc_common+0xb4/0xf4 do_el0_svc+0x2c/0xb0 el0_svc+0x2c/0xa4 el0t_64_sync_handler+0x68/0xb4 el0t_64_sync+0x1a4/0x1a8 Fix this by setting the IOCB_AIO_RW flag for read and write I/O that is submitted by libaio. Suggested-by: Jens Axboe Cc: Christoph Hellwig Cc: Avi Kivity Cc: Sandeep Dhavale Cc: Jens Axboe Cc: Greg Kroah-Hartman Cc: Kent Overstreet Cc: stable@vger.kernel.org Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240215204739.2677806-2-bvanassche@acm.org Signed-off-by: Christian Brauner --- fs/aio.c | 9 ++++++++- include/linux/fs.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index bb2ff48991f3..da18dbcfcb22 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -593,6 +593,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) struct kioctx *ctx = req->ki_ctx; unsigned long flags; + /* + * kiocb didn't come from aio or is neither a read nor a write, hence + * ignore it. + */ + if (!(iocb->ki_flags & IOCB_AIO_RW)) + return; + if (WARN_ON_ONCE(!list_empty(&req->ki_list))) return; @@ -1509,7 +1516,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) req->ki_complete = aio_complete_rw; req->private = NULL; req->ki_pos = iocb->aio_offset; - req->ki_flags = req->ki_filp->f_iocb_flags; + req->ki_flags = req->ki_filp->f_iocb_flags | IOCB_AIO_RW; if (iocb->aio_flags & IOCB_FLAG_RESFD) req->ki_flags |= IOCB_EVENTFD; if (iocb->aio_flags & IOCB_FLAG_IOPRIO) { diff --git a/include/linux/fs.h b/include/linux/fs.h index ed5966a70495..c2dcc98cb4c8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -352,6 +352,8 @@ enum rw_hint { * unrelated IO (like cache flushing, new IO generation, etc). */ #define IOCB_DIO_CALLER_COMP (1 << 22) +/* kiocb is a read or write operation submitted by fs/aio.c. */ +#define IOCB_AIO_RW (1 << 23) /* for use in trace events */ #define TRACE_IOCB_STRINGS \ -- cgit From 65d4418c5002ec5b0e529455bf4152fd43459079 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 22 Feb 2024 10:07:41 -0400 Subject: iommu/sva: Restore SVA handle sharing Prior to commit 092edaddb660 ("iommu: Support mm PASID 1:n with sva domains") the code allowed a SVA handle to be bound multiple times to the same (mm, device) pair. This was alluded to in the kdoc comment, but we had understood this to be more a remark about allowing multiple devices, not a literal same-driver re-opening the same SVA. It turns out uacce and idxd were both relying on the core code to handle reference counting for same-device same-mm scenarios. As this looks hard to resolve in the drivers bring it back to the core code. The new design has changed the meaning of the domain->users refcount to refer to the number of devices that are sharing that domain for the same mm. This is part of the design to lift the SVA domain de-duplication out of the drivers. Return the old behavior by explicitly de-duplicating the struct iommu_sva handle. The same (mm, device) will return the same handle pointer and the core code will handle tracking this. The last unbind of the handle will destroy it. Fixes: 092edaddb660 ("iommu: Support mm PASID 1:n with sva domains") Reported-by: Zhangfei Gao Closes: https://lore.kernel.org/all/20240221110658.529-1-zhangfei.gao@linaro.org/ Tested-by: Zhangfei Gao Signed-off-by: Jason Gunthorpe Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/0-v1-9455fc497a6f+3b4-iommu_sva_sharing_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-sva.c | 17 +++++++++++++++++ include/linux/iommu.h | 3 +++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index c3fc9201d0be..7f91c8d0064b 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -41,6 +41,7 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de } iommu_mm->pasid = pasid; INIT_LIST_HEAD(&iommu_mm->sva_domains); + INIT_LIST_HEAD(&iommu_mm->sva_handles); /* * Make sure the write to mm->iommu_mm is not reordered in front of * initialization to iommu_mm fields. If it does, readers may see a @@ -82,6 +83,14 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm goto out_unlock; } + list_for_each_entry(handle, &mm->iommu_mm->sva_handles, handle_item) { + if (handle->dev == dev) { + refcount_inc(&handle->users); + mutex_unlock(&iommu_sva_lock); + return handle; + } + } + handle = kzalloc(sizeof(*handle), GFP_KERNEL); if (!handle) { ret = -ENOMEM; @@ -108,7 +117,9 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm if (ret) goto out_free_domain; domain->users = 1; + refcount_set(&handle->users, 1); list_add(&domain->next, &mm->iommu_mm->sva_domains); + list_add(&handle->handle_item, &mm->iommu_mm->sva_handles); out: mutex_unlock(&iommu_sva_lock); @@ -141,6 +152,12 @@ void iommu_sva_unbind_device(struct iommu_sva *handle) struct device *dev = handle->dev; mutex_lock(&iommu_sva_lock); + if (!refcount_dec_and_test(&handle->users)) { + mutex_unlock(&iommu_sva_lock); + return; + } + list_del(&handle->handle_item); + iommu_detach_device_pasid(domain, dev, iommu_mm->pasid); if (--domain->users == 0) { list_del(&domain->next); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1ea2a820e1eb..5e27cb3a3be9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -892,11 +892,14 @@ struct iommu_fwspec { struct iommu_sva { struct device *dev; struct iommu_domain *domain; + struct list_head handle_item; + refcount_t users; }; struct iommu_mm_data { u32 pasid; struct list_head sva_domains; + struct list_head sva_handles; }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, -- cgit From c1b967d03c5d570ed7b90a88031fa2af34bf5b20 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2023 22:11:26 -0400 Subject: nfs: fix UAF on pathwalk running into umount NFS ->d_revalidate(), ->permission() and ->get_link() need to access some parts of nfs_server when called in RCU mode: server->flags server->caps *(server->io_stats) and, worst of all, call server->nfs_client->rpc_ops->have_delegation (the last one - as NFS_PROTO(inode)->have_delegation()). We really don't want to RCU-delay the entire nfs_free_server() (it would have to be done with schedule_work() from RCU callback, since it can't be made to run from interrupt context), but actual freeing of nfs_server and ->io_stats can be done via call_rcu() just fine. nfs_client part is handled simply by making nfs_free_client() use kfree_rcu(). Acked-by: Christian Brauner Signed-off-by: Al Viro --- fs/nfs/client.c | 13 ++++++++++--- include/linux/nfs_fs_sb.h | 2 ++ 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 44eca51b2808..fbdc9ca80f71 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -246,7 +246,7 @@ void nfs_free_client(struct nfs_client *clp) put_nfs_version(clp->cl_nfs_mod); kfree(clp->cl_hostname); kfree(clp->cl_acceptor); - kfree(clp); + kfree_rcu(clp, rcu); } EXPORT_SYMBOL_GPL(nfs_free_client); @@ -1006,6 +1006,14 @@ struct nfs_server *nfs_alloc_server(void) } EXPORT_SYMBOL_GPL(nfs_alloc_server); +static void delayed_free(struct rcu_head *p) +{ + struct nfs_server *server = container_of(p, struct nfs_server, rcu); + + nfs_free_iostats(server->io_stats); + kfree(server); +} + /* * Free up a server record */ @@ -1031,10 +1039,9 @@ void nfs_free_server(struct nfs_server *server) ida_destroy(&server->lockowner_id); ida_destroy(&server->openowner_id); - nfs_free_iostats(server->io_stats); put_cred(server->cred); - kfree(server); nfs_release_automount_timer(); + call_rcu(&server->rcu, delayed_free); } EXPORT_SYMBOL_GPL(nfs_free_server); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index cd797e00fe35..92de074e63b9 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -124,6 +124,7 @@ struct nfs_client { char cl_ipaddr[48]; struct net *cl_net; struct list_head pending_cb_stateids; + struct rcu_head rcu; }; /* @@ -265,6 +266,7 @@ struct nfs_server { const struct cred *cred; bool has_sec_mnt_opts; struct kobject kobj; + struct rcu_head rcu; }; /* Server capabilities */ -- cgit From e31f0a57ae1ab2f6e17adb8e602bc120ad722232 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Sep 2023 00:12:00 -0400 Subject: procfs: make freeing proc_fs_info rcu-delayed makes proc_pid_ns() safe from rcu pathwalk (put_pid_ns() is still synchronous, but that's not a problem - it does rcu-delay everything that needs to be) Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/proc/root.c | 2 +- include/linux/proc_fs.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/proc/root.c b/fs/proc/root.c index b55dbc70287b..06a297a27ba3 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -271,7 +271,7 @@ static void proc_kill_sb(struct super_block *sb) kill_anon_super(sb); put_pid_ns(fs_info->pid_ns); - kfree(fs_info); + kfree_rcu(fs_info, rcu); } static struct file_system_type proc_fs_type = { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index de407e7c3b55..0b2a89854440 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -65,6 +65,7 @@ struct proc_fs_info { kgid_t pid_gid; enum proc_hidepid hide_pid; enum proc_pidonly pidonly; + struct rcu_head rcu; }; static inline struct proc_fs_info *proc_sb_info(struct super_block *sb) -- cgit